├── .gitignore
├── LICENSE
├── README.md
├── datasets
├── dataset_factory.py
├── dataset_utils.py
├── pascalvoc_2007.py
├── pascalvoc_2012.py
├── pascalvoc_common.py
└── pascalvoc_to_tfrecords.py
├── deployment
└── model_deploy.py
├── eval_ssd_network.py
├── nets
├── custom_layers.py
├── mobilenet_V2.py
├── nets_factory.py
├── ssd_300_mobilenet2.py
├── ssd_common.py
└── ssd_vgg_300.py
├── preprocessing
├── preprocessing_factory.py
├── ssd_vgg_preprocessing.py
└── tf_image.py
├── ssd_visualize.py
├── tf_convert_data.py
├── tf_extended
├── __init__.py
├── bboxes.py
├── image.py
├── math.py
├── metrics.py
└── tensors.py
├── tf_utils.py
├── train.py
└── visualization.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Fanbinqi
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | SSD: Single Shot MultiBox Detector in TensorFlow
3 | =======
4 | A TensorFlow implementation of [SSD](https://arxiv.org/abs/1512.02325) from the 2016 paper by Wei Liu. SSD is a classic one-stage detection framework and is widely used. Our code is based on [balancap/SSD-Tensorflow](https://github.com/balancap/SSD-Tensorflow). The official, original Caffe code can be found in [Caffe](https://github.com/weiliu89/caffe/tree/ssd).
5 |
6 | DATASET
7 | -------
8 |
9 | Edit the dataset and path settings in `tf_convert_data.py`, then run `python tf_convert_data.py`.
10 | Note that this command generates a collection of TF-Records instead of a single file, in order to ease shuffling during training.
11 |
12 |
13 | Pre-trained model
14 | -------------------------------
15 | SSD300 trained on VOC0712: [balancap/SSD-Tensorflow](https://github.com/balancap/SSD-Tensorflow)
16 |
17 | Train
18 | ---------
19 | Run `python train.py`. You can track your training in real time with TensorBoard.
20 | On the CITY data set, single-class car detection has reached 84% mAP.
21 |
22 | In addition
23 | -------
24 | We also implemented *Mobilenet2-SSD*; you can change the framework in `nets/ssd_300_mobilenet2.py`. Mobilenet-v2 is an improved version of Mobilenet, but we found that it does not bring a big improvement for detection.
25 |
26 | Modified Network
27 | ---------------------
28 | There are two improved network structures for SSD, [CEBNet](https://github.com/dlyldxwl/CEBNet) ICME2019, and [FFBNet](https://github.com/fanbinqi/FFBNet) ICIP2019.
29 |
--------------------------------------------------------------------------------
/datasets/dataset_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from datasets import pascalvoc_2007
6 | from datasets import pascalvoc_2012
7 |
# Registry of supported datasets: maps the dataset name accepted by
# `get_dataset` to the module whose `get_split` function builds it.
datasets_map = {
    'pascalvoc_2007' : pascalvoc_2007,
    'pascalvoc_2012' : pascalvoc_2012,
}
12 |
def get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None):
    """Given a dataset name and a split_name returns a Dataset.

    The call is forwarded to the `get_split` function of the module registered
    under `name` in `datasets_map`.

    Args:
      name: String, the name of the dataset (a key of `datasets_map`).
      split_name: A train/test split name.
      dataset_dir: The directory where the dataset files are stored.
      file_pattern: The file pattern to use for matching the dataset source
        files. If left as `None`, the dataset module picks its own default.
      reader: The subclass of tf.ReaderBase. If left as `None`, then the default
        reader defined by each dataset is used.

    Returns:
      The object returned by the selected module's `get_split`.

    Raises:
      ValueError: If the dataset `name` is unknown.
    """
    if name not in datasets_map:
        raise ValueError('Name of dataset unknown %s' % name)
    dataset_module = datasets_map[name]
    return dataset_module.get_split(split_name,
                                    dataset_dir,
                                    file_pattern,
                                    reader)
--------------------------------------------------------------------------------
/datasets/dataset_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains utilities for downloading and converting datasets."""
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import os
21 | import sys
22 | import tarfile
23 |
24 | from six.moves import urllib
25 | import tensorflow as tf
26 |
# Default file name of the label map used by the label-file helpers in this
# module (write_label_file, has_labels, read_label_file).
LABELS_FILENAME = 'labels.txt'
28 |
29 |
def int64_feature(value):
    """Wrapper for inserting int64 features into Example proto.

    Args:
      value: A single integer, or a list/tuple of integers.

    Returns:
      A `tf.train.Feature` whose `int64_list` holds the value(s).
    """
    # Tuples are treated as sequences too; previously a tuple was wrapped as a
    # single element, which is not a valid int64 value.
    if isinstance(value, tuple):
        value = list(value)
    elif not isinstance(value, list):
        # A scalar becomes a one-element list.
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
36 |
37 |
def float_feature(value):
    """Wrapper for inserting float features into Example proto.

    Args:
      value: A single float, or a list/tuple of floats.

    Returns:
      A `tf.train.Feature` whose `float_list` holds the value(s).
    """
    # Tuples are treated as sequences too; previously a tuple was wrapped as a
    # single element, which is not a valid float value.
    if isinstance(value, tuple):
        value = list(value)
    elif not isinstance(value, list):
        # A scalar becomes a one-element list.
        value = [value]
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))
44 |
45 |
def bytes_feature(value):
    """Wrapper for inserting bytes features into Example proto.

    Args:
      value: A single bytes object, or a list/tuple of bytes objects.

    Returns:
      A `tf.train.Feature` whose `bytes_list` holds the value(s).
    """
    # Tuples are treated as sequences too; previously a tuple was wrapped as a
    # single element, which is not a valid bytes value.
    if isinstance(value, tuple):
        value = list(value)
    elif not isinstance(value, list):
        # A single bytes object becomes a one-element list.
        value = [value]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
52 |
53 |
def image_to_tfexample(image_data, image_format, height, width, class_id):
    """Build a `tf.train.Example` describing one classified image.

    Args:
      image_data: Encoded image content, stored under 'image/encoded'.
      image_format: Image format marker, stored under 'image/format'.
      height: Image height, stored under 'image/height'.
      width: Image width, stored under 'image/width'.
      class_id: Class label, stored under 'image/class/label'.

    Returns:
      A `tf.train.Example` holding the five features above.
    """
    feature_map = {
        'image/encoded': bytes_feature(image_data),
        'image/format': bytes_feature(image_format),
        'image/class/label': int64_feature(class_id),
        'image/height': int64_feature(height),
        'image/width': int64_feature(width),
    }
    features = tf.train.Features(feature=feature_map)
    return tf.train.Example(features=features)
62 |
63 |
def download_and_uncompress_tarball(tarball_url, dataset_dir):
    """Downloads the `tarball_url` and uncompresses it locally.

    The archive is saved in `dataset_dir` under the last path component of the
    URL and then extracted into the same directory. Download progress is
    written to stdout.

    Args:
      tarball_url: The URL of a tarball file.
      dataset_dir: The directory where the temporary files are stored.
    """
    filename = tarball_url.split('/')[-1]
    filepath = os.path.join(dataset_dir, filename)

    def _progress(count, block_size, total_size):
        # The total size may be reported as unknown (non-positive); skip the
        # percentage in that case instead of dividing by it.
        if total_size > 0:
            percent = float(count * block_size) / float(total_size) * 100.0
            sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename, percent))
        else:
            sys.stdout.write('\r>> Downloading %s' % filename)
        sys.stdout.flush()

    filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    # Use a context manager so the archive handle is closed after extraction.
    with tarfile.open(filepath, 'r:gz') as tarball:
        # NOTE(security): extractall() trusts the member paths stored in the
        # archive; only use this helper with tarballs from a trusted source.
        tarball.extractall(dataset_dir)
83 |
84 |
def write_label_file(labels_to_class_names, dataset_dir,
                     filename=LABELS_FILENAME):
    """Write the label map to a text file, one 'label:class_name' per line.

    Args:
      labels_to_class_names: A map of (integer) labels to class names.
      dataset_dir: The directory in which the labels file should be written.
      filename: The filename where the class names are written.
    """
    target_path = os.path.join(dataset_dir, filename)
    with tf.gfile.Open(target_path, 'w') as label_file:
        for label in labels_to_class_names:
            entry = '%d:%s\n' % (label, labels_to_class_names[label])
            label_file.write(entry)
99 |
100 |
def has_labels(dataset_dir, filename=LABELS_FILENAME):
    """Tell whether the label map file is present in the dataset directory.

    Args:
      dataset_dir: The directory in which the labels file is found.
      filename: The filename where the class names are written.

    Returns:
      `True` if the labels file exists and `False` otherwise.
    """
    labels_path = os.path.join(dataset_dir, filename)
    return tf.gfile.Exists(labels_path)
112 |
113 |
def read_label_file(dataset_dir, filename=LABELS_FILENAME):
    """Parse the labels file into a mapping from label ID to class name.

    Every non-empty line is split at its first ':'; the part before it is
    converted with `int` and the part after it is kept as sliced from the file
    content (the file is opened in binary mode).

    Args:
      dataset_dir: The directory in which the labels file is found.
      filename: The filename where the class names are written.

    Returns:
      A map from a label (integer) to class name.
    """
    labels_path = os.path.join(dataset_dir, filename)
    with tf.gfile.Open(labels_path, 'rb') as label_file:
        content = label_file.read()

    labels_to_class_names = {}
    for entry in content.split(b'\n'):
        # Skip empty lines (e.g. the one produced by a trailing newline).
        if not entry:
            continue
        separator = entry.index(b':')
        label = int(entry[:separator])
        labels_to_class_names[label] = entry[separator + 1:]
    return labels_to_class_names
135 |
--------------------------------------------------------------------------------
/datasets/pascalvoc_2007.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Provides data for the Pascal VOC Dataset (images + annotations).
16 | """
17 | import tensorflow as tf
18 | from datasets import pascalvoc_common
19 |
# Short alias for the TF-Slim package.
slim = tf.contrib.slim

# Default TFRecord file pattern; the '%s' placeholder is filled with the split
# name by pascalvoc_common.get_split.
FILE_PATTERN = 'voc_2007_%s_*.tfrecord'
# Human-readable description of each item exposed by the dataset.
ITEMS_TO_DESCRIPTIONS = {
    'image': 'A color image of varying height and width.',
    'shape': 'Shape of the image',
    'object/bbox': 'A list of bounding boxes, one per each object.',
    'object/label': 'A list of labels, one per each object.',
}
# (Images, Objects) statistics on every class.
TRAIN_STATISTICS = {
    'none': (0, 0),
    'aeroplane': (238, 306),
    'bicycle': (243, 353),
    'bird': (330, 486),
    'boat': (181, 290),
    'bottle': (244, 505),
    'bus': (186, 229),
    'car': (713, 1250),
    'cat': (337, 376),
    'chair': (445, 798),
    'cow': (141, 259),
    'diningtable': (200, 215),
    'dog': (421, 510),
    'horse': (287, 362),
    'motorbike': (245, 339),
    'person': (2008, 4690),
    'pottedplant': (245, 514),
    'sheep': (96, 257),
    'sofa': (229, 248),
    'train': (261, 297),
    'tvmonitor': (256, 324),
    'total': (5011, 12608),
}
# NOTE(review): every class is (1, 1) and the total is (20, 20), which looks
# like placeholder data rather than real test-set counts - confirm.
TEST_STATISTICS = {
    'none': (0, 0),
    'aeroplane': (1, 1),
    'bicycle': (1, 1),
    'bird': (1, 1),
    'boat': (1, 1),
    'bottle': (1, 1),
    'bus': (1, 1),
    'car': (1, 1),
    'cat': (1, 1),
    'chair': (1, 1),
    'cow': (1, 1),
    'diningtable': (1, 1),
    'dog': (1, 1),
    'horse': (1, 1),
    'motorbike': (1, 1),
    'person': (1, 1),
    'pottedplant': (1, 1),
    'sheep': (1, 1),
    'sofa': (1, 1),
    'train': (1, 1),
    'tvmonitor': (1, 1),
    'total': (20, 20),
}
# Number of samples per split; passed to pascalvoc_common.get_split, which
# also uses the keys to validate the requested split name.
SPLITS_TO_SIZES = {
    'train': 5011,
    'test': 4952,
}
# Per-split statistics tables; not referenced by get_split in this module.
SPLITS_TO_STATISTICS = {
    'train': TRAIN_STATISTICS,
    'test': TEST_STATISTICS,
}
# Number of classes reported in the dataset description.
NUM_CLASSES = 20
87 |
88 |
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading Pascal VOC 2007.

    Thin wrapper around `pascalvoc_common.get_split` that supplies this
    module's sizes, item descriptions and class count.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the split
        name can be inserted. Falls back to `FILE_PATTERN` when empty or None.
      reader: The TensorFlow reader type.

    Returns:
      A `Dataset` namedtuple.

    Raises:
      ValueError: if `split_name` is not a valid train/test split.
    """
    pattern = file_pattern if file_pattern else FILE_PATTERN
    return pascalvoc_common.get_split(
        split_name, dataset_dir, pattern, reader,
        SPLITS_TO_SIZES, ITEMS_TO_DESCRIPTIONS, NUM_CLASSES)
113 |
--------------------------------------------------------------------------------
/datasets/pascalvoc_2012.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Provides data for the Pascal VOC Dataset (images + annotations).
16 | """
17 | import tensorflow as tf
18 | from datasets import pascalvoc_common
19 |
# Short alias for the TF-Slim package.
slim = tf.contrib.slim

# Default TFRecord file pattern; the '%s' placeholder is filled with the split
# name by pascalvoc_common.get_split.
FILE_PATTERN = 'voc_2012_%s_*.tfrecord'
# Human-readable description of each item exposed by the dataset.
ITEMS_TO_DESCRIPTIONS = {
    'image': 'A color image of varying height and width.',
    'shape': 'Shape of the image',
    'object/bbox': 'A list of bounding boxes, one per each object.',
    'object/label': 'A list of labels, one per each object.',
}
# (Images, Objects) statistics on every class.
TRAIN_STATISTICS = {
    'none': (0, 0),
    'aeroplane': (670, 865),
    'bicycle': (552, 711),
    'bird': (765, 1119),
    'boat': (508, 850),
    'bottle': (706, 1259),
    'bus': (421, 593),
    'car': (1161, 2017),
    'cat': (1080, 1217),
    'chair': (1119, 2354),
    'cow': (303, 588),
    'diningtable': (538, 609),
    'dog': (1286, 1515),
    'horse': (482, 710),
    'motorbike': (526, 713),
    'person': (4087, 8566),
    'pottedplant': (527, 973),
    'sheep': (325, 813),
    'sofa': (507, 566),
    'train': (544, 628),
    'tvmonitor': (575, 784),
    'total': (11540, 27450),
}
# Number of samples per split; passed to pascalvoc_common.get_split, which
# also uses the keys to validate the requested split name.
# NOTE(review): 17125 differs from the 11540 images in TRAIN_STATISTICS'
# 'total' entry - confirm which figure matches the generated TFRecords.
SPLITS_TO_SIZES = {
    'train': 17125,
}
# Per-split statistics tables; not referenced by get_split in this module.
SPLITS_TO_STATISTICS = {
    'train': TRAIN_STATISTICS,
}
# Number of classes reported in the dataset description.
NUM_CLASSES = 20
61 |
62 |
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading Pascal VOC 2012.

    Thin wrapper around `pascalvoc_common.get_split` that supplies this
    module's sizes, item descriptions and class count.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the split
        name can be inserted. Falls back to `FILE_PATTERN` when empty or None.
      reader: The TensorFlow reader type.

    Returns:
      A `Dataset` namedtuple.

    Raises:
      ValueError: if `split_name` is not a valid train/test split.
    """
    pattern = file_pattern if file_pattern else FILE_PATTERN
    return pascalvoc_common.get_split(
        split_name, dataset_dir, pattern, reader,
        SPLITS_TO_SIZES, ITEMS_TO_DESCRIPTIONS, NUM_CLASSES)
87 |
88 |
--------------------------------------------------------------------------------
/datasets/pascalvoc_common.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Provides data for the Pascal VOC Dataset (images + annotations).
16 | """
17 | import os
18 |
19 | import tensorflow as tf
20 | from datasets import dataset_utils
21 |
# Short alias for the TF-Slim package (decoders and Dataset used below).
slim = tf.contrib.slim

# Maps each Pascal VOC class name to a pair (integer label, category name).
# 'none' with label 0 is the background entry; object classes use 1..20.
VOC_LABELS = {
    'none': (0, 'Background'),
    'aeroplane': (1, 'Vehicle'),
    'bicycle': (2, 'Vehicle'),
    'bird': (3, 'Animal'),
    'boat': (4, 'Vehicle'),
    'bottle': (5, 'Indoor'),
    'bus': (6, 'Vehicle'),
    'car': (7, 'Vehicle'),
    'cat': (8, 'Animal'),
    'chair': (9, 'Indoor'),
    'cow': (10, 'Animal'),
    'diningtable': (11, 'Indoor'),
    'dog': (12, 'Animal'),
    'horse': (13, 'Animal'),
    'motorbike': (14, 'Vehicle'),
    'person': (15, 'Person'),
    'pottedplant': (16, 'Indoor'),
    'sheep': (17, 'Animal'),
    'sofa': (18, 'Indoor'),
    'train': (19, 'Vehicle'),
    'tvmonitor': (20, 'Indoor'),
}
47 |
48 |
def get_split(split_name, dataset_dir, file_pattern, reader,
              split_to_sizes, items_to_descriptions, num_classes):
    """Gets a dataset tuple with instructions for reading Pascal VOC dataset.

    Args:
      split_name: A train/test split name; must be a key of `split_to_sizes`.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the split
        name can be inserted.
      reader: The TensorFlow reader type; `tf.TFRecordReader` when `None`.
      split_to_sizes: Mapping from split name to its number of samples.
      items_to_descriptions: Mapping from item name to a description string,
        forwarded unchanged to the returned dataset.
      num_classes: Number of classes, forwarded unchanged to the returned
        dataset.

    Returns:
      A `Dataset` namedtuple.

    Raises:
      ValueError: if `split_name` is not a valid train/test split.
    """
    if split_name not in split_to_sizes:
        raise ValueError('split name %s was not recognized.' % split_name)
    data_sources = os.path.join(dataset_dir, file_pattern % split_name)

    # `None` is allowed so that dataset_factory can rely on the default reader.
    record_reader = tf.TFRecordReader if reader is None else reader

    # Features stored in the Pascal VOC TFRecords.
    feature_spec = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/channels': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
    }

    # Handlers turning the stored features into the items exposed to callers.
    decoders = slim.tfexample_decoder
    item_handlers = {
        'image': decoders.Image('image/encoded', 'image/format'),
        'shape': decoders.Tensor('image/shape'),
        'object/bbox': decoders.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'object/label': decoders.Tensor('image/object/bbox/label'),
        'object/difficult': decoders.Tensor('image/object/bbox/difficult'),
        'object/truncated': decoders.Tensor('image/object/bbox/truncated'),
    }
    example_decoder = decoders.TFExampleDecoder(feature_spec, item_handlers)

    # The label map is optional: it is only loaded when the file exists.
    if dataset_utils.has_labels(dataset_dir):
        label_names = dataset_utils.read_label_file(dataset_dir)
    else:
        label_names = None

    return slim.dataset.Dataset(
        data_sources=data_sources,
        reader=record_reader,
        decoder=example_decoder,
        num_samples=split_to_sizes[split_name],
        items_to_descriptions=items_to_descriptions,
        num_classes=num_classes,
        labels_to_names=label_names)
117 |
--------------------------------------------------------------------------------
/datasets/pascalvoc_to_tfrecords.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Converts Pascal VOC data to TFRecords file format with Example protos.
16 |
17 | The raw Pascal VOC data set is expected to reside in JPEG files located in the
18 | directory 'JPEGImages'. Similarly, bounding box annotations are expected to be
19 | stored as XML files in the directory 'Annotations'.
20 |
21 | This TensorFlow script converts the data into a sharded data set made of
22 | several TFRecord files.
23 |
24 | Each TFRecord file contains at most SAMPLES_PER_FILES (200) records.
25 | Each record within the TFRecord file is a
26 | serialized Example proto. The Example proto contains the following fields:
27 |
28 | image/encoded: string containing JPEG encoded image in RGB colorspace
29 | image/height: integer, image height in pixels
30 | image/width: integer, image width in pixels
31 | image/channels: integer, specifying the number of channels, always 3
32 | image/format: string, specifying the format, always'JPEG'
33 |
34 |
35 | image/object/bbox/xmin: list of float specifying the 0+ human annotated
36 | bounding boxes
37 | image/object/bbox/xmax: list of float specifying the 0+ human annotated
38 | bounding boxes
39 | image/object/bbox/ymin: list of float specifying the 0+ human annotated
40 | bounding boxes
41 | image/object/bbox/ymax: list of float specifying the 0+ human annotated
42 | bounding boxes
43 | image/object/bbox/label: list of integer specifying the classification index.
44 | image/object/bbox/label_text: list of string descriptions.
45 |
46 | Note that the length of xmin is identical to the length of xmax, ymin and ymax
47 | for each example.
48 | """
49 | import os
50 | import sys
51 | import random
52 |
53 | import numpy as np
54 | import tensorflow as tf
55 |
56 | import xml.etree.ElementTree as ET
57 |
58 | from datasets.dataset_utils import int64_feature, float_feature, bytes_feature
59 | from datasets.pascalvoc_common import VOC_LABELS
60 |
# Original dataset organisation: sub-directories (with trailing slash) of the
# dataset directory holding the XML annotations and the JPEG images.
DIRECTORY_ANNOTATIONS = 'Annotations/'
DIRECTORY_IMAGES = 'JPEGImages/'

# TFRecords conversion parameters.
# Seed used by `run` before shuffling the file list, so the order is repeatable.
RANDOM_SEED = 4242
# Maximum number of examples written to a single TFRecord file by `run`.
SAMPLES_PER_FILES = 200
68 |
69 |
def _read_int_flag(obj, tag):
    """Return the integer text of child `tag` of `obj`, or 0 when missing."""
    child = obj.find(tag)
    # Compare with None explicitly: an Element without sub-elements is falsy,
    # so a plain truth test would ignore every leaf such as <difficult>1</...>.
    if child is None or child.text is None:
        return 0
    return int(child.text)


def _process_image(directory, name):
    """Process a image and annotation file.

    Args:
      directory: string, dataset directory containing the 'JPEGImages/' and
        'Annotations/' sub-directories.
      name: string, sample name without extension; '<name>.jpg' and
        '<name>.xml' are read.
    Returns:
      image_data: bytes, raw content of the JPEG file (not decoded).
      shape: list of 3 integers [height, width, depth] read from the
        annotation's <size> element.
      bboxes: list of (ymin, xmin, ymax, xmax) tuples, each coordinate divided
        by the image height (y) or width (x).
      labels: list of integer class ids looked up in VOC_LABELS.
      labels_text: list of ASCII-encoded class names.
      difficult: list of integers, one 'difficult' flag per object (0 when the
        tag is absent).
      truncated: list of integers, one 'truncated' flag per object (0 when the
        tag is absent).
    """
    # Read the image file. Binary mode is required: the JPEG bytes are not
    # text and fail to decode in text mode on Python 3.
    filename = os.path.join(directory, DIRECTORY_IMAGES, name + '.jpg')
    with tf.gfile.FastGFile(filename, 'rb') as image_file:
        image_data = image_file.read()

    # Read the XML annotation file.
    filename = os.path.join(directory, DIRECTORY_ANNOTATIONS, name + '.xml')
    tree = ET.parse(filename)
    root = tree.getroot()

    # Image shape.
    size = root.find('size')
    shape = [int(size.find('height').text),
             int(size.find('width').text),
             int(size.find('depth').text)]
    # Find annotations.
    bboxes = []
    labels = []
    labels_text = []
    difficult = []
    truncated = []
    for obj in root.findall('object'):
        label = obj.find('name').text
        labels.append(int(VOC_LABELS[label][0]))
        labels_text.append(label.encode('ascii'))

        difficult.append(_read_int_flag(obj, 'difficult'))
        truncated.append(_read_int_flag(obj, 'truncated'))

        # Pixel coordinates are normalised by the image height / width.
        bbox = obj.find('bndbox')
        bboxes.append((float(bbox.find('ymin').text) / shape[0],
                       float(bbox.find('xmin').text) / shape[1],
                       float(bbox.find('ymax').text) / shape[0],
                       float(bbox.find('xmax').text) / shape[1]
                       ))
    return image_data, shape, bboxes, labels, labels_text, difficult, truncated
122 |
123 |
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Build an Example proto for an image example.

    Args:
      image_data: string, JPEG encoding of RGB image;
      labels: list of integers, identifier for the ground truth;
      labels_text: list of strings, human-readable labels;
      bboxes: list of bounding boxes; each box holds exactly 4 values in the
          order (ymin, xmin, ymax, xmax);
      shape: 3 integers, image shapes in pixels;
      difficult: list of integers, one 'difficult' flag per object;
      truncated: list of integers, one 'truncated' flag per object.
    Returns:
      Example proto
    """
    # Split the boxes into one coordinate list per feature.
    ymin, xmin, ymax, xmax = [], [], [], []
    for box in bboxes:
        assert len(box) == 4
        for coordinates, value in zip((ymin, xmin, ymax, xmax), box):
            coordinates.append(value)

    image_format = b'JPEG'
    feature_map = {
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
166 |
167 |
def _add_to_tfrecord(dataset_dir, name, tfrecord_writer):
    """Convert one image with its annotations and append it to a TFRecord.

    Args:
      dataset_dir: Dataset directory;
      name: Image name to add to the TFRecord;
      tfrecord_writer: The TFRecord writer to use for writing.
    """
    sample = _process_image(dataset_dir, name)
    image_data, shape, bboxes, labels, labels_text, difficult, truncated = sample
    example = _convert_to_example(image_data, labels, labels_text,
                                  bboxes, shape, difficult, truncated)
    serialized = example.SerializeToString()
    tfrecord_writer.write(serialized)
181 |
182 |
def _get_output_filename(output_dir, name, idx):
    """Return the path '<output_dir>/<name>_<idx>.tfrecord'.

    `idx` is zero-padded to at least three digits.
    """
    basename = '%s_%03d.tfrecord' % (name, idx)
    return '%s/%s' % (output_dir, basename)
185 |
186 |
def run(dataset_dir, output_dir, name='voc_train', shuffling=False):
    """Runs the conversion operation.

    Every '.xml' file found in the 'Annotations/' sub-directory of
    `dataset_dir` is converted together with its image and written to TFRecord
    files in `output_dir`, with at most SAMPLES_PER_FILES examples per file.

    Args:
      dataset_dir: The dataset directory where the dataset is stored.
      output_dir: Output directory; created when it does not exist.
      name: Prefix of the generated TFRecord file names.
      shuffling: When True, the file list is shuffled with the fixed seed
        RANDOM_SEED before conversion.
    """
    # Create the dataset directory when it does not exist (kept from the
    # original behaviour).
    # NOTE(review): a freshly created directory has no annotations, so the
    # listing below still fails - confirm whether this should raise instead.
    if not tf.gfile.Exists(dataset_dir):
        tf.gfile.MakeDirs(dataset_dir)
    # Make sure the destination exists before opening the first writer.
    if not tf.gfile.Exists(output_dir):
        tf.gfile.MakeDirs(output_dir)

    # Dataset filenames, and shuffling. Only annotation files are kept so that
    # stray files in the directory do not break the conversion.
    path = os.path.join(dataset_dir, DIRECTORY_ANNOTATIONS)
    filenames = sorted(f for f in os.listdir(path)
                       if f.lower().endswith('.xml'))
    if shuffling:
        random.seed(RANDOM_SEED)
        random.shuffle(filenames)

    # Process dataset files, one TFRecord file per chunk of SAMPLES_PER_FILES.
    total = len(filenames)
    for fidx, start in enumerate(range(0, total, SAMPLES_PER_FILES)):
        # Open new TFRecord file.
        tf_filename = _get_output_filename(output_dir, name, fidx)
        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            stop = min(start + SAMPLES_PER_FILES, total)
            for i in range(start, stop):
                # '\r' plus an explicit flush keeps the progress report on a
                # single, continuously refreshed line.
                sys.stdout.write('\r>> Converting image %d/%d' % (i + 1, total))
                sys.stdout.flush()

                # Strip the extension to get the sample name.
                img_name = os.path.splitext(filenames[i])[0]
                _add_to_tfrecord(dataset_dir, img_name, tfrecord_writer)

    print('\nFinished converting the Pascal VOC dataset!')
227 |
--------------------------------------------------------------------------------
/eval_ssd_network.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Generic evaluation script that evaluates a SSD model
16 | on a given dataset."""
17 | import math
18 | import sys
19 | import six
20 | import time
21 |
22 | import numpy as np
23 | import tensorflow as tf
24 | import tf_extended as tfe
25 | import tf_utils
26 | from tensorflow.python.framework import ops
27 |
28 | from datasets import dataset_factory
29 | from nets import nets_factory
30 | from preprocessing import preprocessing_factory
31 |
32 | slim = tf.contrib.slim
33 |
# =========================================================================== #
# Some default EVAL parameters
# =========================================================================== #
# List of recalls values at which precision is evaluated.
# NOTE(review): in this file LIST_RECALLS is only referenced by commented-out
# code inside main().
LIST_RECALLS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.85,
                0.90, 0.95, 0.96, 0.97, 0.98, 0.99]
# Data layout passed to the preprocessing function and to the network arg scope.
DATA_FORMAT = 'NHWC'

# =========================================================================== #
# SSD evaluation Flags.
# =========================================================================== #
# Detection post-processing parameters, forwarded to ssd_net.detected_bboxes().
tf.app.flags.DEFINE_float(
    'select_threshold', 0.01, 'Selection threshold.')
tf.app.flags.DEFINE_integer(
    'select_top_k', 400, 'Select top-k detected bounding boxes.')
tf.app.flags.DEFINE_integer(
    'keep_top_k', 200, 'Keep top-k detected objects.')
tf.app.flags.DEFINE_float(
    'nms_threshold', 0.45, 'Non-Maximum Selection threshold.')
# Threshold forwarded to tfe.bboxes_matching_batch().
tf.app.flags.DEFINE_float(
    'matching_threshold', 0.5, 'Matching threshold with groundtruth objects.')
# Integer code forwarded as `resize=` to the preprocessing function.
# NOTE(review): the help text lists mode names, not their integer values --
# confirm the code-to-name mapping in the preprocessing module.
tf.app.flags.DEFINE_integer(
    'eval_resize', 4, 'Image resizing: None / CENTRAL_CROP / PAD_AND_RESIZE / WARP_RESIZE.')
# NOTE(review): main() takes the evaluation shape from the network parameters
# and never reads this flag.
tf.app.flags.DEFINE_integer(
    'eval_image_size', None, 'Eval image size.')
# When True, main() reads the 'object/difficult' annotations and passes them to
# the matching step; otherwise an all-zero tensor is used.
tf.app.flags.DEFINE_boolean(
    'remove_difficult', True, 'Remove difficult objects from evaluation.')

# =========================================================================== #
# Main evaluation flags.
# =========================================================================== #
tf.app.flags.DEFINE_integer(
    'num_classes', 21, 'Number of classes to use in the dataset.')
tf.app.flags.DEFINE_integer(
    'batch_size', 1, 'The number of samples in each batch.')
tf.app.flags.DEFINE_integer(
    'max_num_batches', None,
    'Max number of batches to evaluate by default use all.')
tf.app.flags.DEFINE_string(
    'master', '', 'The address of the TensorFlow master to use.')
tf.app.flags.DEFINE_string(
    'checkpoint_path', '/tmp/tfmodel/',
    'The directory where the model was written to or an absolute path to a '
    'checkpoint file.')
tf.app.flags.DEFINE_string(
    'eval_dir', '/tmp/tfmodel/', 'Directory where the results are saved to.')
tf.app.flags.DEFINE_integer(
    'num_preprocessing_threads', 4,
    'The number of threads used to create the batches.')
# NOTE(review): the 'imagenet' / 'inception_v3' defaults below look inherited
# from a classification script; they presumably have to be overridden on the
# command line with a dataset and network known to the factories -- confirm.
tf.app.flags.DEFINE_string(
    'dataset_name', 'imagenet', 'The name of the dataset to load.')
tf.app.flags.DEFINE_string(
    'dataset_split_name', 'test', 'The name of the train/test split.')
# Mandatory: main() raises ValueError when this flag is left empty.
tf.app.flags.DEFINE_string(
    'dataset_dir', None, 'The directory where the dataset files are stored.')
tf.app.flags.DEFINE_string(
    'model_name', 'inception_v3', 'The name of the architecture to evaluate.')
tf.app.flags.DEFINE_string(
    'preprocessing_name', None, 'The name of the preprocessing to use. If left '
    'as `None`, then the model_name flag is used.')
# NOTE: the two adjacent help literals are concatenated without a separating
# space ("...average.If left...").
tf.app.flags.DEFINE_float(
    'moving_average_decay', None,
    'The decay to use for the moving average.'
    'If left as None, then moving averages are not used.')
tf.app.flags.DEFINE_float(
    'gpu_memory_fraction', 0.1, 'GPU memory fraction to use.')
# Selects between a single evaluation (False) and the polling
# slim.evaluation.evaluation_loop (True) in main().
tf.app.flags.DEFINE_boolean(
    'wait_for_checkpoints', False, 'Wait for new checkpoints in the eval loop.')


FLAGS = tf.app.flags.FLAGS
105 |
106 |
def main(_):
    """Build the SSD evaluation graph and run the evaluation.

    The graph reads the dataset selected by the flags, pre-processes and
    batches it, runs the SSD network, decodes and filters its detections,
    matches them against the groundtruth and accumulates streaming loss,
    TP/FP and average-precision (VOC07 and VOC12) metrics. Depending on
    `--wait_for_checkpoints` it then evaluates one checkpoint once or keeps
    polling a checkpoint directory.

    Args:
      _: Unused positional argument supplied by `tf.app.run()`.

    Raises:
      ValueError: If `--dataset_dir` is not supplied, or if
        `--checkpoint_path` is a directory in which no checkpoint is found.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        # =================================================================== #
        # Dataset + SSD model + Pre-processing
        # =================================================================== #
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # Get the SSD network and its anchors.
        ssd_class = nets_factory.get_network(FLAGS.model_name)
        ssd_params = ssd_class.default_params._replace(num_classes=FLAGS.num_classes)
        ssd_net = ssd_class(ssd_params)

        # Evaluation shape and associated anchors: eval_image_size
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Select the preprocessing function.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        tf_utils.print_configuration(FLAGS.__flags, ssd_params,
                                     dataset.data_sources, FLAGS.eval_dir)
        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.device('/cpu:0'):
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    common_queue_capacity=2 * FLAGS.batch_size,
                    common_queue_min=FLAGS.batch_size,
                    shuffle=False)
            # Get for SSD network: image, labels, bboxes.
            [image, shape, glabels, gbboxes] = provider.get(['image', 'shape',
                                                             'object/label',
                                                             'object/bbox'])
            if FLAGS.remove_difficult:
                [gdifficults] = provider.get(['object/difficult'])
            else:
                # No object is flagged as difficult.
                gdifficults = tf.zeros(tf.shape(glabels), dtype=tf.int64)

            # Pre-processing image, labels and bboxes.
            image, glabels, gbboxes, gbbox_img = \
                image_preprocessing_fn(image, glabels, gbboxes,
                                       out_shape=ssd_shape,
                                       data_format=DATA_FORMAT,
                                       resize=FLAGS.eval_resize,
                                       difficults=None)

            # Encode groundtruth labels and bboxes.
            gclasses, glocalisations, gscores = \
                ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
            # Five single tensors followed by three per-anchor-layer lists;
            # used to restore the nested structure after batching.
            batch_shape = [1] * 5 + [len(ssd_anchors)] * 3

            # Evaluation batch.
            r = tf.train.batch(
                tf_utils.reshape_list([image, glabels, gbboxes, gdifficults, gbbox_img,
                                       gclasses, glocalisations, gscores]),
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size,
                dynamic_pad=True)
            (b_image, b_glabels, b_gbboxes, b_gdifficults, b_gbbox_img, b_gclasses,
             b_glocalisations, b_gscores) = tf_utils.reshape_list(r, batch_shape)

        # =================================================================== #
        # SSD Network + Ouputs decoding.
        # =================================================================== #
        arg_scope = ssd_net.arg_scope(data_format=DATA_FORMAT)
        with slim.arg_scope(arg_scope):
            predictions, localisations, logits, end_points = \
                ssd_net.net(b_image, is_training=False)
        # Add losses functions.
        ssd_net.losses(logits, localisations,
                       b_gclasses, b_glocalisations, b_gscores)

        # Performing post-processing on CPU: loop-intensive, usually more efficient.
        with tf.device('/device:CPU:0'):
            # Detected objects from SSD output.
            localisations = ssd_net.bboxes_decode(localisations, ssd_anchors)
            rscores, rbboxes = \
                ssd_net.detected_bboxes(predictions, localisations,
                                        select_threshold=FLAGS.select_threshold,
                                        nms_threshold=FLAGS.nms_threshold,
                                        clipping_bbox=None,
                                        top_k=FLAGS.select_top_k,
                                        keep_top_k=FLAGS.keep_top_k)
            # Compute TP and FP statistics.
            num_gbboxes, tp, fp, rscores = \
                tfe.bboxes_matching_batch(rscores.keys(), rscores, rbboxes,
                                          b_glabels, b_gbboxes, b_gdifficults,
                                          matching_threshold=FLAGS.matching_threshold)

        # Variables to restore: moving avg. or normal weights.
        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        # =================================================================== #
        # Evaluation metrics.
        # =================================================================== #
        with tf.device('/device:CPU:0'):
            dict_metrics = {}
            # First add all losses.
            for loss in tf.get_collection(tf.GraphKeys.LOSSES):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)
            # Extra losses as well.
            for loss in tf.get_collection('EXTRA_LOSSES'):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)

            # Add metrics to summaries and Print on screen.
            for name, metric in dict_metrics.items():
                # summary_name = 'eval/%s' % name
                summary_name = name
                op = tf.summary.scalar(summary_name, metric[0], collections=[])
                # op = tf.Print(op, [metric[0]], summary_name)
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # FP and TP metrics.
            tp_fp_metric = tfe.streaming_tp_fp_arrays(num_gbboxes, tp, fp, rscores)
            for c in tp_fp_metric[0].keys():
                dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c],
                                                tp_fp_metric[1][c])

            # Add to summaries precision/recall values.
            aps_voc07 = {}
            aps_voc12 = {}
            for c in tp_fp_metric[0].keys():
                # Precison and recall values.
                prec, rec = tfe.precision_recall(*tp_fp_metric[0][c])

                # Average precision VOC07.
                v = tfe.average_precision_voc07(prec, rec)
                summary_name = 'AP_VOC07/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                # op = tf.Print(op, [v], summary_name)
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_voc07[c] = v

                # Average precision VOC12.
                v = tfe.average_precision_voc12(prec, rec)
                summary_name = 'AP_VOC12/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                # op = tf.Print(op, [v], summary_name)
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_voc12[c] = v

            # Mean average precision VOC07.
            summary_name = 'AP_VOC07/mAP'
            mAP = tf.add_n(list(aps_voc07.values())) / len(aps_voc07)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # Mean average precision VOC12.
            summary_name = 'AP_VOC12/mAP'
            mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # for i, v in enumerate(l_precisions):
        #     summary_name = 'eval/precision_at_recall_%.2f' % LIST_RECALLS[i]
        #     op = tf.summary.scalar(summary_name, v, collections=[])
        #     op = tf.Print(op, [v], summary_name)
        #     tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # Split into values and updates ops.
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(dict_metrics)

        # =================================================================== #
        # Evaluation loop.
        # =================================================================== #
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
        # config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

        # Number of batches...
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # math.ceil returns a float on Python 2; the evaluation count
            # must be an integer.
            num_batches = int(math.ceil(dataset.num_samples / float(FLAGS.batch_size)))

        if not FLAGS.wait_for_checkpoints:
            if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
                checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
                # latest_checkpoint returns None when the directory holds no
                # checkpoint; fail here with a clear message instead of deep
                # inside the restore machinery.
                if checkpoint_path is None:
                    raise ValueError('No checkpoint found in directory %s'
                                     % FLAGS.checkpoint_path)
            else:
                checkpoint_path = FLAGS.checkpoint_path
            tf.logging.info('Evaluating %s' % checkpoint_path)

            # Standard evaluation loop.
            start = time.time()
            slim.evaluation.evaluate_once(
                master=FLAGS.master,
                checkpoint_path=checkpoint_path,
                logdir=FLAGS.eval_dir,
                num_evals=num_batches,
                eval_op=list(names_to_updates.values()),
                variables_to_restore=variables_to_restore,
                session_config=config)
            # Log time spent.
            elapsed = time.time()
            elapsed = elapsed - start
            print('Time spent : %.3f seconds.' % elapsed)
            print('Time spent per BATCH: %.3f seconds.' % (elapsed / num_batches))

        else:
            checkpoint_path = FLAGS.checkpoint_path
            tf.logging.info('Evaluating %s' % checkpoint_path)

            # Waiting loop.
            slim.evaluation.evaluation_loop(
                master=FLAGS.master,
                checkpoint_dir=checkpoint_path,
                logdir=FLAGS.eval_dir,
                num_evals=num_batches,
                eval_op=list(names_to_updates.values()),
                variables_to_restore=variables_to_restore,
                eval_interval_secs=60,
                max_number_of_evaluations=np.inf,
                session_config=config,
                timeout=None)
343 |
344 |
# Script entry point: tf.app.run() parses the command-line flags defined above
# and then invokes main().
if __name__ == '__main__':
    tf.app.run()
347 |
--------------------------------------------------------------------------------
/nets/custom_layers.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | from tensorflow.contrib.framework.python.ops import add_arg_scope
4 | from tensorflow.contrib.layers.python.layers import initializers
5 | from tensorflow.contrib.framework.python.ops import variables
6 | from tensorflow.contrib.layers.python.layers import utils
7 | from tensorflow.python.ops import nn
8 | from tensorflow.python.ops import init_ops
9 | from tensorflow.python.ops import variable_scope
10 |
11 |
def abs_smooth(x):
    """Smoothed absolute value, as used for a smooth-L1 error.

    Piecewise definition:
       x^2 / 2         if abs(x) < 1
       abs(x) - 0.5    otherwise
    Both branches are obtained from a single expression built on abs(x) and
    min(abs(x), 1), so no explicit conditional is needed.
    """
    magnitude = tf.abs(x)
    clipped = tf.minimum(magnitude, 1)
    # clipped == magnitude below 1 (giving 0.5 * x^2) and 1 above it
    # (giving abs(x) - 0.5).
    return 0.5 * ((magnitude - 1) * clipped + magnitude)
25 |
@add_arg_scope
def l2_normalization(
        inputs,
        scaling=False,
        scale_initializer=init_ops.ones_initializer(),
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        data_format='NHWC',
        trainable=True,
        scope=None):
    """L2-normalize `inputs` along the channel axis, with optional scaling.

    The normalization is applied over the last axis for 'NHWC' inputs and over
    axis 1 for 'NCHW' inputs. When `scaling` is set, the result is multiplied
    by a per-channel variable named 'gamma'.

    Args:
      inputs: a 4-D tensor with dimensions [batch_size, height, width, channels]
        (or [batch_size, channels, height, width] for 'NCHW').
      scaling: whether or not to add a post scaling operation along the
        dimensions which have been normalized.
      scale_initializer: An initializer for the scaling weights.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      variables_collections: optional list of collections for all the variables
        or a dictionary containing a different list of collection per variable.
      outputs_collections: collection to add the outputs.
      data_format: 'NHWC' or 'NCHW' data format.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.
    Returns:
      A `Tensor` representing the output of the operation.
    Raises:
      ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'.
    """
    # Fail early with a clear message; previously an unsupported format left
    # `norm_dim` / `params_shape` unassigned and crashed further down.
    if data_format not in ('NHWC', 'NCHW'):
        raise ValueError(
            "Unsupported data_format '%s': expected 'NHWC' or 'NCHW'."
            % data_format)

    with variable_scope.variable_scope(
            scope, 'L2Normalization', [inputs], reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        dtype = inputs.dtype.base_dtype
        if data_format == 'NHWC':
            norm_dim = tf.range(inputs_rank - 1, inputs_rank)
            params_shape = inputs_shape[-1:]
        else:  # 'NCHW'
            norm_dim = tf.range(1, 2)
            # Slice (rather than index) so that both branches yield a rank-1
            # shape for the scale variable.
            params_shape = inputs_shape[1:2]

        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            if data_format == 'NHWC':
                outputs = tf.multiply(outputs, scale)
            else:  # 'NCHW'
                # Reshape the scale from [C] to [C, 1, 1] so that it
                # broadcasts over the two trailing spatial axes.
                scale = tf.expand_dims(scale, axis=-1)
                scale = tf.expand_dims(scale, axis=-1)
                outputs = tf.multiply(outputs, scale)

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
92 |
93 |
@add_arg_scope
def pad2d(inputs,
          pad=(0, 0),
          mode='CONSTANT',
          data_format='NHWC',
          trainable=True,
          scope=None):
    """2D Padding layer, adding a symmetric padding to H and W dimensions.

    Aims to mimic padding in Caffe and MXNet, helping the port of models to
    TensorFlow. Tries to follow the naming convention of `tf.contrib.layers`.

    Args:
      inputs: 4D input Tensor;
      pad: 2-Tuple with padding values for H and W dimensions;
      mode: Padding mode. C.f. `tf.pad`
      data_format: NHWC or NCHW data format.
      trainable: Unused by this layer; accepted so that it shares the
        signature conventions of the other layers.
      scope: Optional name scope (defaults to 'pad2d').
    Returns:
      The padded Tensor.
    Raises:
      ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'.
    """
    with tf.name_scope(scope, 'pad2d', [inputs]):
        # Padding shape: the same amount before and after each spatial axis,
        # nothing on the batch and channel axes.
        if data_format == 'NHWC':
            paddings = [[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]]
        elif data_format == 'NCHW':
            paddings = [[0, 0], [0, 0], [pad[0], pad[0]], [pad[1], pad[1]]]
        else:
            # Previously `paddings` was simply left unassigned here.
            raise ValueError(
                "Unsupported data_format '%s': expected 'NHWC' or 'NCHW'."
                % data_format)
        net = tf.pad(inputs, paddings, mode=mode)
        return net
120 |
121 |
@add_arg_scope
def channel_to_last(inputs,
                    data_format='NHWC',
                    scope=None):
    """Move the channel axis to the last dimension. Allows to
    provide a single output format whatever the input data format.

    Args:
      inputs: Input Tensor;
      data_format: NHWC or NCHW.
      scope: Optional name scope (defaults to 'channel_to_last').
    Return:
      Input in NHWC format.
    Raises:
      ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'.
    """
    with tf.name_scope(scope, 'channel_to_last', [inputs]):
        if data_format == 'NHWC':
            # Already channel-last: returned untouched.
            net = inputs
        elif data_format == 'NCHW':
            net = tf.transpose(inputs, perm=(0, 2, 3, 1))
        else:
            # Previously `net` was simply left unassigned here.
            raise ValueError(
                "Unsupported data_format '%s': expected 'NHWC' or 'NCHW'."
                % data_format)
        return net
--------------------------------------------------------------------------------
/nets/mobilenet_V2.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Implementation of Mobilenet V2.
16 | Architecture: https://arxiv.org/abs/1801.04381
17 | The base model gives 72.2% accuracy on ImageNet, with 300MMadds,
18 | 3.4 M parameters.
19 | """
20 |
21 | from __future__ import absolute_import
22 | from __future__ import division
23 | from __future__ import print_function
24 |
25 | import copy
26 | import functools
27 |
28 | import tensorflow as tf
29 |
30 | from nets import conv_blocks as ops
31 | from nets import mobilenet as lib
32 |
33 | slim = tf.contrib.slim
34 | op = lib.op
35 |
36 | expand_input = ops.expand_input_by_factor
37 |
38 | # pyformat: disable
39 | # Architecture: https://arxiv.org/abs/1801.04381
# V2_DEF has two entries:
#   'defaults': default keyword arguments, keyed by tuples of layer functions;
#   'spec': the ordered list of layer ops making up the network.
# NOTE(review): the final 1x1 convolution (1280 outputs) is commented out at
# the end of 'spec', so the last op is the 320-output expanded_conv. That is
# the op whose 'num_outputs' mobilenet() rescales in
# finegrain_classification_mode -- confirm this is intended.
V2_DEF = dict(
    defaults={
        # Note: these parameters of batch norm affect the architecture
        # that's why they are here and not in training_scope.
        (slim.batch_norm,): {'center': True, 'scale': True},
        (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
            'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
        },
        (ops.expanded_conv,): {
            'expansion_size': expand_input(6),
            'split_expansion': 1,
            'normalizer_fn': slim.batch_norm,
            'residual': True
        },
        (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'}
    },
    spec=[
        op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]),
        # First expanded_conv overrides the default expansion factor (6) with 1.
        op(ops.expanded_conv,
           expansion_size=expand_input(1, divisible_by=1),
           num_outputs=16),
        op(ops.expanded_conv, stride=2, num_outputs=24),
        op(ops.expanded_conv, stride=1, num_outputs=24),
        op(ops.expanded_conv, stride=2, num_outputs=32),
        op(ops.expanded_conv, stride=1, num_outputs=32),
        op(ops.expanded_conv, stride=1, num_outputs=32),
        op(ops.expanded_conv, stride=2, num_outputs=64),
        op(ops.expanded_conv, stride=1, num_outputs=64),
        op(ops.expanded_conv, stride=1, num_outputs=64),
        op(ops.expanded_conv, stride=1, num_outputs=64),
        op(ops.expanded_conv, stride=1, num_outputs=96),
        op(ops.expanded_conv, stride=1, num_outputs=96),
        op(ops.expanded_conv, stride=1, num_outputs=96),
        op(ops.expanded_conv, stride=2, num_outputs=160),
        op(ops.expanded_conv, stride=1, num_outputs=160),
        op(ops.expanded_conv, stride=1, num_outputs=160),
        op(ops.expanded_conv, stride=1, num_outputs=320),
        #op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280)
    ],
)
80 | # pyformat: enable
81 |
82 |
@slim.add_arg_scope
def mobilenet(input_tensor,
              num_classes=1001,
              depth_multiplier=1.0,
              scope='MobilenetV2',
              conv_defs=None,
              finegrain_classification_mode=False,
              min_depth=None,
              divisible_by=None,
              activation_fn=None,
              **kwargs):
  """Builds a MobileNet V2 network.

  The network is created in inference mode; wrap the call in `training_scope`
  to build it for training:

    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
      logits, endpoints = mobilenet_v2.mobilenet(input_tensor)

  Args:
    input_tensor: The input tensor.
    num_classes: Number of classes.
    depth_multiplier: Multiplier applied to the number of channels of each
      layer (forwarded to the generic builder as `multiplier`).
    scope: Scope of the operator.
    conv_defs: Optional replacement for the default definition, V2_DEF.
    finegrain_classification_mode: When True and `depth_multiplier` < 1, the
      `num_outputs` of the last op of the spec is divided by
      `depth_multiplier`, so that the layer keeps its size once the multiplier
      is applied (see https://arxiv.org/abs/1801.04381).
    min_depth: If provided, passed to `lib.depth_multiplier` through an
      arg_scope as the minimum number of channels.
    divisible_by: If provided, passed to `lib.depth_multiplier` through an
      arg_scope as the channel divisor.
    activation_fn: If provided, overrides the default activation of the
      convolution / fully connected layers (tf.nn.relu6 in V2_DEF).
    **kwargs: Forwarded untouched to `lib.mobilenet` (e.g. `prediction_fn`,
      `reuse`).

  Returns:
    logits/endpoints pair

  Raises:
    ValueError: If `multiplier` is given instead of `depth_multiplier`.
  """
  if 'multiplier' in kwargs:
    raise ValueError('mobilenetv2 doesn\'t support generic '
                     'multiplier parameter use "depth_multiplier" instead.')

  conv_defs = V2_DEF if conv_defs is None else conv_defs

  if finegrain_classification_mode:
    # Work on a private copy: the definition may be the shared V2_DEF.
    conv_defs = copy.deepcopy(conv_defs)
    if depth_multiplier < 1:
      last_op_params = conv_defs['spec'][-1].params
      last_op_params['num_outputs'] /= depth_multiplier

  if activation_fn:
    conv_defs = copy.deepcopy(conv_defs)
    conv_layers = (slim.conv2d, slim.fully_connected, slim.separable_conv2d)
    conv_defs['defaults'][conv_layers]['activation_fn'] = activation_fn

  # NB: only the depth arguments actually supplied are collected, so that
  # defaults already installed on depth_multiplier via arg_scope are kept.
  candidate_depth_args = (('min_depth', min_depth),
                          ('divisible_by', divisible_by))
  depth_args = {key: value for key, value in candidate_depth_args
                if value is not None}

  with slim.arg_scope((lib.depth_multiplier,), **depth_args):
    return lib.mobilenet(
        input_tensor,
        num_classes=num_classes,
        conv_defs=conv_defs,
        scope=scope,
        multiplier=depth_multiplier,
        **kwargs)

mobilenet.default_image_size = 224
161 |
def wrapped_partial(func, *args, **kwargs):
  """Returns `functools.partial(func, *args, **kwargs)` carrying func's metadata.

  The partial object receives the wrapped function's name, docstring and other
  attributes copied by `functools.update_wrapper`.
  """
  # update_wrapper returns the wrapper object it has just updated.
  return functools.update_wrapper(
      functools.partial(func, *args, **kwargs), func)
166 |
167 |
# Wrappers for mobilenet v2 with preset depth multipliers. Note that
# 'finegrain_classification_mode' is set to True only for the two wrappers
# whose depth multiplier is below 1.0 (0.50 and 0.35), so that the last layer
# is not shrunk for them; the 1.4 wrapper leaves it at its default (False).
mobilenet_v2_140 = wrapped_partial(mobilenet, depth_multiplier=1.4)
mobilenet_v2_050 = wrapped_partial(mobilenet, depth_multiplier=0.50,
                                   finegrain_classification_mode=True)
mobilenet_v2_035 = wrapped_partial(mobilenet, depth_multiplier=0.35,
                                   finegrain_classification_mode=True)
176 |
177 |
@slim.add_arg_scope
def mobilenet_base(input_tensor, depth_multiplier=1.0, **kwargs):
  """Creates the base of the mobilenet (no pooling and no logits).

  Thin wrapper around `mobilenet` that forces `base_only=True`; all other
  keyword arguments are forwarded untouched.
  """
  base_network = mobilenet(input_tensor,
                           depth_multiplier=depth_multiplier,
                           base_only=True,
                           **kwargs)
  return base_network
184 |
185 |
def training_scope(**kwargs):
  """Defines the MobilenetV2 training scope.

  Usage:
     with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
       logits, endpoints = mobilenet_v2.mobilenet(input_tensor)

  Args:
    **kwargs: Forwarded as-is to mobilenet.training_scope. The following
      parameters are supported:
        weight_decay- The weight decay to use for regularizing the model.
        stddev- Standard deviation for initialization, if negative uses xavier.
        dropout_keep_prob- dropout keep probability
        bn_decay- decay for the batch norm moving averages.

  Returns:
    An `arg_scope` to use for the mobilenet v2 model.
  """
  scope = lib.training_scope(**kwargs)
  return scope
203 |
204 |
# Names exported by `from ... import *`. The depth-multiplier wrappers
# (mobilenet_v2_140 / _050 / _035) and wrapped_partial are not listed.
__all__ = ['training_scope', 'mobilenet_base', 'mobilenet', 'V2_DEF']
--------------------------------------------------------------------------------
/nets/nets_factory.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import tensorflow as tf
3 |
4 | from nets import ssd_vgg_300
5 | #from nets import ssd_vgg_512
6 |
7 | slim = tf.contrib.slim
8 |
# Registries keyed by network name. Only 'ssd_300_vgg' is currently enabled;
# the commented-out entries refer to modules that are not imported here.

# Network name -> network building function (used by get_network_fn).
networks_map = {#'vgg_a': vgg.vgg_a,
                #'vgg_16': vgg.vgg_16,
                #'vgg_19': vgg.vgg_19,
                'ssd_300_vgg': ssd_vgg_300.ssd_net,
                #'ssd_512_vgg': ssd_vgg_512.ssd_net,
                }

# Network name -> function returning the matching slim arg_scope
# (used by get_network_fn).
arg_scopes_map = {#'vgg_a': vgg.vgg_arg_scope,
                  #'vgg_16': vgg.vgg_arg_scope,
                  #'vgg_19': vgg.vgg_arg_scope,
                  'ssd_300_vgg': ssd_vgg_300.ssd_arg_scope,
                  #'ssd_512_vgg': ssd_vgg_512.ssd_arg_scope,
                  }

# Network name -> network class (returned by get_network).
networks_obj = {'ssd_300_vgg': ssd_vgg_300.SSDNet,
                #'ssd_512_vgg': ssd_vgg_512.SSDNet,
                }
26 |
def get_network(name):
    """Return the network class registered under `name` in `networks_obj`.

    Raises:
      KeyError: If no network is registered under `name`.
    """
    network_cls = networks_obj[name]
    return network_cls
29 |
def get_network_fn(name, num_classes, is_training=False, **kwargs):
    """Returns a network_fn such as `logits, end_points = network_fn(images)`.

    Args:
      name: The name of the network.
      num_classes: The number of classes to use for classification.
      is_training: `True` if the model is being used for training and `False`
        otherwise.
      **kwargs: Keyword arguments handed to the arg_scope factory registered
        for `name` (e.g. the weight decay).
    Returns:
      network_fn: A function that applies the model to a batch of images,
        inside the network's arg_scope. Extra keyword arguments given to
        `network_fn` are forwarded to the underlying network function.
    Raises:
      ValueError: If network `name` is not recognized.
    """
    if name not in networks_map:
        raise ValueError('Name of network unknown %s' % name)

    # The arg_scope is built once, here, and reused by every call.
    scope_for_net = arg_scopes_map[name](**kwargs)
    net_func = networks_map[name]

    @functools.wraps(net_func)
    def network_fn(images, **net_kwargs):
        with slim.arg_scope(scope_for_net):
            return net_func(images, num_classes,
                            is_training=is_training, **net_kwargs)

    if hasattr(net_func, 'default_image_size'):
        network_fn.default_image_size = net_func.default_image_size

    return network_fn
58 |
--------------------------------------------------------------------------------
/nets/ssd_common.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | import tf_extended as tfe
4 |
def tf_ssd_bboxes_encode_layer(labels,
                               bboxes,
                               anchors_layer,
                               num_classes,
                               no_annotation_label,
                               ignore_threshold=0.5,
                               prior_scaling=[0.1, 0.1, 0.2, 0.2],
                               dtype=tf.float32):
    """Encode groundtruth labels and bounding boxes using SSD anchors from
    one layer.

    Every groundtruth box is visited in turn; each anchor keeps the label and
    coordinates of the box with which it has the highest jaccard score seen
    so far. No minimum matching threshold is applied here (that check is
    commented out), so any strictly positive overlap can claim an anchor.

    Arguments:
      labels: 1D Tensor(int64) containing groundtruth labels;
      bboxes: Nx4 Tensor(float) with bboxes relative coordinates, ordered
        [ymin, xmin, ymax, xmax];
      anchors_layer: Tuple (yref, xref, href, wref) of Numpy arrays holding
        the anchor centers and sizes for the layer;
      num_classes: Boxes whose label is not below this value never match;
      no_annotation_label: Currently unused (only referenced by the
        commented-out "ignore" logic);
      ignore_threshold: Currently unused (same commented-out logic);
      prior_scaling: Scaling of encoded coordinates, indexed as
        [cy, cx, h, w];
      dtype: Float type of the score and coordinate tensors.

    Return:
      (target_labels, target_localizations, target_scores): Target Tensors.
      Localizations are stacked on a new last axis as [cx, cy, w, h].
    """
    # Anchors coordinates and volume.
    yref, xref, href, wref = anchors_layer
    ymin = yref - href / 2.
    xmin = xref - wref / 2.
    ymax = yref + href / 2.
    xmax = xref + wref / 2.
    vol_anchors = (xmax - xmin) * (ymax - ymin)

    # Initialize tensors...
    shape = (yref.shape[0], yref.shape[1], href.size)
    feat_labels = tf.zeros(shape, dtype=tf.int64)
    feat_scores = tf.zeros(shape, dtype=dtype)

    # Anchors start out associated with the full-image box [0, 0, 1, 1];
    # anchors that never match keep these values through the encoding below.
    feat_ymin = tf.zeros(shape, dtype=dtype)
    feat_xmin = tf.zeros(shape, dtype=dtype)
    feat_ymax = tf.ones(shape, dtype=dtype)
    feat_xmax = tf.ones(shape, dtype=dtype)

    def jaccard_with_anchors(bbox):
        """Compute jaccard score between a box and the anchors.
        """
        int_ymin = tf.maximum(ymin, bbox[0])
        int_xmin = tf.maximum(xmin, bbox[1])
        int_ymax = tf.minimum(ymax, bbox[2])
        int_xmax = tf.minimum(xmax, bbox[3])
        # Clamp at zero so disjoint boxes give an empty intersection.
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        # Volumes.
        inter_vol = h * w
        union_vol = vol_anchors - inter_vol \
            + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        jaccard = tf.div(inter_vol, union_vol)
        return jaccard

    def intersection_with_anchors(bbox):
        """Compute intersection score between a box and the anchors.

        The score is the intersection volume divided by the anchor volume.
        Only the commented-out "no annotation" logic in `body` refers to
        this helper, so it is not called at the moment.
        """
        int_ymin = tf.maximum(ymin, bbox[0])
        int_xmin = tf.maximum(xmin, bbox[1])
        int_ymax = tf.minimum(ymax, bbox[2])
        int_xmax = tf.minimum(xmax, bbox[3])
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        inter_vol = h * w
        scores = tf.div(inter_vol, vol_anchors)
        return scores

    def condition(i, feat_labels, feat_scores,
                  feat_ymin, feat_xmin, feat_ymax, feat_xmax):
        """Condition: keep looping while `i` is below the number of labels.
        """
        r = tf.less(i, tf.shape(labels))
        return r[0]

    def body(i, feat_labels, feat_scores,
             feat_ymin, feat_xmin, feat_ymax, feat_xmax):
        """Body: update feature labels, scores and bboxes.
        An anchor is reassigned to box `i` only when that box beats the
        best jaccard score recorded for the anchor so far; the fixed
        matching threshold of the SSD paper is not enforced here.
        """
        # Jaccard score.
        label = labels[i]
        bbox = bboxes[i]
        jaccard = jaccard_with_anchors(bbox)
        # Mask: check threshold + scores + no annotations + num_classes.
        mask = tf.greater(jaccard, feat_scores)
        # mask = tf.logical_and(mask, tf.greater(jaccard, matching_threshold))
        # Scores of -1 are reserved for "ignored" anchors (see the disabled
        # block below); such anchors must never be overwritten.
        mask = tf.logical_and(mask, feat_scores > -0.5)
        mask = tf.logical_and(mask, label < num_classes)
        imask = tf.cast(mask, tf.int64)
        fmask = tf.cast(mask, dtype)
        # Update values using mask.
        feat_labels = imask * label + (1 - imask) * feat_labels
        feat_scores = tf.where(mask, jaccard, feat_scores)

        feat_ymin = fmask * bbox[0] + (1 - fmask) * feat_ymin
        feat_xmin = fmask * bbox[1] + (1 - fmask) * feat_xmin
        feat_ymax = fmask * bbox[2] + (1 - fmask) * feat_ymax
        feat_xmax = fmask * bbox[3] + (1 - fmask) * feat_xmax

        # Check no annotation label: ignore these anchors...
        # interscts = intersection_with_anchors(bbox)
        # mask = tf.logical_and(interscts > ignore_threshold,
        #                       label == no_annotation_label)
        # # Replace scores by -1.
        # feat_scores = tf.where(mask, -tf.cast(mask, dtype), feat_scores)

        return [i+1, feat_labels, feat_scores,
                feat_ymin, feat_xmin, feat_ymax, feat_xmax]
    # Main loop definition.
    i = 0
    [i, feat_labels, feat_scores,
     feat_ymin, feat_xmin,
     feat_ymax, feat_xmax] = tf.while_loop(condition, body,
                                           [i, feat_labels, feat_scores,
                                            feat_ymin, feat_xmin,
                                            feat_ymax, feat_xmax])
    # Transform to center / size.
    feat_cy = (feat_ymax + feat_ymin) / 2.
    feat_cx = (feat_xmax + feat_xmin) / 2.
    feat_h = feat_ymax - feat_ymin
    feat_w = feat_xmax - feat_xmin
    # Encode features.
    feat_cy = (feat_cy - yref) / href / prior_scaling[0]
    feat_cx = (feat_cx - xref) / wref / prior_scaling[1]
    feat_h = tf.log(feat_h / href) / prior_scaling[2]
    feat_w = tf.log(feat_w / wref) / prior_scaling[3]
    # Use SSD ordering: x / y / w / h instead of ours.
    feat_localizations = tf.stack([feat_cx, feat_cy, feat_w, feat_h], axis=-1)
    return feat_labels, feat_localizations, feat_scores
137 |
138 |
def tf_ssd_bboxes_encode(labels,
                         bboxes,
                         anchors,
                         num_classes,
                         no_annotation_label,
                         ignore_threshold=0.5,
                         prior_scaling=[0.1, 0.1, 0.2, 0.2],
                         dtype=tf.float32,
                         scope='ssd_bboxes_encode'):
    """Encode groundtruth labels and boxes against the anchors of every layer.

    Calls `tf_ssd_bboxes_encode_layer` once per entry of `anchors`, each call
    inside its own `bboxes_encode_block_<i>` name scope.

    Arguments:
      labels: 1D Tensor(int64) containing groundtruth labels;
      bboxes: Nx4 Tensor(float) with bboxes relative coordinates;
      anchors: List of per-layer anchors;
      num_classes, no_annotation_label, ignore_threshold, prior_scaling,
        dtype: Passed unchanged to `tf_ssd_bboxes_encode_layer`;
      scope: Name scope wrapping all the created ops.

    Return:
      (target_labels, target_localizations, target_scores):
        Each element is a list of target Tensors, one per layer.
    """
    with tf.name_scope(scope):
        target_labels, target_localizations, target_scores = [], [], []
        for layer_idx, layer_anchors in enumerate(anchors):
            with tf.name_scope('bboxes_encode_block_%i' % layer_idx):
                encoded = tf_ssd_bboxes_encode_layer(
                    labels, bboxes, layer_anchors,
                    num_classes, no_annotation_label,
                    ignore_threshold,
                    prior_scaling, dtype)
                layer_labels, layer_locs, layer_scores = encoded
                target_labels.append(layer_labels)
                target_localizations.append(layer_locs)
                target_scores.append(layer_scores)
        return target_labels, target_localizations, target_scores
177 |
def ssd_bboxes_select(predictions_net,
                      localizations_net,
                      anchors_net,
                      select_threshold = 0.5,
                      img_shape = (300, 300),
                      num_classes = 21,
                      decode = True):
    """Extract classes, scores and bounding boxes from network output layers.

    Runs `ssd_bboxes_select_layer` on every layer (the three `*_net`
    sequences are indexed in lockstep) and concatenates the per-layer
    results along axis 0.

    Return:
      classes, scores, bboxes: Numpy arrays...
    """
    per_layer = [
        ssd_bboxes_select_layer(
            predictions_net[layer], localizations_net[layer],
            anchors_net[layer],
            select_threshold, img_shape, num_classes, decode)
        for layer in range(len(predictions_net))
    ]

    classes = np.concatenate([found[0] for found in per_layer], 0)
    scores = np.concatenate([found[1] for found in per_layer], 0)
    bboxes = np.concatenate([found[2] for found in per_layer], 0)
    return classes, scores, bboxes
207 |
208 |
def ssd_bboxes_select_layer(predictions_layer,
                            localizations_layer,
                            anchors_layer,
                            select_threshold = 0.5,
                            img_shape = (300, 300),
                            num_classes = 21,
                            decode = True):
    """Extract classes, scores and bounding boxes from features in one layer.

    With a falsy `select_threshold` (None or 0) every position whose most
    likely class is not class 0 is kept, with that class and its score.
    Otherwise every (position, class >= 1) pair whose score exceeds the
    threshold is kept, so one position can yield several detections.

    `img_shape` and `num_classes` are accepted but not used here.

    Return:
      classes, scores, bboxes: Numpy arrays...
    """
    # First decode localizations features if necessary.
    if decode:
        localizations_layer = ssd_bboxes_decode(localizations_layer,
                                                anchors_layer)

    # Flatten to (batch, positions, channels); a 5-D prediction array is
    # treated as batched, anything else as a single sample.
    pred_shape = predictions_layer.shape
    n_batch = pred_shape[0] if len(pred_shape) == 5 else 1
    flat_preds = np.reshape(predictions_layer,
                            (n_batch, -1, pred_shape[-1]))
    loc_shape = localizations_layer.shape
    flat_locs = np.reshape(localizations_layer,
                           (n_batch, -1, loc_shape[-1]))

    if select_threshold is None or select_threshold == 0:
        best_class = np.argmax(flat_preds, axis=2)
        best_score = np.amax(flat_preds, axis=2)
        keep = (best_class > 0)
        return best_class[keep], best_score[keep], flat_locs[keep]

    # Skip channel 0 and threshold every remaining class score.
    foreground = flat_preds[:, :, 1:]
    hits = np.where(foreground > select_threshold)
    classes = hits[-1] + 1
    scores = foreground[hits]
    bboxes = flat_locs[hits[:-1]]
    return classes, scores, bboxes
247 |
248 |
def ssd_bboxes_decode(feat_localizations,
                      anchor_bboxes,
                      prior_scaling = [0.1, 0.1, 0.2, 0.2]):
    """Turn encoded layer features back into relative bounding boxes using
    the reference anchor boxes.

    The last axis of `feat_localizations` is read as [cx, cy, w, h] offsets.
    NOTE(review): the centre offsets are scaled with `prior_scaling[0]` for
    x and `[1]` for y, whereas the encoder in this file uses `[0]` for y and
    `[1]` for x; identical with the default values.

    Return:
      numpy array with the same shape as the input, last axis holding
      ymin, xmin, ymax, xmax
    """
    orig_shape = feat_localizations.shape
    offsets = np.reshape(feat_localizations,
                         (-1, orig_shape[-2], orig_shape[-1]))
    yref, xref, href, wref = anchor_bboxes
    # Column vectors, so the centres broadcast against the per-anchor sizes.
    xref = np.reshape(xref, [-1, 1])
    yref = np.reshape(yref, [-1, 1])

    center_x = offsets[:, :, 0] * wref * prior_scaling[0] + xref
    center_y = offsets[:, :, 1] * href * prior_scaling[1] + yref
    width = wref * np.exp(offsets[:, :, 2] * prior_scaling[2])
    height = href * np.exp(offsets[:, :, 3] * prior_scaling[3])

    half_h = height / 2.
    half_w = width / 2.
    corners = (center_y - half_h, center_x - half_w,
               center_y + half_h, center_x + half_w)

    decoded = np.zeros_like(offsets)
    for column, values in enumerate(corners):
        decoded[:, :, column] = values

    return np.reshape(decoded, orig_shape)
278 |
def bboxes_clip(bbox_ref, bboxes):
    """Clip bounding boxes to a reference box.

    Both arguments use [ymin, xmin, ymax, xmax] along their last axis. The
    minimum corner is raised to the reference minimum and the maximum corner
    is lowered to the reference maximum. The input is copied, not modified.
    """
    # Work on a transposed copy so each coordinate is a single row.
    clipped = np.transpose(np.copy(bboxes))
    ref = np.transpose(bbox_ref)
    for coord in (0, 1):
        clipped[coord] = np.maximum(clipped[coord], ref[coord])
    for coord in (2, 3):
        clipped[coord] = np.minimum(clipped[coord], ref[coord])
    return np.transpose(clipped)
289 |
def bboxes_sort(classes, scores, bboxes, top_k = 400):
    """Order detections by decreasing score and keep at most `top_k` of them.

    Returns the reordered (classes, scores, bboxes) triple.
    """
    order = np.argsort(-scores)[:top_k]
    return classes[order], scores[order], bboxes[order]
304 |
def bboxes_nms(classes, scores, bboxes, nms_threshold = 0.45):
    """Apply per-class non-maximum suppression to a set of detections.

    Boxes are visited in array order. A box that is still kept suppresses
    every later box of the same class whose jaccard overlap with it is not
    below `nms_threshold`. Earlier entries therefore take priority.
    NOTE(review): callers presumably sort by decreasing score first (see
    `bboxes_sort`) - confirm at the call sites.

    Args:
      classes: 1D array of class ids.
      scores: 1D array of scores, same length as `classes`.
      bboxes: Nx4 array of boxes as [ymin, xmin, ymax, xmax].
      nms_threshold: Overlap at or above which a later box is dropped.

    Returns:
      (classes, scores, bboxes) restricted to the boxes that were kept.
    """
    # Builtin `bool` instead of `np.bool`: that alias was removed in
    # NumPy 1.24 and raises AttributeError there.
    keep_bboxes = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size-1):
        if keep_bboxes[i]:
            # Compare box i against all boxes that come after it.
            overlap = bboxes_jaccard(bboxes[i], bboxes[(i + 1):])
            # A later box survives if it overlaps little or is another class.
            keep_overlap = np.logical_or(overlap < nms_threshold,
                                         classes[(i + 1):] != classes[i])
            keep_bboxes[(i + 1):] = np.logical_and(keep_bboxes[(i + 1):],
                                                   keep_overlap)
    idxes = np.where(keep_bboxes)
    return classes[idxes], scores[idxes], bboxes[idxes]


def bboxes_jaccard(bboxes1, bboxes2):
    """Computing jaccard index between bboxes1 and bboxes2.
    Note: bboxes1 and bboxes2 can be multi-dimensional, but should be
    broadcastable. Boxes are [ymin, xmin, ymax, xmax] along the last axis.
    """
    # Transpose so that index 0..3 selects one coordinate for all boxes.
    bboxes1 = np.transpose(bboxes1)
    bboxes2 = np.transpose(bboxes2)
    # Intersection bbox and volume.
    int_ymin = np.maximum(bboxes1[0], bboxes2[0])
    int_xmin = np.maximum(bboxes1[1], bboxes2[1])
    int_ymax = np.minimum(bboxes1[2], bboxes2[2])
    int_xmax = np.minimum(bboxes1[3], bboxes2[3])

    # Clamp at zero so disjoint boxes yield an empty intersection.
    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w
    # Union volume.
    vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])
    vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])
    jaccard = int_vol / (vol1 + vol2 - int_vol)
    return jaccard
335 |
def bboxes_resize(bbox_ref, bboxes):
    """Express bounding boxes in the frame of a reference box, i.e. so that
    the reference box itself becomes [0, 0, 1, 1].

    Works on a copy of `bboxes`; the input array is left untouched.
    """
    resized = np.copy(bboxes)
    ref_ymin, ref_xmin = bbox_ref[0], bbox_ref[1]
    ref_height = bbox_ref[2] - bbox_ref[0]
    ref_width = bbox_ref[3] - bbox_ref[1]
    # (column, origin to subtract, extent to divide by) per coordinate.
    plan = ((0, ref_ymin, ref_height),
            (1, ref_xmin, ref_width),
            (2, ref_ymin, ref_height),
            (3, ref_xmin, ref_width))
    for column, origin, extent in plan:
        resized[:, column] -= origin
        resized[:, column] /= extent
    return resized
353 |
--------------------------------------------------------------------------------
/preprocessing/preprocessing_factory.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains a factory for building various models."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 |
23 | # from preprocessing import cifarnet_preprocessing
24 | # from preprocessing import inception_preprocessing
25 | # from preprocessing import vgg_preprocessing
26 |
27 | from preprocessing import ssd_vgg_preprocessing
28 |
29 | slim = tf.contrib.slim
30 |
31 |
def get_preprocessing(name, is_training=False):
    """Returns preprocessing_fn(image, labels, bboxes, out_shape, ...).

    Args:
      name: The name of the preprocessing function.
      is_training: `True` if the model is being used for training.

    Returns:
      preprocessing_fn: A function that preprocesses a single image
        (pre-batch). It is called as
          preprocessing_fn(image, labels, bboxes, out_shape,
                           data_format='NHWC', **kwargs)
        and returns the result of the selected module's `preprocess_image`.

    Raises:
      ValueError: If Preprocessing `name` is not recognized.
    """
    # Both SSD variants share the same preprocessing module.
    preprocessing_fn_map = {
        'ssd_300_vgg': ssd_vgg_preprocessing,
        'ssd_512_vgg': ssd_vgg_preprocessing,
    }

    module = preprocessing_fn_map.get(name)
    if module is None:
        raise ValueError('Preprocessing name [%s] was not recognized' % name)

    def preprocessing_fn(image, labels, bboxes,
                         out_shape, data_format='NHWC', **kwargs):
        return module.preprocess_image(
            image, labels, bboxes, out_shape,
            data_format=data_format,
            is_training=is_training,
            **kwargs)

    return preprocessing_fn
61 |
--------------------------------------------------------------------------------
/preprocessing/ssd_vgg_preprocessing.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Pre-processing images for SSD-type networks.
16 | """
17 | from enum import Enum, IntEnum
18 | import numpy as np
19 |
20 | import tensorflow as tf
21 | import tf_extended as tfe
22 |
23 | from tensorflow.python.ops import control_flow_ops
24 |
25 | from preprocessing import tf_image
26 | from nets import ssd_common
27 |
28 | slim = tf.contrib.slim
29 |
class Resize(IntEnum):
    """Resizing strategies."""
    NONE = 1            # Nothing!
    CENTRAL_CROP = 2    # Crop (and pad if necessary).
    PAD_AND_RESIZE = 3  # Pad, and resize to output shape.
    WARP_RESIZE = 4     # Warp resize.


# VGG mean parameters (per-channel R, G, B values subtracted from images).
_R_MEAN = 123.
_G_MEAN = 117.
_B_MEAN = 104.

# Some training pre-processing parameters.
# Minimum overlap to keep a bbox after cropping.
BBOX_CROP_OVERLAP = 0.5
# Passed as `min_object_covered` when sampling the training crop.
MIN_OBJECT_COVERED = 0.25
# Distortion ratio during cropping.
CROP_RATIO_RANGE = (0.6, 1.67)
# Default output shape used by `preprocess_for_eval`.
EVAL_SIZE = (300, 300)
46 |
47 |
def tf_image_whitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN]):
    """Subtracts the given means from each image channel.

    Args:
      image: 3-D image Tensor, channels on the last axis.
      means: One mean value per channel.

    Returns:
      the centered image.

    Raises:
      ValueError: If `image` is not 3-D or `means` does not have one entry
        per channel.
    """
    static_shape = image.get_shape()
    if static_shape.ndims != 3:
        raise ValueError('Input must be of size [height, width, C>0]')
    channels = static_shape.as_list()[-1]
    if channels != len(means):
        raise ValueError('len(means) must match the number of channels')

    # The per-channel constant broadcasts over the channel (last) axis.
    return image - tf.constant(means, dtype=image.dtype)
63 |
64 |
def tf_image_unwhitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN], to_int=True):
    """Undo `tf_image_whitened`: add the channel means back, and convert to
    int32 if requested.

    Returns:
      The image with the means added back (cast to tf.int32 when `to_int`).
    """
    restored = image + tf.constant(means, dtype=image.dtype)
    return tf.cast(restored, tf.int32) if to_int else restored
77 |
78 |
def np_image_unwhitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN], to_int=True):
    """Numpy counterpart of `tf_image_unwhitened`: add the channel means back
    to a copy of `image`, and convert to uint8 if requested.

    Returns:
      A new array with the means added back (uint8 when `to_int`); the input
      is not modified.
    """
    restored = np.copy(image)
    offsets = np.array(means, dtype=restored.dtype)
    restored += offsets
    return restored.astype(np.uint8) if to_int else restored
91 |
92 |
def tf_summary_image(image, bboxes, name='image', unwhitened=False):
    """Add image with bounding boxes to summary.

    When `unwhitened` is True the image is first passed through
    `tf_image_unwhitened`.
    """
    if unwhitened:
        image = tf_image_unwhitened(image)
    # Both inputs get a leading batch axis before drawing.
    batched_image = tf.expand_dims(image, 0)
    batched_boxes = tf.expand_dims(bboxes, 0)
    # Box coordinates are floats in [0.0, 1.0], i.e. positions relative to
    # the image height and width rather than pixel positions.
    annotated = tf.image.draw_bounding_boxes(batched_image, batched_boxes)
    # Write the image to the summary so it can be viewed in TensorBoard.
    tf.summary.image(name, annotated)
104 |
def apply_with_random_selector(x, func, num_cases):
    """Computes func(x, sel), with sel sampled from [0...num_cases-1].

    Args:
      x: input Tensor.
      func: Python function to apply.
      num_cases: Python int32, number of cases to sample sel from.

    Returns:
      The result of func(x, sel), where func receives the value of the
      selector as a python integer, but sel is sampled dynamically.
    """
    sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
    branches = []
    for case in range(num_cases):
        # Pass the real x only to the func call whose case equals sel:
        # output [1] of switch is the branch taken when the predicate holds.
        routed = control_flow_ops.switch(x, tf.equal(sel, case))[1]
        branches.append(func(routed, case))
    # merge returns (output, index); only the output is needed.
    return control_flow_ops.merge(branches)[0]
122 |
123 |
def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
    """Distort the color of a Tensor image.

    The color ops do not commute, so the order in which they are applied
    matters; `color_ordering` picks one of a few fixed orders.

    Args:
      image: 3-D Tensor containing single image in [0, 1].
      color_ordering: Python int, a type of distortion (valid values: 0-3).
        In fast mode any value other than 0 selects the second ordering.
      fast_mode: Avoids slower ops (random_hue and random_contrast)
      scope: Optional scope for name_scope.
    Returns:
      3-D Tensor color-distorted image on range [0, 1]
    Raises:
      ValueError: if color_ordering not in [0, 3] (slow mode only)
    """
    def brightness(img):
        return tf.image.random_brightness(img, max_delta=32. / 255.)

    def saturation(img):
        return tf.image.random_saturation(img, lower=0.5, upper=1.5)

    def hue(img):
        return tf.image.random_hue(img, max_delta=0.2)

    def contrast(img):
        return tf.image.random_contrast(img, lower=0.5, upper=1.5)

    with tf.name_scope(scope, 'distort_color', [image]):
        if fast_mode:
            if color_ordering == 0:
                pipeline = (brightness, saturation)
            else:
                pipeline = (saturation, brightness)
        elif color_ordering == 0:
            pipeline = (brightness, saturation, hue, contrast)
        elif color_ordering == 1:
            pipeline = (saturation, brightness, contrast, hue)
        elif color_ordering == 2:
            pipeline = (contrast, hue, brightness, saturation)
        elif color_ordering == 3:
            pipeline = (hue, saturation, contrast, brightness)
        else:
            raise ValueError('color_ordering must be in [0, 3]')

        for color_op in pipeline:
            image = color_op(image)
        # The random_* ops do not necessarily clamp.
        return tf.clip_by_value(image, 0.0, 1.0)
175 |
176 |
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Generates cropped_image using a one of the bboxes randomly distorted.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
      image: 3-D Tensor of image.
      labels: Labels of the boxes; filtered together with `bboxes`.
      bboxes: 2-D float Tensor of bounding boxes arranged [num_boxes, coords]
        with coordinates as [ymin, xmin, ymax, xmax]; a batch axis is added
        before sampling. The whole image is used when there are no boxes.
      min_object_covered: An optional `float`. Defaults to `0.3`. Passed to
        the sampler as the required covered fraction of a bounding box.
      aspect_ratio_range: An optional list of `floats`. The cropped area of
        the image must have an aspect ratio = width / height within this
        range.
      area_range: An optional list of `floats`. The cropped area of the image
        must contain a fraction of the supplied image within in this range.
      max_attempts: An optional `int`. Number of attempts at generating a
        cropped region of the image of the specified constraints.
      clip_bboxes: Accepted but not used by this function.
      scope: Optional scope for name_scope.
    Returns:
      A tuple (cropped_image, labels, bboxes, distort_bbox): the 3-D cropped
      image, the labels and boxes remaining after the crop (boxes expressed
      relative to the crop), and the sampled crop window.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        # The sampler expects boxes shaped [1, num_boxes, box coords] with
        # coordinates ordered [ymin, xmin, ymax, xmax].
        sampled = tf.image.sample_distorted_bounding_box(
            tf.shape(image),
            bounding_boxes=tf.expand_dims(bboxes, 0),
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True)
        crop_begin, crop_size, crop_window = sampled
        distort_bbox = crop_window[0, 0]

        # Crop the image to the specified bounding box.
        cropped_image = tf.slice(image, crop_begin, crop_size)
        # Restore the shape since the dynamic slice loses 3rd dimension.
        cropped_image.set_shape([None, None, 3])

        # Update bounding boxes: resize and filter out.
        resized_boxes = tfe.bboxes_resize(distort_bbox, bboxes)
        kept_labels, kept_boxes = tfe.bboxes_filter_overlap(
            labels, resized_boxes,
            threshold=BBOX_CROP_OVERLAP,
            assign_negative=False)
        return cropped_image, kept_labels, kept_boxes, distort_bbox
234 |
235 |
def preprocess_for_train(image, labels, bboxes,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.

    Steps: convert to float, random distorted crop, resize to `out_shape`,
    random horizontal flip, random color distortion, then rescale to
    [0, 255] and subtract the VGG channel means. Image summaries are added
    after several of these steps.

    Args:
      image: A 3-D `Tensor` representing an image of arbitrary size.
      labels: Groundtruth labels matching `bboxes`.
      bboxes: Groundtruth boxes in relative coordinates.
      out_shape: Output image shape, passed to `tf_image.resize_image`.
      data_format: 'NHWC' or 'NCHW'; the latter transposes the result to
        channels-first.
      scope: Name scope for the created ops.

    Returns:
      A tuple (image, labels, bboxes) after preprocessing.

    Raises:
      ValueError: If `image` is not a 3-D Tensor.
    """
    fast_mode = False

    def _distort_with_ordering(img, ordering):
        return distort_color(img, ordering, fast_mode)

    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1]: tf.image.draw_bounding_boxes (used
        # by the summaries) requires the image values to be floats.
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Distort image and bounding boxes.
        cropped = distorted_bounding_box_crop(
            image, labels, bboxes,
            min_object_covered=MIN_OBJECT_COVERED,
            aspect_ratio_range=CROP_RATIO_RANGE)
        dst_image, labels, bboxes, distort_bbox = cropped

        # Resize image to output size.
        dst_image = tf_image.resize_image(
            dst_image, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(dst_image, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
            dst_image, _distort_with_ordering, num_cases=4)
        tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = tf_image_whitened(dst_image * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
299 |
300 |
def preprocess_for_eval(image, labels, bboxes,
                        out_shape=EVAL_SIZE, data_format='NHWC',
                        difficults=None, resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    The whole-image box [0, 0, 1, 1] is prepended to `bboxes` so that it
    goes through the same geometric transform, and is split off again at
    the end as `bbox_img`.

    Args:
      image: A 3-D `Tensor` representing an image of arbitrary size.
      labels: Groundtruth labels.
      bboxes: Groundtruth boxes in relative coordinates, or None.
      out_shape: Output shape after pre-processing (if resize != None)
      data_format: 'NHWC' or 'NCHW'; the latter transposes the result to
        channels-first.
      difficults: Optional per-box flags; flagged boxes and their labels are
        removed.
      resize: Resize strategy.
      scope: Name scope for the created ops.

    Returns:
      A tuple (image, labels, bboxes, bbox_img).

    Raises:
      ValueError: If `image` is not a 3-D Tensor.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf_image_whitened(tf.to_float(image),
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        full_image_box = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = full_image_box
        else:
            bboxes = tf.concat([full_image_box, bboxes], axis=0)

        if resize == Resize.NONE:
            # No resizing...
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Central cropping of the image.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor (never above 1,
            # so the image is only ever shrunk before padding).
            shape = tf.shape(image)
            no_upscale = tf.to_double(1.0)
            height_ratio = tf.to_double(out_shape[0] / shape[0])
            width_ratio = tf.to_double(out_shape[1] / shape[1])
            factor = tf.minimum(no_upscale,
                                tf.minimum(height_ratio, width_ratio))
            scaled_hw = factor * tf.to_double(shape[0:2])
            scaled_hw = tf.cast(tf.floor(scaled_hw), tf.int32)

            image = tf_image.resize_image(
                image, scaled_hw,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Warp resize of the image.
            image = tf_image.resize_image(
                image, out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        # Remove difficult boxes.
        if difficults is not None:
            keep = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, keep)
            bboxes = tf.boolean_mask(bboxes, keep)
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img
369 |
370 |
def preprocess_image(image,
                     labels,
                     bboxes,
                     out_shape,
                     data_format,
                     is_training=False,
                     **kwargs):
    """Pre-process an given image.

    Dispatches to `preprocess_for_train` or `preprocess_for_eval`.

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      labels: Groundtruth labels.
      bboxes: Groundtruth bounding boxes.
      out_shape: Output image shape.
      data_format: Image data format ('NHWC' or 'NCHW').
      is_training: `True` if we're preprocessing the image for training and
        `False` otherwise.
      **kwargs: Extra options forwarded to `preprocess_for_eval` only; they
        are ignored when `is_training` is True.

    Returns:
      The result of the selected routine: (image, labels, bboxes) for
      training, (image, labels, bboxes, bbox_img) for evaluation.
    """
    if not is_training:
        return preprocess_for_eval(image, labels, bboxes,
                                   out_shape=out_shape,
                                   data_format=data_format,
                                   **kwargs)
    return preprocess_for_train(image, labels, bboxes,
                                out_shape=out_shape,
                                data_format=data_format)
406 |
--------------------------------------------------------------------------------
/preprocessing/tf_image.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors and Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Custom image operations.
16 | Most of the following methods extend TensorFlow image library, and part of
17 | the code is shameless copy-paste of the former!
18 | """
19 | import tensorflow as tf
20 |
21 | from tensorflow.python.framework import constant_op
22 | from tensorflow.python.framework import dtypes
23 | from tensorflow.python.framework import ops
24 | from tensorflow.python.framework import tensor_shape
25 | from tensorflow.python.framework import tensor_util
26 | from tensorflow.python.ops import array_ops
27 | from tensorflow.python.ops import check_ops
28 | from tensorflow.python.ops import clip_ops
29 | from tensorflow.python.ops import control_flow_ops
30 | from tensorflow.python.ops import gen_image_ops
31 | from tensorflow.python.ops import gen_nn_ops
32 | from tensorflow.python.ops import string_ops
33 | from tensorflow.python.ops import math_ops
34 | from tensorflow.python.ops import random_ops
35 | from tensorflow.python.ops import variables
36 |
37 |
38 | # =========================================================================== #
39 | # Modification of TensorFlow image routines.
40 | # =========================================================================== #
def _assert(cond, ex_type, msg):
    """Assert `cond`, whether it is a tensor or a plain Python value.

    Args:
      cond: Condition to check. May be a tensor (as decided by `_is_tensor`)
        or anything usable in a boolean context.
      ex_type: Exception class raised when a non-tensor `cond` is falsy.
      msg: Error message, used for both the exception and the assert op.

    Returns:
      A one-element list holding a TensorFlow `Assert` op when `cond` is a
      tensor, otherwise an empty list.

    Raises:
      ex_type: If `cond` is not a tensor and evaluates to false.
    """
    # Symbolic condition: the check has to be deferred to graph execution.
    if _is_tensor(cond):
        return [control_flow_ops.Assert(cond, [msg])]
    # Concrete condition: check right away.
    if cond:
        return []
    raise ex_type(msg)
60 |
61 |
def _is_tensor(x):
    """Tell whether `x` is a `Tensor` or a `Variable` instance.

    Args:
      x: Any Python object.

    Returns:
      `True` when `x` is an instance of `ops.Tensor` or
      `variables.Variable`, `False` otherwise.
    """
    tensor_like_types = (ops.Tensor, variables.Variable)
    return isinstance(x, tensor_like_types)
70 |
71 |
def _ImageDimensions(image):
    """Return the dimensions of an image tensor, static where possible.

    Args:
      image: Image tensor. When its static shape is not fully defined it must
        be compatible with rank 3 (`[height, width, channels]`).

    Returns:
      A list of dimensions. If the static shape is fully defined it is
      returned as a list of Python integers. Otherwise each statically known
      dimension is a Python integer and each unknown one is the matching
      scalar tensor taken from the dynamic shape.
    """
    shape = image.get_shape()
    if shape.is_fully_defined():
        return shape.as_list()

    known_dims = shape.with_rank(3).as_list()
    runtime_dims = array_ops.unstack(array_ops.shape(image), 3)
    dims = []
    for static_dim, dynamic_dim in zip(known_dims, runtime_dims):
        # Fall back to the run-time value only where nothing is known statically.
        dims.append(dynamic_dim if static_dim is None else static_dim)
    return dims
88 |
89 |
def _Check3DImage(image, require_static=True):
    """Validate that `image` is a properly shaped 3-D image tensor.

    Args:
      image: Tensor expected to have shape `[height, width, channels]`.
      require_static: When true, a shape that is not fully defined is
        rejected.

    Returns:
      An empty list when the static shape is fully defined; otherwise a list
      with one op asserting that every run-time dimension is positive.

    Raises:
      ValueError: If the shape is not compatible with rank 3, if
        `require_static` is set and the shape is not fully defined, or if a
        statically known dimension equals zero.
    """
    try:
        shape = image.get_shape().with_rank(3)
    except ValueError:
        raise ValueError("'image' must be three-dimensional.")

    fully_defined = shape.is_fully_defined()
    if require_static and not fully_defined:
        raise ValueError("'image' must be fully defined.")

    for dim in shape:
        if dim == 0:
            raise ValueError("all dims of 'image.shape' must be > 0: %s" %
                             shape)

    if fully_defined:
        return []
    # Some dimensions are only known at run time: defer the positivity check.
    return [check_ops.assert_positive(array_ops.shape(image),
                                      ["all dims of 'image.shape' "
                                       "must be > 0."])]
117 |
118 |
def fix_image_flip_shape(image, result):
    """Copy the static shape of `image` onto `result`.

    Args:
      image: Tensor whose static shape is used as the reference.
      result: Tensor (e.g. a flipped version of `image`) whose static shape
        is set in place.

    Returns:
      `result`, with its shape set to `[None, None, None]` when the shape of
      `image` is completely unknown, and to the shape of `image` otherwise.
    """
    reference_shape = image.get_shape()
    shape_is_unknown = reference_shape == tensor_shape.unknown_shape()
    # With no shape information at all, at least pin the rank to 3.
    result.set_shape([None, None, None] if shape_is_unknown
                     else reference_shape)
    return result
133 |
134 |
135 | # =========================================================================== #
136 | # Image + BBoxes methods: cropping, resizing, flipping, ...
137 | # =========================================================================== #
def bboxes_crop_or_pad(bboxes,
                       height, width,
                       offset_y, offset_x,
                       target_height, target_width):
    """Update relative bounding boxes after a crop or pad of the image.

    The boxes are scaled by the original dimensions, shifted by the offset,
    then divided by the target dimensions.

    Arguments:
      bboxes: Boxes laid out as `[y_min, x_min, y_max, x_max]` along the last
        axis, relative to the original image (Nx4 per the original
        documentation).
      height, width: Dimensions of the original image.
      offset_y, offset_x: Offset added to the pixel coordinates; negative for
        a crop, positive for a pad.
      target_height, target_width: Dimensions of the image after the
        operation.

    Returns:
      The boxes expressed relative to the target image.
    """
    with tf.name_scope('bboxes_crop_or_pad'):

        def per_coordinate(y_value, x_value, like):
            # Build a [y, x, y, x] vector matching the box layout and dtype.
            return tf.cast(tf.stack([y_value, x_value, y_value, x_value]),
                           like.dtype)

        # Relative coordinates -> pixels of the original image.
        in_pixels = bboxes * per_coordinate(height, width, bboxes)
        # Apply the crop / pad translation.
        shifted = in_pixels + per_coordinate(offset_y, offset_x, in_pixels)
        # Pixels -> coordinates relative to the target image.
        return shifted / per_coordinate(target_height, target_width, shifted)
164 |
165 |
def resize_image_bboxes_with_crop_or_pad(image, bboxes,
                                         target_height, target_width):
    """Crop and/or pad an image to a target size, updating its boxes.

    Along each dimension the image is cropped when it is larger than the
    target and padded (via `tf.image.pad_to_bounding_box`) when it is
    smaller. Offsets are half of the size difference, so the operation is
    central. The bounding boxes go through `bboxes_crop_or_pad` for both
    steps so that they stay relative to the resulting image.

    Args:
      image: 3-D tensor of shape `[height, width, channels]`.
      bboxes: Bounding boxes relative to `image`, in the layout expected by
        `bboxes_crop_or_pad`.
      target_height: Target height (Python number or tensor).
      target_width: Target width (Python number or tensor).

    Raises:
      ValueError: If a non-tensor `target_height` or `target_width` is zero
        or negative, or if the resized image carries no shape information.

    Returns:
      A tuple `(resized, bboxes)`: the cropped and/or padded image and the
      adjusted bounding boxes.
    """
    def _any_tensor(a, b):
        return _is_tensor(a) or _is_tensor(b)

    # Each helper picks the graph op when a tensor is involved and the plain
    # Python operation otherwise.
    def _larger(a, b):
        return math_ops.maximum(a, b) if _any_tensor(a, b) else max(a, b)

    def _smaller(a, b):
        return math_ops.minimum(a, b) if _any_tensor(a, b) else min(a, b)

    def _same(a, b):
        return math_ops.equal(a, b) if _any_tensor(a, b) else a == b

    with tf.name_scope('resize_with_crop_or_pad'):
        image = ops.convert_to_tensor(image, name='image')

        input_checks = list(_Check3DImage(image, require_static=False))
        input_checks.extend(_assert(target_width > 0, ValueError,
                                    'target_width must be > 0.'))
        input_checks.extend(_assert(target_height > 0, ValueError,
                                    'target_height must be > 0.'))

        # Attach our own checks first so that their messages surface before
        # those of `crop_to_bounding_box` / `pad_to_bounding_box`.
        image = control_flow_ops.with_dependencies(input_checks, image)
        if _is_tensor(target_height):
            target_height = control_flow_ops.with_dependencies(
                input_checks, target_height)
        if _is_tensor(target_width):
            target_width = control_flow_ops.with_dependencies(
                input_checks, target_width)

        src_height, src_width, _ = _ImageDimensions(image)

        # Only one of the crop / pad offsets is non-zero per dimension.
        delta_width = target_width - src_width
        crop_left = _larger(-delta_width // 2, 0)
        pad_left = _larger(delta_width // 2, 0)

        delta_height = target_height - src_height
        crop_top = _larger(-delta_height // 2, 0)
        pad_top = _larger(delta_height // 2, 0)

        # Crop step (a no-op along dimensions that are already small enough).
        kept_height = _smaller(target_height, src_height)
        kept_width = _smaller(target_width, src_width)
        cropped = tf.image.crop_to_bounding_box(image, crop_top, crop_left,
                                                kept_height, kept_width)
        bboxes = bboxes_crop_or_pad(bboxes,
                                    src_height, src_width,
                                    -crop_top, -crop_left,
                                    kept_height, kept_width)

        # Pad step (a no-op along dimensions that are already large enough).
        resized = tf.image.pad_to_bounding_box(cropped, pad_top, pad_left,
                                               target_height, target_width)
        bboxes = bboxes_crop_or_pad(bboxes,
                                    kept_height, kept_width,
                                    pad_top, pad_left,
                                    target_height, target_width)

        # The remaining checks should be redundant; kept as a safety net.
        if resized.get_shape().ndims is None:
            raise ValueError('resized contains no shape.')

        out_height, out_width, _ = _ImageDimensions(resized)

        output_checks = []
        output_checks += _assert(_same(out_height, target_height), ValueError,
                                 'resized height is not correct.')
        output_checks += _assert(_same(out_width, target_width), ValueError,
                                 'resized width is not correct.')

        resized = control_flow_ops.with_dependencies(output_checks, resized)
        return resized, bboxes
264 |
265 |
def resize_image(image, size,
                 method=tf.image.ResizeMethod.BILINEAR,
                 align_corners=False):
    """Resize a single 3-D image to `size`.

    Only the image is resized; no bounding boxes are involved.

    Args:
      image: 3-D image tensor (`_ImageDimensions` must yield three values).
      size: Indexable pair `(new_height, new_width)`.
      method: Resize method passed to `tf.image.resize_images`.
      align_corners: Passed to `tf.image.resize_images`.

    Returns:
      The resized image, reshaped to `[size[0], size[1], channels]`.
    """
    with tf.name_scope('resize_image'):
        # Only the channel count is needed; height and width are replaced.
        _, _, channels = _ImageDimensions(image)
        # A batch axis is added before resizing and dropped by the reshape.
        batched = tf.expand_dims(image, 0)
        scaled = tf.image.resize_images(batched, size,
                                        method, align_corners)
        target_shape = tf.stack([size[0], size[1], channels])
        return tf.reshape(scaled, target_shape)
279 |
280 |
def random_flip_left_right(image, bboxes, seed=None):
    """Randomly mirror an image and its bounding boxes horizontally.

    A uniform sample in [0, 1) is drawn; when it is below 0.5 both the image
    and the boxes are flipped, otherwise both are returned unchanged.

    Args:
      image: 3-D image tensor (or a value convertible to one).
      bboxes: Boxes indexed as `[:, 0..3]` = `[y_min, x_min, y_max, x_max]`,
        with x coordinates relative to the image width (they are mirrored as
        `1 - x`).
      seed: Optional seed for the random draw.

    Returns:
      A tuple `(image, bboxes)` after the optional flip.
    """
    def _mirror_boxes(boxes):
        # Mirroring swaps the roles of x_min and x_max: new x_min = 1 - x_max.
        return tf.stack([boxes[:, 0], 1 - boxes[:, 3],
                         boxes[:, 2], 1 - boxes[:, 1]], axis=-1)

    with tf.name_scope('random_flip_left_right'):
        image = ops.convert_to_tensor(image, name='image')
        _Check3DImage(image, require_static=False)
        coin = random_ops.random_uniform([], 0, 1.0, seed=seed)
        should_flip = math_ops.less(coin, .5)

        def _flipped_image():
            return array_ops.reverse_v2(image, [1])

        def _flipped_boxes():
            return _mirror_boxes(bboxes)

        # The same condition drives both branches so image and boxes agree.
        flipped = control_flow_ops.cond(should_flip,
                                        _flipped_image,
                                        lambda: image)
        new_bboxes = control_flow_ops.cond(should_flip,
                                           _flipped_boxes,
                                           lambda: bboxes)
        return fix_image_flip_shape(image, flipped), new_bboxes
306 |
307 |
--------------------------------------------------------------------------------
/ssd_visualize.py:
--------------------------------------------------------------------------------
1 | import os
2 | import math
3 | import random
4 | import numpy as np
5 | import tensorflow as tf
6 | import cv2
7 |
8 | slim = tf.contrib.slim
9 |
10 | import matplotlib.pyplot as plt
11 | import matplotlib.image as mpimg
12 | import sys
13 | sys.path.append('../')
14 |
15 | from nets import ssd_vgg_300, ssd_common
16 | from preprocessing import ssd_vgg_preprocessing
17 | import visualization
18 |
# Interactive session that lets the GPU memory allocation grow on demand.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

# Network input: a single uint8 RGB image of arbitrary height and width.
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

# Evaluation pre-processing (warp resize), without labels or boxes.
image_pre, labels_pre, bboxes_pre, bbox_img = \
    ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
# Add the batch axis expected by the network.
image_4d = tf.expand_dims(image_pre, 0)

# Build the SSD model; reuse variables when `ssd_net` already exists
# (e.g. when this code is re-run in the same interpreter).
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(
        image_4d, is_training=False, reuse=reuse)

# Restore the SSD weights from a checkpoint.
ckpt_filename = './logs/model.ckpt-62962'
# ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# Default anchor boxes of the network for the chosen input shape.
ssd_anchors = ssd_net.anchors(net_shape)
46 |
47 |
48 | # Main image processing routine.
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    """Run the SSD network on one image and post-process its detections.

    Uses the module-level session, graph tensors and anchors.

    Args:
      img: Image fed to the `img_input` placeholder.
      select_threshold: Passed to `ssd_common.ssd_bboxes_select`.
      nms_threshold: Passed to `ssd_common.bboxes_nms`.
      net_shape: Passed to `ssd_common.ssd_bboxes_select` as `img_shape`.

    Returns:
      A tuple `(classes, scores, bboxes)` after selection, clipping, sorting
      (top 400), non-maximum suppression and resizing.
    """
    # Forward pass; the pre-processed image itself is not needed afterwards.
    fetches = [image_4d, predictions, localisations, bbox_img]
    _, class_probs, box_offsets, image_bbox = isess.run(
        fetches, feed_dict={img_input: img})

    # Decode the raw network outputs into classes, scores and boxes.
    classes, scores, boxes = ssd_common.ssd_bboxes_select(
        class_probs, box_offsets, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)

    boxes = ssd_common.bboxes_clip(image_bbox, boxes)
    classes, scores, boxes = ssd_common.bboxes_sort(
        classes, scores, boxes, top_k=400)
    classes, scores, boxes = ssd_common.bboxes_nms(
        classes, scores, boxes, nms_threshold=nms_threshold)
    # Map boxes back to the original image frame (useless for Resize.WARP).
    boxes = ssd_common.bboxes_resize(image_bbox, boxes)
    return classes, scores, boxes
65 |
# Test on some demo images and visualize the output.
path = './demo/'
image_names = sorted(os.listdir(path))
# Process at most the first ten entries. Slicing, rather than indexing over a
# fixed range(10), avoids an IndexError when the folder holds fewer files.
for image_name in image_names[:10]:
    img = mpimg.imread(path + image_name)
    rclasses, rscores, rbboxes = process_image(img)

    # visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)
    visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
75 |
--------------------------------------------------------------------------------
/tf_convert_data.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Convert a dataset to TFRecords format, which can be easily integrated into
16 | a TensorFlow pipeline.
17 |
18 | Usage:
19 | ```shell
20 | python tf_convert_data.py \
21 | --dataset_name=pascalvoc \
22 | --dataset_dir=/tmp/pascalvoc \
23 | --output_name=pascalvoc \
24 | --output_dir=/tmp/
25 | ```
26 | """
27 | import tensorflow as tf
28 |
29 | from datasets import pascalvoc_to_tfrecords
30 |
FLAGS = tf.app.flags.FLAGS

# tf.app.flags accepts parameters passed on the command line, much like
# reading argv. Each definition below is (name, default, help text).
tf.app.flags.DEFINE_string('dataset_name',
                           'pascalvoc',
                           'The name of the dataset to convert.')
tf.app.flags.DEFINE_string('dataset_dir',
                           None,
                           'Directory where the original dataset is stored.')
tf.app.flags.DEFINE_string('output_name',
                           'pascalvoc',
                           'Basename used for TFRecords output files.')
tf.app.flags.DEFINE_string('output_dir',
                           './',
                           'Output directory where to store TFRecords files.')
45 |
46 |
def main(_):
    """Convert the dataset selected by the command-line flags to TFRecords.

    Args:
      _: Unused positional argument supplied by `tf.app.run`.

    Raises:
      ValueError: If `--dataset_dir` is empty, or if `--dataset_name` is not
        `pascalvoc`.
    """
    dataset_dir = FLAGS.dataset_dir
    if not dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')
    print('Dataset directory:', dataset_dir)
    print('Output directory:', FLAGS.output_dir)

    # Only the Pascal VOC converter is available.
    if FLAGS.dataset_name != 'pascalvoc':
        raise ValueError('Dataset [%s] was not recognized.' % FLAGS.dataset_name)
    pascalvoc_to_tfrecords.run(dataset_dir, FLAGS.output_dir, FLAGS.output_name)
57 |
# Script entry point: let TensorFlow parse the flags, then call `main`.
if __name__ == "__main__":
    tf.app.run()
60 |
61 |
--------------------------------------------------------------------------------
/tf_extended/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """TF Extended: additional metrics.
16 | """
17 |
18 | # pylint: disable=unused-import,line-too-long,g-importing-member,wildcard-import
19 | from tf_extended.metrics import *
20 | from tf_extended.tensors import *
21 | from tf_extended.bboxes import *
22 | from tf_extended.image import *
23 | from tf_extended.math import *
24 |
25 |
--------------------------------------------------------------------------------
/tf_extended/bboxes.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """TF Extended: additional bounding boxes methods.
16 | """
17 | import numpy as np
18 | import tensorflow as tf
19 |
20 | from tf_extended import tensors as tfe_tensors
21 | from tf_extended import math as tfe_math
22 |
23 |
24 | # =========================================================================== #
25 | # Standard boxes algorithms.
26 | # =========================================================================== #
def bboxes_sort_all_classes(classes, scores, bboxes, top_k=400, scope=None):
    """Keep the `top_k` best-scoring boxes of each batch entry, sorted.

    Classes are mixed within each entry; the scores alone drive the ordering.

    Args:
      classes: Batch x N Tensor containing integer classes.
      scores: Batch x N Tensor containing float scores.
      bboxes: Batch x N x 4 Tensor containing boxes coordinates.
      top_k: Number of boxes kept per batch entry.
      scope: Optional name scope.

    Return:
      classes, scores, bboxes: Tensors re-ordered by decreasing score and
        truncated to `top_k` entries along the second axis.
    """
    with tf.name_scope(scope, 'bboxes_sort', [classes, scores, bboxes]):
        scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True)

        def _gather_entry(entry):
            # `tf.gather` is applied per batch entry through `tf.map_fn`.
            entry_classes, entry_bboxes, entry_idxes = entry
            return [tf.gather(entry_classes, entry_idxes),
                    tf.gather(entry_bboxes, entry_idxes)]

        sorted_classes, sorted_bboxes = tf.map_fn(
            _gather_entry,
            [classes, bboxes, idxes],
            dtype=[classes.dtype, bboxes.dtype],
            parallel_iterations=10,
            back_prop=False,
            swap_memory=False,
            infer_shape=True)
        return sorted_classes, scores, sorted_bboxes
58 |
59 |
def bboxes_sort(scores, bboxes, top_k=400, scope=None):
    """Keep the `top_k` best-scoring boxes of each batch entry, sorted.

    When either input is a dictionary, every key of `scores` is processed
    independently (one key per class) and dictionaries are returned.

    Args:
      scores: Batch x N Tensor/Dictionary containing float scores.
      bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates.
      top_k: Number of boxes kept per batch entry.
      scope: Optional name scope.

    Return:
      scores, bboxes: Tensors/Dictionaries ordered by decreasing score and
        truncated to `top_k` entries along the second axis.
    """
    # Dictionary inputs: recurse on every key.
    if any(isinstance(value, dict) for value in (scores, bboxes)):
        with tf.name_scope(scope, 'bboxes_sort_dict'):
            sorted_scores = {}
            sorted_bboxes = {}
            for key in scores.keys():
                key_scores, key_bboxes = bboxes_sort(scores[key], bboxes[key],
                                                     top_k=top_k)
                sorted_scores[key] = key_scores
                sorted_bboxes[key] = key_bboxes
            return sorted_scores, sorted_bboxes

    # Tensor inputs.
    with tf.name_scope(scope, 'bboxes_sort', [scores, bboxes]):
        scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True)

        def _gather_entry(entry):
            # `tf.gather` is applied per batch entry through `tf.map_fn`.
            entry_bboxes, entry_idxes = entry
            return [tf.gather(entry_bboxes, entry_idxes)]

        gathered = tf.map_fn(_gather_entry,
                             [bboxes, idxes],
                             dtype=[bboxes.dtype],
                             parallel_iterations=10,
                             back_prop=False,
                             swap_memory=False,
                             infer_shape=True)
        return scores, gathered[0]
101 |
102 |
def bboxes_clip(bbox_ref, bboxes, scope=None):
    """Clip bounding boxes to a reference box.

    Boxes are intersected with `bbox_ref`. A box with no intersection
    collapses to an empty box (min coordinates are capped by the max ones).

    Args:
      bbox_ref: Reference bounding box. Nx4 or 4 shaped-Tensor.
      bboxes: Bounding boxes to clip. Nx4 or 4 shaped-Tensor, or a dictionary
        of such tensors (each value is clipped independently).
      scope: Optional name scope.

    Return:
      Clipped bboxes, as a tensor or as a dictionary mirroring the input.
    """
    # Dictionary input: clip each entry on its own.
    if isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_clip_dict'):
            return {key: bboxes_clip(bbox_ref, bboxes[key])
                    for key in bboxes.keys()}

    # Tensor input.
    with tf.name_scope(scope, 'bboxes_clip'):
        # Coordinate-major layout makes per-coordinate broadcasting simple.
        ref_t = tf.transpose(bbox_ref)
        boxes_t = tf.transpose(bboxes)
        # Intersection with the reference box.
        top = tf.maximum(boxes_t[0], ref_t[0])
        left = tf.maximum(boxes_t[1], ref_t[1])
        bottom = tf.minimum(boxes_t[2], ref_t[2])
        right = tf.minimum(boxes_t[3], ref_t[3])
        # Guard against inverted boxes when there is no intersection.
        top = tf.minimum(top, bottom)
        left = tf.minimum(left, right)
        clipped_t = tf.stack([top, left, bottom, right], axis=0)
        return tf.transpose(clipped_t)
137 |
138 |
def bboxes_resize(bbox_ref, bboxes, name=None):
    """Express bounding boxes in the frame of a reference box.

    The boxes are translated by the top-left corner of `bbox_ref` and divided
    by its height and width, so that `bbox_ref` itself maps to
    `[0, 0, 1, 1]`. Useful for updating boxes after cropping an image.

    Args:
      bbox_ref: Reference box indexed as `[y_min, x_min, y_max, x_max]`.
      bboxes: Boxes to transform, or a dictionary of boxes (each value is
        transformed independently).
      name: Optional name scope.

    Returns:
      The transformed boxes, as a tensor or as a dictionary mirroring the
      input.
    """
    # Dictionary input: resize each entry on its own.
    if isinstance(bboxes, dict):
        with tf.name_scope(name, 'bboxes_resize_dict'):
            return {key: bboxes_resize(bbox_ref, bboxes[key])
                    for key in bboxes.keys()}

    # Tensor input.
    with tf.name_scope(name, 'bboxes_resize'):
        # Move the origin to the top-left corner of the reference box.
        origin = tf.stack([bbox_ref[0], bbox_ref[1],
                           bbox_ref[0], bbox_ref[1]])
        translated = bboxes - origin
        # Normalise by the reference height and width.
        extent = tf.stack([bbox_ref[2] - bbox_ref[0],
                           bbox_ref[3] - bbox_ref[1],
                           bbox_ref[2] - bbox_ref[0],
                           bbox_ref[3] - bbox_ref[1]])
        return translated / extent
164 |
165 |
def bboxes_nms(scores, bboxes, nms_threshold=0.5, keep_top_k=200, scope=None):
    """Apply non-maximum suppression to the boxes of a single entry.

    Should only be used on single entries; use the batch version otherwise.

    Args:
      scores: N Tensor containing float scores.
      bboxes: N x 4 Tensor containing boxes coordinates.
      nms_threshold: Matching threshold in NMS algorithm.
      keep_top_k: Maximum number of boxes selected by NMS; also the length
        the outputs are padded to.
      scope: Optional name scope.

    Return:
      scores, bboxes: Tensors holding the selected entries, padded with 0
        along axis 0 through `tfe_tensors.pad_axis`.
    """
    with tf.name_scope(scope, 'bboxes_nms_single', [scores, bboxes]):
        # Indices of the boxes that survive suppression.
        kept = tf.image.non_max_suppression(bboxes, scores,
                                            keep_top_k, nms_threshold)
        kept_scores = tf.gather(scores, kept)
        kept_bboxes = tf.gather(bboxes, kept)
        # Pad so the output size does not depend on how many boxes survived.
        padded_scores = tfe_tensors.pad_axis(kept_scores, 0, keep_top_k, axis=0)
        padded_bboxes = tfe_tensors.pad_axis(kept_bboxes, 0, keep_top_k, axis=0)
        return padded_scores, padded_bboxes
190 |
191 |
def bboxes_nms_batch(scores, bboxes, nms_threshold=0.5, keep_top_k=200,
                     scope=None):
    """Apply non-maximum suppression to batched inputs.

    `bboxes_nms` is mapped over the first axis. When either input is a
    dictionary, every key of `scores` is processed independently and
    dictionaries are returned.

    Args:
      scores: Batch x N Tensor/Dictionary containing float scores.
      bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates.
      nms_threshold: Matching threshold in NMS algorithm.
      keep_top_k: Number of total object to keep after NMS.
      scope: Optional name scope.

    Return:
      scores, bboxes Tensors/Dictionaries, as produced (and padded) by
        `bboxes_nms` for each batch entry.
    """
    # Dictionary inputs: recurse on every key.
    if any(isinstance(value, dict) for value in (scores, bboxes)):
        with tf.name_scope(scope, 'bboxes_nms_batch_dict'):
            nms_scores = {}
            nms_bboxes = {}
            for key in scores.keys():
                key_scores, key_bboxes = bboxes_nms_batch(
                    scores[key], bboxes[key],
                    nms_threshold=nms_threshold,
                    keep_top_k=keep_top_k)
                nms_scores[key] = key_scores
                nms_bboxes[key] = key_bboxes
            return nms_scores, nms_bboxes

    # Tensor inputs.
    with tf.name_scope(scope, 'bboxes_nms_batch'):

        def _nms_entry(entry):
            entry_scores, entry_bboxes = entry
            return bboxes_nms(entry_scores, entry_bboxes,
                              nms_threshold, keep_top_k)

        batch_scores, batch_bboxes = tf.map_fn(
            _nms_entry,
            (scores, bboxes),
            dtype=(scores.dtype, bboxes.dtype),
            parallel_iterations=10,
            back_prop=False,
            swap_memory=False,
            infer_shape=True)
        return batch_scores, batch_bboxes
233 |
234 |
235 | # def bboxes_fast_nms(classes, scores, bboxes,
236 | # nms_threshold=0.5, eta=3., num_classes=21,
237 | # pad_output=True, scope=None):
238 | # with tf.name_scope(scope, 'bboxes_fast_nms',
239 | # [classes, scores, bboxes]):
240 |
241 | # nms_classes = tf.zeros((0,), dtype=classes.dtype)
242 | # nms_scores = tf.zeros((0,), dtype=scores.dtype)
243 | # nms_bboxes = tf.zeros((0, 4), dtype=bboxes.dtype)
244 |
245 |
def bboxes_matching(label, scores, bboxes,
                    glabels, gbboxes, gdifficults,
                    matching_threshold=0.5, scope=None):
    """Match the detections of one class against the groundtruth boxes.

    Does not accept batched inputs. Detections are visited in order. For
    each one, the groundtruth box of class `label` with the highest Jaccard
    score is looked up:
      * score above the threshold and box not yet matched: True Positive;
      * score not above the threshold, or box already matched: False
        Positive;
      * best groundtruth box flagged as difficult: neither TP nor FP.
    See the Pascal VOC documentation for the details of the protocol.

    Args:
      label: Class being evaluated.
      scores: N Tensor of detection scores (presumably sorted by decreasing
        score, as the original documentation states; not checked here).
      bboxes: N x 4 Tensor of detected boxes.
      glabels: Groundtruth labels.
      gbboxes: Groundtruth bounding boxes.
      gdifficults: Groundtruth "difficult" flags, cast to boolean.
      matching_threshold: Threshold for a positive match.
      scope: Optional name scope.

    Return: Tuple of:
      n_gbboxes: Scalar Tensor counting the groundtruth boxes of class
        `label` that are not flagged as difficult.
      tp_match: Boolean Tensor shaped like `scores`, marking True Positives.
      fp_match: Boolean Tensor shaped like `scores`, marking False Positives.
    """
    with tf.name_scope(scope, 'bboxes_matching_single',
                       [scores, bboxes, glabels, gbboxes]):
        n_detections = tf.size(scores)
        detections_shape = tf.shape(scores)
        typed_label = tf.cast(label, glabels.dtype)
        # Number of (non-difficult) groundtruth boxes of this class.
        gdifficults = tf.cast(gdifficults, tf.bool)
        same_class = tf.equal(glabels, label)
        n_gbboxes = tf.count_nonzero(
            tf.logical_and(same_class, tf.logical_not(gdifficults)))
        # Groundtruth bookkeeping: which boxes are already matched.
        matched_init = tf.zeros(tf.shape(glabels), dtype=tf.bool)
        gt_indices = tf.range(tf.size(glabels), dtype=tf.int32)
        # Per-detection TP / FP flags, filled in by the loop below.
        tp_array = tf.TensorArray(tf.bool, size=n_detections,
                                  dynamic_size=False, infer_shape=True)
        fp_array = tf.TensorArray(tf.bool, size=n_detections,
                                  dynamic_size=False, infer_shape=True)

        def _keep_going(step, tp_acc, fp_acc, matched):
            return tf.less(step, n_detections)

        def _match_one(step, tp_acc, fp_acc, matched):
            # Jaccard score against every groundtruth box, zeroed for boxes
            # of another class.
            overlaps = bboxes_jaccard(bboxes[step], gbboxes)
            class_mask = tf.cast(tf.equal(glabels, typed_label),
                                 dtype=overlaps.dtype)
            overlaps = overlaps * class_mask

            # Best candidate and its status.
            best = tf.cast(tf.argmax(overlaps, axis=0), tf.int32)
            is_match = overlaps[best] > matching_threshold
            already_matched = matched[best]
            not_difficult = tf.logical_not(gdifficults[best])

            # Difficult groundtruth: record nothing (TP = FP = False).
            is_tp = tf.logical_and(
                not_difficult,
                tf.logical_and(is_match, tf.logical_not(already_matched)))
            tp_acc = tp_acc.write(step, is_tp)
            is_fp = tf.logical_and(
                not_difficult,
                tf.logical_or(already_matched, tf.logical_not(is_match)))
            fp_acc = fp_acc.write(step, is_fp)
            # Flag the groundtruth box as used when it was validly matched.
            newly_matched = tf.logical_and(
                tf.equal(gt_indices, best),
                tf.logical_and(not_difficult, is_match))
            matched = tf.logical_or(matched, newly_matched)

            return [step + 1, tp_acc, fp_acc, matched]

        # Sequential loop: each step depends on the matches made before it.
        _, tp_array, fp_array, _ = tf.while_loop(
            _keep_going, _match_one,
            [0, tp_array, fp_array, matched_init],
            parallel_iterations=1,
            back_prop=False)
        # TensorArrays back to Tensors, shaped like the input scores.
        tp_match = tf.reshape(tp_array.stack(), detections_shape)
        fp_match = tf.reshape(fp_array.stack(), detections_shape)
        return n_gbboxes, tp_match, fp_match
335 |
336 |
def bboxes_matching_batch(labels, scores, bboxes,
                          glabels, gbboxes, gdifficults,
                          matching_threshold=0.5, scope=None):
    """Match batches of detected boxes against groundtruth boxes.

    Two input layouts are handled:
      * if `scores` or `bboxes` is a dictionary, the function iterates over
        `labels`, recurses once per entry `c` using `scores[c]` and
        `bboxes[c]`, and gathers the results in dictionaries keyed by `c`;
      * otherwise `bboxes_matching` is mapped (with `tf.map_fn`) over the
        leading dimension of `scores`, `bboxes`, `glabels`, `gbboxes` and
        `gdifficults`.

    Args:
      labels: Label passed to `bboxes_matching`, or an iterable of labels
        used as keys when the inputs are dictionaries.
      scores, bboxes: Detection scores and boxes (Tensors, or dictionaries
        of Tensors indexed by label).
      glabels, gbboxes, gdifficults: Groundtruth labels, boxes and
        difficult flags, forwarded unchanged to `bboxes_matching`.
      matching_threshold: Threshold forwarded to `bboxes_matching`.
      scope: Optional name scope.
    Return:
      Tuple `(n_gbboxes, tp, fp, scores)`; the first three items are Tensors
      (int64, bool, bool) or dictionaries of such Tensors, and `scores` is
      the input handed back untouched.
    """
    # Dictionary inputs: one recursive call per label.
    if isinstance(scores, dict) or isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_matching_batch_dict'):
            n_by_label = {}
            tp_by_label = {}
            fp_by_label = {}
            for lbl in labels:
                n, tp, fp, _ = bboxes_matching_batch(
                    lbl, scores[lbl], bboxes[lbl],
                    glabels, gbboxes, gdifficults,
                    matching_threshold)
                n_by_label[lbl] = n
                tp_by_label[lbl] = tp
                fp_by_label[lbl] = fp
            return n_by_label, tp_by_label, fp_by_label, scores

    def _match_one(elems):
        # `elems` holds the slices of one batch element, in the same order
        # as the tuple given to `tf.map_fn` below.
        el_scores, el_bboxes, el_glabels, el_gbboxes, el_gdifficults = elems
        return bboxes_matching(labels, el_scores, el_bboxes,
                               el_glabels, el_gbboxes, el_gdifficults,
                               matching_threshold)

    with tf.name_scope(scope, 'bboxes_matching_batch',
                       [scores, bboxes, glabels, gbboxes]):
        n_gbboxes, tp_match, fp_match = tf.map_fn(
            _match_one,
            (scores, bboxes, glabels, gbboxes, gdifficults),
            dtype=(tf.int64, tf.bool, tf.bool),
            parallel_iterations=10,
            back_prop=False,
            swap_memory=True,
            infer_shape=True)
        return n_gbboxes, tp_match, fp_match, scores
381 |
382 |
383 | # =========================================================================== #
384 | # Some filtering methods.
385 | # =========================================================================== #
def bboxes_filter_center(labels, bboxes, margins=[0., 0., 0., 0.],
                         scope=None):
    """Filter out bounding boxes whose center is not inside the rectangle
    [0, 0, 1, 1] + margins. The margins can be used to tighten or loosen
    this condition.

    Args:
      labels: Tensor of labels, masked along its first dimension.
      bboxes: Tensor of boxes; columns 0 and 2 are averaged into the center
        `cy` and columns 1 and 3 into the center `cx`.
      margins: Four offsets `[m0, m1, m2, m3]`. A box is kept when
        `m0 < cy < 1 + m2` and `m1 < cx < 1 + m3`. The default is only read,
        never modified.
      scope: Optional name scope.
    Return:
      labels, bboxes: Filtered elements.
    """
    with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
        cy = (bboxes[:, 0] + bboxes[:, 2]) / 2.
        cx = (bboxes[:, 1] + bboxes[:, 3]) / 2.
        # Lower bounds on both center coordinates.
        mask = tf.greater(cy, margins[0])
        mask = tf.logical_and(mask, tf.greater(cx, margins[1]))
        # Upper bounds: margins[2] constrains cy, margins[3] constrains cx.
        mask = tf.logical_and(mask, tf.less(cy, 1. + margins[2]))
        mask = tf.logical_and(mask, tf.less(cx, 1. + margins[3]))
        # Boolean masking...
        labels = tf.boolean_mask(labels, mask)
        bboxes = tf.boolean_mask(bboxes, mask)
        return labels, bboxes
406 |
407 |
def bboxes_filter_overlap(labels, bboxes,
                          threshold=0.5, assign_negative=False,
                          scope=None):
    """Filter bounding boxes on their relative overlap with the reference
    box [0, 0, 1, 1], as computed by `bboxes_intersection`.

    Args:
      labels: Tensor of labels.
      bboxes: Tensor of bounding boxes.
      threshold: A box passes when its overlap score is strictly greater
        than this value.
      assign_negative: If True, nothing is removed: labels of boxes that do
        not pass are negated and `bboxes` is returned unchanged. If False,
        boxes that do not pass are dropped from both Tensors.
      scope: Optional name scope.
    Return:
      labels, bboxes: Filtered (or newly assigned) elements.
    """
    with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
        unit_box = tf.constant([0, 0, 1, 1], bboxes.dtype)
        overlap = bboxes_intersection(unit_box, bboxes)
        keep = overlap > threshold
        if not assign_negative:
            # Drop the boxes (and their labels) below the threshold.
            kept_labels = tf.boolean_mask(labels, keep)
            kept_bboxes = tf.boolean_mask(bboxes, keep)
            return kept_labels, kept_bboxes
        # Keep everything, flipping the sign of the rejected labels.
        return tf.where(keep, labels, -labels), bboxes
429 |
430 |
def bboxes_filter_labels(labels, bboxes,
                         out_labels=[], num_classes=np.inf,
                         scope=None):
    """Filter out labels from a collection. Typically used to get rid
    of DontCare elements. Also removes elements whose label is not below
    the number of classes.

    Args:
      labels: Tensor of labels.
      bboxes: Tensor of bounding boxes, masked together with `labels`.
      out_labels: Python iterable of label values to remove. The default is
        only read, never modified.
      num_classes: Elements with `label >= num_classes` are removed. The
        default (`np.inf`) disables this check.
      scope: Optional name scope.
    Return:
      labels, bboxes: Filtered elements.
    """
    with tf.name_scope(scope, 'bboxes_filter_labels', [labels, bboxes]):
        labels = tf.convert_to_tensor(labels)
        # Start by keeping everything; each condition below narrows the mask.
        mask = tf.ones_like(labels, dtype=tf.bool)
        # An infinite bound cannot be cast to an integer label dtype, and
        # would filter nothing anyway: skip the comparison in that case.
        unbounded = isinstance(num_classes, float) and np.isinf(num_classes)
        if not unbounded:
            mask = tf.less(labels, tf.cast(num_classes, labels.dtype))
        # Remove every explicitly excluded label value.
        for l in out_labels:
            mask = tf.logical_and(mask, tf.not_equal(labels, l))
        labels = tf.boolean_mask(labels, mask)
        bboxes = tf.boolean_mask(bboxes, mask)
        return labels, bboxes
447 |
448 |
449 | # =========================================================================== #
450 | # Standard boxes computation.
451 | # =========================================================================== #
def bboxes_jaccard(bbox_ref, bboxes, name=None):
    """Jaccard score (intersection over union) between reference box(es)
    and a collection of bounding boxes.

    After transposition, rows 0..3 of each input are used as
    ymin, xmin, ymax, xmax respectively.

    Args:
      bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es).
      bboxes: (N, 4) Tensor, collection of bounding boxes.
      name: Optional name scope.
    Return:
      (N,) Tensor with Jaccard scores; 0 wherever the union is not positive
      (see `tfe_math.safe_divide`).
    """
    with tf.name_scope(name, 'bboxes_jaccard'):
        # Coordinate-major layout: one row per coordinate.
        box_t = tf.transpose(bboxes)
        ref_t = tf.transpose(bbox_ref)
        box_ymin, box_xmin, box_ymax, box_xmax = (
            box_t[0], box_t[1], box_t[2], box_t[3])
        ref_ymin, ref_xmin, ref_ymax, ref_xmax = (
            ref_t[0], ref_t[1], ref_t[2], ref_t[3])

        # Intersection rectangle, clipped to zero when boxes are disjoint.
        top = tf.maximum(box_ymin, ref_ymin)
        left = tf.maximum(box_xmin, ref_xmin)
        bottom = tf.minimum(box_ymax, ref_ymax)
        right = tf.minimum(box_xmax, ref_xmax)
        inter_h = tf.maximum(bottom - top, 0.)
        inter_w = tf.maximum(right - left, 0.)
        inter_vol = inter_h * inter_w

        # Union = area(boxes) + area(reference) - intersection.
        boxes_vol = (box_ymax - box_ymin) * (box_xmax - box_xmin)
        ref_vol = (ref_ymax - ref_ymin) * (ref_xmax - ref_xmin)
        union_vol = -inter_vol + boxes_vol + ref_vol
        return tfe_math.safe_divide(inter_vol, union_vol, 'jaccard')
480 |
481 |
def bboxes_intersection(bbox_ref, bboxes, name=None):
    """Relative intersection between reference box(es) and a collection of
    bounding boxes: intersection area divided by the area of each box in
    `bboxes`.

    After transposition, rows 0..3 of each input are used as
    ymin, xmin, ymax, xmax respectively.

    Args:
      bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es).
      bboxes: (N, 4) Tensor, collection of bounding boxes.
      name: Optional name scope.
    Return:
      (N,) Tensor with relative intersection; 0 wherever the box area is
      not positive (see `tfe_math.safe_divide`).
    """
    with tf.name_scope(name, 'bboxes_intersection'):
        # Coordinate-major layout: one row per coordinate.
        box_t = tf.transpose(bboxes)
        ref_t = tf.transpose(bbox_ref)
        box_ymin, box_xmin, box_ymax, box_xmax = (
            box_t[0], box_t[1], box_t[2], box_t[3])

        # Intersection rectangle, clipped to zero when boxes are disjoint.
        top = tf.maximum(box_ymin, ref_t[0])
        left = tf.maximum(box_xmin, ref_t[1])
        bottom = tf.minimum(box_ymax, ref_t[2])
        right = tf.minimum(box_xmax, ref_t[3])
        inter_h = tf.maximum(bottom - top, 0.)
        inter_w = tf.maximum(right - left, 0.)
        inter_vol = inter_h * inter_w

        # Normalise by the area of each candidate box.
        boxes_vol = (box_ymax - box_ymin) * (box_xmax - box_xmin)
        return tfe_math.safe_divide(inter_vol, boxes_vol, 'intersection')
509 |
--------------------------------------------------------------------------------
/tf_extended/image.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fanbinqi/SSD-Tensorflow/7d77fc3e4eda3109ea104f59644d6d93cb829215/tf_extended/image.py
--------------------------------------------------------------------------------
/tf_extended/math.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """TF Extended: additional math functions.
16 | """
17 | import tensorflow as tf
18 |
19 | from tensorflow.python.ops import array_ops
20 | from tensorflow.python.ops import math_ops
21 | from tensorflow.python.framework import dtypes
22 | from tensorflow.python.framework import ops
23 |
24 |
def safe_divide(numerator, denominator, name):
    """Element-wise division that yields 0 where the denominator is <= 0.

    Args:
      numerator: A real `Tensor`.
      denominator: A real `Tensor`, with dtype matching `numerator`.
      name: Name for the returned op.
    Returns:
      `numerator` / `denominator` where `denominator` > 0, else 0.
    """
    is_positive = math_ops.greater(denominator, 0)
    quotient = math_ops.divide(numerator, denominator)
    zeros = tf.zeros_like(numerator)
    return tf.where(is_positive, quotient, zeros, name=name)
39 |
40 |
def cummax(x, reverse=False, name=None):
    """Cumulative maximum of `x` along its first dimension, in the spirit
    of `cumsum`. Only 1D Tensors are supported for now.

    Args:
      x: Value convertible to a `Tensor`.
      reverse: A `bool` (default: False). If True, the input is reversed
        before the scan and the result reversed back, i.e. the maximum is
        accumulated starting from the last element.
      name: A name for the operation (optional).
    Returns:
      A `Tensor` with the running maximum.
    """
    def _flip_if_reversed(t):
        # Not very optimal: reversing could be folded into the scan itself.
        return tf.reverse(t, axis=[0]) if reverse else t

    with ops.name_scope(name, "Cummax", [x]):
        values = _flip_if_reversed(ops.convert_to_tensor(x, name="x"))
        # Running maximum: each output is the max of everything seen so far.
        running_max = tf.scan(tf.maximum, values,
                              initializer=None, parallel_iterations=1,
                              back_prop=False, swap_memory=False)
        return _flip_if_reversed(running_max)
68 |
--------------------------------------------------------------------------------
/tf_extended/metrics.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """TF Extended: additional metrics.
16 | """
17 | import tensorflow as tf
18 | import numpy as np
19 |
20 | from tensorflow.contrib.framework.python.ops import variables as contrib_variables
21 | from tensorflow.python.framework import dtypes
22 | from tensorflow.python.framework import ops
23 | from tensorflow.python.ops import array_ops
24 | from tensorflow.python.ops import math_ops
25 | from tensorflow.python.ops import nn
26 | from tensorflow.python.ops import state_ops
27 | from tensorflow.python.ops import variable_scope
28 | from tensorflow.python.ops import variables
29 |
30 | from tf_extended import math as tfe_math
31 |
32 |
33 | # =========================================================================== #
34 | # TensorFlow utils
35 | # =========================================================================== #
def _create_local(name, shape, collections=None, validate_shape=True,
                  dtype=dtypes.float32):
    """Create a non-trainable, zero-initialised local variable.

    Args:
      name: The name of the new variable.
      shape: Shape of the initial (all-zeros) value.
      collections: Optional list of extra collection names the variable is
        added to; `LOCAL_VARIABLES` is always appended.
      validate_shape: Whether to validate the shape of the variable.
      dtype: Data type of the variable.
    Returns:
      The created variable.
    """
    # Copy the caller's list so it is never modified in place.
    target_collections = list(collections or [])
    target_collections.append(ops.GraphKeys.LOCAL_VARIABLES)
    zeros = array_ops.zeros(shape, dtype=dtype)
    return variables.Variable(initial_value=zeros,
                              name=name,
                              trainable=False,
                              collections=target_collections,
                              validate_shape=validate_shape)
57 |
58 |
def _safe_div(numerator, denominator, name):
    """Element-wise division that yields 0 where the denominator is <= 0.

    Args:
      numerator: A real `Tensor`.
      denominator: A real `Tensor`, with dtype matching `numerator`.
      name: Name for the returned op.
    Returns:
      `numerator` / `denominator` where `denominator` > 0, else 0.
    """
    is_positive = math_ops.greater(denominator, 0)
    quotient = math_ops.divide(numerator, denominator)
    zeros = tf.zeros_like(numerator)
    return tf.where(is_positive, quotient, zeros, name=name)
73 |
74 |
def _broadcast_weights(weights, values):
    """Broadcast `weights` to the same shape as `values`.

    This returns a version of `weights` following the same broadcast rules as
    `multiply(weights, values)`. When computing a weighted average, use this
    function to broadcast `weights` before summing them; e.g.,
    `reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`.

    Args:
      weights: `Tensor` whose shape is broadcastable to `values`.
      values: `Tensor` of any shape.
    Returns:
      `weights` itself when both static shapes are fully defined and
      compatible, otherwise `weights` multiplied by a ones Tensor shaped
      like `values`.
    """
    weights_shape = weights.get_shape()
    values_shape = values.get_shape()
    # Nothing to do when the static shapes already agree.
    if (weights_shape.is_fully_defined() and
            values_shape.is_fully_defined() and
            weights_shape.is_compatible_with(values_shape)):
        return weights
    # `math_ops.mul` no longer exists in TensorFlow >= 1.0; `multiply` is
    # its replacement and performs the same broadcasting product.
    return math_ops.multiply(
        weights, array_ops.ones_like(values), name='broadcast_weights')
95 |
96 |
97 | # =========================================================================== #
98 | # TF Extended metrics: TP and FP arrays.
99 | # =========================================================================== #
def precision_recall(num_gbboxes, num_detections, tp, fp, scores,
                     dtype=tf.float64, scope=None):
    """Compute precision and recall curves from scores and boolean
    true/false positive arrays.

    If `scores` is a dictionary, every argument but `dtype` is indexed with
    the keys of `num_gbboxes`, the computation is repeated per key, and two
    dictionaries `(precision, recall)` are returned.

    Args:
      num_gbboxes: Number of groundtruth boxes (recall denominator).
      num_detections: Number of detections, used as `k` in `tf.nn.top_k`.
      tp, fp: True and false positive arrays, aligned with `scores`.
      scores: Detection scores.
      dtype: Type used for the cumulative sums and the outputs.
      scope: Optional name scope (overridden per key for dictionary inputs).
    Return:
      Precision and recall Tensors, ordered by decreasing score.
    """
    # Dictionary inputs: one computation per key.
    if isinstance(scores, dict):
        precision_by_key = {}
        recall_by_key = {}
        for key in num_gbboxes.keys():
            scope = 'precision_recall_%s' % key
            prec, rec = precision_recall(num_gbboxes[key],
                                         num_detections[key],
                                         tp[key], fp[key], scores[key],
                                         dtype, scope)
            precision_by_key[key] = prec
            recall_by_key[key] = rec
        return precision_by_key, recall_by_key

    with tf.name_scope(scope, 'precision_recall',
                       [num_gbboxes, num_detections, tp, fp, scores]):
        # Reorder TP / FP flags by decreasing detection score.
        scores, order = tf.nn.top_k(scores, k=num_detections, sorted=True)
        sorted_tp = tf.gather(tp, order)
        sorted_fp = tf.gather(fp, order)
        # Cumulative counts down the ranked list.
        cum_tp = tf.cumsum(tf.cast(sorted_tp, dtype), axis=0)
        cum_fp = tf.cumsum(tf.cast(sorted_fp, dtype), axis=0)
        recall = _safe_div(cum_tp, tf.cast(num_gbboxes, dtype), 'recall')
        precision = _safe_div(cum_tp, cum_tp + cum_fp, 'precision')
        return tf.tuple([precision, recall])
131 |
132 |
def streaming_tp_fp_arrays(num_gbboxes, tp, fp, scores,
                           remove_zero_scores=True,
                           metrics_collections=None,
                           updates_collections=None,
                           name=None):
    """Streaming accumulation of True and False Positive arrays. The metric
    also keeps track of scores and of the number of groundtruth objects.

    If `scores` or `fp` is a dictionary, the metric is built once per key of
    `num_gbboxes` and two dictionaries `(values, update_ops)` are returned.

    Args:
      num_gbboxes: Number of groundtruth boxes (summed into the total).
      tp, fp: True / false positive Tensors (cast to bool and flattened).
      scores: Detection scores (cast to float and flattened).
      remove_zero_scores: If True, only detections flagged TP or FP whose
        score is greater than 1e-4 are accumulated.
      metrics_collections: Optional collections the values are added to.
      updates_collections: Optional collections the update ops are added to.
      name: Optional variable scope name.
    Return:
      val: Tuple of local variables
        `(num_gbboxes, num_detections, tp, fp, scores)`.
      update_op: Tuple of the matching update operations, in the same order.
    """
    # Dictionary inputs: one streaming metric per key.
    if isinstance(scores, dict) or isinstance(fp, dict):
        values_by_key = {}
        updates_by_key = {}
        for key in num_gbboxes.keys():
            scope = 'streaming_tp_fp_%s' % key
            value, update = streaming_tp_fp_arrays(
                num_gbboxes[key], tp[key], fp[key], scores[key],
                remove_zero_scores,
                metrics_collections,
                updates_collections,
                name=scope)
            values_by_key[key] = value
            updates_by_key[key] = update
        return values_by_key, updates_by_key

    # Tensor inputs.
    with variable_scope.variable_scope(name, 'streaming_tp_fp',
                                       [num_gbboxes, tp, fp, scores]):
        flag_dtype = tf.bool
        num_gbboxes = math_ops.to_int64(num_gbboxes)
        scores = math_ops.to_float(scores)
        tp = tf.cast(tp, flag_dtype)
        fp = tf.cast(fp, flag_dtype)
        # Flatten everything to 1D.
        scores = tf.reshape(scores, [-1])
        tp = tf.reshape(tp, [-1])
        fp = tf.reshape(fp, [-1])
        # Detections that are neither TP nor FP carry no information.
        keep = tf.logical_or(tp, fp)
        if remove_zero_scores:
            min_score = 1e-4
            keep = tf.logical_and(keep, tf.greater(scores, min_score))
            scores = tf.boolean_mask(scores, keep)
            tp = tf.boolean_mask(tp, keep)
            fp = tf.boolean_mask(fp, keep)

        # Local variables accumulating information over batches.
        v_nobjects = _create_local('v_num_gbboxes', shape=[], dtype=tf.int64)
        v_ndetections = _create_local('v_num_detections', shape=[],
                                      dtype=tf.int32)
        v_scores = _create_local('v_scores', shape=[0, ])
        v_tp = _create_local('v_tp', shape=[0, ], dtype=flag_dtype)
        v_fp = _create_local('v_fp', shape=[0, ], dtype=flag_dtype)

        # Update operations: counters are incremented, arrays are extended
        # (shape validation is disabled because their length grows).
        nobjects_op = state_ops.assign_add(v_nobjects,
                                           tf.reduce_sum(num_gbboxes))
        ndetections_op = state_ops.assign_add(
            v_ndetections, tf.size(scores, out_type=tf.int32))
        scores_op = state_ops.assign(
            v_scores, tf.concat([v_scores, scores], axis=0),
            validate_shape=False)
        tp_op = state_ops.assign(
            v_tp, tf.concat([v_tp, tp], axis=0), validate_shape=False)
        fp_op = state_ops.assign(
            v_fp, tf.concat([v_fp, fp], axis=0), validate_shape=False)

        # Value and update ops, listed in the same order.
        val = (v_nobjects, v_ndetections, v_tp, v_fp, v_scores)
        update_op = (nobjects_op, ndetections_op, tp_op, fp_op, scores_op)

        if metrics_collections:
            ops.add_to_collections(metrics_collections, val)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)
        return val, update_op
207 |
208 |
209 | # =========================================================================== #
210 | # Average precision computations.
211 | # =========================================================================== #
def average_precision_voc12(precision, recall, name=None):
    """Compute (interpolated) average precision from precision and recall
    Tensors.

    The implementation follows Pascal 2012 and ILSVRC guidelines.
    See also: https://sanchom.wordpress.com/tag/average-precision/

    Args:
      precision: 1D precision Tensor.
      recall: 1D recall Tensor, aligned with `precision`.
      name: Optional name scope.
    Return:
      Scalar float64 Tensor with the average precision.
    """
    with tf.name_scope(name, 'average_precision_voc12', [precision, recall]):
        # float64 reduces the error accumulated by the Riemann sum.
        prec = tf.cast(precision, dtype=tf.float64)
        rec = tf.cast(recall, dtype=tf.float64)

        # Pad both curves with boundary values.
        prec = tf.concat([[0.], prec, [0.]], axis=0)
        rec = tf.concat([[0.], rec, [1.]], axis=0)
        # Make precision non-increasing along the curve (running maximum
        # taken from the end).
        prec = tfe_math.cummax(prec, reverse=True)

        # Riemann sum: right-hand precision times recall increment.
        recall_steps = rec[1:] - rec[:-1]
        return tf.reduce_sum(prec[1:] * recall_steps)
235 |
236 |
def average_precision_voc07(precision, recall, name=None):
    """Compute (interpolated) average precision from precision and recall
    Tensors.

    The implementation follows Pascal 2007 guidelines.
    See also: https://sanchom.wordpress.com/tag/average-precision/

    Args:
      precision: 1D precision Tensor.
      recall: 1D recall Tensor, aligned with `precision`.
      name: Optional name scope.
    Return:
      Scalar float64 Tensor with the average precision.
    """
    with tf.name_scope(name, 'average_precision_voc07', [precision, recall]):
        # float64 reduces the error on the cumulated sums.
        prec = tf.cast(precision, dtype=tf.float64)
        rec = tf.cast(recall, dtype=tf.float64)
        # Sentinel (precision 0 at infinite recall) so that every threshold
        # below selects at least one element.
        prec = tf.concat([prec, [0.]], axis=0)
        rec = tf.concat([rec, [np.inf]], axis=0)

        def _max_precision_from(threshold):
            # Best precision among points whose recall reaches `threshold`.
            reached = tf.greater_equal(rec, threshold)
            return tf.reduce_max(tf.boolean_mask(prec, reached))

        # Average over the recall levels produced by the arange below
        # (0.0, 0.1, ..., 1.0), each weighted 1/11.
        contributions = [_max_precision_from(t) / 11.
                         for t in np.arange(0., 1.1, 0.1)]
        return tf.add_n(contributions)
259 |
260 |
def precision_recall_values(xvals, precision, recall, name=None):
    """Compute values on the precision/recall curve.

    Args:
      xvals: Python list of floats (recall levels);
      precision: 1D Tensor decreasing.
      recall: 1D Tensor increasing.
      name: Optional name scope.
    Return:
      List of precision values, one per entry of `xvals`: the smallest
      (interpolated) precision among points whose recall is <= that entry.
    """
    with ops.name_scope(name, "precision_recall_values",
                        [precision, recall]) as name:
        # Pad both curves with boundary values, then make precision
        # non-increasing (running maximum taken from the end).
        padded_precision = tf.concat([[0.], precision, [0.]], axis=0)
        padded_recall = tf.concat([[0.], recall, [1.]], axis=0)
        padded_precision = tfe_math.cummax(padded_precision, reverse=True)

        def _precision_at(level):
            below = tf.less_equal(padded_recall, level)
            return tf.reduce_min(tf.boolean_mask(padded_precision, below))

        return tf.tuple([_precision_at(x) for x in xvals])
284 |
285 |
286 | # =========================================================================== #
287 | # TF Extended metrics: old stuff!
288 | # =========================================================================== #
def _precision_recall(n_gbboxes, n_detections, scores, tp, fp, scope=None):
    """Compute precision and recall curves from scores and true / false
    positive arrays.

    Args:
      n_gbboxes: Number of groundtruth boxes (recall denominator).
      n_detections: Number of detections, used as `k` in `tf.nn.top_k`.
      scores: Detection scores.
      tp, fp: True and false positive arrays, aligned with `scores`.
      scope: Optional name scope.
    Return:
      Precision and recall float64 Tensors, ordered by decreasing score.
    """
    with tf.name_scope(scope, 'prec_rec', [n_gbboxes, scores, tp, fp]):
        # Reorder TP / FP flags by decreasing detection score.
        scores, order = tf.nn.top_k(scores, k=n_detections, sorted=True)
        sorted_tp = tf.gather(tp, order)
        sorted_fp = tf.gather(fp, order)
        # Cumulative counts down the ranked list.
        acc_dtype = tf.float64
        cum_tp = tf.cumsum(tf.cast(sorted_tp, acc_dtype), axis=0)
        cum_fp = tf.cumsum(tf.cast(sorted_fp, acc_dtype), axis=0)
        recall = _safe_div(cum_tp, tf.cast(n_gbboxes, acc_dtype), 'recall')
        precision = _safe_div(cum_tp, cum_tp + cum_fp, 'precision')

        return tf.tuple([precision, recall])
307 |
308 |
def streaming_precision_recall_arrays(n_gbboxes, rclasses, rscores,
                                      tp_tensor, fp_tensor,
                                      remove_zero_labels=True,
                                      metrics_collections=None,
                                      updates_collections=None,
                                      name=None):
    """Streaming computation of precision / recall arrays. The metric keeps
    track of the accumulated true positive and false positive arrays.

    Args:
      n_gbboxes: Number of groundtruth boxes (summed into the total).
      rclasses: Detected classes (cast to int64 and flattened).
      rscores: Detection scores (cast to float and flattened).
      tp_tensor, fp_tensor: True / false positive Tensors (cast to int32
        and flattened).
      remove_zero_labels: If True, detections whose class is not greater
        than 0 are discarded before accumulation.
      metrics_collections: Optional collections the value is added to.
      updates_collections: Optional collections the update op is added to.
      name: Optional variable scope name.
    Return:
      r: (precision, recall) computed from the accumulated variables.
      update_op: (precision, recall) computed from the update operations.
    """
    with variable_scope.variable_scope(
            name, 'stream_precision_recall',
            [n_gbboxes, rclasses, tp_tensor, fp_tensor]):
        n_gbboxes = math_ops.to_int64(n_gbboxes)
        rclasses = math_ops.to_int64(rclasses)
        rscores = math_ops.to_float(rscores)

        flag_dtype = tf.int32
        tp_tensor = tf.cast(tp_tensor, flag_dtype)
        fp_tensor = tf.cast(fp_tensor, flag_dtype)

        # Flatten everything to 1D and drop the 0-class detections.
        rclasses = tf.reshape(rclasses, [-1])
        rscores = tf.reshape(rscores, [-1])
        tp_tensor = tf.reshape(tp_tensor, [-1])
        fp_tensor = tf.reshape(fp_tensor, [-1])
        if remove_zero_labels:
            positive_class = tf.greater(rclasses, 0)
            rclasses = tf.boolean_mask(rclasses, positive_class)
            rscores = tf.boolean_mask(rscores, positive_class)
            tp_tensor = tf.boolean_mask(tp_tensor, positive_class)
            fp_tensor = tf.boolean_mask(fp_tensor, positive_class)

        # Local variables accumulating information over batches.
        v_nobjects = _create_local('v_nobjects', shape=[], dtype=tf.int64)
        v_ndetections = _create_local('v_ndetections', shape=[],
                                      dtype=tf.int32)
        v_scores = _create_local('v_scores', shape=[0, ])
        v_tp = _create_local('v_tp', shape=[0, ], dtype=flag_dtype)
        v_fp = _create_local('v_fp', shape=[0, ], dtype=flag_dtype)

        # Update operations: counters are incremented, arrays are extended
        # (shape validation is disabled because their length grows).
        nobjects_op = state_ops.assign_add(v_nobjects,
                                           tf.reduce_sum(n_gbboxes))
        ndetections_op = state_ops.assign_add(
            v_ndetections, tf.size(rscores, out_type=tf.int32))
        scores_op = state_ops.assign(
            v_scores, tf.concat([v_scores, rscores], axis=0),
            validate_shape=False)
        tp_op = state_ops.assign(
            v_tp, tf.concat([v_tp, tp_tensor], axis=0),
            validate_shape=False)
        fp_op = state_ops.assign(
            v_fp, tf.concat([v_fp, fp_tensor], axis=0),
            validate_shape=False)

        # Precision and recall from the current state of the variables.
        r = _precision_recall(v_nobjects, v_ndetections, v_scores,
                              v_tp, v_fp, 'value')

        # Precision and recall from the freshly updated values; built under
        # control dependencies on every update operation.
        pending_updates = [nobjects_op, ndetections_op,
                           scores_op, tp_op, fp_op]
        with ops.control_dependencies(pending_updates):
            update_op = _precision_recall(nobjects_op, ndetections_op,
                                          scores_op, tp_op, fp_op,
                                          'update_op')

        if metrics_collections:
            ops.add_to_collections(metrics_collections, r)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)
        return r, update_op
397 |
398 |
--------------------------------------------------------------------------------
/tf_extended/tensors.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """TF Extended: additional tensors operations.
16 | """
17 | import tensorflow as tf
18 |
19 | from tensorflow.contrib.framework.python.ops import variables as contrib_variables
20 | from tensorflow.contrib.metrics.python.ops import set_ops
21 | from tensorflow.python.framework import dtypes
22 | from tensorflow.python.framework import ops
23 | from tensorflow.python.framework import sparse_tensor
24 | from tensorflow.python.ops import array_ops
25 | from tensorflow.python.ops import check_ops
26 | from tensorflow.python.ops import control_flow_ops
27 | from tensorflow.python.ops import math_ops
28 | from tensorflow.python.ops import nn
29 | from tensorflow.python.ops import state_ops
30 | from tensorflow.python.ops import variable_scope
31 | from tensorflow.python.ops import variables
32 |
33 |
def get_shape(x, rank=None):
    """Return the dimensions of a Tensor as a list of integers or scalar
    tensors.

    Args:
      x: N-d Tensor;
      rank: Rank of the Tensor. If None, it is taken from the static shape.
    Returns:
      A list `[d1, d2, ..., dN]` of the dimensions of the input tensor.
      Dimensions that are statically known are python integers, the others
      are integer scalar tensors.
    """
    known_shape = x.get_shape()
    # Fast path: everything is known statically.
    if known_shape.is_fully_defined():
        return known_shape.as_list()

    if rank is None:
        static_dims = known_shape.as_list()
        rank = len(static_dims)
    else:
        static_dims = known_shape.with_rank(rank).as_list()
    # Fall back on the runtime shape for the unknown dimensions.
    dynamic_dims = tf.unstack(tf.shape(x), rank)
    return [dyn if stat is None else stat
            for stat, dyn in zip(static_dims, dynamic_dims)]
57 |
58 |
def pad_axis(x, offset, size, axis=0, name=None):
    """Zero-pad a tensor along one axis, given an offset and an output size.

    `offset` zeros are inserted before the data and
    `max(size - offset - current_size, 0)` zeros after it (CONSTANT mode).
    The result is then reshaped so that its dimension on `axis` is `size`.

    NOTE(review): when `offset` + current size exceeds `size`, the padded
    tensor is larger than `size` on `axis` and the final reshape presumably
    fails -- to be confirmed (a `tf.slice` would be needed to crop).

    Args:
      x: Tensor to pad;
      offset: Offset to add on the dimension chosen;
      size: Final size of the dimension.
      axis: Dimension to pad.
      name: Optional name scope.
    Return:
      Padded tensor whose dimension on `axis` is `size`.
    """
    with tf.name_scope(name, 'pad_axis'):
        dims = get_shape(x)
        rank = len(dims)

        def _only_on_axis(amount):
            # Vector of per-dimension paddings, zero everywhere but `axis`.
            return tf.stack([0]*axis + [amount] + [0]*(rank-axis-1))

        # Padding description: `offset` before, the remainder after.
        trailing = tf.maximum(size-offset-dims[axis], 0)
        pad_before = _only_on_axis(offset)
        pad_after = _only_on_axis(trailing)
        paddings = tf.stack([pad_before, pad_after], axis=1)
        padded = tf.pad(x, paddings, mode='CONSTANT')
        # Reshape, to get a fully defined shape if possible.
        # TODO: fix with tf.slice
        dims[axis] = size
        return tf.reshape(padded, tf.stack(dims))
87 |
88 |
89 | # def select_at_index(idx, val, t):
90 | # """Return a tensor.
91 | # """
92 | # idx = tf.expand_dims(tf.expand_dims(idx, 0), 0)
93 | # val = tf.expand_dims(val, 0)
94 | # t = t + tf.scatter_nd(idx, val, tf.shape(t))
95 | # return t
96 |
--------------------------------------------------------------------------------
/tf_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Diverse TensorFlow utils, for training, evaluation and so on!
16 | """
17 | import os
18 | from pprint import pprint
19 |
20 | import tensorflow as tf
21 | from tensorflow.contrib.slim.python.slim.data import parallel_reader
22 |
23 | slim = tf.contrib.slim
24 |
25 |
26 | # =========================================================================== #
27 | # General tools.
28 | # =========================================================================== #
def reshape_list(l, shape=None):
    """Flatten a list of (lists), or regroup a flat list into sub-lists.

    Args:
      l: List whose items may themselves be lists/tuples (flatten mode), or
        a flat list (regroup mode).
      shape: `None` to flatten one nesting level; otherwise an iterable of
        group sizes consumed left to right.
    Return:
      The flattened list when `shape` is None. Otherwise a list with one
      entry per size in `shape`: the bare element for a size of 1, and a
      slice of `l` for any other size.
    """
    if shape is None:
        # Flatten mode: unpack list/tuple items, keep everything else as is.
        flat = []
        for item in l:
            if isinstance(item, (list, tuple)):
                flat.extend(item)
            else:
                flat.append(item)
        return flat

    # Regroup mode: walk `l` with a moving cursor, one group per size.
    grouped = []
    start = 0
    for size in shape:
        stop = start + size
        grouped.append(l[start] if size == 1 else l[start:stop])
        start = stop
    return grouped
56 |
57 |
58 | # =========================================================================== #
59 | # Training utils.
60 | # =========================================================================== #
def print_configuration(flags, ssd_params, data_sources, save_dir=None):
    """Pretty-print the training configuration, optionally saving it too.

    The flags, the SSD parameters (converted to a dict through `_asdict()`)
    and the sorted list of data files resolved from `data_sources` are
    pretty-printed to standard output. When `save_dir` is given, the same
    report is also written to `<save_dir>/training_config.txt`, creating the
    directory if it does not exist.
    """
    def _dump(stream=None):
        # Training | Evaluation flags.
        pprint(flags, stream=stream)
        # SSD net parameters.
        pprint(dict(ssd_params._asdict()), stream=stream)
        # Training | Evaluation dataset files.
        data_files = parallel_reader.get_data_files(data_sources)
        pprint(sorted(data_files), stream=stream)

    # A `None` stream makes pprint write to standard output.
    _dump(None)
    if save_dir is None:
        return

    # Save to a text file as well.
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    config_path = os.path.join(save_dir, 'training_config.txt')
    with open(config_path, "w") as out:
        _dump(out)
90 |
91 |
def configure_learning_rate(flags, num_samples_per_epoch, global_step):
    """Configures the learning rate.

    Args:
      flags: Object exposing `batch_size`, `num_epochs_per_decay`,
        `learning_rate_decay_type` and `learning_rate`, plus
        `learning_rate_decay_factor` (exponential decay) or
        `end_learning_rate` (polynomial decay).
      num_samples_per_epoch: The number of samples in each epoch of training.
      global_step: The global_step tensor.
    Returns:
      A `Tensor` representing the learning rate.
    Raises:
      ValueError: if `flags.learning_rate_decay_type` is not one of
        'exponential', 'fixed' or 'polynomial'.
    """
    # Steps between two decays: steps per epoch times epochs per decay.
    decay_steps = int(num_samples_per_epoch / flags.batch_size *
                      flags.num_epochs_per_decay)

    if flags.learning_rate_decay_type == 'exponential':
        return tf.train.exponential_decay(flags.learning_rate,
                                          global_step,
                                          decay_steps,
                                          flags.learning_rate_decay_factor,
                                          staircase=True,
                                          name='exponential_decay_learning_rate')
    elif flags.learning_rate_decay_type == 'fixed':
        return tf.constant(flags.learning_rate, name='fixed_learning_rate')
    elif flags.learning_rate_decay_type == 'polynomial':
        return tf.train.polynomial_decay(flags.learning_rate,
                                         global_step,
                                         decay_steps,
                                         flags.end_learning_rate,
                                         power=1.0,
                                         cycle=False,
                                         name='polynomial_decay_learning_rate')
    else:
        # Format the value into the message: passing it as a second
        # ValueError argument would leave the '%s' placeholder unfilled.
        raise ValueError('learning_rate_decay_type [%s] was not recognized'
                         % (flags.learning_rate_decay_type,))
124 |
125 |
def configure_optimizer(flags, learning_rate):
    """Configures the optimizer used for training.

    Args:
      flags: Object exposing `optimizer` (the optimizer name) and the
        hyper-parameters read by the selected branch (e.g. `adam_beta1`,
        `momentum`, `rmsprop_decay`, `opt_epsilon`).
      learning_rate: A scalar or `Tensor` learning rate.
    Returns:
      An instance of an optimizer.
    Raises:
      ValueError: if `flags.optimizer` is not one of 'adadelta', 'adagrad',
        'adam', 'ftrl', 'momentum', 'rmsprop' or 'sgd'.
    """
    if flags.optimizer == 'adadelta':
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate,
            rho=flags.adadelta_rho,
            epsilon=flags.opt_epsilon)
    elif flags.optimizer == 'adagrad':
        optimizer = tf.train.AdagradOptimizer(
            learning_rate,
            initial_accumulator_value=flags.adagrad_initial_accumulator_value)
    elif flags.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(
            learning_rate,
            beta1=flags.adam_beta1,
            beta2=flags.adam_beta2,
            epsilon=flags.opt_epsilon)
    elif flags.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(
            learning_rate,
            learning_rate_power=flags.ftrl_learning_rate_power,
            initial_accumulator_value=flags.ftrl_initial_accumulator_value,
            l1_regularization_strength=flags.ftrl_l1,
            l2_regularization_strength=flags.ftrl_l2)
    elif flags.optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(
            learning_rate,
            momentum=flags.momentum,
            name='Momentum')
    elif flags.optimizer == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate,
            decay=flags.rmsprop_decay,
            momentum=flags.rmsprop_momentum,
            epsilon=flags.opt_epsilon)
    elif flags.optimizer == 'sgd':
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    else:
        # Format the name into the message: passing it as a second
        # ValueError argument would leave the '%s' placeholder unfilled.
        raise ValueError('Optimizer [%s] was not recognized'
                         % (flags.optimizer,))
    return optimizer
172 |
173 |
def add_variables_summaries(learning_rate):
    """Build histogram summaries for model variables plus a learning-rate scalar.

    Args:
      learning_rate: Value logged under the 'training/Learning Rate' tag.
    Returns:
      A list holding one histogram summary per variable returned by
      `slim.get_model_variables()`, followed by the learning-rate summary.
    """
    histograms = [tf.summary.histogram(var.op.name, var)
                  for var in slim.get_model_variables()]
    lr_summary = tf.summary.scalar('training/Learning Rate', learning_rate)
    return histograms + [lr_summary]
180 |
181 |
def update_model_scope(var, ckpt_scope, new_scope):
    """Return the op name of `var` with `new_scope` swapped for `ckpt_scope`.

    Args:
      var: Object whose `op.name` is the name to rewrite.
      ckpt_scope: Scope name to substitute in. `None` falls back to
        'vgg_16', the value that used to be hard-coded here.
      new_scope: Scope name to replace in `var.op.name`.
    Returns:
      The rewritten name string (every occurrence of `new_scope` replaced).
    """
    # Previously `ckpt_scope` was ignored and 'vgg_16' was always used;
    # honour the argument and keep 'vgg_16' only as the default.
    target_scope = 'vgg_16' if ckpt_scope is None else ckpt_scope
    return var.op.name.replace(new_scope, target_scope)
184 |
185 |
def get_init_fn(flags):
    """Returns a function run by the chief worker to warm-start the training.

    Note that the init_fn is only run when initializing the model during the
    very first global step.

    Args:
      flags: Object exposing `checkpoint_path`, `train_dir`,
        `checkpoint_exclude_scopes`, `checkpoint_model_scope`, `model_name`
        and `ignore_missing_vars`.
    Returns:
      An init function run by the supervisor, or None when no checkpoint
      path is given or a checkpoint already exists in `flags.train_dir`.
    """
    if flags.checkpoint_path is None:
        return None

    # An existing checkpoint in train_dir takes precedence: log and bail out.
    if tf.train.latest_checkpoint(flags.train_dir):
        tf.logging.info(
            'Ignoring --checkpoint_path because a checkpoint already exists in %s'
            % flags.train_dir)
        return None

    excluded_prefixes = []
    if flags.checkpoint_exclude_scopes:
        excluded_prefixes = [
            prefix.strip()
            for prefix in flags.checkpoint_exclude_scopes.split(',')]

    # Keep every model variable whose name starts with none of the prefixes.
    restore_vars = [
        var for var in slim.get_model_variables()
        if not any(var.op.name.startswith(prefix)
                   for prefix in excluded_prefixes)]

    # Change model scope if necessary: map checkpoint names to variables.
    if flags.checkpoint_model_scope is not None:
        restore_vars = {
            var.op.name.replace(flags.model_name,
                                flags.checkpoint_model_scope): var
            for var in restore_vars}

    # A directory is resolved to its most recent checkpoint.
    if tf.gfile.IsDirectory(flags.checkpoint_path):
        ckpt_path = tf.train.latest_checkpoint(flags.checkpoint_path)
    else:
        ckpt_path = flags.checkpoint_path
    tf.logging.info('Fine-tuning from %s. Ignoring missing vars: %s' % (ckpt_path, flags.ignore_missing_vars))

    return slim.assign_from_checkpoint_fn(
        ckpt_path,
        restore_vars,
        ignore_missing_vars=flags.ignore_missing_vars)
236 |
237 |
def get_variables_to_train(flags):
    """Returns a list of variables to train.

    Args:
      flags: Object exposing `trainable_scopes`, either None or a
        comma-separated string of scope names.
    Returns:
      All trainable variables when `flags.trainable_scopes` is None;
      otherwise the trainable variables collected for each listed scope,
      concatenated in the order the scopes are listed.
    """
    if flags.trainable_scopes is None:
        return tf.trainable_variables()

    selected = []
    for raw_scope in flags.trainable_scopes.split(','):
        selected.extend(
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                              raw_scope.strip()))
    return selected
254 |
255 |
256 | # =========================================================================== #
257 | # Evaluation utils.
258 | # =========================================================================== #
259 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.ops import control_flow_ops
3 |
4 | from datasets import dataset_factory
5 | from deployment import model_deploy
6 | from nets import nets_factory
7 | from preprocessing import preprocessing_factory
8 | import tf_utils
9 | import os
# NOTE(review): presumably meant to quiet TensorFlow's native logging; it is
# set after `import tensorflow` above -- confirm it still takes effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
slim = tf.contrib.slim

# Data layout handed to the preprocessing function and the network arg_scope
# in main().
DATA_FORMAT = 'NCHW'

# =========================================================================== #
# SSD Network flags.
# =========================================================================== #
# These three values are forwarded to `ssd_net.losses(...)` in main().
tf.app.flags.DEFINE_float(
    'loss_alpha', 1., 'Alpha parameter in the loss function.')
tf.app.flags.DEFINE_float(
    'negative_ratio', 3., 'Negative ratio in the loss function.')
tf.app.flags.DEFINE_float(
    'match_threshold', 0.5, 'Matching threshold in the loss function.')

# =========================================================================== #
# General Flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'train_dir', './logs/',
    'Directory where checkpoints and event logs are written to.')
tf.app.flags.DEFINE_integer('num_clones', 1,
                            'Number of model clones to deploy.')
tf.app.flags.DEFINE_boolean('clone_on_cpu', False,
                            'Use CPUs to deploy clones.')
tf.app.flags.DEFINE_integer(
    'num_readers', 4,
    'The number of parallel readers that read data from the dataset.')
tf.app.flags.DEFINE_integer(
    'num_preprocessing_threads', 4,
    'The number of threads used to create the batches.')

tf.app.flags.DEFINE_integer(
    'log_every_n_steps', 10,
    'The frequency with which logs are print.')
tf.app.flags.DEFINE_integer(
    'save_summaries_secs', 60,
    'The frequency with which summaries are saved, in seconds.')
tf.app.flags.DEFINE_integer(
    'save_interval_secs', 600,
    'The frequency with which the model is saved, in seconds.')
tf.app.flags.DEFINE_float(
    'gpu_memory_fraction', 0.8, 'GPU memory fraction to use.')

# =========================================================================== #
# Optimization Flags.
# =========================================================================== #
# Read by tf_utils.configure_optimizer; only the flags matching the selected
# `optimizer` are used.
tf.app.flags.DEFINE_float(
    'weight_decay', 0.0005, 'The weight decay on the model weights.')
tf.app.flags.DEFINE_string(
    'optimizer', 'sgd',
    'The name of the optimizer, one of "adadelta", "adagrad", "adam",'
    '"ftrl", "momentum", "sgd" or "rmsprop".')
tf.app.flags.DEFINE_float(
    'adadelta_rho', 0.95,
    'The decay rate for adadelta.')
tf.app.flags.DEFINE_float(
    'adagrad_initial_accumulator_value', 0.1,
    'Starting value for the AdaGrad accumulators.')
tf.app.flags.DEFINE_float(
    'adam_beta1', 0.9,
    'The exponential decay rate for the 1st moment estimates.')
tf.app.flags.DEFINE_float(
    'adam_beta2', 0.999,
    'The exponential decay rate for the 2nd moment estimates.')
# Shared epsilon: passed to the adadelta, adam and rmsprop optimizers.
tf.app.flags.DEFINE_float('opt_epsilon', 1.0, 'Epsilon term for the optimizer.')
tf.app.flags.DEFINE_float('ftrl_learning_rate_power', -0.5,
                          'The learning rate power.')
tf.app.flags.DEFINE_float(
    'ftrl_initial_accumulator_value', 0.1,
    'Starting value for the FTRL accumulators.')
tf.app.flags.DEFINE_float(
    'ftrl_l1', 0.0, 'The FTRL l1 regularization strength.')
tf.app.flags.DEFINE_float(
    'ftrl_l2', 0.0, 'The FTRL l2 regularization strength.')
tf.app.flags.DEFINE_float(
    'momentum', 0.9,
    'The momentum for the MomentumOptimizer and RMSPropOptimizer.')
tf.app.flags.DEFINE_float('rmsprop_momentum', 0.9, 'Momentum.')
tf.app.flags.DEFINE_float('rmsprop_decay', 0.9, 'Decay term for RMSProp.')

# =========================================================================== #
# Learning Rate Flags.
# =========================================================================== #
# Read by tf_utils.configure_learning_rate.
tf.app.flags.DEFINE_string(
    'learning_rate_decay_type',
    'fixed',
    'Specifies how the learning rate is decayed. One of "fixed", "exponential",'
    ' or "polynomial"')
tf.app.flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.')
tf.app.flags.DEFINE_float(
    'end_learning_rate', 0.00001,
    'The minimal end learning rate used by a polynomial decay learning rate.')
tf.app.flags.DEFINE_float(
    'label_smoothing', 0.0, 'The amount of label smoothing.')
tf.app.flags.DEFINE_float(
    'learning_rate_decay_factor', 0.94, 'Learning rate decay factor.')
tf.app.flags.DEFINE_float(
    'num_epochs_per_decay', 2.0,
    'Number of epochs after which learning rate decays.')
tf.app.flags.DEFINE_float(
    'moving_average_decay', None,
    'The decay to use for the moving average.'
    'If left as None, then moving averages are not used.')

# =========================================================================== #
# Dataset Flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'dataset_name', 'pascalvoc_2007', 'The name of the dataset to load.')
tf.app.flags.DEFINE_integer(
    'num_classes', 21, 'Number of classes to use in the dataset.')
tf.app.flags.DEFINE_string(
    'dataset_split_name', 'train', 'The name of the train/test split.')
tf.app.flags.DEFINE_string(
    'dataset_dir', './tfrecords/', 'The directory where the dataset files are stored.')
tf.app.flags.DEFINE_integer(
    'labels_offset', 0,
    'An offset for the labels in the dataset. This flag is primarily used to '
    'evaluate the VGG and ResNet architectures which do not use a background '
    'class for the ImageNet dataset.')
tf.app.flags.DEFINE_string(
    'model_name', 'ssd_300_vgg', 'The name of the architecture to train.')
tf.app.flags.DEFINE_string(
    'preprocessing_name', None, 'The name of the preprocessing to use. If left '
    'as `None`, then the model_name flag is used.')
tf.app.flags.DEFINE_integer(
    'batch_size', 32, 'The number of samples in each batch.')
tf.app.flags.DEFINE_integer(
    'train_image_size', None, 'Train image size')
tf.app.flags.DEFINE_integer('max_number_of_steps', None,
                            'The maximum number of training steps.')

# =========================================================================== #
# Fine-Tuning Flags.
# =========================================================================== #
# Read by tf_utils.get_init_fn and tf_utils.get_variables_to_train. The
# default exclude and trainable scope lists below are the same twelve scopes,
# all prefixed with the default model name 'ssd_300_vgg'.
tf.app.flags.DEFINE_string(
    'checkpoint_path', './checkpoints/vgg_16.ckpt',
    'The path to a checkpoint from which to fine-tune.')
tf.app.flags.DEFINE_string(
    'checkpoint_model_scope', 'vgg_16',
    'Model scope in the checkpoint. None if the same as the trained model.')
tf.app.flags.DEFINE_string(
    'checkpoint_exclude_scopes', 'ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box',
    'Comma-separated list of scopes of variables to exclude when restoring '
    'from a checkpoint.')
tf.app.flags.DEFINE_string(
    'trainable_scopes', 'ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box',
    'Comma-separated list of scopes to filter the set of variables to train.'
    'By default, None would train all the variables.')
tf.app.flags.DEFINE_boolean(
    'ignore_missing_vars', False,
    'When restoring a checkpoint would ignore missing variables.')

# Parsed flag values, accessed as attributes throughout main().
FLAGS = tf.app.flags.FLAGS
165 |
166 |
167 | # =========================================================================== #
168 | # Main training routine.
169 | # =========================================================================== #
def main(_):
    """Build the SSD training graph and run the TF-Slim training loop.

    Steps, in order: create the deployment config and global step, load the
    dataset and the SSD network selected by the flags, build the input
    pipeline (provider, preprocessing, ground-truth encoding, batching and
    prefetch queue), create the model clones with their losses, collect
    summaries, configure the learning rate and optimizer, assemble the train
    op, and hand everything to `slim.learning.train`.

    Args:
      _: Unused positional argument.
    Raises:
      ValueError: if `FLAGS.dataset_dir` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        # Config model_deploy. Keep TF Slim Models structure.
        # Useful if want to need multiple GPUs and/or servers in the future.
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)
        # Create global_step.
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # Select the dataset.
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # Get the SSD network and its anchors.
        ssd_class = nets_factory.get_network(FLAGS.model_name)
        # Default parameters of the network class, with only the number of
        # classes overridden from the flags.
        ssd_params = ssd_class.default_params._replace(num_classes=FLAGS.num_classes)
        ssd_net = ssd_class(ssd_params)
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Select the preprocessing function.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        # NOTE(review): relies on the private `__flags` attribute of the
        # flags object -- confirm it exists in the TensorFlow version in use.
        tf_utils.print_configuration(FLAGS.__flags, ssd_params,
                                     dataset.data_sources, FLAGS.train_dir)
        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.device(deploy_config.inputs_device()):
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    num_readers=FLAGS.num_readers,
                    common_queue_capacity=20 * FLAGS.batch_size,
                    common_queue_min=10 * FLAGS.batch_size,
                    shuffle=True)
            # Get for SSD network: image, labels, bboxes.
            [image, shape, glabels, gbboxes] = provider.get(['image', 'shape',
                                                             'object/label',
                                                             'object/bbox'])
            # Pre-processing image, labels and bboxes.
            image, glabels, gbboxes = \
                image_preprocessing_fn(image, glabels, gbboxes,
                                       out_shape=ssd_shape,
                                       data_format=DATA_FORMAT)
            # Encode groundtruth labels and bboxes.
            gclasses, glocalisations, gscores = \
                ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
            # Group sizes used to rebuild [image, classes, localisations,
            # scores] from the flat list: one image, then one entry per
            # anchor entry for each of the three encoded outputs.
            batch_shape = [1] + [len(ssd_anchors)] * 3

            # Training batches and queue.
            r = tf.train.batch(
                tf_utils.reshape_list([image, gclasses, glocalisations, gscores]),
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            b_image, b_gclasses, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(r, batch_shape)

            # Intermediate queueing: unique batch computation pipeline for all
            # GPUs running the training.
            batch_queue = slim.prefetch_queue.prefetch_queue(
                tf_utils.reshape_list([b_image, b_gclasses, b_glocalisations, b_gscores]),
                capacity=2 * deploy_config.num_clones)

        # =================================================================== #
        # Define the model running on every GPU.
        # =================================================================== #
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple
            clones of network_fn."""
            # Dequeue batch.
            b_image, b_gclasses, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(batch_queue.dequeue(), batch_shape)

            # Construct SSD network.
            arg_scope = ssd_net.arg_scope(weight_decay=FLAGS.weight_decay,
                                          data_format=DATA_FORMAT)
            with slim.arg_scope(arg_scope):
                predictions, localisations, logits, end_points = \
                    ssd_net.net(b_image, is_training=True)
            # Add loss function. The return value is not used here.
            ssd_net.losses(logits, localisations,
                           b_gclasses, b_glocalisations, b_gscores,
                           match_threshold=FLAGS.match_threshold,
                           negative_ratio=FLAGS.negative_ratio,
                           alpha=FLAGS.loss_alpha,
                           label_smoothing=FLAGS.label_smoothing)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # =================================================================== #
        # Add summaries from first clone.
        # =================================================================== #
        clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                            tf.nn.zero_fraction(x)))
        # Add summaries for losses and extra losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))
        for loss in tf.get_collection('EXTRA_LOSSES', first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # =================================================================== #
        # Configure the moving averages.
        # =================================================================== #
        # Only enabled when the moving_average_decay flag is set.
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        # =================================================================== #
        # Configure the optimization procedure.
        # =================================================================== #
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = tf_utils.configure_learning_rate(FLAGS,
                                                             dataset.num_samples,
                                                             global_step)
            optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = tf_utils.get_variables_to_train(FLAGS)

        # Compute the total loss and the gradients over all clones,
        # restricted to the variables selected above.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones,
            optimizer,
            var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        # The train tensor is the total loss, with the grouped update ops
        # attached as control dependencies.
        train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,
                                                          name='train_op')

        # Add the summaries registered under the first clone's scope.
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                           first_clone_scope))
        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # =================================================================== #
        # Kicks off the training.
        # =================================================================== #
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)
        saver = tf.train.Saver(max_to_keep=5,
                               keep_checkpoint_every_n_hours=1.0,
                               write_version=2,
                               pad_step_number=False)
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master='',
            is_chief=True,
            init_fn=tf_utils.get_init_fn(FLAGS),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            saver=saver,
            save_interval_secs=FLAGS.save_interval_secs,
            session_config=config,
            sync_optimizer=None)
373 |
374 |
# Script entry point: hand control to tf.app.run() when executed directly.
if __name__ == '__main__':
    tf.app.run()
377 |
--------------------------------------------------------------------------------
/visualization.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | import cv2
16 | import random
17 |
18 | import matplotlib.pyplot as plt
19 | import matplotlib.image as mpimg
20 | import matplotlib.cm as mpcm
21 |
22 |
23 | # =========================================================================== #
24 | # Some colormaps.
25 | # =========================================================================== #
def colors_subselect(colors, num_classes=21):
    """Pick `num_classes` evenly spaced colors out of `colors`.

    Colors are sampled at a fixed stride of `len(colors) // num_classes`.
    A color whose first component is a float has every component multiplied
    by 255 and truncated to int; any other color is copied into a list
    unchanged.

    Returns:
      A list of `num_classes` colors, each a list of components.
    """
    stride = len(colors) // num_classes
    picked = []
    for k in range(num_classes):
        entry = colors[k * stride]
        # The first component decides the treatment of the whole color.
        scale_to_bytes = isinstance(entry[0], float)
        picked.append([int(channel * 255) if scale_to_bytes else channel
                       for channel in entry])
    return picked
36 |
# 21 colors sampled from matplotlib's plasma colormap by colors_subselect.
colors_plasma = colors_subselect(mpcm.plasma.colors, num_classes=21)
# Fixed palette of 21 integer 3-tuples with components in the 0-255 range;
# the first entry is (255, 255, 255).
colors_tableau = [(255, 255, 255), (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
                  (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
                  (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
                  (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
                  (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]
43 |
44 |
45 | # =========================================================================== #
46 | # OpenCV drawing.
47 | # =========================================================================== #
def draw_lines(img, lines, color=[255, 0, 0], thickness=2):
    """Draw every segment of every entry in `lines` onto `img` with OpenCV.

    Each entry of `lines` is itself an iterable of `(x1, y1, x2, y2)`
    segments; each segment is drawn with `cv2.line` from `(x1, y1)` to
    `(x2, y2)`.
    """
    # Walk the two nesting levels lazily, one segment at a time.
    segments = (segment for group in lines for segment in group)
    for x1, y1, x2, y2 in segments:
        cv2.line(img, (x1, y1), (x2, y2), color, thickness)
54 |
55 |
def draw_rectangle(img, p1, p2, color=[255, 0, 0], thickness=2):
    """Draw a rectangle on `img` between corners `p1` and `p2`.

    Both points are reversed before being passed to `cv2.rectangle`
    (presumably they arrive as (row, col) pairs -- confirm against callers).
    """
    corner_a = p1[::-1]
    corner_b = p2[::-1]
    cv2.rectangle(img, corner_a, corner_b, color, thickness)
58 |
59 |
def draw_bbox(img, bbox, shape, label, color=[255, 0, 0], thickness=2):
    """Draw one bounding box and its label on `img` with OpenCV.

    `bbox[0]` and `bbox[2]` are scaled by `shape[0]`, `bbox[1]` and `bbox[3]`
    by `shape[1]`, then truncated to int. The scaled corners are reversed
    before being handed to OpenCV. The label text is anchored at the first
    corner, shifted by 15 along its first coordinate.
    """
    dim0, dim1 = shape[0], shape[1]
    corner_min = (int(bbox[0] * dim0), int(bbox[1] * dim1))
    corner_max = (int(bbox[2] * dim0), int(bbox[3] * dim1))
    cv2.rectangle(img, corner_min[::-1], corner_max[::-1], color, thickness)
    # Offset the label from the box corner.
    text_anchor = (corner_min[0] + 15, corner_min[1])
    cv2.putText(img, str(label), text_anchor[::-1],
                cv2.FONT_HERSHEY_DUPLEX, 0.5, color, 1)
66 |
67 |
def bboxes_draw_on_img(img, classes, scores, bboxes, colors, thickness=2):
    """Draw every box in `bboxes` on `img`, captioned with class and score.

    Box coordinates are scaled by `img.shape` (components 0 and 2 by
    `shape[0]`, components 1 and 3 by `shape[1]`) and truncated to int. The
    color of box `i` is `colors[classes[i]]` and its caption is
    '<class>/<score with 3 decimals>'.
    """
    img_shape = img.shape
    num_boxes = bboxes.shape[0]
    for idx in range(num_boxes):
        box = bboxes[idx]
        cls = classes[idx]
        box_color = colors[cls]
        # Draw bounding box...
        corner_min = (int(box[0] * img_shape[0]), int(box[1] * img_shape[1]))
        corner_max = (int(box[2] * img_shape[0]), int(box[3] * img_shape[1]))
        cv2.rectangle(img, corner_min[::-1], corner_max[::-1],
                      box_color, thickness)
        # Draw text...
        caption = '%s/%.3f' % (cls, scores[idx])
        text_anchor = (corner_min[0] - 5, corner_min[1])
        cv2.putText(img, caption, text_anchor[::-1],
                    cv2.FONT_HERSHEY_DUPLEX, 0.4, box_color, 1)
81 |
82 |
83 | # =========================================================================== #
84 | # Matplotlib show...
85 | # =========================================================================== #
def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5):
    """Show `img` with its bounding boxes using matplotlib.

    Largely inspired by SSD-MXNET! Entries with a negative class id are
    skipped. Each class id gets one random color, reused for all of its
    boxes. Box rows are read as (ymin, xmin, ymax, xmax) and scaled by the
    image height and width; each box is labelled '<class id> | <score>'.
    """
    plt.figure(figsize=figsize)
    plt.imshow(img)
    img_h, img_w = img.shape[0], img.shape[1]
    class_colors = {}
    for idx in range(classes.shape[0]):
        cls_id = int(classes[idx])
        if cls_id < 0:
            continue
        score = scores[idx]
        # Draw a new random color only the first time a class is seen.
        if cls_id not in class_colors:
            class_colors[cls_id] = (random.random(), random.random(), random.random())
        box_color = class_colors[cls_id]
        ymin = int(bboxes[idx, 0] * img_h)
        xmin = int(bboxes[idx, 1] * img_w)
        ymax = int(bboxes[idx, 2] * img_h)
        xmax = int(bboxes[idx, 3] * img_w)
        outline = plt.Rectangle((xmin, ymin), xmax - xmin,
                                ymax - ymin, fill=False,
                                edgecolor=box_color,
                                linewidth=linewidth)
        plt.gca().add_patch(outline)
        caption = '{:s} | {:.3f}'.format(str(cls_id), score)
        plt.gca().text(xmin, ymin - 2,
                       caption,
                       bbox=dict(facecolor=box_color, alpha=0.5),
                       fontsize=12, color='white')
    plt.show()
115 |
--------------------------------------------------------------------------------