├── keras ├── __init__.py ├── README ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── anchors.cpython-36.pyc │ ├── train_lib.cpython-36.pyc │ ├── postprocess.cpython-36.pyc │ ├── utils_keras.cpython-36.pyc │ └── efficientdet_keras.cpython-36.pyc ├── utils_keras_test.py ├── utils_keras.py ├── eval.py ├── infer.py ├── postprocess_test.py ├── train_lib_test.py ├── efficientdet_keras_test.py ├── anchors.py └── train.py ├── train.cmd ├── checkpoint ├── g3doc ├── flops.png ├── network.png ├── params.png ├── street.jpg ├── coco_ids.yaml └── faq.md ├── testdata ├── img1.jpg └── img1-d1.jpg ├── __pycache__ ├── utils.cpython-36.pyc ├── nms_np.cpython-36.pyc ├── dataloader.cpython-36.pyc ├── inference.cpython-36.pyc ├── iou_utils.cpython-36.pyc ├── coco_metric.cpython-36.pyc ├── det_model_fn.cpython-36.pyc ├── hparams_config.cpython-36.pyc ├── retinanet_arch.cpython-36.pyc └── efficientdet_arch.cpython-36.pyc ├── backbone ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── backbone_factory.cpython-36.pyc │ ├── efficientnet_model.cpython-36.pyc │ ├── efficientnet_builder.cpython-36.pyc │ └── efficientnet_lite_builder.cpython-36.pyc ├── __init__.py ├── efficientnet_lite_builder_test.py ├── backbone_factory.py ├── efficientnet_builder_test.py ├── efficientnet_lite_builder.py └── efficientnet_model_test.py ├── visualize ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── vis_utils.cpython-36.pyc │ ├── shape_utils.cpython-36.pyc │ ├── static_shape.cpython-36.pyc │ └── standard_fields.cpython-36.pyc ├── __init__.py └── static_shape.py ├── object_detection ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── box_list.cpython-36.pyc │ ├── matcher.cpython-36.pyc │ ├── box_coder.cpython-36.pyc │ ├── preprocessor.cpython-36.pyc │ ├── shape_utils.cpython-36.pyc │ ├── argmax_matcher.cpython-36.pyc │ ├── target_assigner.cpython-36.pyc │ ├── tf_example_decoder.cpython-36.pyc │ ├── faster_rcnn_box_coder.cpython-36.pyc │ └── region_similarity_calculator.cpython-36.pyc ├── __init__.py ├── 
shape_utils.py ├── faster_rcnn_box_coder.py ├── region_similarity_calculator.py ├── box_coder.py ├── box_list.py ├── tf_example_decoder.py ├── argmax_matcher.py └── matcher.py ├── requirements.txt ├── __init__.py ├── aug ├── __init__.py └── autoaugment_test.py ├── dataset ├── __init__.py ├── README.md ├── tfrecord_util.py ├── create_pascal_tfrecord_test.py ├── label_map_util.py └── create_coco_tfrecord_test.py ├── det_model_fn_test.py ├── hparams_config_test.py ├── iou_utils_test.py ├── utils_test.py ├── model_inspect_test.py ├── iou_utils.py └── coco_metric.py /keras/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train.cmd: -------------------------------------------------------------------------------- 1 | python train.py 2 | 3 | pause -------------------------------------------------------------------------------- /keras/README: -------------------------------------------------------------------------------- 1 | WIP for keras layers and models. 
2 | -------------------------------------------------------------------------------- /checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "model" 2 | all_model_checkpoint_paths: "model" 3 | -------------------------------------------------------------------------------- /g3doc/flops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/g3doc/flops.png -------------------------------------------------------------------------------- /g3doc/network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/g3doc/network.png -------------------------------------------------------------------------------- /g3doc/params.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/g3doc/params.png -------------------------------------------------------------------------------- /g3doc/street.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/g3doc/street.jpg -------------------------------------------------------------------------------- /testdata/img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/testdata/img1.jpg -------------------------------------------------------------------------------- /testdata/img1-d1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/testdata/img1-d1.jpg -------------------------------------------------------------------------------- /__pycache__/utils.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/nms_np.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/__pycache__/nms_np.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/dataloader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/__pycache__/dataloader.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/inference.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/__pycache__/inference.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/iou_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/__pycache__/iou_utils.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/coco_metric.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/__pycache__/coco_metric.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/det_model_fn.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/__pycache__/det_model_fn.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/hparams_config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/__pycache__/hparams_config.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/retinanet_arch.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/__pycache__/retinanet_arch.cpython-36.pyc -------------------------------------------------------------------------------- /keras/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/keras/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /keras/__pycache__/anchors.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/keras/__pycache__/anchors.cpython-36.pyc -------------------------------------------------------------------------------- /keras/__pycache__/train_lib.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/keras/__pycache__/train_lib.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/efficientdet_arch.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/__pycache__/efficientdet_arch.cpython-36.pyc -------------------------------------------------------------------------------- /backbone/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/backbone/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /keras/__pycache__/postprocess.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/keras/__pycache__/postprocess.cpython-36.pyc -------------------------------------------------------------------------------- /keras/__pycache__/utils_keras.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/keras/__pycache__/utils_keras.cpython-36.pyc -------------------------------------------------------------------------------- /visualize/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/visualize/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /visualize/__pycache__/vis_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/visualize/__pycache__/vis_utils.cpython-36.pyc -------------------------------------------------------------------------------- /visualize/__pycache__/shape_utils.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/visualize/__pycache__/shape_utils.cpython-36.pyc -------------------------------------------------------------------------------- /visualize/__pycache__/static_shape.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/visualize/__pycache__/static_shape.cpython-36.pyc -------------------------------------------------------------------------------- /backbone/__pycache__/backbone_factory.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/backbone/__pycache__/backbone_factory.cpython-36.pyc -------------------------------------------------------------------------------- /keras/__pycache__/efficientdet_keras.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/keras/__pycache__/efficientdet_keras.cpython-36.pyc -------------------------------------------------------------------------------- /object_detection/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/object_detection/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /object_detection/__pycache__/box_list.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/object_detection/__pycache__/box_list.cpython-36.pyc -------------------------------------------------------------------------------- /object_detection/__pycache__/matcher.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/object_detection/__pycache__/matcher.cpython-36.pyc -------------------------------------------------------------------------------- /visualize/__pycache__/standard_fields.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/visualize/__pycache__/standard_fields.cpython-36.pyc -------------------------------------------------------------------------------- /backbone/__pycache__/efficientnet_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/backbone/__pycache__/efficientnet_model.cpython-36.pyc -------------------------------------------------------------------------------- /object_detection/__pycache__/box_coder.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/object_detection/__pycache__/box_coder.cpython-36.pyc -------------------------------------------------------------------------------- /backbone/__pycache__/efficientnet_builder.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/backbone/__pycache__/efficientnet_builder.cpython-36.pyc -------------------------------------------------------------------------------- /object_detection/__pycache__/preprocessor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/object_detection/__pycache__/preprocessor.cpython-36.pyc -------------------------------------------------------------------------------- 
/object_detection/__pycache__/shape_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/object_detection/__pycache__/shape_utils.cpython-36.pyc -------------------------------------------------------------------------------- /object_detection/__pycache__/argmax_matcher.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/object_detection/__pycache__/argmax_matcher.cpython-36.pyc -------------------------------------------------------------------------------- /object_detection/__pycache__/target_assigner.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/object_detection/__pycache__/target_assigner.cpython-36.pyc -------------------------------------------------------------------------------- /backbone/__pycache__/efficientnet_lite_builder.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/backbone/__pycache__/efficientnet_lite_builder.cpython-36.pyc -------------------------------------------------------------------------------- /object_detection/__pycache__/tf_example_decoder.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/object_detection/__pycache__/tf_example_decoder.cpython-36.pyc -------------------------------------------------------------------------------- /object_detection/__pycache__/faster_rcnn_box_coder.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/object_detection/__pycache__/faster_rcnn_box_coder.cpython-36.pyc -------------------------------------------------------------------------------- /object_detection/__pycache__/region_similarity_calculator.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ravi02512/efficientdet-keras/HEAD/object_detection/__pycache__/region_similarity_calculator.cpython-36.pyc -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py>=0.7.1 2 | matplotlib>=3.0.3 3 | numpy>=1.16.4 4 | Pillow>=6.0.0 5 | PyYAML>=5.1 6 | six>=1.12.0 7 | tensorflow>=1.15.0 8 | tensorflow-addons>=0.9.1 9 | tensorflow-probability>=0.9.0 10 | git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI 11 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /aug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /visualize/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | # Visualization library is mostly based on TensorFlow object detection API: 16 | # https://github.com/tensorflow/models/tree/master/research/object_detection 17 | -------------------------------------------------------------------------------- /object_detection/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | # Object detection data loaders and libraries are mostly based on RetinaNet: 16 | # https://github.com/tensorflow/tpu/tree/master/models/official/retinanet 17 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | # This library is mostly based on tensorflow object detection API 16 | # https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_coco_tf_record.py 17 | -------------------------------------------------------------------------------- /dataset/README.md: -------------------------------------------------------------------------------- 1 | This folder provides tools for converting raw coco/pascal data to tfrecord. 2 | 3 | ### 1. Convert COCO validation set to tfrecord: 4 | 5 | # Download coco data. 
6 | !wget http://images.cocodataset.org/zips/val2017.zip 7 | !wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip 8 | !unzip val2017.zip 9 | !unzip annotations_trainval2017.zip 10 | 11 | # convert coco data to tfrecord. 12 | !mkdir tfrecord 13 | !PYTHONPATH=".:$PYTHONPATH" python dataset/create_coco_tfrecord.py \ 14 | --image_dir=val2017 \ 15 | --caption_annotations_file=annotations/captions_val2017.json \ 16 | --output_file_prefix=tfrecord/val \ 17 | --num_shards=32 18 | 19 | ### 2. Convert Pascal VOC 2012 to tfrecord: 20 | 21 | # Download and convert pascal data. 22 | !wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 23 | !tar xf VOCtrainval_11-May-2012.tar 24 | !mkdir tfrecord 25 | !PYTHONPATH=".:$PYTHONPATH" python dataset/create_pascal_tfrecord.py \ 26 | --data_dir=VOCdevkit --year=VOC2012 --output_path=tfrecord/pascal 27 | 28 | Attention: source_id (or image_id) needs to be an integer due to the official COCO library requirements. 
29 | -------------------------------------------------------------------------------- /g3doc/coco_ids.yaml: -------------------------------------------------------------------------------- 1 | 0: background 2 | 1: person 3 | 2: bicycle 4 | 3: car 5 | 4: motorcycle 6 | 5: airplane 7 | 6: bus 8 | 7: train 9 | 8: truck 10 | 9: boat 11 | 10: traffic light 12 | 11: fire hydrant 13 | 13: stop sign 14 | 14: parking meter 15 | 15: bench 16 | 16: bird 17 | 17: cat 18 | 18: dog 19 | 19: horse 20 | 20: sheep 21 | 21: cow 22 | 22: elephant 23 | 23: bear 24 | 24: zebra 25 | 25: giraffe 26 | 27: backpack 27 | 28: umbrella 28 | 31: handbag 29 | 32: tie 30 | 33: suitcase 31 | 34: frisbee 32 | 35: skis 33 | 36: snowboard 34 | 37: sports ball 35 | 38: kite 36 | 39: baseball bat 37 | 40: baseball glove 38 | 41: skateboard 39 | 42: surfboard 40 | 43: tennis racket 41 | 44: bottle 42 | 46: wine glass 43 | 47: cup 44 | 48: fork 45 | 49: knife 46 | 50: spoon 47 | 51: bowl 48 | 52: banana 49 | 53: apple 50 | 54: sandwich 51 | 55: orange 52 | 56: broccoli 53 | 57: carrot 54 | 58: hot dog 55 | 59: pizza 56 | 60: donut 57 | 61: cake 58 | 62: chair 59 | 63: couch 60 | 64: potted plant 61 | 65: bed 62 | 67: dining table 63 | 70: toilet 64 | 72: tv 65 | 73: laptop 66 | 74: mouse 67 | 75: remote 68 | 76: keyboard 69 | 77: cell phone 70 | 78: microwave 71 | 79: oven 72 | 80: toaster 73 | 81: sink 74 | 82: refrigerator 75 | 84: book 76 | 85: clock 77 | 86: vase 78 | 87: scissors 79 | 88: teddy bear 80 | 89: hair drier 81 | 90: toothbrush 82 | -------------------------------------------------------------------------------- /aug/autoaugment_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
class AutoaugmentTest(tf.test.TestCase):
  """Smoke test for autoaugment.distort_image_with_autoaugment."""

  def test_autoaugment_policy(self):
    """Applies the 'test' policy with and without the use_augmix flag."""
    # Only construction is exercised here; nothing is fed or evaluated.
    img_ph = tf.placeholder(tf.uint8, shape=[640, 640, 3])
    box_ph = tf.placeholder(tf.float32, shape=[4, 4])
    distort = autoaugment.distort_image_with_autoaugment
    distort(img_ph, box_ph, 'test')
    distort(img_ph, box_ph, 'test', use_augmix=True)
class KerasUtilTest(tf.test.TestCase, parameterized.TestCase):
  """Compares the Keras batch norm layer against utils.batch_norm_act."""

  @parameterized.named_parameters(
      ('train_local', True, ''),
      ('eval_local', False, ''),
      ('train_tpu', True, 'tpu'),
      ('eval_tpu', False, 'tpu'),
  )
  def test_batch_norm(self, is_training, strategy):
    """Checks that both implementations agree on one random input batch."""
    features = tf.random.uniform([8, 40, 40, 3])
    # Reference output produced by utils.batch_norm_act.
    reference = utils.batch_norm_act(features, is_training, None)

    # Output of the Keras layer built for the same mode and strategy.
    layer = utils_keras.build_batch_norm(is_training, strategy=strategy)
    actual = layer(features, is_training)
    self.assertAllClose(reference, actual)
17 | 18 | ### 1.2 Why I see NaN during my training and how to debug it? 19 | 20 | Because we use batch norm, which needs a reasonable batch size. If your batch size 21 | is too small, it may cause NaN. (We may add group norm to deal with this in 22 | future) 23 | 24 | If you see NaN, you can check the following: 25 | 26 | - Is my batch size too small? It usually needs to be >=8. 27 | - Should I clip my gradient? How about h.clip_gradients_norm=5.0? 28 | - Should I use smaller jitter? How about train_scale_min=0.8 and train_scale_max=1.2? 29 | 30 | If you want to debug it, you can use these tools: 31 | 32 | ``` 33 | tf.compat.v1.add_check_numerics_ops() # for TensorFlow 1.x 34 | tf.debugging.enable_check_numerics() # for TensorFlow 2.x 35 | ``` 36 | 37 | ### 1.3 Why my last class eval AP is always zero? 38 | 39 | The current code assumes class 0 is always reserved for background, so if you have K classes, then you should set num_classes=K+1. 40 | 41 | See #391 and #398 for more discussion. 42 | 43 | ### 1.4 Why my input pipeline has assert failure? 44 | 45 | This is most likely because your dataset has some images with many objects (more 46 | than the 100 limit for COCO); you should set --hparams="max_instances_per_image=200" or larger. 47 | 48 | See https://github.com/google/automl/issues/93 for more discussion. 49 | 50 | 51 | ## 2. For Developers 52 | 53 | ### 2.1 How can I format my code for PRs? 54 | 55 | Please use [yapf](https://github.com/google/yapf) with option 56 | --style='{based_on_style: yapf}'. You can also save the 57 | following file to ~/.config/yapf/style: 58 | 59 | [style] 60 | based_on_style = yapf 61 | 62 | If you want to check the format with lint, please run: 63 | 64 | !pylint --rcfile=../.pylintrc your_file.py 65 | 66 | ### 2.2 How can I run all tests? 67 | 68 | !export PYTHONPATH="`pwd`:$PYTHONPATH" 69 | !find . -name "*_test.py" | parallel python &> /tmp/test.log \ 70 | && echo "All passed" || echo "Failed! 
def assert_shape_equal(shape_a, shape_b):
  """Asserts that shape_a and shape_b are equal.

  If the shapes are static, raises a ValueError when the shapes
  mismatch.

  If the shapes are dynamic, raises a tf InvalidArgumentError when the shapes
  mismatch.

  Args:
    shape_a: a list containing shape of the first tensor.
    shape_b: a list containing shape of the second tensor.

  Returns:
    Either a tf.no_op() when shapes are all static and a tf.assert_equal() op
    when the shapes are dynamic.

  Raises:
    ValueError: When shapes are both static and unequal.
  """
  both_static = (all(isinstance(dim, int) for dim in shape_a) and
                 all(isinstance(dim, int) for dim in shape_b))
  if not both_static:
    return tf.assert_equal(shape_a, shape_b)

  # Compare element-wise as lists: a plain `!=` would report a list and a
  # tuple holding the same dims as a mismatch.
  if list(shape_a) != list(shape_b):
    raise ValueError('Unequal shapes {}, {}'.format(shape_a, shape_b))
  return tf.no_op()


def combined_static_and_dynamic_shape(tensor):
  """Returns a list containing static and dynamic values for the dimensions.

  Returns a list of static and dynamic values for shape dimensions. This is
  useful to preserve static shapes when available in reshape operation.

  Args:
    tensor: A tensor of any type.

  Returns:
    A list of size tensor.shape.ndims containing integers or a scalar tensor.
  """
  static_dims = tensor.shape.as_list()
  dynamic_dims = tf.shape(tensor)
  # Prefer the statically known dim; fall back to the matching entry of the
  # runtime shape wherever the static dim is None.
  return [
      dynamic_dims[axis] if dim is None else dim
      for axis, dim in enumerate(static_dims)
  ]
def get_dim_as_int(dim):
  """Utility to get v1 or v2 TensorShape dim as an int.

  Args:
    dim: The TensorShape dimension to get as an int

  Returns:
    None or an int.
  """
  # Objects exposing a `value` attribute yield that attribute; anything else
  # is handed back unchanged.
  return getattr(dim, 'value', dim)


def _rank4_dim(tensor_shape, index):
  """Checks that `tensor_shape` has rank 4 and returns entry `index`."""
  tensor_shape.assert_has_rank(rank=4)
  return get_dim_as_int(tensor_shape[index])


def get_batch_size(tensor_shape):
  """Returns batch size (entry 0) from the tensor shape.

  Args:
    tensor_shape: A rank 4 TensorShape.

  Returns:
    An integer representing the batch size of the tensor.
  """
  return _rank4_dim(tensor_shape, 0)


def get_height(tensor_shape):
  """Returns height (entry 1) from the tensor shape.

  Args:
    tensor_shape: A rank 4 TensorShape.

  Returns:
    An integer representing the height of the tensor.
  """
  return _rank4_dim(tensor_shape, 1)


def get_width(tensor_shape):
  """Returns width (entry 2) from the tensor shape.

  Args:
    tensor_shape: A rank 4 TensorShape.

  Returns:
    An integer representing the width of the tensor.
  """
  return _rank4_dim(tensor_shape, 2)


def get_depth(tensor_shape):
  """Returns depth (entry 3) from the tensor shape.

  Args:
    tensor_shape: A rank 4 TensorShape.

  Returns:
    An integer representing the depth of the tensor.
  """
  return _rank4_dim(tensor_shape, 3)


def build_batch_norm(is_training_bn: bool,
                     beta_initializer: Text = 'zeros',
                     gamma_initializer: Text = 'ones',
                     data_format: Text = 'channels_last',
                     momentum: float = 0.99,
                     epsilon: float = 1e-3,
                     strategy: Text = None,
                     name: Text = 'tpu_batch_normalization'):
  """Build a batch normalization layer.

  Args:
    is_training_bn: `bool` for whether the model is training.
    beta_initializer: `str`, beta initializer.
    gamma_initializer: `str`, gamma initializer.
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.
    momentum: `float`, momentum of batch norm.
    epsilon: `float`, small value for numerical stability.
    strategy: `str`, whether to use tpu, horovod or other version of batch norm.
    name: the name of the batch normalization layer

  Returns:
    The constructed batch normalization layer object (not yet applied to any
    input).
  """
  if not is_training_bn:
    layer_cls = tf.keras.layers.BatchNormalization
  elif strategy == 'gpus':
    layer_cls = tf.keras.layers.experimental.SyncBatchNormalization
  else:
    # TODO(tanmingxing): compare them on TPU.
    layer_cls = utils.batch_norm_class(is_training_bn, strategy)

  # Channels sit on axis 1 for channels_first, otherwise on the last axis.
  channel_axis = 1 if data_format == 'channels_first' else -1
  return layer_cls(
      axis=channel_axis,
      momentum=momentum,
      epsilon=epsilon,
      center=True,
      scale=True,
      beta_initializer=beta_initializer,
      gamma_initializer=gamma_initializer,
      name=name)
def legacy_focal_loss(logits, targets, alpha, gamma, normalizer, _=0):
  """A legacy focal loss that does not support label smoothing.

  The trailing positional argument is accepted and ignored.
  """
  with tf.name_scope('focal_loss'):
    is_positive = tf.equal(targets, 1.0)
    xent = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=targets, logits=logits)

    negated = -1.0 * logits
    # Modulating factor, evaluated through exp/log1p of the negated logits.
    log_modulator = (gamma * targets * negated -
                     gamma * tf.math.log1p(tf.exp(negated)))
    modulated = tf.exp(log_modulator) * xent
    # alpha weights positions whose target equals 1, (1 - alpha) the others.
    weighted = tf.where(is_positive, alpha * modulated,
                        (1.0 - alpha) * modulated)
    return weighted / normalizer


class FocalLossTest(tf.test.TestCase):
  """Checks `det_model_fn.focal_loss` against reference values."""

  def test_focal_loss(self):
    """With all-ones targets the new loss must match the legacy loss."""
    tf.random.set_seed(1111)
    dims = [4, 32, 32, 90]
    y_pred = tf.random.uniform(dims)
    y_true = tf.ones(dims)
    alpha, gamma, n = 0.25, 1.5, 100
    expected = legacy_focal_loss(y_pred, y_true, alpha, gamma, n)
    actual = det_model_fn.focal_loss(y_pred, y_true, alpha, gamma, n)
    self.assertAllClose(expected, actual)

  def test_focal_loss_with_label_smoothing(self):
    """Smoothing 0.2 on hard targets must equal pre-smoothed targets."""
    tf.random.set_seed(1111)
    shape = [2, 2, 2, 2]
    y_pred = tf.random.uniform(shape)

    # A binary classification target [0.0, 1.0] becomes [.1, .9]
    # with smoothing .2
    hard_targets = tf.ones(shape) * [0.0, 1.0]
    soft_targets = tf.ones(shape) * [0.1, 0.9]

    presmoothed = det_model_fn.focal_loss(y_pred, soft_targets, 1, 0, 100, 0)
    unsmoothed = det_model_fn.focal_loss(y_pred, hard_targets, 0.9, 0, 100,
                                         0.2)

    self.assertAllClose(presmoothed, unsmoothed)


if __name__ == '__main__':
  tf.test.main()
class EfficientnetBuilderTest(tf.test.TestCase):
  """Checks trainable parameter counts of the efficientnet-lite models."""

  def _test_model_params(self,
                         model_name,
                         input_size,
                         expected_params,
                         override_params=None,
                         features_only=False,
                         pooled_features_only=False):
    """Builds `model_name` on a zero image and compares the parameter count."""
    dummy_images = tf.zeros((1, input_size, input_size, 3), dtype=tf.float32)
    efficientnet_lite_builder.build_model(
        dummy_images,
        model_name=model_name,
        override_params=override_params,
        training=False,
        features_only=features_only,
        pooled_features_only=pooled_features_only)

    # Total element count over every trainable variable.
    per_variable = [np.prod(v.shape) for v in tf.trainable_variables()]
    self.assertEqual(np.sum(per_variable), expected_params)

  def test_efficientnet_b0(self):
    """Parameter count of efficientnet-lite0 at input size 224."""
    self._test_model_params(
        'efficientnet-lite0', 224, expected_params=4652008)

  def test_efficientnet_b1(self):
    """Parameter count of efficientnet-lite1 at input size 240."""
    self._test_model_params(
        'efficientnet-lite1', 240, expected_params=5416680)

  def test_efficientnet_b2(self):
    """Parameter count of efficientnet-lite2 at input size 260."""
    self._test_model_params(
        'efficientnet-lite2', 260, expected_params=6092072)

  def test_efficientnet_b3(self):
    """Parameter count of efficientnet-lite3 at input size 280."""
    self._test_model_params(
        'efficientnet-lite3', 280, expected_params=8197096)

  def test_efficientnet_b4(self):
    """Parameter count of efficientnet-lite4 at input size 300."""
    self._test_model_params(
        'efficientnet-lite4', 300, expected_params=13006568)


if __name__ == '__main__':
  logging.set_verbosity(logging.WARNING)
  # Disable eager to allow tf.profile works for #params/#flops.
  tf.disable_eager_execution()
  tf.test.main()
class HparamsConfigTest(tf.test.TestCase):
  """Tests for `hparams_config.Config` overrides and YAML round trips."""

  def test_config_override(self):
    """update(), attribute assignment and string overrides all take effect."""
    c = hparams_config.Config({'a': 1, 'b': 2})
    self.assertEqual(c.as_dict(), {'a': 1, 'b': 2})

    c.update({'a': 10})
    self.assertEqual(c.as_dict(), {'a': 10, 'b': 2})

    c.b = 20
    self.assertEqual(c.as_dict(), {'a': 10, 'b': 20})

    c.override('a=true,b=ss')
    self.assertEqual(c.as_dict(), {'a': True, 'b': 'ss'})

    c.override('a=100,,,b=2.3,')  # extra ',' is fine.
    self.assertEqual(c.as_dict(), {'a': 100, 'b': 2.3})

    c.override('a=2x3,b=50')  # a is a special format for image size.
    self.assertEqual(c.as_dict(), {'a': '2x3', 'b': 50})

    # override string must be in the format of xx=yy.
    with self.assertRaises(ValueError):
      c.override('a=true,invalid_string')

  def test_config_yaml(self):
    """Overriding from a YAML file and saving back to YAML round-trips."""
    # Use the per-test temp dir rather than fixed file names in the shared
    # system temp dir, so parallel test runs cannot clobber each other.
    tmpdir = self.get_temp_dir()
    yaml_file_path = os.path.join(tmpdir, 'x.yaml')
    with open(yaml_file_path, 'w') as f:
      f.write("""
        x: 2
        y:
          z: 'test'
      """)
    c = hparams_config.Config(dict(x=234, y=2342))
    c.override(yaml_file_path)
    self.assertEqual(c.as_dict(), {'x': 2, 'y': {'z': 'test'}})

    yaml_file_path2 = os.path.join(tmpdir, 'y.yaml')
    c.save_to_yaml(yaml_file_path2)
    with open(yaml_file_path2, 'r') as f:
      config_dict = yaml.load(f, Loader=yaml.FullLoader)
    self.assertEqual(config_dict, {'x': 2, 'y': {'z': 'test'}})

  def test_config_override_recursive(self):
    """Dotted keys create nested configs; update() merges nested dicts."""
    c = hparams_config.Config({'x': 1})
    self.assertEqual(c.as_dict(), {'x': 1})
    c.override('y.y0=2,y.y1=3', allow_new_keys=True)
    self.assertEqual(c.as_dict(), {'x': 1, 'y': {'y0': 2, 'y1': 3}})
    c.update({'y': {'y0': 5, 'y1': {'y11': 100}}})
    self.assertEqual(c.as_dict(), {'x': 1, 'y': {'y0': 5, 'y1': {'y11': 100}}})
    self.assertEqual(c.y.y1.y11, 100)


if __name__ == '__main__':
  logging.set_verbosity(logging.WARNING)
  tf.test.main()
def get_model_builder(model_name):
  """Get the model_builder module for a given model name.

  Args:
    model_name: string, the predefined model name.

  Returns:
    `efficientnet_lite_builder` for names starting with 'efficientnet-lite',
    `efficientnet_builder` for names starting with 'efficientnet-b'.

  Raises:
    ValueError: if the model name matches neither prefix.
  """
  if model_name.startswith('efficientnet-lite'):
    return efficientnet_lite_builder
  if model_name.startswith('efficientnet-b'):
    return efficientnet_builder
  raise ValueError('Unknown model name {}'.format(model_name))


def get_model(model_name, override_params=None, model_dir=None):
  """A helper function to create and return model.

  Args:
    model_name: string, the predefined model name.
    override_params: A dictionary of params for overriding. Fields must exist in
      efficientnet_model.GlobalParams. The dictionary is not modified.
    model_dir: string, optional model dir for saving configs.

  Returns:
    created model

  Raises:
    ValueError: if model_name does not name a known model family.
  """
  # Work on a copy so the caller's dictionary is never mutated.
  override_params = dict(override_params) if override_params else {}

  # For backward compatibility.
  if override_params.get('drop_connect_rate', None):
    override_params['survival_prob'] = 1 - override_params['drop_connect_rate']

  builder = get_model_builder(model_name)
  blocks_args, global_params = builder.get_model_params(model_name,
                                                        override_params)

  if model_dir:
    param_file = os.path.join(model_dir, 'model_params.txt')
    # Only write the parameter dump once; an existing file is left untouched.
    if not tf.io.gfile.exists(param_file):
      if not tf.io.gfile.exists(model_dir):
        # makedirs also creates missing parent directories.
        tf.io.gfile.makedirs(model_dir)
      with tf.io.gfile.GFile(param_file, 'w') as f:
        logging.info('writing to %s', param_file)
        f.write('model_name= %s\n\n' % model_name)
        f.write('global_params= %s\n\n' % str(global_params))
        f.write('blocks_args= %s\n\n' % str(blocks_args))

  return efficientnet_model.Model(blocks_args, global_params, model_name)
class IouUtilsTest(tf.test.TestCase):
  """IoU test class."""

  def setUp(self):
    super(IouUtilsTest, self).setUp()
    predicted = [[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]
    target = [[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]]
    self.pb = tf.constant(predicted, dtype=tf.float32)
    self.tb = tf.constant(target, dtype=tf.float32)
    self.zeros = tf.zeros([2, 4], dtype=tf.float32)

  def _assert_loss(self, pb, tb, iou_type, expected):
    """Asserts `iou_utils.iou_loss(pb, tb, iou_type)` is close to `expected`."""
    self.assertAllClose(iou_utils.iou_loss(pb, tb, iou_type), expected)

  def test_iou(self):
    self._assert_loss(self.pb, self.tb, 'iou', [0.875, 1.])

  def test_ciou(self):
    self._assert_loss(self.pb, self.tb, 'ciou', [1.408893, 1.548753])

  def test_diou(self):
    self._assert_loss(self.pb, self.tb, 'diou', [1.406532, 1.531532])

  def test_giou(self):
    self._assert_loss(self.pb, self.tb, 'giou', [1.075000, 1.933333])

  def test_iou_zero_target(self):
    """All-zero targets give zero loss for every loss type."""
    for iou_type in ('iou', 'ciou', 'diou', 'giou'):
      self._assert_loss(self.pb, self.zeros, iou_type, [0.0, 0.0])

  def test_iou_multiple_anchors(self):
    """Boxes tiled twice along the last axis."""
    tiled_pb = tf.tile(self.pb, [1, 2])
    tiled_tb = tf.tile(self.tb, [1, 2])
    self._assert_loss(tiled_pb, tiled_tb, 'iou', [1.75, 2.0])

  def test_iou_multiple_anchors_mixed(self):
    """Real boxes concatenated with all-zero boxes along the last axis."""
    mixed_pb = tf.concat([self.pb, self.zeros], axis=-1)
    mixed_tb = tf.concat([self.tb, self.zeros], axis=-1)
    self._assert_loss(mixed_pb, mixed_tb, 'iou', [0.875, 1.0])

  def test_ciou_grad(self):
    """Summed gradients of the ciou loss w.r.t. targets and predictions."""
    pb = tf.concat([self.pb, self.zeros], axis=-1)
    tb = tf.concat([self.tb, self.zeros], axis=-1)
    with tf.GradientTape() as tape:
      tape.watch([pb, tb])
      loss = iou_utils.iou_loss(pb, tb, 'ciou')
    grad_tb, grad_pb = tape.gradient(loss, [tb, pb])
    self.assertAlmostEqual(tf.reduce_sum(grad_tb).numpy(), 0.16687772)
    self.assertAlmostEqual(tf.reduce_sum(grad_pb).numpy(), -0.16687769)


if __name__ == '__main__':
  logging.set_verbosity(logging.WARNING)
  tf.test.main()
flags.DEFINE_string('val_file_pattern', None,
                    'Glob for eval tfrecords, e.g. coco/val-*.tfrecord.')
flags.DEFINE_string('val_json_file', None,
                    'Groudtruth file, e.g. annotations/instances_val2017.json.')
flags.DEFINE_string('model_name', 'efficientdet-d0', 'Model name to use.')
flags.DEFINE_string('checkpoint', None, 'Location of the checkpoint to run.')
flags.DEFINE_integer('batch_size', 8, 'Batch size.')
flags.DEFINE_string('hparams', '', 'Comma separated k=v pairs or a yaml file')
FLAGS = flags.FLAGS


def main(_):
  """Evaluates a checkpoint on the validation tfrecords and prints metrics."""
  config = hparams_config.get_efficientdet_config(FLAGS.model_name)
  config.override(FLAGS.hparams)
  config.batch_size = FLAGS.batch_size
  config.val_json_file = FLAGS.val_json_file

  # dataset
  ds = dataloader.InputReader(
      FLAGS.val_file_pattern,
      is_training=False,
      use_fake_data=False,
      max_instances_per_image=config.max_instances_per_image)(
          config)

  # Network
  model = efficientdet_keras.EfficientDetNet(config=config)
  # NOTE(review): the build shape is hard-coded to 512x512 and does not follow
  # config.image_size -- confirm this is intended for models other than d0.
  model.build((config.batch_size, 512, 512, 3))
  model.load_weights(FLAGS.checkpoint)

  evaluator = coco_metric.EvaluationMetric(
      filename=config.val_json_file)

  # This setting does not depend on the batch, so set it once up front
  # instead of on every loop iteration.
  config.nms_configs.max_nms_inputs = anchors.MAX_DETECTION_POINTS

  # compute stats for all batches.
  for images, labels in ds:
    cls_outputs, box_outputs = model(images, training=False)
    detections = postprocess.generate_detections(config, cls_outputs,
                                                 box_outputs,
                                                 labels['image_scales'],
                                                 labels['source_ids'])
    evaluator.update_state(labels['groundtruth_data'].numpy(),
                           detections.numpy())

  # compute the final eval results.
  metric_values = evaluator.result()
  # Pair each metric value with the name at the same position.
  metric_dict = dict(zip(evaluator.metric_names, metric_values))
  print(metric_dict)


if __name__ == '__main__':
  flags.mark_flag_as_required('val_file_pattern')
  flags.mark_flag_as_required('val_json_file')
  flags.mark_flag_as_required('checkpoint')
  logging.set_verbosity(logging.WARNING)
  app.run(main)
flags.DEFINE_string('image_path', None, 'Location of test image.')
flags.DEFINE_string('output_dir', None, 'Directory of annotated output images.')
flags.DEFINE_string('checkpoint', None, 'Location of the checkpoint to run.')
flags.DEFINE_string('model_name', 'efficientdet-d0', 'Model name to use.')
flags.DEFINE_string('hparams', '', 'Comma separated k=v pairs or a yaml file')
FLAGS = flags.FLAGS


def main(_):
  """Runs the keras model on one image and writes the annotated result."""
  # pylint: disable=line-too-long
  # Prepare images and checkpoints: please run these commands in shell.
  # !mkdir tmp
  # !wget https://user-images.githubusercontent.com/11736571/77320690-099af300-6d37-11ea-9d86-24f14dc2d540.png -O tmp/img.png
  # !wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/coco/efficientdet-d0.tar.gz -O tmp/efficientdet-d0.tar.gz
  # !tar zxf tmp/efficientdet-d0.tar.gz -C tmp
  imgs = [np.array(Image.open(FLAGS.image_path))]
  nms_score_thresh, nms_max_output_size = 0.4, 100

  # Create model config. Honor --model_name (default is still
  # 'efficientdet-d0') instead of a hard-coded model.
  config = hparams_config.get_efficientdet_config(FLAGS.model_name)
  config.is_training_bn = False
  config.image_size = '1920x1280'
  config.nms_configs.score_thresh = nms_score_thresh
  config.nms_configs.max_output_size = nms_max_output_size
  config.anchor_scale = [1.0, 1.0, 1.0, 1.0, 1.0]
  # Apply --hparams last so user overrides win over the defaults above; the
  # default empty string leaves the config as set here.
  config.override(FLAGS.hparams)

  # Use 'mixed_float16' if running on GPUs.
  policy = tf.keras.mixed_precision.experimental.Policy('float32')
  tf.keras.mixed_precision.experimental.set_policy(policy)

  # Create and run the model.
  model = efficientdet_keras.EfficientDetModel(config=config)
  height, width = utils.parse_image_size(config['image_size'])
  model.build((1, height, width, 3))
  model.load_weights(FLAGS.checkpoint)
  boxes, scores, classes, valid_len = model(
      imgs, training=False, post_mode='global')
  model.summary()

  # Visualize results.
  for i, img in enumerate(imgs):
    length = valid_len[i]
    img = inference.visualize_image(
        img,
        boxes[i].numpy()[:length],
        # Builtin int: the `np.int` alias was removed in NumPy 1.24.
        classes[i].numpy().astype(int)[:length],
        scores[i].numpy()[:length],
        min_score_thresh=nms_score_thresh,
        max_boxes_to_draw=nms_max_output_size)
    output_image_path = os.path.join(FLAGS.output_dir, str(i) + '.jpg')
    Image.fromarray(img).save(output_image_path)
    # Format the path into the message; print() does not interpolate %s.
    print('writing annotated image to %s' % output_image_path)


if __name__ == '__main__':
  flags.mark_flag_as_required('image_path')
  flags.mark_flag_as_required('output_dir')
  flags.mark_flag_as_required('checkpoint')
  logging.set_verbosity(logging.WARNING)
  app.run(main)
def int64_feature(value):
  """Wraps a single integer in an int64-list `tf.train.Feature`."""
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def int64_list_feature(value):
  """Wraps a sequence of integers in an int64-list `tf.train.Feature`."""
  return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def bytes_feature(value):
  """Wraps a single bytes value in a bytes-list `tf.train.Feature`."""
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def bytes_list_feature(value):
  """Wraps a sequence of bytes values in a bytes-list `tf.train.Feature`."""
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))


def float_list_feature(value):
  """Wraps a sequence of floats in a float-list `tf.train.Feature`."""
  return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def read_examples_list(path):
  """Read list of training or validation examples.

  The file is assumed to contain a single example per line where the first
  token in the line is an identifier that allows us to find the image and
  annotation xml for that example.

  For example, the line:
  xyz 3
  would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored).

  Args:
    path: absolute path to examples list file.

  Returns:
    list of example identifiers (strings).
  """
  with tf.gfile.GFile(path) as fid:
    lines = fid.readlines()
  # Keep only the text before the first space on each stripped line.
  return [line.strip().split(' ')[0] for line in lines]


def recursive_parse_xml_to_dict(xml):
  """Recursively parses XML contents to python dict.

  We assume that `object` tags are the only ones that can appear
  multiple times at the same level of a tree.

  Args:
    xml: xml tree obtained by parsing XML file contents using lxml.etree

  Returns:
    Python dictionary holding XML contents.
  """
  # Explicit child count: relying on the truth value of an element is
  # deprecated, and this states the intent ("no children") directly.
  if len(xml) == 0:
    return {xml.tag: xml.text}
  result = {}
  for child in xml:
    child_result = recursive_parse_xml_to_dict(child)
    if child.tag != 'object':
      result[child.tag] = child_result[child.tag]
    else:
      # `object` may repeat, so its parsed values are collected in a list.
      result.setdefault(child.tag, []).append(child_result[child.tag])
  return {xml.tag: result}


def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
  """Opens all TFRecord shards for writing and adds them to an exit stack.

  Args:
    exit_stack: An ExitStack-style object; each writer is registered through
      its `enter_context` method so the TFRecords opened in this function are
      closed automatically.
    base_path: The base path for all shards
    num_shards: The number of shards

  Returns:
    The list of opened TFRecords. Position k in the list corresponds to shard k.
  """
  tf_record_output_filenames = [
      '{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
      for idx in range(num_shards)
  ]

  return [
      exit_stack.enter_context(tf.python_io.TFRecordWriter(file_name))
      for file_name in tf_record_output_filenames
  ]
EPSILON = 1e-8


class FasterRcnnBoxCoder(box_coder.BoxCoder):
  """Box coder using the Faster RCNN anchor-relative parameterization.

  With (y, x, h, w) the box center and size and (ya, xa, ha, wa) the anchor
  center and size, the code is:
    ty = (y - ya) / ha
    tx = (x - xa) / wa
    th = log(h / ha)
    tw = log(w / wa)
  optionally multiplied element-wise by `scale_factors`.

  See http://arxiv.org/abs/1506.01497 for details.
  """

  def __init__(self, scale_factors=None):
    """Constructor for FasterRcnnBoxCoder.

    Args:
      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
        If set to None, does not perform scaling. For Faster RCNN,
        the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
    """
    if scale_factors:
      assert len(scale_factors) == 4
      assert all(factor > 0 for factor in scale_factors)
    self._scale_factors = scale_factors

  @property
  def code_size(self):
    """Number of values in one encoded box (ty, tx, th, tw)."""
    return 4

  def _encode(self, boxes, anchors):
    """Encode a box collection with respect to anchor collection.

    Args:
      boxes: BoxList holding N boxes to be encoded.
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [ty, tx, th, tw].
    """
    # Both collections are expressed as (center y, center x, height, width).
    anchor_cy, anchor_cx, anchor_h, anchor_w = (
        anchors.get_center_coordinates_and_sizes())
    box_cy, box_cx, box_h, box_w = boxes.get_center_coordinates_and_sizes()

    # Clamp sizes away from zero so the divisions and logs stay finite.
    anchor_h = tf.maximum(EPSILON, anchor_h)
    anchor_w = tf.maximum(EPSILON, anchor_w)
    box_h = tf.maximum(EPSILON, box_h)
    box_w = tf.maximum(EPSILON, box_w)

    tx = (box_cx - anchor_cx) / anchor_w
    ty = (box_cy - anchor_cy) / anchor_h
    tw = tf.log(box_w / anchor_w)
    th = tf.log(box_h / anchor_h)

    codes = [ty, tx, th, tw]
    # Scales location targets as used in paper for joint training.
    if self._scale_factors:
      codes = [
          code * self._scale_factors[index] for index, code in enumerate(codes)
      ]
    return tf.transpose(tf.stack(codes))

  def _decode(self, rel_codes, anchors):
    """Decode relative codes to boxes.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes.
    """
    anchor_cy, anchor_cx, anchor_h, anchor_w = (
        anchors.get_center_coordinates_and_sizes())

    codes = tf.unstack(tf.transpose(rel_codes))
    # Undo the scaling applied by `_encode` before inverting the transform.
    if self._scale_factors:
      codes = [
          code / self._scale_factors[index] for index, code in enumerate(codes)
      ]
    ty, tx, th, tw = codes

    box_w = tf.exp(tw) * anchor_w
    box_h = tf.exp(th) * anchor_h
    box_cy = ty * anchor_h + anchor_cy
    box_cx = tx * anchor_w + anchor_cx

    # Convert center/size back to corner coordinates.
    half_h = box_h / 2.
    half_w = box_w / 2.
    corners = [
        box_cy - half_h,
        box_cx - half_w,
        box_cy + half_h,
        box_cx + half_w,
    ]
    return box_list.BoxList(tf.transpose(tf.stack(corners)))
class CreatePascalTFRecordTest(tf.test.TestCase):
  """Tests conversion of a PASCAL-style annotation dict to a tf.Example."""

  def _assertProtoEqual(self, proto_field, expectation):
    """Helper function to assert if a proto field equals some value.

    Args:
      proto_field: The protobuf field to compare.
      expectation: The expected value of the protobuf field.
    """
    # Repeated proto fields are not plain lists; copy before comparing.
    self.assertListEqual(list(proto_field), expectation)

  def test_dict_to_tf_example(self):
    """Checks every feature written for a single annotated image."""
    image_file_name = '2012_12.jpg'
    # Use real 8-bit RGB pixels so Pillow infers the mode from the array
    # instead of reinterpreting the raw bytes of a float64 buffer as 'RGB'.
    image_data = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)
    save_path = os.path.join(self.get_temp_dir(), image_file_name)
    image = PIL.Image.fromarray(image_data)
    image.save(save_path)

    data = {
        'folder': '',
        'filename': image_file_name,
        'size': {
            'height': 256,
            'width': 256,
        },
        'object': [
            {
                'difficult': 1,
                'bndbox': {
                    'xmin': 64,
                    'ymin': 64,
                    'xmax': 192,
                    'ymax': 192,
                },
                'name': 'person',
                'truncated': 0,
                'pose': '',
            },
        ],
    }

    label_map_dict = {
        'background': 0,
        'person': 1,
        'notperson': 2,
    }

    example = create_pascal_tfrecord.dict_to_tf_example(
        data, self.get_temp_dir(), label_map_dict, image_subdirectory='')
    feature = example.features.feature

    # Image-level metadata.
    self._assertProtoEqual(feature['image/height'].int64_list.value, [256])
    self._assertProtoEqual(feature['image/width'].int64_list.value, [256])
    self._assertProtoEqual(feature['image/filename'].bytes_list.value,
                           [six.b(image_file_name)])
    self._assertProtoEqual(feature['image/source_id'].bytes_list.value,
                           [six.b(str(1))])
    self._assertProtoEqual(feature['image/format'].bytes_list.value,
                           [six.b('jpeg')])

    # Box corners are normalized by the 256x256 image size.
    self._assertProtoEqual(
        feature['image/object/bbox/xmin'].float_list.value, [0.25])
    self._assertProtoEqual(
        feature['image/object/bbox/ymin'].float_list.value, [0.25])
    self._assertProtoEqual(
        feature['image/object/bbox/xmax'].float_list.value, [0.75])
    self._assertProtoEqual(
        feature['image/object/bbox/ymax'].float_list.value, [0.75])

    # Per-object attributes.
    self._assertProtoEqual(
        feature['image/object/class/text'].bytes_list.value,
        [six.b('person')])
    self._assertProtoEqual(
        feature['image/object/class/label'].int64_list.value, [1])
    self._assertProtoEqual(
        feature['image/object/difficult'].int64_list.value, [1])
    self._assertProtoEqual(
        feature['image/object/truncated'].int64_list.value, [0])
    self._assertProtoEqual(
        feature['image/object/view'].bytes_list.value, [six.b('')])


if __name__ == '__main__':
  logging.set_verbosity(logging.WARNING)
  tf.test.main()
class EfficientnetBuilderTest(tf.test.TestCase):
  """Checks trainable-parameter counts and endpoints of EfficientNet models."""

  def _test_model_params(self,
                         model_name,
                         input_size,
                         expected_params,
                         override_params=None,
                         features_only=False,
                         pooled_features_only=False):
    """Builds a model on an all-zero image and checks its parameter count.

    Args:
      model_name: name of the EfficientNet variant to build.
      input_size: height and width of the square input image.
      expected_params: expected total size of all trainable variables.
      override_params: optional dict forwarded to `build_model`.
      features_only: forwarded to `build_model`.
      pooled_features_only: forwarded to `build_model`.
    """
    build_kwargs = dict(
        model_name=model_name,
        override_params=override_params,
        training=False,
        features_only=features_only,
        pooled_features_only=pooled_features_only)
    dummy_images = tf.zeros((1, input_size, input_size, 3), dtype=tf.float32)
    efficientnet_builder.build_model(dummy_images, **build_kwargs)

    variable_sizes = [np.prod(var.shape) for var in tf.trainable_variables()]
    self.assertEqual(np.sum(variable_sizes), expected_params)

  def test_efficientnet_b0(self):
    self._test_model_params(
        model_name='efficientnet-b0', input_size=224, expected_params=5288548)

  def test_efficientnet_b1(self):
    self._test_model_params(
        model_name='efficientnet-b1', input_size=240, expected_params=7794184)

  def test_efficientnet_b2(self):
    self._test_model_params(
        model_name='efficientnet-b2', input_size=260, expected_params=9109994)

  def test_efficientnet_b3(self):
    self._test_model_params(
        model_name='efficientnet-b3', input_size=300, expected_params=12233232)

  def test_efficientnet_b4(self):
    self._test_model_params(
        model_name='efficientnet-b4', input_size=380, expected_params=19341616)

  def test_efficientnet_b5(self):
    self._test_model_params(
        model_name='efficientnet-b5', input_size=456, expected_params=30389784)

  def test_efficientnet_b6(self):
    self._test_model_params(
        model_name='efficientnet-b6', input_size=528, expected_params=43040704)

  def test_efficientnet_b7(self):
    self._test_model_params(
        model_name='efficientnet-b7', input_size=600, expected_params=66347960)

  def test_efficientnet_b0_with_customized_num_classes(self):
    self._test_model_params(
        model_name='efficientnet-b0',
        input_size=224,
        expected_params=4135648,
        override_params={'num_classes': 100})

  def test_efficientnet_b0_with_features_only(self):
    self._test_model_params(
        model_name='efficientnet-b0',
        input_size=224,
        expected_params=3595388,
        features_only=True)

  def test_efficientnet_b0_with_pooled_features_only(self):
    self._test_model_params(
        model_name='efficientnet-b0',
        input_size=224,
        expected_params=4007548,
        pooled_features_only=True)

  def test_efficientnet_b0_fails_if_both_features_requested(self):
    # The two feature-only modes are mutually exclusive.
    conflicting_kwargs = dict(
        model_name='efficientnet-b0',
        training=False,
        features_only=True,
        pooled_features_only=True)
    with self.assertRaises(AssertionError):
      efficientnet_builder.build_model(None, **conflicting_kwargs)

  def test_efficientnet_b0_base(self):
    # Creates a base model using the model configuration.
    dummy_images = tf.zeros((1, 224, 224, 3), dtype=tf.float32)
    _, endpoints = efficientnet_builder.build_model_base(
        dummy_images, model_name='efficientnet-b0', training=False)

    # reduction_1 to reduction_5 should be in endpoints.
    for present_key in ('reduction_1', 'reduction_5'):
      self.assertIn(present_key, endpoints)
    # reduction_5 should be the last one: no reduction_6.
    self.assertNotIn('reduction_6', endpoints)


if __name__ == '__main__':
  logging.set_verbosity(logging.WARNING)
  # Disable eager so that tf.profile works for #params/#flops.
  tf.disable_eager_execution()
  tf.test.main()
class PostprocessTest(tf.test.TestCase):
  """A test for postprocess."""

  def setUp(self):
    super().setUp()
    nms_configs = {
        'method': 'hard',
        'iou_thresh': None,
        'score_thresh': None,
        'sigma': None,
        'max_nms_inputs': 0,
        'max_output_size': 2,
    }
    self.params = {
        'min_level': 1,
        'max_level': 2,
        'aspect_ratios': [(1.0, 1.0)],
        'num_scales': 1,
        'anchor_scale': 1,
        'image_size': 8,
        'num_classes': 2,
        'data_format': 'channels_last',
        'max_detection_points': 10,
        'nms_configs': nms_configs,
    }

  def _seeded_outputs(self):
    """Returns (cls_outputs_list, box_outputs_list) drawn with a fixed seed.

    The four random tensors are created in the order class level 1, class
    level 2, box level 1, box level 2, so the values every test sees are the
    same.
    """
    tf.random.set_seed(1111)
    cls_outputs_list = [
        tf.random.normal([2, 4, 4, 2]),
        tf.random.normal([2, 2, 2, 2]),
    ]
    box_outputs_list = [
        tf.random.normal([2, 4, 4, 4]),
        tf.random.normal([2, 2, 2, 4]),
    ]
    return cls_outputs_list, box_outputs_list

  def test_postprocess_global(self):
    """Test the postprocess with global nms."""
    cls_outputs_list, box_outputs_list = self._seeded_outputs()
    scales = [1.0, 2.0]

    self.params['max_detection_points'] = 10
    _, scores, classes, valid_len = postprocess.postprocess_global(
        self.params, cls_outputs_list, box_outputs_list, scales)
    self.assertAllClose(valid_len, [2, 2])
    self.assertAllClose(classes.numpy(), [[2., 1.], [1., 2.]])
    self.assertAllClose(scores.numpy(),
                        [[0.90157586, 0.88812476], [0.88454413, 0.8158828]])

  def test_postprocess_per_class(self):
    """Test postprocess with per class nms."""
    cls_outputs_list, box_outputs_list = self._seeded_outputs()
    scales = [1.0, 2.0]
    ids = [0, 1]

    self.params['max_detection_points'] = 10
    outputs = postprocess.generate_detections(self.params, cls_outputs_list,
                                              box_outputs_list, scales, ids)
    expected_detections = [
        [[0., -1.177383, 1.793507, 9.518328, 2.624881, 0.901576, 2.],
         [0., 5.676410, 6.102146, 2.109282, 2.435021, 0.888125, 1.]],
        [[1., 5.885427, 13.529362, 5.524654, 0.624685, 0.884544, 1.],
         [1., 8.145872, -9.660868, 6.028101, 20.073238, 0.815883, 2.]],
    ]
    self.assertAllClose(outputs.numpy(), expected_detections)

  def test_postprocess_combined(self):
    """Test postprocess with combined nms."""
    cls_outputs_list, box_outputs_list = self._seeded_outputs()
    scales = [1.0, 2.0]

    self.params['max_detection_points'] = 10
    _, scores, classes, valid_len = postprocess.postprocess_combined(
        self.params, cls_outputs_list, box_outputs_list, scales)
    self.assertAllClose(valid_len, [2, 2])
    self.assertAllClose(classes.numpy(), [[2., 1.], [1., 2.]])
    self.assertAllClose(scores.numpy(),
                        [[0.90157586, 0.88812476], [0.88454413, 0.8158828]])


if __name__ == '__main__':
  logging.set_verbosity(logging.WARNING)
  tf.test.main()
def area(boxlist, scope=None):
  """Computes area of boxes.

  Args:
    boxlist: BoxList holding N boxes
    scope: name scope.

  Returns:
    a tensor with shape [N] representing box areas.
  """
  with tf.name_scope(scope, 'Area'):
    # Each split is one [N, 1] corner-coordinate column.
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])


def intersection(boxlist1, boxlist2, scope=None):
  """Compute pairwise intersection areas between boxes.

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes
    scope: name scope.

  Returns:
    a tensor with shape [N, M] representing pairwise intersections
  """
  with tf.name_scope(scope, 'Intersection'):
    y_min1, x_min1, y_max1, x_max1 = tf.split(
        value=boxlist1.get(), num_or_size_splits=4, axis=1)
    y_min2, x_min2, y_max2, x_max2 = tf.split(
        value=boxlist2.get(), num_or_size_splits=4, axis=1)
    # Broadcasting an [N, 1] column against a transposed [1, M] row yields
    # every pair; overlaps are clamped at zero for disjoint boxes.
    all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
    all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
    intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
    all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
    all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
    intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
    return intersect_heights * intersect_widths


def iou(boxlist1, boxlist2, scope=None):
  """Computes pairwise intersection-over-union between box collections.

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes
    scope: name scope.

  Returns:
    a tensor with shape [N, M] representing pairwise iou scores.
  """
  with tf.name_scope(scope, 'IOU'):
    intersections = intersection(boxlist1, boxlist2)
    areas1 = area(boxlist1)
    areas2 = area(boxlist2)
    unions = (
        tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
    # Report 0 for non-overlapping pairs instead of evaluating 0 / union,
    # which would be NaN when both boxes are degenerate.
    return tf.where(
        tf.equal(intersections, 0.0),
        tf.zeros_like(intersections), tf.truediv(intersections, unions))


class RegionSimilarityCalculator(metaclass=ABCMeta):
  """Abstract base class for region similarity calculator.

  Subclasses must implement `_compare`; the base class cannot be
  instantiated directly.
  """

  def compare(self, boxlist1, boxlist2, scope=None):
    """Computes matrix of pairwise similarity between BoxLists.

    This op (to be overridden) computes a measure of pairwise similarity between
    the boxes in the given BoxLists. Higher values indicate more similarity.

    Note that this method simply measures similarity and does not explicitly
    perform a matching.

    Args:
      boxlist1: BoxList holding N boxes.
      boxlist2: BoxList holding M boxes.
      scope: Op scope name. Defaults to 'Compare' if None.

    Returns:
      a (float32) tensor of shape [N, M] with pairwise similarity score.
    """
    with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]) as scope:
      return self._compare(boxlist1, boxlist2)

  @abstractmethod
  def _compare(self, boxlist1, boxlist2):
    """Computes the similarity matrix; to be overridden by implementations.

    Args:
      boxlist1: BoxList holding N boxes.
      boxlist2: BoxList holding M boxes.

    Returns:
      a tensor of shape [N, M] with pairwise similarity score.
    """
    pass


class IouSimilarity(RegionSimilarityCalculator):
  """Class to compute similarity based on Intersection over Union (IOU) metric.

  This class computes pairwise similarity between two BoxLists based on IOU.
  """

  def _compare(self, boxlist1, boxlist2):
    """Compute pairwise IOU similarity between the two BoxLists.

    Args:
      boxlist1: BoxList holding N boxes.
      boxlist2: BoxList holding M boxes.

    Returns:
      A tensor with shape [N, M] representing pairwise iou scores.
    """
    return iou(boxlist1, boxlist2)
def _validate_label_map(label_map):
  """Checks if a label map is valid.

  Args:
    label_map: StringIntLabelMap to validate.

  Raises:
    ValueError: if label map is invalid.
  """
  for item in label_map.item:
    if item.id < 0:
      raise ValueError('Label map ids should be >= 0.')
    # Id 0 is only acceptable when the entry is named 'background' (by
    # either its name or its display name).
    if (item.id == 0 and item.name != 'background' and
        item.display_name != 'background'):
      raise ValueError('Label map id 0 is reserved for the background label')


def create_category_index(categories):
  """Creates dictionary of COCO compatible categories keyed by category id.

  Args:
    categories: a list of dicts, each of which has the following keys:
      'id': (required) an integer id uniquely identifying this category.
      'name': (required) string representing category name
        e.g., 'cat', 'dog', 'pizza'.

  Returns:
    category_index: a dict containing the same entries as categories, but keyed
      by the 'id' field of each category. If an id repeats, the last entry
      with that id wins.
  """
  return {cat['id']: cat for cat in categories}


def get_max_label_map_index(label_map):
  """Get maximum index in label map.

  Args:
    label_map: a StringIntLabelMapProto

  Returns:
    an integer

  Raises:
    ValueError: if the label map has no items.
  """
  return max(item.id for item in label_map.item)


def convert_label_map_to_categories(label_map,
                                    max_num_classes,
                                    use_display_name=True):
  """Given label map proto returns categories list compatible with eval.

  This function converts label map proto and returns a list of dicts, each of
  which has the following keys:
    'id': (required) an integer id uniquely identifying this category.
    'name': (required) string representing category name
      e.g., 'cat', 'dog', 'pizza'.
    'keypoints': (optional) a dictionary of keypoint string 'label' to integer
      'id'.
  We only allow class into the list if its id-label_id_offset is
  between 0 (inclusive) and max_num_classes (exclusive).
  If there are several items mapping to the same id in the label map,
  we will only keep the first one in the categories list.

  Args:
    label_map: a StringIntLabelMapProto or None.  If None, a default categories
      list is created with max_num_classes categories.
    max_num_classes: maximum number of (consecutive) label indices to include.
    use_display_name: (boolean) choose whether to load 'display_name' field as
      category name.  If False or if the display_name field does not exist, uses
      'name' field as category names instead.

  Returns:
    categories: a list of dictionaries representing all possible categories.

  Raises:
    ValueError: if an item lists the same keypoint id more than once.
  """
  if not label_map:
    # No label map: synthesize 'category_<id>' entries with ids starting at 1.
    label_id_offset = 1
    return [{
        'id': class_id + label_id_offset,
        'name': 'category_{}'.format(class_id + label_id_offset)
    } for class_id in range(max_num_classes)]

  categories = []
  # Sets keep the duplicate checks O(1) per item.
  ids_already_added = set()
  for item in label_map.item:
    if not 0 < item.id <= max_num_classes:
      logging.info(
          'Ignore item %d since it falls outside of requested '
          'label range.', item.id)
      continue
    if item.id in ids_already_added:
      # Only the first item for a given id is kept.
      continue
    ids_already_added.add(item.id)

    if use_display_name and item.HasField('display_name'):
      name = item.display_name
    else:
      name = item.name
    category = {'id': item.id, 'name': name}

    if item.keypoints:
      keypoints = {}
      keypoint_ids_seen = set()
      for kv in item.keypoints:
        if kv.id in keypoint_ids_seen:
          raise ValueError('Duplicate keypoint ids are not allowed. '
                           'Found {} more than once'.format(kv.id))
        keypoints[kv.label] = kv.id
        keypoint_ids_seen.add(kv.id)
      category['keypoints'] = keypoints
    categories.append(category)
  return categories


def create_class_agnostic_category_index():
  """Creates a category index with a single `object` class."""
  return {1: {'id': 1, 'name': 'object'}}
class BoxCoder(metaclass=ABCMeta):
  """Abstract base class for box coder.

  Subclasses must provide `code_size`, `_encode` and `_decode`. Callers use
  the public `encode` / `decode` wrappers, which run the implementation inside
  a TensorFlow name scope.
  """
  # The metaclass is passed in the class statement because a class-level
  # `__metaclass__` attribute is ignored on Python 3, which would leave the
  # abstract members below unenforced.

  @property
  @abstractmethod
  def code_size(self):
    """Return the size of each code.

    This number is a constant and should agree with the output of the `encode`
    op (e.g. if rel_codes is the output of self.encode(...), then it should have
    shape [N, code_size()]). This abstract property must be overridden by
    implementations.

    Returns:
      an integer constant
    """

  def encode(self, boxes, anchors):
    """Encode a box list relative to an anchor collection.

    Args:
      boxes: BoxList holding N boxes to be encoded
      anchors: BoxList of N anchors

    Returns:
      a tensor representing N relative-encoded boxes
    """
    with tf.name_scope('Encode'):
      return self._encode(boxes, anchors)

  def decode(self, rel_codes, anchors):
    """Decode boxes that are encoded relative to an anchor collection.

    Args:
      rel_codes: a tensor representing N relative-encoded boxes
      anchors: BoxList of anchors

    Returns:
      boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
        with corners y_min, x_min, y_max, x_max)
    """
    with tf.name_scope('Decode'):
      return self._decode(rel_codes, anchors)

  @abstractmethod
  def _encode(self, boxes, anchors):
    """Method to be overridden by implementations.

    Args:
      boxes: BoxList holding N boxes to be encoded
      anchors: BoxList of N anchors

    Returns:
      a tensor representing N relative-encoded boxes
    """

  @abstractmethod
  def _decode(self, rel_codes, anchors):
    """Method to be overridden by implementations.

    Args:
      rel_codes: a tensor representing N relative-encoded boxes
      anchors: BoxList of anchors

    Returns:
      boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
        with corners y_min, x_min, y_max, x_max)
    """


def batch_decode(encoded_boxes, box_coder, anchors):
  """Decode a batch of encoded boxes.

  This op takes a batch of encoded bounding boxes and transforms
  them to a batch of bounding boxes specified by their corners in
  the order of [y_min, x_min, y_max, x_max].

  Args:
    encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
      code_size] representing the location of the objects.
    box_coder: a BoxCoder object.
    anchors: a BoxList of anchors used to encode `encoded_boxes`.

  Returns:
    decoded_boxes: a float32 tensor of shape [batch_size, num_anchors, 4]
      representing the corners of the objects in the order
      of [y_min, x_min, y_max, x_max].

  Raises:
    ValueError: if the rank of `encoded_boxes` is not 3, or if
      the number of anchors inferred from encoded_boxes and anchors are
      inconsistent.
  """
  encoded_boxes.get_shape().assert_has_rank(3)
  # `as_list()` yields plain ints (or None) regardless of whether the shape
  # entries are Dimension objects, so no `.value` attribute is required.
  num_encoded_anchors = encoded_boxes.get_shape().as_list()[1]
  num_anchors = anchors.num_boxes_static()
  if num_encoded_anchors != num_anchors:
    raise ValueError('The number of anchors inferred from encoded_boxes'
                     ' and anchors are inconsistent: shape[1] of encoded_boxes'
                     ' %s should be equal to the number of anchors: %s.' %
                     (num_encoded_anchors, num_anchors))

  # Decode each batch element independently against the shared anchors.
  decoded_boxes = tf.stack([
      box_coder.decode(boxes, anchors).get()
      for boxes in tf.unstack(encoded_boxes)
  ])
  return decoded_boxes
class UtilsTest(tf.test.TestCase):
  """Tests for checkpoint archiving, size parsing and precision helpers."""

  def setUp(self):
    super(UtilsTest, self).setUp()
    self.model_dir = os.path.join(tf.test.get_temp_dir(), 'model_dir')

  def build_model(self):
    """Creates two scalar variables and returns the tensor of their sum."""
    lhs = tf.Variable(1.0)
    rhs = tf.Variable(2.0)
    return lhs + rhs

  def test_archive_ckpt(self):
    """Checks which objective values make `archive_ckpt` return True."""
    model_dir = os.path.join(tf.test.get_temp_dir(), 'model_dir')
    checkpoint = os.path.join(model_dir, 'ckpt')
    self.build_model()
    saver = tf.train.Saver()
    with self.session() as sess:
      sess.run(tf.global_variables_initializer())
      saver.save(sess, checkpoint)

    def _exists(subdir):
      return tf.io.gfile.exists(os.path.join(model_dir, subdir))

    # First objective: archived, and no backup directory exists yet.
    self.assertTrue(utils.archive_ckpt('eval1', 0.1, checkpoint))
    logging.info(os.listdir(model_dir))
    self.assertTrue(_exists('archive'))
    self.assertFalse(_exists('backup'))

    # A better objective is archived and a backup directory appears.
    self.assertTrue(utils.archive_ckpt('eval2', 0.2, checkpoint))
    self.assertTrue(_exists('archive'))
    self.assertTrue(_exists('backup'))

    # A worse objective is skipped.
    self.assertFalse(utils.archive_ckpt('eval3', 0.1, checkpoint))

    # A better objective is archived again.
    self.assertTrue(utils.archive_ckpt('eval4', 0.3, checkpoint))

    # An equal objective is also archived.
    self.assertTrue(utils.archive_ckpt('eval5', 0.3, checkpoint))
    self.assertTrue(_exists('archive'))
    self.assertTrue(_exists('backup'))

  def test_image_size(self):
    """`parse_image_size` accepts a 'WxH' string, an int, or a tuple."""
    cases = (
        ('1280x640', (640, 1280)),
        (1280, (1280, 1280)),
        ((1280, 640), (1280, 640)),
    )
    for raw_size, expected in cases:
      self.assertEqual(utils.parse_image_size(raw_size), expected)

  def test_get_feat_sizes(self):
    """`get_feat_sizes` halves height and width once per level."""
    square = utils.get_feat_sizes(640, 2)
    self.assertEqual(
        square,
        [{'height': side, 'width': side} for side in (640, 320, 160)])

    rectangular = utils.get_feat_sizes((640, 300), 2)
    self.assertEqual(
        rectangular,
        [{'height': height, 'width': width}
         for height, width in ((640, 300), (320, 150), (160, 75))])

  def test_precision_float16(self):
    """Under 'mixed_float16' variables stay float32; the output is float16."""

    def _model(inputs):
      ones = tf.ones((4, 4, 4, 4), dtype='float32')
      conv_layer = tf.keras.layers.Conv2D(
          filters=4, kernel_size=2, use_bias=False)
      scale = tf.Variable(1.0)
      return tf.cast(scale, inputs.dtype) * conv_layer(ones) * inputs

    model_input = tf.constant(2.0, dtype=tf.float32)  # input can be any type.
    output = utils.build_model_with_precision(
        'mixed_float16', _model, model_input, False)
    # Variables should be float32.
    allowed_dtypes = (tf.float32, tf.dtypes.as_dtype('float32_ref'))
    for variable in tf.global_variables():
      self.assertIn(variable.dtype, allowed_dtypes)
    self.assertIs(output.dtype, tf.float16)  # output should be float16.
class ActivationTest(tf.test.TestCase):
  """Checks `utils.activation_fn` on the inputs [.5, 10] for each name."""

  def _check_activation(self, name, expected):
    """Asserts that activation `name` maps [.5, 10] close to `expected`."""
    inputs = tf.constant([.5, 10])
    self.assertAllClose(utils.activation_fn(inputs, name), expected)

  def test_swish(self):
    inputs = tf.constant([.5, 10])

    swish_out = utils.activation_fn(inputs, 'swish')
    reference = inputs * tf.sigmoid(inputs)
    self.assertAllClose(swish_out, reference)

    # The native variant must agree with the same reference values.
    native_out = utils.activation_fn(inputs, 'swish_native')
    self.assertAllClose(native_out, reference)

  def test_hswish(self):
    self._check_activation('hswish', [0.29166667, 10.0])

  def test_relu(self):
    self._check_activation('relu', [0.5, 10])

  def test_relu6(self):
    self._check_activation('relu6', [0.5, 6])

  def test_mish(self):
    self._check_activation('mish', [0.37524524, 10.0])
class TrainLibTest(tf.test.TestCase):
  """Tests for the Keras training helpers in `keras.train_lib`."""

  def test_display_callback(self):
    """Runs `DisplayCallback.on_epoch_end` once on a built d0 model."""
    config = hparams_config.get_detection_config('efficientdet-d0')
    config.batch_size = 1
    config.num_examples_per_epoch = 1
    config.model_dir = tempfile.mkdtemp()
    sample_image = tf.ones([416, 416, 3])
    display_callback = train_lib.DisplayCallback(sample_image)
    model = train_lib.EfficientDetNetTrain(config=config)
    model.build((1, 512, 512, 3))
    display_callback.set_model(model)
    display_callback.on_epoch_end(0, {})

  def test_lr_schedule(self):
    """Keras LR schedule objects must equal the legacy functions, steps 0-4."""
    stepwise = train_lib.StepwiseLrSchedule(1e-3, 1e-4, 1, 3, 5)
    cosine = train_lib.CosineLrSchedule(1e-3, 1e-4, 1, 5)
    polynomial = train_lib.PolynomialLrSchedule(1e-3, 1e-4, 1, 2, 5)
    for i in range(5):
      self.assertEqual(
          legacy_fn.stepwise_lr_schedule(1e-3, 1e-4, 1, 3, 5, i), stepwise(i))
      self.assertEqual(
          legacy_fn.cosine_lr_schedule(1e-3, 1e-4, 1, 5, i), cosine(i))
      self.assertEqual(
          legacy_fn.polynomial_lr_schedule(1e-3, 1e-4, 1, 2, 5, i),
          polynomial(i))

  def test_losses(self):
    """Compares the Keras box and focal losses with the legacy functions."""
    # The seed is set before any random tensor is created; the hard-coded
    # IoU loss value at the end of this test depends on it.
    tf.random.set_seed(1111)
    box_loss = train_lib.BoxLoss()
    box_iou_loss = train_lib.BoxIouLoss(
        iou_loss_type='ciou',
        min_level=3,
        max_level=3,
        num_scales=1,
        aspect_ratios=[(1.0, 1.0)],
        anchor_scale=1.0,
        image_size=32)
    alpha = 0.25
    gamma = 1.5
    focal_loss_v2 = train_lib.FocalLoss(
        alpha, gamma, reduction=tf.keras.losses.Reduction.NONE)
    box_outputs = tf.random.normal([64, 4])
    box_targets = tf.random.normal([64, 4])
    num_positives = 4.0
    # The Keras losses receive [num_positives, targets] as the first argument
    # and the predictions as the second argument.
    self.assertEqual(
        legacy_fn._box_loss(box_outputs, box_targets, num_positives),
        box_loss([num_positives, box_targets], box_outputs))
    self.assertAllEqual(
        legacy_fn.focal_loss(box_outputs, box_targets, alpha, gamma,
                             num_positives),
        focal_loss_v2([num_positives, box_targets], box_outputs))
    # TODO(tanmingxing): Re-enable this test after fixing this failing test.
    # self.assertEqual(
    #     legacy_fn._box_iou_loss(box_outputs, box_targets, num_positives,
    #                             'ciou'),
    #     box_iou_loss([num_positives, box_targets], box_outputs))
    iou_loss = box_iou_loss([num_positives, box_targets], box_outputs)
    self.assertAlmostEqual(iou_loss.numpy(), 4.507848)

  def test_predict(self):
    """A forward pass on a d0 model returns 5 class and 5 box outputs."""
    x = np.random.random((1, 512, 512, 3)).astype(np.float32)
    model = train_lib.EfficientDetNetTrain('efficientdet-d0')
    cls_outputs, box_outputs = model(x)
    self.assertLen(cls_outputs, 5)
    self.assertLen(box_outputs, 5)

  def test_train(self):
    """Runs train_on_batch, test_on_batch and fit against recorded metrics."""
    tf.random.set_seed(1111)
    config = hparams_config.get_detection_config('efficientdet-d0')
    config.batch_size = 1
    config.num_examples_per_epoch = 1
    config.model_dir = tempfile.mkdtemp()
    x = tf.ones((1, 512, 512, 3))
    # All-ones targets for i = 3..7; the spatial size is 512 // 2**i.
    labels = {
        'box_targets_%d' % i: tf.ones((1, 512 // 2**i, 512 // 2**i, 36))
        for i in range(3, 8)
    }
    labels.update({
        'cls_targets_%d' % i: tf.ones((1, 512 // 2**i, 512 // 2**i, 9),
                                      dtype=tf.int32) for i in range(3, 8)
    })
    labels.update({'mean_num_positives': tf.constant([10.0])})

    params = config.as_dict()
    params['num_shards'] = 1
    model = train_lib.EfficientDetNetTrain(config=config)
    model.build((1, 512, 512, 3))
    model.compile(
        optimizer=train_lib.get_optimizer(params),
        loss={
            'box_loss':
                train_lib.BoxLoss(
                    params['delta'], reduction=tf.keras.losses.Reduction.NONE),
            'box_iou_loss':
                train_lib.BoxIouLoss(
                    params['iou_loss_type'],
                    params['min_level'],
                    params['max_level'],
                    params['num_scales'],
                    params['aspect_ratios'],
                    params['anchor_scale'],
                    params['image_size'],
                    reduction=tf.keras.losses.Reduction.NONE),
            'class_loss':
                train_lib.FocalLoss(
                    params['alpha'],
                    params['gamma'],
                    label_smoothing=params['label_smoothing'],
                    reduction=tf.keras.losses.Reduction.NONE)
        })

    # Test single-batch
    # The comparisons below use loose tolerances (rtol=.1, atol=100.).
    outputs = model.train_on_batch(x, labels, return_dict=True)
    expect_results = {'loss': 26278.25,
                      'det_loss': 26277.033203125,
                      'cls_loss': 5060.716796875,
                      'box_loss': 424.3263244628906,
                      'box_iou_loss': 0,
                      'gnorm': 5873.78759765625}
    self.assertAllClose(outputs, expect_results, rtol=.1, atol=100.)
    outputs = model.test_on_batch(x, labels, return_dict=True)
    expect_results = {'loss': 26079.712890625,
                      'det_loss': 26078.49609375,
                      'cls_loss': 5063.3759765625,
                      'box_loss': 420.30242919921875,
                      'box_iou_loss': 0}
    self.assertAllClose(outputs, expect_results, rtol=.1, atol=100.)

    # Test fit.
    hist = model.fit(
        x,
        labels,
        steps_per_epoch=1,
        epochs=1,
        callbacks=train_lib.get_callbacks(params))
    expect_results = {'loss': [26063.099609375],
                      'det_loss': [26061.8828125],
                      'cls_loss': [5058.1337890625],
                      'box_loss': [420.074951171875],
                      'box_iou_loss': [0],
                      'gnorm': [5216.858887]}
    self.assertAllClose(
        hist.history, expect_results, rtol=.1, atol=100.)
class EfficientDetKerasTest(tf.test.TestCase):
  """Compares the Keras EfficientDet layers with the legacy architecture."""

  def test_model_output(self):
    """Keras outputs (graph and reloaded) must match the legacy outputs."""
    inputs_shape = [1, 512, 512, 3]
    config = hparams_config.get_efficientdet_config('efficientdet-d0')
    tmp_ckpt = os.path.join(tempfile.mkdtemp(), 'ckpt')
    # Keras model in its own graph; its weights are saved for the reload below.
    with tf.Session(graph=tf.Graph()) as sess:
      feats = tf.ones(inputs_shape)
      tf.random.set_random_seed(SEED)
      model = efficientdet_keras.EfficientDetNet(config=config)
      outputs = model(feats, True)
      sess.run(tf.global_variables_initializer())
      keras_class_out, keras_box_out = sess.run(outputs)
      model.save_weights(tmp_ckpt)
    # Legacy model in a separate graph, seeded with the same SEED.
    with tf.Session(graph=tf.Graph()) as sess:
      feats = tf.ones(inputs_shape)
      tf.random.set_random_seed(SEED)
      feats = legacy_arch.efficientdet(feats, config=config)
      sess.run(tf.global_variables_initializer())
      legacy_class_out, legacy_box_out = sess.run(feats)
    # Keras outputs are indexed from 0, legacy outputs by i in 3..7.
    for i in range(3, 8):
      self.assertAllClose(
          keras_class_out[i - 3], legacy_class_out[i], rtol=1e-4, atol=1e-4)
      self.assertAllClose(
          keras_box_out[i - 3], legacy_box_out[i], rtol=1e-4, atol=1e-4)

    # A fresh Keras model that loads the saved weights must also match.
    feats = tf.ones(inputs_shape)
    model = efficientdet_keras.EfficientDetNet(config=config)
    model.load_weights(tmp_ckpt)
    eager_class_out, eager_box_out = model(feats, True)
    for i in range(3, 8):
      self.assertAllClose(
          eager_class_out[i - 3], legacy_class_out[i], rtol=1e-4, atol=1e-4)
      self.assertAllClose(
          eager_box_out[i - 3], legacy_box_out[i], rtol=1e-4, atol=1e-4)

  def test_build_feature_network(self):
    """`FPNCells` output must match `legacy_arch.build_feature_network`."""
    config = hparams_config.get_efficientdet_config('efficientdet-d0')
    config.max_level = 5
    with tf.Session(graph=tf.Graph()) as sess:
      inputs = [
          tf.ones([1, 64, 64, 40]),  # level 3
          tf.ones([1, 32, 32, 112]),  # level 4
          tf.ones([1, 16, 16, 320]),  # level 5
      ]
      tf.random.set_random_seed(SEED)
      new_feats1 = efficientdet_keras.FPNCells(config)(inputs, True)
      sess.run(tf.global_variables_initializer())
      keras_feats = sess.run(new_feats1)
    with tf.Session(graph=tf.Graph()) as sess:
      # The legacy function receives a dict keyed 0..5 instead of a list.
      inputs = {
          0: tf.ones([1, 512, 512, 3]),
          1: tf.ones([1, 256, 256, 16]),
          2: tf.ones([1, 128, 128, 24]),
          3: tf.ones([1, 64, 64, 40]),
          4: tf.ones([1, 32, 32, 112]),
          5: tf.ones([1, 16, 16, 320])
      }
      tf.random.set_random_seed(SEED)
      new_feats2 = legacy_arch.build_feature_network(inputs, config)
      sess.run(tf.global_variables_initializer())
      legacy_feats = sess.run(new_feats2)

    for i in range(config.min_level, config.max_level + 1):
      self.assertAllClose(keras_feats[i - config.min_level], legacy_feats[i])

  def test_model_variables(self):
    """Variable names of the Keras model must equal the legacy names."""
    input_shape = (1, 512, 512, 3)
    # Model built outside any explicit graph context.
    model = efficientdet_keras.EfficientDetNet('efficientdet-d0')
    model.build(input_shape)
    eager_train_vars = sorted([var.name for var in model.trainable_variables])
    eager_model_vars = sorted([var.name for var in model.variables])
    with tf.Graph().as_default():
      # NOTE(review): `feats` is assigned but not used in this graph block.
      feats = tf.ones([1, 512, 512, 3])
      model = efficientdet_keras.EfficientDetNet('efficientdet-d0')
      model.build(input_shape)
      keras_train_vars = sorted([var.name for var in model.trainable_variables])
      keras_model_vars = sorted([var.name for var in model.variables])
    with tf.Graph().as_default():
      feats = tf.ones([1, 512, 512, 3])
      legacy_arch.efficientdet(feats, 'efficientdet-d0')
      legacy_train_vars = sorted([var.name for var in tf.trainable_variables()])
      legacy_model_vars = sorted([var.name for var in tf.global_variables()])

    self.assertEqual(keras_train_vars, legacy_train_vars)
    self.assertEqual(keras_model_vars, legacy_model_vars)
    self.assertEqual(eager_train_vars, legacy_train_vars)
    self.assertEqual(eager_model_vars, legacy_model_vars)

  def test_resample_feature_map(self):
    """`ResampleFeatureMap` must match the legacy op for every option mix."""
    feat = tf.random.uniform([1, 16, 16, 320])
    for apply_bn in [True, False]:
      for training in [True, False]:
        for strategy in ['tpu', '']:
          with self.subTest(
              apply_bn=apply_bn, training=training, strategy=strategy):
            # The same seed is set before each implementation is built.
            tf.random.set_random_seed(SEED)
            expect_result = legacy_arch.resample_feature_map(
                feat,
                name='resample_p0',
                target_height=8,
                target_width=8,
                target_num_channels=64,
                apply_bn=apply_bn,
                is_training=training,
                strategy=strategy)
            tf.random.set_random_seed(SEED)
            resample_layer = efficientdet_keras.ResampleFeatureMap(
                name='resample_p0',
                target_height=8,
                target_width=8,
                target_num_channels=64,
                apply_bn=apply_bn,
                is_training_bn=training,
                strategy=strategy)
            actual_result = resample_layer(feat, training)
            self.assertAllCloseAccordingToType(expect_result, actual_result)

  def test_resample_var_names(self):
    """Trainable variable names must match between Keras and legacy resample."""
    with tf.Graph().as_default():
      feat = tf.random.uniform([1, 16, 16, 320])
      resample_layer = efficientdet_keras.ResampleFeatureMap(
          name='resample_p0',
          target_height=8,
          target_width=8,
          target_num_channels=64)
      resample_layer(feat, True)
      vars1 = sorted([var.name for var in tf.trainable_variables()])

    with tf.Graph().as_default():
      feat = tf.random.uniform([1, 16, 16, 320])
      # NOTE(review): the legacy call is named 'p0' while the Keras layer is
      # named 'resample_p0'; presumably the legacy op adds the prefix - confirm.
      legacy_arch.resample_feature_map(
          feat,
          name='p0',
          target_height=8,
          target_width=8,
          target_num_channels=64)
      vars2 = sorted([var.name for var in tf.trainable_variables()])

    self.assertEqual(vars1, vars2)
class BoxList(object):
  """Box collection.

  Box corners are stored under the 'boxes' key of `self.data`; any extra
  per-box fields added with `add_field` are stored in the same dict.
  """

  def __init__(self, boxes):
    """Constructs box collection.

    Args:
      boxes: a tensor of shape [N, 4] representing box corners

    Raises:
      ValueError: if invalid dimensions for bbox data or if bbox data is not in
          float32 format.
    """
    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
      raise ValueError('Invalid dimensions for box data.')
    if boxes.dtype != tf.float32:
      raise ValueError('Invalid tensor type: should be tf.float32')
    self.data = {'boxes': boxes}

  def num_boxes(self):
    """Returns number of boxes held in collection.

    Returns:
      a tensor representing the number of boxes held in the collection.
    """
    return tf.shape(self.data['boxes'])[0]

  def num_boxes_static(self):
    """Returns number of boxes held in collection.

    This number is inferred at graph construction time rather than run-time.

    Returns:
      Number of boxes held in collection (integer) or None if this is not
        inferable at graph construction time.
    """
    return self.data['boxes'].get_shape().as_list()[0]

  def get_all_fields(self):
    """Returns all fields."""
    return self.data.keys()

  def get_extra_fields(self):
    """Returns all non-box fields (i.e., everything not named 'boxes')."""
    return [name for name in self.data if name != 'boxes']

  def add_field(self, field, field_data):
    """Add field to box list.

    This method can be used to add related box data such as
    weights/labels, etc.

    Args:
      field: a string key to access the data via `get_field`
      field_data: a tensor containing the data to store in the BoxList
    """
    self.data[field] = field_data

  def has_field(self, field):
    """Returns True if `field` is stored in this box list."""
    return field in self.data

  def get(self):
    """Convenience function for accessing box coordinates.

    Returns:
      a tensor with shape [N, 4] representing box coordinates.
    """
    return self.get_field('boxes')

  def set(self, boxes):
    """Convenience function for setting box coordinates.

    Args:
      boxes: a tensor of shape [N, 4] representing box corners

    Raises:
      ValueError: if invalid dimensions for bbox data
    """
    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
      raise ValueError('Invalid dimensions for box data.')
    self.data['boxes'] = boxes

  def get_field(self, field='boxes'):
    """Accesses a box collection and associated fields.

    This function returns specified field with object; if no field is specified,
    it returns the box coordinates.

    Args:
      field: this optional string parameter can be used to specify
        a related field to be accessed. Defaults to 'boxes'.

    Returns:
      a tensor representing the box collection or an associated field.

    Raises:
      ValueError: if invalid field
    """
    if not self.has_field(field):
      raise ValueError('field ' + str(field) + ' does not exist')
    return self.data[field]

  def set_field(self, field, value):
    """Sets the value of a field.

    Updates the field of a box_list with a given value. Unlike `add_field`,
    the field must already exist.

    Args:
      field: (string) name of the field to set value.
      value: the value to assign to the field.

    Raises:
      ValueError: if the box_list does not have specified field.
    """
    if not self.has_field(field):
      raise ValueError('field %s does not exist' % field)
    self.data[field] = value

  def get_center_coordinates_and_sizes(self, scope=None):
    """Computes the center coordinates, height and width of the boxes.

    Args:
      scope: name scope of the function.

    Returns:
      a list of 4 1-D tensors [ycenter, xcenter, height, width].
    """
    with tf.name_scope(scope, 'get_center_coordinates_and_sizes'):
      box_corners = self.get()
      # Transposing [N, 4] to [4, N] lets unstack yield one tensor per corner.
      ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(box_corners))
      width = xmax - xmin
      height = ymax - ymin
      ycenter = ymin + height / 2.
      xcenter = xmin + width / 2.
      return [ycenter, xcenter, height, width]

  def transpose_coordinates(self, scope=None):
    """Transpose the coordinate representation in a boxlist.

    Rewrites the stored boxes in place from [y_min, x_min, y_max, x_max]
    column order to [x_min, y_min, x_max, y_max].

    Args:
      scope: name scope of the function.
    """
    with tf.name_scope(scope, 'transpose_coordinates'):
      y_min, x_min, y_max, x_max = tf.split(
          value=self.get(), num_or_size_splits=4, axis=1)
      self.set(tf.concat([x_min, y_min, x_max, y_max], 1))

  def as_tensor_dict(self, fields=None):
    """Retrieves specified fields as a dictionary of tensors.

    Args:
      fields: (optional) list of fields to return in the dictionary.
        If None (default), all fields are returned.

    Returns:
      tensor_dict: A dictionary of tensors specified by fields.

    Raises:
      ValueError: if specified field is not contained in boxlist.
    """
    if fields is None:
      fields = self.get_all_fields()
    tensor_dict = {}
    for field in fields:
      if not self.has_field(field):
        raise ValueError('boxlist must contain all specified fields')
      tensor_dict[field] = self.get_field(field)
    return tensor_dict
def _get_source_id_from_encoded_image(parsed_tensors):
  """Derives a string source id by hashing the encoded image bytes.

  Args:
    parsed_tensors: dict of parsed features; only 'image/encoded' is read.

  Returns:
    A string tensor: the hash bucket (out of 2**63 - 1 buckets) of the
    encoded image, converted to a string.
  """
  return tf.strings.as_string(
      tf.strings.to_hash_bucket_fast(parsed_tensors['image/encoded'],
                                     2**63 - 1))


class TfExampleDecoder(object):
  """Tensorflow Example proto decoder."""

  def __init__(self, include_mask=False, regenerate_source_id=False):
    """Stores the options and builds the feature spec used for parsing.

    Args:
      include_mask: if True, 'image/object/mask' is added to the feature spec
        as a variable-length string feature.
      regenerate_source_id: stored on the instance as `_regenerate_source_id`.
    """
    self._include_mask = include_mask
    self._regenerate_source_id = regenerate_source_id
    # Source id defaults to '' and height/width default to -1 when absent.
    self._keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string),
        'image/source_id': tf.FixedLenFeature((), tf.string, ''),
        'image/height': tf.FixedLenFeature((), tf.int64, -1),
        'image/width': tf.FixedLenFeature((), tf.int64, -1),
        'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
        'image/object/class/label': tf.VarLenFeature(tf.int64),
        'image/object/area': tf.VarLenFeature(tf.float32),
        'image/object/is_crowd': tf.VarLenFeature(tf.int64),
    }
    if include_mask:
      self._keys_to_features.update({
          'image/object/mask':
              tf.VarLenFeature(tf.string),
      })

  def _decode_image(self, parsed_tensors):
    """Decodes the image and set its static shape."""
    image = tf.io.decode_image(parsed_tensors['image/encoded'], channels=3)
    # Height and width stay unknown; only the channel count is fixed to 3.
    image.set_shape([None, None, 3])
    return image

  def _decode_boxes(self, parsed_tensors):
    """Concat box coordinates in the format of [ymin, xmin, ymax, xmax]."""
    xmin = parsed_tensors['image/object/bbox/xmin']
    xmax = parsed_tensors['image/object/bbox/xmax']
    ymin = parsed_tensors['image/object/bbox/ymin']
    ymax = parsed_tensors['image/object/bbox/ymax']
    return tf.stack([ymin, xmin, ymax, xmax], axis=-1)

  def _decode_masks(self, parsed_tensors):
    """Decode a set of PNG masks to the tf.float32 tensors.

    Returns a zero tensor of shape [0, height, width] when the example holds
    no masks, using the parsed 'image/height' and 'image/width' values.
    """
    def _decode_png_mask(png_bytes):
      # Decode a single-channel PNG and drop the trailing channel axis.
      mask = tf.squeeze(
          tf.io.decode_png(png_bytes, channels=1, dtype=tf.uint8), axis=-1)
      mask = tf.cast(mask, dtype=tf.float32)
      mask.set_shape([None, None])
      return mask

    height = parsed_tensors['image/height']
    width = parsed_tensors['image/width']
    masks = parsed_tensors['image/object/mask']
    return tf.cond(
        tf.greater(tf.shape(masks)[0], 0),
        lambda: tf.map_fn(_decode_png_mask, masks, dtype=tf.float32),
        lambda: tf.zeros([0, height, width], dtype=tf.float32))

  def _decode_areas(self, parsed_tensors):
    """Returns the stored object areas, or box areas when none are stored.

    If 'image/object/area' is empty, the area is computed from the box
    corners as (xmax - xmin) * (ymax - ymin).
    """
    xmin = parsed_tensors['image/object/bbox/xmin']
    xmax = parsed_tensors['image/object/bbox/xmax']
    ymin = parsed_tensors['image/object/bbox/ymin']
    ymax = parsed_tensors['image/object/bbox/ymax']
    return tf.cond(
        tf.greater(tf.shape(parsed_tensors['image/object/area'])[0], 0),
        lambda: parsed_tensors['image/object/area'],
        lambda: (xmax - xmin) * (ymax - ymin))
115 | """ 116 | parsed_tensors = tf.io.parse_single_example( 117 | serialized_example, self._keys_to_features) 118 | for k in parsed_tensors: 119 | if isinstance(parsed_tensors[k], tf.SparseTensor): 120 | if parsed_tensors[k].dtype == tf.string: 121 | parsed_tensors[k] = tf.sparse_tensor_to_dense( 122 | parsed_tensors[k], default_value='') 123 | else: 124 | parsed_tensors[k] = tf.sparse_tensor_to_dense( 125 | parsed_tensors[k], default_value=0) 126 | 127 | image = self._decode_image(parsed_tensors) 128 | boxes = self._decode_boxes(parsed_tensors) 129 | areas = self._decode_areas(parsed_tensors) 130 | 131 | decode_image_shape = tf.logical_or( 132 | tf.equal(parsed_tensors['image/height'], -1), 133 | tf.equal(parsed_tensors['image/width'], -1)) 134 | image_shape = tf.cast(tf.shape(image), dtype=tf.int64) 135 | 136 | parsed_tensors['image/height'] = tf.where(decode_image_shape, 137 | image_shape[0], 138 | parsed_tensors['image/height']) 139 | parsed_tensors['image/width'] = tf.where(decode_image_shape, image_shape[1], 140 | parsed_tensors['image/width']) 141 | 142 | is_crowds = tf.cond( 143 | tf.greater(tf.shape(parsed_tensors['image/object/is_crowd'])[0], 0), 144 | lambda: tf.cast(parsed_tensors['image/object/is_crowd'], dtype=tf.bool), 145 | lambda: tf.zeros_like(parsed_tensors['image/object/class/label'], dtype=tf.bool)) # pylint: disable=line-too-long 146 | if self._regenerate_source_id: 147 | source_id = _get_source_id_from_encoded_image(parsed_tensors) 148 | else: 149 | source_id = tf.cond( 150 | tf.greater(tf.strings.length(parsed_tensors['image/source_id']), 151 | 0), lambda: parsed_tensors['image/source_id'], 152 | lambda: _get_source_id_from_encoded_image(parsed_tensors)) 153 | if self._include_mask: 154 | masks = self._decode_masks(parsed_tensors) 155 | 156 | decoded_tensors = { 157 | 'image': image, 158 | 'source_id': source_id, 159 | 'height': parsed_tensors['image/height'], 160 | 'width': parsed_tensors['image/width'], 161 | 'groundtruth_classes': 
parsed_tensors['image/object/class/label'], 162 | 'groundtruth_is_crowd': is_crowds, 163 | 'groundtruth_area': areas, 164 | 'groundtruth_boxes': boxes, 165 | } 166 | if self._include_mask: 167 | decoded_tensors.update({ 168 | 'groundtruth_instance_masks': masks, 169 | 'groundtruth_instance_masks_png': parsed_tensors['image/object/mask'], 170 | }) 171 | return decoded_tensors 172 | -------------------------------------------------------------------------------- /model_inspect_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def _tf_version_at_least(major, minor):
  """Returns True if the installed TensorFlow is at least `major.minor`.

  Compares the numeric components; comparing version strings directly orders
  e.g. '2.10.0' before '2.3.0'.
  """
  parts = tf.__version__.split('.')
  return (int(parts[0]), int(parts[1])) >= (major, minor)


class ModelInspectTest(tf.test.TestCase):
  """Model inspect tests."""

  def setUp(self):
    super(ModelInspectTest, self).setUp()
    # Use a fresh, uniquely named directory per test so a crashed earlier run
    # or a concurrent run cannot make directory creation fail.
    self.tempdir = tempfile.mkdtemp(prefix='_inspect_test')

    np.random.seed(111)
    tf.random.set_random_seed(111)
    self.test_image = np.random.randint(0, 244, (640, 720, 3)).astype(np.uint8)

    self.savedmodel_dir = os.path.join(self.tempdir, 'savedmodel')

    self.params = dict(
        model_name='efficientdet-d0',
        logdir=os.path.join(self.tempdir, 'logdir'),
        tensorrt=False,
        use_xla=False,
        ckpt_path='_',
        export_ckpt=None,
        saved_model_dir=self.savedmodel_dir,
        batch_size=1,
        hparams='')

  def tearDown(self):
    super(ModelInspectTest, self).tearDown()
    shutil.rmtree(self.tempdir)

  def test_dry_run(self):
    inspector = model_inspect.ModelInspector(**self.params)
    inspector.run_model('dry')

  def test_freeze_model(self):
    inspector = model_inspect.ModelInspector(**self.params)
    inspector.run_model('freeze')

  def test_bm(self):
    inspector = model_inspect.ModelInspector(**self.params)
    inspector.run_model('bm')

  def test_eval_ckpt(self):
    inspector = model_inspect.ModelInspector(**self.params)
    inspector.run_model('ckpt')

  def test_infer(self):
    outdir = os.path.join(self.tempdir, 'infer_imgout')
    os.mkdir(outdir)
    inspector = model_inspect.ModelInspector(**self.params)

    img_path = os.path.join(self.tempdir, 'img.jpg')
    Image.fromarray(self.test_image).save(img_path)

    self.assertFalse(os.path.exists(os.path.join(outdir, '0.jpg')))
    inspector.run_model('infer', input_image=img_path, output_image_dir=outdir)
    self.assertTrue(os.path.exists(os.path.join(outdir, '0.jpg')))

    out = np.sum(np.array(Image.open(os.path.join(outdir, '0.jpg'))))
    self.assertEqual(out // 1000000, 167)

  def test_saved_model(self):
    # Only request the tflite export on TensorFlow 2.3 or newer.
    if _tf_version_at_least(2, 3):
      self.params['tflite_path'] = os.path.join(self.savedmodel_dir, 'x.tflite')
    inspector = model_inspect.ModelInspector(**self.params)
    self.assertFalse(
        os.path.exists(os.path.join(self.savedmodel_dir, 'saved_model.pb')))
    inspector.run_model('saved_model')
    self.assertTrue(
        os.path.exists(os.path.join(self.savedmodel_dir, 'saved_model.pb')))
    self.assertTrue(
        os.path.exists(
            os.path.join(self.savedmodel_dir, 'efficientdet-d0_frozen.pb')))
    if self.params.get('tflite_path', None):
      self.assertTrue(
          os.path.exists(os.path.join(self.savedmodel_dir, 'x.tflite')))

  def test_saved_model_fp16(self):
    self.params['hparams'] = 'mixed_precision=true'
    try:
      inspector = model_inspect.ModelInspector(**self.params)
      inspector.run_model('saved_model')
      self.assertTrue(
          os.path.exists(os.path.join(self.savedmodel_dir, 'saved_model.pb')))
    finally:
      # Restore the precision policy even when the test fails, so a failure
      # here cannot change the policy seen by later tests.
      utils.set_precision_policy('float32')

  def test_saved_model_infer(self):
    inspector = model_inspect.ModelInspector(**self.params)
    inspector.run_model('saved_model')

    outdir = os.path.join(self.tempdir, 'infer_imgout')
    os.mkdir(outdir)

    tf.reset_default_graph()
    self.assertFalse(os.path.exists(os.path.join(outdir, '0.jpg')))

    img_path = os.path.join(self.tempdir, 'img.jpg')
    Image.fromarray(self.test_image).save(img_path)
    inspector.run_model(
        'saved_model_infer', input_image=img_path, output_image_dir=outdir)
    self.assertTrue(os.path.exists(os.path.join(outdir, '0.jpg')))

    out = np.sum(np.array(Image.open(os.path.join(outdir, '0.jpg'))))
    self.assertEqual(out // 1000000, 167)

  def test_saved_model_infer_dynamic_batch(self):
    # Build saved model with dynamic batch size.
    self.params['batch_size'] = None
    inspector = model_inspect.ModelInspector(**self.params)
    inspector.run_model('saved_model')

    outdir = os.path.join(self.tempdir, 'infer_imgout_dyn')
    os.mkdir(outdir)

    img_path = os.path.join(self.tempdir, 'img.jpg')
    Image.fromarray(self.test_image).save(img_path)
    test_image2 = np.random.randint(0, 244, (640, 320, 3)).astype(np.uint8)
    img2_path = os.path.join(self.tempdir, 'img2.jpg')
    Image.fromarray(test_image2).save(img2_path)

    # Serve images with batch size 1.
    tf.reset_default_graph()
    self.params['batch_size'] = 1
    self.assertFalse(os.path.exists(os.path.join(outdir, '0.jpg')))
    inspector.run_model(
        'saved_model_infer', input_image=img_path, output_image_dir=outdir)
    self.assertTrue(os.path.exists(os.path.join(outdir, '0.jpg')))

    # Serve images with batch size 2.
    tf.reset_default_graph()
    self.params['batch_size'] = 2
    self.assertFalse(os.path.exists(os.path.join(outdir, '1.jpg')))
    fname = img_path.replace('img.jpg', 'img*.jpg')
    inspector.run_model(
        'saved_model_infer', input_image=fname, output_image_dir=outdir)
    self.assertTrue(os.path.exists(os.path.join(outdir, '1.jpg')))

  def test_saved_model_graph_infer(self):
    inspector = model_inspect.ModelInspector(**self.params)
    inspector.run_model('saved_model')
    tf.reset_default_graph()

    # Use the frozen graph to do inference.
    inspector.saved_model_dir = os.path.join(self.params['saved_model_dir'],
                                             'efficientdet-d0_frozen.pb')
    outdir = os.path.join(self.tempdir, 'pb_infer_imgout')
    os.mkdir(outdir)

    img_path = os.path.join(self.tempdir, 'img.jpg')
    Image.fromarray(self.test_image).save(img_path)

    self.assertFalse(os.path.exists(os.path.join(outdir, '0.jpg')))
    inspector.run_model(
        'saved_model_infer', input_image=img_path, output_image_dir=outdir)
    self.assertTrue(os.path.exists(os.path.join(outdir, '0.jpg')))

    out = np.sum(np.array(Image.open(os.path.join(outdir, '0.jpg'))))
    self.assertEqual(out // 1000000, 167)


if __name__ == '__main__':
  logging.set_verbosity(logging.WARNING)
  tf.disable_eager_execution()
  tf.test.main()
FloatType = Union[tf.Tensor, float, np.float32, np.float64]


def _get_v(b1_height: FloatType, b1_width: FloatType, b2_height: FloatType,
           b2_width: FloatType) -> tf.Tensor:
  """Get the consistency measurement of aspect ratio for ciou.

  Computes v = 4 / pi^2 * (atan(b1_width / b1_height) -
  atan(b2_width / b2_height))^2 with a hand-written gradient with respect to
  (b2_height, b2_width).

  Args:
    b1_height: height of the first box.
    b1_width: width of the first box.
    b2_height: height of the second box; the custom gradient is defined for it.
    b2_width: width of the second box; the custom gradient is defined for it.

  Returns:
    The aspect-ratio consistency term `v`.
  """

  @tf.custom_gradient
  def _get_grad_v(height, width):
    """Computes v and returns it with its custom gradient function."""
    arctan = tf.atan(tf.math.divide_no_nan(b1_width, b1_height)) - tf.atan(
        tf.math.divide_no_nan(width, height))
    v = 4 * ((arctan / math.pi)**2)

    def _grad_v(dv):
      """Grad for eager mode."""
      gdw = dv * 8 * arctan * height / (math.pi**2)
      gdh = -dv * 8 * arctan * width / (math.pi**2)
      return [gdh, gdw]

    def _grad_v_graph(dv, variables):
      """Grad for graph mode."""
      gdw = dv * 8 * arctan * height / (math.pi**2)
      gdh = -dv * 8 * arctan * width / (math.pi**2)
      return [gdh, gdw], tf.gradients(v, variables, grad_ys=dv)

    if tf.compat.v1.executing_eagerly_outside_functions():
      return v, _grad_v
    return v, _grad_v_graph

  return _get_grad_v(b2_height, b2_width)


def _iou_per_anchor(pred_boxes: FloatType,
                    target_boxes: FloatType,
                    iou_type: Text = 'iou') -> tf.Tensor:
  """Computes the IoU variant for one anchor slot.

  Each of the four coordinates may be a scalar or a tensor holding that
  coordinate for many boxes; the result then has one value per box.

  Args:
    pred_boxes: predicted boxes, as the sequence [y_min, x_min, y_max, x_max].
    target_boxes: target boxes, as the sequence [y_min, x_min, y_max, x_max].
    iou_type: one of ['iou', 'ciou', 'diou', 'giou'].

  Returns:
    The requested IoU value (not the loss) as a float `Tensor`.
  """
  # t_ denotes target boxes and p_ denotes predicted boxes.
  t_ymin, t_xmin, t_ymax, t_xmax = target_boxes
  p_ymin, p_xmin, p_ymax, p_xmax = pred_boxes

  zero = tf.convert_to_tensor(0.0, t_ymin.dtype)
  p_width = tf.maximum(zero, p_xmax - p_xmin)
  p_height = tf.maximum(zero, p_ymax - p_ymin)
  t_width = tf.maximum(zero, t_xmax - t_xmin)
  t_height = tf.maximum(zero, t_ymax - t_ymin)
  p_area = p_width * p_height
  t_area = t_width * t_height

  intersect_ymin = tf.maximum(p_ymin, t_ymin)
  intersect_xmin = tf.maximum(p_xmin, t_xmin)
  intersect_ymax = tf.minimum(p_ymax, t_ymax)
  intersect_xmax = tf.minimum(p_xmax, t_xmax)
  intersect_width = tf.maximum(zero, intersect_xmax - intersect_xmin)
  intersect_height = tf.maximum(zero, intersect_ymax - intersect_ymin)
  intersect_area = intersect_width * intersect_height

  union_area = p_area + t_area - intersect_area
  iou_v = tf.math.divide_no_nan(intersect_area, union_area)
  if iou_type == 'iou':
    return iou_v  # iou is the simplest form.

  enclose_ymin = tf.minimum(p_ymin, t_ymin)
  enclose_xmin = tf.minimum(p_xmin, t_xmin)
  enclose_ymax = tf.maximum(p_ymax, t_ymax)
  enclose_xmax = tf.maximum(p_xmax, t_xmax)

  assert iou_type in ('giou', 'diou', 'ciou')
  if iou_type == 'giou':  # giou is the generalized iou.
    enclose_width = tf.maximum(zero, enclose_xmax - enclose_xmin)
    enclose_height = tf.maximum(zero, enclose_ymax - enclose_ymin)
    enclose_area = enclose_width * enclose_height
    giou_v = iou_v - tf.math.divide_no_nan(
        (enclose_area - union_area), enclose_area)
    return giou_v

  assert iou_type in ('diou', 'ciou')
  # Put the (y, x) pair on the last axis and reduce over that axis only, so
  # the centre distance and enclosing diagonal are computed per box. Without
  # an axis the norm is taken over every element and collapses all boxes
  # into a single scalar.
  p_center = tf.stack(
      [(p_ymin + p_ymax) / 2, (p_xmin + p_xmax) / 2], axis=-1)
  t_center = tf.stack(
      [(t_ymin + t_ymax) / 2, (t_xmin + t_xmax) / 2], axis=-1)
  euclidean = tf.linalg.norm(t_center - p_center, axis=-1)
  diag_length = tf.linalg.norm(
      tf.stack(
          [enclose_ymax - enclose_ymin, enclose_xmax - enclose_xmin], axis=-1),
      axis=-1)
  diou_v = iou_v - tf.math.divide_no_nan(euclidean**2, diag_length**2)
  if iou_type == 'diou':  # diou is the distance iou.
    return diou_v

  assert iou_type == 'ciou'
  v = _get_v(p_height, p_width, t_height, t_width)
  alpha = tf.math.divide_no_nan(v, ((1 - iou_v) + v))
  return diou_v - alpha * v  # the last one is ciou.


def iou_loss(pred_boxes: FloatType,
             target_boxes: FloatType,
             iou_type: Text = 'iou') -> tf.Tensor:
  """A unified interface for computing various IoU losses.

  Let B denote the predicted box and B_gt the target box (ground truth):

    IoU = |B & B_gt| / |B | B_gt|

    GIoU = IoU - |C - B U B_gt| / |C|, where C is the smallest box covering B
      and B_gt.

    DIoU = IoU - E(B, B_gt)^2 / c^2, E is the Euclidean distance of the center
      points of B and B_gt, and c is the diagonal length of the smallest box
      covering the two boxes

    CIoU = DIoU - a * v, where a = v / ((1 - IoU) + v) is a positive trade-off
      parameter, and v measures the consistency of aspect ratio:
        v = (arctan(w_gt / h_gt) - arctan(w / h))^2 * 4 / pi^2
      where (w_gt, h_gt) and (w, h) are the width and height of the target and
      predicted box respectively.

  The returned loss is computed as 1 - one of {IoU, GIoU, DIoU, CIoU}. Anchors
  whose target box is empty (y_max <= y_min or x_max <= x_min) are masked out
  and contribute zero loss. When several anchors are packed into the last
  axis, their losses are summed.

  Args:
    pred_boxes: predicted boxes, with coordinate [y_min, x_min, y_max, x_max]*.
      It can be multiple anchors, with each anchor box has four coordinates.
    target_boxes: target boxes, with coordinate [y_min, x_min, y_max, x_max]*.
      It can be multiple anchors, with each anchor box has four coordinates.
    iou_type: one of ['iou', 'ciou', 'diou', 'giou'].

  Returns:
    IoU loss float `Tensor`.

  Raises:
    ValueError: if iou_type is not one of the supported types.
  """
  if iou_type not in ('iou', 'ciou', 'diou', 'giou'):
    raise ValueError(
        'Unknown loss_type {}, not iou/ciou/diou/giou'.format(iou_type))

  pred_boxes = tf.convert_to_tensor(pred_boxes, tf.float32)
  target_boxes = tf.cast(target_boxes, pred_boxes.dtype)

  # Split the last axis into one tensor per coordinate: (y, x, y_max, x_max)
  # repeated once per anchor.
  pred_boxes_list = tf.unstack(pred_boxes, None, axis=-1)
  target_boxes_list = tf.unstack(target_boxes, None, axis=-1)
  assert len(pred_boxes_list) == len(target_boxes_list)
  assert len(pred_boxes_list) % 4 == 0

  iou_loss_list = []
  for i in range(0, len(pred_boxes_list), 4):
    anchor_pred = pred_boxes_list[i:i + 4]
    anchor_target = target_boxes_list[i:i + 4]

    # Compute mask: 1 where the target box is non-empty, 0 elsewhere.
    t_ymin, t_xmin, t_ymax, t_xmax = anchor_target
    mask = tf.math.logical_and(t_ymax > t_ymin, t_xmax > t_xmin)
    mask = tf.cast(mask, t_ymin.dtype)
    # Loss should be mask * (1 - iou) = mask - masked_iou.
    anchor_pred = [b * mask for b in anchor_pred]
    anchor_target = [b * mask for b in anchor_target]
    iou_loss_list.append(
        mask *
        (1 - tf.squeeze(_iou_per_anchor(anchor_pred, anchor_target, iou_type))))
  if len(iou_loss_list) == 1:
    return iou_loss_list[0]
  return tf.reduce_sum(tf.stack(iou_loss_list), 0)
MEAN_RGB = [127.0, 127.0, 127.0]
STDDEV_RGB = [128.0, 128.0, 128.0]


def efficientnet_lite_params(model_name):
  """Get efficientnet params based on model name.

  Args:
    model_name: one of 'efficientnet-lite0' ... 'efficientnet-lite4'.

  Returns:
    A tuple (width_coefficient, depth_coefficient, resolution, dropout_rate).

  Raises:
    KeyError: if model_name is not one of the predefined lite models.
  """
  params_dict = {
      # (width_coefficient, depth_coefficient, resolution, dropout_rate)
      'efficientnet-lite0': (1.0, 1.0, 224, 0.2),
      'efficientnet-lite1': (1.0, 1.1, 240, 0.2),
      'efficientnet-lite2': (1.1, 1.2, 260, 0.3),
      'efficientnet-lite3': (1.2, 1.4, 280, 0.3),
      'efficientnet-lite4': (1.4, 1.8, 300, 0.3),
  }
  return params_dict[model_name]


_DEFAULT_BLOCKS_ARGS = [
    'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25',
    'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25',
    'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25',
    'r1_k3_s11_e6_i192_o320_se0.25',
]


def efficientnet_lite(width_coefficient=None,
                      depth_coefficient=None,
                      dropout_rate=0.2,
                      survival_prob=0.8):
  """Creates the global params for an efficientnet-lite model.

  Args:
    width_coefficient: width scaling coefficient of the network.
    depth_coefficient: depth scaling coefficient of the network.
    dropout_rate: dropout rate stored in the global params.
    survival_prob: survival probability stored in the global params.

  Returns:
    An efficientnet_model.GlobalParams instance.
  """
  global_params = efficientnet_model.GlobalParams(
      blocks_args=_DEFAULT_BLOCKS_ARGS,
      batch_norm_momentum=0.99,
      batch_norm_epsilon=1e-3,
      dropout_rate=dropout_rate,
      survival_prob=survival_prob,
      data_format='channels_last',
      num_classes=1000,
      width_coefficient=width_coefficient,
      depth_coefficient=depth_coefficient,
      depth_divisor=8,
      min_depth=None,
      relu_fn=tf.nn.relu6,  # Relu6 is for easier quantization.
      # The default is TPU-specific batch norm.
      # The alternative is tf.layers.BatchNormalization.
      batch_norm=utils.TpuBatchNormalization,  # TPU-specific requirement.
      clip_projection_output=False,
      fix_head_stem=True,  # Don't scale stem and head.
      local_pooling=True,  # special cases for tflite issues.
      use_se=False)  # SE is not well supported on many lite devices.
  return global_params


def get_model_params(model_name, override_params):
  """Get the block args and global params for a given model.

  Args:
    model_name: string, the predefined model name.
    override_params: optional dictionary of GlobalParams fields to override.

  Returns:
    A tuple (blocks_args, global_params).

  Raises:
    NotImplementedError: if model_name is not a predefined lite model.
  """
  if not model_name.startswith('efficientnet-lite'):
    raise NotImplementedError('model name is not pre-defined: %s' % model_name)
  try:
    width_coefficient, depth_coefficient, _, dropout_rate = (
        efficientnet_lite_params(model_name))
  except KeyError:
    # The name has the right prefix but no entry in the params table, e.g.
    # 'efficientnet-lite9'. Report it like any other undefined model rather
    # than leaking the table's KeyError.
    raise NotImplementedError('model name is not pre-defined: %s' % model_name)
  global_params = efficientnet_lite(
      width_coefficient, depth_coefficient, dropout_rate)

  if override_params:
    # ValueError will be raised here if override_params has fields not included
    # in global_params.
    global_params = global_params._replace(**override_params)

  decoder = efficientnet_builder.BlockDecoder()
  blocks_args = decoder.decode(global_params.blocks_args)

  logging.info('global_params= %s', global_params)
  return blocks_args, global_params


def _prepare_override_params(override_params):
  """Returns a private copy of override_params with legacy keys translated."""
  # Work on a copy so the caller's dictionary is never modified.
  params = dict(override_params) if override_params else {}
  # For backward compatibility.
  if params.get('drop_connect_rate', None):
    params['survival_prob'] = 1 - params['drop_connect_rate']
  return params


def build_model(images,
                model_name,
                training,
                override_params=None,
                model_dir=None,
                fine_tuning=False,
                features_only=False,
                pooled_features_only=False):
  """A helper function to create a model and return predicted logits.

  Args:
    images: input images tensor.
    model_name: string, the predefined model name.
    training: boolean, whether the model is constructed for training.
    override_params: A dictionary of params for overriding. Fields must exist in
      efficientnet_model.GlobalParams. The dictionary is not modified.
    model_dir: string, optional model dir for saving configs.
    fine_tuning: boolean, whether the model is used for finetuning.
    features_only: build the base feature network only (excluding final
      1x1 conv layer, global pooling, dropout and fc head).
    pooled_features_only: build the base network for features extraction (after
      1x1 conv layer and global pooling, but before dropout and fc head).

  Returns:
    logits: the logits tensor of classes.
    endpoints: the endpoints for each layer.

  Raises:
    When model_name specified an undefined model, raises NotImplementedError.
    When override_params has invalid fields, raises ValueError.
  """
  assert isinstance(images, tf.Tensor)
  assert not (features_only and pooled_features_only)

  override_params = _prepare_override_params(override_params)

  if not training or fine_tuning:
    # Outside of from-scratch training, swap the TPU batch norm for the
    # regular one.
    override_params['batch_norm'] = utils.BatchNormalization
  blocks_args, global_params = get_model_params(model_name, override_params)

  if model_dir:
    param_file = os.path.join(model_dir, 'model_params.txt')
    if not tf.gfile.Exists(param_file):
      if not tf.gfile.Exists(model_dir):
        tf.gfile.MakeDirs(model_dir)
      with tf.gfile.GFile(param_file, 'w') as f:
        logging.info('writing to %s', param_file)
        f.write('model_name= %s\n\n' % model_name)
        f.write('global_params= %s\n\n' % str(global_params))
        f.write('blocks_args= %s\n\n' % str(blocks_args))

  model = efficientnet_model.Model(blocks_args, global_params, model_name)
  outputs = model(
      images,
      training=training,
      features_only=features_only,
      pooled_features_only=pooled_features_only)
  if features_only:
    outputs = tf.identity(outputs, 'features')
  elif pooled_features_only:
    outputs = tf.identity(outputs, 'pooled_features')
  else:
    outputs = tf.identity(outputs, 'logits')
  return outputs, model.endpoints


def build_model_base(images, model_name, training, override_params=None):
  """Create a base feature network and return the features before pooling.

  Args:
    images: input images tensor.
    model_name: string, the predefined model name.
    training: boolean, whether the model is constructed for training.
    override_params: A dictionary of params for overriding. Fields must exist in
      efficientnet_model.GlobalParams. The dictionary is not modified.

  Returns:
    features: base features before pooling.
    endpoints: the endpoints for each layer.

  Raises:
    When model_name specified an undefined model, raises NotImplementedError.
    When override_params has invalid fields, raises ValueError.
  """
  assert isinstance(images, tf.Tensor)
  override_params = _prepare_override_params(override_params)

  blocks_args, global_params = get_model_params(model_name, override_params)

  model = efficientnet_model.Model(blocks_args, global_params, model_name)
  features = model(images, training=training, features_only=True)

  features = tf.identity(features, 'features')
  return features, model.endpoints
class EvaluationMetric():
  """COCO evaluation metric class.

  This class cannot inherit from tf.keras.metrics.Metric due to numpy.
  """

  def __init__(self, filename=None, testdev_dir=None):
    """Constructs COCO evaluation class.

    update_state() collects the detections of each image in self.detections
    and, when no groundtruth file is given, builds a COCO-format groundtruth
    dataset from the data it receives. evaluate() then runs the COCO
    evaluation on everything collected.

    Args:
      filename: Ground truth JSON file name. If filename is None, use
        groundtruth data passed from the dataloader for evaluation. filename is
        ignored if testdev_dir is not None.
      testdev_dir: folder name for testdev data. If not None, evaluation only
        writes the detections as a JSON file into this folder and no
        groundtruth is used.
    """
    self.filename = filename
    self.testdev_dir = testdev_dir
    self.metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1',
                         'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl']
    self.reset_states()

  def reset_states(self):
    """Clears all collected detections, groundtruth and cached metrics."""
    self.detections = []
    self.dataset = {
        'images': [],
        'annotations': [],
        'categories': []
    }
    self.image_id = 1
    self.annotation_id = 1
    self.category_ids = []
    self.metric_values = None

  def evaluate(self):
    """Evaluates with detections from all images with COCO API.

    Returns:
      coco_metric: float numpy array. In validation mode it holds the
        coco-style evaluation metrics computed by COCOeval. In test-dev mode
        the detections are written to a JSON file and a [1]-shaped array
        containing 0 is returned.
    """
    if self.testdev_dir:
      # Run on test-dev dataset: only dump the detections, no groundtruth is
      # needed.
      box_result_list = [
          {
              'image_id': int(det[0]),
              'category_id': int(det[6]),
              'bbox': np.around(
                  det[1:5].astype(np.float64), decimals=2).tolist(),
              'score': float(np.around(det[5], decimals=3)),
          }
          for det in self.detections
      ]
      json.encoder.FLOAT_REPR = lambda o: format(o, '.3f')
      # Must be in the format of 'detections_test-dev2017_xxx_results'.
      fname = 'detections_test-dev2017_test_results'
      output_path = os.path.join(self.testdev_dir, fname + '.json')
      logging.info('Writing output json file to: %s', output_path)
      with tf.io.gfile.GFile(output_path, 'w') as fid:
        json.dump(box_result_list, fid)
      return np.array([0.], dtype=np.float32)

    # Run on validation dataset.
    if self.filename:
      coco_gt = COCO(self.filename)
    else:
      coco_gt = COCO()
      coco_gt.dataset = self.dataset
      coco_gt.createIndex()

    detections = np.array(self.detections)
    image_ids = list(set(detections[:, 0]))
    coco_dt = coco_gt.loadRes(detections)
    coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
    coco_eval.params.imgIds = image_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    coco_metrics = coco_eval.stats
    return np.array(coco_metrics, dtype=np.float32)

  def result(self):
    """Return the metric values (and compute it if needed)."""
    # Compare against None explicitly: the cached value is a numpy array,
    # whose truth value is ambiguous for more than one element and False for
    # a single zero.
    if self.metric_values is None:
      self.metric_values = self.evaluate()
    return self.metric_values

  def update_state(self, groundtruth_data, detections):
    """Update detection results and groundtruth data.

    Append detection results to self.detections to aggregate results from
    all validation set. The groundtruth_data is parsed and added into a
    dictionary with the same format as COCO dataset, which can be used for
    evaluation.

    Args:
      groundtruth_data: Groundtruth annotations, indexed by image, with each
        row representing [y1, x1, y2, x2, is_crowd, area, class].
      detections: Detection results, one 2-D array per image, with each row
        representing [image_id, x, y, width, height, score, class].
    """
    for i, det in enumerate(detections):
      # Filter out detections with predicted class label = -1.
      indices = np.where(det[:, -1] > -1)[0]
      det = det[indices]
      if det.shape[0] == 0:
        continue
      # An image id of -1 means none was provided; use the running counter.
      image_id = det[0, 0]
      if image_id == -1:
        image_id = self.image_id
      det[:, 0] = image_id
      self.detections.extend(det)

      if not self.filename and not self.testdev_dir:
        # Process groundtruth data only if filename is empty and no test_dev.
        self.dataset['images'].append({
            'id': int(image_id),
        })

        # Add annotations, skipping rows whose class is -1.
        indices = np.where(groundtruth_data[i, :, -1] > -1)[0]
        for data in groundtruth_data[i, indices]:
          box = data[0:4]
          is_crowd = data[4]
          area = (box[3] - box[1]) * (box[2] - box[0])
          category_id = data[6]
          if category_id < 0:
            break
          self.dataset['annotations'].append({
              'id': int(self.annotation_id),
              'image_id': int(image_id),
              'category_id': int(category_id),
              # Convert [y1, x1, y2, x2] to [x, y, width, height].
              'bbox': [box[1], box[0], box[3] - box[1], box[2] - box[0]],
              'area': area,
              'iscrowd': int(is_crowd)
          })
          self.annotation_id += 1
          self.category_ids.append(category_id)

      self.image_id += 1

    if not self.filename:
      self.category_ids = list(set(self.category_ids))
      self.dataset['categories'] = [
          {'id': int(category_id)} for category_id in self.category_ids
      ]
212 | """ 213 | with tf.name_scope('coco_metric'): 214 | if self.testdev_dir: 215 | update_op = tf.numpy_function(self.update_state, 216 | [groundtruth_data, detections], []) 217 | metrics = tf.numpy_function(self.result, [], tf.float32) 218 | metrics_dict = {'AP': (metrics, update_op)} 219 | return metrics_dict 220 | else: 221 | update_op = tf.numpy_function(self.update_state, 222 | [groundtruth_data, detections], []) 223 | metrics = tf.numpy_function(self.result, [], tf.float32) 224 | metrics_dict = {} 225 | for i, name in enumerate(self.metric_names): 226 | metrics_dict[name] = (metrics[i], update_op) 227 | return metrics_dict 228 | -------------------------------------------------------------------------------- /dataset/create_coco_tfrecord_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class CreateCocoTFRecordTest(tf.test.TestCase):
  """Exercises create_coco_tfrecord on small synthetic JPEG images."""

  def setUp(self):
    super(CreateCocoTFRecordTest, self).setUp()
    flags.FLAGS.num_threads = 1

  def _assertProtoEqual(self, proto_field, expectation):
    """Helper function to assert if a proto field equals some value.

    Args:
      proto_field: The protobuf field to compare.
      expectation: The expected value of the protobuf field.
    """
    self.assertListEqual(list(proto_field), expectation)

  def _assertFeature(self, example, key, list_attr, expectation):
    """Asserts that feature `key` of `example` holds `expectation`.

    Args:
      example: The example proto returned by create_tf_example.
      key: Feature name, e.g. 'image/height'.
      list_attr: Which value list to read: 'int64_list', 'bytes_list' or
        'float_list'.
      expectation: The expected list of values.
    """
    feature = example.features.feature[key]
    self._assertProtoEqual(getattr(feature, list_attr).value, expectation)

  def _write_random_image(self, directory, file_name, height, width):
    """Saves a random `height` x `width` RGB image as directory/file_name."""
    pixels = np.random.rand(height, width, 3)
    target = os.path.join(directory, file_name)
    PIL.Image.fromarray(pixels, 'RGB').save(target)

  def test_create_tf_example(self):
    file_name = 'tmp_image.jpg'
    tmp_dir = self.get_temp_dir()
    self._write_random_image(tmp_dir, file_name, 256, 256)

    image = {
        'file_name': file_name,
        'height': 256,
        'width': 256,
        'id': 11,
    }
    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [64, 64, 128, 128],
        'category_id': 2,
        'id': 1000,
    }]
    category_index = {
        1: {'name': 'dog', 'id': 1},
        2: {'name': 'cat', 'id': 2},
        3: {'name': 'human', 'id': 3},
    }

    result = create_coco_tfrecord.create_tf_example(
        image, tmp_dir, annotations_list, category_index)
    _, example, num_annotations_skipped = result

    self.assertEqual(num_annotations_skipped, 0)
    expected_features = [
        ('image/height', 'int64_list', [256]),
        ('image/width', 'int64_list', [256]),
        ('image/filename', 'bytes_list', [six.b(file_name)]),
        ('image/source_id', 'bytes_list', [six.b(str(image['id']))]),
        ('image/format', 'bytes_list', [six.b('jpeg')]),
        ('image/object/bbox/xmin', 'float_list', [0.25]),
        ('image/object/bbox/ymin', 'float_list', [0.25]),
        ('image/object/bbox/xmax', 'float_list', [0.75]),
        ('image/object/bbox/ymax', 'float_list', [0.75]),
        ('image/object/class/text', 'bytes_list', [six.b('cat')]),
    ]
    for key, list_attr, expectation in expected_features:
      self._assertFeature(example, key, list_attr, expectation)

  def test_create_tf_example_with_instance_masks(self):
    file_name = 'tmp_image.jpg'
    tmp_dir = self.get_temp_dir()
    self._write_random_image(tmp_dir, file_name, 8, 8)

    image = {
        'file_name': file_name,
        'height': 8,
        'width': 8,
        'id': 11,
    }
    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [0, 0, 8, 8],
        'segmentation': [[4, 0, 0, 0, 0, 4], [8, 4, 4, 8, 8, 8]],
        'category_id': 1,
        'id': 1000,
    }]
    category_index = {
        1: {'name': 'dog', 'id': 1},
    }

    result = create_coco_tfrecord.create_tf_example(
        image, tmp_dir, annotations_list, category_index, include_masks=True)
    _, example, num_annotations_skipped = result

    self.assertEqual(num_annotations_skipped, 0)
    expected_features = [
        ('image/height', 'int64_list', [8]),
        ('image/width', 'int64_list', [8]),
        ('image/filename', 'bytes_list', [six.b(file_name)]),
        ('image/source_id', 'bytes_list', [six.b(str(image['id']))]),
        ('image/format', 'bytes_list', [six.b('jpeg')]),
        ('image/object/bbox/xmin', 'float_list', [0]),
        ('image/object/bbox/ymin', 'float_list', [0]),
        ('image/object/bbox/xmax', 'float_list', [1]),
        ('image/object/bbox/ymax', 'float_list', [1]),
        ('image/object/class/text', 'bytes_list', [six.b('dog')]),
    ]
    for key, list_attr, expectation in expected_features:
      self._assertFeature(example, key, list_attr, expectation)

    # Decode every PNG-encoded mask back into a numpy array.
    mask_feature = example.features.feature['image/object/mask']
    pil_masks = []
    for encoded_mask in mask_feature.bytes_list.value:
      pil_masks.append(np.array(PIL.Image.open(io.BytesIO(encoded_mask))))

    self.assertEqual(len(pil_masks), 1)
    expected_mask = [
        [1, 1, 1, 0, 0, 0, 0, 0],
        [1, 1, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 0, 1, 1],
        [0, 0, 0, 0, 0, 1, 1, 1],
        [0, 0, 0, 0, 1, 1, 1, 1],
    ]
    self.assertAllEqual(pil_masks[0], expected_mask)

  def test_create_sharded_tf_record(self):
    tmp_dir = self.get_temp_dir()
    image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
    for image_path in image_paths:
      self._write_random_image(tmp_dir, image_path, 256, 256)

    # Two images, only the first of which carries an annotation.
    images = [
        {'file_name': path, 'height': 256, 'width': 256, 'id': image_id}
        for path, image_id in zip(image_paths, (11, 12))
    ]
    annotations = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [64, 64, 128, 128],
        'category_id': 2,
        'id': 1000,
    }]
    category_index = [
        {'name': 'dog', 'id': 1},
        {'name': 'cat', 'id': 2},
        {'name': 'human', 'id': 3},
    ]
    groundtruth_data = {
        'images': images,
        'annotations': annotations,
        'categories': category_index,
    }
    annotation_file = os.path.join(tmp_dir, 'annotation.json')
    with open(annotation_file, 'w') as annotation_fid:
      json.dump(groundtruth_data, annotation_fid)

    output_path = os.path.join(tmp_dir, 'out')
    create_coco_tfrecord._create_tf_record_from_coco_annotations(
        annotation_file,
        tmp_dir,
        output_path,
        num_shards=2,
        include_masks=False)
    for shard_suffix in ('-00000-of-00002.tfrecord',
                         '-00001-of-00002.tfrecord'):
      self.assertTrue(os.path.exists(output_path + shard_suffix))
class ModelTest(tf.test.TestCase):
  """Builds single-stage EfficientNet models and checks outputs/endpoints."""

  def _zero_images(self):
    """Returns the all-zero input batch shared by every test."""
    return tf.zeros((10, 128, 128, 3), dtype=tf.float32)

  def _build_model(self, batch_norm, **block_overrides):
    """Creates a model with one BlockArgs entry.

    Args:
      batch_norm: Batch norm class handed to GlobalParams.
      **block_overrides: BlockArgs fields that differ from the defaults used
        by these tests (e.g. fused_conv, super_pixel, se_ratio).

    Returns:
      The constructed efficientnet_model.Model.
    """
    global_params = efficientnet_model.GlobalParams(
        1.0,
        1.0,
        0,
        'channels_last',
        num_classes=10,
        batch_norm=batch_norm)
    block_kwargs = dict(
        kernel_size=3,
        num_repeat=3,
        input_filters=3,
        output_filters=6,
        expand_ratio=6,
        id_skip=True,
        strides=[2, 2],
        conv_type=0,
        fused_conv=0,
        super_pixel=0)
    block_kwargs.update(block_overrides)
    blocks_args = [efficientnet_model.BlockArgs(**block_kwargs)]
    return efficientnet_model.Model(blocks_args, global_params)

  def test_bottleneck_block(self):
    """Test for creating a model with bottleneck block arguments."""
    images = self._zero_images()
    model = self._build_model(utils.batch_norm_class(False))
    outputs = model(images, training=True)
    self.assertEqual((10, 10), outputs.shape)

  def test_fused_bottleneck_block(self):
    """Test for creating a model with fused bottleneck block arguments."""
    images = self._zero_images()
    model = self._build_model(utils.TpuBatchNormalization, fused_conv=1)
    outputs = model(images, training=True)
    self.assertEqual((10, 10), outputs.shape)

  def test_bottleneck_block_with_superpixel_layer(self):
    """Test for creating a model with super_pixel=1 block arguments."""
    images = self._zero_images()
    model = self._build_model(utils.TpuBatchNormalization, super_pixel=1)
    outputs = model(images, training=True)
    self.assertEqual((10, 10), outputs.shape)

  def test_bottleneck_block_with_superpixel_tranformation(self):
    """Test for creating a model with super_pixel=2 block arguments."""
    images = self._zero_images()
    model = self._build_model(utils.TpuBatchNormalization, super_pixel=2)
    outputs = model(images, training=True)
    self.assertEqual((10, 10), outputs.shape)

  def test_se_block(self):
    """Test for creating a model with SE block arguments."""
    images = self._zero_images()
    model = self._build_model(
        utils.TpuBatchNormalization, id_skip=False, se_ratio=0.8)
    outputs = model(images, training=True)
    self.assertEqual((10, 10), outputs.shape)

  def test_variables(self):
    """Test for variables in blocks to be included in `model.variables`."""
    images = self._zero_images()
    model = self._build_model(
        utils.TpuBatchNormalization, id_skip=False, se_ratio=0.8)
    _ = model(images, training=True)
    var_names = {var.name for var in model.variables}
    self.assertIn('model/blocks_0/conv2d/kernel:0', var_names)

  def test_reduction_endpoint_with_single_block_with_sp(self):
    """Test reduction point with single block/layer and super_pixel=1."""
    images = self._zero_images()
    model = self._build_model(
        utils.TpuBatchNormalization,
        num_repeat=1,
        id_skip=False,
        se_ratio=0.8,
        super_pixel=1)
    _ = model(images, training=True)
    self.assertIn('reduction_1', model.endpoints)
    # single block should have one and only one reduction endpoint
    self.assertNotIn('reduction_2', model.endpoints)

  def test_reduction_endpoint_with_single_block_without_sp(self):
    """Test reduction point with single block/layer and super_pixel=0."""
    images = self._zero_images()
    model = self._build_model(
        utils.TpuBatchNormalization,
        num_repeat=1,
        id_skip=False,
        se_ratio=0.8)
    _ = model(images, training=True)
    self.assertIn('reduction_1', model.endpoints)
    # single block should have one and only one reduction endpoint
    self.assertNotIn('reduction_2', model.endpoints)
class ArgMaxMatcher(matcher.Matcher):
  """Matcher based on highest value.

  This class computes matches from a similarity matrix. Each column is matched
  to a single row.

  To support object detection target assignment this class enables setting both
  matched_threshold (upper threshold) and unmatched_threshold (lower thresholds)
  defining three categories of similarity which define whether examples are
  positive, negative, or ignored:
  (1) similarity >= matched_threshold: Highest similarity. Matched/Positive!
  (2) matched_threshold > similarity >= unmatched_threshold: Medium similarity.
          Depending on negatives_lower_than_unmatched, this is either
          Unmatched/Negative OR Ignore.
  (3) unmatched_threshold > similarity: Lowest similarity. Depending on flag
          negatives_lower_than_unmatched, either Unmatched/Negative OR Ignore.
  For ignored matches this class sets the values in the Match object to -2.
  """

  def __init__(self,
               matched_threshold,
               unmatched_threshold=None,
               negatives_lower_than_unmatched=True,
               force_match_for_each_row=False):
    """Construct ArgMaxMatcher.

    Args:
      matched_threshold: Threshold for positive matches. Positive if
        sim >= matched_threshold, where sim is the maximum value of the
        similarity matrix for a given column. Set to None for no threshold.
      unmatched_threshold: Threshold for negative matches. Negative if
        sim < unmatched_threshold. Defaults to matched_threshold
        when set to None.
      negatives_lower_than_unmatched: Boolean which defaults to True. If True
        then negative matches are the ones below the unmatched_threshold,
        whereas ignored matches are in between the matched and unmatched
        threshold. If False, then negative matches are in between the matched
        and unmatched threshold, and everything lower than unmatched is ignored.
      force_match_for_each_row: If True, ensures that each row is matched to
        at least one column (which is not guaranteed otherwise if the
        matched_threshold is high). Defaults to False. See
        argmax_matcher_test.testMatcherForceMatch() for an example.

    Raises:
      ValueError: if unmatched_threshold is set but matched_threshold is not
        set, if unmatched_threshold > matched_threshold, or if
        negatives_lower_than_unmatched is False while both thresholds are
        equal.
    """
    if (matched_threshold is None) and (unmatched_threshold is not None):
      raise ValueError('Need to also define matched_threshold when '
                       'unmatched_threshold is defined')
    self._matched_threshold = matched_threshold
    if unmatched_threshold is None:
      self._unmatched_threshold = matched_threshold
    else:
      if unmatched_threshold > matched_threshold:
        raise ValueError('unmatched_threshold needs to be smaller or equal '
                         'to matched_threshold')
      self._unmatched_threshold = unmatched_threshold
    if not negatives_lower_than_unmatched:
      # With equal thresholds the "in between" band is empty, so there would
      # be no negatives at all.
      if self._unmatched_threshold == self._matched_threshold:
        raise ValueError('When negatives are in between matched and '
                         'unmatched thresholds, these cannot be of equal '
                         'value. matched: %s, unmatched: %s' %
                         (self._matched_threshold, self._unmatched_threshold))
    self._force_match_for_each_row = force_match_for_each_row
    self._negatives_lower_than_unmatched = negatives_lower_than_unmatched

  def _match(self, similarity_matrix):
    """Tries to match each column of the similarity matrix to a row.

    Args:
      similarity_matrix: tensor of shape [N, M] representing any similarity
        metric.

    Returns:
      Match object with corresponding matches for each of M columns.
    """

    def _match_when_rows_are_empty():
      """Performs matching when the rows of similarity matrix are empty.

      When the rows are empty, all detections are false positives. So we return
      a tensor of -1's to indicate that the columns do not match to any rows.

      Returns:
        matches: int32 tensor indicating the row each column matches to.
      """
      similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
          similarity_matrix)
      return -1 * tf.ones([similarity_matrix_shape[1]], dtype=tf.int32)

    def _match_when_rows_are_non_empty():
      """Performs matching when the rows of similarity matrix are non empty.

      Returns:
        matches: int32 tensor indicating the row each column matches to.
      """
      # Matches for each column
      matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

      # Deal with matched and unmatched threshold
      if self._matched_threshold is not None:
        # Boolean indicators of columns below / between the two thresholds.
        matched_vals = tf.reduce_max(similarity_matrix, 0)
        below_unmatched_threshold = tf.greater(self._unmatched_threshold,
                                               matched_vals)
        between_thresholds = tf.logical_and(
            tf.greater_equal(matched_vals, self._unmatched_threshold),
            tf.greater(self._matched_threshold, matched_vals))

        if self._negatives_lower_than_unmatched:
          matches = self._set_values_using_indicator(matches,
                                                     below_unmatched_threshold,
                                                     -1)
          matches = self._set_values_using_indicator(matches,
                                                     between_thresholds,
                                                     -2)
        else:
          matches = self._set_values_using_indicator(matches,
                                                     below_unmatched_threshold,
                                                     -2)
          matches = self._set_values_using_indicator(matches,
                                                     between_thresholds,
                                                     -1)

      if self._force_match_for_each_row:
        similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
            similarity_matrix)
        # For every row pick its best column, then overwrite that column's
        # match with the row, regardless of thresholds.
        force_match_column_ids = tf.argmax(similarity_matrix, 1,
                                           output_type=tf.int32)
        force_match_column_indicators = tf.one_hot(
            force_match_column_ids, depth=similarity_matrix_shape[1])
        force_match_row_ids = tf.argmax(force_match_column_indicators, 0,
                                        output_type=tf.int32)
        force_match_column_mask = tf.cast(
            tf.reduce_max(force_match_column_indicators, 0), tf.bool)
        final_matches = tf.where(force_match_column_mask,
                                 force_match_row_ids, matches)
        return final_matches
      else:
        return matches

    # Choose the branch statically when the shape is known; otherwise defer
    # the decision to graph execution with tf.cond.
    if similarity_matrix.shape.is_fully_defined():
      if similarity_matrix.shape[0] == 0:
        return _match_when_rows_are_empty()
      else:
        return _match_when_rows_are_non_empty()
    else:
      return tf.cond(
          tf.greater(tf.shape(similarity_matrix)[0], 0),
          _match_when_rows_are_non_empty, _match_when_rows_are_empty)

  def _set_values_using_indicator(self, x, indicator, val):
    """Set the indicated fields of x to val.

    Args:
      x: tensor.
      indicator: boolean with same shape as x.
      val: scalar with value to set.

    Returns:
      modified tensor.
    """
    indicator = tf.cast(indicator, x.dtype)
    # Arithmetic select: keep x where indicator is 0, use val where it is 1.
    return x * (1 - indicator) + val * indicator
class Match(object):
  """Class to store results from the matcher.

  Wraps the rank-1 int32 `match_results` tensor and provides convenient
  methods to query it. For every column i:
    * match_results[i] >= 0: column i is matched to row match_results[i].
    * match_results[i] == -1: column i is unmatched.
    * match_results[i] == -2: column i is ignored.
  """

  def __init__(self, match_results):
    """Constructs a Match object.

    Args:
      match_results: int32 tensor of shape [N] with (1) match_results[i]>=0,
        meaning that column i is matched with row match_results[i].
        (2) match_results[i]=-1, meaning that column i is not matched.
        (3) match_results[i]=-2, meaning that column i is ignored.

    Raises:
      ValueError: if match_results does not have rank 1 or is not an int32
        tensor.
    """
    if match_results.shape.ndims != 1:
      raise ValueError('match_results should have rank 1')
    if match_results.dtype != tf.int32:
      # Only int32 is accepted; the message now says exactly that.
      raise ValueError('match_results should be an int32 tensor')
    self._match_results = match_results

  @property
  def match_results(self):
    """The accessor for match results.

    Returns:
      the tensor which encodes the match results.
    """
    return self._match_results

  def matched_column_indices(self):
    """Returns column indices that match to some row.

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))

  def matched_column_indicator(self):
    """Returns boolean column indicator where True means the column is matched.

    Returns:
      column_indicator: boolean vector, with one entry per column, which is
        True for all matched column indices.
    """
    return tf.greater_equal(self._match_results, 0)

  def num_matched_columns(self):
    """Returns number (int32 scalar tensor) of matched columns."""
    return tf.shape(self.matched_column_indices())[0]

  def unmatched_column_indices(self):
    """Returns column indices that do not match any row.

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))

  def unmatched_column_indicator(self):
    """Returns boolean column indicator where True means the column is unmatched.

    Returns:
      column_indicator: boolean vector, with one entry per column, which is
        True for all unmatched column indices.
    """
    return tf.equal(self._match_results, -1)

  def num_unmatched_columns(self):
    """Returns number (int32 scalar tensor) of unmatched columns."""
    return tf.shape(self.unmatched_column_indices())[0]

  def ignored_column_indices(self):
    """Returns column indices that are ignored (neither Matched nor Unmatched).

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))

  def ignored_column_indicator(self):
    """Returns boolean column indicator where True means the column is ignored.

    Returns:
      column_indicator: boolean vector which is True for all ignored column
        indices.
    """
    return tf.equal(self._match_results, -2)

  def num_ignored_columns(self):
    """Returns number (int32 scalar tensor) of ignored columns."""
    return tf.shape(self.ignored_column_indices())[0]

  def unmatched_or_ignored_column_indices(self):
    """Returns column indices that are unmatched or ignored.

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))

  def matched_row_indices(self):
    """Returns row indices that match some column.

    The indices returned by this op are ordered so as to be in correspondence
    with the output of matched_column_indices(). For example if
    self.matched_column_indices() is [0,2], and self.matched_row_indices() is
    [7, 3], then we know that column 0 was matched to row 7 and column 2 was
    matched to row 3.

    Returns:
      row_indices: int32 tensor of shape [K] with row indices.
    """
    return self._reshape_and_cast(
        tf.gather(self._match_results, self.matched_column_indices()))

  def _reshape_and_cast(self, t):
    """Flattens `t` to rank 1 and casts it to int32."""
    return tf.cast(tf.reshape(t, [-1]), tf.int32)

  def gather_based_on_match(self, input_tensor, unmatched_value,
                            ignored_value):
    """Gathers elements from `input_tensor` based on match results.

    For columns that are matched to a row, gathered_tensor[col] is set to
    input_tensor[match_results[col]]. For columns that are unmatched,
    gathered_tensor[col] is set to unmatched_value. Finally, for columns that
    are ignored gathered_tensor[col] is set to ignored_value.

    Note that the input_tensor.shape[1:] must match with unmatched_value.shape
    and ignored_value.shape

    Args:
      input_tensor: Tensor to gather values from.
      unmatched_value: Constant tensor value for unmatched columns.
      ignored_value: Constant tensor value for ignored columns.

    Returns:
      gathered_tensor: A tensor containing values gathered from input_tensor.
        The shape of the gathered tensor is [match_results.shape[0]] +
        input_tensor.shape[1:].
    """
    # Prepend [ignored_value, unmatched_value] so that, after shifting the
    # match results by +2, index 0 selects the ignored value (-2), index 1 the
    # unmatched value (-1) and index k+2 selects row k of the original input.
    input_tensor = tf.concat([tf.stack([ignored_value, unmatched_value]),
                              input_tensor], axis=0)
    gather_indices = tf.maximum(self.match_results + 2, 0)
    gathered_tensor = tf.gather(input_tensor, gather_indices)
    return gathered_tensor


class Matcher(metaclass=abc.ABCMeta):
  """Abstract base class for matcher.

  Subclasses implement `_match`; callers use `match`, which wraps the raw
  result in a `Match` object. The metaclass is declared with Python 3 syntax
  so that the abstract `_match` is actually enforced at instantiation time.
  """

  def match(self, similarity_matrix, scope=None, **params):
    """Computes matches among row and column indices and returns the result.

    Computes matches among the row and column indices based on the similarity
    matrix and optional arguments.

    Args:
      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
        where higher value means more similar.
      scope: Op scope name. Defaults to 'Match' if None.
      **params: Additional keyword arguments for specific implementations of
        the Matcher.

    Returns:
      A Match object with the results of matching.
    """
    with tf.name_scope(scope, 'Match', [similarity_matrix, params]):
      return Match(self._match(similarity_matrix, **params))

  @abc.abstractmethod
  def _match(self, similarity_matrix, **params):
    """Method to be overridden by implementations.

    Args:
      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
        where higher value means more similar.
      **params: Additional keyword arguments for specific implementations of
        the Matcher.

    Returns:
      match_results: Integer tensor of shape [M]: match_results[i]>=0 means
        that column i is matched to row match_results[i], match_results[i]=-1
        means that the column is not matched. match_results[i]=-2 means that
        the column is ignored (usually this happens when there is a very weak
        match which one neither wants as positive nor negative example).
    """
MAX_DETECTION_POINTS = 5000


def decode_box_outputs(pred_boxes, anchor_boxes):
  """Transforms relative regression coordinates to absolute positions.

  Network predictions are normalized and relative to a given anchor; this
  reverses the transformation and outputs absolute coordinates for the input
  image.

  Args:
    pred_boxes: predicted box regression targets; the last axis holds
      (ty, tx, th, tw).
    anchor_boxes: anchors on all feature levels; the last axis holds
      (ymin, xmin, ymax, xmax).
  Returns:
    outputs: bounding boxes, with (ymin, xmin, ymax, xmax) on the last axis.
  """
  ycenter_a = (anchor_boxes[..., 0] + anchor_boxes[..., 2]) / 2
  xcenter_a = (anchor_boxes[..., 1] + anchor_boxes[..., 3]) / 2
  ha = anchor_boxes[..., 2] - anchor_boxes[..., 0]
  wa = anchor_boxes[..., 3] - anchor_boxes[..., 1]
  ty, tx, th, tw = tf.unstack(pred_boxes, num=4, axis=-1)

  # Sizes are predicted in log space, centers as offsets in anchor units.
  w = tf.math.exp(tw) * wa
  h = tf.math.exp(th) * ha
  ycenter = ty * ha + ycenter_a
  xcenter = tx * wa + xcenter_a
  ymin = ycenter - h / 2.
  xmin = xcenter - w / 2.
  ymax = ycenter + h / 2.
  xmax = xcenter + w / 2.
  return tf.stack([ymin, xmin, ymax, xmax], axis=-1)


class Anchors:
  """Multi-scale anchors class."""

  def __init__(self, min_level, max_level, num_scales, aspect_ratios,
               anchor_scale, image_size):
    """Constructs multiscale anchors.

    Args:
      min_level: integer number of minimum level of the output feature pyramid.
      max_level: integer number of maximum level of the output feature pyramid.
      num_scales: integer number representing intermediate scales added
        on each level. For instances, num_scales=2 adds two additional
        anchor scales [2^0, 2^0.5] on each level.
      aspect_ratios: list of tuples representing the aspect ratio anchors added
        on each level. For instances, aspect_ratios =
        [(1, 1), (1.4, 0.7), (0.7, 1.4)] adds three anchors on each level.
      anchor_scale: float number representing the scale of size of the base
        anchor to the feature stride 2^level. Or a list, one value per layer.
      image_size: integer number or tuple of integer number of input image size.

    Raises:
      ValueError: if `anchor_scale` is a list/tuple whose length differs from
        the number of pyramid levels.
    """
    self.min_level = min_level
    self.max_level = max_level
    self.num_scales = num_scales
    self.aspect_ratios = aspect_ratios
    num_levels = max_level - min_level + 1
    if isinstance(anchor_scale, (list, tuple)):
      # Raise instead of assert: asserts vanish under `python -O`, and a short
      # list would then only fail later with an IndexError.
      if len(anchor_scale) != num_levels:
        raise ValueError(
            'anchor_scale has %d entries but %d levels were requested '
            '(min_level=%d, max_level=%d).' %
            (len(anchor_scale), num_levels, min_level, max_level))
      self.anchor_scales = anchor_scale
    else:
      self.anchor_scales = [anchor_scale] * num_levels
    self.image_size = utils.parse_image_size(image_size)
    self.feat_sizes = utils.get_feat_sizes(image_size, max_level)
    self.config = self._generate_configs()
    self.boxes = self._generate_boxes()

  def _generate_configs(self):
    """Generate configurations of anchor boxes.

    Returns:
      dict mapping each level to a list of
      ((stride_y, stride_x), octave_scale, aspect, anchor_scale) tuples, one
      per (scale octave, aspect ratio) combination.
    """
    anchor_configs = {}
    feat_sizes = self.feat_sizes
    for level in range(self.min_level, self.max_level + 1):
      # Stride of this level relative to feature level 0.
      stride = (feat_sizes[0]['height'] / float(feat_sizes[level]['height']),
                feat_sizes[0]['width'] / float(feat_sizes[level]['width']))
      anchor_scale = self.anchor_scales[level - self.min_level]
      anchor_configs[level] = [
          (stride, scale_octave / float(self.num_scales), aspect, anchor_scale)
          for scale_octave in range(self.num_scales)
          for aspect in self.aspect_ratios
      ]
    return anchor_configs

  def _generate_boxes(self):
    """Generates multiscale anchor boxes.

    Returns:
      float32 tensor of shape [total_anchors, 4] holding
      (ymin, xmin, ymax, xmax) for every anchor, ordered by level.
    """
    boxes_all = []
    # Walk the levels explicitly so the anchor order never depends on dict
    # iteration order.
    for level in range(self.min_level, self.max_level + 1):
      boxes_level = []
      for config in self.config[level]:
        stride, octave_scale, aspect, anchor_scale = config
        base_anchor_size_x = anchor_scale * stride[1] * 2**octave_scale
        base_anchor_size_y = anchor_scale * stride[0] * 2**octave_scale
        anchor_size_x_2 = base_anchor_size_x * aspect[0] / 2.0
        anchor_size_y_2 = base_anchor_size_y * aspect[1] / 2.0

        # Anchor centers sit in the middle of each stride-sized cell.
        x = np.arange(stride[1] / 2, self.image_size[1], stride[1])
        y = np.arange(stride[0] / 2, self.image_size[0], stride[0])
        xv, yv = np.meshgrid(x, y)
        xv = xv.reshape(-1)
        yv = yv.reshape(-1)

        boxes = np.vstack((yv - anchor_size_y_2, xv - anchor_size_x_2,
                           yv + anchor_size_y_2, xv + anchor_size_x_2))
        boxes = np.swapaxes(boxes, 0, 1)
        boxes_level.append(np.expand_dims(boxes, axis=1))
      # concat anchors on the same level to the reshape NxAx4
      boxes_level = np.concatenate(boxes_level, axis=1)
      boxes_all.append(boxes_level.reshape([-1, 4]))

    anchor_boxes = np.vstack(boxes_all)
    anchor_boxes = tf.convert_to_tensor(anchor_boxes, dtype=tf.float32)
    return anchor_boxes

  def get_anchors_per_location(self):
    """Returns the number of anchors placed at each feature-map location."""
    return self.num_scales * len(self.aspect_ratios)


class AnchorLabeler(object):
  """Labeler for multiscale anchor boxes."""

  def __init__(self, anchors, num_classes, match_threshold=0.5):
    """Constructs anchor labeler to assign labels to anchors.

    Args:
      anchors: an instance of class Anchors.
      num_classes: integer number representing number of classes in the dataset.
      match_threshold: float number between 0 and 1 representing the threshold
        to assign positive labels for anchors.
    """
    similarity_calc = region_similarity_calculator.IouSimilarity()
    matcher = argmax_matcher.ArgMaxMatcher(
        match_threshold,
        unmatched_threshold=match_threshold,
        negatives_lower_than_unmatched=True,
        force_match_for_each_row=True)
    box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()

    self._target_assigner = target_assigner.TargetAssigner(
        similarity_calc, matcher, box_coder)
    self._anchors = anchors
    self._match_threshold = match_threshold
    self._num_classes = num_classes

  def _unpack_labels(self, labels):
    """Unpacks an array of labels into multiscales labels.

    Args:
      labels: tensor whose first axis enumerates all anchors, ordered by level.

    Returns:
      OrderedDict mapping each level to the labels of that level reshaped to
      [height_l, width_l, -1].
    """
    labels_unpacked = collections.OrderedDict()
    anchors = self._anchors
    count = 0
    for level in range(anchors.min_level, anchors.max_level + 1):
      feat_size = anchors.feat_sizes[level]
      steps = feat_size['height'] * feat_size[
          'width'] * anchors.get_anchors_per_location()
      indices = tf.range(count, count + steps)
      count += steps
      labels_unpacked[level] = tf.reshape(
          tf.gather(labels, indices),
          [feat_size['height'], feat_size['width'], -1])
    return labels_unpacked

  def label_anchors(self, gt_boxes, gt_labels):
    """Labels anchors with ground truth inputs.

    Args:
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
      gt_labels: A integer tensor with shape [N, 1] representing groundtruth
        classes.
    Returns:
      cls_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors]. The height_l and width_l
        represent the dimension of class logits at l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors * 4]. The height_l and
        width_l represent the dimension of bounding box regression output at
        l-th level.
      num_positives: scalar tensor storing number of positives in an image.
    """
    gt_box_list = box_list.BoxList(gt_boxes)
    anchor_box_list = box_list.BoxList(self._anchors.boxes)

    # cls_weights, box_weights are not used
    cls_targets, _, box_targets, _, matches = self._target_assigner.assign(
        anchor_box_list, gt_box_list, gt_labels)

    # class labels start from 1 and the background class = -1
    cls_targets -= 1
    cls_targets = tf.cast(cls_targets, tf.int32)

    # Unpack labels.
    cls_targets_dict = self._unpack_labels(cls_targets)
    box_targets_dict = self._unpack_labels(box_targets)
    # Only matched columns (>= 0) are positives. Testing `!= -1` would also
    # count ignored (-2) anchors; the two agree for the matcher configured in
    # __init__, whose equal thresholds leave no "ignore" band.
    num_positives = tf.reduce_sum(
        tf.cast(tf.greater_equal(matches.match_results, 0), tf.float32))

    return cls_targets_dict, box_targets_dict, num_positives
"""The main training script."""
import os
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf

import dataloader
import hparams_config
import utils
from keras import train_lib

# Cloud TPU Cluster Resolvers
flags.DEFINE_string(
    'tpu',
    default=None,
    help='The Cloud TPU to use for training. This should be either the name '
    'used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 '
    'url.')
flags.DEFINE_string(
    'gcp_project',
    default=None,
    help='Project name for the Cloud TPU-enabled project. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')
flags.DEFINE_string(
    'tpu_zone',
    default=None,
    help='GCE zone where the Cloud TPU is located in. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')

# Model specific parameters
flags.DEFINE_string(
    'eval_master',
    default='',
    help='GRPC URL of the eval master. Set to an appropriate value when running'
    ' on CPU/GPU')
flags.DEFINE_string('eval_name', default=None, help='Eval job name')
flags.DEFINE_enum('strategy', None, ['tpu', 'gpus', ''],
                  'Training: gpus for multi-gpu, if None, use TF default.')

flags.DEFINE_integer(
    'num_cores', default=8, help='Number of TPU cores for training')

flags.DEFINE_bool('use_fake_data', False, 'Use fake input.')
flags.DEFINE_bool(
    'use_xla', False,
    'Use XLA even if strategy is not tpu. If strategy is tpu, always use XLA, '
    'and this flag has no effect.')
flags.DEFINE_string('model_dir', None, 'Location of model_dir')

flags.DEFINE_string(
    'hparams', '', 'Comma separated k=v pairs of hyperparameters or a module'
    ' containing attributes to use as hyperparameters.')
flags.DEFINE_integer('batch_size', 16, 'training batch size')
flags.DEFINE_integer('eval_samples', 5000, 'The number of samples for '
                     'evaluation.')
flags.DEFINE_integer('iterations_per_loop', 100,
                     'Number of iterations per TPU training loop')
flags.DEFINE_string(
    'training_file_pattern', "train.record",
    'Glob for training data files (e.g., COCO train - minival set)')
flags.DEFINE_string('validation_file_pattern', None,
                    'Glob for evaluation tfrecords (e.g., COCO val2017 set)')
flags.DEFINE_string(
    'val_json_file', None,
    'COCO validation JSON containing golden bounding boxes. If None, use the '
    'ground truth from the dataloader. Ignored if testdev_dir is not None.')
flags.DEFINE_string('testdev_dir', None,
                    'COCO testdev dir. If not None, ignorer val_json_file.')
flags.DEFINE_integer('num_examples_per_epoch', 100,
                     'Number of examples in one epoch')
flags.DEFINE_integer('num_epochs', None, 'Number of epochs for training')
flags.DEFINE_string('mode', 'train',
                    'Mode to run: train or eval (default: train)')
flags.DEFINE_string('model_name', 'efficientdet-d1',
                    'Model name: retinanet or efficientdet')
flags.DEFINE_bool('eval_after_training', False, 'Run one eval after the '
                  'training finishes.')
flags.DEFINE_bool('debug', False, 'Enable debug mode')
flags.DEFINE_bool('profile', False, 'Enable profile mode')

# For Eval mode
flags.DEFINE_integer('min_eval_interval', 180,
                     'Minimum seconds between evaluations.')
flags.DEFINE_integer(
    'eval_timeout', None,
    'Maximum seconds between checkpoints before evaluation terminates.')

FLAGS = flags.FLAGS


def main(_):
  """Builds the detection model from flags/hparams and trains it with Keras.

  Resolves the hyperparameter config, selects a distribution strategy, builds
  and compiles the model, restores the latest checkpoint found in
  --model_dir (if any), runs `model.fit` and finally saves the weights.

  Raises:
    RuntimeError: if --model_dir is missing, or a file pattern required by
      --mode is missing.
  """
  # Parse and override hparams
  config = hparams_config.get_detection_config(FLAGS.model_name)
  config.override(FLAGS.hparams)
  if FLAGS.num_epochs:  # NOTE: remove this flag after updating all docs.
    config.num_epochs = FLAGS.num_epochs

  # Parse image size in case it is in string format.
  config.image_size = utils.parse_image_size(config.image_size)

  if FLAGS.use_xla and FLAGS.strategy != 'tpu':
    tf.config.optimizer.set_jit(True)
    for gpu in tf.config.list_physical_devices('GPU'):
      tf.config.experimental.set_memory_growth(gpu, True)

  if FLAGS.debug:
    tf.config.experimental_run_functions_eagerly(True)
    tf.debugging.set_log_device_placement(True)
    tf.random.set_seed(111111)
    logging.set_verbosity(logging.DEBUG)

  if FLAGS.strategy == 'tpu':
    tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    tf.config.experimental_connect_to_cluster(tpu_cluster_resolver)
    tf.tpu.experimental.initialize_tpu_system(tpu_cluster_resolver)
    ds_strategy = tf.distribute.TPUStrategy(tpu_cluster_resolver)
    logging.info('All devices: %s', tf.config.list_logical_devices('TPU'))
  elif FLAGS.strategy == 'gpus':
    ds_strategy = tf.distribute.MirroredStrategy()
    logging.info('All devices: %s', tf.config.list_physical_devices('GPU'))
  else:
    if tf.config.list_physical_devices('GPU'):
      ds_strategy = tf.distribute.OneDeviceStrategy('device:GPU:0')
    else:
      ds_strategy = tf.distribute.OneDeviceStrategy('device:CPU:0')

  # Check required paths
  if not FLAGS.model_dir:
    # Without this check the run only fails after the model is built (when
    # looking up the latest checkpoint) with an opaque TypeError.
    raise RuntimeError('You must specify --model_dir.')
  if FLAGS.mode in ('train',
                    'train_and_eval') and FLAGS.training_file_pattern is None:
    raise RuntimeError('You must specify --training_file_pattern for training.')
  if FLAGS.mode in ('eval', 'train_and_eval'):
    if FLAGS.validation_file_pattern is None:
      raise RuntimeError('You must specify --validation_file_pattern '
                         'for evaluation.')

  params = dict(
      config.as_dict(),
      model_name=FLAGS.model_name,
      iterations_per_loop=FLAGS.iterations_per_loop,
      model_dir=FLAGS.model_dir,
      num_examples_per_epoch=FLAGS.num_examples_per_epoch,
      strategy=FLAGS.strategy,
      batch_size=FLAGS.batch_size // ds_strategy.num_replicas_in_sync,
      num_shards=ds_strategy.num_replicas_in_sync,
      val_json_file=FLAGS.val_json_file,
      testdev_dir=FLAGS.testdev_dir,
      mode=FLAGS.mode)

  # set mixed precision policy by keras api.
  precision = utils.get_precision(params['strategy'], params['mixed_precision'])
  policy = tf.keras.mixed_precision.experimental.Policy(precision)
  tf.keras.mixed_precision.experimental.set_policy(policy)

  def get_dataset(is_training, params):
    """Returns the training or validation input dataset."""
    file_pattern = (
        FLAGS.training_file_pattern
        if is_training else FLAGS.validation_file_pattern)
    return dataloader.InputReader(
        file_pattern,
        is_training=is_training,
        use_fake_data=FLAGS.use_fake_data,
        max_instances_per_image=config.max_instances_per_image)(
            params)

  # Keras counts steps in batches, while the flags count examples.
  steps_per_epoch = max(1, FLAGS.num_examples_per_epoch // FLAGS.batch_size)
  validation_steps = max(1, FLAGS.eval_samples // FLAGS.batch_size)
  # Validation is optional in pure 'train' mode; only build the validation
  # input when a file pattern was actually provided.
  has_validation = bool(FLAGS.validation_file_pattern)

  with ds_strategy.scope():
    model = train_lib.EfficientDetNetTrain(params['model_name'], config)
    height, width = utils.parse_image_size(params['image_size'])
    model.build((params['batch_size'], height, width, 3))
    model.compile(
        optimizer=train_lib.get_optimizer(params),
        loss={
            'box_loss':
                train_lib.BoxLoss(
                    params['delta'], reduction=tf.keras.losses.Reduction.NONE),
            'box_iou_loss':
                train_lib.BoxIouLoss(
                    params['iou_loss_type'],
                    params['min_level'],
                    params['max_level'],
                    params['num_scales'],
                    params['aspect_ratios'],
                    params['anchor_scale'],
                    params['image_size'],
                    reduction=tf.keras.losses.Reduction.NONE),
            'class_loss':
                train_lib.FocalLoss(
                    params['alpha'],
                    params['gamma'],
                    label_smoothing=params['label_smoothing'],
                    reduction=tf.keras.losses.Reduction.NONE)
        })
    ckpt_path = tf.train.latest_checkpoint(FLAGS.model_dir)
    if ckpt_path:
      model.load_weights(ckpt_path)
    model.freeze_vars(params['var_freeze_expr'])
    model.fit(
        get_dataset(True, params=params),
        # Honor the configured epoch count; fall back to the Keras default of
        # a single epoch when the config does not define one.
        epochs=params.get('num_epochs') or 1,
        steps_per_epoch=steps_per_epoch,
        callbacks=train_lib.get_callbacks(params, FLAGS.profile),
        validation_data=(
            get_dataset(False, params=params) if has_validation else None),
        validation_steps=validation_steps if has_validation else None)
  model.save_weights(os.path.join(FLAGS.model_dir, 'model'))


if __name__ == '__main__':
  logging.set_verbosity(logging.WARNING)
  app.run(main)