├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── environment-linux.yaml ├── environment.yml ├── object_detection ├── BUILD ├── CONTRIBUTING.md ├── README.md ├── __init__.py ├── anchor_generators │ ├── BUILD │ ├── __init__.py │ ├── grid_anchor_generator.py │ ├── grid_anchor_generator_test.py │ ├── multiple_grid_anchor_generator.py │ └── multiple_grid_anchor_generator_test.py ├── box_coders │ ├── BUILD │ ├── __init__.py │ ├── faster_rcnn_box_coder.py │ ├── faster_rcnn_box_coder_test.py │ ├── keypoint_box_coder.py │ ├── keypoint_box_coder_test.py │ ├── mean_stddev_box_coder.py │ ├── mean_stddev_box_coder_test.py │ ├── square_box_coder.py │ └── square_box_coder_test.py ├── builders │ ├── BUILD │ ├── __init__.py │ ├── anchor_generator_builder.py │ ├── anchor_generator_builder_test.py │ ├── box_coder_builder.py │ ├── box_coder_builder_test.py │ ├── box_predictor_builder.py │ ├── box_predictor_builder_test.py │ ├── hyperparams_builder.py │ ├── hyperparams_builder_test.py │ ├── image_resizer_builder.py │ ├── image_resizer_builder_test.py │ ├── input_reader_builder.py │ ├── input_reader_builder_test.py │ ├── losses_builder.py │ ├── losses_builder_test.py │ ├── matcher_builder.py │ ├── matcher_builder_test.py │ ├── model_builder.py │ ├── model_builder_test.py │ ├── optimizer_builder.py │ ├── optimizer_builder_test.py │ ├── post_processing_builder.py │ ├── post_processing_builder_test.py │ ├── preprocessor_builder.py │ ├── preprocessor_builder_test.py │ ├── region_similarity_calculator_builder.py │ └── region_similarity_calculator_builder_test.py ├── core │ ├── BUILD │ ├── __init__.py │ ├── anchor_generator.py │ ├── balanced_positive_negative_sampler.py │ ├── balanced_positive_negative_sampler_test.py │ ├── batcher.py │ ├── batcher_test.py │ ├── box_coder.py │ ├── box_coder_test.py │ ├── box_list.py │ ├── box_list_ops.py │ ├── box_list_ops_test.py │ ├── box_list_test.py │ ├── box_predictor.py │ ├── box_predictor_test.py │ ├── data_decoder.py │ ├── keypoint_ops.py │ ├── keypoint_ops_test.py │ ├── losses.py │ ├── losses_test.py │ ├── matcher.py │ ├── matcher_test.py │ ├── minibatch_sampler.py │ ├── minibatch_sampler_test.py │ ├── model.py │ ├── post_processing.py │ ├── post_processing_test.py │ ├── prefetcher.py │ ├── prefetcher_test.py │ ├── preprocessor.py │ ├── preprocessor_test.py │ ├── region_similarity_calculator.py │ ├── region_similarity_calculator_test.py │ ├── standard_fields.py │ ├── target_assigner.py │ └── target_assigner_test.py ├── create_pascal_tf_record.py ├── create_pascal_tf_record_test.py ├── create_pet_tf_record.py ├── data │ ├── mscoco_label_map.pbtxt │ ├── pascal_label_map.pbtxt │ └── pet_label_map.pbtxt ├── data_decoders │ ├── BUILD │ ├── __init__.py │ ├── tf_example_decoder.py │ └── tf_example_decoder_test.py ├── eval.py ├── eval_util.py ├── evaluator.py ├── export_inference_graph.py ├── exporter.py ├── exporter_test.py ├── g3doc │ ├── configuring_jobs.md │ ├── defining_your_own_model.md │ ├── detection_model_zoo.md │ ├── exporting_models.md │ ├── img │ │ ├── dogs_detections_output.jpg │ │ ├── kites_detections_output.jpg │ │ ├── oxford_pet.png │ │ ├── tensorboard.png │ │ └── tensorboard2.png │ ├── installation.md │ ├── preparing_inputs.md │ ├── running_locally.md │ ├── running_notebook.md │ ├── running_on_cloud.md │ └── running_pets.md ├── matchers │ ├── BUILD │ ├── __init__.py │ ├── argmax_matcher.py │ ├── argmax_matcher_test.py │ ├── bipartite_matcher.py │ └── bipartite_matcher_test.py ├── meta_architectures │ ├── BUILD │ ├── __init__.py │ ├── 
faster_rcnn_meta_arch.py │ ├── faster_rcnn_meta_arch_test.py │ ├── faster_rcnn_meta_arch_test_lib.py │ ├── rfcn_meta_arch.py │ ├── rfcn_meta_arch_test.py │ ├── ssd_meta_arch.py │ └── ssd_meta_arch_test.py ├── models │ ├── BUILD │ ├── __init__.py │ ├── faster_rcnn_inception_resnet_v2_feature_extractor.py │ ├── faster_rcnn_inception_resnet_v2_feature_extractor_test.py │ ├── faster_rcnn_resnet_v1_feature_extractor.py │ ├── faster_rcnn_resnet_v1_feature_extractor_test.py │ ├── feature_map_generators.py │ ├── feature_map_generators_test.py │ ├── ssd_feature_extractor_test.py │ ├── ssd_inception_v2_feature_extractor.py │ ├── ssd_inception_v2_feature_extractor_test.py │ ├── ssd_mobilenet_v1_feature_extractor.py │ └── ssd_mobilenet_v1_feature_extractor_test.py ├── object_detection_tutorial.ipynb ├── protos │ ├── BUILD │ ├── __init__.py │ ├── anchor_generator.proto │ ├── anchor_generator_pb2.py │ ├── argmax_matcher.proto │ ├── argmax_matcher_pb2.py │ ├── bipartite_matcher.proto │ ├── bipartite_matcher_pb2.py │ ├── box_coder.proto │ ├── box_coder_pb2.py │ ├── box_predictor.proto │ ├── box_predictor_pb2.py │ ├── eval.proto │ ├── eval_pb2.py │ ├── faster_rcnn.proto │ ├── faster_rcnn_box_coder.proto │ ├── faster_rcnn_box_coder_pb2.py │ ├── faster_rcnn_pb2.py │ ├── grid_anchor_generator.proto │ ├── grid_anchor_generator_pb2.py │ ├── hyperparams.proto │ ├── hyperparams_pb2.py │ ├── image_resizer.proto │ ├── image_resizer_pb2.py │ ├── input_reader.proto │ ├── input_reader_pb2.py │ ├── losses.proto │ ├── losses_pb2.py │ ├── matcher.proto │ ├── matcher_pb2.py │ ├── mean_stddev_box_coder.proto │ ├── mean_stddev_box_coder_pb2.py │ ├── model.proto │ ├── model_pb2.py │ ├── optimizer.proto │ ├── optimizer_pb2.py │ ├── pipeline.proto │ ├── pipeline_pb2.py │ ├── post_processing.proto │ ├── post_processing_pb2.py │ ├── preprocessor.proto │ ├── preprocessor_pb2.py │ ├── region_similarity_calculator.proto │ ├── region_similarity_calculator_pb2.py │ ├── square_box_coder.proto │ ├── square_box_coder_pb2.py │ ├── ssd.proto │ ├── ssd_anchor_generator.proto │ ├── ssd_anchor_generator_pb2.py │ ├── ssd_pb2.py │ ├── string_int_label_map.proto │ ├── string_int_label_map_pb2.py │ ├── train.proto │ └── train_pb2.py ├── samples │ ├── cloud │ │ └── cloud.yml │ └── configs │ │ ├── faster_rcnn_inception_resnet_v2_atrous_pets.config │ │ ├── faster_rcnn_resnet101_pets.config │ │ ├── faster_rcnn_resnet101_voc07.config │ │ ├── faster_rcnn_resnet152_pets.config │ │ ├── faster_rcnn_resnet50_pets.config │ │ ├── rfcn_resnet101_pets.config │ │ ├── ssd_inception_v2_pets.config │ │ └── ssd_mobilenet_v1_pets.config ├── ssd_mobilenet_v1_coco_11_06_2017 │ └── frozen_inference_graph.pb ├── test_images │ ├── image1.jpg │ ├── image2.jpg │ └── image_info.txt ├── train.py ├── trainer.py ├── trainer_test.py └── utils │ ├── BUILD │ ├── __init__.py │ ├── category_util.py │ ├── category_util_test.py │ ├── dataset_util.py │ ├── dataset_util_test.py │ ├── label_map_util.py │ ├── label_map_util_test.py │ ├── learning_schedules.py │ ├── learning_schedules_test.py │ ├── metrics.py │ ├── metrics_test.py │ ├── np_box_list.py │ ├── np_box_list_ops.py │ ├── np_box_list_ops_test.py │ ├── np_box_list_test.py │ ├── np_box_ops.py │ ├── np_box_ops_test.py │ ├── object_detection_evaluation.py │ ├── object_detection_evaluation_test.py │ ├── ops.py │ ├── ops_test.py │ ├── per_image_evaluation.py │ ├── per_image_evaluation_test.py │ ├── shape_utils.py │ ├── shape_utils_test.py │ ├── static_shape.py │ ├── static_shape_test.py │ ├── test_utils.py │ ├── test_utils_test.py │ 
├── variables_helper.py │ ├── variables_helper_test.py │ ├── visualization_utils.py │ └── visualization_utils_test.py ├── object_detection_app.py ├── object_detection_multithreading.py └── utils ├── __init__.py ├── app_utils.py └── test_app_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # Custom 92 | .idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Dat Tran 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Object-Detector-App 2 | 3 | A real-time object recognition application using [Google's TensorFlow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection) and [OpenCV](http://opencv.org/). 4 | 5 | ## Getting Started 6 | 1. `conda env create -f environment.yml` 7 | 2. `python object_detection_app.py` / `python object_detection_multithreading.py` 8 | Optional arguments (default value): 9 | * Device index of the camera `--source=0` 10 | * Width of the frames in the video stream `--width=480` 11 | * Height of the frames in the video stream `--height=360` 12 | * Number of workers `--num-workers=2` 13 | * Size of the queue `--queue-size=5` 14 | * Read video from an HLS stream instead of the webcam `--stream-input=http://somertmpserver.com/hls/live.m3u8` 15 | * Send the stream to a livestreaming server `--stream-output=http://somertmpserver.com/hls/live.m3u8` (a combined example invocation is shown at the end of this README) 16 | 17 | ## Tests 18 | ``` 19 | pytest -vs utils/ 20 | ``` 21 | 22 | ## Requirements 23 | - [Anaconda / Python 3.5](https://www.continuum.io/downloads) 24 | - [TensorFlow 1.2](https://www.tensorflow.org/) 25 | - [OpenCV 3.0](http://opencv.org/) 26 | 27 | ## Notes 28 | - OpenCV 3.1 might crash on OSX after a while, which is why I switched to version 3.0. See the open issue and solution [here](https://github.com/opencv/opencv/issues/5874). 29 | - Moving the `.read()` part of the video stream into multiple child processes did not work. However, it was possible to move it to a separate thread. 30 | 31 | ## Copyright 32 | 33 | See [LICENSE](LICENSE) for details. 34 | Copyright (c) 2017 [Dat Tran](http://www.dat-tran.com/).
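## Example usage

All of the arguments above are optional. A hypothetical invocation combining several of them might look like this (the flag values are illustrative, not the defaults):

```
python object_detection_app.py --source=0 --width=640 --height=480 --num-workers=4 --queue-size=10
```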
35 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/__init__.py -------------------------------------------------------------------------------- /environment-linux.yaml: -------------------------------------------------------------------------------- 1 | name: object-detection 2 | channels: !!python/tuple 3 | - menpo 4 | - defaults 5 | dependencies: 6 | - cycler=0.10.0=py35_0 7 | - freetype=2.5.5=2 8 | - icu=54.1=0 9 | - jbig=2.1=0 10 | - menpo::opencv3=3.0.1=py35_0 11 | - jpeg=9b=0 12 | - libpng=1.6.27=0 13 | - libtiff=4.0.6=3 14 | - matplotlib=2.0.2=np113py35_0 15 | - menpo::tbb 16 | - mkl=2017.0.1=0 17 | - numpy=1.13.0=py35_0 18 | - olefile=0.44=py35_0 19 | - openssl=1.0.2l=0 20 | - pillow=4.2.1=py35_0 21 | - pip=9.0.1=py35_1 22 | - py=1.4.34=py35_0 23 | - pyparsing=2.2.0=py35_0 24 | - pyqt=5.6.0=py35_2 25 | - pytest=3.2.1=py35_0 26 | - python=3.5.3=1 27 | - python-dateutil=2.6.1=py35_0 28 | - pytz=2017.2=py35_0 29 | - qt=5.6.2 30 | - readline=6.2=2 31 | - setuptools=27.2.0=py35_0 32 | - sip=4.18=py35_0 33 | - six=1.10.0=py35_0 34 | - sqlite=3.13.0=0 35 | - tk=8.5.18=0 36 | - wheel=0.29.0=py35_0 37 | - xz=5.2.2=1 38 | - zlib=1.2.8=3 39 | - pip: 40 | - backports.weakref==1.0rc1 41 | - bleach==1.5.0 42 | - html5lib==0.9999999 43 | - markdown==2.2.0 44 | - protobuf==3.3.0 45 | - tensorflow==1.2.0 46 | - werkzeug==0.12.2 47 | prefix: /Users/datitran/anaconda/envs/object-detection 48 | 49 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: object-detection 2 | channels: !!python/tuple 3 | - menpo 4 | - defaults 5 | dependencies: 6 | - cycler=0.10.0=py35_0 7 | - freetype=2.5.5=2 8 | - icu=54.1=0 9 | - jbig=2.1=0 10 | - menpo::opencv3=3.1.0=py35_0 11 | - jpeg=9b=0 12 | - libpng=1.6.27=0 13 | - libtiff=4.0.6=3 14 | - matplotlib=2.0.2=np113py35_0 15 | - menpo::tbb=4.3_20141023=0 16 | - mkl=2017.0.1=0 17 | - numpy=1.13.0=py35_0 18 | - olefile=0.44=py35_0 19 | - openssl=1.0.2l=0 20 | - pillow=4.1.1=py35_0 21 | - pip=9.0.1=py35_1 22 | - py=1.4.34=py35_0 23 | - pyparsing=2.2.0=py35_0 24 | - pyqt=5.6.0=py35_2 25 | - pytest=3.2.1=py35_0 26 | - python=3.5.3=1 27 | - python-dateutil=2.6.1=py35_0 28 | - pytz=2017.2=py35_0 29 | - qt=5.6.2=2 30 | - readline=6.2=2 31 | - setuptools=27.2.0=py35_0 32 | - sip=4.18=py35_0 33 | - six=1.10.0=py35_0 34 | - sqlite=3.13.0=0 35 | - tk=8.5.18=0 36 | - wheel=0.29.0=py35_0 37 | - xz=5.2.2=1 38 | - zlib=1.2.8=3 39 | - pip: 40 | - backports.weakref==1.0rc1 41 | - bleach==1.5.0 42 | - html5lib==0.9999999 43 | - markdown==2.2.0 44 | - protobuf==3.3.0 45 | - tensorflow==1.2.0 46 | - werkzeug==0.12.2 47 | - scipy 48 | prefix: /Users/datitran/anaconda/envs/object-detection 49 | 50 | -------------------------------------------------------------------------------- /object_detection/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to the Tensorflow Object Detection API 2 | 3 | Patches to Tensorflow Object Detection API are welcome! 4 | 5 | We require contributors to fill out either the individual or corporate 6 | Contributor License Agreement (CLA). 
7 | 8 | * If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](http://code.google.com/legal/individual-cla-v1.0.html). 9 | * If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html). 10 | 11 | Please follow the 12 | [Tensorflow contributing guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md) 13 | when submitting pull requests. 14 | -------------------------------------------------------------------------------- /object_detection/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow Object Detection API 2 | Creating accurate machine learning models capable of localizing and identifying 3 | multiple objects in a single image remains a core challenge in computer vision. 4 | The TensorFlow Object Detection API is an open source framework built on top of 5 | TensorFlow that makes it easy to construct, train and deploy object detection 6 | models. At Google we’ve certainly found this codebase to be useful for our 7 | computer vision needs, and we hope that you will as well. 8 |
[image: example detection output (likely g3doc/img/kites_detections_output.jpg)]
11 | Contributions to the codebase are welcome and we would love to hear back from 12 | you if you find this API useful. Finally, if you use the Tensorflow Object 13 | Detection API for a research publication, please consider citing: 14 | 15 | ``` 16 | "Speed/accuracy trade-offs for modern convolutional object detectors." 17 | Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z, 18 | Song Y, Guadarrama S, Murphy K, CVPR 2017 19 | ``` 20 | \[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex]( 21 | https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\] 22 | 23 | ## Maintainers 24 | 25 | * Jonathan Huang, github: [jch1](https://github.com/jch1) 26 | * Vivek Rathod, github: [tombstone](https://github.com/tombstone) 27 | * Derek Chow, github: [derekjchow](https://github.com/derekjchow) 28 | * Chen Sun, github: [jesu9](https://github.com/jesu9) 29 | * Menglong Zhu, github: [dreamdragon](https://github.com/dreamdragon) 30 | 31 | 32 | ## Table of contents 33 | 34 | Quick Start: 35 | * [Quick Start: Jupyter notebook for off-the-shelf inference](object_detection_tutorial.ipynb)
37 | * [Quick Start: Training a pet detector](g3doc/running_pets.md)
38 | 39 | Setup: 40 | * [Installation](g3doc/installation.md)
41 | * [Configuring an object detection pipeline](g3doc/configuring_jobs.md)
43 | * [Preparing inputs](g3doc/preparing_inputs.md)
44 | 45 | Running: 46 | * [Running locally](g3doc/running_locally.md)
47 | * [Running on the cloud](g3doc/running_on_cloud.md)
48 | 49 | Extras: 50 | * [Tensorflow detection model zoo](g3doc/detection_model_zoo.md)
51 | * [Exporting a trained model for inference](g3doc/exporting_models.md)
53 | * [Defining your own model architecture](g3doc/defining_your_own_model.md)
55 | 56 | ## Release information 57 | 58 | ### June 15, 2017 59 | 60 | In addition to our base Tensorflow detection model definitions, this 61 | release includes: 62 | 63 | * A selection of trainable detection models, including: 64 | * Single Shot Multibox Detector (SSD) with MobileNet, 65 | * SSD with Inception V2, 66 | * Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101, 67 | * Faster RCNN with Resnet 101, 68 | * Faster RCNN with Inception Resnet v2 69 | * Frozen weights (trained on the COCO dataset) for each of the above models to 70 | be used for out-of-the-box inference purposes. 71 | * A [Jupyter notebook](object_detection_tutorial.ipynb) for performing 72 | out-of-the-box inference with one of our released models 73 | * Convenient [local training](g3doc/running_locally.md) scripts as well as 74 | distributed training and evaluation pipelines via 75 | [Google Cloud](g3doc/running_on_cloud.md). 76 | 77 | 78 | Thanks to contributors: Jonathan Huang, Vivek Rathod, Derek Chow, 79 | Chen Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer, Zbigniew Wojna, Yang Song, Sergio Guadarrama, Jasper Uijlings, 80 | Viacheslav Kovalevskyi, Kevin Murphy 81 | -------------------------------------------------------------------------------- /object_detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/__init__.py -------------------------------------------------------------------------------- /object_detection/anchor_generators/BUILD: -------------------------------------------------------------------------------- 1 | # Tensorflow Object Detection API: Anchor Generator implementations. 
2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | ) 6 | 7 | licenses(["notice"]) 8 | 9 | # Apache 2.0 10 | py_library( 11 | name = "grid_anchor_generator", 12 | srcs = [ 13 | "grid_anchor_generator.py", 14 | ], 15 | deps = [ 16 | "//tensorflow", 17 | "//tensorflow_models/object_detection/core:anchor_generator", 18 | "//tensorflow_models/object_detection/core:box_list", 19 | "//tensorflow_models/object_detection/utils:ops", 20 | ], 21 | ) 22 | 23 | py_test( 24 | name = "grid_anchor_generator_test", 25 | srcs = [ 26 | "grid_anchor_generator_test.py", 27 | ], 28 | deps = [ 29 | ":grid_anchor_generator", 30 | "//tensorflow", 31 | ], 32 | ) 33 | 34 | py_library( 35 | name = "multiple_grid_anchor_generator", 36 | srcs = [ 37 | "multiple_grid_anchor_generator.py", 38 | ], 39 | deps = [ 40 | ":grid_anchor_generator", 41 | "//tensorflow", 42 | "//tensorflow_models/object_detection/core:anchor_generator", 43 | "//tensorflow_models/object_detection/core:box_list_ops", 44 | ], 45 | ) 46 | 47 | py_test( 48 | name = "multiple_grid_anchor_generator_test", 49 | srcs = [ 50 | "multiple_grid_anchor_generator_test.py", 51 | ], 52 | deps = [ 53 | ":multiple_grid_anchor_generator", 54 | "//third_party/py/numpy", 55 | ], 56 | ) 57 | -------------------------------------------------------------------------------- /object_detection/anchor_generators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/anchor_generators/__init__.py -------------------------------------------------------------------------------- /object_detection/anchor_generators/grid_anchor_generator_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for object_detection.grid_anchor_generator.""" 17 | 18 | import tensorflow as tf 19 | 20 | from object_detection.anchor_generators import grid_anchor_generator 21 | 22 | 23 | class GridAnchorGeneratorTest(tf.test.TestCase): 24 | 25 | def test_construct_single_anchor(self): 26 | """Builds a 1x1 anchor grid to test the size of the output boxes.""" 27 | scales = [0.5, 1.0, 2.0] 28 | aspect_ratios = [0.25, 1.0, 4.0] 29 | anchor_offset = [7, -3] 30 | exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61], 31 | [-505, -131, 519, 125], [-57, -67, 71, 61], 32 | [-121, -131, 135, 125], [-249, -259, 263, 253], 33 | [-25, -131, 39, 125], [-57, -259, 71, 253], 34 | [-121, -515, 135, 509]] 35 | 36 | anchor_generator = grid_anchor_generator.GridAnchorGenerator( 37 | scales, aspect_ratios, 38 | anchor_offset=anchor_offset) 39 | anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)]) 40 | anchor_corners = anchors.get() 41 | 42 | with self.test_session(): 43 | anchor_corners_out = anchor_corners.eval() 44 | self.assertAllClose(anchor_corners_out, exp_anchor_corners) 45 | 46 | def test_construct_anchor_grid(self): 47 | base_anchor_size = [10, 10] 48 | anchor_stride = [19, 19] 49 | anchor_offset = [0, 0] 50 | scales = [0.5, 1.0, 2.0] 51 | aspect_ratios = [1.0] 52 | 53 | exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.], 54 | [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5], 55 | [-5., 14., 5, 24], [-10., 9., 10, 29], 56 | [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5], 57 | [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5], 58 | [14., 14., 24, 24], [9., 9., 29, 29]] 59 | 60 | anchor_generator = grid_anchor_generator.GridAnchorGenerator( 61 | scales, 62 | aspect_ratios, 63 | base_anchor_size=base_anchor_size, 64 | anchor_stride=anchor_stride, 65 | anchor_offset=anchor_offset) 66 | 67 | anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)]) 68 | anchor_corners = anchors.get() 69 | 70 | with self.test_session(): 71 | anchor_corners_out = anchor_corners.eval() 72 | self.assertAllClose(anchor_corners_out, exp_anchor_corners) 73 | 74 | 75 | if __name__ == '__main__': 76 | tf.test.main() 77 | -------------------------------------------------------------------------------- /object_detection/box_coders/BUILD: -------------------------------------------------------------------------------- 1 | # Tensorflow Object Detection API: Box Coder implementations. 
2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | ) 6 | 7 | licenses(["notice"]) 8 | 9 | # Apache 2.0 10 | py_library( 11 | name = "faster_rcnn_box_coder", 12 | srcs = [ 13 | "faster_rcnn_box_coder.py", 14 | ], 15 | deps = [ 16 | "//tensorflow_models/object_detection/core:box_coder", 17 | "//tensorflow_models/object_detection/core:box_list", 18 | ], 19 | ) 20 | 21 | py_test( 22 | name = "faster_rcnn_box_coder_test", 23 | srcs = [ 24 | "faster_rcnn_box_coder_test.py", 25 | ], 26 | deps = [ 27 | ":faster_rcnn_box_coder", 28 | "//tensorflow", 29 | "//tensorflow_models/object_detection/core:box_list", 30 | ], 31 | ) 32 | 33 | py_library( 34 | name = "keypoint_box_coder", 35 | srcs = [ 36 | "keypoint_box_coder.py", 37 | ], 38 | deps = [ 39 | "//tensorflow_models/object_detection/core:box_coder", 40 | "//tensorflow_models/object_detection/core:box_list", 41 | "//tensorflow_models/object_detection/core:standard_fields", 42 | ], 43 | ) 44 | 45 | py_test( 46 | name = "keypoint_box_coder_test", 47 | srcs = [ 48 | "keypoint_box_coder_test.py", 49 | ], 50 | deps = [ 51 | ":keypoint_box_coder", 52 | "//tensorflow", 53 | "//tensorflow_models/object_detection/core:box_list", 54 | "//tensorflow_models/object_detection/core:standard_fields", 55 | ], 56 | ) 57 | 58 | py_library( 59 | name = "mean_stddev_box_coder", 60 | srcs = [ 61 | "mean_stddev_box_coder.py", 62 | ], 63 | deps = [ 64 | "//tensorflow_models/object_detection/core:box_coder", 65 | "//tensorflow_models/object_detection/core:box_list", 66 | ], 67 | ) 68 | 69 | py_test( 70 | name = "mean_stddev_box_coder_test", 71 | srcs = [ 72 | "mean_stddev_box_coder_test.py", 73 | ], 74 | deps = [ 75 | ":mean_stddev_box_coder", 76 | "//tensorflow", 77 | "//tensorflow_models/object_detection/core:box_list", 78 | ], 79 | ) 80 | 81 | py_library( 82 | name = "square_box_coder", 83 | srcs = [ 84 | "square_box_coder.py", 85 | ], 86 | deps = [ 87 | "//tensorflow_models/object_detection/core:box_coder", 88 | "//tensorflow_models/object_detection/core:box_list", 89 | ], 90 | ) 91 | 92 | py_test( 93 | name = "square_box_coder_test", 94 | srcs = [ 95 | "square_box_coder_test.py", 96 | ], 97 | deps = [ 98 | ":square_box_coder", 99 | "//tensorflow", 100 | "//tensorflow_models/object_detection/core:box_list", 101 | ], 102 | ) 103 | -------------------------------------------------------------------------------- /object_detection/box_coders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/box_coders/__init__.py -------------------------------------------------------------------------------- /object_detection/box_coders/mean_stddev_box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Mean stddev box coder. 17 | 18 | This box coder uses the following coding schema to encode boxes: 19 | rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev. 20 | """ 21 | from object_detection.core import box_coder 22 | from object_detection.core import box_list 23 | 24 | 25 | class MeanStddevBoxCoder(box_coder.BoxCoder): 26 | """Mean stddev box coder.""" 27 | 28 | @property 29 | def code_size(self): 30 | return 4 31 | 32 | def _encode(self, boxes, anchors): 33 | """Encode a box collection with respect to anchor collection. 34 | 35 | Args: 36 | boxes: BoxList holding N boxes to be encoded. 37 | anchors: BoxList of N anchors. We assume that anchors has an associated 38 | stddev field. 39 | 40 | Returns: 41 | a tensor representing N anchor-encoded boxes 42 | Raises: 43 | ValueError: if the anchors BoxList does not have a stddev field 44 | """ 45 | if not anchors.has_field('stddev'): 46 | raise ValueError('anchors must have a stddev field') 47 | box_corners = boxes.get() 48 | means = anchors.get() 49 | stddev = anchors.get_field('stddev') 50 | return (box_corners - means) / stddev 51 | 52 | def _decode(self, rel_codes, anchors): 53 | """Decode. 54 | 55 | Args: 56 | rel_codes: a tensor representing N anchor-encoded boxes. 57 | anchors: BoxList of anchors. We assume that anchors has an associated 58 | stddev field. 59 | 60 | Returns: 61 | boxes: BoxList holding N bounding boxes 62 | Raises: 63 | ValueError: if the anchors BoxList does not have a stddev field 64 | """ 65 | if not anchors.has_field('stddev'): 66 | raise ValueError('anchors must have a stddev field') 67 | means = anchors.get() 68 | stddevs = anchors.get_field('stddev') 69 | box_corners = rel_codes * stddevs + means 70 | return box_list.BoxList(box_corners) 71 | -------------------------------------------------------------------------------- /object_detection/box_coders/mean_stddev_box_coder_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | 16 | """Tests for object_detection.box_coder.mean_stddev_boxcoder.""" 17 | 18 | import tensorflow as tf 19 | 20 | from object_detection.box_coders import mean_stddev_box_coder 21 | from object_detection.core import box_list 22 | 23 | 24 | class MeanStddevBoxCoderTest(tf.test.TestCase): 25 | 26 | def testGetCorrectRelativeCodesAfterEncoding(self): 27 | box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]] 28 | boxes = box_list.BoxList(tf.constant(box_corners)) 29 | expected_rel_codes = [[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]] 30 | prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]]) 31 | prior_stddevs = tf.constant(2 * [4 * [.1]]) 32 | priors = box_list.BoxList(prior_means) 33 | priors.add_field('stddev', prior_stddevs) 34 | 35 | coder = mean_stddev_box_coder.MeanStddevBoxCoder() 36 | rel_codes = coder.encode(boxes, priors) 37 | with self.test_session() as sess: 38 | rel_codes_out = sess.run(rel_codes) 39 | self.assertAllClose(rel_codes_out, expected_rel_codes) 40 | 41 | def testGetCorrectBoxesAfterDecoding(self): 42 | rel_codes = tf.constant([[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]]) 43 | expected_box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]] 44 | prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]]) 45 | prior_stddevs = tf.constant(2 * [4 * [.1]]) 46 | priors = box_list.BoxList(prior_means) 47 | priors.add_field('stddev', prior_stddevs) 48 | 49 | coder = mean_stddev_box_coder.MeanStddevBoxCoder() 50 | decoded_boxes = coder.decode(rel_codes, priors) 51 | decoded_box_corners = decoded_boxes.get() 52 | with self.test_session() as sess: 53 | decoded_out = sess.run(decoded_box_corners) 54 | self.assertAllClose(decoded_out, expected_box_corners) 55 | 56 | 57 | if __name__ == '__main__': 58 | tf.test.main() 59 | -------------------------------------------------------------------------------- /object_detection/builders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/builders/__init__.py -------------------------------------------------------------------------------- /object_detection/builders/anchor_generator_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """A function to build an object detection anchor generator from config.""" 17 | 18 | from object_detection.anchor_generators import grid_anchor_generator 19 | from object_detection.anchor_generators import multiple_grid_anchor_generator 20 | from object_detection.protos import anchor_generator_pb2 21 | 22 | 23 | def build(anchor_generator_config): 24 | """Builds an anchor generator based on the config. 25 | 26 | Args: 27 | anchor_generator_config: An anchor_generator.proto object containing the 28 | config for the desired anchor generator. 29 | 30 | Returns: 31 | Anchor generator based on the config. 32 | 33 | Raises: 34 | ValueError: On empty anchor generator proto. 35 | """ 36 | if not isinstance(anchor_generator_config, 37 | anchor_generator_pb2.AnchorGenerator): 38 | raise ValueError('anchor_generator_config not of type ' 39 | 'anchor_generator_pb2.AnchorGenerator') 40 | if anchor_generator_config.WhichOneof( 41 | 'anchor_generator_oneof') == 'grid_anchor_generator': 42 | grid_anchor_generator_config = anchor_generator_config.grid_anchor_generator 43 | return grid_anchor_generator.GridAnchorGenerator( 44 | scales=[float(scale) for scale in grid_anchor_generator_config.scales], 45 | aspect_ratios=[float(aspect_ratio) 46 | for aspect_ratio 47 | in grid_anchor_generator_config.aspect_ratios], 48 | base_anchor_size=[grid_anchor_generator_config.height, 49 | grid_anchor_generator_config.width], 50 | anchor_stride=[grid_anchor_generator_config.height_stride, 51 | grid_anchor_generator_config.width_stride], 52 | anchor_offset=[grid_anchor_generator_config.height_offset, 53 | grid_anchor_generator_config.width_offset]) 54 | elif anchor_generator_config.WhichOneof( 55 | 'anchor_generator_oneof') == 'ssd_anchor_generator': 56 | ssd_anchor_generator_config = anchor_generator_config.ssd_anchor_generator 57 | return multiple_grid_anchor_generator.create_ssd_anchors( 58 | num_layers=ssd_anchor_generator_config.num_layers, 59 | min_scale=ssd_anchor_generator_config.min_scale, 60 | max_scale=ssd_anchor_generator_config.max_scale, 61 | aspect_ratios=ssd_anchor_generator_config.aspect_ratios, 62 | reduce_boxes_in_lowest_layer=(ssd_anchor_generator_config 63 | .reduce_boxes_in_lowest_layer)) 64 | else: 65 | raise ValueError('Empty anchor generator.') 66 | 67 | -------------------------------------------------------------------------------- /object_detection/builders/box_coder_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """A function to build an object detection box coder from configuration.""" 17 | from object_detection.box_coders import faster_rcnn_box_coder 18 | from object_detection.box_coders import mean_stddev_box_coder 19 | from object_detection.box_coders import square_box_coder 20 | from object_detection.protos import box_coder_pb2 21 | 22 | 23 | def build(box_coder_config): 24 | """Builds a box coder object based on the box coder config. 25 | 26 | Args: 27 | box_coder_config: A box_coder.proto object containing the config for the 28 | desired box coder. 29 | 30 | Returns: 31 | BoxCoder based on the config. 32 | 33 | Raises: 34 | ValueError: On empty box coder proto. 35 | """ 36 | if not isinstance(box_coder_config, box_coder_pb2.BoxCoder): 37 | raise ValueError('box_coder_config not of type box_coder_pb2.BoxCoder.') 38 | 39 | if box_coder_config.WhichOneof('box_coder_oneof') == 'faster_rcnn_box_coder': 40 | return faster_rcnn_box_coder.FasterRcnnBoxCoder(scale_factors=[ 41 | box_coder_config.faster_rcnn_box_coder.y_scale, 42 | box_coder_config.faster_rcnn_box_coder.x_scale, 43 | box_coder_config.faster_rcnn_box_coder.height_scale, 44 | box_coder_config.faster_rcnn_box_coder.width_scale 45 | ]) 46 | if (box_coder_config.WhichOneof('box_coder_oneof') == 47 | 'mean_stddev_box_coder'): 48 | return mean_stddev_box_coder.MeanStddevBoxCoder() 49 | if box_coder_config.WhichOneof('box_coder_oneof') == 'square_box_coder': 50 | return square_box_coder.SquareBoxCoder(scale_factors=[ 51 | box_coder_config.square_box_coder.y_scale, 52 | box_coder_config.square_box_coder.x_scale, 53 | box_coder_config.square_box_coder.length_scale 54 | ]) 55 | raise ValueError('Empty box coder.') 56 | -------------------------------------------------------------------------------- /object_detection/builders/image_resizer_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Builder function for image resizing operations.""" 17 | import functools 18 | 19 | from object_detection.core import preprocessor 20 | from object_detection.protos import image_resizer_pb2 21 | 22 | 23 | def build(image_resizer_config): 24 | """Builds callable for image resizing operations. 25 | 26 | Args: 27 | image_resizer_config: image_resizer.proto object containing parameters for 28 | an image resizing operation. 29 | 30 | Returns: 31 | image_resizer_fn: Callable for image resizing. This callable always takes 32 | a rank-3 image tensor (corresponding to a single image) and returns a 33 | rank-3 image tensor, possibly with new spatial dimensions. 34 | 35 | Raises: 36 | ValueError: if `image_resizer_config` is of incorrect type. 
37 | ValueError: if `image_resizer_config.image_resizer_oneof` is not of the 38 | expected type. 39 | ValueError: if min_dimension > max_dimension when keep_aspect_ratio_resizer 40 | is used. 41 | """ 42 | if not isinstance(image_resizer_config, image_resizer_pb2.ImageResizer): 43 | raise ValueError('image_resizer_config not of type ' 44 | 'image_resizer_pb2.ImageResizer.') 45 | 46 | if image_resizer_config.WhichOneof( 47 | 'image_resizer_oneof') == 'keep_aspect_ratio_resizer': 48 | keep_aspect_ratio_config = image_resizer_config.keep_aspect_ratio_resizer 49 | if not (keep_aspect_ratio_config.min_dimension 50 | <= keep_aspect_ratio_config.max_dimension): 51 | raise ValueError('min_dimension > max_dimension') 52 | return functools.partial( 53 | preprocessor.resize_to_range, 54 | min_dimension=keep_aspect_ratio_config.min_dimension, 55 | max_dimension=keep_aspect_ratio_config.max_dimension) 56 | if image_resizer_config.WhichOneof( 57 | 'image_resizer_oneof') == 'fixed_shape_resizer': 58 | fixed_shape_resizer_config = image_resizer_config.fixed_shape_resizer 59 | return functools.partial(preprocessor.resize_image, 60 | new_height=fixed_shape_resizer_config.height, 61 | new_width=fixed_shape_resizer_config.width) 62 | raise ValueError('Invalid image resizer option.') 63 | -------------------------------------------------------------------------------- /object_detection/builders/image_resizer_builder_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | 16 | """Tests for object_detection.builders.image_resizer_builder.""" 17 | import tensorflow as tf 18 | from google.protobuf import text_format 19 | from object_detection.builders import image_resizer_builder 20 | from object_detection.protos import image_resizer_pb2 21 | 22 | 23 | class ImageResizerBuilderTest(tf.test.TestCase): 24 | 25 | def _shape_of_resized_random_image_given_text_proto( 26 | self, input_shape, text_proto): 27 | image_resizer_config = image_resizer_pb2.ImageResizer() 28 | text_format.Merge(text_proto, image_resizer_config) 29 | image_resizer_fn = image_resizer_builder.build(image_resizer_config) 30 | images = tf.to_float(tf.random_uniform( 31 | input_shape, minval=0, maxval=255, dtype=tf.int32)) 32 | resized_images = image_resizer_fn(images) 33 | with self.test_session() as sess: 34 | return sess.run(resized_images).shape 35 | 36 | def test_built_keep_aspect_ratio_resizer_returns_expected_shape(self): 37 | image_resizer_text_proto = """ 38 | keep_aspect_ratio_resizer { 39 | min_dimension: 10 40 | max_dimension: 20 41 | } 42 | """ 43 | input_shape = (50, 25, 3) 44 | expected_output_shape = (20, 10, 3) 45 | output_shape = self._shape_of_resized_random_image_given_text_proto( 46 | input_shape, image_resizer_text_proto) 47 | self.assertEqual(output_shape, expected_output_shape) 48 | 49 | def test_built_fixed_shape_resizer_returns_expected_shape(self): 50 | image_resizer_text_proto = """ 51 | fixed_shape_resizer { 52 | height: 10 53 | width: 20 54 | } 55 | """ 56 | input_shape = (50, 25, 3) 57 | expected_output_shape = (10, 20, 3) 58 | output_shape = self._shape_of_resized_random_image_given_text_proto( 59 | input_shape, image_resizer_text_proto) 60 | self.assertEqual(output_shape, expected_output_shape) 61 | 62 | def test_raises_error_on_invalid_input(self): 63 | invalid_input = 'invalid_input' 64 | with self.assertRaises(ValueError): 65 | image_resizer_builder.build(invalid_input) 66 | 67 | 68 | if __name__ == '__main__': 69 | tf.test.main() 70 | 71 | -------------------------------------------------------------------------------- /object_detection/builders/input_reader_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Input reader builder. 17 | 18 | Creates data sources for DetectionModels from an InputReader config. See 19 | input_reader.proto for options. 20 | 21 | Note: If users wish to also use their own InputReaders with the Object 22 | Detection configuration framework, they should define their own builder function 23 | that wraps the build function.
24 | """ 25 | 26 | import tensorflow as tf 27 | 28 | from object_detection.data_decoders import tf_example_decoder 29 | from object_detection.protos import input_reader_pb2 30 | 31 | parallel_reader = tf.contrib.slim.parallel_reader 32 | 33 | 34 | def build(input_reader_config): 35 | """Builds a tensor dictionary based on the InputReader config. 36 | 37 | Args: 38 | input_reader_config: A input_reader_pb2.InputReader object. 39 | 40 | Returns: 41 | A tensor dict based on the input_reader_config. 42 | 43 | Raises: 44 | ValueError: On invalid input reader proto. 45 | """ 46 | if not isinstance(input_reader_config, input_reader_pb2.InputReader): 47 | raise ValueError('input_reader_config not of type ' 48 | 'input_reader_pb2.InputReader.') 49 | 50 | if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader': 51 | config = input_reader_config.tf_record_input_reader 52 | _, string_tensor = parallel_reader.parallel_read( 53 | config.input_path, 54 | reader_class=tf.TFRecordReader, 55 | num_epochs=(input_reader_config.num_epochs 56 | if input_reader_config.num_epochs else None), 57 | num_readers=input_reader_config.num_readers, 58 | shuffle=input_reader_config.shuffle, 59 | dtypes=[tf.string, tf.string], 60 | capacity=input_reader_config.queue_capacity, 61 | min_after_dequeue=input_reader_config.min_after_dequeue) 62 | 63 | return tf_example_decoder.TfExampleDecoder().Decode(string_tensor) 64 | 65 | raise ValueError('Unsupported input_reader_config.') 66 | -------------------------------------------------------------------------------- /object_detection/builders/input_reader_builder_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for input_reader_builder.""" 17 | 18 | import os 19 | import numpy as np 20 | import tensorflow as tf 21 | 22 | from google.protobuf import text_format 23 | 24 | from tensorflow.core.example import example_pb2 25 | from tensorflow.core.example import feature_pb2 26 | from object_detection.builders import input_reader_builder 27 | from object_detection.core import standard_fields as fields 28 | from object_detection.protos import input_reader_pb2 29 | 30 | 31 | class InputReaderBuilderTest(tf.test.TestCase): 32 | 33 | def create_tf_record(self): 34 | path = os.path.join(self.get_temp_dir(), 'tfrecord') 35 | writer = tf.python_io.TFRecordWriter(path) 36 | 37 | image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) 38 | with self.test_session(): 39 | encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval() 40 | example = example_pb2.Example(features=feature_pb2.Features(feature={ 41 | 'image/encoded': feature_pb2.Feature( 42 | bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])), 43 | 'image/format': feature_pb2.Feature( 44 | bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])), 45 | 'image/object/bbox/xmin': feature_pb2.Feature( 46 | float_list=feature_pb2.FloatList(value=[0.0])), 47 | 'image/object/bbox/xmax': feature_pb2.Feature( 48 | float_list=feature_pb2.FloatList(value=[1.0])), 49 | 'image/object/bbox/ymin': feature_pb2.Feature( 50 | float_list=feature_pb2.FloatList(value=[0.0])), 51 | 'image/object/bbox/ymax': feature_pb2.Feature( 52 | float_list=feature_pb2.FloatList(value=[1.0])), 53 | 'image/object/class/label': feature_pb2.Feature( 54 | int64_list=feature_pb2.Int64List(value=[2])), 55 | })) 56 | writer.write(example.SerializeToString()) 57 | writer.close() 58 | 59 | return path 60 | 61 | def test_build_tf_record_input_reader(self): 62 | tf_record_path = self.create_tf_record() 63 | 64 | input_reader_text_proto = """ 65 | shuffle: false 66 | num_readers: 1 67 | tf_record_input_reader {{ 68 | input_path: '{0}' 69 | }} 70 | """.format(tf_record_path) 71 | input_reader_proto = input_reader_pb2.InputReader() 72 | text_format.Merge(input_reader_text_proto, input_reader_proto) 73 | tensor_dict = input_reader_builder.build(input_reader_proto) 74 | 75 | sv = tf.train.Supervisor(logdir=self.get_temp_dir()) 76 | with sv.prepare_or_wait_for_session() as sess: 77 | sv.start_queue_runners(sess) 78 | output_dict = sess.run(tensor_dict) 79 | 80 | self.assertEquals( 81 | (4, 5, 3), output_dict[fields.InputDataFields.image].shape) 82 | self.assertEquals( 83 | [2], output_dict[fields.InputDataFields.groundtruth_classes]) 84 | self.assertEquals( 85 | (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape) 86 | self.assertAllEqual( 87 | [0.0, 0.0, 1.0, 1.0], 88 | output_dict[fields.InputDataFields.groundtruth_boxes][0]) 89 | 90 | 91 | if __name__ == '__main__': 92 | tf.test.main() 93 | -------------------------------------------------------------------------------- /object_detection/builders/matcher_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """A function to build an object detection matcher from configuration.""" 17 | 18 | from object_detection.matchers import argmax_matcher 19 | from object_detection.matchers import bipartite_matcher 20 | from object_detection.protos import matcher_pb2 21 | 22 | 23 | def build(matcher_config): 24 | """Builds a matcher object based on the matcher config. 25 | 26 | Args: 27 | matcher_config: A matcher.proto object containing the config for the desired 28 | Matcher. 29 | 30 | Returns: 31 | Matcher based on the config. 32 | 33 | Raises: 34 | ValueError: On empty matcher proto. 35 | """ 36 | if not isinstance(matcher_config, matcher_pb2.Matcher): 37 | raise ValueError('matcher_config not of type matcher_pb2.Matcher.') 38 | if matcher_config.WhichOneof('matcher_oneof') == 'argmax_matcher': 39 | matcher = matcher_config.argmax_matcher 40 | matched_threshold = unmatched_threshold = None 41 | if not matcher.ignore_thresholds: 42 | matched_threshold = matcher.matched_threshold 43 | unmatched_threshold = matcher.unmatched_threshold 44 | return argmax_matcher.ArgMaxMatcher( 45 | matched_threshold=matched_threshold, 46 | unmatched_threshold=unmatched_threshold, 47 | negatives_lower_than_unmatched=matcher.negatives_lower_than_unmatched, 48 | force_match_for_each_row=matcher.force_match_for_each_row) 49 | if matcher_config.WhichOneof('matcher_oneof') == 'bipartite_matcher': 50 | return bipartite_matcher.GreedyBipartiteMatcher() 51 | raise ValueError('Empty matcher.') 52 | -------------------------------------------------------------------------------- /object_detection/builders/post_processing_builder_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for post_processing_builder.""" 17 | 18 | import tensorflow as tf 19 | from google.protobuf import text_format 20 | from object_detection.builders import post_processing_builder 21 | from object_detection.protos import post_processing_pb2 22 | 23 | 24 | class PostProcessingBuilderTest(tf.test.TestCase): 25 | 26 | def test_build_non_max_suppressor_with_correct_parameters(self): 27 | post_processing_text_proto = """ 28 | batch_non_max_suppression { 29 | score_threshold: 0.7 30 | iou_threshold: 0.6 31 | max_detections_per_class: 100 32 | max_total_detections: 300 33 | } 34 | """ 35 | post_processing_config = post_processing_pb2.PostProcessing() 36 | text_format.Merge(post_processing_text_proto, post_processing_config) 37 | non_max_suppressor, _ = post_processing_builder.build( 38 | post_processing_config) 39 | self.assertEqual(non_max_suppressor.keywords['max_size_per_class'], 100) 40 | self.assertEqual(non_max_suppressor.keywords['max_total_size'], 300) 41 | self.assertAlmostEqual(non_max_suppressor.keywords['score_thresh'], 0.7) 42 | self.assertAlmostEqual(non_max_suppressor.keywords['iou_thresh'], 0.6) 43 | 44 | def test_build_identity_score_converter(self): 45 | post_processing_text_proto = """ 46 | score_converter: IDENTITY 47 | """ 48 | post_processing_config = post_processing_pb2.PostProcessing() 49 | text_format.Merge(post_processing_text_proto, post_processing_config) 50 | _, score_converter = post_processing_builder.build(post_processing_config) 51 | self.assertEqual(score_converter, tf.identity) 52 | 53 | def test_build_sigmoid_score_converter(self): 54 | post_processing_text_proto = """ 55 | score_converter: SIGMOID 56 | """ 57 | post_processing_config = post_processing_pb2.PostProcessing() 58 | text_format.Merge(post_processing_text_proto, post_processing_config) 59 | _, score_converter = post_processing_builder.build(post_processing_config) 60 | self.assertEqual(score_converter, tf.sigmoid) 61 | 62 | def test_build_softmax_score_converter(self): 63 | post_processing_text_proto = """ 64 | score_converter: SOFTMAX 65 | """ 66 | post_processing_config = post_processing_pb2.PostProcessing() 67 | text_format.Merge(post_processing_text_proto, post_processing_config) 68 | _, score_converter = post_processing_builder.build(post_processing_config) 69 | self.assertEqual(score_converter, tf.nn.softmax) 70 | 71 | 72 | if __name__ == '__main__': 73 | tf.test.main() 74 | -------------------------------------------------------------------------------- /object_detection/builders/region_similarity_calculator_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ==============================================================================
15 | 
16 | """Builder for region similarity calculators."""
17 | 
18 | from object_detection.core import region_similarity_calculator
19 | from object_detection.protos import region_similarity_calculator_pb2
20 | 
21 | 
22 | def build(region_similarity_calculator_config):
23 |   """Builds region similarity calculator based on the configuration.
24 | 
25 |   Builds one of [IouSimilarity, IoaSimilarity, NegSqDistSimilarity] objects. See
26 |   protos/region_similarity_calculator.proto for details.
27 | 
28 |   Args:
29 |     region_similarity_calculator_config: RegionSimilarityCalculator
30 |       configuration proto.
31 | 
32 |   Returns:
33 |     region_similarity_calculator: RegionSimilarityCalculator object.
34 | 
35 |   Raises:
36 |     ValueError: On unknown region similarity calculator.
37 |   """
38 | 
39 |   if not isinstance(
40 |       region_similarity_calculator_config,
41 |       region_similarity_calculator_pb2.RegionSimilarityCalculator):
42 |     raise ValueError(
43 |         'region_similarity_calculator_config not of type '
44 |         'region_similarity_calculator_pb2.RegionSimilarityCalculator')
45 | 
46 |   similarity_calculator = region_similarity_calculator_config.WhichOneof(
47 |       'region_similarity')
48 |   if similarity_calculator == 'iou_similarity':
49 |     return region_similarity_calculator.IouSimilarity()
50 |   if similarity_calculator == 'ioa_similarity':
51 |     return region_similarity_calculator.IoaSimilarity()
52 |   if similarity_calculator == 'neg_sq_dist_similarity':
53 |     return region_similarity_calculator.NegSqDistSimilarity()
54 | 
55 |   raise ValueError('Unknown region similarity calculator.')
56 | 
--------------------------------------------------------------------------------
/object_detection/builders/region_similarity_calculator_builder_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================== 15 | 16 | """Tests for region_similarity_calculator_builder.""" 17 | 18 | import tensorflow as tf 19 | 20 | from google.protobuf import text_format 21 | from object_detection.builders import region_similarity_calculator_builder 22 | from object_detection.core import region_similarity_calculator 23 | from object_detection.protos import region_similarity_calculator_pb2 as sim_calc_pb2 24 | 25 | 26 | class RegionSimilarityCalculatorBuilderTest(tf.test.TestCase): 27 | 28 | def testBuildIoaSimilarityCalculator(self): 29 | similarity_calc_text_proto = """ 30 | ioa_similarity { 31 | } 32 | """ 33 | similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator() 34 | text_format.Merge(similarity_calc_text_proto, similarity_calc_proto) 35 | similarity_calc = region_similarity_calculator_builder.build( 36 | similarity_calc_proto) 37 | self.assertTrue(isinstance(similarity_calc, 38 | region_similarity_calculator.IoaSimilarity)) 39 | 40 | def testBuildIouSimilarityCalculator(self): 41 | similarity_calc_text_proto = """ 42 | iou_similarity { 43 | } 44 | """ 45 | similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator() 46 | text_format.Merge(similarity_calc_text_proto, similarity_calc_proto) 47 | similarity_calc = region_similarity_calculator_builder.build( 48 | similarity_calc_proto) 49 | self.assertTrue(isinstance(similarity_calc, 50 | region_similarity_calculator.IouSimilarity)) 51 | 52 | def testBuildNegSqDistSimilarityCalculator(self): 53 | similarity_calc_text_proto = """ 54 | neg_sq_dist_similarity { 55 | } 56 | """ 57 | similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator() 58 | text_format.Merge(similarity_calc_text_proto, similarity_calc_proto) 59 | similarity_calc = region_similarity_calculator_builder.build( 60 | similarity_calc_proto) 61 | self.assertTrue(isinstance(similarity_calc, 62 | region_similarity_calculator. 63 | NegSqDistSimilarity)) 64 | 65 | 66 | if __name__ == '__main__': 67 | tf.test.main() 68 | -------------------------------------------------------------------------------- /object_detection/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/core/__init__.py -------------------------------------------------------------------------------- /object_detection/core/balanced_positive_negative_sampler_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for object_detection.core.balanced_positive_negative_sampler.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.core import balanced_positive_negative_sampler 22 | 23 | 24 | class BalancedPositiveNegativeSamplerTest(tf.test.TestCase): 25 | 26 | def test_subsample_all_examples(self): 27 | numpy_labels = np.random.permutation(300) 28 | indicator = tf.constant(np.ones(300) == 1) 29 | numpy_labels = (numpy_labels - 200) > 0 30 | 31 | labels = tf.constant(numpy_labels) 32 | 33 | sampler = (balanced_positive_negative_sampler. 34 | BalancedPositiveNegativeSampler()) 35 | is_sampled = sampler.subsample(indicator, 64, labels) 36 | with self.test_session() as sess: 37 | is_sampled = sess.run(is_sampled) 38 | self.assertTrue(sum(is_sampled) == 64) 39 | self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 32) 40 | self.assertTrue(sum(np.logical_and( 41 | np.logical_not(numpy_labels), is_sampled)) == 32) 42 | 43 | def test_subsample_selection(self): 44 | # Test random sampling when only some examples can be sampled: 45 | # 100 samples, 20 positives, 10 positives cannot be sampled 46 | numpy_labels = np.arange(100) 47 | numpy_indicator = numpy_labels < 90 48 | indicator = tf.constant(numpy_indicator) 49 | numpy_labels = (numpy_labels - 80) >= 0 50 | 51 | labels = tf.constant(numpy_labels) 52 | 53 | sampler = (balanced_positive_negative_sampler. 54 | BalancedPositiveNegativeSampler()) 55 | is_sampled = sampler.subsample(indicator, 64, labels) 56 | with self.test_session() as sess: 57 | is_sampled = sess.run(is_sampled) 58 | self.assertTrue(sum(is_sampled) == 64) 59 | self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10) 60 | self.assertTrue(sum(np.logical_and( 61 | np.logical_not(numpy_labels), is_sampled)) == 54) 62 | self.assertAllEqual(is_sampled, np.logical_and(is_sampled, 63 | numpy_indicator)) 64 | 65 | def test_raises_error_with_incorrect_label_shape(self): 66 | labels = tf.constant([[True, False, False]]) 67 | indicator = tf.constant([True, False, True]) 68 | sampler = (balanced_positive_negative_sampler. 69 | BalancedPositiveNegativeSampler()) 70 | with self.assertRaises(ValueError): 71 | sampler.subsample(indicator, 64, labels) 72 | 73 | def test_raises_error_with_incorrect_indicator_shape(self): 74 | labels = tf.constant([True, False, False]) 75 | indicator = tf.constant([[True, False, True]]) 76 | sampler = (balanced_positive_negative_sampler. 77 | BalancedPositiveNegativeSampler()) 78 | with self.assertRaises(ValueError): 79 | sampler.subsample(indicator, 64, labels) 80 | 81 | 82 | if __name__ == '__main__': 83 | tf.test.main() 84 | -------------------------------------------------------------------------------- /object_detection/core/box_coder_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.core.box_coder.""" 17 | 18 | import tensorflow as tf 19 | 20 | from object_detection.core import box_coder 21 | from object_detection.core import box_list 22 | 23 | 24 | class MockBoxCoder(box_coder.BoxCoder): 25 | """Test BoxCoder that encodes/decodes using the multiply-by-two function.""" 26 | 27 | def code_size(self): 28 | return 4 29 | 30 | def _encode(self, boxes, anchors): 31 | return 2.0 * boxes.get() 32 | 33 | def _decode(self, rel_codes, anchors): 34 | return box_list.BoxList(rel_codes / 2.0) 35 | 36 | 37 | class BoxCoderTest(tf.test.TestCase): 38 | 39 | def test_batch_decode(self): 40 | mock_anchor_corners = tf.constant( 41 | [[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32) 42 | mock_anchors = box_list.BoxList(mock_anchor_corners) 43 | mock_box_coder = MockBoxCoder() 44 | 45 | expected_boxes = [[[0.0, 0.1, 0.5, 0.6], [0.5, 0.6, 0.7, 0.8]], 46 | [[0.1, 0.2, 0.3, 0.4], [0.7, 0.8, 0.9, 1.0]]] 47 | 48 | encoded_boxes_list = [mock_box_coder.encode( 49 | box_list.BoxList(tf.constant(boxes)), mock_anchors) 50 | for boxes in expected_boxes] 51 | encoded_boxes = tf.stack(encoded_boxes_list) 52 | decoded_boxes = box_coder.batch_decode( 53 | encoded_boxes, mock_box_coder, mock_anchors) 54 | 55 | with self.test_session() as sess: 56 | decoded_boxes_result = sess.run(decoded_boxes) 57 | self.assertAllClose(expected_boxes, decoded_boxes_result) 58 | 59 | 60 | if __name__ == '__main__': 61 | tf.test.main() 62 | -------------------------------------------------------------------------------- /object_detection/core/data_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Interface for data decoders. 17 | 18 | Data decoders decode the input data and return a dictionary of tensors keyed by 19 | the entries in core.reader.Fields. 20 | """ 21 | from abc import ABCMeta 22 | from abc import abstractmethod 23 | 24 | 25 | class DataDecoder(object): 26 | """Interface for data decoders.""" 27 | __metaclass__ = ABCMeta 28 | 29 | # TODO: snake_case this method. 30 | @abstractmethod 31 | def Decode(self, data): 32 | """Return a single image and associated labels. 33 | 34 | Args: 35 | data: a string tensor holding a serialized protocol buffer corresponding 36 | to data for a single image. 37 | 38 | Returns: 39 | tensor_dict: a dictionary containing tensors. Possible keys are defined in 40 | reader.Fields. 
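
    Example:
      A minimal sketch of a conforming decoder. The class name and the
      single-field feature spec below are illustrative only, not part of
      this interface (assumes `import tensorflow as tf`):

        class JpegImageDecoder(DataDecoder):

          def Decode(self, data):
            # Parse a single serialized tf.Example holding one JPEG image.
            features = tf.parse_single_example(
                data, {'image/encoded': tf.FixedLenFeature((), tf.string)})
            image = tf.image.decode_jpeg(features['image/encoded'], channels=3)
            return {'image': image}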
41 |     """
42 |     pass
43 | 
--------------------------------------------------------------------------------
/object_detection/core/minibatch_sampler.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Base minibatch sampler module.
17 | 
18 | The job of the minibatch_sampler is to subsample a minibatch based on some
19 | criterion.
20 | 
21 | The main function call is:
22 |     subsample(indicator, batch_size, **params).
23 | Indicator is a 1d boolean tensor where True denotes which examples can be
24 | sampled. It returns a boolean indicator where True denotes an example has been
25 | sampled.
26 | 
27 | Subclasses should implement the subsample function and can make use of the
28 | @staticmethod subsample_indicator.
29 | """
30 | 
31 | from abc import ABCMeta
32 | from abc import abstractmethod
33 | 
34 | import tensorflow as tf
35 | 
36 | from object_detection.utils import ops
37 | 
38 | 
39 | class MinibatchSampler(object):
40 |   """Abstract base class for subsampling minibatches."""
41 |   __metaclass__ = ABCMeta
42 | 
43 |   def __init__(self):
44 |     """Constructs a minibatch sampler."""
45 |     pass
46 | 
47 |   @abstractmethod
48 |   def subsample(self, indicator, batch_size, **params):
49 |     """Returns subsample of entries in indicator.
50 | 
51 |     Args:
52 |       indicator: boolean tensor of shape [N] whose True entries can be sampled.
53 |       batch_size: desired batch size.
54 |       **params: additional keyword arguments for specific implementations of
55 |           the MinibatchSampler.
56 | 
57 |     Returns:
58 |       sample_indicator: boolean tensor of shape [N] whose True entries have been
59 |       sampled. If sum(indicator) >= batch_size, sum(sample_indicator) == batch_size.
60 |     """
61 |     pass
62 | 
63 |   @staticmethod
64 |   def subsample_indicator(indicator, num_samples):
65 |     """Subsample indicator vector.
66 | 
67 |     Given a boolean indicator vector with M elements set to `True`, the function
68 |     assigns all but `num_samples` of these previously `True` elements to
69 |     `False`. If `num_samples` is greater than M, the original indicator vector
70 |     is returned.
71 | 
72 |     Args:
73 |       indicator: a 1-dimensional boolean tensor indicating which elements
74 |         are allowed to be sampled and which are not.
75 |       num_samples: int32 scalar tensor
76 | 
77 |     Returns:
78 |       a boolean tensor with the same shape as input (indicator) tensor
79 |     """
80 |     indices = tf.where(indicator)
81 |     indices = tf.random_shuffle(indices)
82 |     indices = tf.reshape(indices, [-1])
83 | 
84 |     num_samples = tf.minimum(tf.size(indices), num_samples)
85 |     selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))
86 | 
87 |     selected_indicator = ops.indices_to_dense_vector(selected_indices,
88 |                                                      tf.shape(indicator)[0])
89 | 
90 |     return tf.equal(selected_indicator, 1)
91 | 
--------------------------------------------------------------------------------
/object_detection/core/minibatch_sampler_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Tests for object_detection.core.minibatch_sampler."""
17 | 
18 | import numpy as np
19 | import tensorflow as tf
20 | 
21 | from object_detection.core import minibatch_sampler
22 | 
23 | 
24 | class MinibatchSamplerTest(tf.test.TestCase):
25 | 
26 |   def test_subsample_indicator_when_more_true_elements_than_num_samples(self):
27 |     np_indicator = [True, False, True, False, True, True, False]
28 |     indicator = tf.constant(np_indicator)
29 |     samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
30 |         indicator, 3)
31 |     with self.test_session() as sess:
32 |       samples_out = sess.run(samples)
33 |       self.assertEqual(np.sum(samples_out), 3)
34 |       self.assertAllEqual(samples_out,
35 |                           np.logical_and(samples_out, np_indicator))
36 | 
37 |   def test_subsample_when_more_true_elements_than_num_samples_no_shape(self):
38 |     np_indicator = [True, False, True, False, True, True, False]
39 |     indicator = tf.placeholder(tf.bool)
40 |     feed_dict = {indicator: np_indicator}
41 | 
42 |     samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
43 |         indicator, 3)
44 |     with self.test_session() as sess:
45 |       samples_out = sess.run(samples, feed_dict=feed_dict)
46 |       self.assertEqual(np.sum(samples_out), 3)
47 |       self.assertAllEqual(samples_out,
48 |                           np.logical_and(samples_out, np_indicator))
49 | 
50 |   def test_subsample_indicator_when_less_true_elements_than_num_samples(self):
51 |     np_indicator = [True, False, True, False, True, True, False]
52 |     indicator = tf.constant(np_indicator)
53 |     samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
54 |         indicator, 5)
55 |     with self.test_session() as sess:
56 |       samples_out = sess.run(samples)
57 |       self.assertEqual(np.sum(samples_out), 4)
58 |       self.assertAllEqual(samples_out,
59 |                           np.logical_and(samples_out, np_indicator))
60 | 
61 |   def test_subsample_indicator_when_num_samples_is_zero(self):
62 |     np_indicator = [True, False, True, False, True, True, False]
63 |     indicator = tf.constant(np_indicator)
64 |     samples_none = 
minibatch_sampler.MinibatchSampler.subsample_indicator( 65 | indicator, 0) 66 | with self.test_session() as sess: 67 | samples_none_out = sess.run(samples_none) 68 | self.assertAllEqual( 69 | np.zeros_like(samples_none_out, dtype=bool), 70 | samples_none_out) 71 | 72 | def test_subsample_indicator_when_indicator_all_false(self): 73 | indicator_empty = tf.zeros([0], dtype=tf.bool) 74 | samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator( 75 | indicator_empty, 4) 76 | with self.test_session() as sess: 77 | samples_empty_out = sess.run(samples_empty) 78 | self.assertEqual(0, samples_empty_out.size) 79 | 80 | 81 | if __name__ == '__main__': 82 | tf.test.main() 83 | -------------------------------------------------------------------------------- /object_detection/core/prefetcher.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Provides functions to prefetch tensors to feed into models.""" 17 | import tensorflow as tf 18 | 19 | 20 | def prefetch(tensor_dict, capacity): 21 | """Creates a prefetch queue for tensors. 22 | 23 | Creates a FIFO queue to asynchronously enqueue tensor_dicts and returns a 24 | dequeue op that evaluates to a tensor_dict. This function is useful in 25 | prefetching preprocessed tensors so that the data is readily available for 26 | consumers. 27 | 28 | Example input pipeline when you don't need batching: 29 | ---------------------------------------------------- 30 | key, string_tensor = slim.parallel_reader.parallel_read(...) 31 | tensor_dict = decoder.decode(string_tensor) 32 | tensor_dict = preprocessor.preprocess(tensor_dict, ...) 33 | prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20) 34 | tensor_dict = prefetch_queue.dequeue() 35 | outputs = Model(tensor_dict) 36 | ... 37 | ---------------------------------------------------- 38 | 39 | For input pipelines with batching, refer to core/batcher.py 40 | 41 | Args: 42 | tensor_dict: a dictionary of tensors to prefetch. 43 | capacity: the size of the prefetch queue. 44 | 45 | Returns: 46 | a FIFO prefetcher queue 47 | """ 48 | names = tensor_dict.keys() 49 | dtypes = [t.dtype for t in tensor_dict.values()] 50 | shapes = [t.get_shape() for t in tensor_dict.values()] 51 | prefetch_queue = tf.PaddingFIFOQueue(capacity, dtypes=dtypes, 52 | shapes=shapes, 53 | names=names, 54 | name='prefetch_queue') 55 | enqueue_op = prefetch_queue.enqueue(tensor_dict) 56 | tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner( 57 | prefetch_queue, [enqueue_op])) 58 | tf.summary.scalar('queue/%s/fraction_of_%d_full' % (prefetch_queue.name, 59 | capacity), 60 | tf.to_float(prefetch_queue.size()) * (1. 
/ capacity)) 61 | return prefetch_queue 62 | -------------------------------------------------------------------------------- /object_detection/core/region_similarity_calculator_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for region_similarity_calculator.""" 17 | import tensorflow as tf 18 | 19 | from object_detection.core import box_list 20 | from object_detection.core import region_similarity_calculator 21 | 22 | 23 | class RegionSimilarityCalculatorTest(tf.test.TestCase): 24 | 25 | def test_get_correct_pairwise_similarity_based_on_iou(self): 26 | corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) 27 | corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 28 | [0.0, 0.0, 20.0, 20.0]]) 29 | exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]] 30 | boxes1 = box_list.BoxList(corners1) 31 | boxes2 = box_list.BoxList(corners2) 32 | iou_similarity_calculator = region_similarity_calculator.IouSimilarity() 33 | iou_similarity = iou_similarity_calculator.compare(boxes1, boxes2) 34 | with self.test_session() as sess: 35 | iou_output = sess.run(iou_similarity) 36 | self.assertAllClose(iou_output, exp_output) 37 | 38 | def test_get_correct_pairwise_similarity_based_on_squared_distances(self): 39 | corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0], 40 | [1.0, 1.0, 0.0, 2.0]]) 41 | corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0], 42 | [-4.0, 0.0, 0.0, 3.0], 43 | [0.0, 0.0, 0.0, 0.0]]) 44 | exp_output = [[-26, -25, 0], [-18, -27, -6]] 45 | boxes1 = box_list.BoxList(corners1) 46 | boxes2 = box_list.BoxList(corners2) 47 | dist_similarity_calc = region_similarity_calculator.NegSqDistSimilarity() 48 | dist_similarity = dist_similarity_calc.compare(boxes1, boxes2) 49 | with self.test_session() as sess: 50 | dist_output = sess.run(dist_similarity) 51 | self.assertAllClose(dist_output, exp_output) 52 | 53 | def test_get_correct_pairwise_similarity_based_on_ioa(self): 54 | corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) 55 | corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 56 | [0.0, 0.0, 20.0, 20.0]]) 57 | exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0], 58 | [1.0 / 12.0, 0.0, 5.0 / 400.0]] 59 | exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0], 60 | [0, 0], 61 | [6.0 / 6.0, 5.0 / 5.0]] 62 | boxes1 = box_list.BoxList(corners1) 63 | boxes2 = box_list.BoxList(corners2) 64 | ioa_similarity_calculator = region_similarity_calculator.IoaSimilarity() 65 | ioa_similarity_1 = ioa_similarity_calculator.compare(boxes1, boxes2) 66 | ioa_similarity_2 = ioa_similarity_calculator.compare(boxes2, boxes1) 67 | with self.test_session() as sess: 68 | iou_output_1, iou_output_2 = sess.run( 69 | [ioa_similarity_1, ioa_similarity_2]) 70 | 
self.assertAllClose(iou_output_1, exp_output_1) 71 | self.assertAllClose(iou_output_2, exp_output_2) 72 | 73 | 74 | if __name__ == '__main__': 75 | tf.test.main() 76 | -------------------------------------------------------------------------------- /object_detection/data/pascal_label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | id: 0 3 | name: 'none_of_the_above' 4 | } 5 | 6 | item { 7 | id: 1 8 | name: 'aeroplane' 9 | } 10 | 11 | item { 12 | id: 2 13 | name: 'bicycle' 14 | } 15 | 16 | item { 17 | id: 3 18 | name: 'bird' 19 | } 20 | 21 | item { 22 | id: 4 23 | name: 'boat' 24 | } 25 | 26 | item { 27 | id: 5 28 | name: 'bottle' 29 | } 30 | 31 | item { 32 | id: 6 33 | name: 'bus' 34 | } 35 | 36 | item { 37 | id: 7 38 | name: 'car' 39 | } 40 | 41 | item { 42 | id: 8 43 | name: 'cat' 44 | } 45 | 46 | item { 47 | id: 9 48 | name: 'chair' 49 | } 50 | 51 | item { 52 | id: 10 53 | name: 'cow' 54 | } 55 | 56 | item { 57 | id: 11 58 | name: 'diningtable' 59 | } 60 | 61 | item { 62 | id: 12 63 | name: 'dog' 64 | } 65 | 66 | item { 67 | id: 13 68 | name: 'horse' 69 | } 70 | 71 | item { 72 | id: 14 73 | name: 'motorbike' 74 | } 75 | 76 | item { 77 | id: 15 78 | name: 'person' 79 | } 80 | 81 | item { 82 | id: 16 83 | name: 'pottedplant' 84 | } 85 | 86 | item { 87 | id: 17 88 | name: 'sheep' 89 | } 90 | 91 | item { 92 | id: 18 93 | name: 'sofa' 94 | } 95 | 96 | item { 97 | id: 19 98 | name: 'train' 99 | } 100 | 101 | item { 102 | id: 20 103 | name: 'tvmonitor' 104 | } 105 | -------------------------------------------------------------------------------- /object_detection/data/pet_label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | id: 0 3 | name: 'none_of_the_above' 4 | } 5 | 6 | item { 7 | id: 1 8 | name: 'Abyssinian' 9 | } 10 | 11 | item { 12 | id: 2 13 | name: 'american_bulldog' 14 | } 15 | 16 | item { 17 | id: 3 18 | name: 'american_pit_bull_terrier' 19 | } 20 | 21 | item { 22 | id: 4 23 | name: 'basset_hound' 24 | } 25 | 26 | item { 27 | id: 5 28 | name: 'beagle' 29 | } 30 | 31 | item { 32 | id: 6 33 | name: 'Bengal' 34 | } 35 | 36 | item { 37 | id: 7 38 | name: 'Birman' 39 | } 40 | 41 | item { 42 | id: 8 43 | name: 'Bombay' 44 | } 45 | 46 | item { 47 | id: 9 48 | name: 'boxer' 49 | } 50 | 51 | item { 52 | id: 10 53 | name: 'British_Shorthair' 54 | } 55 | 56 | item { 57 | id: 11 58 | name: 'chihuahua' 59 | } 60 | 61 | item { 62 | id: 12 63 | name: 'Egyptian_Mau' 64 | } 65 | 66 | item { 67 | id: 13 68 | name: 'english_cocker_spaniel' 69 | } 70 | 71 | item { 72 | id: 14 73 | name: 'english_setter' 74 | } 75 | 76 | item { 77 | id: 15 78 | name: 'german_shorthaired' 79 | } 80 | 81 | item { 82 | id: 16 83 | name: 'great_pyrenees' 84 | } 85 | 86 | item { 87 | id: 17 88 | name: 'havanese' 89 | } 90 | 91 | item { 92 | id: 18 93 | name: 'japanese_chin' 94 | } 95 | 96 | item { 97 | id: 19 98 | name: 'keeshond' 99 | } 100 | 101 | item { 102 | id: 20 103 | name: 'leonberger' 104 | } 105 | 106 | item { 107 | id: 21 108 | name: 'Maine_Coon' 109 | } 110 | 111 | item { 112 | id: 22 113 | name: 'miniature_pinscher' 114 | } 115 | 116 | item { 117 | id: 23 118 | name: 'newfoundland' 119 | } 120 | 121 | item { 122 | id: 24 123 | name: 'Persian' 124 | } 125 | 126 | item { 127 | id: 25 128 | name: 'pomeranian' 129 | } 130 | 131 | item { 132 | id: 26 133 | name: 'pug' 134 | } 135 | 136 | item { 137 | id: 27 138 | name: 'Ragdoll' 139 | } 140 | 141 | item { 142 | id: 28 143 | name: 'Russian_Blue' 144 | 
} 145 | 146 | item { 147 | id: 29 148 | name: 'saint_bernard' 149 | } 150 | 151 | item { 152 | id: 30 153 | name: 'samoyed' 154 | } 155 | 156 | item { 157 | id: 31 158 | name: 'scottish_terrier' 159 | } 160 | 161 | item { 162 | id: 32 163 | name: 'shiba_inu' 164 | } 165 | 166 | item { 167 | id: 33 168 | name: 'Siamese' 169 | } 170 | 171 | item { 172 | id: 34 173 | name: 'Sphynx' 174 | } 175 | 176 | item { 177 | id: 35 178 | name: 'staffordshire_bull_terrier' 179 | } 180 | 181 | item { 182 | id: 36 183 | name: 'wheaten_terrier' 184 | } 185 | 186 | item { 187 | id: 37 188 | name: 'yorkshire_terrier' 189 | } 190 | -------------------------------------------------------------------------------- /object_detection/data_decoders/BUILD: -------------------------------------------------------------------------------- 1 | # Tensorflow Object Detection API: data decoders. 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | ) 6 | 7 | licenses(["notice"]) 8 | # Apache 2.0 9 | 10 | py_library( 11 | name = "tf_example_decoder", 12 | srcs = ["tf_example_decoder.py"], 13 | deps = [ 14 | "//tensorflow", 15 | "//tensorflow_models/object_detection/core:data_decoder", 16 | "//tensorflow_models/object_detection/core:standard_fields", 17 | ], 18 | ) 19 | 20 | py_test( 21 | name = "tf_example_decoder_test", 22 | srcs = ["tf_example_decoder_test.py"], 23 | deps = [ 24 | ":tf_example_decoder", 25 | "//tensorflow", 26 | "//tensorflow_models/object_detection/core:standard_fields", 27 | ], 28 | ) 29 | -------------------------------------------------------------------------------- /object_detection/data_decoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/data_decoders/__init__.py -------------------------------------------------------------------------------- /object_detection/export_inference_graph.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | r"""Tool to export an object detection model for inference. 17 | 18 | Prepares an object detection tensorflow graph for inference using model 19 | configuration and an optional trained checkpoint. 20 | 21 | The inference graph contains one of two input nodes depending on the user 22 | specified option. 23 | * `image_tensor`: Accepts a uint8 4-D tensor of shape [1, None, None, 3] 24 | * `tf_example`: Accepts a serialized TFExample proto. The batch size in this 25 | case is always 1. 26 | 27 | and the following output nodes: 28 | * `num_detections` : Outputs float32 tensors of the form [batch] 29 | that specifies the number of valid boxes per image in the batch. 
30 | * `detection_boxes` : Outputs float32 tensors of the form
31 | [batch, num_boxes, 4] containing detected boxes.
32 | * `detection_scores` : Outputs float32 tensors of the form
33 | [batch, num_boxes] containing class scores for the detections.
34 | * `detection_classes`: Outputs float32 tensors of the form
35 | [batch, num_boxes] containing classes for the detections.
36 | 
37 | Note that currently `batch` is always 1, but we will support `batch` > 1 in
38 | the future.
39 | 
40 | Optionally, one can freeze the graph by converting the weights in the provided
41 | checkpoint into graph constants, thereby eliminating the need to use a
42 | checkpoint file during inference.
43 | 
44 | Note that this tool uses `use_moving_averages` from eval_config to decide
45 | which weights to freeze.
46 | 
47 | Example Usage:
48 | --------------
49 | python export_inference_graph.py \
50 |     --input_type image_tensor \
51 |     --pipeline_config_path path/to/ssd_inception_v2.config \
52 |     --checkpoint_path path/to/model-ckpt \
53 |     --inference_graph_path path/to/inference_graph.pb
54 | """
55 | import tensorflow as tf
56 | from google.protobuf import text_format
57 | from object_detection import exporter
58 | from object_detection.protos import pipeline_pb2
59 | 
60 | slim = tf.contrib.slim
61 | flags = tf.app.flags
62 | 
63 | flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
64 |                     'one of [`image_tensor`, `tf_example`]')
65 | flags.DEFINE_string('pipeline_config_path', '',
66 |                     'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
67 |                     'file.')
68 | flags.DEFINE_string('checkpoint_path', '', 'Optional path to checkpoint file. '
69 |                     'If provided, bakes the weights from the checkpoint into '
70 |                     'the graph.')
71 | flags.DEFINE_string('inference_graph_path', '', 'Path to write the output '
72 |                     'inference graph.')
73 | 
74 | FLAGS = flags.FLAGS
75 | 
76 | 
77 | def main(_):
78 |   assert FLAGS.pipeline_config_path, 'TrainEvalPipelineConfig missing.'
79 |   assert FLAGS.inference_graph_path, 'Inference graph path missing.'
80 |   assert FLAGS.input_type, 'Input type missing.'
81 |   pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
82 |   with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
83 |     text_format.Merge(f.read(), pipeline_config)
84 |   exporter.export_inference_graph(FLAGS.input_type, pipeline_config,
85 |                                   FLAGS.checkpoint_path,
86 |                                   FLAGS.inference_graph_path)
87 | 
88 | 
89 | if __name__ == '__main__':
90 |   tf.app.run()
91 | 
--------------------------------------------------------------------------------
/object_detection/g3doc/detection_model_zoo.md:
--------------------------------------------------------------------------------
1 | # Tensorflow detection model zoo
2 | 
3 | We provide a collection of detection models pre-trained on the
4 | [COCO dataset](http://mscoco.org).
5 | These models can be useful for out-of-the-box inference if you are interested
6 | in categories already in COCO (e.g., humans, cars, etc).
7 | They are also useful for initializing your models when training on novel
8 | datasets.
9 | 
10 | In the table below, we list each such pre-trained model including:
11 | 
12 | * a model name that corresponds to a config file that was used to train this
13 |   model in the `samples/configs` directory,
14 | * a download link to a tar.gz file containing the pre-trained model,
15 | * model speed (one of {slow, medium, fast}),
16 | * detector performance on COCO data as measured by the COCO mAP measure.
17 |   Here, higher is better, and we only report bounding box mAP rounded to the
18 |   nearest integer.
19 | * Output types (currently only `Boxes`)
20 | 
21 | You can un-tar each tar.gz file via, e.g.,:
22 | 
23 | ```
24 | tar -xzvf ssd_mobilenet_v1_coco.tar.gz
25 | ```
26 | 
27 | Inside the un-tar'ed directory, you will find:
28 | 
29 | * a graph proto (`graph.pbtxt`)
30 | * a checkpoint
31 |   (`model.ckpt.data-00000-of-00001`, `model.ckpt.index`, `model.ckpt.meta`)
32 | * a frozen graph proto with weights baked into the graph as constants
33 |   (`frozen_inference_graph.pb`) to be used for out of the box inference
34 |   (try this out in the Jupyter notebook!)
35 | 
36 | | Model name | Speed | COCO mAP | Outputs |
37 | | ------------ | :--------------: | :--------------: | :-------------: |
38 | | [ssd_mobilenet_v1_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_11_06_2017.tar.gz) | fast | 21 | Boxes |
39 | | [ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_11_06_2017.tar.gz) | fast | 24 | Boxes |
40 | | [rfcn_resnet101_coco](http://download.tensorflow.org/models/object_detection/rfcn_resnet101_coco_11_06_2017.tar.gz) | medium | 30 | Boxes |
41 | | [faster_rcnn_resnet101_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz) | medium | 32 | Boxes |
42 | | [faster_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_coco_11_06_2017.tar.gz) | slow | 37 | Boxes |
43 | 
--------------------------------------------------------------------------------
/object_detection/g3doc/exporting_models.md:
--------------------------------------------------------------------------------
1 | # Exporting a trained model for inference
2 | 
3 | After your model has been trained, you should export it to a Tensorflow
4 | graph proto. A checkpoint will typically consist of three files:
5 | 
6 | * model.ckpt-${CHECKPOINT_NUMBER}.data-00000-of-00001
7 | * model.ckpt-${CHECKPOINT_NUMBER}.index
8 | * model.ckpt-${CHECKPOINT_NUMBER}.meta
9 | 
10 | After you've identified a candidate checkpoint to export, run the following
11 | command from the tensorflow/models directory:
12 | 
13 | ``` bash
14 | # From tensorflow/models
15 | python object_detection/export_inference_graph.py \
16 |     --input_type image_tensor \
17 |     --pipeline_config_path ${PIPELINE_CONFIG_PATH} \
18 |     --checkpoint_path model.ckpt-${CHECKPOINT_NUMBER} \
19 |     --inference_graph_path output_inference_graph.pb
20 | ```
21 | 
22 | Afterwards, you should see a graph named output_inference_graph.pb.
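
The frozen graph can then be loaded for inference without the original
checkpoint files. Below is a minimal, illustrative sketch of doing so in
Python; the tensor names follow the input/output node convention documented
in `export_inference_graph.py`, and the all-zeros image is a stand-in for a
real input batch:

``` python
import numpy as np
import tensorflow as tf

# Load the serialized GraphDef from the exported file.
graph_def = tf.GraphDef()
with tf.gfile.GFile('output_inference_graph.pb', 'rb') as f:
  graph_def.ParseFromString(f.read())

# Import it into a fresh graph and run one inference step.
with tf.Graph().as_default() as graph:
  tf.import_graph_def(graph_def, name='')

with tf.Session(graph=graph) as sess:
  image = np.zeros((1, 300, 300, 3), dtype=np.uint8)  # placeholder input
  boxes, scores, classes, num_detections = sess.run(
      ['detection_boxes:0', 'detection_scores:0',
       'detection_classes:0', 'num_detections:0'],
      feed_dict={'image_tensor:0': image})
```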
23 | 
--------------------------------------------------------------------------------
/object_detection/g3doc/img/dogs_detections_output.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/g3doc/img/dogs_detections_output.jpg
--------------------------------------------------------------------------------
/object_detection/g3doc/img/kites_detections_output.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/g3doc/img/kites_detections_output.jpg
--------------------------------------------------------------------------------
/object_detection/g3doc/img/oxford_pet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/g3doc/img/oxford_pet.png
--------------------------------------------------------------------------------
/object_detection/g3doc/img/tensorboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/g3doc/img/tensorboard.png
--------------------------------------------------------------------------------
/object_detection/g3doc/img/tensorboard2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/g3doc/img/tensorboard2.png
--------------------------------------------------------------------------------
/object_detection/g3doc/installation.md:
--------------------------------------------------------------------------------
1 | # Installation
2 | 
3 | ## Dependencies
4 | 
5 | Tensorflow Object Detection API depends on the following libraries:
6 | 
7 | * Protobuf 2.6
8 | * Pillow 1.0
9 | * lxml
10 | * tf Slim (which is included in the "tensorflow/models" checkout)
11 | * Jupyter notebook
12 | * Matplotlib
13 | * Tensorflow
14 | 
15 | For detailed steps to install Tensorflow, follow the
16 | [Tensorflow installation instructions](https://www.tensorflow.org/install/).
17 | A typical user can install Tensorflow using one of the following commands:
18 | 
19 | ``` bash
20 | # For CPU
21 | pip install tensorflow
22 | # For GPU
23 | pip install tensorflow-gpu
24 | ```
25 | 
26 | The remaining libraries can be installed on Ubuntu 16.04 via apt-get:
27 | 
28 | ``` bash
29 | sudo apt-get install protobuf-compiler python-pil python-lxml
30 | sudo pip install jupyter
31 | sudo pip install matplotlib
32 | ```
33 | 
34 | Alternatively, users can install dependencies using pip:
35 | 
36 | ``` bash
37 | sudo pip install pillow
38 | sudo pip install lxml
39 | sudo pip install jupyter
40 | sudo pip install matplotlib
41 | ```
42 | 
43 | ## Protobuf Compilation
44 | 
45 | The Tensorflow Object Detection API uses Protobufs to configure model and
46 | training parameters. Before the framework can be used, the Protobuf libraries
47 | must be compiled.
This should be done by running the following command from
48 | the tensorflow/models directory:
49 | 
50 | 
51 | ``` bash
52 | # From tensorflow/models/
53 | protoc object_detection/protos/*.proto --python_out=.
54 | ```
55 | 
56 | ## Add Libraries to PYTHONPATH
57 | 
58 | When running locally, the tensorflow/models/ and slim directories should be
59 | appended to PYTHONPATH. This can be done by running the following from
60 | tensorflow/models/:
61 | 
62 | 
63 | ``` bash
64 | # From tensorflow/models/
65 | export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
66 | ```
67 | 
68 | Note: This command needs to run from every new terminal you start. If you wish
69 | to avoid running this manually, you can add it as a new line to the end of your
70 | ~/.bashrc file.
71 | 
72 | ## Testing the Installation
73 | 
74 | You can test that you have correctly installed the Tensorflow Object Detection
75 | API by running the following command:
76 | 
77 | ``` bash
78 | python object_detection/builders/model_builder_test.py
79 | ```
80 | 
--------------------------------------------------------------------------------
/object_detection/g3doc/preparing_inputs.md:
--------------------------------------------------------------------------------
1 | # Preparing Inputs
2 | 
3 | Tensorflow Object Detection API reads data using the TFRecord file format. Two
4 | sample scripts (`create_pascal_tf_record.py` and `create_pet_tf_record.py`) are
5 | provided to convert from the PASCAL VOC dataset and Oxford-IIIT Pet dataset to
6 | TFRecords.
7 | 
8 | ## Generating the PASCAL VOC TFRecord files.
9 | 
10 | The raw 2012 PASCAL VOC data set can be downloaded
11 | [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar).
12 | Extract the tar file and run the `create_pascal_tf_record.py` script:
13 | 
14 | ```
15 | # From tensorflow/models/object_detection
16 | tar -xvf VOCtrainval_11-May-2012.tar
17 | python create_pascal_tf_record.py --data_dir=VOCdevkit \
18 |     --year=VOC2012 --set=train --output_path=pascal_train.record
19 | python create_pascal_tf_record.py --data_dir=VOCdevkit \
20 |     --year=VOC2012 --set=val --output_path=pascal_val.record
21 | ```
22 | 
23 | You should end up with two TFRecord files named pascal_train.record and
24 | pascal_val.record in the tensorflow/models/object_detection directory.
25 | 
26 | The label map for the PASCAL VOC data set can be found at
27 | data/pascal_label_map.pbtxt.
28 | 
29 | ## Generating the Oxford-IIIT Pet TFRecord files.
30 | 
31 | The Oxford-IIIT Pet data set can be downloaded from
32 | [their website](http://www.robots.ox.ac.uk/~vgg/data/pets/). Extract the tar
33 | files and run the `create_pet_tf_record.py` script to generate TFRecords.
34 | 
35 | ```
36 | # From tensorflow/models/object_detection
37 | tar -xvf annotations.tar.gz
38 | tar -xvf images.tar.gz
39 | python create_pet_tf_record.py --data_dir=`pwd` --output_dir=`pwd`
40 | ```
41 | 
42 | You should end up with two TFRecord files named pet_train.record and
43 | pet_val.record in the tensorflow/models/object_detection directory.
44 | 
45 | The label map for the Pet dataset can be found at data/pet_label_map.pbtxt.
46 | 
--------------------------------------------------------------------------------
/object_detection/g3doc/running_locally.md:
--------------------------------------------------------------------------------
1 | # Running Locally
2 | 
3 | This page walks through the steps required to train an object detection model
4 | on a local machine. It assumes the reader has completed the
5 | following prerequisites:
6 | 
7 | 1. 
The Tensorflow Object Detection API has been installed as documented in the
8 | [installation instructions](installation.md). This includes installing library
9 | dependencies, compiling the configuration protobufs and setting up the Python
10 | environment.
11 | 2. A valid data set has been created. See [this page](preparing_inputs.md) for
12 | instructions on how to generate a dataset for the PASCAL VOC challenge or the
13 | Oxford-IIIT Pet dataset.
14 | 3. An Object Detection pipeline configuration has been written. See
15 | [this page](configuring_jobs.md) for details on how to write a pipeline configuration.
16 | 
17 | ## Recommended Directory Structure for Training and Evaluation
18 | 
19 | ```
20 | +data
21 |   -label_map file
22 |   -train TFRecord file
23 |   -eval TFRecord file
24 | +models
25 |   + model
26 |     -pipeline config file
27 |     +train
28 |     +eval
29 | ```
30 | 
31 | ## Running the Training Job
32 | 
33 | A local training job can be run with the following command:
34 | 
35 | ```bash
36 | # From the tensorflow/models/ directory
37 | python object_detection/train.py \
38 |     --logtostderr \
39 |     --pipeline_config_path=${PATH_TO_YOUR_PIPELINE_CONFIG} \
40 |     --train_dir=${PATH_TO_TRAIN_DIR}
41 | ```
42 | 
43 | where `${PATH_TO_YOUR_PIPELINE_CONFIG}` points to the pipeline config and
44 | `${PATH_TO_TRAIN_DIR}` points to the directory to which training checkpoints
45 | and events will be written. By default, the training job will
46 | run indefinitely until the user kills it.
47 | 
48 | ## Running the Evaluation Job
49 | 
50 | Evaluation is run as a separate job. The eval job will periodically poll the
51 | train directory for new checkpoints and evaluate them on a test dataset. The
52 | job can be run using the following command:
53 | 
54 | ```bash
55 | # From the tensorflow/models/ directory
56 | python object_detection/eval.py \
57 |     --logtostderr \
58 |     --pipeline_config_path=${PATH_TO_YOUR_PIPELINE_CONFIG} \
59 |     --checkpoint_dir=${PATH_TO_TRAIN_DIR} \
60 |     --eval_dir=${PATH_TO_EVAL_DIR}
61 | ```
62 | 
63 | where `${PATH_TO_YOUR_PIPELINE_CONFIG}` points to the pipeline config,
64 | `${PATH_TO_TRAIN_DIR}` points to the directory in which training checkpoints
65 | were saved (same as the training job) and `${PATH_TO_EVAL_DIR}` points to the
66 | directory in which evaluation events will be saved. As with the training job,
67 | the eval job runs until terminated by default.
68 | 
69 | ## Running Tensorboard
70 | 
71 | Progress for training and eval jobs can be inspected using Tensorboard. If
72 | using the recommended directory structure, Tensorboard can be run using the
73 | following command:
74 | 
75 | ```bash
76 | tensorboard --logdir=${PATH_TO_MODEL_DIRECTORY}
77 | ```
78 | 
79 | where `${PATH_TO_MODEL_DIRECTORY}` points to the directory that contains the
80 | train and eval directories. Please note it may take Tensorboard a couple of
81 | minutes to populate with data.
82 | 
--------------------------------------------------------------------------------
/object_detection/g3doc/running_notebook.md:
--------------------------------------------------------------------------------
1 | # Quick Start: Jupyter notebook for off-the-shelf inference
2 | 
3 | If you'd like to hit the ground running and run detection on a few example
4 | images right out of the box, we recommend trying out the Jupyter notebook demo.
5 | To run the Jupyter notebook, run the following command from
6 | `tensorflow/models/object_detection`:
7 | 
8 | ```
9 | # From tensorflow/models/object_detection
10 | jupyter notebook
11 | ```
12 | 
13 | The notebook should open in your favorite web browser. Click the
14 | [`object_detection_tutorial.ipynb`](../object_detection_tutorial.ipynb) link
15 | to open the demo.
16 | 
--------------------------------------------------------------------------------
/object_detection/matchers/BUILD:
--------------------------------------------------------------------------------
1 | # Tensorflow Object Detection API: Matcher implementations.
2 | 
3 | package(
4 |     default_visibility = ["//visibility:public"],
5 | )
6 | 
7 | licenses(["notice"])
8 | 
9 | # Apache 2.0
10 | py_library(
11 |     name = "argmax_matcher",
12 |     srcs = [
13 |         "argmax_matcher.py",
14 |     ],
15 |     deps = [
16 |         "//tensorflow",
17 |         "//tensorflow_models/object_detection/core:matcher",
18 |     ],
19 | )
20 | 
21 | py_test(
22 |     name = "argmax_matcher_test",
23 |     srcs = ["argmax_matcher_test.py"],
24 |     deps = [
25 |         ":argmax_matcher",
26 |         "//tensorflow",
27 |     ],
28 | )
29 | 
30 | py_library(
31 |     name = "bipartite_matcher",
32 |     srcs = [
33 |         "bipartite_matcher.py",
34 |     ],
35 |     deps = [
36 |         "//tensorflow",
37 |         "//tensorflow/contrib/image:image_py",
38 |         "//tensorflow_models/object_detection/core:matcher",
39 |     ],
40 | )
41 | 
42 | py_test(
43 |     name = "bipartite_matcher_test",
44 |     srcs = [
45 |         "bipartite_matcher_test.py",
46 |     ],
47 |     deps = [
48 |         ":bipartite_matcher",
49 |         "//tensorflow",
50 |     ],
51 | )
52 | 
--------------------------------------------------------------------------------
/object_detection/matchers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/matchers/__init__.py
--------------------------------------------------------------------------------
/object_detection/matchers/bipartite_matcher.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Bipartite matcher implementation."""
17 | 
18 | import tensorflow as tf
19 | 
20 | from tensorflow.contrib.image.python.ops import image_ops
21 | from object_detection.core import matcher
22 | 
23 | 
24 | class GreedyBipartiteMatcher(matcher.Matcher):
25 |   """Wraps a Tensorflow greedy bipartite matcher."""
26 | 
27 |   def _match(self, similarity_matrix, num_valid_rows=-1):
28 |     """Greedily bipartite matches a collection of rows and columns.
29 | 
30 |     TODO: Add a num_valid_columns option to match only that many columns
31 |     with all the rows.
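
    Example (values taken from bipartite_matcher_test; shown here purely to
    illustrate the match semantics): for
    similarity_matrix = [[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]] and
    num_valid_rows=2, match_results is [-1, 1, 0]; column 0 is unmatched,
    column 1 is matched to row 1, and column 2 is matched to row 0.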
32 | 33 | Args: 34 | similarity_matrix: Float tensor of shape [N, M] with pairwise similarity 35 | where higher values mean more similar. 36 | num_valid_rows: A scalar or a 1-D tensor with one element describing the 37 | number of valid rows of similarity_matrix to consider for the bipartite 38 | matching. If set to be negative, then all rows from similarity_matrix 39 | are used. 40 | 41 | Returns: 42 | match_results: int32 tensor of shape [M] with match_results[i]=-1 43 | meaning that column i is not matched and otherwise that it is matched to 44 | row match_results[i]. 45 | """ 46 | # Convert similarity matrix to distance matrix as tf.image.bipartite tries 47 | # to find minimum distance matches. 48 | distance_matrix = -1 * similarity_matrix 49 | _, match_results = image_ops.bipartite_match( 50 | distance_matrix, num_valid_rows) 51 | match_results = tf.reshape(match_results, [-1]) 52 | match_results = tf.cast(match_results, tf.int32) 53 | return match_results 54 | -------------------------------------------------------------------------------- /object_detection/matchers/bipartite_matcher_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for object_detection.core.bipartite_matcher.""" 17 | 18 | import tensorflow as tf 19 | 20 | from object_detection.matchers import bipartite_matcher 21 | 22 | 23 | class GreedyBipartiteMatcherTest(tf.test.TestCase): 24 | 25 | def test_get_expected_matches_when_all_rows_are_valid(self): 26 | similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]]) 27 | num_valid_rows = 2 28 | expected_match_results = [-1, 1, 0] 29 | 30 | matcher = bipartite_matcher.GreedyBipartiteMatcher() 31 | match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows) 32 | with self.test_session() as sess: 33 | match_results_out = sess.run(match._match_results) 34 | self.assertAllEqual(match_results_out, expected_match_results) 35 | 36 | def test_get_expected_matches_with_valid_rows_set_to_minus_one(self): 37 | similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]]) 38 | num_valid_rows = -1 39 | expected_match_results = [-1, 1, 0] 40 | 41 | matcher = bipartite_matcher.GreedyBipartiteMatcher() 42 | match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows) 43 | with self.test_session() as sess: 44 | match_results_out = sess.run(match._match_results) 45 | self.assertAllEqual(match_results_out, expected_match_results) 46 | 47 | def test_get_no_matches_with_zero_valid_rows(self): 48 | similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]]) 49 | num_valid_rows = 0 50 | expected_match_results = [-1, -1, -1] 51 | 52 | matcher = bipartite_matcher.GreedyBipartiteMatcher() 53 | match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows) 54 | with self.test_session() as sess: 55 | match_results_out = sess.run(match._match_results) 56 | self.assertAllEqual(match_results_out, expected_match_results) 57 | 58 | def test_get_expected_matches_with_only_one_valid_row(self): 59 | similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]]) 60 | num_valid_rows = 1 61 | expected_match_results = [-1, -1, 0] 62 | 63 | matcher = bipartite_matcher.GreedyBipartiteMatcher() 64 | match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows) 65 | with self.test_session() as sess: 66 | match_results_out = sess.run(match._match_results) 67 | self.assertAllEqual(match_results_out, expected_match_results) 68 | 69 | 70 | if __name__ == '__main__': 71 | tf.test.main() 72 | -------------------------------------------------------------------------------- /object_detection/meta_architectures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/meta_architectures/__init__.py -------------------------------------------------------------------------------- /object_detection/meta_architectures/faster_rcnn_meta_arch_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.meta_architectures.faster_rcnn_meta_arch.""" 17 | 18 | import tensorflow as tf 19 | 20 | from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib 21 | 22 | 23 | class FasterRCNNMetaArchTest( 24 | faster_rcnn_meta_arch_test_lib.FasterRCNNMetaArchTestBase): 25 | 26 | def test_postprocess_second_stage_only_inference_mode_with_masks(self): 27 | model = self._build_model( 28 | is_training=False, first_stage_only=False, second_stage_batch_size=6) 29 | 30 | batch_size = 2 31 | total_num_padded_proposals = batch_size * model.max_num_proposals 32 | proposal_boxes = tf.constant( 33 | [[[1, 1, 2, 3], 34 | [0, 0, 1, 1], 35 | [.5, .5, .6, .6], 36 | 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]], 37 | [[2, 3, 6, 8], 38 | [1, 2, 5, 3], 39 | 4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]], dtype=tf.float32) 40 | num_proposals = tf.constant([3, 2], dtype=tf.int32) 41 | refined_box_encodings = tf.zeros( 42 | [total_num_padded_proposals, model.num_classes, 4], dtype=tf.float32) 43 | class_predictions_with_background = tf.ones( 44 | [total_num_padded_proposals, model.num_classes+1], dtype=tf.float32) 45 | image_shape = tf.constant([batch_size, 36, 48, 3], dtype=tf.int32) 46 | 47 | mask_height = 2 48 | mask_width = 2 49 | mask_predictions = .6 * tf.ones( 50 | [total_num_padded_proposals, model.num_classes, 51 | mask_height, mask_width], dtype=tf.float32) 52 | exp_detection_masks = [[[[1, 1], [1, 1]], 53 | [[1, 1], [1, 1]], 54 | [[1, 1], [1, 1]], 55 | [[1, 1], [1, 1]], 56 | [[1, 1], [1, 1]]], 57 | [[[1, 1], [1, 1]], 58 | [[1, 1], [1, 1]], 59 | [[1, 1], [1, 1]], 60 | [[1, 1], [1, 1]], 61 | [[0, 0], [0, 0]]]] 62 | 63 | detections = model.postprocess({ 64 | 'refined_box_encodings': refined_box_encodings, 65 | 'class_predictions_with_background': class_predictions_with_background, 66 | 'num_proposals': num_proposals, 67 | 'proposal_boxes': proposal_boxes, 68 | 'image_shape': image_shape, 69 | 'mask_predictions': mask_predictions 70 | }) 71 | with self.test_session() as sess: 72 | detections_out = sess.run(detections) 73 | self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4]) 74 | self.assertAllClose(detections_out['detection_scores'], 75 | [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]]) 76 | self.assertAllClose(detections_out['detection_classes'], 77 | [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]]) 78 | self.assertAllClose(detections_out['num_detections'], [5, 4]) 79 | self.assertAllClose(detections_out['detection_masks'], 80 | exp_detection_masks) 81 | 82 | 83 | if __name__ == '__main__': 84 | tf.test.main() 85 | -------------------------------------------------------------------------------- /object_detection/meta_architectures/rfcn_meta_arch_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.meta_architectures.rfcn_meta_arch.""" 17 | 18 | import tensorflow as tf 19 | 20 | from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib 21 | from object_detection.meta_architectures import rfcn_meta_arch 22 | 23 | 24 | class RFCNMetaArchTest( 25 | faster_rcnn_meta_arch_test_lib.FasterRCNNMetaArchTestBase): 26 | 27 | def _get_second_stage_box_predictor_text_proto(self): 28 | box_predictor_text_proto = """ 29 | rfcn_box_predictor { 30 | conv_hyperparams { 31 | op: CONV 32 | activation: NONE 33 | regularizer { 34 | l2_regularizer { 35 | weight: 0.0005 36 | } 37 | } 38 | initializer { 39 | variance_scaling_initializer { 40 | factor: 1.0 41 | uniform: true 42 | mode: FAN_AVG 43 | } 44 | } 45 | } 46 | } 47 | """ 48 | return box_predictor_text_proto 49 | 50 | def _get_model(self, box_predictor, **common_kwargs): 51 | return rfcn_meta_arch.RFCNMetaArch( 52 | second_stage_rfcn_box_predictor=box_predictor, **common_kwargs) 53 | 54 | 55 | if __name__ == '__main__': 56 | tf.test.main() 57 | -------------------------------------------------------------------------------- /object_detection/models/BUILD: -------------------------------------------------------------------------------- 1 | # Tensorflow Object Detection API: Models. 
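# Example usage (a sketch; the full package prefix below is assumed from the
# //tensorflow_models/... paths used in the deps of this file):
#   bazel test //tensorflow_models/object_detection/models:feature_map_generators_test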
2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | ) 6 | 7 | licenses(["notice"]) 8 | 9 | # Apache 2.0 10 | 11 | py_library( 12 | name = "feature_map_generators", 13 | srcs = [ 14 | "feature_map_generators.py", 15 | ], 16 | deps = [ 17 | "//tensorflow", 18 | "//tensorflow_models/object_detection/utils:ops", 19 | ], 20 | ) 21 | 22 | py_test( 23 | name = "feature_map_generators_test", 24 | srcs = [ 25 | "feature_map_generators_test.py", 26 | ], 27 | deps = [ 28 | ":feature_map_generators", 29 | "//tensorflow", 30 | ], 31 | ) 32 | 33 | py_library( 34 | name = "ssd_feature_extractor_test", 35 | srcs = [ 36 | "ssd_feature_extractor_test.py", 37 | ], 38 | deps = [ 39 | "//tensorflow", 40 | ], 41 | ) 42 | 43 | py_library( 44 | name = "ssd_inception_v2_feature_extractor", 45 | srcs = [ 46 | "ssd_inception_v2_feature_extractor.py", 47 | ], 48 | deps = [ 49 | ":feature_map_generators", 50 | "//tensorflow", 51 | "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch", 52 | "//tensorflow_models/slim:inception_v2", 53 | ], 54 | ) 55 | 56 | py_library( 57 | name = "ssd_mobilenet_v1_feature_extractor", 58 | srcs = ["ssd_mobilenet_v1_feature_extractor.py"], 59 | deps = [ 60 | ":feature_map_generators", 61 | "//tensorflow", 62 | "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch", 63 | "//tensorflow_models/slim:mobilenet_v1", 64 | ], 65 | ) 66 | 67 | py_test( 68 | name = "ssd_inception_v2_feature_extractor_test", 69 | srcs = [ 70 | "ssd_inception_v2_feature_extractor_test.py", 71 | ], 72 | deps = [ 73 | ":ssd_feature_extractor_test", 74 | ":ssd_inception_v2_feature_extractor", 75 | "//tensorflow", 76 | ], 77 | ) 78 | 79 | py_test( 80 | name = "ssd_mobilenet_v1_feature_extractor_test", 81 | srcs = ["ssd_mobilenet_v1_feature_extractor_test.py"], 82 | deps = [ 83 | ":ssd_feature_extractor_test", 84 | ":ssd_mobilenet_v1_feature_extractor", 85 | "//tensorflow", 86 | ], 87 | ) 88 | 89 | py_library( 90 | name = "faster_rcnn_inception_resnet_v2_feature_extractor", 91 | srcs = [ 92 | "faster_rcnn_inception_resnet_v2_feature_extractor.py", 93 | ], 94 | deps = [ 95 | "//tensorflow", 96 | "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", 97 | "//tensorflow_models/object_detection/utils:variables_helper", 98 | "//tensorflow_models/slim:inception_resnet_v2", 99 | ], 100 | ) 101 | 102 | py_test( 103 | name = "faster_rcnn_inception_resnet_v2_feature_extractor_test", 104 | srcs = [ 105 | "faster_rcnn_inception_resnet_v2_feature_extractor_test.py", 106 | ], 107 | deps = [ 108 | ":faster_rcnn_inception_resnet_v2_feature_extractor", 109 | "//tensorflow", 110 | ], 111 | ) 112 | 113 | py_library( 114 | name = "faster_rcnn_resnet_v1_feature_extractor", 115 | srcs = [ 116 | "faster_rcnn_resnet_v1_feature_extractor.py", 117 | ], 118 | deps = [ 119 | "//tensorflow", 120 | "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", 121 | "//tensorflow_models/slim:resnet_utils", 122 | "//tensorflow_models/slim:resnet_v1", 123 | ], 124 | ) 125 | 126 | py_test( 127 | name = "faster_rcnn_resnet_v1_feature_extractor_test", 128 | srcs = [ 129 | "faster_rcnn_resnet_v1_feature_extractor_test.py", 130 | ], 131 | deps = [ 132 | ":faster_rcnn_resnet_v1_feature_extractor", 133 | "//tensorflow", 134 | ], 135 | ) 136 | -------------------------------------------------------------------------------- /object_detection/models/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/models/__init__.py -------------------------------------------------------------------------------- /object_detection/protos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/protos/__init__.py -------------------------------------------------------------------------------- /object_detection/protos/anchor_generator.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | import "object_detection/protos/grid_anchor_generator.proto"; 6 | import "object_detection/protos/ssd_anchor_generator.proto"; 7 | 8 | // Configuration proto for the anchor generator to use in the object detection 9 | // pipeline. See core/anchor_generator.py for details. 10 | message AnchorGenerator { 11 | oneof anchor_generator_oneof { 12 | GridAnchorGenerator grid_anchor_generator = 1; 13 | SsdAnchorGenerator ssd_anchor_generator = 2; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /object_detection/protos/argmax_matcher.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Configuration proto for ArgMaxMatcher. See 6 | // matchers/argmax_matcher.py for details. 7 | message ArgMaxMatcher { 8 | // Threshold for positive matches. 9 | optional float matched_threshold = 1 [default = 0.5]; 10 | 11 | // Threshold for negative matches. 12 | optional float unmatched_threshold = 2 [default = 0.5]; 13 | 14 | // Whether to construct ArgMaxMatcher without thresholds. 15 | optional bool ignore_thresholds = 3 [default = false]; 16 | 17 | // If True, then negative matches are the ones below the unmatched_threshold, 18 | // whereas ignored matches are in between the matched and unmatched 19 | // threshold. If False, then negative matches are in between the matched 20 | // and unmatched threshold, and everything lower than unmatched is ignored. 21 | optional bool negatives_lower_than_unmatched = 4 [default = true]; 22 | 23 | // Whether to ensure each row is matched to at least one column. 24 | optional bool force_match_for_each_row = 5 [default = false]; 25 | } 26 | -------------------------------------------------------------------------------- /object_detection/protos/bipartite_matcher.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Configuration proto for bipartite matcher. See 6 | // matchers/bipartite_matcher.py for details. 7 | message BipartiteMatcher { 8 | } 9 | -------------------------------------------------------------------------------- /object_detection/protos/bipartite_matcher_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT!
2 | # source: object_detection/protos/bipartite_matcher.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='object_detection/protos/bipartite_matcher.proto', 20 | package='object_detection.protos', 21 | serialized_pb=_b('\n/object_detection/protos/bipartite_matcher.proto\x12\x17object_detection.protos\"\x12\n\x10\x42ipartiteMatcher') 22 | ) 23 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 24 | 25 | 26 | 27 | 28 | _BIPARTITEMATCHER = _descriptor.Descriptor( 29 | name='BipartiteMatcher', 30 | full_name='object_detection.protos.BipartiteMatcher', 31 | filename=None, 32 | file=DESCRIPTOR, 33 | containing_type=None, 34 | fields=[ 35 | ], 36 | extensions=[ 37 | ], 38 | nested_types=[], 39 | enum_types=[ 40 | ], 41 | options=None, 42 | is_extendable=False, 43 | extension_ranges=[], 44 | oneofs=[ 45 | ], 46 | serialized_start=76, 47 | serialized_end=94, 48 | ) 49 | 50 | DESCRIPTOR.message_types_by_name['BipartiteMatcher'] = _BIPARTITEMATCHER 51 | 52 | BipartiteMatcher = _reflection.GeneratedProtocolMessageType('BipartiteMatcher', (_message.Message,), dict( 53 | DESCRIPTOR = _BIPARTITEMATCHER, 54 | __module__ = 'object_detection.protos.bipartite_matcher_pb2' 55 | # @@protoc_insertion_point(class_scope:object_detection.protos.BipartiteMatcher) 56 | )) 57 | _sym_db.RegisterMessage(BipartiteMatcher) 58 | 59 | 60 | # @@protoc_insertion_point(module_scope) 61 | -------------------------------------------------------------------------------- /object_detection/protos/box_coder.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | import "object_detection/protos/faster_rcnn_box_coder.proto"; 6 | import "object_detection/protos/mean_stddev_box_coder.proto"; 7 | import "object_detection/protos/square_box_coder.proto"; 8 | 9 | // Configuration proto for the box coder to be used in the object detection 10 | // pipeline. See core/box_coder.py for details. 11 | message BoxCoder { 12 | oneof box_coder_oneof { 13 | FasterRcnnBoxCoder faster_rcnn_box_coder = 1; 14 | MeanStddevBoxCoder mean_stddev_box_coder = 2; 15 | SquareBoxCoder square_box_coder = 3; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /object_detection/protos/box_predictor.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | import "object_detection/protos/hyperparams.proto"; 6 | 7 | 8 | // Configuration proto for box predictor. See core/box_predictor.py for details. 9 | message BoxPredictor { 10 | oneof box_predictor_oneof { 11 | ConvolutionalBoxPredictor convolutional_box_predictor = 1; 12 | MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2; 13 | RfcnBoxPredictor rfcn_box_predictor = 3; 14 | } 15 | } 16 | 17 | // Configuration proto for Convolutional box predictor. 18 | message ConvolutionalBoxPredictor { 19 | // Hyperparameters for convolution ops used in the box predictor. 
20 | optional Hyperparams conv_hyperparams = 1; 21 | 22 | // Minimum feature depth prior to predicting box encodings and class 23 | // predictions. 24 | optional int32 min_depth = 2 [default = 0]; 25 | 26 | // Maximum feature depth prior to predicting box encodings and class 27 | // predictions. If max_depth is set to 0, no additional feature map will be 28 | // inserted before location and class predictions. 29 | optional int32 max_depth = 3 [default = 0]; 30 | 31 | // Number of the additional conv layers before the predictor. 32 | optional int32 num_layers_before_predictor = 4 [default = 0]; 33 | 34 | // Whether to use dropout for class prediction. 35 | optional bool use_dropout = 5 [default = true]; 36 | 37 | // Keep probability for dropout. 38 | optional float dropout_keep_probability = 6 [default = 0.8]; 39 | 40 | // Size of final convolution kernel. If the spatial resolution of the feature 41 | // map is smaller than the kernel size, then the kernel size is set to 42 | // min(feature_width, feature_height). 43 | optional int32 kernel_size = 7 [default = 1]; 44 | 45 | // Size of the encoding for boxes. 46 | optional int32 box_code_size = 8 [default = 4]; 47 | 48 | // Whether to apply sigmoid to the output of class predictions. 49 | // TODO: Do we need this since we have a post processing module? 50 | optional bool apply_sigmoid_to_scores = 9 [default = false]; 51 | } 52 | 53 | message MaskRCNNBoxPredictor { 54 | // Hyperparameters for fully connected ops used in the box predictor. 55 | optional Hyperparams fc_hyperparams = 1; 56 | 57 | // Whether to use a dropout op prior to both the box and class predictions. 58 | optional bool use_dropout = 2 [default = false]; 59 | 60 | // Keep probability for dropout. This is only used if use_dropout is true. 61 | optional float dropout_keep_probability = 3 [default = 0.5]; 62 | 63 | // Size of the encoding for the boxes. 64 | optional int32 box_code_size = 4 [default = 4]; 65 | 66 | // Hyperparameters for convolution ops used in the box predictor. 67 | optional Hyperparams conv_hyperparams = 5; 68 | 69 | // Whether to predict instance masks inside detection boxes. 70 | optional bool predict_instance_masks = 6 [default = false]; 71 | 72 | // The depth for the first conv2d_transpose op applied to the 73 | // image_features in the mask prediction branch. 74 | optional int32 mask_prediction_conv_depth = 7 [default = 256]; 75 | 76 | // Whether to predict keypoints inside detection boxes. 77 | optional bool predict_keypoints = 8 [default = false]; 78 | } 79 | 80 | message RfcnBoxPredictor { 81 | // Hyperparameters for convolution ops used in the box predictor. 82 | optional Hyperparams conv_hyperparams = 1; 83 | 84 | // Bin sizes for RFCN crops. 85 | optional int32 num_spatial_bins_height = 2 [default = 3]; 86 | 87 | optional int32 num_spatial_bins_width = 3 [default = 3]; 88 | 89 | // Target depth to reduce the input image features to. 90 | optional int32 depth = 4 [default = 1024]; 91 | 92 | // Size of the encoding for the boxes. 93 | optional int32 box_code_size = 5 [default = 4]; 94 | 95 | // Size to resize the rfcn crops to.
96 | optional int32 crop_height = 6 [default = 12]; 97 | 98 | optional int32 crop_width = 7 [default = 12]; 99 | } 100 | -------------------------------------------------------------------------------- /object_detection/protos/eval.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Message for configuring DetectionModel evaluation jobs (eval.py). 6 | message EvalConfig { 7 | // Number of visualization images to generate. 8 | optional uint32 num_visualizations = 1 [default=10]; 9 | 10 | // Number of examples to process for evaluation. 11 | optional uint32 num_examples = 2 [default=5000]; 12 | 13 | // How often to run evaluation. 14 | optional uint32 eval_interval_secs = 3 [default=300]; 15 | 16 | // Maximum number of times to run evaluation. If set to 0, will run forever. 17 | optional uint32 max_evals = 4 [default=0]; 18 | 19 | // Whether the TensorFlow graph used for evaluation should be saved to disk. 20 | optional bool save_graph = 5 [default=false]; 21 | 22 | // Path to directory to store visualizations in. If empty, visualization 23 | // images are not exported (only shown on TensorBoard). 24 | optional string visualization_export_dir = 6 [default=""]; 25 | 26 | // BNS name of the TensorFlow master. 27 | optional string eval_master = 7 [default=""]; 28 | 29 | // Type of metrics to use for evaluation. Currently supports only Pascal VOC 30 | // detection metrics. 31 | optional string metrics_set = 8 [default="pascal_voc_metrics"]; 32 | 33 | // Path to export detections to, in COCO-compatible JSON format. 34 | optional string export_path = 9 [default='']; 35 | 36 | // Option to not read groundtruth labels and only export detections to a 37 | // COCO-compatible JSON file. 38 | optional bool ignore_groundtruth = 10 [default=false]; 39 | 40 | // Use exponential moving averages of variables for evaluation. 41 | // TODO: When this is false, make sure the model is constructed 42 | // without moving averages in restore_fn. 43 | optional bool use_moving_averages = 11 [default=false]; 44 | 45 | // Whether to evaluate instance masks. 46 | optional bool eval_instance_masks = 12 [default=false]; 47 | } 48 | -------------------------------------------------------------------------------- /object_detection/protos/faster_rcnn_box_coder.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Configuration proto for FasterRCNNBoxCoder. See 6 | // box_coders/faster_rcnn_box_coder.py for details. 7 | message FasterRcnnBoxCoder { 8 | // Scale factor for anchor encoded box center. 9 | optional float y_scale = 1 [default = 10.0]; 10 | optional float x_scale = 2 [default = 10.0]; 11 | 12 | // Scale factor for anchor encoded box height. 13 | optional float height_scale = 3 [default = 5.0]; 14 | 15 | // Scale factor for anchor encoded box width. 16 | optional float width_scale = 4 [default = 5.0]; 17 | } 18 | -------------------------------------------------------------------------------- /object_detection/protos/faster_rcnn_box_coder_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT!
2 | # source: object_detection/protos/faster_rcnn_box_coder.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='object_detection/protos/faster_rcnn_box_coder.proto', 20 | package='object_detection.protos', 21 | serialized_pb=_b('\n3object_detection/protos/faster_rcnn_box_coder.proto\x12\x17object_detection.protos\"o\n\x12\x46\x61sterRcnnBoxCoder\x12\x13\n\x07y_scale\x18\x01 \x01(\x02:\x02\x31\x30\x12\x13\n\x07x_scale\x18\x02 \x01(\x02:\x02\x31\x30\x12\x17\n\x0cheight_scale\x18\x03 \x01(\x02:\x01\x35\x12\x16\n\x0bwidth_scale\x18\x04 \x01(\x02:\x01\x35') 22 | ) 23 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 24 | 25 | 26 | 27 | 28 | _FASTERRCNNBOXCODER = _descriptor.Descriptor( 29 | name='FasterRcnnBoxCoder', 30 | full_name='object_detection.protos.FasterRcnnBoxCoder', 31 | filename=None, 32 | file=DESCRIPTOR, 33 | containing_type=None, 34 | fields=[ 35 | _descriptor.FieldDescriptor( 36 | name='y_scale', full_name='object_detection.protos.FasterRcnnBoxCoder.y_scale', index=0, 37 | number=1, type=2, cpp_type=6, label=1, 38 | has_default_value=True, default_value=10, 39 | message_type=None, enum_type=None, containing_type=None, 40 | is_extension=False, extension_scope=None, 41 | options=None), 42 | _descriptor.FieldDescriptor( 43 | name='x_scale', full_name='object_detection.protos.FasterRcnnBoxCoder.x_scale', index=1, 44 | number=2, type=2, cpp_type=6, label=1, 45 | has_default_value=True, default_value=10, 46 | message_type=None, enum_type=None, containing_type=None, 47 | is_extension=False, extension_scope=None, 48 | options=None), 49 | _descriptor.FieldDescriptor( 50 | name='height_scale', full_name='object_detection.protos.FasterRcnnBoxCoder.height_scale', index=2, 51 | number=3, type=2, cpp_type=6, label=1, 52 | has_default_value=True, default_value=5, 53 | message_type=None, enum_type=None, containing_type=None, 54 | is_extension=False, extension_scope=None, 55 | options=None), 56 | _descriptor.FieldDescriptor( 57 | name='width_scale', full_name='object_detection.protos.FasterRcnnBoxCoder.width_scale', index=3, 58 | number=4, type=2, cpp_type=6, label=1, 59 | has_default_value=True, default_value=5, 60 | message_type=None, enum_type=None, containing_type=None, 61 | is_extension=False, extension_scope=None, 62 | options=None), 63 | ], 64 | extensions=[ 65 | ], 66 | nested_types=[], 67 | enum_types=[ 68 | ], 69 | options=None, 70 | is_extendable=False, 71 | extension_ranges=[], 72 | oneofs=[ 73 | ], 74 | serialized_start=80, 75 | serialized_end=191, 76 | ) 77 | 78 | DESCRIPTOR.message_types_by_name['FasterRcnnBoxCoder'] = _FASTERRCNNBOXCODER 79 | 80 | FasterRcnnBoxCoder = _reflection.GeneratedProtocolMessageType('FasterRcnnBoxCoder', (_message.Message,), dict( 81 | DESCRIPTOR = _FASTERRCNNBOXCODER, 82 | __module__ = 'object_detection.protos.faster_rcnn_box_coder_pb2' 83 | # @@protoc_insertion_point(class_scope:object_detection.protos.FasterRcnnBoxCoder) 84 | )) 85 | _sym_db.RegisterMessage(FasterRcnnBoxCoder) 86 | 87 | 88 | # @@protoc_insertion_point(module_scope) 89 | 
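The scale factors in faster_rcnn_box_coder.proto above parameterize the [ty, tx, th, tw] box encoding. A minimal NumPy sketch of that encoding, mirroring box_coders/faster_rcnn_box_coder.py (the encode_box helper is illustrative, not part of this package):

import numpy as np

def encode_box(box, anchor, y_scale=10.0, x_scale=10.0,
               height_scale=5.0, width_scale=5.0):
  # Keyword defaults mirror the proto defaults above.
  ymin, xmin, ymax, xmax = box
  ymin_a, xmin_a, ymax_a, xmax_a = anchor
  # Convert both boxes to a center/size parameterization.
  ha, wa = ymax_a - ymin_a, xmax_a - xmin_a
  ycenter_a, xcenter_a = ymin_a + ha / 2.0, xmin_a + wa / 2.0
  h, w = ymax - ymin, xmax - xmin
  ycenter, xcenter = ymin + h / 2.0, xmin + w / 2.0
  # Scaled center offsets plus log-space height/width residuals.
  ty = y_scale * (ycenter - ycenter_a) / ha
  tx = x_scale * (xcenter - xcenter_a) / wa
  th = height_scale * np.log(h / ha)
  tw = width_scale * np.log(w / wa)
  return np.array([ty, tx, th, tw])

# A box identical to its anchor encodes to all zeros.
print(encode_box([0.1, 0.1, 0.5, 0.5], [0.1, 0.1, 0.5, 0.5]))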
-------------------------------------------------------------------------------- /object_detection/protos/grid_anchor_generator.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Configuration proto for GridAnchorGenerator. See 6 | // anchor_generators/grid_anchor_generator.py for details. 7 | message GridAnchorGenerator { 8 | // Anchor height in pixels. 9 | optional int32 height = 1 [default = 256]; 10 | 11 | // Anchor width in pixels. 12 | optional int32 width = 2 [default = 256]; 13 | 14 | // Anchor stride in height dimension in pixels. 15 | optional int32 height_stride = 3 [default = 16]; 16 | 17 | // Anchor stride in width dimension in pixels. 18 | optional int32 width_stride = 4 [default = 16]; 19 | 20 | // Anchor height offset in pixels. 21 | optional int32 height_offset = 5 [default = 0]; 22 | 23 | // Anchor width offset in pixels. 24 | optional int32 width_offset = 6 [default = 0]; 25 | 26 | // At any given location, len(scales) * len(aspect_ratios) anchors are 27 | // generated with all possible combinations of scales and aspect ratios. 28 | 29 | // List of scales for the anchors. 30 | repeated float scales = 7; 31 | 32 | // List of aspect ratios for the anchors. 33 | repeated float aspect_ratios = 8; 34 | } 35 | -------------------------------------------------------------------------------- /object_detection/protos/hyperparams.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Configuration proto for the convolution op hyperparameters to use in the 6 | // object detection pipeline. 7 | message Hyperparams { 8 | 9 | // Operations affected by hyperparameters. 10 | enum Op { 11 | // Convolution, Separable Convolution, Convolution transpose. 12 | CONV = 1; 13 | 14 | // Fully connected 15 | FC = 2; 16 | } 17 | optional Op op = 1 [default = CONV]; 18 | 19 | // Regularizer for the weights of the convolution op. 20 | optional Regularizer regularizer = 2; 21 | 22 | // Initializer for the weights of the convolution op. 23 | optional Initializer initializer = 3; 24 | 25 | // Type of activation to apply after convolution. 26 | enum Activation { 27 | // Use None (no activation) 28 | NONE = 0; 29 | 30 | // Use tf.nn.relu 31 | RELU = 1; 32 | 33 | // Use tf.nn.relu6 34 | RELU_6 = 2; 35 | } 36 | optional Activation activation = 4 [default = RELU]; 37 | 38 | // BatchNorm hyperparameters. If this parameter is NOT set then BatchNorm is 39 | // not applied! 40 | optional BatchNorm batch_norm = 5; 41 | } 42 | 43 | // Proto with one-of field for regularizers. 44 | message Regularizer { 45 | oneof regularizer_oneof { 46 | L1Regularizer l1_regularizer = 1; 47 | L2Regularizer l2_regularizer = 2; 48 | } 49 | } 50 | 51 | // Configuration proto for L1 Regularizer. 52 | // See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l1_regularizer 53 | message L1Regularizer { 54 | optional float weight = 1 [default = 1.0]; 55 | } 56 | 57 | // Configuration proto for L2 Regularizer. 58 | // See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l2_regularizer 59 | message L2Regularizer { 60 | optional float weight = 1 [default = 1.0]; 61 | } 62 | 63 | // Proto with one-of field for initializers. 
64 | message Initializer { 65 | oneof initializer_oneof { 66 | TruncatedNormalInitializer truncated_normal_initializer = 1; 67 | VarianceScalingInitializer variance_scaling_initializer = 2; 68 | } 69 | } 70 | 71 | // Configuration proto for truncated normal initializer. See 72 | // https://www.tensorflow.org/api_docs/python/tf/truncated_normal_initializer 73 | message TruncatedNormalInitializer { 74 | optional float mean = 1 [default = 0.0]; 75 | optional float stddev = 2 [default = 1.0]; 76 | } 77 | 78 | // Configuration proto for variance scaling initializer. See 79 | // https://www.tensorflow.org/api_docs/python/tf/contrib/layers/ 80 | // variance_scaling_initializer 81 | message VarianceScalingInitializer { 82 | optional float factor = 1 [default = 2.0]; 83 | optional bool uniform = 2 [default = false]; 84 | enum Mode { 85 | FAN_IN = 0; 86 | FAN_OUT = 1; 87 | FAN_AVG = 2; 88 | } 89 | optional Mode mode = 3 [default = FAN_IN]; 90 | } 91 | 92 | // Configuration proto for batch norm to apply after convolution op. See 93 | // https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm 94 | message BatchNorm { 95 | optional float decay = 1 [default = 0.999]; 96 | optional bool center = 2 [default = true]; 97 | optional bool scale = 3 [default = false]; 98 | optional float epsilon = 4 [default = 0.001]; 99 | // Whether to train the batch norm variables. If this is set to false during 100 | // training, the current values of the batch_norm variables are used for 101 | // the forward pass but they are never updated. 102 | optional bool train = 5 [default = true]; 103 | } 104 | -------------------------------------------------------------------------------- /object_detection/protos/image_resizer.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Configuration proto for image resizing operations. 6 | // See builders/image_resizer_builder.py for details. 7 | message ImageResizer { 8 | oneof image_resizer_oneof { 9 | KeepAspectRatioResizer keep_aspect_ratio_resizer = 1; 10 | FixedShapeResizer fixed_shape_resizer = 2; 11 | } 12 | } 13 | 14 | 15 | // Configuration proto for image resizer that keeps aspect ratio. 16 | message KeepAspectRatioResizer { 17 | // Desired size of the smaller image dimension in pixels. 18 | optional int32 min_dimension = 1 [default = 600]; 19 | 20 | // Desired size of the larger image dimension in pixels. 21 | optional int32 max_dimension = 2 [default = 1024]; 22 | } 23 | 24 | 25 | // Configuration proto for image resizer that resizes to a fixed shape. 26 | message FixedShapeResizer { 27 | // Desired height of image in pixels. 28 | optional int32 height = 1 [default = 300]; 29 | 30 | // Desired width of image in pixels. 31 | optional int32 width = 2 [default = 300]; 32 | } 33 | -------------------------------------------------------------------------------- /object_detection/protos/input_reader.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Configuration proto for defining input readers that generate Object Detection 6 | // Examples from input sources. Input readers are expected to generate a 7 | // dictionary of tensors, with the following fields populated: 8 | // 9 | // 'image': an [image_height, image_width, channels] image tensor that detection 10 | // will be run on.
11 | // 'groundtruth_classes': a [num_boxes] int32 tensor storing the class 12 | // labels of detected boxes in the image. 13 | // 'groundtruth_boxes': a [num_boxes, 4] float tensor storing the coordinates of 14 | // detected boxes in the image. 15 | // 'groundtruth_instance_masks': (Optional) a [num_boxes, image_height, 16 | // image_width] float tensor storing binary masks of the objects in boxes. 17 | 18 | message InputReader { 19 | // Path to StringIntLabelMap pbtxt file specifying the mapping from string 20 | // labels to integer ids. 21 | optional string label_map_path = 1 [default=""]; 22 | 23 | // Whether data should be processed in the order it is read, or 24 | // shuffled randomly. 25 | optional bool shuffle = 2 [default=true]; 26 | 27 | // Maximum number of records to keep in reader queue. 28 | optional uint32 queue_capacity = 3 [default=2000]; 29 | 30 | // Minimum number of records to keep in reader queue. A large value is needed 31 | // to generate a good random shuffle. 32 | optional uint32 min_after_dequeue = 4 [default=1000]; 33 | 34 | // The number of times a data source is read. If set to zero, the data source 35 | // will be reused indefinitely. 36 | optional uint32 num_epochs = 5 [default=0]; 37 | 38 | // Number of reader instances to create. 39 | optional uint32 num_readers = 6 [default=8]; 40 | 41 | // Whether to load groundtruth instance masks. 42 | optional bool load_instance_masks = 7 [default = false]; 43 | 44 | oneof input_reader { 45 | TFRecordInputReader tf_record_input_reader = 8; 46 | ExternalInputReader external_input_reader = 9; 47 | } 48 | } 49 | 50 | // An input reader that reads TF Example protos from local TFRecord files. 51 | message TFRecordInputReader { 52 | // Path to TFRecordFile. 53 | optional string input_path = 1 [default=""]; 54 | } 55 | 56 | // An externally defined input reader. Users may define an extension to this 57 | // proto to interface with their own input readers. 58 | message ExternalInputReader { 59 | extensions 1 to 999; 60 | } 61 | -------------------------------------------------------------------------------- /object_detection/protos/matcher.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | import "object_detection/protos/argmax_matcher.proto"; 6 | import "object_detection/protos/bipartite_matcher.proto"; 7 | 8 | // Configuration proto for the matcher to be used in the object detection 9 | // pipeline. See core/matcher.py for details. 10 | message Matcher { 11 | oneof matcher_oneof { 12 | ArgMaxMatcher argmax_matcher = 1; 13 | BipartiteMatcher bipartite_matcher = 2; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /object_detection/protos/matcher_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT!
2 | # source: object_detection/protos/matcher.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | import object_detection.protos.argmax_matcher_pb2 17 | import object_detection.protos.bipartite_matcher_pb2 18 | 19 | 20 | DESCRIPTOR = _descriptor.FileDescriptor( 21 | name='object_detection/protos/matcher.proto', 22 | package='object_detection.protos', 23 | serialized_pb=_b('\n%object_detection/protos/matcher.proto\x12\x17object_detection.protos\x1a,object_detection/protos/argmax_matcher.proto\x1a/object_detection/protos/bipartite_matcher.proto\"\xa4\x01\n\x07Matcher\x12@\n\x0e\x61rgmax_matcher\x18\x01 \x01(\x0b\x32&.object_detection.protos.ArgMaxMatcherH\x00\x12\x46\n\x11\x62ipartite_matcher\x18\x02 \x01(\x0b\x32).object_detection.protos.BipartiteMatcherH\x00\x42\x0f\n\rmatcher_oneof') 24 | , 25 | dependencies=[object_detection.protos.argmax_matcher_pb2.DESCRIPTOR,object_detection.protos.bipartite_matcher_pb2.DESCRIPTOR,]) 26 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 27 | 28 | 29 | 30 | 31 | _MATCHER = _descriptor.Descriptor( 32 | name='Matcher', 33 | full_name='object_detection.protos.Matcher', 34 | filename=None, 35 | file=DESCRIPTOR, 36 | containing_type=None, 37 | fields=[ 38 | _descriptor.FieldDescriptor( 39 | name='argmax_matcher', full_name='object_detection.protos.Matcher.argmax_matcher', index=0, 40 | number=1, type=11, cpp_type=10, label=1, 41 | has_default_value=False, default_value=None, 42 | message_type=None, enum_type=None, containing_type=None, 43 | is_extension=False, extension_scope=None, 44 | options=None), 45 | _descriptor.FieldDescriptor( 46 | name='bipartite_matcher', full_name='object_detection.protos.Matcher.bipartite_matcher', index=1, 47 | number=2, type=11, cpp_type=10, label=1, 48 | has_default_value=False, default_value=None, 49 | message_type=None, enum_type=None, containing_type=None, 50 | is_extension=False, extension_scope=None, 51 | options=None), 52 | ], 53 | extensions=[ 54 | ], 55 | nested_types=[], 56 | enum_types=[ 57 | ], 58 | options=None, 59 | is_extendable=False, 60 | extension_ranges=[], 61 | oneofs=[ 62 | _descriptor.OneofDescriptor( 63 | name='matcher_oneof', full_name='object_detection.protos.Matcher.matcher_oneof', 64 | index=0, containing_type=None, fields=[]), 65 | ], 66 | serialized_start=162, 67 | serialized_end=326, 68 | ) 69 | 70 | _MATCHER.fields_by_name['argmax_matcher'].message_type = object_detection.protos.argmax_matcher_pb2._ARGMAXMATCHER 71 | _MATCHER.fields_by_name['bipartite_matcher'].message_type = object_detection.protos.bipartite_matcher_pb2._BIPARTITEMATCHER 72 | _MATCHER.oneofs_by_name['matcher_oneof'].fields.append( 73 | _MATCHER.fields_by_name['argmax_matcher']) 74 | _MATCHER.fields_by_name['argmax_matcher'].containing_oneof = _MATCHER.oneofs_by_name['matcher_oneof'] 75 | _MATCHER.oneofs_by_name['matcher_oneof'].fields.append( 76 | _MATCHER.fields_by_name['bipartite_matcher']) 77 | _MATCHER.fields_by_name['bipartite_matcher'].containing_oneof = _MATCHER.oneofs_by_name['matcher_oneof'] 78 | DESCRIPTOR.message_types_by_name['Matcher'] = _MATCHER 79 | 80 | Matcher = 
_reflection.GeneratedProtocolMessageType('Matcher', (_message.Message,), dict( 81 | DESCRIPTOR = _MATCHER, 82 | __module__ = 'object_detection.protos.matcher_pb2' 83 | # @@protoc_insertion_point(class_scope:object_detection.protos.Matcher) 84 | )) 85 | _sym_db.RegisterMessage(Matcher) 86 | 87 | 88 | # @@protoc_insertion_point(module_scope) 89 | -------------------------------------------------------------------------------- /object_detection/protos/mean_stddev_box_coder.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Configuration proto for MeanStddevBoxCoder. See 6 | // box_coders/mean_stddev_box_coder.py for details. 7 | message MeanStddevBoxCoder { 8 | } 9 | -------------------------------------------------------------------------------- /object_detection/protos/mean_stddev_box_coder_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 2 | # source: object_detection/protos/mean_stddev_box_coder.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='object_detection/protos/mean_stddev_box_coder.proto', 20 | package='object_detection.protos', 21 | serialized_pb=_b('\n3object_detection/protos/mean_stddev_box_coder.proto\x12\x17object_detection.protos\"\x14\n\x12MeanStddevBoxCoder') 22 | ) 23 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 24 | 25 | 26 | 27 | 28 | _MEANSTDDEVBOXCODER = _descriptor.Descriptor( 29 | name='MeanStddevBoxCoder', 30 | full_name='object_detection.protos.MeanStddevBoxCoder', 31 | filename=None, 32 | file=DESCRIPTOR, 33 | containing_type=None, 34 | fields=[ 35 | ], 36 | extensions=[ 37 | ], 38 | nested_types=[], 39 | enum_types=[ 40 | ], 41 | options=None, 42 | is_extendable=False, 43 | extension_ranges=[], 44 | oneofs=[ 45 | ], 46 | serialized_start=80, 47 | serialized_end=100, 48 | ) 49 | 50 | DESCRIPTOR.message_types_by_name['MeanStddevBoxCoder'] = _MEANSTDDEVBOXCODER 51 | 52 | MeanStddevBoxCoder = _reflection.GeneratedProtocolMessageType('MeanStddevBoxCoder', (_message.Message,), dict( 53 | DESCRIPTOR = _MEANSTDDEVBOXCODER, 54 | __module__ = 'object_detection.protos.mean_stddev_box_coder_pb2' 55 | # @@protoc_insertion_point(class_scope:object_detection.protos.MeanStddevBoxCoder) 56 | )) 57 | _sym_db.RegisterMessage(MeanStddevBoxCoder) 58 | 59 | 60 | # @@protoc_insertion_point(module_scope) 61 | -------------------------------------------------------------------------------- /object_detection/protos/model.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | import "object_detection/protos/faster_rcnn.proto"; 6 | import "object_detection/protos/ssd.proto"; 7 | 8 | // Top level configuration for DetectionModels. 
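// As an illustrative sketch (not a complete pipeline config; see
// samples/configs for full examples), the oneof below is selected like:
//   model {
//     ssd {
//       num_classes: 37
//       ...
//     }
//   }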
9 | message DetectionModel { 10 | oneof model { 11 | FasterRcnn faster_rcnn = 1; 12 | Ssd ssd = 2; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /object_detection/protos/model_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 2 | # source: object_detection/protos/model.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | import object_detection.protos.faster_rcnn_pb2 17 | import object_detection.protos.ssd_pb2 18 | 19 | 20 | DESCRIPTOR = _descriptor.FileDescriptor( 21 | name='object_detection/protos/model.proto', 22 | package='object_detection.protos', 23 | serialized_pb=_b('\n#object_detection/protos/model.proto\x12\x17object_detection.protos\x1a)object_detection/protos/faster_rcnn.proto\x1a!object_detection/protos/ssd.proto\"\x82\x01\n\x0e\x44\x65tectionModel\x12:\n\x0b\x66\x61ster_rcnn\x18\x01 \x01(\x0b\x32#.object_detection.protos.FasterRcnnH\x00\x12+\n\x03ssd\x18\x02 \x01(\x0b\x32\x1c.object_detection.protos.SsdH\x00\x42\x07\n\x05model') 24 | , 25 | dependencies=[object_detection.protos.faster_rcnn_pb2.DESCRIPTOR,object_detection.protos.ssd_pb2.DESCRIPTOR,]) 26 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 27 | 28 | 29 | 30 | 31 | _DETECTIONMODEL = _descriptor.Descriptor( 32 | name='DetectionModel', 33 | full_name='object_detection.protos.DetectionModel', 34 | filename=None, 35 | file=DESCRIPTOR, 36 | containing_type=None, 37 | fields=[ 38 | _descriptor.FieldDescriptor( 39 | name='faster_rcnn', full_name='object_detection.protos.DetectionModel.faster_rcnn', index=0, 40 | number=1, type=11, cpp_type=10, label=1, 41 | has_default_value=False, default_value=None, 42 | message_type=None, enum_type=None, containing_type=None, 43 | is_extension=False, extension_scope=None, 44 | options=None), 45 | _descriptor.FieldDescriptor( 46 | name='ssd', full_name='object_detection.protos.DetectionModel.ssd', index=1, 47 | number=2, type=11, cpp_type=10, label=1, 48 | has_default_value=False, default_value=None, 49 | message_type=None, enum_type=None, containing_type=None, 50 | is_extension=False, extension_scope=None, 51 | options=None), 52 | ], 53 | extensions=[ 54 | ], 55 | nested_types=[], 56 | enum_types=[ 57 | ], 58 | options=None, 59 | is_extendable=False, 60 | extension_ranges=[], 61 | oneofs=[ 62 | _descriptor.OneofDescriptor( 63 | name='model', full_name='object_detection.protos.DetectionModel.model', 64 | index=0, containing_type=None, fields=[]), 65 | ], 66 | serialized_start=143, 67 | serialized_end=273, 68 | ) 69 | 70 | _DETECTIONMODEL.fields_by_name['faster_rcnn'].message_type = object_detection.protos.faster_rcnn_pb2._FASTERRCNN 71 | _DETECTIONMODEL.fields_by_name['ssd'].message_type = object_detection.protos.ssd_pb2._SSD 72 | _DETECTIONMODEL.oneofs_by_name['model'].fields.append( 73 | _DETECTIONMODEL.fields_by_name['faster_rcnn']) 74 | _DETECTIONMODEL.fields_by_name['faster_rcnn'].containing_oneof = _DETECTIONMODEL.oneofs_by_name['model'] 75 | _DETECTIONMODEL.oneofs_by_name['model'].fields.append( 76 | 
_DETECTIONMODEL.fields_by_name['ssd']) 77 | _DETECTIONMODEL.fields_by_name['ssd'].containing_oneof = _DETECTIONMODEL.oneofs_by_name['model'] 78 | DESCRIPTOR.message_types_by_name['DetectionModel'] = _DETECTIONMODEL 79 | 80 | DetectionModel = _reflection.GeneratedProtocolMessageType('DetectionModel', (_message.Message,), dict( 81 | DESCRIPTOR = _DETECTIONMODEL, 82 | __module__ = 'object_detection.protos.model_pb2' 83 | # @@protoc_insertion_point(class_scope:object_detection.protos.DetectionModel) 84 | )) 85 | _sym_db.RegisterMessage(DetectionModel) 86 | 87 | 88 | # @@protoc_insertion_point(module_scope) 89 | -------------------------------------------------------------------------------- /object_detection/protos/optimizer.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Messages for configuring the optimizing strategy for training object 6 | // detection models. 7 | 8 | // Top level optimizer message. 9 | message Optimizer { 10 | oneof optimizer { 11 | RMSPropOptimizer rms_prop_optimizer = 1; 12 | MomentumOptimizer momentum_optimizer = 2; 13 | AdamOptimizer adam_optimizer = 3; 14 | } 15 | optional bool use_moving_average = 4 [default=true]; 16 | optional float moving_average_decay = 5 [default=0.9999]; 17 | } 18 | 19 | // Configuration message for the RMSPropOptimizer 20 | // See: https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer 21 | message RMSPropOptimizer { 22 | optional LearningRate learning_rate = 1; 23 | optional float momentum_optimizer_value = 2 [default=0.9]; 24 | optional float decay = 3 [default=0.9]; 25 | optional float epsilon = 4 [default=1.0]; 26 | } 27 | 28 | // Configuration message for the MomentumOptimizer 29 | // See: https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer 30 | message MomentumOptimizer { 31 | optional LearningRate learning_rate = 1; 32 | optional float momentum_optimizer_value = 2 [default=0.9]; 33 | } 34 | 35 | // Configuration message for the AdamOptimizer 36 | // See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer 37 | message AdamOptimizer { 38 | optional LearningRate learning_rate = 1; 39 | } 40 | 41 | // Configuration message for optimizer learning rate. 42 | message LearningRate { 43 | oneof learning_rate { 44 | ConstantLearningRate constant_learning_rate = 1; 45 | ExponentialDecayLearningRate exponential_decay_learning_rate = 2; 46 | ManualStepLearningRate manual_step_learning_rate = 3; 47 | } 48 | } 49 | 50 | // Configuration message for a constant learning rate. 51 | message ConstantLearningRate { 52 | optional float learning_rate = 1 [default=0.002]; 53 | } 54 | 55 | // Configuration message for an exponentially decaying learning rate. 56 | // See https://www.tensorflow.org/versions/master/api_docs/python/train/ \ 57 | // decaying_the_learning_rate#exponential_decay 58 | message ExponentialDecayLearningRate { 59 | optional float initial_learning_rate = 1 [default=0.002]; 60 | optional uint32 decay_steps = 2 [default=4000000]; 61 | optional float decay_factor = 3 [default=0.95]; 62 | optional bool staircase = 4 [default=true]; 63 | } 64 | 65 | // Configuration message for a manually defined learning rate schedule. 
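// For example (illustrative values, not defaults), a rate that starts at
// 3e-4 and is manually stepped down twice:
//   manual_step_learning_rate {
//     initial_learning_rate: 0.0003
//     schedule { step: 900000 learning_rate: 0.00003 }
//     schedule { step: 1200000 learning_rate: 0.000003 }
//   }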
66 | message ManualStepLearningRate { 67 | optional float initial_learning_rate = 1 [default=0.002]; 68 | message LearningRateSchedule { 69 | optional uint32 step = 1; 70 | optional float learning_rate = 2 [default=0.002]; 71 | } 72 | repeated LearningRateSchedule schedule = 2; 73 | } 74 | -------------------------------------------------------------------------------- /object_detection/protos/pipeline.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | import "object_detection/protos/eval.proto"; 6 | import "object_detection/protos/input_reader.proto"; 7 | import "object_detection/protos/model.proto"; 8 | import "object_detection/protos/train.proto"; 9 | 10 | // Convenience message for configuring a training and eval pipeline. Allows all 11 | // of the pipeline parameters to be configured from one file. 12 | message TrainEvalPipelineConfig { 13 | optional DetectionModel model = 1; 14 | optional TrainConfig train_config = 2; 15 | optional InputReader train_input_reader = 3; 16 | optional EvalConfig eval_config = 4; 17 | optional InputReader eval_input_reader = 5; 18 | } 19 | -------------------------------------------------------------------------------- /object_detection/protos/post_processing.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Configuration proto for non-max-suppression operation on a batch of 6 | // detections. 7 | message BatchNonMaxSuppression { 8 | // Scalar threshold for score (low scoring boxes are removed). 9 | optional float score_threshold = 1 [default = 0.0]; 10 | 11 | // Scalar threshold for IOU (boxes that have high IOU overlap 12 | // with previously selected boxes are removed). 13 | optional float iou_threshold = 2 [default = 0.6]; 14 | 15 | // Maximum number of detections to retain per class. 16 | optional int32 max_detections_per_class = 3 [default = 100]; 17 | 18 | // Maximum number of detections to retain across all classes. 19 | optional int32 max_total_detections = 5 [default = 100]; 20 | } 21 | 22 | // Configuration proto for post-processing predicted boxes and 23 | // scores. 24 | message PostProcessing { 25 | // Non max suppression parameters. 26 | optional BatchNonMaxSuppression batch_non_max_suppression = 1; 27 | 28 | // Enum to specify how to convert the detection scores. 29 | enum ScoreConverter { 30 | // Input scores equal output scores. 31 | IDENTITY = 0; 32 | 33 | // Applies a sigmoid on input scores. 34 | SIGMOID = 1; 35 | 36 | // Applies a softmax on input scores. 37 | SOFTMAX = 2; 38 | } 39 | 40 | // Score converter to use. 41 | optional ScoreConverter score_converter = 2 [default = IDENTITY]; 42 | } 43 | -------------------------------------------------------------------------------- /object_detection/protos/region_similarity_calculator.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Configuration proto for region similarity calculators. See 6 | // core/region_similarity_calculator.py for details. 7 | message RegionSimilarityCalculator { 8 | oneof region_similarity { 9 | NegSqDistSimilarity neg_sq_dist_similarity = 1; 10 | IouSimilarity iou_similarity = 2; 11 | IoaSimilarity ioa_similarity = 3; 12 | } 13 | } 14 | 15 | // Configuration for negative squared distance similarity calculator.
16 | message NegSqDistSimilarity { 17 | } 18 | 19 | // Configuration for intersection-over-union (IOU) similarity calculator. 20 | message IouSimilarity { 21 | } 22 | 23 | // Configuration for intersection-over-area (IOA) similarity calculator. 24 | message IoaSimilarity { 25 | } 26 | -------------------------------------------------------------------------------- /object_detection/protos/square_box_coder.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Configuration proto for SquareBoxCoder. See 6 | // box_coders/square_box_coder.py for details. 7 | message SquareBoxCoder { 8 | // Scale factor for anchor encoded box center. 9 | optional float y_scale = 1 [default = 10.0]; 10 | optional float x_scale = 2 [default = 10.0]; 11 | 12 | // Scale factor for anchor encoded box length. 13 | optional float length_scale = 3 [default = 5.0]; 14 | } 15 | -------------------------------------------------------------------------------- /object_detection/protos/square_box_coder_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 2 | # source: object_detection/protos/square_box_coder.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='object_detection/protos/square_box_coder.proto', 20 | package='object_detection.protos', 21 | serialized_pb=_b('\n.object_detection/protos/square_box_coder.proto\x12\x17object_detection.protos\"S\n\x0eSquareBoxCoder\x12\x13\n\x07y_scale\x18\x01 \x01(\x02:\x02\x31\x30\x12\x13\n\x07x_scale\x18\x02 \x01(\x02:\x02\x31\x30\x12\x17\n\x0clength_scale\x18\x03 \x01(\x02:\x01\x35') 22 | ) 23 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 24 | 25 | 26 | 27 | 28 | _SQUAREBOXCODER = _descriptor.Descriptor( 29 | name='SquareBoxCoder', 30 | full_name='object_detection.protos.SquareBoxCoder', 31 | filename=None, 32 | file=DESCRIPTOR, 33 | containing_type=None, 34 | fields=[ 35 | _descriptor.FieldDescriptor( 36 | name='y_scale', full_name='object_detection.protos.SquareBoxCoder.y_scale', index=0, 37 | number=1, type=2, cpp_type=6, label=1, 38 | has_default_value=True, default_value=10, 39 | message_type=None, enum_type=None, containing_type=None, 40 | is_extension=False, extension_scope=None, 41 | options=None), 42 | _descriptor.FieldDescriptor( 43 | name='x_scale', full_name='object_detection.protos.SquareBoxCoder.x_scale', index=1, 44 | number=2, type=2, cpp_type=6, label=1, 45 | has_default_value=True, default_value=10, 46 | message_type=None, enum_type=None, containing_type=None, 47 | is_extension=False, extension_scope=None, 48 | options=None), 49 | _descriptor.FieldDescriptor( 50 | name='length_scale', full_name='object_detection.protos.SquareBoxCoder.length_scale', index=2, 51 | number=3, type=2, cpp_type=6, label=1, 52 | has_default_value=True, default_value=5, 53 | message_type=None, enum_type=None, containing_type=None, 54 | is_extension=False, extension_scope=None, 55 | 
options=None), 56 | ], 57 | extensions=[ 58 | ], 59 | nested_types=[], 60 | enum_types=[ 61 | ], 62 | options=None, 63 | is_extendable=False, 64 | extension_ranges=[], 65 | oneofs=[ 66 | ], 67 | serialized_start=75, 68 | serialized_end=158, 69 | ) 70 | 71 | DESCRIPTOR.message_types_by_name['SquareBoxCoder'] = _SQUAREBOXCODER 72 | 73 | SquareBoxCoder = _reflection.GeneratedProtocolMessageType('SquareBoxCoder', (_message.Message,), dict( 74 | DESCRIPTOR = _SQUAREBOXCODER, 75 | __module__ = 'object_detection.protos.square_box_coder_pb2' 76 | # @@protoc_insertion_point(class_scope:object_detection.protos.SquareBoxCoder) 77 | )) 78 | _sym_db.RegisterMessage(SquareBoxCoder) 79 | 80 | 81 | # @@protoc_insertion_point(module_scope) 82 | -------------------------------------------------------------------------------- /object_detection/protos/ssd.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | package object_detection.protos; 3 | 4 | import "object_detection/protos/anchor_generator.proto"; 5 | import "object_detection/protos/box_coder.proto"; 6 | import "object_detection/protos/box_predictor.proto"; 7 | import "object_detection/protos/hyperparams.proto"; 8 | import "object_detection/protos/image_resizer.proto"; 9 | import "object_detection/protos/matcher.proto"; 10 | import "object_detection/protos/losses.proto"; 11 | import "object_detection/protos/post_processing.proto"; 12 | import "object_detection/protos/region_similarity_calculator.proto"; 13 | 14 | // Configuration for Single Shot Detection (SSD) models. 15 | message Ssd { 16 | 17 | // Number of classes to predict. 18 | optional int32 num_classes = 1; 19 | 20 | // Image resizer for preprocessing the input image. 21 | optional ImageResizer image_resizer = 2; 22 | 23 | // Feature extractor config. 24 | optional SsdFeatureExtractor feature_extractor = 3; 25 | 26 | // Box coder to encode the boxes. 27 | optional BoxCoder box_coder = 4; 28 | 29 | // Matcher to match groundtruth with anchors. 30 | optional Matcher matcher = 5; 31 | 32 | // Region similarity calculator to compute similarity of boxes. 33 | optional RegionSimilarityCalculator similarity_calculator = 6; 34 | 35 | // Box predictor to attach to the features. 36 | optional BoxPredictor box_predictor = 7; 37 | 38 | // Anchor generator to compute anchors. 39 | optional AnchorGenerator anchor_generator = 8; 40 | 41 | // Post processing to apply on the predictions. 42 | optional PostProcessing post_processing = 9; 43 | 44 | // Whether to normalize the loss by number of groundtruth boxes that match to 45 | // the anchors. 46 | optional bool normalize_loss_by_num_matches = 10 [default=true]; 47 | 48 | // Loss configuration for training. 49 | optional Loss loss = 11; 50 | } 51 | 52 | 53 | message SsdFeatureExtractor { 54 | // Type of ssd feature extractor. 55 | optional string type = 1; 56 | 57 | // The factor to alter the depth of the channels in the feature extractor. 58 | optional float depth_multiplier = 2 [default=1.0]; 59 | 60 | // Minimum number of the channels in the feature extractor. 61 | optional int32 min_depth = 3 [default=16]; 62 | 63 | // Hyperparameters for the feature extractor. 
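// An illustrative (assumed, not default) setting:
//   conv_hyperparams {
//     regularizer { l2_regularizer { weight: 0.00004 } }
//     initializer { truncated_normal_initializer { stddev: 0.03 } }
//     activation: RELU_6
//   }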
64 | optional Hyperparams conv_hyperparams = 4; 65 | } 66 | -------------------------------------------------------------------------------- /object_detection/protos/ssd_anchor_generator.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | // Configuration proto for SSD anchor generator described in 6 | // https://arxiv.org/abs/1512.02325. See 7 | // anchor_generators/multiple_grid_anchor_generator.py for details. 8 | message SsdAnchorGenerator { 9 | // Number of grid layers to create anchors for. 10 | optional int32 num_layers = 1 [default = 6]; 11 | 12 | // Scale of anchors corresponding to finest resolution. 13 | optional float min_scale = 2 [default = 0.2]; 14 | 15 | // Scale of anchors corresponding to coarsest resolution. 16 | optional float max_scale = 3 [default = 0.95]; 17 | 18 | // Aspect ratios for anchors at each grid point. 19 | repeated float aspect_ratios = 4; 20 | 21 | // Whether to use the following aspect ratio and scale combination for the 22 | // layer with the finest resolution: (scale=0.1, aspect_ratio=1.0), 23 | // (scale=min_scale, aspect_ratio=2.0), (scale=min_scale, aspect_ratio=0.5). 24 | optional bool reduce_boxes_in_lowest_layer = 5 [default = true]; 25 | } 26 | -------------------------------------------------------------------------------- /object_detection/protos/string_int_label_map.proto: -------------------------------------------------------------------------------- 1 | // Message to store the mapping from class label strings to class ids. Datasets 2 | // use string labels to represent classes while the object detection framework 3 | // works with class ids. This message maps them so they can be converted back 4 | // and forth as needed. 5 | syntax = "proto2"; 6 | 7 | package object_detection.protos; 8 | 9 | message StringIntLabelMapItem { 10 | // String name. The most common practice is to set this to a MID or synset 11 | // id. 12 | optional string name = 1; 13 | 14 | // Integer id that maps to the string name above. Label ids should start from 15 | // 1. 16 | optional int32 id = 2; 17 | 18 | // Human readable string label. 19 | optional string display_name = 3; 20 | }; 21 | 22 | message StringIntLabelMap { 23 | repeated StringIntLabelMapItem item = 1; 24 | }; 25 | -------------------------------------------------------------------------------- /object_detection/protos/train.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package object_detection.protos; 4 | 5 | import "object_detection/protos/optimizer.proto"; 6 | import "object_detection/protos/preprocessor.proto"; 7 | 8 | // Message for configuring DetectionModel training jobs (train.py). 9 | message TrainConfig { 10 | // Input queue batch size. 11 | optional uint32 batch_size = 1 [default=32]; 12 | 13 | // Data augmentation options. 14 | repeated PreprocessingStep data_augmentation_options = 2; 15 | 16 | // Whether to synchronize replicas during training. 17 | optional bool sync_replicas = 3 [default=false]; 18 | 19 | // How frequently, in hours, to keep checkpoints. 20 | optional uint32 keep_checkpoint_every_n_hours = 4 [default=1000]; 21 | 22 | // Optimizer used to train the DetectionModel. 23 | optional Optimizer optimizer = 5; 24 | 25 | // If greater than 0, clips gradients by this value. 26 | optional float gradient_clipping_by_norm = 6 [default=0.0]; 27 | 28 | // Checkpoint to restore variables from.
Typically used to load feature 29 | // extractor variables trained outside of object detection. 30 | optional string fine_tune_checkpoint = 7 [default=""]; 31 | 32 | // Specifies whether the finetune checkpoint is from an object detection model. 33 | // If from an object detection model, the model being trained should have 34 | // the same parameters with the exception of the num_classes parameter. 35 | // If false, it assumes the checkpoint was an object classification model. 36 | optional bool from_detection_checkpoint = 8 [default=false]; 37 | 38 | // Number of steps to train the DetectionModel for. If 0, will train the model 39 | // indefinitely. 40 | optional uint32 num_steps = 9 [default=0]; 41 | 42 | // Number of training steps between replica startups. 43 | // This flag must be set to 0 if sync_replicas is set to true. 44 | optional float startup_delay_steps = 10 [default=15]; 45 | 46 | // If greater than 0, multiplies the gradient of bias variables by this 47 | // amount. 48 | optional float bias_grad_multiplier = 11 [default=0]; 49 | 50 | // Variables that should not be updated during training. 51 | repeated string freeze_variables = 12; 52 | 53 | // Number of replicas to aggregate before making parameter updates. 54 | optional int32 replicas_to_aggregate = 13 [default=1]; 55 | 56 | // Maximum number of elements to store within a queue. 57 | optional int32 batch_queue_capacity = 14 [default=600]; 58 | 59 | // Number of threads to use for batching. 60 | optional int32 num_batch_queue_threads = 15 [default=8]; 61 | 62 | // Maximum capacity of the queue used to prefetch assembled batches. 63 | optional int32 prefetch_queue_capacity = 16 [default=10]; 64 | } 65 | -------------------------------------------------------------------------------- /object_detection/samples/cloud/cloud.yml: -------------------------------------------------------------------------------- 1 | trainingInput: 2 | runtimeVersion: "1.0" 3 | scaleTier: CUSTOM 4 | masterType: standard_gpu 5 | workerCount: 5 6 | workerType: standard_gpu 7 | parameterServerCount: 3 8 | parameterServerType: standard 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_pets.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with Inception Resnet v2, Atrous version; 2 | # Configured for the Oxford-IIIT Pet Dataset. 3 | # Users should configure the fine_tune_checkpoint field in the train config as 4 | # well as the label_map_path and input_path fields in the train_input_reader and 5 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 6 | # should be configured.
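# As an illustration only (the ${PETS_DATA} directory below is a placeholder,
# not part of this repository), all placeholders can be filled in with a
# single in-place substitution, e.g.:
#
#   sed -i "s|PATH_TO_BE_CONFIGURED|${PETS_DATA}|g" \
#     faster_rcnn_inception_resnet_v2_atrous_pets.config
#
# assuming pet_train.record, pet_val.record, pet_label_map.pbtxt and
# model.ckpt all live under that one directory.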
7 | 8 | model { 9 | faster_rcnn { 10 | num_classes: 37 11 | image_resizer { 12 | keep_aspect_ratio_resizer { 13 | min_dimension: 600 14 | max_dimension: 1024 15 | } 16 | } 17 | feature_extractor { 18 | type: 'faster_rcnn_inception_resnet_v2' 19 | first_stage_features_stride: 8 20 | } 21 | first_stage_anchor_generator { 22 | grid_anchor_generator { 23 | scales: [0.25, 0.5, 1.0, 2.0] 24 | aspect_ratios: [0.5, 1.0, 2.0] 25 | height_stride: 8 26 | width_stride: 8 27 | } 28 | } 29 | first_stage_atrous_rate: 2 30 | first_stage_box_predictor_conv_hyperparams { 31 | op: CONV 32 | regularizer { 33 | l2_regularizer { 34 | weight: 0.0 35 | } 36 | } 37 | initializer { 38 | truncated_normal_initializer { 39 | stddev: 0.01 40 | } 41 | } 42 | } 43 | first_stage_nms_score_threshold: 0.0 44 | first_stage_nms_iou_threshold: 0.7 45 | first_stage_max_proposals: 300 46 | first_stage_localization_loss_weight: 2.0 47 | first_stage_objectness_loss_weight: 1.0 48 | initial_crop_size: 17 49 | maxpool_kernel_size: 1 50 | maxpool_stride: 1 51 | second_stage_box_predictor { 52 | mask_rcnn_box_predictor { 53 | use_dropout: false 54 | dropout_keep_probability: 1.0 55 | fc_hyperparams { 56 | op: FC 57 | regularizer { 58 | l2_regularizer { 59 | weight: 0.0 60 | } 61 | } 62 | initializer { 63 | variance_scaling_initializer { 64 | factor: 1.0 65 | uniform: true 66 | mode: FAN_AVG 67 | } 68 | } 69 | } 70 | } 71 | } 72 | second_stage_post_processing { 73 | batch_non_max_suppression { 74 | score_threshold: 0.0 75 | iou_threshold: 0.6 76 | max_detections_per_class: 100 77 | max_total_detections: 100 78 | } 79 | score_converter: SOFTMAX 80 | } 81 | second_stage_localization_loss_weight: 2.0 82 | second_stage_classification_loss_weight: 1.0 83 | } 84 | } 85 | 86 | train_config: { 87 | batch_size: 1 88 | optimizer { 89 | momentum_optimizer: { 90 | learning_rate: { 91 | manual_step_learning_rate { 92 | initial_learning_rate: 0.0003 93 | schedule { 94 | step: 0 95 | learning_rate: .0003 96 | } 97 | schedule { 98 | step: 900000 99 | learning_rate: .00003 100 | } 101 | schedule { 102 | step: 1200000 103 | learning_rate: .000003 104 | } 105 | } 106 | } 107 | momentum_optimizer_value: 0.9 108 | } 109 | use_moving_average: false 110 | } 111 | gradient_clipping_by_norm: 10.0 112 | fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" 113 | from_detection_checkpoint: true 114 | data_augmentation_options { 115 | random_horizontal_flip { 116 | } 117 | } 118 | } 119 | 120 | train_input_reader: { 121 | tf_record_input_reader { 122 | input_path: "PATH_TO_BE_CONFIGURED/pet_train.record" 123 | } 124 | label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" 125 | } 126 | 127 | eval_config: { 128 | num_examples: 2000 129 | } 130 | 131 | eval_input_reader: { 132 | tf_record_input_reader { 133 | input_path: "PATH_TO_BE_CONFIGURED/pet_val.record" 134 | } 135 | label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" 136 | } 137 | -------------------------------------------------------------------------------- /object_detection/samples/configs/faster_rcnn_resnet101_pets.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with Resnet-101 (v1), configured for the Oxford-IIIT Pet Dataset. 2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured.
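# For reference, a pipeline config like this one is consumed by the training
# binary; a typical local invocation (the paths below are illustrative
# placeholders, not fixed by this repository) looks like:
#
#   python object_detection/train.py \
#     --logtostderr \
#     --pipeline_config_path=object_detection/samples/configs/faster_rcnn_resnet101_pets.config \
#     --train_dir=${TRAIN_DIR}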
6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 37 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet101' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.6 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 1 86 | optimizer { 87 | momentum_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0003 91 | schedule { 92 | step: 0 93 | learning_rate: .0003 94 | } 95 | schedule { 96 | step: 900000 97 | learning_rate: .00003 98 | } 99 | schedule { 100 | step: 1200000 101 | learning_rate: .000003 102 | } 103 | } 104 | } 105 | momentum_optimizer_value: 0.9 106 | } 107 | use_moving_average: false 108 | } 109 | gradient_clipping_by_norm: 10.0 110 | fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" 111 | from_detection_checkpoint: true 112 | data_augmentation_options { 113 | random_horizontal_flip { 114 | } 115 | } 116 | } 117 | 118 | train_input_reader: { 119 | tf_record_input_reader { 120 | input_path: "PATH_TO_BE_CONFIGURED/pet_train.record" 121 | } 122 | label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" 123 | } 124 | 125 | eval_config: { 126 | num_examples: 2000 127 | } 128 | 129 | eval_input_reader: { 130 | tf_record_input_reader { 131 | input_path: "PATH_TO_BE_CONFIGURED/pet_val.record" 132 | } 133 | label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" 134 | } 135 | -------------------------------------------------------------------------------- /object_detection/samples/configs/faster_rcnn_resnet101_voc07.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with Resnet-101 (v1), configured for Pascal VOC Dataset. 2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured. 
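# For reference: num_examples in the eval_config below (4952) is the size of
# the PASCAL VOC 2007 test set. A typical local evaluation run against
# checkpoints produced by training (paths are illustrative placeholders):
#
#   python object_detection/eval.py \
#     --logtostderr \
#     --pipeline_config_path=object_detection/samples/configs/faster_rcnn_resnet101_voc07.config \
#     --checkpoint_dir=${TRAIN_DIR} \
#     --eval_dir=${EVAL_DIR}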
6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 20 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet101' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.6 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 1 86 | optimizer { 87 | momentum_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0001 91 | schedule { 92 | step: 0 93 | learning_rate: .0001 94 | } 95 | schedule { 96 | step: 500000 97 | learning_rate: .00001 98 | } 99 | schedule { 100 | step: 700000 101 | learning_rate: .000001 102 | } 103 | } 104 | } 105 | momentum_optimizer_value: 0.9 106 | } 107 | use_moving_average: false 108 | } 109 | gradient_clipping_by_norm: 10.0 110 | fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" 111 | from_detection_checkpoint: true 112 | num_steps: 800000 113 | data_augmentation_options { 114 | random_horizontal_flip { 115 | } 116 | } 117 | } 118 | 119 | train_input_reader: { 120 | tf_record_input_reader { 121 | input_path: "PATH_TO_BE_CONFIGURED/pascal_voc_train.record" 122 | } 123 | label_map_path: "PATH_TO_BE_CONFIGURED/pascal_voc_label_map.pbtxt" 124 | } 125 | 126 | eval_config: { 127 | num_examples: 4952 128 | } 129 | 130 | eval_input_reader: { 131 | tf_record_input_reader { 132 | input_path: "PATH_TO_BE_CONFIGURED/pascal_voc_val.record" 133 | } 134 | label_map_path: "PATH_TO_BE_CONFIGURED/pascal_voc_label_map.pbtxt" 135 | } 136 | -------------------------------------------------------------------------------- /object_detection/samples/configs/faster_rcnn_resnet152_pets.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with Resnet-152 (v1), configured for the Oxford-IIIT Pet Dataset. 2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured.
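# Apart from the feature_extractor type ('faster_rcnn_resnet152' instead of
# 'faster_rcnn_resnet101'), this config mirrors faster_rcnn_resnet101_pets.config
# above: strides, loss weights, learning-rate schedule and post-processing
# settings are all identical.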
6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 37 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet152' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.6 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 1 86 | optimizer { 87 | momentum_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0003 91 | schedule { 92 | step: 0 93 | learning_rate: .0003 94 | } 95 | schedule { 96 | step: 900000 97 | learning_rate: .00003 98 | } 99 | schedule { 100 | step: 1200000 101 | learning_rate: .000003 102 | } 103 | } 104 | } 105 | momentum_optimizer_value: 0.9 106 | } 107 | use_moving_average: false 108 | } 109 | gradient_clipping_by_norm: 10.0 110 | fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" 111 | from_detection_checkpoint: true 112 | data_augmentation_options { 113 | random_horizontal_flip { 114 | } 115 | } 116 | } 117 | 118 | train_input_reader: { 119 | tf_record_input_reader { 120 | input_path: "PATH_TO_BE_CONFIGURED/pet_train.record" 121 | } 122 | label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" 123 | } 124 | 125 | eval_config: { 126 | num_examples: 2000 127 | } 128 | 129 | eval_input_reader: { 130 | tf_record_input_reader { 131 | input_path: "PATH_TO_BE_CONFIGURED/pet_val.record" 132 | } 133 | label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" 134 | } 135 | -------------------------------------------------------------------------------- /object_detection/samples/configs/faster_rcnn_resnet50_pets.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with Resnet-50 (v1), configured for the Oxford-IIIT Pet Dataset. 2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured.
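# For reference, the manual_step_learning_rate block below starts at 3e-4,
# drops to 3e-5 at step 900000 and to 3e-6 at step 1200000; the step/rate
# pairs are consumed by manual_stepping() in
# object_detection/utils/learning_schedules.py.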
6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 37 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet50' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.6 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 1 86 | optimizer { 87 | momentum_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0003 91 | schedule { 92 | step: 0 93 | learning_rate: .0003 94 | } 95 | schedule { 96 | step: 900000 97 | learning_rate: .00003 98 | } 99 | schedule { 100 | step: 1200000 101 | learning_rate: .000003 102 | } 103 | } 104 | } 105 | momentum_optimizer_value: 0.9 106 | } 107 | use_moving_average: false 108 | } 109 | gradient_clipping_by_norm: 10.0 110 | fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" 111 | from_detection_checkpoint: true 112 | data_augmentation_options { 113 | random_horizontal_flip { 114 | } 115 | } 116 | } 117 | 118 | train_input_reader: { 119 | tf_record_input_reader { 120 | input_path: "PATH_TO_BE_CONFIGURED/pet_train.record" 121 | } 122 | label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" 123 | } 124 | 125 | eval_config: { 126 | num_examples: 2000 127 | } 128 | 129 | eval_input_reader: { 130 | tf_record_input_reader { 131 | input_path: "PATH_TO_BE_CONFIGURED/pet_val.record" 132 | } 133 | label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" 134 | } 135 | -------------------------------------------------------------------------------- /object_detection/samples/configs/rfcn_resnet101_pets.config: -------------------------------------------------------------------------------- 1 | # R-FCN with Resnet-101 (v1), configured for the Oxford-IIIT Pet Dataset. 2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured.
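# Unlike the Faster R-CNN configs above, the second stage here uses an
# rfcn_box_predictor (position-sensitive 18x18 crops pooled over a 3x3 grid
# of spatial bins) in place of mask_rcnn_box_predictor, so the
# initial_crop_size/maxpool settings are not needed.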
6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 37 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet101' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | second_stage_box_predictor { 47 | rfcn_box_predictor { 48 | conv_hyperparams { 49 | op: CONV 50 | regularizer { 51 | l2_regularizer { 52 | weight: 0.0 53 | } 54 | } 55 | initializer { 56 | truncated_normal_initializer { 57 | stddev: 0.01 58 | } 59 | } 60 | } 61 | crop_height: 18 62 | crop_width: 18 63 | num_spatial_bins_height: 3 64 | num_spatial_bins_width: 3 65 | } 66 | } 67 | second_stage_post_processing { 68 | batch_non_max_suppression { 69 | score_threshold: 0.0 70 | iou_threshold: 0.6 71 | max_detections_per_class: 100 72 | max_total_detections: 300 73 | } 74 | score_converter: SOFTMAX 75 | } 76 | second_stage_localization_loss_weight: 2.0 77 | second_stage_classification_loss_weight: 1.0 78 | } 79 | } 80 | 81 | train_config: { 82 | batch_size: 1 83 | optimizer { 84 | momentum_optimizer: { 85 | learning_rate: { 86 | manual_step_learning_rate { 87 | initial_learning_rate: 0.0003 88 | schedule { 89 | step: 0 90 | learning_rate: .0003 91 | } 92 | schedule { 93 | step: 900000 94 | learning_rate: .00003 95 | } 96 | schedule { 97 | step: 1200000 98 | learning_rate: .000003 99 | } 100 | } 101 | } 102 | momentum_optimizer_value: 0.9 103 | } 104 | use_moving_average: false 105 | } 106 | gradient_clipping_by_norm: 10.0 107 | fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" 108 | from_detection_checkpoint: true 109 | data_augmentation_options { 110 | random_horizontal_flip { 111 | } 112 | } 113 | } 114 | 115 | train_input_reader: { 116 | tf_record_input_reader { 117 | input_path: "PATH_TO_BE_CONFIGURED/pet_train.record" 118 | } 119 | label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" 120 | } 121 | 122 | eval_config: { 123 | num_examples: 2000 124 | } 125 | 126 | eval_input_reader: { 127 | tf_record_input_reader { 128 | input_path: "PATH_TO_BE_CONFIGURED/pet_val.record" 129 | } 130 | label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" 131 | } -------------------------------------------------------------------------------- /object_detection/ssd_mobilenet_v1_coco_11_06_2017/frozen_inference_graph.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/ssd_mobilenet_v1_coco_11_06_2017/frozen_inference_graph.pb -------------------------------------------------------------------------------- /object_detection/test_images/image1.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/test_images/image1.jpg -------------------------------------------------------------------------------- /object_detection/test_images/image2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/test_images/image2.jpg -------------------------------------------------------------------------------- /object_detection/test_images/image_info.txt: -------------------------------------------------------------------------------- 1 | 2 | Image provenance: 3 | image1.jpg: https://commons.wikimedia.org/wiki/File:Baegle_dwa.jpg 4 | image2.jpg: Michael Miley, 5 | https://www.flickr.com/photos/mike_miley/4678754542/in/photolist-88rQHL-88oBVp-88oC2B-88rS6J-88rSqm-88oBLv-88oBC4 6 | 7 | -------------------------------------------------------------------------------- /object_detection/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/object_detection/utils/__init__.py -------------------------------------------------------------------------------- /object_detection/utils/category_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Functions for importing/exporting Object Detection categories.""" 17 | import csv 18 | 19 | import tensorflow as tf 20 | 21 | 22 | def load_categories_from_csv_file(csv_path): 23 | """Loads categories from a csv file. 24 | 25 | The CSV file should have one comma delimited numeric category id and string 26 | category name pair per line. For example: 27 | 28 | 0,"cat" 29 | 1,"dog" 30 | 2,"bird" 31 | ... 32 | 33 | Args: 34 | csv_path: Path to the csv file to be parsed into categories. 35 | Returns: 36 | categories: A list of dictionaries representing all possible categories. 37 | The categories will contain an integer 'id' field and a string 38 | 'name' field. 39 | Raises: 40 | ValueError: If the csv file is incorrectly formatted. 
41 | """ 42 | categories = [] 43 | 44 | with tf.gfile.Open(csv_path, 'r') as csvfile: 45 | reader = csv.reader(csvfile, delimiter=',', quotechar='"') 46 | for row in reader: 47 | if not row: 48 | continue 49 | 50 | if len(row) != 2: 51 | raise ValueError('Expected 2 fields per row in csv: %s' % ','.join(row)) 52 | 53 | category_id = int(row[0]) 54 | category_name = row[1] 55 | categories.append({'id': category_id, 'name': category_name}) 56 | 57 | return categories 58 | 59 | 60 | def save_categories_to_csv_file(categories, csv_path): 61 | """Saves categories to a csv file. 62 | 63 | Args: 64 | categories: A list of dictionaries representing categories to save to file. 65 | Each category must contain an 'id' and 'name' field. 66 | csv_path: Path to the csv file to be parsed into categories. 67 | """ 68 | categories.sort(key=lambda x: x['id']) 69 | with tf.gfile.Open(csv_path, 'w') as csvfile: 70 | writer = csv.writer(csvfile, delimiter=',', quotechar='"') 71 | for category in categories: 72 | writer.writerow([category['id'], category['name']]) 73 | -------------------------------------------------------------------------------- /object_detection/utils/category_util_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.category_util.""" 17 | import os 18 | 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import category_util 22 | 23 | 24 | class EvalUtilTest(tf.test.TestCase): 25 | 26 | def test_load_categories_from_csv_file(self): 27 | csv_data = """ 28 | 0,"cat" 29 | 1,"dog" 30 | 2,"bird" 31 | """.strip(' ') 32 | csv_path = os.path.join(self.get_temp_dir(), 'test.csv') 33 | with tf.gfile.Open(csv_path, 'wb') as f: 34 | f.write(csv_data) 35 | 36 | categories = category_util.load_categories_from_csv_file(csv_path) 37 | self.assertTrue({'id': 0, 'name': 'cat'} in categories) 38 | self.assertTrue({'id': 1, 'name': 'dog'} in categories) 39 | self.assertTrue({'id': 2, 'name': 'bird'} in categories) 40 | 41 | def test_save_categories_to_csv_file(self): 42 | categories = [ 43 | {'id': 0, 'name': 'cat'}, 44 | {'id': 1, 'name': 'dog'}, 45 | {'id': 2, 'name': 'bird'}, 46 | ] 47 | csv_path = os.path.join(self.get_temp_dir(), 'test.csv') 48 | category_util.save_categories_to_csv_file(categories, csv_path) 49 | saved_categories = category_util.load_categories_from_csv_file(csv_path) 50 | self.assertEqual(saved_categories, categories) 51 | 52 | 53 | if __name__ == '__main__': 54 | tf.test.main() 55 | -------------------------------------------------------------------------------- /object_detection/utils/dataset_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utility functions for creating TFRecord data sets.""" 17 | 18 | import tensorflow as tf 19 | 20 | 21 | def int64_feature(value): 22 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 23 | 24 | 25 | def int64_list_feature(value): 26 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 27 | 28 | 29 | def bytes_feature(value): 30 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 31 | 32 | 33 | def bytes_list_feature(value): 34 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 35 | 36 | 37 | def float_list_feature(value): 38 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 39 | 40 | 41 | def read_examples_list(path): 42 | """Read list of training or validation examples. 43 | 44 | The file is assumed to contain a single example per line where the first 45 | token in the line is an identifier that allows us to find the image and 46 | annotation xml for that example. 47 | 48 | For example, the line: 49 | xyz 3 50 | would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored). 51 | 52 | Args: 53 | path: absolute path to examples list file. 54 | 55 | Returns: 56 | list of example identifiers (strings). 57 | """ 58 | with tf.gfile.GFile(path) as fid: 59 | lines = fid.readlines() 60 | return [line.strip().split(' ')[0] for line in lines] 61 | 62 | 63 | def recursive_parse_xml_to_dict(xml): 64 | """Recursively parses XML contents to python dict. 65 | 66 | We assume that `object` tags are the only ones that can appear 67 | multiple times at the same level of a tree. 68 | 69 | Args: 70 | xml: xml tree obtained by parsing XML file contents using lxml.etree 71 | 72 | Returns: 73 | Python dictionary holding XML contents. 74 | """ 75 | if not xml: 76 | return {xml.tag: xml.text} 77 | result = {} 78 | for child in xml: 79 | child_result = recursive_parse_xml_to_dict(child) 80 | if child.tag != 'object': 81 | result[child.tag] = child_result[child.tag] 82 | else: 83 | if child.tag not in result: 84 | result[child.tag] = [] 85 | result[child.tag].append(child_result[child.tag]) 86 | return {xml.tag: result} 87 | -------------------------------------------------------------------------------- /object_detection/utils/dataset_util_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.dataset_util.""" 17 | 18 | import os 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import dataset_util 22 | 23 | 24 | class DatasetUtilTest(tf.test.TestCase): 25 | 26 | def test_read_examples_list(self): 27 | example_list_data = """example1 1\nexample2 2""" 28 | example_list_path = os.path.join(self.get_temp_dir(), 'examples.txt') 29 | with tf.gfile.Open(example_list_path, 'wb') as f: 30 | f.write(example_list_data) 31 | 32 | examples = dataset_util.read_examples_list(example_list_path) 33 | self.assertListEqual(['example1', 'example2'], examples) 34 | 35 | 36 | if __name__ == '__main__': 37 | tf.test.main() 38 | -------------------------------------------------------------------------------- /object_detection/utils/learning_schedules_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.learning_schedules.""" 17 | import tensorflow as tf 18 | 19 | from object_detection.utils import learning_schedules 20 | 21 | 22 | class LearningSchedulesTest(tf.test.TestCase): 23 | 24 | def testExponentialDecayWithBurnin(self): 25 | global_step = tf.placeholder(tf.int32, []) 26 | learning_rate_base = 1.0 27 | learning_rate_decay_steps = 3 28 | learning_rate_decay_factor = .1 29 | burnin_learning_rate = .5 30 | burnin_steps = 2 31 | exp_rates = [.5, .5, 1, .1, .1, .1, .01, .01] 32 | learning_rate = learning_schedules.exponential_decay_with_burnin( 33 | global_step, learning_rate_base, learning_rate_decay_steps, 34 | learning_rate_decay_factor, burnin_learning_rate, burnin_steps) 35 | with self.test_session() as sess: 36 | output_rates = [] 37 | for input_global_step in range(8): 38 | output_rate = sess.run(learning_rate, 39 | feed_dict={global_step: input_global_step}) 40 | output_rates.append(output_rate) 41 | self.assertAllClose(output_rates, exp_rates) 42 | 43 | def testManualStepping(self): 44 | global_step = tf.placeholder(tf.int64, []) 45 | boundaries = [2, 3, 7] 46 | rates = [1.0, 2.0, 3.0, 4.0] 47 | exp_rates = [1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0] 48 | learning_rate = learning_schedules.manual_stepping(global_step, boundaries, 49 | rates) 50 | with self.test_session() as sess: 51 | output_rates = [] 52 | for input_global_step in range(10): 53 | output_rate = sess.run(learning_rate, 54 | feed_dict={global_step: input_global_step}) 55 | output_rates.append(output_rate) 56 | self.assertAllClose(output_rates, exp_rates) 57 | 58 | if __name__ == '__main__': 59 | tf.test.main() 60 | -------------------------------------------------------------------------------- /object_detection/utils/metrics_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for object_detection.metrics.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import metrics 22 | 23 | 24 | class MetricsTest(tf.test.TestCase): 25 | 26 | def test_compute_cor_loc(self): 27 | num_gt_imgs_per_class = np.array([100, 1, 5, 1, 1], dtype=int) 28 | num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0], 29 | dtype=int) 30 | corloc = metrics.compute_cor_loc(num_gt_imgs_per_class, 31 | num_images_correctly_detected_per_class) 32 | expected_corloc = np.array([0.1, 0, 0.2, 0, 0], dtype=float) 33 | self.assertTrue(np.allclose(corloc, expected_corloc)) 34 | 35 | def test_compute_cor_loc_nans(self): 36 | num_gt_imgs_per_class = np.array([100, 0, 0, 1, 1], dtype=int) 37 | num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0], 38 | dtype=int) 39 | corloc = metrics.compute_cor_loc(num_gt_imgs_per_class, 40 | num_images_correctly_detected_per_class) 41 | expected_corloc = np.array([0.1, np.nan, np.nan, 0, 0], dtype=float) 42 | self.assertAllClose(corloc, expected_corloc) 43 | 44 | def test_compute_precision_recall(self): 45 | num_gt = 10 46 | scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float) 47 | labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool) 48 | accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float) 49 | expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6]) 50 | expected_recall = accumulated_tp_count / num_gt 51 | precision, recall = metrics.compute_precision_recall(scores, labels, num_gt) 52 | self.assertAllClose(precision, expected_precision) 53 | self.assertAllClose(recall, expected_recall) 54 | 55 | def test_compute_average_precision(self): 56 | precision = np.array([0.8, 0.76, 0.9, 0.65, 0.7, 0.5, 0.55, 0], dtype=float) 57 | recall = np.array([0.3, 0.3, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5], dtype=float) 58 | processed_precision = np.array([0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0], 59 | dtype=float) 60 | recall_interval = np.array([0.3, 0, 0.1, 0, 0.05, 0, 0.05, 0], dtype=float) 61 | expected_mean_ap = np.sum(recall_interval * processed_precision) 62 | mean_ap = metrics.compute_average_precision(precision, recall) 63 | self.assertAlmostEqual(expected_mean_ap, mean_ap) 64 | 65 | def test_compute_precision_recall_and_ap_no_groundtruth(self): 66 | num_gt = 0 67 | scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float) 68 | labels = np.array([0, 0, 0, 0, 0, 0], dtype=bool) 69 | expected_precision = None 70 | expected_recall = None 71 | precision, recall = metrics.compute_precision_recall(scores, labels, num_gt) 72 | self.assertEquals(precision, expected_precision) 73 | self.assertEquals(recall, expected_recall) 74 | ap = metrics.compute_average_precision(precision, recall) 75 | self.assertTrue(np.isnan(ap)) 76 | 77 | 78 | if __name__ == '__main__': 79 | tf.test.main() 80 | -------------------------------------------------------------------------------- /object_detection/utils/np_box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Operations for [N, 4] numpy arrays representing bounding boxes. 17 | 18 | Example box operations that are supported: 19 | * Areas: compute bounding box areas 20 | * IOU: pairwise intersection-over-union scores 21 | """ 22 | import numpy as np 23 | 24 | 25 | def area(boxes): 26 | """Computes area of boxes. 27 | 28 | Args: 29 | boxes: Numpy array with shape [N, 4] holding N boxes 30 | 31 | Returns: 32 | a numpy array with shape [N] representing box areas 33 | """ 34 | return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) 35 | 36 | 37 | def intersection(boxes1, boxes2): 38 | """Compute pairwise intersection areas between boxes. 39 | 40 | Args: 41 | boxes1: a numpy array with shape [N, 4] holding N boxes 42 | boxes2: a numpy array with shape [M, 4] holding M boxes 43 | 44 | Returns: 45 | a numpy array with shape [N, M] representing pairwise intersection areas 46 | """ 47 | [y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1) 48 | [y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1) 49 | 50 | all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2)) 51 | all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2)) 52 | intersect_heights = np.maximum( 53 | np.zeros(all_pairs_max_ymin.shape), 54 | all_pairs_min_ymax - all_pairs_max_ymin) 55 | all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2)) 56 | all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2)) 57 | intersect_widths = np.maximum( 58 | np.zeros(all_pairs_max_xmin.shape), 59 | all_pairs_min_xmax - all_pairs_max_xmin) 60 | return intersect_heights * intersect_widths 61 | 62 | 63 | def iou(boxes1, boxes2): 64 | """Computes pairwise intersection-over-union between box collections. 65 | 66 | Args: 67 | boxes1: a numpy array with shape [N, 4] holding N boxes. 68 | boxes2: a numpy array with shape [M, 4] holding M boxes. 69 | 70 | Returns: 71 | a numpy array with shape [N, M] representing pairwise iou scores. 72 | """ 73 | intersect = intersection(boxes1, boxes2) 74 | area1 = area(boxes1) 75 | area2 = area(boxes2) 76 | union = np.expand_dims(area1, axis=1) + np.expand_dims( 77 | area2, axis=0) - intersect 78 | return intersect / union 79 | 80 | 81 | def ioa(boxes1, boxes2): 82 | """Computes pairwise intersection-over-area between box collections. 83 | 84 | Intersection-over-area (ioa) between two boxes box1 and box2 is defined as 85 | their intersection area over box2's area. Note that ioa is not symmetric, 86 | that is, IOA(box1, box2) != IOA(box2, box1). 87 | 88 | Args: 89 | boxes1: a numpy array with shape [N, 4] holding N boxes. 90 | boxes2: a numpy array with shape [M, 4] holding M boxes. 91 | 92 | Returns: 93 | a numpy array with shape [N, M] representing pairwise ioa scores.
94 | """ 95 | intersect = intersection(boxes1, boxes2) 96 | areas = np.expand_dims(area(boxes2), axis=0) 97 | return intersect / areas 98 | -------------------------------------------------------------------------------- /object_detection/utils/np_box_ops_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.np_box_ops.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import np_box_ops 22 | 23 | 24 | class BoxOpsTests(tf.test.TestCase): 25 | 26 | def setUp(self): 27 | boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]], 28 | dtype=float) 29 | boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 30 | [0.0, 0.0, 20.0, 20.0]], 31 | dtype=float) 32 | self.boxes1 = boxes1 33 | self.boxes2 = boxes2 34 | 35 | def testArea(self): 36 | areas = np_box_ops.area(self.boxes1) 37 | expected_areas = np.array([6.0, 5.0], dtype=float) 38 | self.assertAllClose(expected_areas, areas) 39 | 40 | def testIntersection(self): 41 | intersection = np_box_ops.intersection(self.boxes1, self.boxes2) 42 | expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]], 43 | dtype=float) 44 | self.assertAllClose(intersection, expected_intersection) 45 | 46 | def testIOU(self): 47 | iou = np_box_ops.iou(self.boxes1, self.boxes2) 48 | expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0], 49 | [1.0 / 16.0, 0.0, 5.0 / 400.0]], 50 | dtype=float) 51 | self.assertAllClose(iou, expected_iou) 52 | 53 | def testIOA(self): 54 | boxes1 = np.array([[0.25, 0.25, 0.75, 0.75], 55 | [0.0, 0.0, 0.5, 0.75]], 56 | dtype=np.float32) 57 | boxes2 = np.array([[0.5, 0.25, 1.0, 1.0], 58 | [0.0, 0.0, 1.0, 1.0]], 59 | dtype=np.float32) 60 | ioa21 = np_box_ops.ioa(boxes2, boxes1) 61 | expected_ioa21 = np.array([[0.5, 0.0], 62 | [1.0, 1.0]], 63 | dtype=np.float32) 64 | self.assertAllClose(ioa21, expected_ioa21) 65 | 66 | 67 | if __name__ == '__main__': 68 | tf.test.main() 69 | -------------------------------------------------------------------------------- /object_detection/utils/static_shape.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Helper functions to access TensorShape values. 17 | 18 | The rank 4 tensor_shape must be of the form [batch_size, height, width, depth]. 19 | """ 20 | 21 | 22 | def get_batch_size(tensor_shape): 23 | """Returns batch size from the tensor shape. 24 | 25 | Args: 26 | tensor_shape: A rank 4 TensorShape. 27 | 28 | Returns: 29 | An integer representing the batch size of the tensor. 30 | """ 31 | tensor_shape.assert_has_rank(rank=4) 32 | return tensor_shape[0].value 33 | 34 | 35 | def get_height(tensor_shape): 36 | """Returns height from the tensor shape. 37 | 38 | Args: 39 | tensor_shape: A rank 4 TensorShape. 40 | 41 | Returns: 42 | An integer representing the height of the tensor. 43 | """ 44 | tensor_shape.assert_has_rank(rank=4) 45 | return tensor_shape[1].value 46 | 47 | 48 | def get_width(tensor_shape): 49 | """Returns width from the tensor shape. 50 | 51 | Args: 52 | tensor_shape: A rank 4 TensorShape. 53 | 54 | Returns: 55 | An integer representing the width of the tensor. 56 | """ 57 | tensor_shape.assert_has_rank(rank=4) 58 | return tensor_shape[2].value 59 | 60 | 61 | def get_depth(tensor_shape): 62 | """Returns depth from the tensor shape. 63 | 64 | Args: 65 | tensor_shape: A rank 4 TensorShape. 66 | 67 | Returns: 68 | An integer representing the depth of the tensor. 69 | """ 70 | tensor_shape.assert_has_rank(rank=4) 71 | return tensor_shape[3].value 72 | -------------------------------------------------------------------------------- /object_detection/utils/static_shape_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.static_shape.""" 17 | 18 | import tensorflow as tf 19 | 20 | from object_detection.utils import static_shape 21 | 22 | 23 | class StaticShapeTest(tf.test.TestCase): 24 | 25 | def test_return_correct_batch_size(self): 26 | tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) 27 | self.assertEqual(32, static_shape.get_batch_size(tensor_shape)) 28 | 29 | def test_return_correct_height(self): 30 | tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) 31 | self.assertEqual(299, static_shape.get_height(tensor_shape)) 32 | 33 | def test_return_correct_width(self): 34 | tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) 35 | self.assertEqual(384, static_shape.get_width(tensor_shape)) 36 | 37 | def test_return_correct_depth(self): 38 | tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) 39 | self.assertEqual(3, static_shape.get_depth(tensor_shape)) 40 | 41 | def test_die_on_tensor_shape_with_rank_three(self): 42 | tensor_shape = tf.TensorShape(dims=[32, 299, 384]) 43 | # Check each accessor in its own assertRaises block; with a single shared 44 | # block, only the first call would execute before the error is raised. 45 | for get_dim in (static_shape.get_batch_size, static_shape.get_height, 46 | static_shape.get_width, static_shape.get_depth): 47 | with self.assertRaises(ValueError): 48 | get_dim(tensor_shape) 49 | 50 | if __name__ == '__main__': 51 | tf.test.main() 52 | -------------------------------------------------------------------------------- /object_detection/utils/test_utils_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.test_utils.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import test_utils 22 | 23 | 24 | class TestUtilsTest(tf.test.TestCase): 25 | 26 | def test_diagonal_gradient_image(self): 27 | """Tests if a good pyramid image is created.""" 28 | pyramid_image = test_utils.create_diagonal_gradient_image(3, 4, 2) 29 | 30 | # Sanity check on the first channel, which is easy to verify by hand. 31 | expected_first_channel = np.array([[3, 2, 1, 0], 32 | [4, 3, 2, 1], 33 | [5, 4, 3, 2]], dtype=np.float32) 34 | self.assertAllEqual(np.squeeze(pyramid_image[:, :, 0]), 35 | expected_first_channel) 36 | 37 | # Full check over both channels.
38 | expected_image = np.array([[[3, 30], 39 | [2, 20], 40 | [1, 10], 41 | [0, 0]], 42 | [[4, 40], 43 | [3, 30], 44 | [2, 20], 45 | [1, 10]], 46 | [[5, 50], 47 | [4, 40], 48 | [3, 30], 49 | [2, 20]]], dtype=np.float32) 50 | 51 | self.assertAllEqual(pyramid_image, expected_image) 52 | 53 | def test_random_boxes(self): 54 | """Tests if valid random boxes are created.""" 55 | num_boxes = 1000 56 | max_height = 3 57 | max_width = 5 58 | boxes = test_utils.create_random_boxes(num_boxes, 59 | max_height, 60 | max_width) 61 | 62 | true_column = np.ones(shape=(num_boxes)) == 1 63 | self.assertAllEqual(boxes[:, 0] < boxes[:, 2], true_column) 64 | self.assertAllEqual(boxes[:, 1] < boxes[:, 3], true_column) 65 | 66 | self.assertTrue(boxes[:, 0].min() >= 0) 67 | self.assertTrue(boxes[:, 1].min() >= 0) 68 | self.assertTrue(boxes[:, 2].max() <= max_height) 69 | self.assertTrue(boxes[:, 3].max() <= max_width) 70 | 71 | 72 | if __name__ == '__main__': 73 | tf.test.main() 74 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datitran/object_detector_app/44e8eddeb931cced5d8cf1e283383c720a5706bf/utils/__init__.py -------------------------------------------------------------------------------- /utils/test_app_utils.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from object_detector_app.utils.app_utils import color_name_to_rgb, standard_colors 3 | 4 | 5 | class TestUtils(unittest.TestCase): 6 | def setUp(self): 7 | self.colors = color_name_to_rgb() 8 | self.standard_colors = standard_colors() 9 | 10 | def test_all_colors(self): 11 | """Test that manually defined colors are also in the matplotlib color name space.""" 12 | color_list = set(self.colors.keys()) 13 | standard_color_list = {color.lower() for color in self.standard_colors} 14 | color_common = standard_color_list.intersection(color_list) 15 | self.assertEqual(len(color_common), len(standard_color_list)) 16 | --------------------------------------------------------------------------------