├── src ├── __init__.py ├── object_detection │ ├── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── static_shape_test.py │ │ ├── static_shape.py │ │ ├── np_box_mask_list.py │ │ ├── np_box_ops_test.py │ │ ├── test_utils_test.py │ │ ├── np_box_ops.py │ │ ├── np_mask_ops_test.py │ │ ├── test_case.py │ │ ├── np_mask_ops.py │ │ ├── np_box_list.py │ │ ├── np_box_list_test.py │ │ ├── test_utils.py │ │ ├── metrics_test.py │ │ ├── label_map_util.py │ │ ├── metrics.py │ │ ├── label_map_util_test.py │ │ └── np_box_mask_list_test.py │ ├── core │ │ └── __init__.py │ ├── metrics │ │ ├── __init__.py │ │ ├── io_utils.py │ │ ├── oid_od_challenge_evaluation_utils.py │ │ ├── oid_od_challenge_evaluation_utils_test.py │ │ └── oid_od_challenge_evaluation.py │ ├── protos │ │ ├── __init__.py │ │ ├── string_int_label_map.proto │ │ └── string_int_label_map_pb2.py │ └── dataset_tools │ │ ├── __init__.py │ │ ├── oid_hierarchical_labels_expansion_test.py │ │ ├── oid_tfrecord_creation.py │ │ ├── create_oid_tf_record.py │ │ ├── oid_tfrecord_creation_test.py │ │ └── oid_hierarchical_labels_expansion.py ├── logging.py ├── augmentation.py ├── preprocessing.py ├── postprocessing.py ├── pipelines.py ├── pipeline_config.py └── models.py ├── .github └── ISSUE_TEMPLATE │ ├── everything-else.md │ └── bug.md ├── requirements.txt ├── PULL_REQUEST_TEMPLATE.md ├── LICENSE ├── CONTRIBUTING.md ├── Tensorflow-Object-Detection-API-notice └── README.md ├── .gitignore ├── configs ├── batch_7.yaml ├── batch_6.yaml ├── neptune.yaml ├── batch_1.yaml ├── batch_5.yaml ├── batch_8.yaml ├── batch_3.yaml ├── batch_2.yaml └── batch_4.yaml ├── CODE_OF_CONDUCT.md ├── notebooks └── submission_merge.ipynb ├── main.py └── README.md /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/object_detection/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/object_detection/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/object_detection/core/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/object_detection/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/object_detection/protos/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/object_detection/dataset_tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/logging.py: -------------------------------------------------------------------------------- 1 | from src.utils import init_logger 2 | 3 | LOGGER = init_logger() 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/everything-else.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: everything else 3 | 
about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | 8 | -------------------------------------------------------------------------------- /src/augmentation.py: -------------------------------------------------------------------------------- 1 | from imgaug import augmenters as iaa 2 | 3 | aug_seq = iaa.Sequential([ 4 | iaa.Fliplr(p=0.5), 5 | iaa.Sometimes( 6 | 0.3, 7 | iaa.Multiply((0.75, 1.25)) 8 | ), 9 | iaa.Sometimes( 10 | 0.3, 11 | iaa.AdditiveGaussianNoise() 12 | ), 13 | iaa.Affine( 14 | rotate=(-5, 5), 15 | scale=(0.8, 1.2) 16 | ) 17 | ]) 18 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | neptune-cli 2 | steppy==0.1.6 3 | steppy-toolkit==0.1.8 4 | bokeh 5 | flask 6 | numpy 7 | opencv_python 8 | attrdict==2.0.0 9 | category_encoders==1.2.8 10 | click==6.7 11 | contextlib2==0.5.5 12 | imgaug==0.2.6 13 | pandas==0.20.2 14 | Pillow>=6.2.2 15 | protobuf==3.15.0 16 | pycocotools==2.0.0 17 | scikit_learn==0.19.2 18 | six==1.10.0 19 | torch==0.3.1 20 | torchvision==0.2.1 21 | pyyaml>=4.2b1 22 | tqdm==4.11.2 23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: bug 3 | about: Create bug report 4 | 5 | --- 6 | 7 | There are two things that will make the processing of your issue faster: 8 | 1. Make sure that you are using the latest version of the code, 9 | 1. In case of bug issue, it would be nice to provide more technical details such like execution command, error message or script that reproduces your bug. 10 | # 11 | 12 | Thanks! 13 | 14 | Kamil & Jakub, 15 | 16 | *core contributors to the Open Solution* 17 | -------------------------------------------------------------------------------- /PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Pull Request template to the *[Google AI Open Images - Object Detection Track](https://www.kaggle.com/c/google-ai-open-images-object-detection-track)* Open Solution 2 | 3 | Major - and most appreciated - contribution is pull request with feature or bug fix. Each pull request initiates discussion about your code contribution. 4 | 5 | Each pull request should be provided with minimal description about its contents. 6 | # 7 | 8 | Thanks! 9 | 10 | Kuba & Kamil, 11 | 12 | _core contributors to the Open Solutions_ 13 | -------------------------------------------------------------------------------- /src/object_detection/protos/string_int_label_map.proto: -------------------------------------------------------------------------------- 1 | // Message to store the mapping from class label strings to class id. Datasets 2 | // use string labels to represent classes while the object detection framework 3 | // works with class ids. This message maps them so they can be converted back 4 | // and forth as needed. 5 | syntax = "proto2"; 6 | 7 | package object_detection.protos; 8 | 9 | message StringIntLabelMapItem { 10 | // String name. The most common practice is to set this to a MID or synsets 11 | // id. 12 | optional string name = 1; 13 | 14 | // Integer id that maps to the string name above. Label ids should start from 15 | // 1. 16 | optional int32 id = 2; 17 | 18 | // Human readable string label. 
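// For illustration only (these values are placeholders, not from the original
// file): a text-format label map assembled from these messages typically looks
// like
//   item {
//     name: "/m/01g317"
//     id: 1
//     display_name: "Person"
//   }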
19 | optional string display_name = 3; 20 | }; 21 | 22 | message StringIntLabelMap { 23 | repeated StringIntLabelMapItem item = 1; 24 | }; 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 neptune.ml 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing to Open Solution for [Google AI Open Images - Object Detection Track](https://www.kaggle.com/c/google-ai-open-images-object-detection-track). 2 | 3 | ### Get involved 4 | You are welcome to contribute to this Open Solution. To get started: 5 | 1. Check [our kanban board](https://github.com/neptune-ml/open-solution-googleai-object-detection/projects/1) to see what we are working on right now. 6 | 1. Express your interest in a particular [issue](https://github.com/neptune-ml/open-solution-googleai-object-detection/issues) by submitting a comment or, 7 | * submit your own [issue](https://github.com/neptune-ml/open-solution-googleai-object-detection/issues). 8 | 1. We will get back to you in order to start working together. 9 | 10 | ### Code contributions 11 | Major - and most appreciated - contribution is [pull request](https://github.com/neptune-ml/open-solution-googleai-object-detection/pulls) with feature or bug fix. 12 | 13 | ### Remarks 14 | In case of custom ideas, please contact core contributors directly at ml-team@neptune.ml. 15 | # 16 | 17 | Thanks! 18 | 19 | Kuba & Kamil, 20 | 21 | *core contributors to the Open Solution* 22 | -------------------------------------------------------------------------------- /src/object_detection/metrics/io_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Common IO utils used in offline metric computation. 16 | """ 17 | 18 | import csv 19 | 20 | 21 | def write_csv(fid, metrics): 22 | """Writes metrics key-value pairs to CSV file. 23 | 24 | Args: 25 | fid: File identifier of an opened file. 26 | metrics: A dictionary with metrics to be written. 27 | """ 28 | metrics_writer = csv.writer(fid, delimiter=',') 29 | for metric_name, metric_value in list(metrics.items()): 30 | metrics_writer.writerow([metric_name, str(metric_value)]) 31 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection-API-notice/README.md: -------------------------------------------------------------------------------- 1 | ### Tensorflow Object Detection API 2 | * [Object detection](https://github.com/neptune-ml/open-solution-googleai-object-detection/tree/master/src/object_detection) directory in this GitHub repository is a modification of the [Tensorflow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection), which is part of the [tensorflow/models](https://github.com/tensorflow/models). 3 | * Tensorflow/models is licensed under the [Apache License 2.0](https://github.com/tensorflow/models/blob/master/LICENSE) 4 | * Copy of the aforementioned [license is here](https://github.com/neptune-ml/open-solution-googleai-object-detection/tree/master/Tensorflow-Object-Detection-API-notice/LICENSE-copy). 5 | 6 | ### Code Changes 7 | * Code was copied from this repository: https://github.com/tensorflow/models/tree/master/research/object_detection 8 | * The modified version of the code is available here: https://github.com/neptune-ml/open-solution-googleai-object-detection/tree/master/src/object_detection 9 | * Entire [open-solution-googleai-object-detection](https://github.com/neptune-ml/open-solution-googleai-object-detection) repository is [MIT licensed](https://github.com/neptune-ml/open-solution-googleai-object-detection/blob/master/LICENSE). 
10 | -------------------------------------------------------------------------------- /src/preprocessing.py: -------------------------------------------------------------------------------- 1 | from category_encoders.ordinal import OrdinalEncoder 2 | from sklearn.externals import joblib 3 | from steppy.base import BaseTransformer 4 | 5 | 6 | class GoogleAiLabelEncoder(BaseTransformer): 7 | def __init__(self, colname): 8 | self.colname = colname 9 | self.encoder = OrdinalEncoder() 10 | 11 | def fit(self, annotations, **kwargs): 12 | self.encoder.fit(annotations[self.colname].values) 13 | return self 14 | 15 | def transform(self, annotations, annotations_human_labels, **kwargs): 16 | if annotations is not None: 17 | annotations[self.colname] = self.encoder.transform(annotations[self.colname].values) 18 | annotations_human_labels[self.colname] = self.encoder.transform( 19 | annotations_human_labels[self.colname].values) 20 | return {'annotations': annotations, 21 | 'annotations_human_labels': annotations_human_labels} 22 | else: 23 | return {'mapping': self.encoder.category_mapping[0]['mapping']} 24 | 25 | def load(self, filepath): 26 | self.encoder = joblib.load(filepath) 27 | return self 28 | 29 | def persist(self, filepath): 30 | joblib.dump(self.encoder, filepath) 31 | 32 | 33 | class GoogleAiLabelDecoder(BaseTransformer): 34 | 35 | def transform(self, mapping, **kwargs): 36 | inverse_mapping = {val: name for name, val in mapping} 37 | return {'inverse_mapping': inverse_mapping} 38 | -------------------------------------------------------------------------------- /src/object_detection/utils/static_shape_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.static_shape.""" 17 | 18 | import tensorflow as tf 19 | 20 | from object_detection.utils import static_shape 21 | 22 | 23 | class StaticShapeTest(tf.test.TestCase): 24 | 25 | def test_return_correct_batchSize(self): 26 | tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) 27 | self.assertEqual(32, static_shape.get_batch_size(tensor_shape)) 28 | 29 | def test_return_correct_height(self): 30 | tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) 31 | self.assertEqual(299, static_shape.get_height(tensor_shape)) 32 | 33 | def test_return_correct_width(self): 34 | tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) 35 | self.assertEqual(384, static_shape.get_width(tensor_shape)) 36 | 37 | def test_return_correct_depth(self): 38 | tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) 39 | self.assertEqual(3, static_shape.get_depth(tensor_shape)) 40 | 41 | def test_die_on_tensor_shape_with_rank_three(self): 42 | tensor_shape = tf.TensorShape(dims=[32, 299, 384]) 43 | with self.assertRaises(ValueError): 44 | static_shape.get_batch_size(tensor_shape) 45 | static_shape.get_height(tensor_shape) 46 | static_shape.get_width(tensor_shape) 47 | static_shape.get_depth(tensor_shape) 48 | 49 | if __name__ == '__main__': 50 | tf.test.main() 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .pytest_cache 6 | tests/.cache 7 | 8 | # C extensions 9 | *.so 10 | 11 | # neptune, pycharm 12 | .cache 13 | .cache/ 14 | .idea/ 15 | .idea_modules/ 16 | *_local.yaml 17 | out/ 18 | output 19 | output/ 20 | *.log 21 | target/ 22 | devbook.ipynb 23 | devbook_local.ipynb 24 | 25 | # Distribution / packaging 26 | .Python 27 | env/ 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | eggs/ 33 | .eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | wheels/ 40 | *.egg-info/ 41 | .installed.cfg 42 | *.egg 43 | 44 | # PyInstaller 45 | # Usually these files are written by a python script from a template 46 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
47 | *.manifest 48 | *.spec 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .coverage 58 | .coverage.* 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | .hypothesis/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | local_settings.py 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # Jupyter Notebook 82 | Untitled*.ipynb 83 | .ipynb_checkpoints 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # celery beat schedule file 89 | celerybeat-schedule 90 | 91 | # SageMath parsed files 92 | *.sage.py 93 | 94 | # dotenv 95 | .env 96 | 97 | # virtualenv 98 | .venv 99 | venv/ 100 | ENV/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # mkdocs documentation 110 | /site 111 | 112 | # mypy 113 | .mypy_cache/ 114 | 115 | # Working directories 116 | examples/cache/ 117 | configs/neptune_config_local.yaml 118 | notebooks/local_playground.ipynb 119 | configs/neptune_config_home.yaml 120 | configs/neptune_config_cluster.yaml 121 | playground.py 122 | playground.ipynb 123 | -------------------------------------------------------------------------------- /src/object_detection/utils/static_shape.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Helper functions to access TensorShape values. 17 | 18 | The rank 4 tensor_shape must be of the form [batch_size, height, width, depth]. 19 | """ 20 | 21 | 22 | def get_batch_size(tensor_shape): 23 | """Returns batch size from the tensor shape. 24 | 25 | Args: 26 | tensor_shape: A rank 4 TensorShape. 27 | 28 | Returns: 29 | An integer representing the batch size of the tensor. 30 | """ 31 | tensor_shape.assert_has_rank(rank=4) 32 | return tensor_shape[0].value 33 | 34 | 35 | def get_height(tensor_shape): 36 | """Returns height from the tensor shape. 37 | 38 | Args: 39 | tensor_shape: A rank 4 TensorShape. 40 | 41 | Returns: 42 | An integer representing the height of the tensor. 43 | """ 44 | tensor_shape.assert_has_rank(rank=4) 45 | return tensor_shape[1].value 46 | 47 | 48 | def get_width(tensor_shape): 49 | """Returns width from the tensor shape. 50 | 51 | Args: 52 | tensor_shape: A rank 4 TensorShape. 53 | 54 | Returns: 55 | An integer representing the width of the tensor. 56 | """ 57 | tensor_shape.assert_has_rank(rank=4) 58 | return tensor_shape[2].value 59 | 60 | 61 | def get_depth(tensor_shape): 62 | """Returns depth from the tensor shape. 63 | 64 | Args: 65 | tensor_shape: A rank 4 TensorShape. 
66 | 67 | Returns: 68 | An integer representing the depth of the tensor. 69 | """ 70 | tensor_shape.assert_has_rank(rank=4) 71 | return tensor_shape[3].value 72 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_box_mask_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Numpy BoxMaskList classes and functions.""" 17 | 18 | import numpy as np 19 | from object_detection.utils import np_box_list 20 | 21 | 22 | class BoxMaskList(np_box_list.BoxList): 23 | """Convenience wrapper for BoxList with masks. 24 | 25 | BoxMaskList extends the np_box_list.BoxList to contain masks as well. 26 | In particular, its constructor receives both boxes and masks. Note that the 27 | masks correspond to the full image. 28 | """ 29 | 30 | def __init__(self, box_data, mask_data): 31 | """Constructs box collection. 32 | 33 | Args: 34 | box_data: a numpy array of shape [N, 4] representing box coordinates 35 | mask_data: a numpy array of shape [N, height, width] representing masks 36 | with values are in {0,1}. The masks correspond to the full 37 | image. The height and the width will be equal to image height and width. 38 | 39 | Raises: 40 | ValueError: if bbox data is not a numpy array 41 | ValueError: if invalid dimensions for bbox data 42 | ValueError: if mask data is not a numpy array 43 | ValueError: if invalid dimension for mask data 44 | """ 45 | super(BoxMaskList, self).__init__(box_data) 46 | if not isinstance(mask_data, np.ndarray): 47 | raise ValueError('Mask data must be a numpy array.') 48 | if len(mask_data.shape) != 3: 49 | raise ValueError('Invalid dimensions for mask data.') 50 | if mask_data.dtype != np.uint8: 51 | raise ValueError('Invalid data type for mask data: uint8 is required.') 52 | if mask_data.shape[0] != box_data.shape[0]: 53 | raise ValueError('There should be the same number of boxes and masks.') 54 | self.data['masks'] = mask_data 55 | 56 | def get_masks(self): 57 | """Convenience function for accessing masks. 
58 | 59 | Returns: 60 | a numpy array of shape [N, height, width] representing masks 61 | """ 62 | return self.get_field('masks') 63 | 64 | -------------------------------------------------------------------------------- /configs/batch_7.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-AI-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch_7, retrain] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 1000000 61 | desired_class_subset: "['Bicycle', 62 | 'Bottle', 63 | 'Book', 64 | 'Palm tree', 65 | 'Street light', 66 | 'Glasses', 67 | 'Bicycle wheel', 68 | 'Tower', 69 | 'Skyscraper', 70 | 'Tire', 71 | 'Chair', 72 | 'House', 73 | 'Wheel', 74 | 'Window']" 75 | 76 | # Retina parameters (multi-output) 77 | encoder_depth: 50 78 | num_classes: 100 79 | pretrained_encoder: 1 80 | pi: 0.01 81 | aspect_ratios: '[1/2., 1/1., 2/1.]' 82 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 83 | 84 | # Training schedule 85 | epochs_nr: 1000 86 | batch_size_train: 8 87 | batch_size_inference: 1 88 | lr: 0.00001 89 | momentum: 0.9 90 | gamma: 1.0 91 | patience: 100 92 | lr_factor: 0.3 93 | lr_patience: 30 94 | training_sample_size: 10000 95 | validation_sample_size: 2000 96 | 97 | # Regularization 98 | use_batch_norm: 1 99 | l2_reg_conv: 0.0001 100 | l2_reg_dense: 0.0 101 | dropout_conv: 0.1 102 | dropout_dense: 0.0 103 | 104 | # Postprocessing 105 | classification_threshold: 0.05 106 | nms_threshold: 0.5 107 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_box_ops_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.np_box_ops.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import np_box_ops 22 | 23 | 24 | class BoxOpsTests(tf.test.TestCase): 25 | 26 | def setUp(self): 27 | boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]], 28 | dtype=float) 29 | boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 30 | [0.0, 0.0, 20.0, 20.0]], 31 | dtype=float) 32 | self.boxes1 = boxes1 33 | self.boxes2 = boxes2 34 | 35 | def testArea(self): 36 | areas = np_box_ops.area(self.boxes1) 37 | expected_areas = np.array([6.0, 5.0], dtype=float) 38 | self.assertAllClose(expected_areas, areas) 39 | 40 | def testIntersection(self): 41 | intersection = np_box_ops.intersection(self.boxes1, self.boxes2) 42 | expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]], 43 | dtype=float) 44 | self.assertAllClose(intersection, expected_intersection) 45 | 46 | def testIOU(self): 47 | iou = np_box_ops.iou(self.boxes1, self.boxes2) 48 | expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0], 49 | [1.0 / 16.0, 0.0, 5.0 / 400.0]], 50 | dtype=float) 51 | self.assertAllClose(iou, expected_iou) 52 | 53 | def testIOA(self): 54 | boxes1 = np.array([[0.25, 0.25, 0.75, 0.75], 55 | [0.0, 0.0, 0.5, 0.75]], 56 | dtype=np.float32) 57 | boxes2 = np.array([[0.5, 0.25, 1.0, 1.0], 58 | [0.0, 0.0, 1.0, 1.0]], 59 | dtype=np.float32) 60 | ioa21 = np_box_ops.ioa(boxes2, boxes1) 61 | expected_ioa21 = np.array([[0.5, 0.0], 62 | [1.0, 1.0]], 63 | dtype=np.float32) 64 | self.assertAllClose(ioa21, expected_ioa21) 65 | 66 | 67 | if __name__ == '__main__': 68 | tf.test.main() 69 | -------------------------------------------------------------------------------- /src/postprocessing.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import PIL 4 | import numpy as np 5 | import pandas as pd 6 | from steppy.base import BaseTransformer 7 | 8 | from src.logging import LOGGER 9 | from .pipeline_config import CODES2NAMES, SOLUTION_CONFIG, params 10 | from .utils import visualize_bboxes 11 | 12 | 13 | class PredictionFormatter(BaseTransformer): 14 | def transform(self, images_data, results, decoder_dict): 15 | self.decoder_dict = decoder_dict 16 | image_ids = images_data['ImageID'].values.tolist() 17 | prediction_strings = [] 18 | for bboxes, labels, scores in results: 19 | prediction_strings.append(self._get_prediction_string(bboxes, labels, scores)) 20 | submission = pd.DataFrame({'ImageId': image_ids, 'PredictionString': prediction_strings}) 21 | return {'submission': submission} 22 | 23 | def _get_prediction_string(self, bboxes, labels, scores): 24 | prediction_list = [] 25 | for bbox, label, score in zip(bboxes, labels, scores): 26 | prediction_list.append(self._get_class_id(label)) 27 | prediction_list.append(str(score)) 28 | prediction_list.extend([str(coord) for coord in bbox]) 29 | prediction_string = " ".join(prediction_list) 30 | return prediction_string 31 
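# For reference, one assembled PredictionString for two hypothetical detections
# would look like (label codes, scores and coordinates below are made-up examples):
#   "/m/01g317 0.91 0.10 0.20 0.60 0.80 /m/0199g 0.45 0.05 0.05 0.30 0.40"
# i.e. space-separated "<label> <score> <coord> <coord> <coord> <coord>" groups,
# where <label> is whatever _get_class_id() looks up in decoder_dict and the four
# coordinates are emitted exactly as they appear in `bbox`.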
| 32 | def _get_class_id(self, label): 33 | return self.decoder_dict[label] 34 | 35 | 36 | class Visualizer(BaseTransformer): 37 | def transform(self, images_data, results, decoder_dict): 38 | image_ids = images_data['ImageID'].values.tolist() 39 | decoder_dict = decoder_dict 40 | all_detections, all_boxes = [], [] 41 | for i, (image_id, detections) in enumerate(zip(image_ids, results)): 42 | if not bool(detections[0].size()): 43 | continue 44 | LOGGER.info("Drawing boxes on image {}/{}".format(i, len(results))) 45 | image = PIL.Image.open( 46 | os.path.join(SOLUTION_CONFIG['loader']['dataset_params']['images_dir'], image_id + '.jpg')) 47 | width, height = image.size # original image size 48 | box = detections[0].numpy() 49 | classes = detections[1].numpy() 50 | scores = detections[2].numpy() 51 | 52 | df = pd.DataFrame(np.column_stack([box, classes, scores])) 53 | df.columns = ['x1', 'y1', 'x2', 'y2', 'class_id', 'score'] 54 | df['class_name'] = df.class_id.map(decoder_dict) 55 | df.class_name = df.class_name.map(CODES2NAMES) 56 | 57 | # to absolute 58 | df['x1'] = df['x1'] * width 59 | df['x2'] = df['x2'] * width 60 | df['y1'] = df['y1'] * height 61 | df['y2'] = df['y2'] * height 62 | 63 | pil_image_detections = visualize_bboxes(image, df) 64 | all_detections.append(pil_image_detections) 65 | all_boxes.append(box) 66 | return all_detections 67 | -------------------------------------------------------------------------------- /configs/batch_6.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-AI-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch_6, retrain] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 0 61 | desired_class_subset: "['Butterfly', 62 | 'Bee', 63 | 'Cattle', 64 | 'Desk', 65 | 'Bus', 66 | 'Picture frame', 67 | 'Rose', 68 | 'Truck', 69 | 'Wine glass', 70 | 'Train', 71 | 'Horse', 72 | 'Motorcycle', 73 | 'Balloon', 74 | 'Cat', 75 | 'Wine', 76 | 'Duck', 77 | 'Door', 78 | 'Airplane', 79 | 'Flowerpot', 80 | 'Houseplant', 81 | 'Shelf', 82 | 'Poster', 83 | 'Sunglasses', 84 | 'Drum', 85 | 'Guitar', 86 | 'Microphone', 87 | 'Dog', 88 | 'Flag']" 89 | 90 | # 
Retina parameters (multi-output) 91 | encoder_depth: 50 92 | num_classes: 100 93 | pretrained_encoder: 1 94 | pi: 0.01 95 | aspect_ratios: '[1/2., 1/1., 2/1.]' 96 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 97 | 98 | # Training schedule 99 | epochs_nr: 1000 100 | batch_size_train: 8 101 | batch_size_inference: 1 102 | lr: 0.00001 103 | momentum: 0.9 104 | gamma: 1.0 105 | patience: 100 106 | lr_factor: 0.3 107 | lr_patience: 30 108 | training_sample_size: 10000 109 | validation_sample_size: 2000 110 | 111 | # Regularization 112 | use_batch_norm: 1 113 | l2_reg_conv: 0.0001 114 | l2_reg_dense: 0.0 115 | dropout_conv: 0.1 116 | dropout_dense: 0.0 117 | 118 | # Postprocessing 119 | classification_threshold: 0.05 120 | nms_threshold: 0.5 121 | -------------------------------------------------------------------------------- /src/object_detection/dataset_tools/oid_hierarchical_labels_expansion_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for the OpenImages label expansion (OIDHierarchicalLabelsExpansion).""" 16 | 17 | 18 | 19 | 20 | 21 | import tensorflow as tf 22 | 23 | from object_detection.dataset_tools import oid_hierarchical_labels_expansion 24 | 25 | 26 | def create_test_data(): 27 | hierarchy = { 28 | 'LabelName': 29 | 'a', 30 | 'Subcategory': [{ 31 | 'LabelName': 'b' 32 | }, { 33 | 'LabelName': 'c', 34 | 'Subcategory': [{ 35 | 'LabelName': 'd' 36 | }, { 37 | 'LabelName': 'e' 38 | }] 39 | }, { 40 | 'LabelName': 'f', 41 | 'Subcategory': [{ 42 | 'LabelName': 'd' 43 | },] 44 | }] 45 | } 46 | bbox_rows = [ 47 | '123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0', 48 | '123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0' 49 | ] 50 | label_rows = [ 51 | '123,verification,b,0', '123,verification,c,0', '124,verification,d,1' 52 | ] 53 | return hierarchy, bbox_rows, label_rows 54 | 55 | 56 | class HierarchicalLabelsExpansionTest(tf.test.TestCase): 57 | 58 | def test_bbox_expansion(self): 59 | hierarchy, bbox_rows, _ = create_test_data() 60 | expansion_generator = ( 61 | oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion( 62 | hierarchy)) 63 | all_result_rows = [] 64 | for row in bbox_rows: 65 | all_result_rows.extend(expansion_generator.expand_boxes_from_csv(row)) 66 | self.assertItemsEqual([ 67 | '123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0', 68 | '123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0', 69 | '123,xclick,f,1,0.2,0.3,0.1,0.2,1,1,0,0,0', 70 | '123,xclick,c,1,0.2,0.3,0.1,0.2,1,1,0,0,0' 71 | ], all_result_rows) 72 | 73 | def test_labels_expansion(self): 74 | hierarchy, _, label_rows = create_test_data() 75 | expansion_generator = ( 76 | oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion( 77 | hierarchy)) 78 | all_result_rows = [] 79 | for row in label_rows: 80 | 
all_result_rows.extend(expansion_generator.expand_labels_from_csv(row)) 81 | self.assertItemsEqual([ 82 | '123,verification,b,0', '123,verification,c,0', '123,verification,d,0', 83 | '123,verification,e,0', '124,verification,d,1', '124,verification,f,1', 84 | '124,verification,c,1' 85 | ], all_result_rows) 86 | 87 | if __name__ == '__main__': 88 | tf.test.main() 89 | -------------------------------------------------------------------------------- /configs/neptune.yaml: -------------------------------------------------------------------------------- 1 | project: USERNAME/googleai-object-detection 2 | 3 | name: Google AI object detection 4 | tags: [solution-1] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | pip-requirements-file: requirements.txt # Comment out if Local execution 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | - src/object_detection 23 | 24 | parameters: 25 | # Data Paths 26 | train_imgs_dir: /public/datasets/open-images-dataset-v4/bounding-boxes/train 27 | test_imgs_dir: /public/datasets/open-images-dataset-v4/bounding-boxes/test_challenge_2018 28 | annotations_filepath: /public/challenges/google-ai-open-images-object-detection-track/annotations/challenge-2018-train-annotations-bbox.csv 29 | annotations_human_labels_filepath: /public/challenges/google-ai-open-images-object-detection-track/annotations/challenge-2018-train-annotations-human-imagelabels.csv 30 | bbox_hierarchy_filepath: /public/challenges/google-ai-open-images-object-detection-track/metadata/bbox_labels_500_hierarchy.json 31 | class_mappings_filepath: /public/challenges/google-ai-open-images-object-detection-track/metadata/challenge-2018-class-descriptions-500.csv 32 | valid_ids_filepath: /public/challenges/google-ai-open-images-object-detection-track/metadata/challenge-2018-image-ids-valset-od.csv 33 | sample_submission: /public/challenges/google-ai-open-images-object-detection-track/sample_submission.csv 34 | experiment_dir: /output/experiment 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 1 39 | num_workers: 4 40 | num_threads: 4 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'fixed' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 400 56 | long_dim: 600 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 1 60 | max_annotation_per_class: 75000 61 | desired_class_subset: "['Poster', 'Cat', 'Train', 'Dog', 'Bus','Truck', 'Picture frame', 'Airplane', 'Sculpture', 'Motorcycle']" 62 | 63 | # Retina parameters (multi-output) 64 | encoder_depth: 50 65 | num_classes: 10 66 | pretrained_encoder: 1 67 | pi: 0.01 68 | aspect_ratios: '[1/2., 1/1., 2/1.]' 69 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 70 | 71 | # Training schedule 72 | epochs_nr: 100 73 | batch_size_train: 8 74 | batch_size_inference: 8 75 | lr: 0.00001 76 | momentum: 0.9 77 | gamma: 1.0 78 | patience: 30 79 | lr_factor: 0.3 80 | lr_patience: 30 81 | training_sample_size: 10000 82 | validation_sample_size: 1000 83 | 84 | # Regularization 85 | use_batch_norm: 
1 86 | l2_reg_conv: 0.0001 87 | l2_reg_dense: 0.0 88 | dropout_conv: 0.1 89 | dropout_dense: 0.0 90 | 91 | # Postprocessing 92 | classification_threshold: 0.05 93 | nms_threshold: 0.5 94 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at contact@neptune.ml. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /src/object_detection/utils/test_utils_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.test_utils.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import test_utils 22 | 23 | 24 | class TestUtilsTest(tf.test.TestCase): 25 | 26 | def test_diagonal_gradient_image(self): 27 | """Tests if a good pyramid image is created.""" 28 | pyramid_image = test_utils.create_diagonal_gradient_image(3, 4, 2) 29 | 30 | # Test which is easy to understand. 31 | expected_first_channel = np.array([[3, 2, 1, 0], 32 | [4, 3, 2, 1], 33 | [5, 4, 3, 2]], dtype=np.float32) 34 | self.assertAllEqual(np.squeeze(pyramid_image[:, :, 0]), 35 | expected_first_channel) 36 | 37 | # Actual test. 
38 | expected_image = np.array([[[3, 30], 39 | [2, 20], 40 | [1, 10], 41 | [0, 0]], 42 | [[4, 40], 43 | [3, 30], 44 | [2, 20], 45 | [1, 10]], 46 | [[5, 50], 47 | [4, 40], 48 | [3, 30], 49 | [2, 20]]], dtype=np.float32) 50 | 51 | self.assertAllEqual(pyramid_image, expected_image) 52 | 53 | def test_random_boxes(self): 54 | """Tests if valid random boxes are created.""" 55 | num_boxes = 1000 56 | max_height = 3 57 | max_width = 5 58 | boxes = test_utils.create_random_boxes(num_boxes, 59 | max_height, 60 | max_width) 61 | 62 | true_column = np.ones(shape=(num_boxes)) == 1 63 | self.assertAllEqual(boxes[:, 0] < boxes[:, 2], true_column) 64 | self.assertAllEqual(boxes[:, 1] < boxes[:, 3], true_column) 65 | 66 | self.assertTrue(boxes[:, 0].min() >= 0) 67 | self.assertTrue(boxes[:, 1].min() >= 0) 68 | self.assertTrue(boxes[:, 2].max() <= max_height) 69 | self.assertTrue(boxes[:, 3].max() <= max_width) 70 | 71 | def test_first_rows_close_as_set(self): 72 | a = [1, 2, 3, 0, 0] 73 | b = [3, 2, 1, 0, 0] 74 | k = 3 75 | self.assertTrue(test_utils.first_rows_close_as_set(a, b, k)) 76 | 77 | a = [[1, 2], [1, 4], [0, 0]] 78 | b = [[1, 4 + 1e-9], [1, 2], [0, 0]] 79 | k = 2 80 | self.assertTrue(test_utils.first_rows_close_as_set(a, b, k)) 81 | 82 | a = [[1, 2], [1, 4], [0, 0]] 83 | b = [[1, 4 + 1e-9], [2, 2], [0, 0]] 84 | k = 2 85 | self.assertFalse(test_utils.first_rows_close_as_set(a, b, k)) 86 | 87 | 88 | if __name__ == '__main__': 89 | tf.test.main() 90 | -------------------------------------------------------------------------------- /configs/batch_1.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-AI-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch_1, retrain] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '/mnt/ml-team/open-images-v4/bounding-boxes/train/' 26 | test_imgs_dir: '/mnt/ml-team/open-images-v4/bounding-boxes/test_challenge_2018/' 27 | annotations_filepath: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/data/annotations/challenge-2018-train-annotations-bbox.csv' 28 | annotations_human_labels_filepath: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/data/annotations/challenge-2018-train-annotations-human-imagelabels.csv' 29 | bbox_hierarchy_filepath: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/data/metadata/bbox_labels_500_hierarchy.json' 30 | valid_ids_filepath: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/data/metadata/challenge-2018-image-ids-valset-od.csv' 31 | experiment_dir: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/kuba/experiments/batch_1' 32 | class_mappings_filepath: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/data/metadata/challenge-2018-class-descriptions-500.csv' 33 | metadata_filepath: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/files/metadata.csv' 34 | sample_submission: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/data/sample_submission.csv' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | 
clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 1000000 61 | desired_class_subset: "['Pressure cooker', 62 | 'Torch', 63 | 'Winter melon', 64 | 'Spatula', 65 | 'Toaster', 66 | 'Measuring cup', 67 | 'Ring binder', 68 | 'Screwdriver', 69 | 'Flashlight', 70 | 'Light switch']" 71 | 72 | # Retina parameters (multi-output) 73 | encoder_depth: 50 74 | num_classes: 100 75 | pretrained_encoder: 1 76 | pi: 0.01 77 | aspect_ratios: '[1/2., 1/1., 2/1.]' 78 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 79 | 80 | # Training schedule 81 | epochs_nr: 1000 82 | batch_size_train: 8 83 | batch_size_inference: 1 84 | lr: 0.00001 85 | momentum: 0.9 86 | gamma: 1.0 87 | patience: 100 88 | lr_factor: 0.3 89 | lr_patience: 30 90 | training_sample_size: 10000 91 | validation_sample_size: 2000 92 | 93 | # Regularization 94 | use_batch_norm: 1 95 | l2_reg_conv: 0.0001 96 | l2_reg_dense: 0.0 97 | dropout_conv: 0.1 98 | dropout_dense: 0.0 99 | 100 | # Postprocessing 101 | classification_threshold: 0.05 102 | nms_threshold: 0.5 103 | -------------------------------------------------------------------------------- /configs/batch_5.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-AI-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch-5, retrain] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 0 61 | desired_class_subset: "['Helicopter', 62 | 'Monkey', 63 | 'Coin', 64 | 'Chicken', 65 | 'Salad', 66 | 'Countertop', 67 | 'Elephant', 68 | 'Sheep', 69 | 'Platter', 70 | 'Ski', 71 | 'Pillow', 72 | 'Lamp', 73 | 'Lifejacket', 74 | 'Fountain', 75 | 
'Television', 76 | 'Bread', 77 | 'Porch', 78 | 'Deer', 79 | 'Swimming pool', 80 | 'Apple', 81 | 'Cookie', 82 | 'Penguin', 83 | 'Taxi', 84 | 'Christmas tree', 85 | 'Castle', 86 | 'Drawer', 87 | 'Cocktail', 88 | 'Mushroom', 89 | 'Bowl', 90 | 'Swan', 91 | 'Computer keyboard', 92 | 'Canoe', 93 | 'Muffin', 94 | 'Curtain', 95 | 'Maple', 96 | 'Office building', 97 | 'Football', 98 | 'Bookcase', 99 | 'Coffee table', 100 | 'Coffee cup', 101 | 'Box', 102 | 'Plate', 103 | 'Lantern', 104 | 'Cake', 105 | 'Candle', 106 | 'Stairs', 107 | 'Computer monitor', 108 | 'Pumpkin', 109 | 'Orange', 110 | 'Tomato', 111 | 'Mobile phone', 112 | 'Camera', 113 | 'Doll', 114 | 'Lavender', 115 | 'Sunflower', 116 | 'Tent', 117 | 'Paddle', 118 | 'Traffic light', 119 | 'Van', 120 | 'Vehicle registration plate', 121 | 'Strawberry', 122 | 'Goose', 123 | 'Cabinetry', 124 | 'Laptop', 125 | 'Beer', 126 | 'Goggles', 127 | 'Billboard']" 128 | 129 | # Retina parameters (multi-output) 130 | encoder_depth: 50 131 | num_classes: 100 132 | pretrained_encoder: 1 133 | pi: 0.01 134 | aspect_ratios: '[1/2., 1/1., 2/1.]' 135 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 136 | 137 | # Training schedule 138 | epochs_nr: 1000 139 | batch_size_train: 8 140 | batch_size_inference: 1 141 | lr: 0.00001 142 | momentum: 0.9 143 | gamma: 1.0 144 | patience: 100 145 | lr_factor: 0.3 146 | lr_patience: 30 147 | training_sample_size: 10000 148 | validation_sample_size: 2000 149 | 150 | # Regularization 151 | use_batch_norm: 1 152 | l2_reg_conv: 0.0001 153 | l2_reg_dense: 0.0 154 | dropout_conv: 0.1 155 | dropout_dense: 0.0 156 | 157 | # Postprocessing 158 | classification_threshold: 0.05 159 | nms_threshold: 0.5 160 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Operations for [N, 4] numpy arrays representing bounding boxes. 17 | 18 | Example box operations that are supported: 19 | * Areas: compute bounding box areas 20 | * IOU: pairwise intersection-over-union scores 21 | """ 22 | import numpy as np 23 | 24 | 25 | def area(boxes): 26 | """Computes area of boxes. 27 | 28 | Args: 29 | boxes: Numpy array with shape [N, 4] holding N boxes 30 | 31 | Returns: 32 | a numpy array with shape [N*1] representing box areas 33 | """ 34 | return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) 35 | 36 | 37 | def intersection(boxes1, boxes2): 38 | """Compute pairwise intersection areas between boxes. 
39 | 40 | Args: 41 | boxes1: a numpy array with shape [N, 4] holding N boxes 42 | boxes2: a numpy array with shape [M, 4] holding M boxes 43 | 44 | Returns: 45 | a numpy array with shape [N*M] representing pairwise intersection area 46 | """ 47 | [y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1) 48 | [y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1) 49 | 50 | all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2)) 51 | all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2)) 52 | intersect_heights = np.maximum( 53 | np.zeros(all_pairs_max_ymin.shape), 54 | all_pairs_min_ymax - all_pairs_max_ymin) 55 | all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2)) 56 | all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2)) 57 | intersect_widths = np.maximum( 58 | np.zeros(all_pairs_max_xmin.shape), 59 | all_pairs_min_xmax - all_pairs_max_xmin) 60 | return intersect_heights * intersect_widths 61 | 62 | 63 | def iou(boxes1, boxes2): 64 | """Computes pairwise intersection-over-union between box collections. 65 | 66 | Args: 67 | boxes1: a numpy array with shape [N, 4] holding N boxes. 68 | boxes2: a numpy array with shape [M, 4] holding N boxes. 69 | 70 | Returns: 71 | a numpy array with shape [N, M] representing pairwise iou scores. 72 | """ 73 | intersect = intersection(boxes1, boxes2) 74 | area1 = area(boxes1) 75 | area2 = area(boxes2) 76 | union = np.expand_dims(area1, axis=1) + np.expand_dims( 77 | area2, axis=0) - intersect 78 | return intersect / union 79 | 80 | 81 | def ioa(boxes1, boxes2): 82 | """Computes pairwise intersection-over-area between box collections. 83 | 84 | Intersection-over-area (ioa) between two boxes box1 and box2 is defined as 85 | their intersection area over box2's area. Note that ioa is not symmetric, 86 | that is, IOA(box1, box2) != IOA(box2, box1). 87 | 88 | Args: 89 | boxes1: a numpy array with shape [N, 4] holding N boxes. 90 | boxes2: a numpy array with shape [M, 4] holding N boxes. 91 | 92 | Returns: 93 | a numpy array with shape [N, M] representing pairwise ioa scores. 
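  Example (editorial sketch, not part of the original module; boxes follow the
  [y_min, x_min, y_max, x_max] convention used throughout this file):

    boxes1 = np.array([[0., 0., 2., 2.]], dtype=np.float32)  # area 4
    boxes2 = np.array([[1., 1., 3., 3.]], dtype=np.float32)  # area 4
    intersection(boxes1, boxes2)  # -> [[1.]], a 1x1 overlap
    iou(boxes1, boxes2)           # -> [[1. / 7.]], i.e. 1 / (4 + 4 - 1)
    ioa(boxes1, boxes2)           # -> [[0.25]], i.e. 1 / area(boxes2)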
94 | """ 95 | intersect = intersection(boxes1, boxes2) 96 | areas = np.expand_dims(area(boxes2), axis=0) 97 | return intersect / areas 98 | -------------------------------------------------------------------------------- /configs/batch_8.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-AI-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch-8, eval] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 1000000 61 | desired_class_subset: "['Roller skates', 62 | 'Toy', 63 | 'Boat', 64 | 'Jeans', 65 | 'Fedora', 66 | 'Man', 67 | 'Invertebrate', 68 | 'Personal care', 69 | 'Marine mammal', 70 | 'Weapon', 71 | 'Ball', 72 | 'Cowboy hat', 73 | 'Marine invertebrates', 74 | 'Turtle', 75 | 'Boy', 76 | 'Fish', 77 | 'Suitcase', 78 | 'Tableware', 79 | 'High heels', 80 | 'Traffic sign', 81 | 'Plumbing fixture', 82 | 'Squash', 83 | 'Girl', 84 | 'Insect', 85 | 'Sandwich', 86 | 'Telephone', 87 | 'Fruit', 88 | 'Baseball glove', 89 | 'Clock', 90 | 'Sun hat', 91 | 'Shellfish', 92 | 'Sculpture', 93 | 'Land vehicle', 94 | 'Flower', 95 | 'Seafood', 96 | 'Building', 97 | 'Aircraft', 98 | 'Watercraft', 99 | 'Football helmet', 100 | 'Musical instrument', 101 | 'Table', 102 | 'Racket', 103 | 'Miniskirt', 104 | 'Moths and butterflies', 105 | 'Furniture', 106 | 'Sandal', 107 | 'Boot', 108 | 'Bicycle helmet', 109 | 'Car', 110 | 'Bird', 111 | 'Bed', 112 | 'Couch', 113 | 'Backpack', 114 | 'Carnivore', 115 | 'Animal', 116 | 'Woman', 117 | 'Drink', 118 | 'Briefcase', 119 | 'Vegetable', 120 | 'Home appliance', 121 | 'Kitchen appliance', 122 | 'Tree', 123 | 'Office supplies', 124 | 'Bear', 125 | 'Beetle', 126 | 'Sombrero', 127 | 'Dessert', 128 | 'Vehicle', 129 | 'Reptile', 130 | 'Handbag']" 131 | 132 | # Retina parameters (multi-output) 133 | encoder_depth: 50 134 | num_classes: 100 135 | pretrained_encoder: 1 136 | pi: 0.01 137 | aspect_ratios: '[1/2., 1/1., 2/1.]' 138 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 139 | 140 | # Training schedule 141 | epochs_nr: 1000 142 | batch_size_train: 8 143 | batch_size_inference: 1 144 | lr: 0.00001 145 | momentum: 0.9 146 | gamma: 1.0 147 | patience: 100 148 | 
lr_factor: 0.3 149 | lr_patience: 30 150 | training_sample_size: 10000 151 | validation_sample_size: 2000 152 | 153 | # Regularization 154 | use_batch_norm: 1 155 | l2_reg_conv: 0.0001 156 | l2_reg_dense: 0.0 157 | dropout_conv: 0.1 158 | dropout_dense: 0.0 159 | 160 | # Postprocessing 161 | classification_threshold: 0.05 162 | nms_threshold: 0.5 163 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_mask_ops_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.np_mask_ops.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import np_mask_ops 22 | 23 | 24 | class MaskOpsTests(tf.test.TestCase): 25 | 26 | def setUp(self): 27 | masks1_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0], 28 | [0, 0, 0, 0, 0, 0, 0, 0], 29 | [0, 0, 0, 0, 0, 0, 0, 0], 30 | [1, 1, 1, 1, 0, 0, 0, 0], 31 | [1, 1, 1, 1, 0, 0, 0, 0]], 32 | dtype=np.uint8) 33 | masks1_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 1], 34 | [1, 1, 0, 0, 0, 0, 0, 0], 35 | [0, 0, 0, 0, 0, 0, 0, 0], 36 | [0, 0, 0, 0, 0, 0, 0, 0], 37 | [0, 0, 0, 0, 0, 0, 0, 0]], 38 | dtype=np.uint8) 39 | masks1 = np.stack([masks1_0, masks1_1]) 40 | masks2_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0], 41 | [0, 0, 0, 0, 0, 0, 0, 0], 42 | [0, 0, 0, 0, 0, 0, 0, 0], 43 | [1, 1, 1, 1, 0, 0, 0, 0], 44 | [1, 1, 1, 1, 0, 0, 0, 0]], 45 | dtype=np.uint8) 46 | masks2_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 0], 47 | [1, 1, 1, 1, 1, 0, 0, 0], 48 | [1, 1, 1, 0, 0, 0, 0, 0], 49 | [0, 0, 0, 0, 0, 0, 0, 0], 50 | [0, 0, 0, 0, 0, 0, 0, 0]], 51 | dtype=np.uint8) 52 | masks2_2 = np.array([[1, 1, 1, 1, 1, 0, 0, 0], 53 | [1, 1, 1, 1, 1, 0, 0, 0], 54 | [1, 1, 1, 1, 1, 0, 0, 0], 55 | [1, 1, 1, 1, 1, 0, 0, 0], 56 | [1, 1, 1, 1, 1, 0, 0, 0]], 57 | dtype=np.uint8) 58 | masks2 = np.stack([masks2_0, masks2_1, masks2_2]) 59 | self.masks1 = masks1 60 | self.masks2 = masks2 61 | 62 | def testArea(self): 63 | areas = np_mask_ops.area(self.masks1) 64 | expected_areas = np.array([8.0, 10.0], dtype=np.float32) 65 | self.assertAllClose(expected_areas, areas) 66 | 67 | def testIntersection(self): 68 | intersection = np_mask_ops.intersection(self.masks1, self.masks2) 69 | expected_intersection = np.array( 70 | [[8.0, 0.0, 8.0], [0.0, 9.0, 7.0]], dtype=np.float32) 71 | self.assertAllClose(intersection, expected_intersection) 72 | 73 | def testIOU(self): 74 | iou = np_mask_ops.iou(self.masks1, self.masks2) 75 | expected_iou = np.array( 76 | [[1.0, 0.0, 8.0/25.0], [0.0, 9.0 / 16.0, 7.0 / 28.0]], dtype=np.float32) 77 | self.assertAllClose(iou, expected_iou) 78 | 79 | def testIOA(self): 80 | ioa21 = np_mask_ops.ioa(self.masks1, self.masks2) 81 | expected_ioa21 = np.array([[1.0, 0.0, 8.0/25.0], 82 | [0.0, 9.0/15.0, 
7.0/25.0]], 83 | dtype=np.float32) 84 | self.assertAllClose(ioa21, expected_ioa21) 85 | 86 | 87 | if __name__ == '__main__': 88 | tf.test.main() 89 | -------------------------------------------------------------------------------- /configs/batch_3.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-Ai-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch-3] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 0 61 | desired_class_subset: "['Tennis ball', 62 | 'Beehive', 63 | 'Woodpecker', 64 | 'Scoreboard', 65 | 'Gas stove', 66 | 'Jet ski', 67 | 'Bathtub', 68 | 'Hamster', 69 | 'Segway', 70 | 'Fox', 71 | 'Raven', 72 | 'Sword', 73 | 'Window blind', 74 | 'Shotgun', 75 | 'Jaguar', 76 | 'Jug', 77 | 'Refrigerator', 78 | 'Bow and arrow', 79 | 'Golf cart', 80 | 'Lobster', 81 | 'Potato', 82 | 'Missile', 83 | 'Chopsticks', 84 | 'Shark', 85 | 'Loveseat', 86 | 'Teapot', 87 | 'Oven', 88 | 'Starfish', 89 | 'Bagel', 90 | 'Ostrich', 91 | 'Brown bear', 92 | 'Washing machine', 93 | 'Bat', 94 | 'Kettle', 95 | 'Pineapple', 96 | 'Volleyball', 97 | 'Polar bear', 98 | 'Taco', 99 | 'Pomegranate', 100 | 'Mechanical fan', 101 | 'Radish', 102 | 'Waffle', 103 | 'Fireplace', 104 | 'Dice', 105 | 'Cheetah', 106 | 'Rhinoceros', 107 | 'Handgun', 108 | 'Wok', 109 | 'Turkey', 110 | 'Ladybug', 111 | 'Willow', 112 | 'Door handle', 113 | 'Otter', 114 | 'Peach', 115 | 'Pasta', 116 | 'Snowman', 117 | 'Pancake', 118 | 'Kangaroo', 119 | 'Bell pepper', 120 | 'Leopard', 121 | 'Alpaca', 122 | 'Watermelon', 123 | 'Table tennis racket', 124 | 'Knife', 125 | 'Mouse', 126 | 'Coconut', 127 | 'Caterpillar', 128 | 'Billiard table', 129 | 'Rocket', 130 | 'Pear', 131 | 'Ant', 132 | 'Doughnut', 133 | 'Snail', 134 | 'Snowboard', 135 | 'Trombone', 136 | 'Accordion', 137 | 'Tart', 138 | 'Tablet computer', 139 | 'Barge', 140 | 'Tennis racket', 141 | 'Plastic bag', 142 | 'Ladder']" 143 | 144 | # Retina parameters (multi-output) 145 | encoder_depth: 50 146 | num_classes: 100 147 | pretrained_encoder: 1 148 | pi: 0.01 149 | aspect_ratios: '[1/2., 1/1., 2/1.]' 150 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 151 | 152 | # Training 
schedule 153 | epochs_nr: 1000 154 | batch_size_train: 8 155 | batch_size_inference: 1 156 | lr: 0.00001 157 | momentum: 0.9 158 | gamma: 1.0 159 | patience: 100 160 | lr_factor: 0.3 161 | lr_patience: 30 162 | training_sample_size: 10000 163 | validation_sample_size: 2000 164 | 165 | # Regularization 166 | use_batch_norm: 1 167 | l2_reg_conv: 0.0001 168 | l2_reg_dense: 0.0 169 | dropout_conv: 0.1 170 | dropout_dense: 0.0 171 | 172 | # Postprocessing 173 | classification_threshold: 0.05 174 | nms_threshold: 0.5 175 | -------------------------------------------------------------------------------- /configs/batch_2.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-Ai-Object-Detection-Challenge 2 | 3 | name: google AI object detection 4 | tags: [solution-1, batch-2, eval] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 0 61 | desired_class_subset: "['Serving tray', 62 | 'Binoculars', 63 | 'Slow cooker', 64 | 'Cricket ball', 65 | 'Tick', 66 | 'Crutch', 67 | 'Oboe', 68 | 'Beaker', 69 | 'Alarm clock', 70 | 'Stretcher', 71 | 'Envelope', 72 | 'Salt and pepper shakers', 73 | 'Food processor', 74 | 'Bench', 75 | 'Digital clock', 76 | 'Wrench', 77 | 'Paper towel', 78 | 'Harpsichord', 79 | 'Cutting board', 80 | 'Mixer', 81 | 'Guacamole', 82 | 'Porcupine', 83 | 'Harp', 84 | 'Blender', 85 | 'Shower', 86 | 'Lynx', 87 | 'Treadmill', 88 | 'Ruler', 89 | 'Adhesive tape', 90 | 'Blue jay', 91 | 'Burrito', 92 | 'Printer', 93 | 'Dog bed', 94 | 'Submarine sandwich', 95 | 'Centipede', 96 | 'Power plugs and sockets', 97 | 'Drinking straw', 98 | 'Rugby ball', 99 | 'Pretzel', 100 | 'Wood-burning stove', 101 | 'Snowplow', 102 | 'Seahorse', 103 | 'Common fig', 104 | 'Coffeemaker', 105 | 'Punching bag', 106 | 'Cake stand', 107 | 'Towel', 108 | 'Stationary bicycle', 109 | 'Pitcher', 110 | 'Kitchen knife', 111 | 'Bathroom cabinet', 112 | 'Flute', 113 | 'Popcorn', 114 | 'Limousine', 115 | 'Snowmobile', 116 | 'Dagger', 117 | 'Filing cabinet', 118 | 'Artichoke', 119 | 'Toilet paper', 120 | 'Frying pan', 121 | 'Raccoon', 122 | 'Honeycomb', 123 | 'Canary', 124 | 'Asparagus', 125 | 'Stop sign', 126 | 'Organ', 127 | 'Scissors', 128 | 'Dumbbell', 129 
| 'Picnic basket', 130 | 'Mango', 131 | 'Fire hydrant', 132 | 'Corded phone', 133 | 'Golf ball', 134 | 'Cabbage', 135 | 'Bidet', 136 | 'Croissant', 137 | 'Ambulance', 138 | 'Sewing machine', 139 | 'Seat belt', 140 | 'Infant bed', 141 | 'Ceiling fan', 142 | 'Hot dog', 143 | 'Microwave oven', 144 | 'Nail']" 145 | 146 | # Retina parameters (multi-output) 147 | encoder_depth: 50 148 | num_classes: 100 149 | pretrained_encoder: 1 150 | pi: 0.01 151 | aspect_ratios: '[1/2., 1/1., 2/1.]' 152 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 153 | 154 | # Training schedule 155 | epochs_nr: 1000 156 | batch_size_train: 8 157 | batch_size_inference: 8 158 | lr: 0.00001 159 | momentum: 0.9 160 | gamma: 1.0 161 | patience: 100 162 | lr_factor: 0.3 163 | lr_patience: 30 164 | training_sample_size: 10000 165 | validation_sample_size: 2000 166 | 167 | # Regularization 168 | use_batch_norm: 1 169 | l2_reg_conv: 0.0001 170 | l2_reg_dense: 0.0 171 | dropout_conv: 0.1 172 | dropout_dense: 0.0 173 | 174 | # Postprocessing 175 | classification_threshold: 0.05 176 | nms_threshold: 0.5 177 | -------------------------------------------------------------------------------- /configs/batch_4.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-Ai-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch-4] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 0 61 | desired_class_subset: "['Whiteboard', 62 | 'Whale', 63 | 'Oyster', 64 | 'Crab', 65 | 'Bust', 66 | 'Wall clock', 67 | 'Crocodile', 68 | 'Cannon', 69 | 'Zucchini', 70 | 'Toilet', 71 | 'Mule', 72 | 'Zebra', 73 | 'Nightstand', 74 | 'Broccoli', 75 | 'Sea turtle', 76 | 'Cucumber', 77 | 'Saxophone', 78 | 'Baseball bat', 79 | 'Horn', 80 | 'Stool', 81 | 'Headphones', 82 | 'Tiger', 83 | 'Grapefruit', 84 | 'Camel', 85 | 'Tea', 86 | 'Cupboard', 87 | 'Piano', 88 | 'Snake', 89 | 'Giraffe', 90 | 'Tripod', 91 | 'Carrot', 92 | 'Wheelchair', 93 | 'Hamburger', 94 | 'Dragonfly', 95 | 'Sofa bed', 96 | 'Lighthouse', 97 | 'Chest of drawers', 98 | 'Dolphin', 99 | 'Trumpet', 100 | 'Antelope', 101 | 'Mirror', 102 | 'Teddy bear', 103 | 'Frog', 104 | 'Banana', 105 | 'Pig', 
106 | 'Rabbit', 107 | 'Sink', 108 | 'Sparrow', 109 | 'Lion', 110 | 'Owl', 111 | 'Fork', 112 | 'Tap', 113 | 'Eagle', 114 | 'Pen', 115 | 'Spoon', 116 | 'Tank', 117 | 'Falcon', 118 | 'Dinosaur', 119 | 'Bull', 120 | 'Lemon', 121 | 'Kite', 122 | 'Musical keyboard', 123 | 'Waste container', 124 | 'Skateboard', 125 | 'Light bulb', 126 | 'Convenience store', 127 | 'Sea lion', 128 | 'Shrimp', 129 | 'Egg', 130 | 'Gondola', 131 | 'studio couch', 132 | 'Squirrel', 133 | 'Tortoise', 134 | 'Cello', 135 | 'Pizza', 136 | 'Spider', 137 | 'Jellyfish', 138 | 'Goat', 139 | 'Harbor seal', 140 | 'Barrel', 141 | 'Sushi', 142 | 'French fries', 143 | 'Lizard', 144 | 'Kitchen & dining room table', 145 | 'Goldfish', 146 | 'Lily', 147 | 'Candy', 148 | 'Mug', 149 | 'Coffee', 150 | 'Parrot', 151 | 'Vase', 152 | 'Rifle', 153 | 'Surfboard', 154 | 'Skull', 155 | 'Parachute', 156 | 'Bronze sculpture', 157 | 'Cart', 158 | 'Grape', 159 | 'Saucer', 160 | 'Violin', 161 | 'Ice cream', 162 | 'Juice', 163 | 'Tin can']" 164 | 165 | # Retina parameters (multi-output) 166 | encoder_depth: 50 167 | num_classes: 100 168 | pretrained_encoder: 1 169 | pi: 0.01 170 | aspect_ratios: '[1/2., 1/1., 2/1.]' 171 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 172 | 173 | # Training schedule 174 | epochs_nr: 1000 175 | batch_size_train: 8 176 | batch_size_inference: 1 177 | lr: 0.00001 178 | momentum: 0.9 179 | gamma: 1.0 180 | patience: 100 181 | lr_factor: 0.3 182 | lr_patience: 30 183 | training_sample_size: 10000 184 | validation_sample_size: 2000 185 | 186 | # Regularization 187 | use_batch_norm: 1 188 | l2_reg_conv: 0.0001 189 | l2_reg_dense: 0.0 190 | dropout_conv: 0.1 191 | dropout_dense: 0.0 192 | 193 | # Postprocessing 194 | classification_threshold: 0.05 195 | nms_threshold: 0.5 196 | -------------------------------------------------------------------------------- /src/object_detection/utils/test_case.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A convenience wrapper around tf.test.TestCase to enable TPU tests.""" 16 | 17 | import tensorflow as tf 18 | from tensorflow.contrib import tpu 19 | 20 | flags = tf.app.flags 21 | 22 | flags.DEFINE_bool('tpu_test', False, 'Whether to configure test for TPU.') 23 | FLAGS = flags.FLAGS 24 | 25 | 26 | class TestCase(tf.test.TestCase): 27 | """Extends tf.test.TestCase to optionally allow running tests on TPU.""" 28 | 29 | def execute_tpu(self, graph_fn, inputs): 30 | """Constructs the graph, executes it on TPU and returns the result. 31 | 32 | Args: 33 | graph_fn: a callable that constructs the tensorflow graph to test. The 34 | arguments of this function should correspond to `inputs`. 35 | inputs: a list of numpy arrays to feed input to the computation graph. 
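    Example (editorial sketch, not in the original file; it illustrates the
    graph_fn/inputs calling convention with a hypothetical add_fn and assumes
    numpy is imported as np):

      def add_fn(a, b):
        return a + b
      result = self.execute_tpu(add_fn, [np.ones((2, 2), np.float32),
                                         np.ones((2, 2), np.float32)])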
36 | 37 | Returns: 38 | A list of numpy arrays or a scalar returned from executing the tensorflow 39 | graph. 40 | """ 41 | with self.test_session(graph=tf.Graph()) as sess: 42 | placeholders = [tf.placeholder_with_default(v, v.shape) for v in inputs] 43 | tpu_computation = tpu.rewrite(graph_fn, placeholders) 44 | sess.run(tpu.initialize_system()) 45 | sess.run([tf.global_variables_initializer(), tf.tables_initializer(), 46 | tf.local_variables_initializer()]) 47 | materialized_results = sess.run(tpu_computation, 48 | feed_dict=dict(list(zip(placeholders, inputs)))) 49 | sess.run(tpu.shutdown_system()) 50 | if (len(materialized_results) == 1 51 | and (isinstance(materialized_results, list) 52 | or isinstance(materialized_results, tuple))): 53 | materialized_results = materialized_results[0] 54 | return materialized_results 55 | 56 | def execute_cpu(self, graph_fn, inputs): 57 | """Constructs the graph, executes it on CPU and returns the result. 58 | 59 | Args: 60 | graph_fn: a callable that constructs the tensorflow graph to test. The 61 | arguments of this function should correspond to `inputs`. 62 | inputs: a list of numpy arrays to feed input to the computation graph. 63 | 64 | Returns: 65 | A list of numpy arrays or a scalar returned from executing the tensorflow 66 | graph. 67 | """ 68 | with self.test_session(graph=tf.Graph()) as sess: 69 | placeholders = [tf.placeholder_with_default(v, v.shape) for v in inputs] 70 | results = graph_fn(*placeholders) 71 | sess.run([tf.global_variables_initializer(), tf.tables_initializer(), 72 | tf.local_variables_initializer()]) 73 | materialized_results = sess.run(results, feed_dict=dict(list(zip(placeholders, 74 | inputs)))) 75 | if (len(materialized_results) == 1 76 | and (isinstance(materialized_results, list) 77 | or isinstance(materialized_results, tuple))): 78 | materialized_results = materialized_results[0] 79 | return materialized_results 80 | 81 | def execute(self, graph_fn, inputs): 82 | """Constructs the graph, creates a test session and returns the results. 83 | 84 | The graph is executed either on TPU or CPU based on the `tpu_test` flag. 85 | 86 | Args: 87 | graph_fn: a callable that constructs the tensorflow graph to test. The 88 | arguments of this function should correspond to `inputs`. 89 | inputs: a list of numpy arrays to feed input to the computation graph. 90 | 91 | Returns: 92 | A list of numpy arrays or a scalar returned from executing the tensorflow 93 | graph. 94 | """ 95 | if FLAGS.tpu_test: 96 | return self.execute_tpu(graph_fn, inputs) 97 | else: 98 | return self.execute_cpu(graph_fn, inputs) 99 | -------------------------------------------------------------------------------- /src/object_detection/metrics/oid_od_challenge_evaluation_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | r"""Converts data from CSV to the OpenImagesDetectionChallengeEvaluator format. 16 | """ 17 | 18 | import os 19 | import sys 20 | 21 | cwd = os.getcwd() 22 | sys.path.append(os.path.join(cwd, 'src')) 23 | 24 | from object_detection.core import standard_fields 25 | 26 | 27 | def build_groundtruth_boxes_dictionary(data, class_label_map): 28 | """Builds a groundtruth dictionary from groundtruth data in CSV file. 29 | 30 | Args: 31 | data: Pandas DataFrame with the groundtruth data for a single image. 32 | class_label_map: Class labelmap from string label name to an integer. 33 | 34 | Returns: 35 | A dictionary with keys suitable for passing to 36 | OpenImagesDetectionChallengeEvaluator.add_single_ground_truth_image_info: 37 | standard_fields.InputDataFields.groundtruth_boxes: float32 numpy array 38 | of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of 39 | the format [ymin, xmin, ymax, xmax] in absolute image coordinates. 40 | standard_fields.InputDataFields.groundtruth_classes: integer numpy array 41 | of shape [num_boxes] containing 1-indexed groundtruth classes for the 42 | boxes. 43 | standard_fields.InputDataFields.verified_labels: integer 1D numpy array 44 | containing all classes for which labels are verified. 45 | standard_fields.InputDataFields.groundtruth_group_of: Optional length 46 | M numpy boolean array denoting whether a groundtruth box contains a 47 | group of instances. 48 | """ 49 | 50 | data_boxes = data[data.ConfidenceImageLabel.isnull()] 51 | data_labels = data[data.XMin.isnull()] 52 | 53 | return { 54 | standard_fields.InputDataFields.groundtruth_boxes: 55 | data_boxes[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(), 56 | standard_fields.InputDataFields.groundtruth_classes: 57 | data_boxes['LabelName'].map(lambda x: class_label_map[x]).as_matrix(), 58 | standard_fields.InputDataFields.groundtruth_group_of: 59 | data_boxes['IsGroupOf'].as_matrix().astype(int), 60 | standard_fields.InputDataFields.groundtruth_image_classes: 61 | data_labels['LabelName'].map(lambda x: class_label_map[x]).as_matrix(), 62 | } 63 | 64 | 65 | def build_predictions_dictionary(data, class_label_map): 66 | """Builds a predictions dictionary from predictions data in CSV file. 67 | 68 | Args: 69 | data: Pandas DataFrame with the predictions data for a single image. 70 | class_label_map: Class labelmap from string label name to an integer. 71 | 72 | Returns: 73 | Dictionary with keys suitable for passing to 74 | OpenImagesDetectionChallengeEvaluator.add_single_detected_image_info: 75 | standard_fields.DetectionResultFields.detection_boxes: float32 numpy 76 | array of shape [num_boxes, 4] containing `num_boxes` detection boxes 77 | of the format [ymin, xmin, ymax, xmax] in absolute image coordinates. 78 | standard_fields.DetectionResultFields.detection_scores: float32 numpy 79 | array of shape [num_boxes] containing detection scores for the boxes. 80 | standard_fields.DetectionResultFields.detection_classes: integer numpy 81 | array of shape [num_boxes] containing 1-indexed detection classes for 82 | the boxes. 
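  Example (editorial sketch; the column layout mirrors the accompanying unit
  test and assumes pandas is imported as pd):

    data = pd.DataFrame(
        [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 0.1]],
        columns=['ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'Score'])
    class_label_map = {'/m/04bcr3': 1}
    prediction_dict = build_predictions_dictionary(data, class_label_map)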
83 | 84 | """ 85 | return { 86 | standard_fields.DetectionResultFields.detection_boxes: 87 | data[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(), 88 | standard_fields.DetectionResultFields.detection_classes: 89 | data['LabelName'].map(lambda x: class_label_map[x]).as_matrix(), 90 | standard_fields.DetectionResultFields.detection_scores: 91 | data['Score'].as_matrix() 92 | } 93 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_mask_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Operations for [N, height, width] numpy arrays representing masks. 17 | 18 | Example mask operations that are supported: 19 | * Areas: compute mask areas 20 | * IOU: pairwise intersection-over-union scores 21 | """ 22 | import numpy as np 23 | 24 | EPSILON = 1e-7 25 | 26 | 27 | def area(masks): 28 | """Computes area of masks. 29 | 30 | Args: 31 | masks: Numpy array with shape [N, height, width] holding N masks. Masks 32 | values are of type np.uint8 and values are in {0,1}. 33 | 34 | Returns: 35 | a numpy array with shape [N*1] representing mask areas. 36 | 37 | Raises: 38 | ValueError: If masks.dtype is not np.uint8 39 | """ 40 | if masks.dtype != np.uint8: 41 | raise ValueError('Masks type should be np.uint8') 42 | return np.sum(masks, axis=(1, 2), dtype=np.float32) 43 | 44 | 45 | def intersection(masks1, masks2): 46 | """Compute pairwise intersection areas between masks. 47 | 48 | Args: 49 | masks1: a numpy array with shape [N, height, width] holding N masks. Masks 50 | values are of type np.uint8 and values are in {0,1}. 51 | masks2: a numpy array with shape [M, height, width] holding M masks. Masks 52 | values are of type np.uint8 and values are in {0,1}. 53 | 54 | Returns: 55 | a numpy array with shape [N*M] representing pairwise intersection area. 56 | 57 | Raises: 58 | ValueError: If masks1 and masks2 are not of type np.uint8. 59 | """ 60 | if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: 61 | raise ValueError('masks1 and masks2 should be of type np.uint8') 62 | n = masks1.shape[0] 63 | m = masks2.shape[0] 64 | answer = np.zeros([n, m], dtype=np.float32) 65 | for i in np.arange(n): 66 | for j in np.arange(m): 67 | answer[i, j] = np.sum(np.minimum(masks1[i], masks2[j]), dtype=np.float32) 68 | return answer 69 | 70 | 71 | def iou(masks1, masks2): 72 | """Computes pairwise intersection-over-union between mask collections. 73 | 74 | Args: 75 | masks1: a numpy array with shape [N, height, width] holding N masks. Masks 76 | values are of type np.uint8 and values are in {0,1}. 77 | masks2: a numpy array with shape [M, height, width] holding N masks. Masks 78 | values are of type np.uint8 and values are in {0,1}. 
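  Example (editorial sketch; masks must be 0/1 valued np.uint8 arrays, as noted
  above):

    m1 = np.zeros((1, 5, 8), dtype=np.uint8); m1[0, 3:, :4] = 1  # area 8
    m2 = np.zeros((1, 5, 8), dtype=np.uint8); m2[0, :, :4] = 1   # area 20
    iou(m1, m2)  # -> [[8. / 20.]], intersection 8 over union 8 + 20 - 8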
79 | 80 | Returns: 81 | a numpy array with shape [N, M] representing pairwise iou scores. 82 | 83 | Raises: 84 | ValueError: If masks1 and masks2 are not of type np.uint8. 85 | """ 86 | if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: 87 | raise ValueError('masks1 and masks2 should be of type np.uint8') 88 | intersect = intersection(masks1, masks2) 89 | area1 = area(masks1) 90 | area2 = area(masks2) 91 | union = np.expand_dims(area1, axis=1) + np.expand_dims( 92 | area2, axis=0) - intersect 93 | return intersect / np.maximum(union, EPSILON) 94 | 95 | 96 | def ioa(masks1, masks2): 97 | """Computes pairwise intersection-over-area between box collections. 98 | 99 | Intersection-over-area (ioa) between two masks, mask1 and mask2 is defined as 100 | their intersection area over mask2's area. Note that ioa is not symmetric, 101 | that is, IOA(mask1, mask2) != IOA(mask2, mask1). 102 | 103 | Args: 104 | masks1: a numpy array with shape [N, height, width] holding N masks. Masks 105 | values are of type np.uint8 and values are in {0,1}. 106 | masks2: a numpy array with shape [M, height, width] holding N masks. Masks 107 | values are of type np.uint8 and values are in {0,1}. 108 | 109 | Returns: 110 | a numpy array with shape [N, M] representing pairwise ioa scores. 111 | 112 | Raises: 113 | ValueError: If masks1 and masks2 are not of type np.uint8. 114 | """ 115 | if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: 116 | raise ValueError('masks1 and masks2 should be of type np.uint8') 117 | intersect = intersection(masks1, masks2) 118 | areas = np.expand_dims(area(masks2), axis=0) 119 | return intersect / (areas + EPSILON) 120 | -------------------------------------------------------------------------------- /src/object_detection/protos/string_int_label_map_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
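# Editorial note (assumption): files like this are typically regenerated from
# the .proto definition with protoc, e.g.
#   protoc object_detection/protos/string_int_label_map.proto --python_out=.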
2 | # source: object_detection/protos/string_int_label_map.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='object_detection/protos/string_int_label_map.proto', 20 | package='object_detection.protos', 21 | syntax='proto2', 22 | serialized_pb=_b('\n2object_detection/protos/string_int_label_map.proto\x12\x17object_detection.protos\"G\n\x15StringIntLabelMapItem\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\x05\x12\x14\n\x0c\x64isplay_name\x18\x03 \x01(\t\"Q\n\x11StringIntLabelMap\x12<\n\x04item\x18\x01 \x03(\x0b\x32..object_detection.protos.StringIntLabelMapItem') 23 | ) 24 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 25 | 26 | 27 | 28 | 29 | _STRINGINTLABELMAPITEM = _descriptor.Descriptor( 30 | name='StringIntLabelMapItem', 31 | full_name='object_detection.protos.StringIntLabelMapItem', 32 | filename=None, 33 | file=DESCRIPTOR, 34 | containing_type=None, 35 | fields=[ 36 | _descriptor.FieldDescriptor( 37 | name='name', full_name='object_detection.protos.StringIntLabelMapItem.name', index=0, 38 | number=1, type=9, cpp_type=9, label=1, 39 | has_default_value=False, default_value=_b("").decode('utf-8'), 40 | message_type=None, enum_type=None, containing_type=None, 41 | is_extension=False, extension_scope=None, 42 | options=None), 43 | _descriptor.FieldDescriptor( 44 | name='id', full_name='object_detection.protos.StringIntLabelMapItem.id', index=1, 45 | number=2, type=5, cpp_type=1, label=1, 46 | has_default_value=False, default_value=0, 47 | message_type=None, enum_type=None, containing_type=None, 48 | is_extension=False, extension_scope=None, 49 | options=None), 50 | _descriptor.FieldDescriptor( 51 | name='display_name', full_name='object_detection.protos.StringIntLabelMapItem.display_name', index=2, 52 | number=3, type=9, cpp_type=9, label=1, 53 | has_default_value=False, default_value=_b("").decode('utf-8'), 54 | message_type=None, enum_type=None, containing_type=None, 55 | is_extension=False, extension_scope=None, 56 | options=None), 57 | ], 58 | extensions=[ 59 | ], 60 | nested_types=[], 61 | enum_types=[ 62 | ], 63 | options=None, 64 | is_extendable=False, 65 | syntax='proto2', 66 | extension_ranges=[], 67 | oneofs=[ 68 | ], 69 | serialized_start=79, 70 | serialized_end=150, 71 | ) 72 | 73 | 74 | _STRINGINTLABELMAP = _descriptor.Descriptor( 75 | name='StringIntLabelMap', 76 | full_name='object_detection.protos.StringIntLabelMap', 77 | filename=None, 78 | file=DESCRIPTOR, 79 | containing_type=None, 80 | fields=[ 81 | _descriptor.FieldDescriptor( 82 | name='item', full_name='object_detection.protos.StringIntLabelMap.item', index=0, 83 | number=1, type=11, cpp_type=10, label=3, 84 | has_default_value=False, default_value=[], 85 | message_type=None, enum_type=None, containing_type=None, 86 | is_extension=False, extension_scope=None, 87 | options=None), 88 | ], 89 | extensions=[ 90 | ], 91 | nested_types=[], 92 | enum_types=[ 93 | ], 94 | options=None, 95 | is_extendable=False, 96 | syntax='proto2', 97 | extension_ranges=[], 98 | oneofs=[ 99 | ], 100 | serialized_start=152, 101 | 
serialized_end=233, 102 | ) 103 | 104 | _STRINGINTLABELMAP.fields_by_name['item'].message_type = _STRINGINTLABELMAPITEM 105 | DESCRIPTOR.message_types_by_name['StringIntLabelMapItem'] = _STRINGINTLABELMAPITEM 106 | DESCRIPTOR.message_types_by_name['StringIntLabelMap'] = _STRINGINTLABELMAP 107 | 108 | StringIntLabelMapItem = _reflection.GeneratedProtocolMessageType('StringIntLabelMapItem', (_message.Message,), dict( 109 | DESCRIPTOR = _STRINGINTLABELMAPITEM, 110 | __module__ = 'object_detection.protos.string_int_label_map_pb2' 111 | # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMapItem) 112 | )) 113 | _sym_db.RegisterMessage(StringIntLabelMapItem) 114 | 115 | StringIntLabelMap = _reflection.GeneratedProtocolMessageType('StringIntLabelMap', (_message.Message,), dict( 116 | DESCRIPTOR = _STRINGINTLABELMAP, 117 | __module__ = 'object_detection.protos.string_int_label_map_pb2' 118 | # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMap) 119 | )) 120 | _sym_db.RegisterMessage(StringIntLabelMap) 121 | 122 | 123 | # @@protoc_insertion_point(module_scope) 124 | -------------------------------------------------------------------------------- /src/object_detection/metrics/oid_od_challenge_evaluation_utils_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for oid_od_challenge_evaluation_util.""" 16 | 17 | 18 | 19 | 20 | 21 | import numpy as np 22 | import pandas as pd 23 | import tensorflow as tf 24 | from object_detection.core import standard_fields 25 | from object_detection.metrics import oid_od_challenge_evaluation_utils as utils 26 | 27 | 28 | class OidOdChallengeEvaluationUtilTest(tf.test.TestCase): 29 | 30 | def testBuildGroundtruthDictionary(self): 31 | np_data = pd.DataFrame( 32 | [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 1, None], [ 33 | 'fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0, None 34 | ], ['fe58ec1b06db2bb7', '/m/04bcr3', None, None, None, None, None, 1], [ 35 | 'fe58ec1b06db2bb7', '/m/083vt', None, None, None, None, None, 0 36 | ], ['fe58ec1b06db2bb7', '/m/02gy9n', None, None, None, None, None, 1]], 37 | columns=[ 38 | 'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'IsGroupOf', 39 | 'ConfidenceImageLabel' 40 | ]) 41 | class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3} 42 | groundtruth_dictionary = utils.build_groundtruth_boxes_dictionary( 43 | np_data, class_label_map) 44 | 45 | self.assertTrue(standard_fields.InputDataFields.groundtruth_boxes in 46 | groundtruth_dictionary) 47 | self.assertTrue(standard_fields.InputDataFields.groundtruth_classes in 48 | groundtruth_dictionary) 49 | self.assertTrue(standard_fields.InputDataFields.groundtruth_group_of in 50 | groundtruth_dictionary) 51 | self.assertTrue(standard_fields.InputDataFields.groundtruth_image_classes in 52 | groundtruth_dictionary) 53 | 54 | self.assertAllEqual( 55 | np.array([1, 3]), groundtruth_dictionary[ 56 | standard_fields.InputDataFields.groundtruth_classes]) 57 | self.assertAllEqual( 58 | np.array([1, 0]), groundtruth_dictionary[ 59 | standard_fields.InputDataFields.groundtruth_group_of]) 60 | 61 | expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2]]) 62 | 63 | self.assertNDArrayNear( 64 | expected_boxes_data, groundtruth_dictionary[ 65 | standard_fields.InputDataFields.groundtruth_boxes], 1e-5) 66 | self.assertAllEqual( 67 | np.array([1, 2, 3]), groundtruth_dictionary[ 68 | standard_fields.InputDataFields.groundtruth_image_classes]) 69 | 70 | def testBuildPredictionDictionary(self): 71 | np_data = pd.DataFrame( 72 | [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 0.1], [ 73 | 'fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0.2 74 | ], ['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.1, 0.2, 0.3, 0.3]], 75 | columns=[ 76 | 'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'Score' 77 | ]) 78 | class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3} 79 | prediction_dictionary = utils.build_predictions_dictionary( 80 | np_data, class_label_map) 81 | 82 | self.assertTrue(standard_fields.DetectionResultFields.detection_boxes in 83 | prediction_dictionary) 84 | self.assertTrue(standard_fields.DetectionResultFields.detection_classes in 85 | prediction_dictionary) 86 | self.assertTrue(standard_fields.DetectionResultFields.detection_scores in 87 | prediction_dictionary) 88 | 89 | self.assertAllEqual( 90 | np.array([1, 3, 1]), prediction_dictionary[ 91 | standard_fields.DetectionResultFields.detection_classes]) 92 | expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2], 93 | [0.2, 0.0, 0.3, 0.1]]) 94 | self.assertNDArrayNear( 95 | expected_boxes_data, prediction_dictionary[ 96 | standard_fields.DetectionResultFields.detection_boxes], 1e-5) 97 | 
self.assertNDArrayNear( 98 | np.array([0.1, 0.2, 0.3]), prediction_dictionary[ 99 | standard_fields.DetectionResultFields.detection_scores], 1e-5) 100 | 101 | 102 | if __name__ == '__main__': 103 | tf.test.main() 104 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_box_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Numpy BoxList classes and functions.""" 17 | 18 | import numpy as np 19 | 20 | 21 | class BoxList(object): 22 | """Box collection. 23 | 24 | BoxList represents a list of bounding boxes as numpy array, where each 25 | bounding box is represented as a row of 4 numbers, 26 | [y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes within a 27 | given list correspond to a single image. 28 | 29 | Optionally, users can add additional related fields (such as 30 | objectness/classification scores). 31 | """ 32 | 33 | def __init__(self, data): 34 | """Constructs box collection. 35 | 36 | Args: 37 | data: a numpy array of shape [N, 4] representing box coordinates 38 | 39 | Raises: 40 | ValueError: if bbox data is not a numpy array 41 | ValueError: if invalid dimensions for bbox data 42 | """ 43 | if not isinstance(data, np.ndarray): 44 | raise ValueError('data must be a numpy array.') 45 | if len(data.shape) != 2 or data.shape[1] != 4: 46 | raise ValueError('Invalid dimensions for box data.') 47 | if data.dtype != np.float32 and data.dtype != np.float64: 48 | raise ValueError('Invalid data type for box data: float is required.') 49 | if not self._is_valid_boxes(data): 50 | raise ValueError('Invalid box data. data must be a numpy array of ' 51 | 'N*[y_min, x_min, y_max, x_max]') 52 | self.data = {'boxes': data} 53 | 54 | def num_boxes(self): 55 | """Return number of boxes held in collections.""" 56 | return self.data['boxes'].shape[0] 57 | 58 | def get_extra_fields(self): 59 | """Return all non-box fields.""" 60 | return [k for k in list(self.data.keys()) if k != 'boxes'] 61 | 62 | def has_field(self, field): 63 | return field in self.data 64 | 65 | def add_field(self, field, field_data): 66 | """Add data to a specified field. 67 | 68 | Args: 69 | field: a string parameter used to speficy a related field to be accessed. 70 | field_data: a numpy array of [N, ...] representing the data associated 71 | with the field. 72 | Raises: 73 | ValueError: if the field is already exist or the dimension of the field 74 | data does not matches the number of boxes. 
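    Example (editorial sketch using only this class; numpy is imported as np in
    this module):

      boxlist = BoxList(np.array([[0., 0., 1., 1.]], dtype=np.float32))
      boxlist.add_field('scores', np.array([0.9], dtype=np.float32))
      boxlist.get_field('scores')  # -> array([0.9], dtype=float32)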
75 | """ 76 | if self.has_field(field): 77 | raise ValueError('Field ' + field + 'already exists') 78 | if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes(): 79 | raise ValueError('Invalid dimensions for field data') 80 | self.data[field] = field_data 81 | 82 | def get(self): 83 | """Convenience function for accesssing box coordinates. 84 | 85 | Returns: 86 | a numpy array of shape [N, 4] representing box corners 87 | """ 88 | return self.get_field('boxes') 89 | 90 | def get_field(self, field): 91 | """Accesses data associated with the specified field in the box collection. 92 | 93 | Args: 94 | field: a string parameter used to speficy a related field to be accessed. 95 | 96 | Returns: 97 | a numpy 1-d array representing data of an associated field 98 | 99 | Raises: 100 | ValueError: if invalid field 101 | """ 102 | if not self.has_field(field): 103 | raise ValueError('field {} does not exist'.format(field)) 104 | return self.data[field] 105 | 106 | def get_coordinates(self): 107 | """Get corner coordinates of boxes. 108 | 109 | Returns: 110 | a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max] 111 | """ 112 | box_coordinates = self.get() 113 | y_min = box_coordinates[:, 0] 114 | x_min = box_coordinates[:, 1] 115 | y_max = box_coordinates[:, 2] 116 | x_max = box_coordinates[:, 3] 117 | return [y_min, x_min, y_max, x_max] 118 | 119 | def _is_valid_boxes(self, data): 120 | """Check whether data fullfills the format of N*[ymin, xmin, ymax, xmin]. 121 | 122 | Args: 123 | data: a numpy array of shape [N, 4] representing box coordinates 124 | 125 | Returns: 126 | a boolean indicating whether all ymax of boxes are equal or greater than 127 | ymin, and all xmax of boxes are equal or greater than xmin. 128 | """ 129 | if data.shape[0] > 0: 130 | for i in range(data.shape[0]): 131 | if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]: 132 | return False 133 | return True 134 | -------------------------------------------------------------------------------- /src/object_detection/dataset_tools/oid_tfrecord_creation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""Utilities for creating TFRecords of TF examples for the Open Images dataset. 16 | """ 17 | 18 | 19 | 20 | 21 | import tensorflow as tf 22 | 23 | from object_detection.core import standard_fields 24 | from object_detection.utils import dataset_util 25 | 26 | 27 | def tf_example_from_annotations_data_frame(annotations_data_frame, label_map, 28 | encoded_image): 29 | """Populates a TF Example message with image annotations from a data frame. 30 | 31 | Args: 32 | annotations_data_frame: Data frame containing the annotations for a single 33 | image. 34 | label_map: String to integer label map. 
35 | encoded_image: The encoded image string 36 | 37 | Returns: 38 | The populated TF Example, if the label of at least one object is present in 39 | label_map. Otherwise, returns None. 40 | """ 41 | 42 | filtered_data_frame = annotations_data_frame[ 43 | annotations_data_frame.LabelName.isin(label_map)] 44 | filtered_data_frame_boxes = filtered_data_frame[ 45 | ~filtered_data_frame.YMin.isnull()] 46 | filtered_data_frame_labels = filtered_data_frame[ 47 | filtered_data_frame.YMin.isnull()] 48 | image_id = annotations_data_frame.ImageID.iloc[0] 49 | 50 | feature_map = { 51 | standard_fields.TfExampleFields.object_bbox_ymin: 52 | dataset_util.float_list_feature( 53 | filtered_data_frame_boxes.YMin.as_matrix()), 54 | standard_fields.TfExampleFields.object_bbox_xmin: 55 | dataset_util.float_list_feature( 56 | filtered_data_frame_boxes.XMin.as_matrix()), 57 | standard_fields.TfExampleFields.object_bbox_ymax: 58 | dataset_util.float_list_feature( 59 | filtered_data_frame_boxes.YMax.as_matrix()), 60 | standard_fields.TfExampleFields.object_bbox_xmax: 61 | dataset_util.float_list_feature( 62 | filtered_data_frame_boxes.XMax.as_matrix()), 63 | standard_fields.TfExampleFields.object_class_text: 64 | dataset_util.bytes_list_feature( 65 | filtered_data_frame_boxes.LabelName.as_matrix()), 66 | standard_fields.TfExampleFields.object_class_label: 67 | dataset_util.int64_list_feature( 68 | filtered_data_frame_boxes.LabelName.map(lambda x: label_map[x]) 69 | .as_matrix()), 70 | standard_fields.TfExampleFields.filename: 71 | dataset_util.bytes_feature('{}.jpg'.format(image_id)), 72 | standard_fields.TfExampleFields.source_id: 73 | dataset_util.bytes_feature(image_id), 74 | standard_fields.TfExampleFields.image_encoded: 75 | dataset_util.bytes_feature(encoded_image), 76 | } 77 | 78 | if 'IsGroupOf' in filtered_data_frame.columns: 79 | feature_map[standard_fields.TfExampleFields. 80 | object_group_of] = dataset_util.int64_list_feature( 81 | filtered_data_frame_boxes.IsGroupOf.as_matrix().astype(int)) 82 | if 'IsOccluded' in filtered_data_frame.columns: 83 | feature_map[standard_fields.TfExampleFields. 84 | object_occluded] = dataset_util.int64_list_feature( 85 | filtered_data_frame_boxes.IsOccluded.as_matrix().astype( 86 | int)) 87 | if 'IsTruncated' in filtered_data_frame.columns: 88 | feature_map[standard_fields.TfExampleFields. 89 | object_truncated] = dataset_util.int64_list_feature( 90 | filtered_data_frame_boxes.IsTruncated.as_matrix().astype( 91 | int)) 92 | if 'IsDepiction' in filtered_data_frame.columns: 93 | feature_map[standard_fields.TfExampleFields. 94 | object_depiction] = dataset_util.int64_list_feature( 95 | filtered_data_frame_boxes.IsDepiction.as_matrix().astype( 96 | int)) 97 | 98 | if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns: 99 | feature_map[standard_fields.TfExampleFields. 100 | image_class_label] = dataset_util.int64_list_feature( 101 | filtered_data_frame_labels.LabelName.map( 102 | lambda x: label_map[x]).as_matrix()) 103 | feature_map[standard_fields.TfExampleFields. 104 | image_class_text] = dataset_util.bytes_list_feature( 105 | filtered_data_frame_labels.LabelName.as_matrix()), 106 | return tf.train.Example(features=tf.train.Features(feature=feature_map)) 107 | -------------------------------------------------------------------------------- /src/object_detection/dataset_tools/create_oid_tf_record.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""Creates TFRecords of Open Images dataset for object detection. 16 | 17 | Example usage: 18 | python object_detection/dataset_tools/create_oid_tf_record.py \ 19 | --input_box_annotations_csv=/path/to/input/annotations-human-bbox.csv \ 20 | --input_image_label_annotations_csv=/path/to/input/annotations-label.csv \ 21 | --input_images_directory=/path/to/input/image_pixels_directory \ 22 | --input_label_map=/path/to/input/labels_bbox_545.labelmap \ 23 | --output_tf_record_path_prefix=/path/to/output/prefix.tfrecord 24 | 25 | CSVs with bounding box annotations and image metadata (including the image URLs) 26 | can be downloaded from the Open Images GitHub repository: 27 | https://github.com/openimages/dataset 28 | 29 | This script will include every image found in the input_images_directory in the 30 | output TFRecord, even if the image has no corresponding bounding box annotations 31 | in the input_annotations_csv. If input_image_label_annotations_csv is specified, 32 | it will add image-level labels as well. Note that the information of whether a 33 | label is positivelly or negativelly verified is NOT added to tfrecord. 34 | """ 35 | 36 | 37 | 38 | 39 | import os 40 | 41 | import contextlib2 42 | import pandas as pd 43 | import tensorflow as tf 44 | 45 | from object_detection.dataset_tools import oid_tfrecord_creation 46 | from object_detection.dataset_tools import tf_record_creation_util 47 | from object_detection.utils import label_map_util 48 | 49 | tf.flags.DEFINE_string('input_box_annotations_csv', None, 50 | 'Path to CSV containing image bounding box annotations') 51 | tf.flags.DEFINE_string('input_images_directory', None, 52 | 'Directory containing the image pixels ' 53 | 'downloaded from the OpenImages GitHub repository.') 54 | tf.flags.DEFINE_string('input_image_label_annotations_csv', None, 55 | 'Path to CSV containing image-level labels annotations') 56 | tf.flags.DEFINE_string('input_label_map', None, 'Path to the label map proto') 57 | tf.flags.DEFINE_string( 58 | 'output_tf_record_path_prefix', None, 59 | 'Path to the output TFRecord. 
The shard index and the number of shards ' 60 | 'will be appended for each output shard.') 61 | tf.flags.DEFINE_integer('num_shards', 100, 'Number of TFRecord shards') 62 | 63 | FLAGS = tf.flags.FLAGS 64 | 65 | 66 | def main(_): 67 | tf.logging.set_verbosity(tf.logging.INFO) 68 | 69 | required_flags = [ 70 | 'input_box_annotations_csv', 'input_images_directory', 'input_label_map', 71 | 'output_tf_record_path_prefix' 72 | ] 73 | for flag_name in required_flags: 74 | if not getattr(FLAGS, flag_name): 75 | raise ValueError('Flag --{} is required'.format(flag_name)) 76 | 77 | label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map) 78 | all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv) 79 | if FLAGS.input_image_label_annotations_csv: 80 | all_label_annotations = pd.read_csv(FLAGS.input_image_label_annotations_csv) 81 | all_label_annotations.rename( 82 | columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True) 83 | else: 84 | all_label_annotations = None 85 | all_images = tf.gfile.Glob( 86 | os.path.join(FLAGS.input_images_directory, '*.jpg')) 87 | all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images] 88 | all_image_ids = pd.DataFrame({'ImageID': all_image_ids}) 89 | all_annotations = pd.concat( 90 | [all_box_annotations, all_image_ids, all_label_annotations]) 91 | 92 | tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids)) 93 | 94 | with contextlib2.ExitStack() as tf_record_close_stack: 95 | output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords( 96 | tf_record_close_stack, FLAGS.output_tf_record_path_prefix, 97 | FLAGS.num_shards) 98 | 99 | for counter, image_data in enumerate(all_annotations.groupby('ImageID')): 100 | tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000, 101 | counter) 102 | 103 | image_id, image_annotations = image_data 104 | # In OID image file names are formed by appending ".jpg" to the image ID. 105 | image_path = os.path.join(FLAGS.input_images_directory, image_id + '.jpg') 106 | with tf.gfile.Open(image_path) as image_file: 107 | encoded_image = image_file.read() 108 | 109 | tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( 110 | image_annotations, label_map, encoded_image) 111 | if tf_example: 112 | shard_idx = int(image_id, 16) % FLAGS.num_shards 113 | output_tfrecords[shard_idx].write(tf_example.SerializeToString()) 114 | 115 | 116 | if __name__ == '__main__': 117 | tf.app.run() 118 | -------------------------------------------------------------------------------- /src/object_detection/metrics/oid_od_challenge_evaluation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""Runs evaluation using OpenImages groundtruth and predictions. 
16 | 17 | Example usage: 18 | python models/research/object_detection/metrics/oid_od_challenge_evaluation.py \ 19 | --input_annotations_boxes=/path/to/input/annotations-human-bbox.csv \ 20 | --input_annotations_labels=/path/to/input/annotations-label.csv \ 21 | --input_class_labelmap=/path/to/input/class_labelmap.pbtxt \ 22 | --input_predictions=/path/to/input/predictions.csv \ 23 | --output_metrics=/path/to/output/metric.csv \ 24 | 25 | CSVs with bounding box annotations and image label (including the image URLs) 26 | can be downloaded from the Open Images Challenge website: 27 | https://storage.googleapis.com/openimages/web/challenge.html 28 | The format of the input csv and the metrics itself are described on the 29 | challenge website. 30 | """ 31 | 32 | import os 33 | import sys 34 | 35 | cwd = os.getcwd() 36 | sys.path.append(os.path.join(cwd, 'src')) 37 | 38 | import argparse 39 | import pandas as pd 40 | from google.protobuf import text_format 41 | 42 | from object_detection.metrics import io_utils 43 | from object_detection.metrics import oid_od_challenge_evaluation_utils as utils 44 | from object_detection.protos import string_int_label_map_pb2 45 | from object_detection.utils import object_detection_evaluation 46 | 47 | 48 | def _load_labelmap(labelmap_path): 49 | """Loads labelmap from the labelmap path. 50 | 51 | Args: 52 | labelmap_path: Path to the labelmap. 53 | 54 | Returns: 55 | A dictionary mapping class name to class numerical id 56 | A list with dictionaries, one dictionary per category. 57 | """ 58 | 59 | label_map = string_int_label_map_pb2.StringIntLabelMap() 60 | with open(labelmap_path, 'r') as fid: 61 | label_map_string = fid.read() 62 | text_format.Merge(label_map_string, label_map) 63 | labelmap_dict = {} 64 | categories = [] 65 | for item in label_map.item: 66 | labelmap_dict[item.name] = item.id 67 | categories.append({'id': item.id, 'name': item.name}) 68 | return labelmap_dict, categories 69 | 70 | 71 | def main(parsed_args): 72 | all_box_annotations = pd.read_csv(parsed_args.input_annotations_boxes) 73 | all_label_annotations = pd.read_csv(parsed_args.input_annotations_labels) 74 | 75 | all_label_annotations.rename( 76 | columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True) 77 | all_annotations = pd.concat([all_box_annotations, all_label_annotations]) 78 | 79 | class_label_map, categories = _load_labelmap(parsed_args.input_class_labelmap) 80 | challenge_evaluator = ( 81 | object_detection_evaluation.OpenImagesDetectionChallengeEvaluator( 82 | categories)) 83 | 84 | for _, groundtruth in enumerate(all_annotations.groupby('ImageID')): 85 | image_id, image_groundtruth = groundtruth 86 | groundtruth_dictionary = utils.build_groundtruth_boxes_dictionary( 87 | image_groundtruth, class_label_map) 88 | challenge_evaluator.add_single_ground_truth_image_info( 89 | image_id, groundtruth_dictionary) 90 | 91 | all_predictions = pd.read_csv(parsed_args.input_predictions) 92 | for _, prediction_data in enumerate(all_predictions.groupby('ImageID')): 93 | image_id, image_predictions = prediction_data 94 | prediction_dictionary = utils.build_predictions_dictionary( 95 | image_predictions, class_label_map) 96 | challenge_evaluator.add_single_detected_image_info(image_id, 97 | prediction_dictionary) 98 | 99 | metrics = challenge_evaluator.evaluate() 100 | 101 | with open(parsed_args.output_metrics, 'w') as fid: 102 | io_utils.write_csv(fid, metrics) 103 | 104 | 105 | if __name__ == '__main__': 106 | parser = argparse.ArgumentParser( 107 | description='Evaluate Open 
Images Object Detection Challenge predictions.' 108 | ) 109 | parser.add_argument( 110 | '--input_annotations_boxes', 111 | required=True, 112 | help='File with groundtruth boxes annotations.') 113 | parser.add_argument( 114 | '--input_annotations_labels', 115 | required=True, 116 | help='File with groundtruth labels annotations') 117 | parser.add_argument( 118 | '--input_predictions', 119 | required=True, 120 | help="""File with detection predictions; NOTE: no postprocessing is 121 | applied in the evaluation script.""") 122 | parser.add_argument( 123 | '--input_class_labelmap', 124 | required=True, 125 | help='Open Images Challenge labelmap.') 126 | parser.add_argument( 127 | '--output_metrics', required=True, help='Output file with csv metrics') 128 | 129 | args = parser.parse_args() 130 | main(args) 131 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_box_list_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.np_box_list_test.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import np_box_list 22 | 23 | 24 | class BoxListTest(tf.test.TestCase): 25 | 26 | def test_invalid_box_data(self): 27 | with self.assertRaises(ValueError): 28 | np_box_list.BoxList([0, 0, 1, 1]) 29 | 30 | with self.assertRaises(ValueError): 31 | np_box_list.BoxList(np.array([[0, 0, 1, 1]], dtype=int)) 32 | 33 | with self.assertRaises(ValueError): 34 | np_box_list.BoxList(np.array([0, 1, 1, 3, 4], dtype=float)) 35 | 36 | with self.assertRaises(ValueError): 37 | np_box_list.BoxList(np.array([[0, 1, 1, 3], [3, 1, 1, 5]], dtype=float)) 38 | 39 | def test_has_field_with_existed_field(self): 40 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 41 | [0.0, 0.0, 20.0, 20.0]], 42 | dtype=float) 43 | boxlist = np_box_list.BoxList(boxes) 44 | self.assertTrue(boxlist.has_field('boxes')) 45 | 46 | def test_has_field_with_nonexisted_field(self): 47 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 48 | [0.0, 0.0, 20.0, 20.0]], 49 | dtype=float) 50 | boxlist = np_box_list.BoxList(boxes) 51 | self.assertFalse(boxlist.has_field('scores')) 52 | 53 | def test_get_field_with_existed_field(self): 54 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 55 | [0.0, 0.0, 20.0, 20.0]], 56 | dtype=float) 57 | boxlist = np_box_list.BoxList(boxes) 58 | self.assertTrue(np.allclose(boxlist.get_field('boxes'), boxes)) 59 | 60 | def test_get_field_with_nonexited_field(self): 61 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 62 | [0.0, 0.0, 20.0, 20.0]], 63 | dtype=float) 64 | boxlist = np_box_list.BoxList(boxes) 65 | with 
self.assertRaises(ValueError): 66 | boxlist.get_field('scores') 67 | 68 | 69 | class AddExtraFieldTest(tf.test.TestCase): 70 | 71 | def setUp(self): 72 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 73 | [0.0, 0.0, 20.0, 20.0]], 74 | dtype=float) 75 | self.boxlist = np_box_list.BoxList(boxes) 76 | 77 | def test_add_already_existed_field(self): 78 | with self.assertRaises(ValueError): 79 | self.boxlist.add_field('boxes', np.array([[0, 0, 0, 1, 0]], dtype=float)) 80 | 81 | def test_add_invalid_field_data(self): 82 | with self.assertRaises(ValueError): 83 | self.boxlist.add_field('scores', np.array([0.5, 0.7], dtype=float)) 84 | with self.assertRaises(ValueError): 85 | self.boxlist.add_field('scores', 86 | np.array([0.5, 0.7, 0.9, 0.1], dtype=float)) 87 | 88 | def test_add_single_dimensional_field_data(self): 89 | boxlist = self.boxlist 90 | scores = np.array([0.5, 0.7, 0.9], dtype=float) 91 | boxlist.add_field('scores', scores) 92 | self.assertTrue(np.allclose(scores, self.boxlist.get_field('scores'))) 93 | 94 | def test_add_multi_dimensional_field_data(self): 95 | boxlist = self.boxlist 96 | labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]], 97 | dtype=int) 98 | boxlist.add_field('labels', labels) 99 | self.assertTrue(np.allclose(labels, self.boxlist.get_field('labels'))) 100 | 101 | def test_get_extra_fields(self): 102 | boxlist = self.boxlist 103 | self.assertItemsEqual(boxlist.get_extra_fields(), []) 104 | 105 | scores = np.array([0.5, 0.7, 0.9], dtype=float) 106 | boxlist.add_field('scores', scores) 107 | self.assertItemsEqual(boxlist.get_extra_fields(), ['scores']) 108 | 109 | labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]], 110 | dtype=int) 111 | boxlist.add_field('labels', labels) 112 | self.assertItemsEqual(boxlist.get_extra_fields(), ['scores', 'labels']) 113 | 114 | def test_get_coordinates(self): 115 | y_min, x_min, y_max, x_max = self.boxlist.get_coordinates() 116 | 117 | expected_y_min = np.array([3.0, 14.0, 0.0], dtype=float) 118 | expected_x_min = np.array([4.0, 14.0, 0.0], dtype=float) 119 | expected_y_max = np.array([6.0, 15.0, 20.0], dtype=float) 120 | expected_x_max = np.array([8.0, 15.0, 20.0], dtype=float) 121 | 122 | self.assertTrue(np.allclose(y_min, expected_y_min)) 123 | self.assertTrue(np.allclose(x_min, expected_x_min)) 124 | self.assertTrue(np.allclose(y_max, expected_y_max)) 125 | self.assertTrue(np.allclose(x_max, expected_x_max)) 126 | 127 | def test_num_boxes(self): 128 | boxes = np.array([[0., 0., 100., 100.], [10., 30., 50., 70.]], dtype=float) 129 | boxlist = np_box_list.BoxList(boxes) 130 | expected_num_boxes = 2 131 | self.assertEqual(boxlist.num_boxes(), expected_num_boxes) 132 | 133 | 134 | if __name__ == '__main__': 135 | tf.test.main() 136 | -------------------------------------------------------------------------------- /notebooks/submission_merge.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "\n", 11 | "from tqdm import tqdm_notebook as tqdm\n", 12 | "import glob\n", 13 | "from collections import defaultdict\n", 14 | "from typing import Sequence\n", 15 | "import pandas as pd\n", 16 | "\n", 17 | "EXPERIMENT_DIRPATH = 'PATH/TO/models'\n", 18 | "SAMPLE_SUBMISSION_FILEPATH = 'PATH/TO/data/sample_submission.csv'" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 
| "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "def merge_submissions(submissions: Sequence[pd.DataFrame]) -> pd.DataFrame:\n", 28 | " sub_dict = defaultdict(lambda: [])\n", 29 | " merged_sub = defaultdict(lambda: [])\n", 30 | " for sub in submissions:\n", 31 | " for _, row in sub.iterrows():\n", 32 | " pred_string = str(row['PredictionString'])\n", 33 | " sub_dict[row['ImageId']]\n", 34 | " if 'nan' not in pred_string:\n", 35 | " sub_dict[row['ImageId']].append(pred_string)\n", 36 | " for key, value in sub_dict.items():\n", 37 | " merged_sub['ImageId'].append(key)\n", 38 | " merged_sub['PredictionString'].append(' '.join(value))\n", 39 | " final_sub = pd.DataFrame.from_dict(merged_sub)\n", 40 | " return final_sub" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "batch_submissions = []\n", 50 | "for filepath in glob.glob('{}/batch_*/submission.csv'.format(EXPERIMENT_DIRPATH)):\n", 51 | " print('Processing {}'.format(filepath))\n", 52 | " batch_submission = pd.read_csv(filepath)\n", 53 | " batch_submissions.append(batch_submission)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "submission = merge_submissions(batch_submissions)\n", 63 | "submission.head()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "# Fix those that have bbox with xmin <= xmax and stuff" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "def chunker(seq, size):\n", 80 | " return (seq[pos:pos + size] for pos in range(0, len(seq), size))\n", 81 | "\n", 82 | "def clean_submission(submission):\n", 83 | " ids_cleaned , predictions_cleaned = [], []\n", 84 | " for i, row in tqdm(submission.iterrows(), total = len(submission)):\n", 85 | " img_predictions = row.PredictionString.split(' ')\n", 86 | " if img_predictions == ['']:\n", 87 | " continue\n", 88 | " else:\n", 89 | " img_predictions_cleaned = []\n", 90 | " for pred in chunker(img_predictions,size=6):\n", 91 | " label,score,x1,y1,x2,y2 = pred\n", 92 | " x1,y1,x2,y2 = float(x1),float(y1),float(x2),float(y2)\n", 93 | " if x2 <= x1 or y2 <= y1:\n", 94 | " continue\n", 95 | " else:\n", 96 | " img_predictions_cleaned.extend(pred)\n", 97 | " img_predictions_cleaned = ' '.join(img_predictions_cleaned)\n", 98 | " predictions_cleaned.append(img_predictions_cleaned)\n", 99 | " ids_cleaned.append(row.ImageId)\n", 100 | " submission_fixed = pd.DataFrame({'ImageId': ids_cleaned, \n", 101 | " 'PredictionString':predictions_cleaned})\n", 102 | " return submission_fixed" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "submission = clean_submission(submission)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "def add_missing_image_ids(submission, sample_submission):\n", 121 | " submission['ImageId'] = submission['ImageId'].astype(str)\n", 122 | " sample_submission['ImageId'] = sample_submission['ImageId'].astype(str)\n", 123 | " fixed_submission = pd.merge(sample_submission[['ImageId']], submission, on=['ImageId'], how='outer')\n", 124 | " return fixed_submission" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | 
"metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "sample_submission = pd.read_csv(SAMPLE_SUBMISSION_FILEPATH)\n", 134 | "submission = add_missing_image_ids(submission, sample_submission)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "submission.to_csv(os.path.join(EXPERIMENT_DIRPATH,'merged_submission.csv'), index=False)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [] 152 | } 153 | ], 154 | "metadata": { 155 | "kernelspec": { 156 | "display_name": "cpu py3", 157 | "language": "python", 158 | "name": "cpu_py3" 159 | }, 160 | "language_info": { 161 | "codemirror_mode": { 162 | "name": "ipython", 163 | "version": 3 164 | }, 165 | "file_extension": ".py", 166 | "mimetype": "text/x-python", 167 | "name": "python", 168 | "nbconvert_exporter": "python", 169 | "pygments_lexer": "ipython3", 170 | "version": "3.5.2" 171 | } 172 | }, 173 | "nbformat": 4, 174 | "nbformat_minor": 2 175 | } 176 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import click 2 | from src.pipeline_manager import PipelineManager 3 | 4 | pipeline_manager = PipelineManager() 5 | 6 | 7 | @click.group() 8 | def main(): 9 | pass 10 | 11 | 12 | @main.command() 13 | def prepare_metadata(): 14 | pipeline_manager.prepare_metadata() 15 | 16 | 17 | @main.command() 18 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 19 | @click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False) 20 | def train(pipeline_name, dev_mode): 21 | pipeline_manager.train(pipeline_name, dev_mode) 22 | 23 | 24 | @main.command() 25 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 26 | @click.option('-d', '--image_dir', help='path to image folder with test files', required=False) 27 | @click.option('-s', '--single_image', help='predict single image and visualize', required=False) 28 | @click.option('-n', '--n_files', help='number of files to visualize', type=int, default=16) 29 | @click.option('-sp', '--show_popups', help="if showing images in a popup window") 30 | @click.option('-cl', '--classes_to_visualize', 31 | help="Reduce the vis to subset of classes (string with comma separated classes)") 32 | @click.option('-clsthr', '--classification_threshold', type=float) 33 | @click.option('-nmsthr', '--nms_threshold', type=float) 34 | def visualize(pipeline_name, image_dir=None, single_image=None, n_files=16, show_popups=False, 35 | classes_to_visualize=None, nms_threshold=None, classification_threshold=None): 36 | """ 37 | Makes predictions on test_images (or images in image_dir) draw bounding boxes on them and sends to neptune. 
38 | If show pop_ups it will display the predictions locally in a window popup 39 | Example: 40 | neptune run --config "./configs/neptune_config_local.yaml" main.py -- \ 41 | visualize --pipeline_name retinanet --classes_to_visualize='Picture frame,Cat' 42 | """ 43 | if classes_to_visualize: 44 | classes_to_visualize = classes_to_visualize.split(',') 45 | classes_to_visualize = list(filter(None, classes_to_visualize)) # filter out empty strings 46 | 47 | pipeline_manager.visualize(pipeline_name, image_dir, single_image, 48 | n_files, show_popups, classes_to_visualize, 49 | nms_threshold, classification_threshold) 50 | 51 | 52 | @main.command() 53 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 54 | @click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False) 55 | @click.option('-c', '--chunk_size', help='size of the chunks to run evaluation on', type=int, default=None, 56 | required=False) 57 | def evaluate(pipeline_name, dev_mode, chunk_size): 58 | pipeline_manager.evaluate(pipeline_name, dev_mode, chunk_size) 59 | 60 | 61 | @main.command() 62 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 63 | @click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False) 64 | @click.option('-s', '--submit_predictions', help='submit predictions if true', is_flag=True, required=False) 65 | @click.option('-c', '--chunk_size', help='size of the chunks to run prediction on', type=int, default=None, 66 | required=False) 67 | def predict(pipeline_name, dev_mode, submit_predictions, chunk_size): 68 | pipeline_manager.predict(pipeline_name, dev_mode, submit_predictions, chunk_size) 69 | 70 | 71 | @main.command() 72 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 73 | @click.option('-s', '--submit_predictions', help='submit predictions if true', is_flag=True, required=False) 74 | @click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False) 75 | @click.option('-c', '--chunk_size', help='size of the chunks to run evaluation and prediction on', type=int, 76 | default=None, required=False) 77 | def train_evaluate_predict(pipeline_name, submit_predictions, dev_mode, chunk_size): 78 | pipeline_manager.train(pipeline_name, dev_mode) 79 | pipeline_manager.evaluate(pipeline_name, dev_mode, chunk_size) 80 | pipeline_manager.predict(pipeline_name, dev_mode, submit_predictions, chunk_size) 81 | 82 | 83 | @main.command() 84 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 85 | @click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False) 86 | @click.option('-c', '--chunk_size', help='size of the chunks to run evaluation and prediction on', type=int, 87 | default=None, required=False) 88 | def train_evaluate(pipeline_name, dev_mode, chunk_size): 89 | pipeline_manager.train(pipeline_name, dev_mode) 90 | pipeline_manager.evaluate(pipeline_name, dev_mode, chunk_size) 91 | 92 | 93 | @main.command() 94 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 95 | @click.option('-s', '--submit_predictions', help='submit predictions if true', is_flag=True, required=False) 96 | @click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False) 97 | 
@click.option('-c', '--chunk_size', help='size of the chunks to run prediction on', type=int, default=None, 98 | required=False) 99 | def evaluate_predict(pipeline_name, submit_predictions, dev_mode, chunk_size): 100 | pipeline_manager.evaluate(pipeline_name, dev_mode, chunk_size) 101 | pipeline_manager.predict(pipeline_name, dev_mode, submit_predictions, chunk_size) 102 | 103 | 104 | @main.command() 105 | @click.option('-f', '--submission_filepath', help='filepath to json submission file', required=True) 106 | def submit_predictions(submission_filepath): 107 | pipeline_manager.make_submission(submission_filepath) 108 | 109 | 110 | if __name__ == "__main__": 111 | main() 112 | -------------------------------------------------------------------------------- /src/object_detection/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Contains functions which are convenient for unit testing.""" 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | from object_detection.core import anchor_generator 21 | from object_detection.core import box_coder 22 | from object_detection.core import box_list 23 | from object_detection.core import box_predictor 24 | from object_detection.core import matcher 25 | from object_detection.utils import shape_utils 26 | 27 | 28 | class MockBoxCoder(box_coder.BoxCoder): 29 | """Simple `difference` BoxCoder.""" 30 | 31 | @property 32 | def code_size(self): 33 | return 4 34 | 35 | def _encode(self, boxes, anchors): 36 | return boxes.get() - anchors.get() 37 | 38 | def _decode(self, rel_codes, anchors): 39 | return box_list.BoxList(rel_codes + anchors.get()) 40 | 41 | 42 | class MockBoxPredictor(box_predictor.BoxPredictor): 43 | """Simple box predictor that ignores inputs and outputs all zeros.""" 44 | 45 | def __init__(self, is_training, num_classes): 46 | super(MockBoxPredictor, self).__init__(is_training, num_classes) 47 | 48 | def _predict(self, image_features, num_predictions_per_location): 49 | image_feature = image_features[0] 50 | combined_feature_shape = shape_utils.combined_static_and_dynamic_shape( 51 | image_feature) 52 | batch_size = combined_feature_shape[0] 53 | num_anchors = (combined_feature_shape[1] * combined_feature_shape[2]) 54 | code_size = 4 55 | zero = tf.reduce_sum(0 * image_feature) 56 | box_encodings = zero + tf.zeros( 57 | (batch_size, num_anchors, 1, code_size), dtype=tf.float32) 58 | class_predictions_with_background = zero + tf.zeros( 59 | (batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32) 60 | return {box_predictor.BOX_ENCODINGS: box_encodings, 61 | box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND: 62 | class_predictions_with_background} 63 | 64 | 65 | class MockAnchorGenerator(anchor_generator.AnchorGenerator): 66 | """Mock 
anchor generator.""" 67 | 68 | def name_scope(self): 69 | return 'MockAnchorGenerator' 70 | 71 | def num_anchors_per_location(self): 72 | return [1] 73 | 74 | def _generate(self, feature_map_shape_list): 75 | num_anchors = sum([shape[0] * shape[1] for shape in feature_map_shape_list]) 76 | return box_list.BoxList(tf.zeros((num_anchors, 4), dtype=tf.float32)) 77 | 78 | 79 | class MockMatcher(matcher.Matcher): 80 | """Simple matcher that matches first anchor to first groundtruth box.""" 81 | 82 | def _match(self, similarity_matrix): 83 | return tf.constant([0, -1, -1, -1], dtype=tf.int32) 84 | 85 | 86 | def create_diagonal_gradient_image(height, width, depth): 87 | """Creates pyramid image. Useful for testing. 88 | 89 | For example, pyramid_image(5, 6, 1) looks like: 90 | # [[[ 5. 4. 3. 2. 1. 0.] 91 | # [ 6. 5. 4. 3. 2. 1.] 92 | # [ 7. 6. 5. 4. 3. 2.] 93 | # [ 8. 7. 6. 5. 4. 3.] 94 | # [ 9. 8. 7. 6. 5. 4.]]] 95 | 96 | Args: 97 | height: height of image 98 | width: width of image 99 | depth: depth of image 100 | 101 | Returns: 102 | pyramid image 103 | """ 104 | row = np.arange(height) 105 | col = np.arange(width)[::-1] 106 | image_layer = np.expand_dims(row, 1) + col 107 | image_layer = np.expand_dims(image_layer, 2) 108 | 109 | image = image_layer 110 | for i in range(1, depth): 111 | image = np.concatenate((image, image_layer * pow(10, i)), 2) 112 | 113 | return image.astype(np.float32) 114 | 115 | 116 | def create_random_boxes(num_boxes, max_height, max_width): 117 | """Creates random bounding boxes of specific maximum height and width. 118 | 119 | Args: 120 | num_boxes: number of boxes. 121 | max_height: maximum height of boxes. 122 | max_width: maximum width of boxes. 123 | 124 | Returns: 125 | boxes: numpy array of shape [num_boxes, 4]. Each row is in form 126 | [y_min, x_min, y_max, x_max]. 127 | """ 128 | 129 | y_1 = np.random.uniform(size=(1, num_boxes)) * max_height 130 | y_2 = np.random.uniform(size=(1, num_boxes)) * max_height 131 | x_1 = np.random.uniform(size=(1, num_boxes)) * max_width 132 | x_2 = np.random.uniform(size=(1, num_boxes)) * max_width 133 | 134 | boxes = np.zeros(shape=(num_boxes, 4)) 135 | boxes[:, 0] = np.minimum(y_1, y_2) 136 | boxes[:, 1] = np.minimum(x_1, x_2) 137 | boxes[:, 2] = np.maximum(y_1, y_2) 138 | boxes[:, 3] = np.maximum(x_1, x_2) 139 | 140 | return boxes.astype(np.float32) 141 | 142 | 143 | def first_rows_close_as_set(a, b, k=None, rtol=1e-6, atol=1e-6): 144 | """Checks if first K entries of two lists are close, up to permutation. 145 | 146 | Inputs to this assert are lists of items which can be compared via 147 | numpy.allclose(...) and can be sorted. 148 | 149 | Args: 150 | a: list of items which can be compared via numpy.allclose(...) and are 151 | sortable. 152 | b: list of items which can be compared via numpy.allclose(...) and are 153 | sortable. 154 | k: a non-negative integer. If not provided, k is set to be len(a). 155 | rtol: relative tolerance. 156 | atol: absolute tolerance. 157 | 158 | Returns: 159 | boolean, True if input lists a and b have the same length and 160 | the first k entries of the inputs satisfy numpy.allclose() after 161 | sorting entries. 
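  Example (illustrative; uses plain floats, which are sortable and can be
  compared via numpy.allclose):

    >>> first_rows_close_as_set([1.0, 2.0, 3.0], [3.0, 1.0, 2.0])
    True
    >>> first_rows_close_as_set([1.0, 2.0, 3.0], [1.0, 2.0, 2.5])
    False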
162 | """ 163 | if not isinstance(a, list) or not isinstance(b, list) or len(a) != len(b): 164 | return False 165 | if not k: 166 | k = len(a) 167 | k = min(k, len(a)) 168 | a_sorted = sorted(a[:k]) 169 | b_sorted = sorted(b[:k]) 170 | return all([ 171 | np.allclose(entry_a, entry_b, rtol, atol) 172 | for (entry_a, entry_b) in zip(a_sorted, b_sorted) 173 | ]) 174 | -------------------------------------------------------------------------------- /src/pipelines.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from steppy.base import Step, IdentityOperation 4 | from steppy.adapter import Adapter, E 5 | 6 | from .loaders import ImageDetectionLoader 7 | from .models import Retina 8 | from .retinanet import DataDecoder 9 | from .postprocessing import PredictionFormatter, Visualizer 10 | from .preprocessing import GoogleAiLabelEncoder, GoogleAiLabelDecoder 11 | 12 | 13 | def retinanet(config, train_mode, visualize=False): 14 | persist_output = False 15 | load_persisted_output = False 16 | 17 | loader = preprocessing_generator(config, is_train=train_mode) 18 | 19 | retinanet = Step(name='retinanet', 20 | transformer=Retina(**config.retinanet, train_mode=train_mode), 21 | input_steps=[loader], 22 | experiment_directory=config.env.cache_dirpath, 23 | persist_output=persist_output, 24 | is_trainable=True, 25 | load_persisted_output=load_persisted_output) 26 | 27 | if train_mode: 28 | return retinanet 29 | 30 | if visualize: 31 | return visualizer(retinanet, loader.get_step('label_encoder'), config) 32 | 33 | postprocessor = postprocessing(retinanet, loader.get_step('label_encoder'), config) 34 | 35 | output = Step(name='output', 36 | transformer=IdentityOperation(), 37 | input_steps=[postprocessor], 38 | adapter=Adapter({'y_pred': E(postprocessor.name, 'submission')}), 39 | experiment_directory=config.env.cache_dirpath, 40 | persist_output=persist_output, 41 | load_persisted_output=load_persisted_output) 42 | return output 43 | 44 | 45 | def preprocessing_generator(config, is_train): 46 | label_encoder = Step(name='label_encoder', 47 | transformer=GoogleAiLabelEncoder(**config.label_encoder), 48 | input_data=['metadata'], 49 | adapter=Adapter({'annotations': E('metadata', 'annotations'), 50 | 'annotations_human_labels': E('metadata', 'annotations_human_labels') 51 | }), 52 | is_trainable=True, 53 | experiment_directory=config.env.cache_dirpath) 54 | 55 | if is_train: 56 | loader = Step(name='loader', 57 | transformer=ImageDetectionLoader(train_mode=True, **config.loader), 58 | input_data=['input', 'validation_input'], 59 | input_steps=[label_encoder], 60 | adapter=Adapter({'images_data': E('input', 'images_data'), 61 | 'valid_images_data': E('validation_input', 'valid_images_data'), 62 | 'annotations': E(label_encoder.name, 'annotations'), 63 | 'annotations_human_labels': E(label_encoder.name, 'annotations_human_labels'), 64 | }), 65 | experiment_directory=config.env.cache_dirpath) 66 | 67 | else: 68 | loader = Step(name='loader', 69 | transformer=ImageDetectionLoader(train_mode=False, **config.loader), 70 | input_data=['input'], 71 | input_steps=[label_encoder], 72 | adapter=Adapter({'images_data': E('input', 'images_data'), 73 | 'annotations': None, 74 | 'annotations_human_labels': None, 75 | }), 76 | experiment_directory=config.env.cache_dirpath) 77 | return loader 78 | 79 | 80 | def visualizer(model, label_encoder, config): 81 | label_decoder = Step(name='label_decoder', 82 | transformer=GoogleAiLabelDecoder(), 83 | 
input_steps=[label_encoder, ], 84 | experiment_directory=config.env.cache_dirpath) 85 | 86 | decoder = Step(name='decoder', 87 | transformer=DataDecoder(**config.postprocessing.data_decoder), 88 | input_data=['input'], 89 | input_steps=[model, ], 90 | experiment_directory=config.env.cache_dirpath) 91 | 92 | visualize = Step(name='visualizer', 93 | transformer=Visualizer(), 94 | input_steps=[label_decoder, decoder], 95 | input_data=['input'], 96 | adapter=Adapter({'images_data': E('input', 'images_data'), 97 | 'results': E(decoder.name, 'results'), 98 | 'decoder_dict': E(label_decoder.name, 'inverse_mapping')}), 99 | experiment_directory=config.env.cache_dirpath) 100 | 101 | return visualize 102 | 103 | 104 | def postprocessing(model, label_encoder, config): 105 | label_decoder = Step(name='label_decoder', 106 | transformer=GoogleAiLabelDecoder(), 107 | input_steps=[label_encoder, ], 108 | experiment_directory=config.env.cache_dirpath) 109 | 110 | decoder = Step(name='decoder', 111 | transformer=DataDecoder(**config.postprocessing.data_decoder), 112 | input_data=['input'], 113 | input_steps=[model, ], 114 | experiment_directory=config.env.cache_dirpath) 115 | 116 | submission_producer = Step(name='submission_producer', 117 | transformer=PredictionFormatter(), 118 | input_steps=[label_decoder, decoder], 119 | input_data=['input'], 120 | adapter=Adapter({'images_data': E('input', 'images_data'), 121 | 'results': E(decoder.name, 'results'), 122 | 'decoder_dict': E(label_decoder.name, 'inverse_mapping')}), 123 | experiment_directory=config.env.cache_dirpath) 124 | return submission_producer 125 | 126 | 127 | PIPELINES = {'retinanet': {'train': partial(retinanet, train_mode=True), 128 | 'inference': partial(retinanet, train_mode=False), 129 | 'visualize': partial(retinanet, train_mode=False, visualize=True) 130 | }, 131 | 132 | } 133 | -------------------------------------------------------------------------------- /src/object_detection/utils/metrics_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for object_detection.metrics.""" 16 | 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | from object_detection.utils import metrics 21 | 22 | 23 | class MetricsTest(tf.test.TestCase): 24 | 25 | def test_compute_cor_loc(self): 26 | num_gt_imgs_per_class = np.array([100, 1, 5, 1, 1], dtype=int) 27 | num_images_correctly_detected_per_class = np.array( 28 | [10, 0, 1, 0, 0], dtype=int) 29 | corloc = metrics.compute_cor_loc(num_gt_imgs_per_class, 30 | num_images_correctly_detected_per_class) 31 | expected_corloc = np.array([0.1, 0, 0.2, 0, 0], dtype=float) 32 | self.assertTrue(np.allclose(corloc, expected_corloc)) 33 | 34 | def test_compute_cor_loc_nans(self): 35 | num_gt_imgs_per_class = np.array([100, 0, 0, 1, 1], dtype=int) 36 | num_images_correctly_detected_per_class = np.array( 37 | [10, 0, 1, 0, 0], dtype=int) 38 | corloc = metrics.compute_cor_loc(num_gt_imgs_per_class, 39 | num_images_correctly_detected_per_class) 40 | expected_corloc = np.array([0.1, np.nan, np.nan, 0, 0], dtype=float) 41 | self.assertAllClose(corloc, expected_corloc) 42 | 43 | def test_compute_precision_recall(self): 44 | num_gt = 10 45 | scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float) 46 | labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool) 47 | labels_float_type = np.array([0, 1, 1, 0, 0, 1], dtype=float) 48 | accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float) 49 | expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6]) 50 | expected_recall = accumulated_tp_count / num_gt 51 | 52 | precision, recall = metrics.compute_precision_recall(scores, labels, num_gt) 53 | precision_float_type, recall_float_type = metrics.compute_precision_recall( 54 | scores, labels_float_type, num_gt) 55 | 56 | self.assertAllClose(precision, expected_precision) 57 | self.assertAllClose(recall, expected_recall) 58 | self.assertAllClose(precision_float_type, expected_precision) 59 | self.assertAllClose(recall_float_type, expected_recall) 60 | 61 | def test_compute_precision_recall_float(self): 62 | num_gt = 10 63 | scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float) 64 | labels_float = np.array([0, 1, 1, 0.5, 0, 1], dtype=float) 65 | expected_precision = np.array( 66 | [0., 0.5, 0.33333333, 0.5, 0.55555556, 0.63636364], dtype=float) 67 | expected_recall = np.array([0., 0.1, 0.1, 0.2, 0.25, 0.35], dtype=float) 68 | precision, recall = metrics.compute_precision_recall( 69 | scores, labels_float, num_gt) 70 | self.assertAllClose(precision, expected_precision) 71 | self.assertAllClose(recall, expected_recall) 72 | 73 | def test_compute_average_precision(self): 74 | precision = np.array([0.8, 0.76, 0.9, 0.65, 0.7, 0.5, 0.55, 0], dtype=float) 75 | recall = np.array([0.3, 0.3, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5], dtype=float) 76 | processed_precision = np.array( 77 | [0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0], dtype=float) 78 | recall_interval = np.array([0.3, 0, 0.1, 0, 0.05, 0, 0.05, 0], dtype=float) 79 | expected_mean_ap = np.sum(recall_interval * processed_precision) 80 | mean_ap = metrics.compute_average_precision(precision, recall) 81 | self.assertAlmostEqual(expected_mean_ap, mean_ap) 82 | 83 | def test_compute_precision_recall_and_ap_no_groundtruth(self): 84 | num_gt = 0 85 | scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float) 86 | labels = np.array([0, 0, 0, 0, 0, 0], dtype=bool) 87 | expected_precision = None 88 | expected_recall = None 89 | precision, recall = 
metrics.compute_precision_recall(scores, labels, num_gt) 90 | self.assertEqual(precision, expected_precision) 91 | self.assertEqual(recall, expected_recall) 92 | ap = metrics.compute_average_precision(precision, recall) 93 | self.assertTrue(np.isnan(ap)) 94 | 95 | def test_compute_recall_at_k(self): 96 | num_gt = 4 97 | tp_fp = [ 98 | np.array([1, 0, 0], dtype=float), 99 | np.array([0, 1], dtype=float), 100 | np.array([0, 0, 0, 0, 0], dtype=float) 101 | ] 102 | tp_fp_bool = [ 103 | np.array([True, False, False], dtype=bool), 104 | np.array([False, True], dtype=float), 105 | np.array([False, False, False, False, False], dtype=float) 106 | ] 107 | 108 | recall_1 = metrics.compute_recall_at_k(tp_fp, num_gt, 1) 109 | recall_3 = metrics.compute_recall_at_k(tp_fp, num_gt, 3) 110 | recall_5 = metrics.compute_recall_at_k(tp_fp, num_gt, 5) 111 | 112 | recall_3_bool = metrics.compute_recall_at_k(tp_fp_bool, num_gt, 3) 113 | 114 | self.assertAlmostEqual(recall_1, 0.25) 115 | self.assertAlmostEqual(recall_3, 0.5) 116 | self.assertAlmostEqual(recall_3_bool, 0.5) 117 | self.assertAlmostEqual(recall_5, 0.5) 118 | 119 | def test_compute_median_rank_at_k(self): 120 | tp_fp = [ 121 | np.array([1, 0, 0], dtype=float), 122 | np.array([0, 0.1], dtype=float), 123 | np.array([0, 0, 0, 0, 0], dtype=float) 124 | ] 125 | tp_fp_bool = [ 126 | np.array([True, False, False], dtype=bool), 127 | np.array([False, True], dtype=float), 128 | np.array([False, False, False, False, False], dtype=float) 129 | ] 130 | 131 | median_ranks_1 = metrics.compute_median_rank_at_k(tp_fp, 1) 132 | median_ranks_3 = metrics.compute_median_rank_at_k(tp_fp, 3) 133 | median_ranks_5 = metrics.compute_median_rank_at_k(tp_fp, 5) 134 | median_ranks_3_bool = metrics.compute_median_rank_at_k(tp_fp_bool, 3) 135 | 136 | self.assertEqual(median_ranks_1, 0) 137 | self.assertEqual(median_ranks_3, 0.5) 138 | self.assertEqual(median_ranks_3_bool, 0.5) 139 | self.assertEqual(median_ranks_5, 0.5) 140 | 141 | 142 | if __name__ == '__main__': 143 | tf.test.main() 144 | -------------------------------------------------------------------------------- /src/object_detection/utils/label_map_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Label map utility functions.""" 17 | 18 | import logging 19 | 20 | import tensorflow as tf 21 | from google.protobuf import text_format 22 | from object_detection.protos import string_int_label_map_pb2 23 | 24 | 25 | def _validate_label_map(label_map): 26 | """Checks if a label map is valid. 27 | 28 | Args: 29 | label_map: StringIntLabelMap to validate. 30 | 31 | Raises: 32 | ValueError: if label map is invalid. 
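  For reference, a minimal valid label map entry in text format looks like the
  following (illustrative snippet; id 0 is reserved for the background label):

    item {
      name: "/m/01g317"
      id: 1
      display_name: "Person"
    }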
33 | """ 34 | for item in label_map.item: 35 | if item.id < 0: 36 | raise ValueError('Label map ids should be >= 0.') 37 | if (item.id == 0 and item.name != 'background' and 38 | item.display_name != 'background'): 39 | raise ValueError('Label map id 0 is reserved for the background label') 40 | 41 | 42 | def create_category_index(categories): 43 | """Creates dictionary of COCO compatible categories keyed by category id. 44 | 45 | Args: 46 | categories: a list of dicts, each of which has the following keys: 47 | 'id': (required) an integer id uniquely identifying this category. 48 | 'name': (required) string representing category name 49 | e.g., 'cat', 'dog', 'pizza'. 50 | 51 | Returns: 52 | category_index: a dict containing the same entries as categories, but keyed 53 | by the 'id' field of each category. 54 | """ 55 | category_index = {} 56 | for cat in categories: 57 | category_index[cat['id']] = cat 58 | return category_index 59 | 60 | 61 | def get_max_label_map_index(label_map): 62 | """Get maximum index in label map. 63 | 64 | Args: 65 | label_map: a StringIntLabelMapProto 66 | 67 | Returns: 68 | an integer 69 | """ 70 | return max([item.id for item in label_map.item]) 71 | 72 | 73 | def convert_label_map_to_categories(label_map, 74 | max_num_classes, 75 | use_display_name=True): 76 | """Loads label map proto and returns categories list compatible with eval. 77 | 78 | This function loads a label map and returns a list of dicts, each of which 79 | has the following keys: 80 | 'id': (required) an integer id uniquely identifying this category. 81 | 'name': (required) string representing category name 82 | e.g., 'cat', 'dog', 'pizza'. 83 | We only allow class into the list if its id-label_id_offset is 84 | between 0 (inclusive) and max_num_classes (exclusive). 85 | If there are several items mapping to the same id in the label map, 86 | we will only keep the first one in the categories list. 87 | 88 | Args: 89 | label_map: a StringIntLabelMapProto or None. If None, a default categories 90 | list is created with max_num_classes categories. 91 | max_num_classes: maximum number of (consecutive) label indices to include. 92 | use_display_name: (boolean) choose whether to load 'display_name' field 93 | as category name. If False or if the display_name field does not exist, 94 | uses 'name' field as category names instead. 95 | Returns: 96 | categories: a list of dictionaries representing all possible categories. 97 | """ 98 | categories = [] 99 | list_of_ids_already_added = [] 100 | if not label_map: 101 | label_id_offset = 1 102 | for class_id in range(max_num_classes): 103 | categories.append({ 104 | 'id': class_id + label_id_offset, 105 | 'name': 'category_{}'.format(class_id + label_id_offset) 106 | }) 107 | return categories 108 | for item in label_map.item: 109 | if not 0 < item.id <= max_num_classes: 110 | logging.info('Ignore item %d since it falls outside of requested ' 111 | 'label range.', item.id) 112 | continue 113 | if use_display_name and item.HasField('display_name'): 114 | name = item.display_name 115 | else: 116 | name = item.name 117 | if item.id not in list_of_ids_already_added: 118 | list_of_ids_already_added.append(item.id) 119 | categories.append({'id': item.id, 'name': name}) 120 | return categories 121 | 122 | 123 | def load_labelmap(path): 124 | """Loads label map proto. 125 | 126 | Args: 127 | path: path to StringIntLabelMap proto text file. 
128 | Returns: 129 | a StringIntLabelMapProto 130 | """ 131 | with tf.gfile.GFile(path, 'r') as fid: 132 | label_map_string = fid.read() 133 | label_map = string_int_label_map_pb2.StringIntLabelMap() 134 | try: 135 | text_format.Merge(label_map_string, label_map) 136 | except text_format.ParseError: 137 | label_map.ParseFromString(label_map_string) 138 | _validate_label_map(label_map) 139 | return label_map 140 | 141 | 142 | def get_label_map_dict(label_map_path, use_display_name=False): 143 | """Reads a label map and returns a dictionary of label names to id. 144 | 145 | Args: 146 | label_map_path: path to label_map. 147 | use_display_name: whether to use the label map items' display names as keys. 148 | 149 | Returns: 150 | A dictionary mapping label names to id. 151 | """ 152 | label_map = load_labelmap(label_map_path) 153 | label_map_dict = {} 154 | for item in label_map.item: 155 | if use_display_name: 156 | label_map_dict[item.display_name] = item.id 157 | else: 158 | label_map_dict[item.name] = item.id 159 | return label_map_dict 160 | 161 | 162 | def create_category_index_from_labelmap(label_map_path): 163 | """Reads a label map and returns a category index. 164 | 165 | Args: 166 | label_map_path: Path to `StringIntLabelMap` proto text file. 167 | 168 | Returns: 169 | A category index, which is a dictionary that maps integer ids to dicts 170 | containing categories, e.g. 171 | {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...} 172 | """ 173 | label_map = load_labelmap(label_map_path) 174 | max_num_classes = max(item.id for item in label_map.item) 175 | categories = convert_label_map_to_categories(label_map, max_num_classes) 176 | return create_category_index(categories) 177 | 178 | 179 | def create_class_agnostic_category_index(): 180 | """Creates a category index with a single `object` class.""" 181 | return {1: {'id': 1, 'name': 'object'}} 182 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Google AI Open Images - Object Detection Track: Open Solution 2 | 3 | [![license](https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000)](https://github.com/neptune-ml/open-solution-googleai-object-detection/blob/master/LICENSE) 4 | [![Join the chat at https://gitter.im/neptune-ml/open-solution-googleai-object-detection](https://badges.gitter.im/neptune-ml/open-solution-googleai-object-detection.svg)](https://gitter.im/neptune-ml/open-solution-googleai-object-detection?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 5 | 6 | This is an open solution to the [Google AI Open Images - Object Detection Track](https://www.kaggle.com/c/google-ai-open-images-object-detection-track) :smiley: 7 | 8 | ## More competitions :sparkler: 9 | Check our collection of [public projects :gift:](https://app.neptune.ml/-/explore), where you can find multiple Kaggle competitions with code, experiments and outputs. 10 | 11 | ## Our goals 12 | We are building an entirely open solution to this competition. Specifically: 13 | 1. **Learning from the process** - updates about new ideas, code and experiments are the best way to learn data science. Our activity is especially useful for people who want to enter the competition but lack the appropriate experience. 14 | 1. Encourage more Kagglers to start working on this competition. 15 | 1. Deliver an open source solution with no strings attached.
Code is available on our [GitHub repository :computer:](https://github.com/neptune-ml/open-solution-googleai-object-detection). This solution should establish a solid benchmark, as well as provide a good base for your custom ideas and experiments. We care about clean code :smiley: 16 | 1. We are opening our experiments as well: everybody can have a **live preview** of our experiments, parameters, code, etc. Check: [Google-AI-Object-Detection-Challenge :chart_with_upwards_trend:](https://app.neptune.ml/neptune-ml/Google-AI-Object-Detection-Challenge) and images below: 17 | 18 | | UNet training monitor :bar_chart: | Predicted bounding boxes :bar_chart: | 19 | |:---|:---| 20 | |[![unet-training-monitor](https://gist.githubusercontent.com/kamil-kaczmarek/b3b939797fb39752c45fdadfedba3ed9/raw/19272701575bca235473adaabb7b7c54b2416a54/gai-1.png)](https://app.neptune.ml/-/dashboard/experiment/f945da64-6dd3-459b-94c5-58bc6a83f590)|[![predicted-bounding-boxes](https://gist.githubusercontent.com/kamil-kaczmarek/b3b939797fb39752c45fdadfedba3ed9/raw/19272701575bca235473adaabb7b7c54b2416a54/gai-2.png)](https://app.neptune.ml/-/dashboard/experiment/c779468e-d3f7-44b8-a3a4-43a012315708)| 21 | 22 | ## Disclaimer 23 | In this open source solution you will find references to [neptune.ml](https://neptune.ml). It is a free platform for community users, which we use daily to keep track of our experiments. Please note that using neptune.ml is not necessary to proceed with this solution. You may run it as a plain Python script :snake:. 24 | 25 | # How to start? 26 | ## Learn about our solutions 27 | 1. Check the [Kaggle forum](https://www.kaggle.com/c/google-ai-open-images-object-detection-track/discussion/62895) and participate in the discussions. 28 | 1. Check our [Wiki pages :dolphin:](https://github.com/neptune-ml/open-solution-googleai-object-detection/wiki), where we describe our work. Below are links to specific solutions: 29 | 30 | | link to code| link to description | 31 | |:---:|:---:| 32 | |[solution-1](https://github.com/neptune-ml/open-solution-googleai-object-detection/tree/solution-1)|[palm-tree :palm_tree:](https://github.com/neptune-ml/open-solution-googleai-object-detection/wiki/RetinaNet-with-sampler)| 33 | 34 | ## Dataset for this competition 35 | This competition is special because it used [Open Images Dataset V4](https://storage.googleapis.com/openimages/web/index.html), which is quite large: `>1.8M` images and `>0.5TB` :astonished: To make it more approachable, we are hosting the entire dataset in neptune's public directory :sunglasses:. **You can use this dataset in [neptune.ml](https://neptune.ml) with no additional setup :+1:.** 36 | 37 | ## Start experimenting with ready-to-use code 38 | You can jump start your participation in the competition by using our starter pack. The installation instructions below will guide you through the setup. 39 | 40 | ## Installation 41 | ### Fast Track 42 | 1. Clone the repository and install requirements (check requirements.txt) 43 | 44 | ```bash 45 | pip3 install -r requirements.txt 46 | ``` 47 | 48 | 2. Register at [neptune.ml](https://neptune.ml/login) _(if you wish to use it)_ and create your project, for example Google-AI-Object-Detection-Challenge. 49 | 3.
Train RetinaNet: 50 | 51 | :hamster: 52 | ```bash 53 | neptune send --worker m-4p100 \ 54 | --environment pytorch-0.3.1-gpu-py3 \ 55 | --config configs/neptune.yaml \ 56 | main.py train --pipeline_name retinanet 57 | ``` 58 | 59 | :trident: 60 | ```bash 61 | neptune run main.py train --pipeline_name retinanet 62 | ``` 63 | 64 | :snake: 65 | ```bash 66 | python main.py -- train --pipeline_name retinanet 67 | ``` 68 | 69 | 4. Evaluate/Predict RetinaNet: 70 | 71 | **Note:** in case of memory trouble, go to `neptune.yaml` and change `batch_size_inference: 1` 72 | 73 | :hamster: 74 | With the cloud environment you need to point the experiment directory to the one you have just trained. Let's assume that your experiment id was `GAI-14`. You should go to `neptune.yaml` and change: 75 | 76 | ```yaml 77 | experiment_dir: /output/experiment 78 | clone_experiment_dir_from: /input/GAI-14/output/experiment 79 | ``` 80 | 81 | ```bash 82 | neptune send --worker m-4p100 \ 83 | --environment pytorch-0.3.1-gpu-py3 \ 84 | --config configs/neptune.yaml \ 85 | --input /GAI-14 \ 86 | main.py evaluate_predict --pipeline_name retinanet --chunk_size 100 87 | ``` 88 | 89 | :trident: 90 | ```bash 91 | neptune run main.py evaluate_predict --pipeline_name retinanet --chunk_size 100 92 | ``` 93 | 94 | :snake: 95 | ```bash 96 | python main.py -- evaluate_predict --pipeline_name retinanet --chunk_size 100 97 | ``` 98 | 99 | ## Get involved 100 | You are welcome to contribute your code and ideas to this open solution. To get started: 101 | 1. Check the [competition project](https://github.com/neptune-ml/open-solution-googleai-object-detection/projects/1) on GitHub to see what we are working on right now. 102 | 1. Express your interest in a particular task by writing a comment in that task, or by creating a new one with your fresh idea. 103 | 1. We will get back to you quickly in order to start working together. 104 | 1. Check [CONTRIBUTING](CONTRIBUTING.md) for some more information. 105 | 106 | ## User support 107 | There are several ways to seek help: 108 | 1. [Kaggle discussion](https://www.kaggle.com/c/google-ai-open-images-object-detection-track/discussion/62895) is our primary way of communication. 109 | 1. Read the project's [Wiki](https://github.com/neptune-ml/open-solution-googleai-object-detection/wiki), where we publish descriptions of the code, pipelines and supporting tools such as [neptune.ml](https://neptune.ml). 110 | 1. Submit an [issue](https://github.com/neptune-ml/open-solution-googleai-object-detection/issues) directly in this repo.
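
## Running the pipeline from Python
The CLI commands above are thin wrappers around `PipelineManager`. If you prefer to drive the pipeline from Python, the minimal sketch below mirrors the call order of `main.py train_evaluate_predict` (it assumes the same `neptune.yaml` parameters are available, because `src/pipeline_config.py` reads them through `neptune.Context()` at import time):

```python
from src.pipeline_manager import PipelineManager

pipeline_manager = PipelineManager()

# same call order as `main.py train_evaluate_predict --pipeline_name retinanet --chunk_size 100`
pipeline_manager.train('retinanet', False)                # pipeline_name, dev_mode
pipeline_manager.evaluate('retinanet', False, 100)        # pipeline_name, dev_mode, chunk_size
pipeline_manager.predict('retinanet', False, False, 100)  # pipeline_name, dev_mode, submit_predictions, chunk_size
```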
111 | -------------------------------------------------------------------------------- /src/pipeline_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import neptune 4 | from attrdict import AttrDict 5 | from .utils import read_params, parameter_eval, get_class_mappings 6 | 7 | ctx = neptune.Context() 8 | params = read_params(ctx) 9 | 10 | ID_COLUMN = 'ImageID' 11 | LABEL_COLUMN = 'LabelName' 12 | SEED = 1234 13 | MEAN = [0.485, 0.456, 0.406] 14 | STD = [0.229, 0.224, 0.225] 15 | 16 | DESIRED_CLASS_SUBSET = parameter_eval(params.desired_class_subset) 17 | N_SUB_CLASSES = len(DESIRED_CLASS_SUBSET) 18 | 19 | ASPECT_RATIOS = parameter_eval(params.aspect_ratios) 20 | SCALE_RATIOS = parameter_eval(params.scale_ratios) 21 | 22 | CODES2NAMES, NAMES2CODES = get_class_mappings(mappings_file=params.class_mappings_filepath) 23 | 24 | GLOBAL_CONFIG = {'exp_root': params.experiment_dir, 25 | 'load_in_memory': params.load_in_memory, 26 | 'num_workers': params.num_workers, 27 | 'num_classes': N_SUB_CLASSES if N_SUB_CLASSES else params.num_classes, 28 | 'batch_size_train': params.batch_size_train, 29 | 'batch_size_inference': params.batch_size_inference, 30 | 'loader_mode': params.loader_mode, 31 | 'stream_mode': params.stream_mode, 32 | 'max_annotation_per_class': params.max_annotation_per_class, 33 | 'use_suppression': params.use_suppression, 34 | } 35 | 36 | SOLUTION_CONFIG = AttrDict({ 37 | 'env': {'cache_dirpath': params.experiment_dir}, 38 | 'execution': GLOBAL_CONFIG, 39 | 40 | 'label_encoder': {'colname': LABEL_COLUMN 41 | }, 42 | 'loader': {'dataset_params': {'images_dir': None, 43 | 'short_dim': params.short_dim, 44 | 'long_dim': params.long_dim, 45 | 'fixed_h': params.fixed_h, 46 | 'fixed_w': params.fixed_w, 47 | 'sampler_name': params.sampler_name, 48 | 'pad_method': params.pad_method, 49 | 'sample_size': params.training_sample_size, 50 | 'valid_sample_size': params.validation_sample_size, 51 | 'even_class_sampling': params.even_class_sampling, 52 | 'use_suppression': params.use_suppression, 53 | 'data_encoder': {'aspect_ratios': ASPECT_RATIOS, 54 | 'scale_ratios': SCALE_RATIOS, 55 | 'num_anchors': len(ASPECT_RATIOS) * len(SCALE_RATIOS)} 56 | }, 57 | 'loader_params': {'training': {'batch_size': params.batch_size_train, 58 | 'shuffle': False, 59 | 'num_workers': params.num_workers, 60 | 'pin_memory': params.pin_memory 61 | }, 62 | 'inference': {'batch_size': params.batch_size_inference, 63 | 'shuffle': False, 64 | 'num_workers': params.num_workers, 65 | 'pin_memory': params.pin_memory 66 | }, 67 | }, 68 | }, 69 | 70 | 'retinanet': { 71 | 'architecture_config': {'model_params': {'encoder_depth': params.encoder_depth, 72 | 'num_classes': N_SUB_CLASSES if N_SUB_CLASSES else params.num_classes, 73 | # we change the model output size if subclasses used 74 | # fallback to config file 75 | 'num_anchors': len(ASPECT_RATIOS) * len(SCALE_RATIOS), 76 | 'pretrained_encoder': params.pretrained_encoder 77 | }, 78 | 'optimizer_params': {'lr': params.lr, 79 | }, 80 | 'regularizer_params': {'regularize': True, 81 | 'weight_decay_conv2d': params.l2_reg_conv, 82 | }, 83 | 'weights_init': {'function': 'he', 84 | 'pi': params.pi 85 | } 86 | }, 87 | 'training_config': {'epochs': params.epochs_nr, 88 | }, 89 | 'callbacks_config': { 90 | 'model_checkpoint': { 91 | 'filepath': os.path.join(GLOBAL_CONFIG['exp_root'], 'checkpoints', 'retinanet', 'best.torch'), 92 | 'epoch_every': 1, 93 | # 'minimize': not params.validate_with_map 94 | }, 95 | 
'exp_lr_scheduler': {'gamma': params.gamma, 96 | 'epoch_every': 1}, 97 | 'plateau_lr_scheduler': {'lr_factor': params.lr_factor, 98 | 'lr_patience': params.lr_patience, 99 | 'epoch_every': 1}, 100 | 'training_monitor': {'batch_every': 1, 101 | 'epoch_every': 1}, 102 | 'experiment_timing': {'batch_every': 10, 103 | 'epoch_every': 1}, 104 | 'validation_monitor': { 105 | 'epoch_every': 1, 106 | # 'data_dir': params.train_imgs_dir, 107 | # 'validate_with_map': params.validate_with_map, 108 | # 'small_annotations_size': params.small_annotations_size, 109 | }, 110 | 'neptune_monitor': {'model_name': 'unet', 111 | # 'image_nr': 16, 112 | # 'image_resize': 0.2, 113 | # 'outputs_to_plot': params.unet_outputs_to_plot 114 | }, 115 | 'early_stopping': {'patience': params.patience, 116 | # 'minimize': not params.validate_with_map 117 | }, 118 | }, 119 | }, 120 | 'postprocessing': { 121 | 'data_decoder': { 122 | 'short_dim': params.short_dim, 123 | 'long_dim': params.long_dim, 124 | 'fixed_h': params.fixed_h, 125 | 'fixed_w': params.fixed_w, 126 | 'sampler_name': params.sampler_name, 127 | 'num_threads': params.num_threads, 128 | 'aspect_ratios': ASPECT_RATIOS, 129 | 'scale_ratios': SCALE_RATIOS, 130 | 'num_anchors': len(ASPECT_RATIOS) * len(SCALE_RATIOS), 131 | 'cls_thrs': params.classification_threshold, 132 | 'nms_thrs': params.nms_threshold 133 | } 134 | }, 135 | }) 136 | -------------------------------------------------------------------------------- /src/models.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from math import log 3 | import torch 4 | from torch.autograd import Variable 5 | from torch import optim 6 | from toolkit.pytorch_transformers.models import Model 7 | from toolkit.pytorch_transformers.callbacks import CallbackList, TrainingMonitor, ExperimentTiming, \ 8 | ExponentialLRScheduler, ModelCheckpoint, EarlyStopping, NeptuneMonitor, ValidationMonitor 9 | 10 | from .parallel import DataParallelCriterion, DataParallelModel as DataParallel 11 | from .retinanet import RetinaNet, RetinaLoss 12 | 13 | 14 | class ModelParallel(Model): 15 | def fit(self, datagen, validation_datagen=None): 16 | self._initialize_model_weights() 17 | 18 | self.model = DataParallel(self.model) 19 | 20 | if torch.cuda.is_available(): 21 | self.model = self.model.cuda() 22 | 23 | self.callbacks.set_params(self, validation_datagen=validation_datagen) 24 | self.callbacks.on_train_begin() 25 | 26 | batch_gen, steps = datagen 27 | for epoch_id in range(self.training_config['epochs']): 28 | self.callbacks.on_epoch_begin() 29 | for batch_id, data in enumerate(batch_gen): 30 | self.callbacks.on_batch_begin() 31 | metrics = self._fit_loop(data) 32 | self.callbacks.on_batch_end(metrics=metrics) 33 | if batch_id == steps: 34 | break 35 | self.callbacks.on_epoch_end() 36 | if self.callbacks.training_break(): 37 | break 38 | self.callbacks.on_train_end() 39 | return self 40 | 41 | def _fit_loop(self, data): 42 | X = data[0] 43 | targets_tensors = data[1:] 44 | 45 | if torch.cuda.is_available(): 46 | X = Variable(X).cuda() 47 | targets_var = [] 48 | for target_tensor in targets_tensors: 49 | targets_var.append(Variable(target_tensor).cuda()) 50 | else: 51 | X = Variable(X) 52 | targets_var = [] 53 | for target_tensor in targets_tensors: 54 | targets_var.append(Variable(target_tensor)) 55 | 56 | self.optimizer.zero_grad() 57 | outputs_batch = self.model(X) 58 | partial_batch_losses = {} 59 | 60 | if len(self.output_names) == 1: 61 | for (name, 
loss_function, weight), target in zip(self.loss_function, targets_var): 62 | batch_loss = loss_function(outputs_batch, target) * weight 63 | else: 64 | for (name, loss_function, weight), output, target in zip(self.loss_function, outputs_batch, targets_var): 65 | partial_batch_losses[name] = loss_function(output, target) * weight 66 | batch_loss = sum(partial_batch_losses.values()) 67 | partial_batch_losses['sum'] = batch_loss 68 | batch_loss.backward() 69 | self.optimizer.step() 70 | 71 | return partial_batch_losses 72 | 73 | def load(self, filepath): 74 | self.model.eval() 75 | 76 | if not isinstance(self.model, DataParallel): 77 | self.model = DataParallel(self.model) 78 | 79 | if torch.cuda.is_available(): 80 | self.model.cpu() 81 | self.model.load_state_dict(torch.load(filepath)) 82 | self.model = self.model.cuda() 83 | else: 84 | self.model.load_state_dict(torch.load(filepath, map_location=lambda storage, loc: storage)) 85 | 86 | self.model.train() 87 | 88 | return self 89 | 90 | 91 | class Retina(ModelParallel): 92 | def __init__(self, architecture_config, training_config, callbacks_config, train_mode=False): 93 | """ 94 | """ 95 | super().__init__(architecture_config, training_config, callbacks_config) 96 | self.train_mode = train_mode 97 | self.num_classes = self.architecture_config['model_params']['num_classes'] 98 | self.pi = self.architecture_config['weights_init']['pi'] 99 | 100 | self.set_model() 101 | self.weight_regularization = weight_regularization 102 | self.optimizer = optim.Adam(self.weight_regularization(self.model, **architecture_config['regularizer_params']), 103 | **architecture_config['optimizer_params']) 104 | self.loss_function = [('FocalLoss', DataParallelCriterion(RetinaLoss(num_classes=self.num_classes)), 1.0)] 105 | self.callbacks = callbacks(self.callbacks_config) 106 | 107 | def transform(self, datagen, *args, **kwargs): 108 | if self.train_mode: 109 | return self 110 | 111 | self.model.eval() 112 | 113 | batch_gen, steps = datagen 114 | boxes = [] 115 | labels = [] 116 | for batch_id, data in enumerate(batch_gen): 117 | if isinstance(data, list): 118 | X = data[0] 119 | else: 120 | X = data 121 | 122 | if torch.cuda.is_available(): 123 | X = Variable(X, volatile=True).cuda() 124 | else: 125 | X = Variable(X, volatile=True) 126 | 127 | outputs = self.model(X) 128 | 129 | if isinstance(outputs, list): 130 | outputs = [output.data.cpu() for output in outputs] 131 | outputs = torch.cat(outputs, dim=0) 132 | else: 133 | outputs = outputs.data.cpu() 134 | 135 | boxes_batch, labels_batch = outputs[:, :, :4], outputs[:, :, 4:] 136 | boxes.extend([box for box in boxes_batch]) 137 | labels.extend([label for label in labels_batch]) 138 | 139 | if batch_id == steps: 140 | break 141 | 142 | self.model.train() 143 | 144 | outputs = {'box_predictions': boxes, 145 | 'class_predictions': labels} 146 | return outputs 147 | 148 | def set_model(self): 149 | self.model = RetinaNet(**self.architecture_config['model_params']) 150 | 151 | def _initialize_model_weights(self): 152 | self.model.apply(partial(init_weights_retina, pi=self.pi)) 153 | self.model.freeze_bn() 154 | 155 | 156 | def weight_regularization(model, regularize, weight_decay_conv2d): 157 | if regularize: 158 | parameter_list = [{'params': model.parameters(), 'weight_decay': weight_decay_conv2d}] 159 | else: 160 | parameter_list = [model.parameters()] 161 | return parameter_list 162 | 163 | 164 | def callbacks(callbacks_config): 165 | experiment_timing = ExperimentTiming(**callbacks_config['experiment_timing']) 
166 | model_checkpoints = ModelCheckpoint(**callbacks_config['model_checkpoint']) 167 | lr_scheduler = ExponentialLRScheduler(**callbacks_config['exp_lr_scheduler']) 168 | training_monitor = TrainingMonitor(**callbacks_config['training_monitor']) 169 | validation_monitor = ValidationMonitor(**callbacks_config['validation_monitor']) 170 | neptune_monitor = NeptuneMonitor(**callbacks_config['neptune_monitor']) 171 | early_stopping = EarlyStopping(**callbacks_config['early_stopping']) 172 | 173 | return CallbackList( 174 | callbacks=[experiment_timing, training_monitor, validation_monitor, 175 | model_checkpoints, lr_scheduler, early_stopping, neptune_monitor, 176 | ]) 177 | 178 | 179 | def init_weights_retina(module, pi): 180 | if hasattr(module, 'name'): 181 | b = -log((1 - pi) / pi) 182 | if module.name == 'final_layer': 183 | module.bias.data.fill_(b) 184 | elif module.name == 'head_layer': 185 | module.weight.data.normal_(0, pi) 186 | module.bias.data.fill_(0) 187 | -------------------------------------------------------------------------------- /src/object_detection/utils/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Functions for computing metrics like precision, recall, CorLoc and etc.""" 16 | 17 | 18 | import numpy as np 19 | 20 | 21 | def compute_precision_recall(scores, labels, num_gt): 22 | """Compute precision and recall. 23 | 24 | Args: 25 | scores: A float numpy array representing detection score 26 | labels: A float numpy array representing weighted true/false positive labels 27 | num_gt: Number of ground truth instances 28 | 29 | Raises: 30 | ValueError: if the input is not of the correct format 31 | 32 | Returns: 33 | precision: Fraction of positive instances over detected ones. This value is 34 | None if no ground truth labels are present. 35 | recall: Fraction of detected positive instance over all positive instances. 36 | This value is None if no ground truth labels are present. 
37 | 38 | """ 39 | if not isinstance(labels, np.ndarray) or len(labels.shape) != 1: 40 | raise ValueError("labels must be single dimension numpy array") 41 | 42 | if labels.dtype != np.float and labels.dtype != np.bool: 43 | raise ValueError("labels type must be either bool or float") 44 | 45 | if not isinstance(scores, np.ndarray) or len(scores.shape) != 1: 46 | raise ValueError("scores must be single dimension numpy array") 47 | 48 | if num_gt < np.sum(labels): 49 | raise ValueError("Number of true positives must be smaller than num_gt.") 50 | 51 | if len(scores) != len(labels): 52 | raise ValueError("scores and labels must be of the same size.") 53 | 54 | if num_gt == 0: 55 | return None, None 56 | 57 | sorted_indices = np.argsort(scores) 58 | sorted_indices = sorted_indices[::-1] 59 | true_positive_labels = labels[sorted_indices] 60 | false_positive_labels = (true_positive_labels <= 0).astype(float) 61 | cum_true_positives = np.cumsum(true_positive_labels) 62 | cum_false_positives = np.cumsum(false_positive_labels) 63 | precision = cum_true_positives.astype(float) / ( 64 | cum_true_positives + cum_false_positives) 65 | recall = cum_true_positives.astype(float) / num_gt 66 | return precision, recall 67 | 68 | 69 | def compute_average_precision(precision, recall): 70 | """Compute Average Precision according to the definition in VOCdevkit. 71 | 72 | Precision is modified to ensure that it does not decrease as recall 73 | decrease. 74 | 75 | Args: 76 | precision: A float [N, 1] numpy array of precisions 77 | recall: A float [N, 1] numpy array of recalls 78 | 79 | Raises: 80 | ValueError: if the input is not of the correct format 81 | 82 | Returns: 83 | average_precison: The area under the precision recall curve. NaN if 84 | precision and recall are None. 85 | 86 | """ 87 | if precision is None: 88 | if recall is not None: 89 | raise ValueError("If precision is None, recall must also be None") 90 | return np.NAN 91 | 92 | if not isinstance(precision, np.ndarray) or not isinstance( 93 | recall, np.ndarray): 94 | raise ValueError("precision and recall must be numpy array") 95 | if precision.dtype != np.float or recall.dtype != np.float: 96 | raise ValueError("input must be float numpy array.") 97 | if len(precision) != len(recall): 98 | raise ValueError("precision and recall must be of the same size.") 99 | if not precision.size: 100 | return 0.0 101 | if np.amin(precision) < 0 or np.amax(precision) > 1: 102 | raise ValueError("Precision must be in the range of [0, 1].") 103 | if np.amin(recall) < 0 or np.amax(recall) > 1: 104 | raise ValueError("recall must be in the range of [0, 1].") 105 | if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)): 106 | raise ValueError("recall must be a non-decreasing array") 107 | 108 | recall = np.concatenate([[0], recall, [1]]) 109 | precision = np.concatenate([[0], precision, [0]]) 110 | 111 | # Preprocess precision to be a non-decreasing array 112 | for i in range(len(precision) - 2, -1, -1): 113 | precision[i] = np.maximum(precision[i], precision[i + 1]) 114 | 115 | indices = np.where(recall[1:] != recall[:-1])[0] + 1 116 | average_precision = np.sum( 117 | (recall[indices] - recall[indices - 1]) * precision[indices]) 118 | return average_precision 119 | 120 | 121 | def compute_cor_loc(num_gt_imgs_per_class, 122 | num_images_correctly_detected_per_class): 123 | """Compute CorLoc according to the definition in the following paper. 
124 | 125 | https://www.robots.ox.ac.uk/~vgg/rg/papers/deselaers-eccv10.pdf 126 | 127 | Returns nans if there are no ground truth images for a class. 128 | 129 | Args: 130 | num_gt_imgs_per_class: 1D array, representing number of images containing 131 | at least one object instance of a particular class 132 | num_images_correctly_detected_per_class: 1D array, representing number of 133 | images that are correctly detected at least one object instance of a 134 | particular class 135 | 136 | Returns: 137 | corloc_per_class: A float numpy array represents the corloc score of each 138 | class 139 | """ 140 | return np.where( 141 | num_gt_imgs_per_class == 0, np.nan, 142 | num_images_correctly_detected_per_class / num_gt_imgs_per_class) 143 | 144 | 145 | def compute_median_rank_at_k(tp_fp_list, k): 146 | """Computes MedianRank@k, where k is the top-scoring labels. 147 | 148 | Args: 149 | tp_fp_list: a list of numpy arrays; each numpy array corresponds to the all 150 | detection on a single image, where the detections are sorted by score in 151 | descending order. Further, each numpy array element can have boolean or 152 | float values. True positive elements have either value >0.0 or True; 153 | any other value is considered false positive. 154 | k: number of top-scoring proposals to take. 155 | 156 | Returns: 157 | median_rank: median rank of all true positive proposals among top k by 158 | score. 159 | """ 160 | ranks = [] 161 | for i in range(len(tp_fp_list)): 162 | ranks.append( 163 | np.where(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])] > 0)[0]) 164 | concatenated_ranks = np.concatenate(ranks) 165 | return np.median(concatenated_ranks) 166 | 167 | 168 | def compute_recall_at_k(tp_fp_list, num_gt, k): 169 | """Computes Recall@k, MedianRank@k, where k is the top-scoring labels. 170 | 171 | Args: 172 | tp_fp_list: a list of numpy arrays; each numpy array corresponds to the all 173 | detection on a single image, where the detections are sorted by score in 174 | descending order. Further, each numpy array element can have boolean or 175 | float values. True positive elements have either value >0.0 or True; 176 | any other value is considered false positive. 177 | num_gt: number of groundtruth anotations. 178 | k: number of top-scoring proposals to take. 179 | 180 | Returns: 181 | recall: recall evaluated on the top k by score detections. 182 | """ 183 | 184 | tp_fp_eval = [] 185 | for i in range(len(tp_fp_list)): 186 | tp_fp_eval.append(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])]) 187 | 188 | tp_fp_eval = np.concatenate(tp_fp_eval) 189 | 190 | return np.sum(tp_fp_eval) / num_gt 191 | -------------------------------------------------------------------------------- /src/object_detection/utils/label_map_util_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.label_map_util.""" 17 | 18 | import os 19 | import tensorflow as tf 20 | 21 | from google.protobuf import text_format 22 | from object_detection.protos import string_int_label_map_pb2 23 | from object_detection.utils import label_map_util 24 | 25 | 26 | class LabelMapUtilTest(tf.test.TestCase): 27 | 28 | def _generate_label_map(self, num_classes): 29 | label_map_proto = string_int_label_map_pb2.StringIntLabelMap() 30 | for i in range(1, num_classes + 1): 31 | item = label_map_proto.item.add() 32 | item.id = i 33 | item.name = 'label_' + str(i) 34 | item.display_name = str(i) 35 | return label_map_proto 36 | 37 | def test_get_label_map_dict(self): 38 | label_map_string = """ 39 | item { 40 | id:2 41 | name:'cat' 42 | } 43 | item { 44 | id:1 45 | name:'dog' 46 | } 47 | """ 48 | label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') 49 | with tf.gfile.Open(label_map_path, 'wb') as f: 50 | f.write(label_map_string) 51 | 52 | label_map_dict = label_map_util.get_label_map_dict(label_map_path) 53 | self.assertEqual(label_map_dict['dog'], 1) 54 | self.assertEqual(label_map_dict['cat'], 2) 55 | 56 | def test_get_label_map_dict_display(self): 57 | label_map_string = """ 58 | item { 59 | id:2 60 | display_name:'cat' 61 | } 62 | item { 63 | id:1 64 | display_name:'dog' 65 | } 66 | """ 67 | label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') 68 | with tf.gfile.Open(label_map_path, 'wb') as f: 69 | f.write(label_map_string) 70 | 71 | label_map_dict = label_map_util.get_label_map_dict( 72 | label_map_path, use_display_name=True) 73 | self.assertEqual(label_map_dict['dog'], 1) 74 | self.assertEqual(label_map_dict['cat'], 2) 75 | 76 | def test_load_bad_label_map(self): 77 | label_map_string = """ 78 | item { 79 | id:0 80 | name:'class that should not be indexed at zero' 81 | } 82 | item { 83 | id:2 84 | name:'cat' 85 | } 86 | item { 87 | id:1 88 | name:'dog' 89 | } 90 | """ 91 | label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') 92 | with tf.gfile.Open(label_map_path, 'wb') as f: 93 | f.write(label_map_string) 94 | 95 | with self.assertRaises(ValueError): 96 | label_map_util.load_labelmap(label_map_path) 97 | 98 | def test_load_label_map_with_background(self): 99 | label_map_string = """ 100 | item { 101 | id:0 102 | name:'background' 103 | } 104 | item { 105 | id:2 106 | name:'cat' 107 | } 108 | item { 109 | id:1 110 | name:'dog' 111 | } 112 | """ 113 | label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') 114 | with tf.gfile.Open(label_map_path, 'wb') as f: 115 | f.write(label_map_string) 116 | 117 | label_map_dict = label_map_util.get_label_map_dict(label_map_path) 118 | self.assertEqual(label_map_dict['background'], 0) 119 | self.assertEqual(label_map_dict['dog'], 1) 120 | self.assertEqual(label_map_dict['cat'], 2) 121 | 122 | def test_keep_categories_with_unique_id(self): 123 | label_map_proto = string_int_label_map_pb2.StringIntLabelMap() 124 | label_map_string = """ 125 | item { 126 | id:2 127 | name:'cat' 128 | } 129 | item { 130 | id:1 131 | name:'child' 132 | } 133 | item { 134 | id:1 135 | name:'person' 136 | } 137 | item { 138 | id:1 139 | name:'n00007846' 140 | } 141 | """ 142 | text_format.Merge(label_map_string, label_map_proto) 143 | categories = label_map_util.convert_label_map_to_categories( 144 | label_map_proto, max_num_classes=3) 145 | self.assertListEqual([{ 146 | 'id': 2, 147 | 'name': 
'cat' 148 | }, { 149 | 'id': 1, 150 | 'name': 'child' 151 | }], categories) 152 | 153 | def test_convert_label_map_to_categories_no_label_map(self): 154 | categories = label_map_util.convert_label_map_to_categories( 155 | None, max_num_classes=3) 156 | expected_categories_list = [{ 157 | 'name': 'category_1', 158 | 'id': 1 159 | }, { 160 | 'name': 'category_2', 161 | 'id': 2 162 | }, { 163 | 'name': 'category_3', 164 | 'id': 3 165 | }] 166 | self.assertListEqual(expected_categories_list, categories) 167 | 168 | def test_convert_label_map_to_coco_categories(self): 169 | label_map_proto = self._generate_label_map(num_classes=4) 170 | categories = label_map_util.convert_label_map_to_categories( 171 | label_map_proto, max_num_classes=3) 172 | expected_categories_list = [{ 173 | 'name': '1', 174 | 'id': 1 175 | }, { 176 | 'name': '2', 177 | 'id': 2 178 | }, { 179 | 'name': '3', 180 | 'id': 3 181 | }] 182 | self.assertListEqual(expected_categories_list, categories) 183 | 184 | def test_convert_label_map_to_coco_categories_with_few_classes(self): 185 | label_map_proto = self._generate_label_map(num_classes=4) 186 | cat_no_offset = label_map_util.convert_label_map_to_categories( 187 | label_map_proto, max_num_classes=2) 188 | expected_categories_list = [{ 189 | 'name': '1', 190 | 'id': 1 191 | }, { 192 | 'name': '2', 193 | 'id': 2 194 | }] 195 | self.assertListEqual(expected_categories_list, cat_no_offset) 196 | 197 | def test_get_max_label_map_index(self): 198 | num_classes = 4 199 | label_map_proto = self._generate_label_map(num_classes=num_classes) 200 | max_index = label_map_util.get_max_label_map_index(label_map_proto) 201 | self.assertEqual(num_classes, max_index) 202 | 203 | def test_create_category_index(self): 204 | categories = [{'name': '1', 'id': 1}, {'name': '2', 'id': 2}] 205 | category_index = label_map_util.create_category_index(categories) 206 | self.assertDictEqual({ 207 | 1: { 208 | 'name': '1', 209 | 'id': 1 210 | }, 211 | 2: { 212 | 'name': '2', 213 | 'id': 2 214 | } 215 | }, category_index) 216 | 217 | def test_create_category_index_from_labelmap(self): 218 | label_map_string = """ 219 | item { 220 | id:2 221 | name:'cat' 222 | } 223 | item { 224 | id:1 225 | name:'dog' 226 | } 227 | """ 228 | label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') 229 | with tf.gfile.Open(label_map_path, 'wb') as f: 230 | f.write(label_map_string) 231 | 232 | category_index = label_map_util.create_category_index_from_labelmap( 233 | label_map_path) 234 | self.assertDictEqual({ 235 | 1: { 236 | 'name': 'dog', 237 | 'id': 1 238 | }, 239 | 2: { 240 | 'name': 'cat', 241 | 'id': 2 242 | } 243 | }, category_index) 244 | 245 | 246 | if __name__ == '__main__': 247 | tf.test.main() 248 | -------------------------------------------------------------------------------- /src/object_detection/dataset_tools/oid_tfrecord_creation_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for oid_tfrecord_creation.py.""" 16 | 17 | import pandas as pd 18 | import tensorflow as tf 19 | 20 | from object_detection.dataset_tools import oid_tfrecord_creation 21 | 22 | 23 | def create_test_data(): 24 | data = { 25 | 'ImageID': ['i1', 'i1', 'i1', 'i1', 'i1', 'i2', 'i2'], 26 | 'LabelName': ['a', 'a', 'b', 'b', 'c', 'b', 'c'], 27 | 'YMin': [0.3, 0.6, 0.8, 0.1, None, 0.0, 0.0], 28 | 'XMin': [0.1, 0.3, 0.7, 0.0, None, 0.1, 0.1], 29 | 'XMax': [0.2, 0.3, 0.8, 0.5, None, 0.9, 0.9], 30 | 'YMax': [0.3, 0.6, 1, 0.8, None, 0.8, 0.8], 31 | 'IsOccluded': [0, 1, 1, 0, None, 0, 0], 32 | 'IsTruncated': [0, 0, 0, 1, None, 0, 0], 33 | 'IsGroupOf': [0, 0, 0, 0, None, 0, 1], 34 | 'IsDepiction': [1, 0, 0, 0, None, 0, 0], 35 | 'ConfidenceImageLabel': [None, None, None, None, 0, None, None], 36 | } 37 | df = pd.DataFrame(data=data) 38 | label_map = {'a': 0, 'b': 1, 'c': 2} 39 | return label_map, df 40 | 41 | 42 | class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase): 43 | 44 | def test_simple(self): 45 | label_map, df = create_test_data() 46 | 47 | tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( 48 | df[df.ImageID == 'i1'], label_map, 'encoded_image_test') 49 | self.assertProtoEquals( 50 | """ 51 | features { 52 | feature { 53 | key: "image/encoded" 54 | value { bytes_list { value: "encoded_image_test" } } } 55 | feature { 56 | key: "image/filename" 57 | value { bytes_list { value: "i1.jpg" } } } 58 | feature { 59 | key: "image/object/bbox/ymin" 60 | value { float_list { value: [0.3, 0.6, 0.8, 0.1] } } } 61 | feature { 62 | key: "image/object/bbox/xmin" 63 | value { float_list { value: [0.1, 0.3, 0.7, 0.0] } } } 64 | feature { 65 | key: "image/object/bbox/ymax" 66 | value { float_list { value: [0.3, 0.6, 1.0, 0.8] } } } 67 | feature { 68 | key: "image/object/bbox/xmax" 69 | value { float_list { value: [0.2, 0.3, 0.8, 0.5] } } } 70 | feature { 71 | key: "image/object/class/label" 72 | value { int64_list { value: [0, 0, 1, 1] } } } 73 | feature { 74 | key: "image/object/class/text" 75 | value { bytes_list { value: ["a", "a", "b", "b"] } } } 76 | feature { 77 | key: "image/source_id" 78 | value { bytes_list { value: "i1" } } } 79 | feature { 80 | key: "image/object/depiction" 81 | value { int64_list { value: [1, 0, 0, 0] } } } 82 | feature { 83 | key: "image/object/group_of" 84 | value { int64_list { value: [0, 0, 0, 0] } } } 85 | feature { 86 | key: "image/object/occluded" 87 | value { int64_list { value: [0, 1, 1, 0] } } } 88 | feature { 89 | key: "image/object/truncated" 90 | value { int64_list { value: [0, 0, 0, 1] } } } 91 | feature { 92 | key: "image/class/label" 93 | value { int64_list { value: [2] } } } 94 | feature { 95 | key: "image/class/text" 96 | value { bytes_list { value: ["c"] } } } } 97 | """, tf_example) 98 | 99 | def test_no_attributes(self): 100 | label_map, df = create_test_data() 101 | 102 | del df['IsDepiction'] 103 | del df['IsGroupOf'] 104 | del df['IsOccluded'] 105 | del df['IsTruncated'] 106 | del df['ConfidenceImageLabel'] 107 | 108 | tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( 109 | df[df.ImageID == 'i2'], label_map, 'encoded_image_test') 110 | self.assertProtoEquals(""" 111 | features { 112 | feature { 113 | key: "image/encoded" 114 | value { bytes_list { value: "encoded_image_test" } } } 115 | feature 
{ 116 | key: "image/filename" 117 | value { bytes_list { value: "i2.jpg" } } } 118 | feature { 119 | key: "image/object/bbox/ymin" 120 | value { float_list { value: [0.0, 0.0] } } } 121 | feature { 122 | key: "image/object/bbox/xmin" 123 | value { float_list { value: [0.1, 0.1] } } } 124 | feature { 125 | key: "image/object/bbox/ymax" 126 | value { float_list { value: [0.8, 0.8] } } } 127 | feature { 128 | key: "image/object/bbox/xmax" 129 | value { float_list { value: [0.9, 0.9] } } } 130 | feature { 131 | key: "image/object/class/label" 132 | value { int64_list { value: [1, 2] } } } 133 | feature { 134 | key: "image/object/class/text" 135 | value { bytes_list { value: ["b", "c"] } } } 136 | feature { 137 | key: "image/source_id" 138 | value { bytes_list { value: "i2" } } } } 139 | """, tf_example) 140 | 141 | def test_label_filtering(self): 142 | label_map, df = create_test_data() 143 | 144 | label_map = {'a': 0} 145 | 146 | tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( 147 | df[df.ImageID == 'i1'], label_map, 'encoded_image_test') 148 | self.assertProtoEquals( 149 | """ 150 | features { 151 | feature { 152 | key: "image/encoded" 153 | value { bytes_list { value: "encoded_image_test" } } } 154 | feature { 155 | key: "image/filename" 156 | value { bytes_list { value: "i1.jpg" } } } 157 | feature { 158 | key: "image/object/bbox/ymin" 159 | value { float_list { value: [0.3, 0.6] } } } 160 | feature { 161 | key: "image/object/bbox/xmin" 162 | value { float_list { value: [0.1, 0.3] } } } 163 | feature { 164 | key: "image/object/bbox/ymax" 165 | value { float_list { value: [0.3, 0.6] } } } 166 | feature { 167 | key: "image/object/bbox/xmax" 168 | value { float_list { value: [0.2, 0.3] } } } 169 | feature { 170 | key: "image/object/class/label" 171 | value { int64_list { value: [0, 0] } } } 172 | feature { 173 | key: "image/object/class/text" 174 | value { bytes_list { value: ["a", "a"] } } } 175 | feature { 176 | key: "image/source_id" 177 | value { bytes_list { value: "i1" } } } 178 | feature { 179 | key: "image/object/depiction" 180 | value { int64_list { value: [1, 0] } } } 181 | feature { 182 | key: "image/object/group_of" 183 | value { int64_list { value: [0, 0] } } } 184 | feature { 185 | key: "image/object/occluded" 186 | value { int64_list { value: [0, 1] } } } 187 | feature { 188 | key: "image/object/truncated" 189 | value { int64_list { value: [0, 0] } } } 190 | feature { 191 | key: "image/class/label" 192 | value { int64_list { } } } 193 | feature { 194 | key: "image/class/text" 195 | value { bytes_list { } } } } 196 | """, tf_example) 197 | 198 | 199 | if __name__ == '__main__': 200 | tf.test.main() 201 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_box_mask_list_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.np_box_mask_list_test.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import np_box_mask_list 22 | 23 | 24 | class BoxMaskListTest(tf.test.TestCase): 25 | 26 | def test_invalid_box_mask_data(self): 27 | with self.assertRaises(ValueError): 28 | np_box_mask_list.BoxMaskList( 29 | box_data=[0, 0, 1, 1], 30 | mask_data=np.zeros([1, 3, 3], dtype=np.uint8)) 31 | 32 | with self.assertRaises(ValueError): 33 | np_box_mask_list.BoxMaskList( 34 | box_data=np.array([[0, 0, 1, 1]], dtype=int), 35 | mask_data=np.zeros([1, 3, 3], dtype=np.uint8)) 36 | 37 | with self.assertRaises(ValueError): 38 | np_box_mask_list.BoxMaskList( 39 | box_data=np.array([0, 1, 1, 3, 4], dtype=float), 40 | mask_data=np.zeros([1, 3, 3], dtype=np.uint8)) 41 | 42 | with self.assertRaises(ValueError): 43 | np_box_mask_list.BoxMaskList( 44 | box_data=np.array([[0, 1, 1, 3], [3, 1, 1, 5]], dtype=float), 45 | mask_data=np.zeros([2, 3, 3], dtype=np.uint8)) 46 | 47 | with self.assertRaises(ValueError): 48 | np_box_mask_list.BoxMaskList( 49 | box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float), 50 | mask_data=np.zeros([3, 5, 5], dtype=np.uint8)) 51 | 52 | with self.assertRaises(ValueError): 53 | np_box_mask_list.BoxMaskList( 54 | box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float), 55 | mask_data=np.zeros([2, 5], dtype=np.uint8)) 56 | 57 | with self.assertRaises(ValueError): 58 | np_box_mask_list.BoxMaskList( 59 | box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float), 60 | mask_data=np.zeros([2, 5, 5, 5], dtype=np.uint8)) 61 | 62 | with self.assertRaises(ValueError): 63 | np_box_mask_list.BoxMaskList( 64 | box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float), 65 | mask_data=np.zeros([2, 5, 5], dtype=np.int32)) 66 | 67 | def test_has_field_with_existed_field(self): 68 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 69 | [0.0, 0.0, 20.0, 20.0]], 70 | dtype=float) 71 | box_mask_list = np_box_mask_list.BoxMaskList( 72 | box_data=boxes, mask_data=np.zeros([3, 5, 5], dtype=np.uint8)) 73 | self.assertTrue(box_mask_list.has_field('boxes')) 74 | self.assertTrue(box_mask_list.has_field('masks')) 75 | 76 | def test_has_field_with_nonexisted_field(self): 77 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 78 | [0.0, 0.0, 20.0, 20.0]], 79 | dtype=float) 80 | box_mask_list = np_box_mask_list.BoxMaskList( 81 | box_data=boxes, mask_data=np.zeros([3, 3, 3], dtype=np.uint8)) 82 | self.assertFalse(box_mask_list.has_field('scores')) 83 | 84 | def test_get_field_with_existed_field(self): 85 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 86 | [0.0, 0.0, 20.0, 20.0]], 87 | dtype=float) 88 | masks = np.zeros([3, 3, 3], dtype=np.uint8) 89 | box_mask_list = np_box_mask_list.BoxMaskList( 90 | box_data=boxes, mask_data=masks) 91 | self.assertTrue(np.allclose(box_mask_list.get_field('boxes'), boxes)) 92 | self.assertTrue(np.allclose(box_mask_list.get_field('masks'), masks)) 93 | 94 | def test_get_field_with_nonexited_field(self): 95 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 96 | [0.0, 0.0, 20.0, 20.0]], 97 | dtype=float) 98 | masks = np.zeros([3, 3, 3], dtype=np.uint8) 99 | box_mask_list = np_box_mask_list.BoxMaskList( 100 | box_data=boxes, mask_data=masks) 
101 | with self.assertRaises(ValueError): 102 | box_mask_list.get_field('scores') 103 | 104 | 105 | class AddExtraFieldTest(tf.test.TestCase): 106 | 107 | def setUp(self): 108 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 109 | [0.0, 0.0, 20.0, 20.0]], 110 | dtype=float) 111 | masks = np.zeros([3, 3, 3], dtype=np.uint8) 112 | self.box_mask_list = np_box_mask_list.BoxMaskList( 113 | box_data=boxes, mask_data=masks) 114 | 115 | def test_add_already_existed_field_bbox(self): 116 | with self.assertRaises(ValueError): 117 | self.box_mask_list.add_field('boxes', 118 | np.array([[0, 0, 0, 1, 0]], dtype=float)) 119 | 120 | def test_add_already_existed_field_mask(self): 121 | with self.assertRaises(ValueError): 122 | self.box_mask_list.add_field('masks', 123 | np.zeros([3, 3, 3], dtype=np.uint8)) 124 | 125 | def test_add_invalid_field_data(self): 126 | with self.assertRaises(ValueError): 127 | self.box_mask_list.add_field('scores', np.array([0.5, 0.7], dtype=float)) 128 | with self.assertRaises(ValueError): 129 | self.box_mask_list.add_field('scores', 130 | np.array([0.5, 0.7, 0.9, 0.1], dtype=float)) 131 | 132 | def test_add_single_dimensional_field_data(self): 133 | box_mask_list = self.box_mask_list 134 | scores = np.array([0.5, 0.7, 0.9], dtype=float) 135 | box_mask_list.add_field('scores', scores) 136 | self.assertTrue(np.allclose(scores, self.box_mask_list.get_field('scores'))) 137 | 138 | def test_add_multi_dimensional_field_data(self): 139 | box_mask_list = self.box_mask_list 140 | labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]], 141 | dtype=int) 142 | box_mask_list.add_field('labels', labels) 143 | self.assertTrue(np.allclose(labels, self.box_mask_list.get_field('labels'))) 144 | 145 | def test_get_extra_fields(self): 146 | box_mask_list = self.box_mask_list 147 | self.assertItemsEqual(box_mask_list.get_extra_fields(), ['masks']) 148 | 149 | scores = np.array([0.5, 0.7, 0.9], dtype=float) 150 | box_mask_list.add_field('scores', scores) 151 | self.assertItemsEqual(box_mask_list.get_extra_fields(), ['masks', 'scores']) 152 | 153 | labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]], 154 | dtype=int) 155 | box_mask_list.add_field('labels', labels) 156 | self.assertItemsEqual(box_mask_list.get_extra_fields(), 157 | ['masks', 'scores', 'labels']) 158 | 159 | def test_get_coordinates(self): 160 | y_min, x_min, y_max, x_max = self.box_mask_list.get_coordinates() 161 | 162 | expected_y_min = np.array([3.0, 14.0, 0.0], dtype=float) 163 | expected_x_min = np.array([4.0, 14.0, 0.0], dtype=float) 164 | expected_y_max = np.array([6.0, 15.0, 20.0], dtype=float) 165 | expected_x_max = np.array([8.0, 15.0, 20.0], dtype=float) 166 | 167 | self.assertTrue(np.allclose(y_min, expected_y_min)) 168 | self.assertTrue(np.allclose(x_min, expected_x_min)) 169 | self.assertTrue(np.allclose(y_max, expected_y_max)) 170 | self.assertTrue(np.allclose(x_max, expected_x_max)) 171 | 172 | def test_num_boxes(self): 173 | boxes = np.array([[0., 0., 100., 100.], [10., 30., 50., 70.]], dtype=float) 174 | masks = np.zeros([2, 5, 5], dtype=np.uint8) 175 | box_mask_list = np_box_mask_list.BoxMaskList( 176 | box_data=boxes, mask_data=masks) 177 | expected_num_boxes = 2 178 | self.assertEqual(box_mask_list.num_boxes(), expected_num_boxes) 179 | 180 | 181 | if __name__ == '__main__': 182 | tf.test.main() 183 | -------------------------------------------------------------------------------- /src/object_detection/dataset_tools/oid_hierarchical_labels_expansion.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""An executable to expand hierarchically image-level labels and boxes. 16 | 17 | Example usage: 18 | python models/research/object_detection/dataset_tools/\ 19 | oid_hierarchical_labels_expansion.py \ 20 | --json_hierarchy_file= \ 21 | --input_annotations= \ 22 | --output_annotations= \ 23 | --annotation_type=<1 (for boxes) or 2 (for image-level labels)> 24 | """ 25 | 26 | import argparse 27 | import json 28 | 29 | 30 | def _update_dict(initial_dict, update): 31 | """Updates dictionary with update content. 32 | 33 | Args: 34 | initial_dict: initial dictionary. 35 | update: updated dictionary. 36 | """ 37 | 38 | for key, value_list in update.items(): 39 | if key in initial_dict: 40 | initial_dict[key].extend(value_list) 41 | else: 42 | initial_dict[key] = value_list 43 | 44 | 45 | def _build_plain_hierarchy(hierarchy, skip_root=False): 46 | """Expands tree hierarchy representation to parent-child dictionary. 47 | 48 | Args: 49 | hierarchy: labels hierarchy as JSON file. 50 | skip_root: if true skips root from the processing (done for the case when all 51 | classes under hierarchy are collected under virtual node). 52 | 53 | Returns: 54 | keyed_parent - dictionary of parent - all its children nodes. 55 | keyed_child - dictionary of children - all its parent nodes 56 | children - all children of the current node. 57 | """ 58 | all_children = [] 59 | all_keyed_parent = {} 60 | all_keyed_child = {} 61 | if 'Subcategory' in hierarchy: 62 | for node in hierarchy['Subcategory']: 63 | keyed_parent, keyed_child, children = _build_plain_hierarchy(node) 64 | # Update is not done through dict.update() since some children have multi- 65 | # ple parents in the hiearchy. 66 | _update_dict(all_keyed_parent, keyed_parent) 67 | _update_dict(all_keyed_child, keyed_child) 68 | all_children.extend(children) 69 | 70 | if not skip_root: 71 | all_keyed_parent[hierarchy['LabelName']] = all_children 72 | all_children = [hierarchy['LabelName']] + all_children 73 | for child, _ in all_keyed_child.items(): 74 | all_keyed_child[child].append(hierarchy['LabelName']) 75 | all_keyed_child[hierarchy['LabelName']] = [] 76 | 77 | return all_keyed_parent, all_keyed_child, all_children 78 | 79 | 80 | class OIDHierarchicalLabelsExpansion(object): 81 | """ Main class to perform labels hierachical expansion.""" 82 | 83 | def __init__(self, hierarchy): 84 | """Constructor. 85 | 86 | Args: 87 | hierarchy: labels hierarchy as JSON object. 88 | """ 89 | 90 | self._hierarchy_keyed_parent, self._hierarchy_keyed_child, _ = ( 91 | _build_plain_hierarchy(hierarchy, skip_root=True)) 92 | 93 | def expand_boxes_from_csv(self, csv_row): 94 | """Expands a row containing bounding boxes from CSV file. 
95 | 96 | Args: 97 | csv_row: a single row of Open Images released groundtruth file. 98 | 99 | Returns: 100 | a list of strings (including the initial row) corresponding to the ground 101 | truth expanded to multiple annotation for evaluation with Open Images 102 | Challenge 2018 metric. 103 | """ 104 | # Row header is expected to be exactly: 105 | # ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded, 106 | # IsTruncated,IsGroupOf,IsDepiction,IsInside 107 | cvs_row_splitted = csv_row.split(',') 108 | assert len(cvs_row_splitted) == 13 109 | result = [csv_row] 110 | assert cvs_row_splitted[2] in self._hierarchy_keyed_child 111 | parent_nodes = self._hierarchy_keyed_child[cvs_row_splitted[2]] 112 | for parent_node in parent_nodes: 113 | cvs_row_splitted[2] = parent_node 114 | result.append(','.join(cvs_row_splitted)) 115 | return result 116 | 117 | def expand_labels_from_csv(self, csv_row): 118 | """Expands a row containing bounding boxes from CSV file. 119 | 120 | Args: 121 | csv_row: a single row of Open Images released groundtruth file. 122 | 123 | Returns: 124 | a list of strings (including the initial row) corresponding to the ground 125 | truth expanded to multiple annotation for evaluation with Open Images 126 | Challenge 2018 metric. 127 | """ 128 | # Row header is expected to be exactly: 129 | # ImageID,Source,LabelName,Confidence 130 | cvs_row_splited = csv_row.split(',') 131 | assert len(cvs_row_splited) == 4 132 | result = [csv_row] 133 | if int(cvs_row_splited[3]) == 1: 134 | assert cvs_row_splited[2] in self._hierarchy_keyed_child 135 | parent_nodes = self._hierarchy_keyed_child[cvs_row_splited[2]] 136 | for parent_node in parent_nodes: 137 | cvs_row_splited[2] = parent_node 138 | result.append(','.join(cvs_row_splited)) 139 | else: 140 | assert cvs_row_splited[2] in self._hierarchy_keyed_parent 141 | child_nodes = self._hierarchy_keyed_parent[cvs_row_splited[2]] 142 | for child_node in child_nodes: 143 | cvs_row_splited[2] = child_node 144 | result.append(','.join(cvs_row_splited)) 145 | return result 146 | 147 | 148 | def main(parsed_args): 149 | with open(parsed_args.json_hierarchy_file) as f: 150 | hierarchy = json.load(f) 151 | expansion_generator = OIDHierarchicalLabelsExpansion(hierarchy) 152 | labels_file = False 153 | if parsed_args.annotation_type == 2: 154 | labels_file = True 155 | elif parsed_args.annotation_type != 1: 156 | print('--annotation_type expected value is 1 or 2.') 157 | return -1 158 | with open(parsed_args.input_annotations, 'r') as source: 159 | with open(parsed_args.output_annotations, 'w') as target: 160 | header = None 161 | for line in source: 162 | if not header: 163 | target.writelines(line) 164 | header = line 165 | continue 166 | if labels_file: 167 | expanded_lines = expansion_generator.expand_labels_from_csv(line) 168 | else: 169 | expanded_lines = expansion_generator.expand_boxes_from_csv(line) 170 | target.writelines(expanded_lines) 171 | 172 | 173 | if __name__ == '__main__': 174 | parser = argparse.ArgumentParser( 175 | description='Hierarchically expand annotations (excluding root node).') 176 | parser.add_argument( 177 | '--json_hierarchy_file', 178 | required=True, 179 | help='Path to the file containing label hierarchy in JSON format.') 180 | parser.add_argument( 181 | '--input_annotations', 182 | required=True, 183 | help="""Path to Open Images annotations file (either bounding boxes or 184 | image-level labels).""") 185 | parser.add_argument( 186 | '--output_annotations', 187 | required=True, 188 | help="""Path to 
the output file.""") 189 | parser.add_argument( 190 | '--annotation_type', 191 | type=int, 192 | required=True, 193 | help="""Type of the input annotations: 1 - boxes, 2 - image-level 194 | labels""" 195 | ) 196 | args = parser.parse_args() 197 | main(args) 198 | --------------------------------------------------------------------------------