├── src ├── __init__.py ├── object_detection │ ├── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── static_shape_test.py │ │ ├── static_shape.py │ │ ├── np_box_mask_list.py │ │ ├── np_box_ops_test.py │ │ ├── test_utils_test.py │ │ ├── np_box_ops.py │ │ ├── np_mask_ops_test.py │ │ ├── test_case.py │ │ ├── np_mask_ops.py │ │ ├── np_box_list.py │ │ ├── np_box_list_test.py │ │ ├── test_utils.py │ │ ├── metrics_test.py │ │ ├── label_map_util.py │ │ ├── metrics.py │ │ ├── label_map_util_test.py │ │ └── np_box_mask_list_test.py │ ├── core │ │ └── __init__.py │ ├── metrics │ │ ├── __init__.py │ │ ├── io_utils.py │ │ ├── oid_od_challenge_evaluation_utils.py │ │ ├── oid_od_challenge_evaluation_utils_test.py │ │ └── oid_od_challenge_evaluation.py │ ├── protos │ │ ├── __init__.py │ │ ├── string_int_label_map.proto │ │ └── string_int_label_map_pb2.py │ └── dataset_tools │ │ ├── __init__.py │ │ ├── oid_hierarchical_labels_expansion_test.py │ │ ├── oid_tfrecord_creation.py │ │ ├── create_oid_tf_record.py │ │ ├── oid_tfrecord_creation_test.py │ │ └── oid_hierarchical_labels_expansion.py ├── logging.py ├── augmentation.py ├── preprocessing.py ├── postprocessing.py ├── pipelines.py ├── pipeline_config.py └── models.py ├── .github └── ISSUE_TEMPLATE │ ├── everything-else.md │ └── bug.md ├── requirements.txt ├── PULL_REQUEST_TEMPLATE.md ├── LICENSE ├── CONTRIBUTING.md ├── Tensorflow-Object-Detection-API-notice └── README.md ├── .gitignore ├── configs ├── batch_7.yaml ├── batch_6.yaml ├── neptune.yaml ├── batch_1.yaml ├── batch_5.yaml ├── batch_8.yaml ├── batch_3.yaml ├── batch_2.yaml └── batch_4.yaml ├── CODE_OF_CONDUCT.md ├── notebooks └── submission_merge.ipynb ├── main.py └── README.md /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/object_detection/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/object_detection/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/object_detection/core/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/object_detection/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/object_detection/protos/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/object_detection/dataset_tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/logging.py: -------------------------------------------------------------------------------- 1 | from src.utils import init_logger 2 | 3 | LOGGER = init_logger() 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/everything-else.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: everything else 3 | 
about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | 8 | -------------------------------------------------------------------------------- /src/augmentation.py: -------------------------------------------------------------------------------- 1 | from imgaug import augmenters as iaa 2 | 3 | aug_seq = iaa.Sequential([ 4 | iaa.Fliplr(p=0.5), 5 | iaa.Sometimes( 6 | 0.3, 7 | iaa.Multiply((0.75, 1.25)) 8 | ), 9 | iaa.Sometimes( 10 | 0.3, 11 | iaa.AdditiveGaussianNoise() 12 | ), 13 | iaa.Affine( 14 | rotate=(-5, 5), 15 | scale=(0.8, 1.2) 16 | ) 17 | ]) 18 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | neptune-cli 2 | steppy==0.1.6 3 | steppy-toolkit==0.1.8 4 | bokeh 5 | flask 6 | numpy 7 | opencv_python 8 | attrdict==2.0.0 9 | category_encoders==1.2.8 10 | click==6.7 11 | contextlib2==0.5.5 12 | imgaug==0.2.6 13 | pandas==0.20.2 14 | Pillow>=6.2.2 15 | protobuf==3.15.0 16 | pycocotools==2.0.0 17 | scikit_learn==0.19.2 18 | six==1.10.0 19 | torch==0.3.1 20 | torchvision==0.2.1 21 | pyyaml>=4.2b1 22 | tqdm==4.11.2 23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: bug 3 | about: Create bug report 4 | 5 | --- 6 | 7 | There are two things that will make the processing of your issue faster: 8 | 1. Make sure that you are using the latest version of the code, 9 | 1. In case of bug issue, it would be nice to provide more technical details such like execution command, error message or script that reproduces your bug. 10 | # 11 | 12 | Thanks! 13 | 14 | Kamil & Jakub, 15 | 16 | *core contributors to the Open Solution* 17 | -------------------------------------------------------------------------------- /PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Pull Request template to the *[Google AI Open Images - Object Detection Track](https://www.kaggle.com/c/google-ai-open-images-object-detection-track)* Open Solution 2 | 3 | Major - and most appreciated - contribution is pull request with feature or bug fix. Each pull request initiates discussion about your code contribution. 4 | 5 | Each pull request should be provided with minimal description about its contents. 6 | # 7 | 8 | Thanks! 9 | 10 | Kuba & Kamil, 11 | 12 | _core contributors to the Open Solutions_ 13 | -------------------------------------------------------------------------------- /src/object_detection/protos/string_int_label_map.proto: -------------------------------------------------------------------------------- 1 | // Message to store the mapping from class label strings to class id. Datasets 2 | // use string labels to represent classes while the object detection framework 3 | // works with class ids. This message maps them so they can be converted back 4 | // and forth as needed. 5 | syntax = "proto2"; 6 | 7 | package object_detection.protos; 8 | 9 | message StringIntLabelMapItem { 10 | // String name. The most common practice is to set this to a MID or synsets 11 | // id. 12 | optional string name = 1; 13 | 14 | // Integer id that maps to the string name above. Label ids should start from 15 | // 1. 16 | optional int32 id = 2; 17 | 18 | // Human readable string label. 
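// For illustration only (these values are placeholders, not from the original
// file): a text-format label map assembled from these messages typically looks
// like
//   item {
//     name: "/m/01g317"
//     id: 1
//     display_name: "Person"
//   }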
19 | optional string display_name = 3; 20 | }; 21 | 22 | message StringIntLabelMap { 23 | repeated StringIntLabelMapItem item = 1; 24 | }; 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 neptune.ml 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing to Open Solution for [Google AI Open Images - Object Detection Track](https://www.kaggle.com/c/google-ai-open-images-object-detection-track). 2 | 3 | ### Get involved 4 | You are welcome to contribute to this Open Solution. To get started: 5 | 1. Check [our kanban board](https://github.com/neptune-ml/open-solution-googleai-object-detection/projects/1) to see what we are working on right now. 6 | 1. Express your interest in a particular [issue](https://github.com/neptune-ml/open-solution-googleai-object-detection/issues) by submitting a comment or, 7 | * submit your own [issue](https://github.com/neptune-ml/open-solution-googleai-object-detection/issues). 8 | 1. We will get back to you in order to start working together. 9 | 10 | ### Code contributions 11 | Major - and most appreciated - contribution is [pull request](https://github.com/neptune-ml/open-solution-googleai-object-detection/pulls) with feature or bug fix. 12 | 13 | ### Remarks 14 | In case of custom ideas, please contact core contributors directly at ml-team@neptune.ml. 15 | # 16 | 17 | Thanks! 18 | 19 | Kuba & Kamil, 20 | 21 | *core contributors to the Open Solution* 22 | -------------------------------------------------------------------------------- /src/object_detection/metrics/io_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Common IO utils used in offline metric computation. 16 | """ 17 | 18 | import csv 19 | 20 | 21 | def write_csv(fid, metrics): 22 | """Writes metrics key-value pairs to CSV file. 23 | 24 | Args: 25 | fid: File identifier of an opened file. 26 | metrics: A dictionary with metrics to be written. 27 | """ 28 | metrics_writer = csv.writer(fid, delimiter=',') 29 | for metric_name, metric_value in list(metrics.items()): 30 | metrics_writer.writerow([metric_name, str(metric_value)]) 31 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection-API-notice/README.md: -------------------------------------------------------------------------------- 1 | ### Tensorflow Object Detection API 2 | * [Object detection](https://github.com/neptune-ml/open-solution-googleai-object-detection/tree/master/src/object_detection) directory in this GitHub repository is a modification of the [Tensorflow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection), which is part of the [tensorflow/models](https://github.com/tensorflow/models). 3 | * Tensorflow/models is licensed under the [Apache License 2.0](https://github.com/tensorflow/models/blob/master/LICENSE) 4 | * Copy of the aforementioned [license is here](https://github.com/neptune-ml/open-solution-googleai-object-detection/tree/master/Tensorflow-Object-Detection-API-notice/LICENSE-copy). 5 | 6 | ### Code Changes 7 | * Code was copied from this repository: https://github.com/tensorflow/models/tree/master/research/object_detection 8 | * The modified version of the code is available here: https://github.com/neptune-ml/open-solution-googleai-object-detection/tree/master/src/object_detection 9 | * Entire [open-solution-googleai-object-detection](https://github.com/neptune-ml/open-solution-googleai-object-detection) repository is [MIT licensed](https://github.com/neptune-ml/open-solution-googleai-object-detection/blob/master/LICENSE). 
10 | -------------------------------------------------------------------------------- /src/preprocessing.py: -------------------------------------------------------------------------------- 1 | from category_encoders.ordinal import OrdinalEncoder 2 | from sklearn.externals import joblib 3 | from steppy.base import BaseTransformer 4 | 5 | 6 | class GoogleAiLabelEncoder(BaseTransformer): 7 | def __init__(self, colname): 8 | self.colname = colname 9 | self.encoder = OrdinalEncoder() 10 | 11 | def fit(self, annotations, **kwargs): 12 | self.encoder.fit(annotations[self.colname].values) 13 | return self 14 | 15 | def transform(self, annotations, annotations_human_labels, **kwargs): 16 | if annotations is not None: 17 | annotations[self.colname] = self.encoder.transform(annotations[self.colname].values) 18 | annotations_human_labels[self.colname] = self.encoder.transform( 19 | annotations_human_labels[self.colname].values) 20 | return {'annotations': annotations, 21 | 'annotations_human_labels': annotations_human_labels} 22 | else: 23 | return {'mapping': self.encoder.category_mapping[0]['mapping']} 24 | 25 | def load(self, filepath): 26 | self.encoder = joblib.load(filepath) 27 | return self 28 | 29 | def persist(self, filepath): 30 | joblib.dump(self.encoder, filepath) 31 | 32 | 33 | class GoogleAiLabelDecoder(BaseTransformer): 34 | 35 | def transform(self, mapping, **kwargs): 36 | inverse_mapping = {val: name for name, val in mapping} 37 | return {'inverse_mapping': inverse_mapping} 38 | -------------------------------------------------------------------------------- /src/object_detection/utils/static_shape_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.static_shape.""" 17 | 18 | import tensorflow as tf 19 | 20 | from object_detection.utils import static_shape 21 | 22 | 23 | class StaticShapeTest(tf.test.TestCase): 24 | 25 | def test_return_correct_batchSize(self): 26 | tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) 27 | self.assertEqual(32, static_shape.get_batch_size(tensor_shape)) 28 | 29 | def test_return_correct_height(self): 30 | tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) 31 | self.assertEqual(299, static_shape.get_height(tensor_shape)) 32 | 33 | def test_return_correct_width(self): 34 | tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) 35 | self.assertEqual(384, static_shape.get_width(tensor_shape)) 36 | 37 | def test_return_correct_depth(self): 38 | tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) 39 | self.assertEqual(3, static_shape.get_depth(tensor_shape)) 40 | 41 | def test_die_on_tensor_shape_with_rank_three(self): 42 | tensor_shape = tf.TensorShape(dims=[32, 299, 384]) 43 | with self.assertRaises(ValueError): 44 | static_shape.get_batch_size(tensor_shape) 45 | static_shape.get_height(tensor_shape) 46 | static_shape.get_width(tensor_shape) 47 | static_shape.get_depth(tensor_shape) 48 | 49 | if __name__ == '__main__': 50 | tf.test.main() 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .pytest_cache 6 | tests/.cache 7 | 8 | # C extensions 9 | *.so 10 | 11 | # neptune, pycharm 12 | .cache 13 | .cache/ 14 | .idea/ 15 | .idea_modules/ 16 | *_local.yaml 17 | out/ 18 | output 19 | output/ 20 | *.log 21 | target/ 22 | devbook.ipynb 23 | devbook_local.ipynb 24 | 25 | # Distribution / packaging 26 | .Python 27 | env/ 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | eggs/ 33 | .eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | wheels/ 40 | *.egg-info/ 41 | .installed.cfg 42 | *.egg 43 | 44 | # PyInstaller 45 | # Usually these files are written by a python script from a template 46 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
47 | *.manifest 48 | *.spec 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .coverage 58 | .coverage.* 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | .hypothesis/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | local_settings.py 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # Jupyter Notebook 82 | Untitled*.ipynb 83 | .ipynb_checkpoints 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # celery beat schedule file 89 | celerybeat-schedule 90 | 91 | # SageMath parsed files 92 | *.sage.py 93 | 94 | # dotenv 95 | .env 96 | 97 | # virtualenv 98 | .venv 99 | venv/ 100 | ENV/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # mkdocs documentation 110 | /site 111 | 112 | # mypy 113 | .mypy_cache/ 114 | 115 | # Working directories 116 | examples/cache/ 117 | configs/neptune_config_local.yaml 118 | notebooks/local_playground.ipynb 119 | configs/neptune_config_home.yaml 120 | configs/neptune_config_cluster.yaml 121 | playground.py 122 | playground.ipynb 123 | -------------------------------------------------------------------------------- /src/object_detection/utils/static_shape.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Helper functions to access TensorShape values. 17 | 18 | The rank 4 tensor_shape must be of the form [batch_size, height, width, depth]. 19 | """ 20 | 21 | 22 | def get_batch_size(tensor_shape): 23 | """Returns batch size from the tensor shape. 24 | 25 | Args: 26 | tensor_shape: A rank 4 TensorShape. 27 | 28 | Returns: 29 | An integer representing the batch size of the tensor. 30 | """ 31 | tensor_shape.assert_has_rank(rank=4) 32 | return tensor_shape[0].value 33 | 34 | 35 | def get_height(tensor_shape): 36 | """Returns height from the tensor shape. 37 | 38 | Args: 39 | tensor_shape: A rank 4 TensorShape. 40 | 41 | Returns: 42 | An integer representing the height of the tensor. 43 | """ 44 | tensor_shape.assert_has_rank(rank=4) 45 | return tensor_shape[1].value 46 | 47 | 48 | def get_width(tensor_shape): 49 | """Returns width from the tensor shape. 50 | 51 | Args: 52 | tensor_shape: A rank 4 TensorShape. 53 | 54 | Returns: 55 | An integer representing the width of the tensor. 56 | """ 57 | tensor_shape.assert_has_rank(rank=4) 58 | return tensor_shape[2].value 59 | 60 | 61 | def get_depth(tensor_shape): 62 | """Returns depth from the tensor shape. 63 | 64 | Args: 65 | tensor_shape: A rank 4 TensorShape. 
66 | 67 | Returns: 68 | An integer representing the depth of the tensor. 69 | """ 70 | tensor_shape.assert_has_rank(rank=4) 71 | return tensor_shape[3].value 72 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_box_mask_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Numpy BoxMaskList classes and functions.""" 17 | 18 | import numpy as np 19 | from object_detection.utils import np_box_list 20 | 21 | 22 | class BoxMaskList(np_box_list.BoxList): 23 | """Convenience wrapper for BoxList with masks. 24 | 25 | BoxMaskList extends the np_box_list.BoxList to contain masks as well. 26 | In particular, its constructor receives both boxes and masks. Note that the 27 | masks correspond to the full image. 28 | """ 29 | 30 | def __init__(self, box_data, mask_data): 31 | """Constructs box collection. 32 | 33 | Args: 34 | box_data: a numpy array of shape [N, 4] representing box coordinates 35 | mask_data: a numpy array of shape [N, height, width] representing masks 36 | with values are in {0,1}. The masks correspond to the full 37 | image. The height and the width will be equal to image height and width. 38 | 39 | Raises: 40 | ValueError: if bbox data is not a numpy array 41 | ValueError: if invalid dimensions for bbox data 42 | ValueError: if mask data is not a numpy array 43 | ValueError: if invalid dimension for mask data 44 | """ 45 | super(BoxMaskList, self).__init__(box_data) 46 | if not isinstance(mask_data, np.ndarray): 47 | raise ValueError('Mask data must be a numpy array.') 48 | if len(mask_data.shape) != 3: 49 | raise ValueError('Invalid dimensions for mask data.') 50 | if mask_data.dtype != np.uint8: 51 | raise ValueError('Invalid data type for mask data: uint8 is required.') 52 | if mask_data.shape[0] != box_data.shape[0]: 53 | raise ValueError('There should be the same number of boxes and masks.') 54 | self.data['masks'] = mask_data 55 | 56 | def get_masks(self): 57 | """Convenience function for accessing masks. 
58 | 59 | Returns: 60 | a numpy array of shape [N, height, width] representing masks 61 | """ 62 | return self.get_field('masks') 63 | 64 | -------------------------------------------------------------------------------- /configs/batch_7.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-AI-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch_7, retrain] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 1000000 61 | desired_class_subset: "['Bicycle', 62 | 'Bottle', 63 | 'Book', 64 | 'Palm tree', 65 | 'Street light', 66 | 'Glasses', 67 | 'Bicycle wheel', 68 | 'Tower', 69 | 'Skyscraper', 70 | 'Tire', 71 | 'Chair', 72 | 'House', 73 | 'Wheel', 74 | 'Window']" 75 | 76 | # Retina parameters (multi-output) 77 | encoder_depth: 50 78 | num_classes: 100 79 | pretrained_encoder: 1 80 | pi: 0.01 81 | aspect_ratios: '[1/2., 1/1., 2/1.]' 82 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 83 | 84 | # Training schedule 85 | epochs_nr: 1000 86 | batch_size_train: 8 87 | batch_size_inference: 1 88 | lr: 0.00001 89 | momentum: 0.9 90 | gamma: 1.0 91 | patience: 100 92 | lr_factor: 0.3 93 | lr_patience: 30 94 | training_sample_size: 10000 95 | validation_sample_size: 2000 96 | 97 | # Regularization 98 | use_batch_norm: 1 99 | l2_reg_conv: 0.0001 100 | l2_reg_dense: 0.0 101 | dropout_conv: 0.1 102 | dropout_dense: 0.0 103 | 104 | # Postprocessing 105 | classification_threshold: 0.05 106 | nms_threshold: 0.5 107 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_box_ops_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.np_box_ops.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import np_box_ops 22 | 23 | 24 | class BoxOpsTests(tf.test.TestCase): 25 | 26 | def setUp(self): 27 | boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]], 28 | dtype=float) 29 | boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 30 | [0.0, 0.0, 20.0, 20.0]], 31 | dtype=float) 32 | self.boxes1 = boxes1 33 | self.boxes2 = boxes2 34 | 35 | def testArea(self): 36 | areas = np_box_ops.area(self.boxes1) 37 | expected_areas = np.array([6.0, 5.0], dtype=float) 38 | self.assertAllClose(expected_areas, areas) 39 | 40 | def testIntersection(self): 41 | intersection = np_box_ops.intersection(self.boxes1, self.boxes2) 42 | expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]], 43 | dtype=float) 44 | self.assertAllClose(intersection, expected_intersection) 45 | 46 | def testIOU(self): 47 | iou = np_box_ops.iou(self.boxes1, self.boxes2) 48 | expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0], 49 | [1.0 / 16.0, 0.0, 5.0 / 400.0]], 50 | dtype=float) 51 | self.assertAllClose(iou, expected_iou) 52 | 53 | def testIOA(self): 54 | boxes1 = np.array([[0.25, 0.25, 0.75, 0.75], 55 | [0.0, 0.0, 0.5, 0.75]], 56 | dtype=np.float32) 57 | boxes2 = np.array([[0.5, 0.25, 1.0, 1.0], 58 | [0.0, 0.0, 1.0, 1.0]], 59 | dtype=np.float32) 60 | ioa21 = np_box_ops.ioa(boxes2, boxes1) 61 | expected_ioa21 = np.array([[0.5, 0.0], 62 | [1.0, 1.0]], 63 | dtype=np.float32) 64 | self.assertAllClose(ioa21, expected_ioa21) 65 | 66 | 67 | if __name__ == '__main__': 68 | tf.test.main() 69 | -------------------------------------------------------------------------------- /src/postprocessing.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import PIL 4 | import numpy as np 5 | import pandas as pd 6 | from steppy.base import BaseTransformer 7 | 8 | from src.logging import LOGGER 9 | from .pipeline_config import CODES2NAMES, SOLUTION_CONFIG, params 10 | from .utils import visualize_bboxes 11 | 12 | 13 | class PredictionFormatter(BaseTransformer): 14 | def transform(self, images_data, results, decoder_dict): 15 | self.decoder_dict = decoder_dict 16 | image_ids = images_data['ImageID'].values.tolist() 17 | prediction_strings = [] 18 | for bboxes, labels, scores in results: 19 | prediction_strings.append(self._get_prediction_string(bboxes, labels, scores)) 20 | submission = pd.DataFrame({'ImageId': image_ids, 'PredictionString': prediction_strings}) 21 | return {'submission': submission} 22 | 23 | def _get_prediction_string(self, bboxes, labels, scores): 24 | prediction_list = [] 25 | for bbox, label, score in zip(bboxes, labels, scores): 26 | prediction_list.append(self._get_class_id(label)) 27 | prediction_list.append(str(score)) 28 | prediction_list.extend([str(coord) for coord in bbox]) 29 | prediction_string = " ".join(prediction_list) 30 | return prediction_string 31 
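# For reference, one assembled PredictionString for two hypothetical detections
# would look like (label codes, scores and coordinates below are made-up examples):
#   "/m/01g317 0.91 0.10 0.20 0.60 0.80 /m/0199g 0.45 0.05 0.05 0.30 0.40"
# i.e. space-separated "<label> <score> <coord> <coord> <coord> <coord>" groups,
# where <label> is whatever _get_class_id() looks up in decoder_dict and the four
# coordinates are emitted exactly as they appear in `bbox`.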
| 32 | def _get_class_id(self, label): 33 | return self.decoder_dict[label] 34 | 35 | 36 | class Visualizer(BaseTransformer): 37 | def transform(self, images_data, results, decoder_dict): 38 | image_ids = images_data['ImageID'].values.tolist() 39 | decoder_dict = decoder_dict 40 | all_detections, all_boxes = [], [] 41 | for i, (image_id, detections) in enumerate(zip(image_ids, results)): 42 | if not bool(detections[0].size()): 43 | continue 44 | LOGGER.info("Drawing boxes on image {}/{}".format(i, len(results))) 45 | image = PIL.Image.open( 46 | os.path.join(SOLUTION_CONFIG['loader']['dataset_params']['images_dir'], image_id + '.jpg')) 47 | width, height = image.size # original image size 48 | box = detections[0].numpy() 49 | classes = detections[1].numpy() 50 | scores = detections[2].numpy() 51 | 52 | df = pd.DataFrame(np.column_stack([box, classes, scores])) 53 | df.columns = ['x1', 'y1', 'x2', 'y2', 'class_id', 'score'] 54 | df['class_name'] = df.class_id.map(decoder_dict) 55 | df.class_name = df.class_name.map(CODES2NAMES) 56 | 57 | # to absolute 58 | df['x1'] = df['x1'] * width 59 | df['x2'] = df['x2'] * width 60 | df['y1'] = df['y1'] * height 61 | df['y2'] = df['y2'] * height 62 | 63 | pil_image_detections = visualize_bboxes(image, df) 64 | all_detections.append(pil_image_detections) 65 | all_boxes.append(box) 66 | return all_detections 67 | -------------------------------------------------------------------------------- /configs/batch_6.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-AI-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch_6, retrain] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 0 61 | desired_class_subset: "['Butterfly', 62 | 'Bee', 63 | 'Cattle', 64 | 'Desk', 65 | 'Bus', 66 | 'Picture frame', 67 | 'Rose', 68 | 'Truck', 69 | 'Wine glass', 70 | 'Train', 71 | 'Horse', 72 | 'Motorcycle', 73 | 'Balloon', 74 | 'Cat', 75 | 'Wine', 76 | 'Duck', 77 | 'Door', 78 | 'Airplane', 79 | 'Flowerpot', 80 | 'Houseplant', 81 | 'Shelf', 82 | 'Poster', 83 | 'Sunglasses', 84 | 'Drum', 85 | 'Guitar', 86 | 'Microphone', 87 | 'Dog', 88 | 'Flag']" 89 | 90 | # 
Retina parameters (multi-output) 91 | encoder_depth: 50 92 | num_classes: 100 93 | pretrained_encoder: 1 94 | pi: 0.01 95 | aspect_ratios: '[1/2., 1/1., 2/1.]' 96 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 97 | 98 | # Training schedule 99 | epochs_nr: 1000 100 | batch_size_train: 8 101 | batch_size_inference: 1 102 | lr: 0.00001 103 | momentum: 0.9 104 | gamma: 1.0 105 | patience: 100 106 | lr_factor: 0.3 107 | lr_patience: 30 108 | training_sample_size: 10000 109 | validation_sample_size: 2000 110 | 111 | # Regularization 112 | use_batch_norm: 1 113 | l2_reg_conv: 0.0001 114 | l2_reg_dense: 0.0 115 | dropout_conv: 0.1 116 | dropout_dense: 0.0 117 | 118 | # Postprocessing 119 | classification_threshold: 0.05 120 | nms_threshold: 0.5 121 | -------------------------------------------------------------------------------- /src/object_detection/dataset_tools/oid_hierarchical_labels_expansion_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for the OpenImages label expansion (OIDHierarchicalLabelsExpansion).""" 16 | 17 | 18 | 19 | 20 | 21 | import tensorflow as tf 22 | 23 | from object_detection.dataset_tools import oid_hierarchical_labels_expansion 24 | 25 | 26 | def create_test_data(): 27 | hierarchy = { 28 | 'LabelName': 29 | 'a', 30 | 'Subcategory': [{ 31 | 'LabelName': 'b' 32 | }, { 33 | 'LabelName': 'c', 34 | 'Subcategory': [{ 35 | 'LabelName': 'd' 36 | }, { 37 | 'LabelName': 'e' 38 | }] 39 | }, { 40 | 'LabelName': 'f', 41 | 'Subcategory': [{ 42 | 'LabelName': 'd' 43 | },] 44 | }] 45 | } 46 | bbox_rows = [ 47 | '123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0', 48 | '123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0' 49 | ] 50 | label_rows = [ 51 | '123,verification,b,0', '123,verification,c,0', '124,verification,d,1' 52 | ] 53 | return hierarchy, bbox_rows, label_rows 54 | 55 | 56 | class HierarchicalLabelsExpansionTest(tf.test.TestCase): 57 | 58 | def test_bbox_expansion(self): 59 | hierarchy, bbox_rows, _ = create_test_data() 60 | expansion_generator = ( 61 | oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion( 62 | hierarchy)) 63 | all_result_rows = [] 64 | for row in bbox_rows: 65 | all_result_rows.extend(expansion_generator.expand_boxes_from_csv(row)) 66 | self.assertItemsEqual([ 67 | '123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0', 68 | '123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0', 69 | '123,xclick,f,1,0.2,0.3,0.1,0.2,1,1,0,0,0', 70 | '123,xclick,c,1,0.2,0.3,0.1,0.2,1,1,0,0,0' 71 | ], all_result_rows) 72 | 73 | def test_labels_expansion(self): 74 | hierarchy, _, label_rows = create_test_data() 75 | expansion_generator = ( 76 | oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion( 77 | hierarchy)) 78 | all_result_rows = [] 79 | for row in label_rows: 80 | 
all_result_rows.extend(expansion_generator.expand_labels_from_csv(row)) 81 | self.assertItemsEqual([ 82 | '123,verification,b,0', '123,verification,c,0', '123,verification,d,0', 83 | '123,verification,e,0', '124,verification,d,1', '124,verification,f,1', 84 | '124,verification,c,1' 85 | ], all_result_rows) 86 | 87 | if __name__ == '__main__': 88 | tf.test.main() 89 | -------------------------------------------------------------------------------- /configs/neptune.yaml: -------------------------------------------------------------------------------- 1 | project: USERNAME/googleai-object-detection 2 | 3 | name: Google AI object detection 4 | tags: [solution-1] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | pip-requirements-file: requirements.txt # Comment out if Local execution 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | - src/object_detection 23 | 24 | parameters: 25 | # Data Paths 26 | train_imgs_dir: /public/datasets/open-images-dataset-v4/bounding-boxes/train 27 | test_imgs_dir: /public/datasets/open-images-dataset-v4/bounding-boxes/test_challenge_2018 28 | annotations_filepath: /public/challenges/google-ai-open-images-object-detection-track/annotations/challenge-2018-train-annotations-bbox.csv 29 | annotations_human_labels_filepath: /public/challenges/google-ai-open-images-object-detection-track/annotations/challenge-2018-train-annotations-human-imagelabels.csv 30 | bbox_hierarchy_filepath: /public/challenges/google-ai-open-images-object-detection-track/metadata/bbox_labels_500_hierarchy.json 31 | class_mappings_filepath: /public/challenges/google-ai-open-images-object-detection-track/metadata/challenge-2018-class-descriptions-500.csv 32 | valid_ids_filepath: /public/challenges/google-ai-open-images-object-detection-track/metadata/challenge-2018-image-ids-valset-od.csv 33 | sample_submission: /public/challenges/google-ai-open-images-object-detection-track/sample_submission.csv 34 | experiment_dir: /output/experiment 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 1 39 | num_workers: 4 40 | num_threads: 4 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'fixed' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 400 56 | long_dim: 600 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 1 60 | max_annotation_per_class: 75000 61 | desired_class_subset: "['Poster', 'Cat', 'Train', 'Dog', 'Bus','Truck', 'Picture frame', 'Airplane', 'Sculpture', 'Motorcycle']" 62 | 63 | # Retina parameters (multi-output) 64 | encoder_depth: 50 65 | num_classes: 10 66 | pretrained_encoder: 1 67 | pi: 0.01 68 | aspect_ratios: '[1/2., 1/1., 2/1.]' 69 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 70 | 71 | # Training schedule 72 | epochs_nr: 100 73 | batch_size_train: 8 74 | batch_size_inference: 8 75 | lr: 0.00001 76 | momentum: 0.9 77 | gamma: 1.0 78 | patience: 30 79 | lr_factor: 0.3 80 | lr_patience: 30 81 | training_sample_size: 10000 82 | validation_sample_size: 1000 83 | 84 | # Regularization 85 | use_batch_norm: 
1 86 | l2_reg_conv: 0.0001 87 | l2_reg_dense: 0.0 88 | dropout_conv: 0.1 89 | dropout_dense: 0.0 90 | 91 | # Postprocessing 92 | classification_threshold: 0.05 93 | nms_threshold: 0.5 94 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at contact@neptune.ml. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /src/object_detection/utils/test_utils_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.test_utils.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import test_utils 22 | 23 | 24 | class TestUtilsTest(tf.test.TestCase): 25 | 26 | def test_diagonal_gradient_image(self): 27 | """Tests if a good pyramid image is created.""" 28 | pyramid_image = test_utils.create_diagonal_gradient_image(3, 4, 2) 29 | 30 | # Test which is easy to understand. 31 | expected_first_channel = np.array([[3, 2, 1, 0], 32 | [4, 3, 2, 1], 33 | [5, 4, 3, 2]], dtype=np.float32) 34 | self.assertAllEqual(np.squeeze(pyramid_image[:, :, 0]), 35 | expected_first_channel) 36 | 37 | # Actual test. 
38 | expected_image = np.array([[[3, 30], 39 | [2, 20], 40 | [1, 10], 41 | [0, 0]], 42 | [[4, 40], 43 | [3, 30], 44 | [2, 20], 45 | [1, 10]], 46 | [[5, 50], 47 | [4, 40], 48 | [3, 30], 49 | [2, 20]]], dtype=np.float32) 50 | 51 | self.assertAllEqual(pyramid_image, expected_image) 52 | 53 | def test_random_boxes(self): 54 | """Tests if valid random boxes are created.""" 55 | num_boxes = 1000 56 | max_height = 3 57 | max_width = 5 58 | boxes = test_utils.create_random_boxes(num_boxes, 59 | max_height, 60 | max_width) 61 | 62 | true_column = np.ones(shape=(num_boxes)) == 1 63 | self.assertAllEqual(boxes[:, 0] < boxes[:, 2], true_column) 64 | self.assertAllEqual(boxes[:, 1] < boxes[:, 3], true_column) 65 | 66 | self.assertTrue(boxes[:, 0].min() >= 0) 67 | self.assertTrue(boxes[:, 1].min() >= 0) 68 | self.assertTrue(boxes[:, 2].max() <= max_height) 69 | self.assertTrue(boxes[:, 3].max() <= max_width) 70 | 71 | def test_first_rows_close_as_set(self): 72 | a = [1, 2, 3, 0, 0] 73 | b = [3, 2, 1, 0, 0] 74 | k = 3 75 | self.assertTrue(test_utils.first_rows_close_as_set(a, b, k)) 76 | 77 | a = [[1, 2], [1, 4], [0, 0]] 78 | b = [[1, 4 + 1e-9], [1, 2], [0, 0]] 79 | k = 2 80 | self.assertTrue(test_utils.first_rows_close_as_set(a, b, k)) 81 | 82 | a = [[1, 2], [1, 4], [0, 0]] 83 | b = [[1, 4 + 1e-9], [2, 2], [0, 0]] 84 | k = 2 85 | self.assertFalse(test_utils.first_rows_close_as_set(a, b, k)) 86 | 87 | 88 | if __name__ == '__main__': 89 | tf.test.main() 90 | -------------------------------------------------------------------------------- /configs/batch_1.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-AI-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch_1, retrain] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '/mnt/ml-team/open-images-v4/bounding-boxes/train/' 26 | test_imgs_dir: '/mnt/ml-team/open-images-v4/bounding-boxes/test_challenge_2018/' 27 | annotations_filepath: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/data/annotations/challenge-2018-train-annotations-bbox.csv' 28 | annotations_human_labels_filepath: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/data/annotations/challenge-2018-train-annotations-human-imagelabels.csv' 29 | bbox_hierarchy_filepath: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/data/metadata/bbox_labels_500_hierarchy.json' 30 | valid_ids_filepath: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/data/metadata/challenge-2018-image-ids-valset-od.csv' 31 | experiment_dir: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/kuba/experiments/batch_1' 32 | class_mappings_filepath: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/data/metadata/challenge-2018-class-descriptions-500.csv' 33 | metadata_filepath: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/files/metadata.csv' 34 | sample_submission: '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/data/sample_submission.csv' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | 
clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 1000000 61 | desired_class_subset: "['Pressure cooker', 62 | 'Torch', 63 | 'Winter melon', 64 | 'Spatula', 65 | 'Toaster', 66 | 'Measuring cup', 67 | 'Ring binder', 68 | 'Screwdriver', 69 | 'Flashlight', 70 | 'Light switch']" 71 | 72 | # Retina parameters (multi-output) 73 | encoder_depth: 50 74 | num_classes: 100 75 | pretrained_encoder: 1 76 | pi: 0.01 77 | aspect_ratios: '[1/2., 1/1., 2/1.]' 78 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 79 | 80 | # Training schedule 81 | epochs_nr: 1000 82 | batch_size_train: 8 83 | batch_size_inference: 1 84 | lr: 0.00001 85 | momentum: 0.9 86 | gamma: 1.0 87 | patience: 100 88 | lr_factor: 0.3 89 | lr_patience: 30 90 | training_sample_size: 10000 91 | validation_sample_size: 2000 92 | 93 | # Regularization 94 | use_batch_norm: 1 95 | l2_reg_conv: 0.0001 96 | l2_reg_dense: 0.0 97 | dropout_conv: 0.1 98 | dropout_dense: 0.0 99 | 100 | # Postprocessing 101 | classification_threshold: 0.05 102 | nms_threshold: 0.5 103 | -------------------------------------------------------------------------------- /configs/batch_5.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-AI-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch-5, retrain] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 0 61 | desired_class_subset: "['Helicopter', 62 | 'Monkey', 63 | 'Coin', 64 | 'Chicken', 65 | 'Salad', 66 | 'Countertop', 67 | 'Elephant', 68 | 'Sheep', 69 | 'Platter', 70 | 'Ski', 71 | 'Pillow', 72 | 'Lamp', 73 | 'Lifejacket', 74 | 'Fountain', 75 | 
'Television', 76 | 'Bread', 77 | 'Porch', 78 | 'Deer', 79 | 'Swimming pool', 80 | 'Apple', 81 | 'Cookie', 82 | 'Penguin', 83 | 'Taxi', 84 | 'Christmas tree', 85 | 'Castle', 86 | 'Drawer', 87 | 'Cocktail', 88 | 'Mushroom', 89 | 'Bowl', 90 | 'Swan', 91 | 'Computer keyboard', 92 | 'Canoe', 93 | 'Muffin', 94 | 'Curtain', 95 | 'Maple', 96 | 'Office building', 97 | 'Football', 98 | 'Bookcase', 99 | 'Coffee table', 100 | 'Coffee cup', 101 | 'Box', 102 | 'Plate', 103 | 'Lantern', 104 | 'Cake', 105 | 'Candle', 106 | 'Stairs', 107 | 'Computer monitor', 108 | 'Pumpkin', 109 | 'Orange', 110 | 'Tomato', 111 | 'Mobile phone', 112 | 'Camera', 113 | 'Doll', 114 | 'Lavender', 115 | 'Sunflower', 116 | 'Tent', 117 | 'Paddle', 118 | 'Traffic light', 119 | 'Van', 120 | 'Vehicle registration plate', 121 | 'Strawberry', 122 | 'Goose', 123 | 'Cabinetry', 124 | 'Laptop', 125 | 'Beer', 126 | 'Goggles', 127 | 'Billboard']" 128 | 129 | # Retina parameters (multi-output) 130 | encoder_depth: 50 131 | num_classes: 100 132 | pretrained_encoder: 1 133 | pi: 0.01 134 | aspect_ratios: '[1/2., 1/1., 2/1.]' 135 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 136 | 137 | # Training schedule 138 | epochs_nr: 1000 139 | batch_size_train: 8 140 | batch_size_inference: 1 141 | lr: 0.00001 142 | momentum: 0.9 143 | gamma: 1.0 144 | patience: 100 145 | lr_factor: 0.3 146 | lr_patience: 30 147 | training_sample_size: 10000 148 | validation_sample_size: 2000 149 | 150 | # Regularization 151 | use_batch_norm: 1 152 | l2_reg_conv: 0.0001 153 | l2_reg_dense: 0.0 154 | dropout_conv: 0.1 155 | dropout_dense: 0.0 156 | 157 | # Postprocessing 158 | classification_threshold: 0.05 159 | nms_threshold: 0.5 160 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Operations for [N, 4] numpy arrays representing bounding boxes. 17 | 18 | Example box operations that are supported: 19 | * Areas: compute bounding box areas 20 | * IOU: pairwise intersection-over-union scores 21 | """ 22 | import numpy as np 23 | 24 | 25 | def area(boxes): 26 | """Computes area of boxes. 27 | 28 | Args: 29 | boxes: Numpy array with shape [N, 4] holding N boxes 30 | 31 | Returns: 32 | a numpy array with shape [N*1] representing box areas 33 | """ 34 | return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) 35 | 36 | 37 | def intersection(boxes1, boxes2): 38 | """Compute pairwise intersection areas between boxes. 
39 | 40 | Args: 41 | boxes1: a numpy array with shape [N, 4] holding N boxes 42 | boxes2: a numpy array with shape [M, 4] holding M boxes 43 | 44 | Returns: 45 | a numpy array with shape [N*M] representing pairwise intersection area 46 | """ 47 | [y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1) 48 | [y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1) 49 | 50 | all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2)) 51 | all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2)) 52 | intersect_heights = np.maximum( 53 | np.zeros(all_pairs_max_ymin.shape), 54 | all_pairs_min_ymax - all_pairs_max_ymin) 55 | all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2)) 56 | all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2)) 57 | intersect_widths = np.maximum( 58 | np.zeros(all_pairs_max_xmin.shape), 59 | all_pairs_min_xmax - all_pairs_max_xmin) 60 | return intersect_heights * intersect_widths 61 | 62 | 63 | def iou(boxes1, boxes2): 64 | """Computes pairwise intersection-over-union between box collections. 65 | 66 | Args: 67 | boxes1: a numpy array with shape [N, 4] holding N boxes. 68 | boxes2: a numpy array with shape [M, 4] holding N boxes. 69 | 70 | Returns: 71 | a numpy array with shape [N, M] representing pairwise iou scores. 72 | """ 73 | intersect = intersection(boxes1, boxes2) 74 | area1 = area(boxes1) 75 | area2 = area(boxes2) 76 | union = np.expand_dims(area1, axis=1) + np.expand_dims( 77 | area2, axis=0) - intersect 78 | return intersect / union 79 | 80 | 81 | def ioa(boxes1, boxes2): 82 | """Computes pairwise intersection-over-area between box collections. 83 | 84 | Intersection-over-area (ioa) between two boxes box1 and box2 is defined as 85 | their intersection area over box2's area. Note that ioa is not symmetric, 86 | that is, IOA(box1, box2) != IOA(box2, box1). 87 | 88 | Args: 89 | boxes1: a numpy array with shape [N, 4] holding N boxes. 90 | boxes2: a numpy array with shape [M, 4] holding N boxes. 91 | 92 | Returns: 93 | a numpy array with shape [N, M] representing pairwise ioa scores. 
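  Example (editorial sketch, not part of the original module; boxes follow the
  [y_min, x_min, y_max, x_max] convention used throughout this file):

    boxes1 = np.array([[0., 0., 2., 2.]], dtype=np.float32)  # area 4
    boxes2 = np.array([[1., 1., 3., 3.]], dtype=np.float32)  # area 4
    intersection(boxes1, boxes2)  # -> [[1.]], a 1x1 overlap
    iou(boxes1, boxes2)           # -> [[1. / 7.]], i.e. 1 / (4 + 4 - 1)
    ioa(boxes1, boxes2)           # -> [[0.25]], i.e. 1 / area(boxes2)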
94 | """ 95 | intersect = intersection(boxes1, boxes2) 96 | areas = np.expand_dims(area(boxes2), axis=0) 97 | return intersect / areas 98 | -------------------------------------------------------------------------------- /configs/batch_8.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-AI-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch-8, eval] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 1000000 61 | desired_class_subset: "['Roller skates', 62 | 'Toy', 63 | 'Boat', 64 | 'Jeans', 65 | 'Fedora', 66 | 'Man', 67 | 'Invertebrate', 68 | 'Personal care', 69 | 'Marine mammal', 70 | 'Weapon', 71 | 'Ball', 72 | 'Cowboy hat', 73 | 'Marine invertebrates', 74 | 'Turtle', 75 | 'Boy', 76 | 'Fish', 77 | 'Suitcase', 78 | 'Tableware', 79 | 'High heels', 80 | 'Traffic sign', 81 | 'Plumbing fixture', 82 | 'Squash', 83 | 'Girl', 84 | 'Insect', 85 | 'Sandwich', 86 | 'Telephone', 87 | 'Fruit', 88 | 'Baseball glove', 89 | 'Clock', 90 | 'Sun hat', 91 | 'Shellfish', 92 | 'Sculpture', 93 | 'Land vehicle', 94 | 'Flower', 95 | 'Seafood', 96 | 'Building', 97 | 'Aircraft', 98 | 'Watercraft', 99 | 'Football helmet', 100 | 'Musical instrument', 101 | 'Table', 102 | 'Racket', 103 | 'Miniskirt', 104 | 'Moths and butterflies', 105 | 'Furniture', 106 | 'Sandal', 107 | 'Boot', 108 | 'Bicycle helmet', 109 | 'Car', 110 | 'Bird', 111 | 'Bed', 112 | 'Couch', 113 | 'Backpack', 114 | 'Carnivore', 115 | 'Animal', 116 | 'Woman', 117 | 'Drink', 118 | 'Briefcase', 119 | 'Vegetable', 120 | 'Home appliance', 121 | 'Kitchen appliance', 122 | 'Tree', 123 | 'Office supplies', 124 | 'Bear', 125 | 'Beetle', 126 | 'Sombrero', 127 | 'Dessert', 128 | 'Vehicle', 129 | 'Reptile', 130 | 'Handbag']" 131 | 132 | # Retina parameters (multi-output) 133 | encoder_depth: 50 134 | num_classes: 100 135 | pretrained_encoder: 1 136 | pi: 0.01 137 | aspect_ratios: '[1/2., 1/1., 2/1.]' 138 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 139 | 140 | # Training schedule 141 | epochs_nr: 1000 142 | batch_size_train: 8 143 | batch_size_inference: 1 144 | lr: 0.00001 145 | momentum: 0.9 146 | gamma: 1.0 147 | patience: 100 148 | 
lr_factor: 0.3 149 | lr_patience: 30 150 | training_sample_size: 10000 151 | validation_sample_size: 2000 152 | 153 | # Regularization 154 | use_batch_norm: 1 155 | l2_reg_conv: 0.0001 156 | l2_reg_dense: 0.0 157 | dropout_conv: 0.1 158 | dropout_dense: 0.0 159 | 160 | # Postprocessing 161 | classification_threshold: 0.05 162 | nms_threshold: 0.5 163 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_mask_ops_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.np_mask_ops.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import np_mask_ops 22 | 23 | 24 | class MaskOpsTests(tf.test.TestCase): 25 | 26 | def setUp(self): 27 | masks1_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0], 28 | [0, 0, 0, 0, 0, 0, 0, 0], 29 | [0, 0, 0, 0, 0, 0, 0, 0], 30 | [1, 1, 1, 1, 0, 0, 0, 0], 31 | [1, 1, 1, 1, 0, 0, 0, 0]], 32 | dtype=np.uint8) 33 | masks1_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 1], 34 | [1, 1, 0, 0, 0, 0, 0, 0], 35 | [0, 0, 0, 0, 0, 0, 0, 0], 36 | [0, 0, 0, 0, 0, 0, 0, 0], 37 | [0, 0, 0, 0, 0, 0, 0, 0]], 38 | dtype=np.uint8) 39 | masks1 = np.stack([masks1_0, masks1_1]) 40 | masks2_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0], 41 | [0, 0, 0, 0, 0, 0, 0, 0], 42 | [0, 0, 0, 0, 0, 0, 0, 0], 43 | [1, 1, 1, 1, 0, 0, 0, 0], 44 | [1, 1, 1, 1, 0, 0, 0, 0]], 45 | dtype=np.uint8) 46 | masks2_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 0], 47 | [1, 1, 1, 1, 1, 0, 0, 0], 48 | [1, 1, 1, 0, 0, 0, 0, 0], 49 | [0, 0, 0, 0, 0, 0, 0, 0], 50 | [0, 0, 0, 0, 0, 0, 0, 0]], 51 | dtype=np.uint8) 52 | masks2_2 = np.array([[1, 1, 1, 1, 1, 0, 0, 0], 53 | [1, 1, 1, 1, 1, 0, 0, 0], 54 | [1, 1, 1, 1, 1, 0, 0, 0], 55 | [1, 1, 1, 1, 1, 0, 0, 0], 56 | [1, 1, 1, 1, 1, 0, 0, 0]], 57 | dtype=np.uint8) 58 | masks2 = np.stack([masks2_0, masks2_1, masks2_2]) 59 | self.masks1 = masks1 60 | self.masks2 = masks2 61 | 62 | def testArea(self): 63 | areas = np_mask_ops.area(self.masks1) 64 | expected_areas = np.array([8.0, 10.0], dtype=np.float32) 65 | self.assertAllClose(expected_areas, areas) 66 | 67 | def testIntersection(self): 68 | intersection = np_mask_ops.intersection(self.masks1, self.masks2) 69 | expected_intersection = np.array( 70 | [[8.0, 0.0, 8.0], [0.0, 9.0, 7.0]], dtype=np.float32) 71 | self.assertAllClose(intersection, expected_intersection) 72 | 73 | def testIOU(self): 74 | iou = np_mask_ops.iou(self.masks1, self.masks2) 75 | expected_iou = np.array( 76 | [[1.0, 0.0, 8.0/25.0], [0.0, 9.0 / 16.0, 7.0 / 28.0]], dtype=np.float32) 77 | self.assertAllClose(iou, expected_iou) 78 | 79 | def testIOA(self): 80 | ioa21 = np_mask_ops.ioa(self.masks1, self.masks2) 81 | expected_ioa21 = np.array([[1.0, 0.0, 8.0/25.0], 82 | [0.0, 9.0/15.0, 
7.0/25.0]], 83 | dtype=np.float32) 84 | self.assertAllClose(ioa21, expected_ioa21) 85 | 86 | 87 | if __name__ == '__main__': 88 | tf.test.main() 89 | -------------------------------------------------------------------------------- /configs/batch_3.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-Ai-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch-3] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 0 61 | desired_class_subset: "['Tennis ball', 62 | 'Beehive', 63 | 'Woodpecker', 64 | 'Scoreboard', 65 | 'Gas stove', 66 | 'Jet ski', 67 | 'Bathtub', 68 | 'Hamster', 69 | 'Segway', 70 | 'Fox', 71 | 'Raven', 72 | 'Sword', 73 | 'Window blind', 74 | 'Shotgun', 75 | 'Jaguar', 76 | 'Jug', 77 | 'Refrigerator', 78 | 'Bow and arrow', 79 | 'Golf cart', 80 | 'Lobster', 81 | 'Potato', 82 | 'Missile', 83 | 'Chopsticks', 84 | 'Shark', 85 | 'Loveseat', 86 | 'Teapot', 87 | 'Oven', 88 | 'Starfish', 89 | 'Bagel', 90 | 'Ostrich', 91 | 'Brown bear', 92 | 'Washing machine', 93 | 'Bat', 94 | 'Kettle', 95 | 'Pineapple', 96 | 'Volleyball', 97 | 'Polar bear', 98 | 'Taco', 99 | 'Pomegranate', 100 | 'Mechanical fan', 101 | 'Radish', 102 | 'Waffle', 103 | 'Fireplace', 104 | 'Dice', 105 | 'Cheetah', 106 | 'Rhinoceros', 107 | 'Handgun', 108 | 'Wok', 109 | 'Turkey', 110 | 'Ladybug', 111 | 'Willow', 112 | 'Door handle', 113 | 'Otter', 114 | 'Peach', 115 | 'Pasta', 116 | 'Snowman', 117 | 'Pancake', 118 | 'Kangaroo', 119 | 'Bell pepper', 120 | 'Leopard', 121 | 'Alpaca', 122 | 'Watermelon', 123 | 'Table tennis racket', 124 | 'Knife', 125 | 'Mouse', 126 | 'Coconut', 127 | 'Caterpillar', 128 | 'Billiard table', 129 | 'Rocket', 130 | 'Pear', 131 | 'Ant', 132 | 'Doughnut', 133 | 'Snail', 134 | 'Snowboard', 135 | 'Trombone', 136 | 'Accordion', 137 | 'Tart', 138 | 'Tablet computer', 139 | 'Barge', 140 | 'Tennis racket', 141 | 'Plastic bag', 142 | 'Ladder']" 143 | 144 | # Retina parameters (multi-output) 145 | encoder_depth: 50 146 | num_classes: 100 147 | pretrained_encoder: 1 148 | pi: 0.01 149 | aspect_ratios: '[1/2., 1/1., 2/1.]' 150 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 151 | 152 | # Training 
schedule 153 | epochs_nr: 1000 154 | batch_size_train: 8 155 | batch_size_inference: 1 156 | lr: 0.00001 157 | momentum: 0.9 158 | gamma: 1.0 159 | patience: 100 160 | lr_factor: 0.3 161 | lr_patience: 30 162 | training_sample_size: 10000 163 | validation_sample_size: 2000 164 | 165 | # Regularization 166 | use_batch_norm: 1 167 | l2_reg_conv: 0.0001 168 | l2_reg_dense: 0.0 169 | dropout_conv: 0.1 170 | dropout_dense: 0.0 171 | 172 | # Postprocessing 173 | classification_threshold: 0.05 174 | nms_threshold: 0.5 175 | -------------------------------------------------------------------------------- /configs/batch_2.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-Ai-Object-Detection-Challenge 2 | 3 | name: google AI object detection 4 | tags: [solution-1, batch-2, eval] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 0 61 | desired_class_subset: "['Serving tray', 62 | 'Binoculars', 63 | 'Slow cooker', 64 | 'Cricket ball', 65 | 'Tick', 66 | 'Crutch', 67 | 'Oboe', 68 | 'Beaker', 69 | 'Alarm clock', 70 | 'Stretcher', 71 | 'Envelope', 72 | 'Salt and pepper shakers', 73 | 'Food processor', 74 | 'Bench', 75 | 'Digital clock', 76 | 'Wrench', 77 | 'Paper towel', 78 | 'Harpsichord', 79 | 'Cutting board', 80 | 'Mixer', 81 | 'Guacamole', 82 | 'Porcupine', 83 | 'Harp', 84 | 'Blender', 85 | 'Shower', 86 | 'Lynx', 87 | 'Treadmill', 88 | 'Ruler', 89 | 'Adhesive tape', 90 | 'Blue jay', 91 | 'Burrito', 92 | 'Printer', 93 | 'Dog bed', 94 | 'Submarine sandwich', 95 | 'Centipede', 96 | 'Power plugs and sockets', 97 | 'Drinking straw', 98 | 'Rugby ball', 99 | 'Pretzel', 100 | 'Wood-burning stove', 101 | 'Snowplow', 102 | 'Seahorse', 103 | 'Common fig', 104 | 'Coffeemaker', 105 | 'Punching bag', 106 | 'Cake stand', 107 | 'Towel', 108 | 'Stationary bicycle', 109 | 'Pitcher', 110 | 'Kitchen knife', 111 | 'Bathroom cabinet', 112 | 'Flute', 113 | 'Popcorn', 114 | 'Limousine', 115 | 'Snowmobile', 116 | 'Dagger', 117 | 'Filing cabinet', 118 | 'Artichoke', 119 | 'Toilet paper', 120 | 'Frying pan', 121 | 'Raccoon', 122 | 'Honeycomb', 123 | 'Canary', 124 | 'Asparagus', 125 | 'Stop sign', 126 | 'Organ', 127 | 'Scissors', 128 | 'Dumbbell', 129 
| 'Picnic basket', 130 | 'Mango', 131 | 'Fire hydrant', 132 | 'Corded phone', 133 | 'Golf ball', 134 | 'Cabbage', 135 | 'Bidet', 136 | 'Croissant', 137 | 'Ambulance', 138 | 'Sewing machine', 139 | 'Seat belt', 140 | 'Infant bed', 141 | 'Ceiling fan', 142 | 'Hot dog', 143 | 'Microwave oven', 144 | 'Nail']" 145 | 146 | # Retina parameters (multi-output) 147 | encoder_depth: 50 148 | num_classes: 100 149 | pretrained_encoder: 1 150 | pi: 0.01 151 | aspect_ratios: '[1/2., 1/1., 2/1.]' 152 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 153 | 154 | # Training schedule 155 | epochs_nr: 1000 156 | batch_size_train: 8 157 | batch_size_inference: 8 158 | lr: 0.00001 159 | momentum: 0.9 160 | gamma: 1.0 161 | patience: 100 162 | lr_factor: 0.3 163 | lr_patience: 30 164 | training_sample_size: 10000 165 | validation_sample_size: 2000 166 | 167 | # Regularization 168 | use_batch_norm: 1 169 | l2_reg_conv: 0.0001 170 | l2_reg_dense: 0.0 171 | dropout_conv: 0.1 172 | dropout_dense: 0.0 173 | 174 | # Postprocessing 175 | classification_threshold: 0.05 176 | nms_threshold: 0.5 177 | -------------------------------------------------------------------------------- /configs/batch_4.yaml: -------------------------------------------------------------------------------- 1 | project: neptune-ml/Google-Ai-Object-Detection-Challenge 2 | 3 | name: google-ai-object-detection 4 | tags: [solution-1, batch-4] 5 | 6 | metric: 7 | channel: 'MAP' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | 23 | parameters: 24 | # Data Paths 25 | train_imgs_dir: '' 26 | test_imgs_dir: '' 27 | annotations_filepath: '' 28 | annotations_human_labels_filepath: '' 29 | bbox_hierarchy_filepath: '' 30 | valid_ids_filepath: '' 31 | sample_submission: '' 32 | experiment_dir: '' 33 | class_mappings_filepath: '' 34 | metadata_filepath: '' 35 | clone_experiment_dir_from: '' #When running eval specify this as for example /input/GAI-14/output/experiment 36 | 37 | # Execution 38 | clean_experiment_directory_before_training: 0 39 | num_workers: 4 40 | num_threads: 100 41 | load_in_memory: 0 42 | pin_memory: 1 43 | default_valid_ids: 1 44 | loader_mode: resize 45 | stream_mode: 0 46 | validate_with_map: 1 47 | small_annotations_size: 20 48 | kaggle_message: 'solution-1' 49 | 50 | # General parameters 51 | sampler_name: 'aspect ratio' # from {'fixed', 'aspect ratio'} 52 | even_class_sampling: 1 53 | fixed_h: 512 54 | fixed_w: 512 55 | short_dim: 640 #512 56 | long_dim: 960 #896 57 | image_channels: 3 58 | pad_method: 'resize' 59 | use_suppression: 0 60 | max_annotation_per_class: 0 61 | desired_class_subset: "['Whiteboard', 62 | 'Whale', 63 | 'Oyster', 64 | 'Crab', 65 | 'Bust', 66 | 'Wall clock', 67 | 'Crocodile', 68 | 'Cannon', 69 | 'Zucchini', 70 | 'Toilet', 71 | 'Mule', 72 | 'Zebra', 73 | 'Nightstand', 74 | 'Broccoli', 75 | 'Sea turtle', 76 | 'Cucumber', 77 | 'Saxophone', 78 | 'Baseball bat', 79 | 'Horn', 80 | 'Stool', 81 | 'Headphones', 82 | 'Tiger', 83 | 'Grapefruit', 84 | 'Camel', 85 | 'Tea', 86 | 'Cupboard', 87 | 'Piano', 88 | 'Snake', 89 | 'Giraffe', 90 | 'Tripod', 91 | 'Carrot', 92 | 'Wheelchair', 93 | 'Hamburger', 94 | 'Dragonfly', 95 | 'Sofa bed', 96 | 'Lighthouse', 97 | 'Chest of drawers', 98 | 'Dolphin', 99 | 'Trumpet', 100 | 'Antelope', 101 | 'Mirror', 102 | 'Teddy bear', 103 | 'Frog', 104 | 'Banana', 105 | 'Pig', 
106 | 'Rabbit', 107 | 'Sink', 108 | 'Sparrow', 109 | 'Lion', 110 | 'Owl', 111 | 'Fork', 112 | 'Tap', 113 | 'Eagle', 114 | 'Pen', 115 | 'Spoon', 116 | 'Tank', 117 | 'Falcon', 118 | 'Dinosaur', 119 | 'Bull', 120 | 'Lemon', 121 | 'Kite', 122 | 'Musical keyboard', 123 | 'Waste container', 124 | 'Skateboard', 125 | 'Light bulb', 126 | 'Convenience store', 127 | 'Sea lion', 128 | 'Shrimp', 129 | 'Egg', 130 | 'Gondola', 131 | 'studio couch', 132 | 'Squirrel', 133 | 'Tortoise', 134 | 'Cello', 135 | 'Pizza', 136 | 'Spider', 137 | 'Jellyfish', 138 | 'Goat', 139 | 'Harbor seal', 140 | 'Barrel', 141 | 'Sushi', 142 | 'French fries', 143 | 'Lizard', 144 | 'Kitchen & dining room table', 145 | 'Goldfish', 146 | 'Lily', 147 | 'Candy', 148 | 'Mug', 149 | 'Coffee', 150 | 'Parrot', 151 | 'Vase', 152 | 'Rifle', 153 | 'Surfboard', 154 | 'Skull', 155 | 'Parachute', 156 | 'Bronze sculpture', 157 | 'Cart', 158 | 'Grape', 159 | 'Saucer', 160 | 'Violin', 161 | 'Ice cream', 162 | 'Juice', 163 | 'Tin can']" 164 | 165 | # Retina parameters (multi-output) 166 | encoder_depth: 50 167 | num_classes: 100 168 | pretrained_encoder: 1 169 | pi: 0.01 170 | aspect_ratios: '[1/2., 1/1., 2/1.]' 171 | scale_ratios: '[1., pow(2,1/3.), pow(2,2/3.)]' 172 | 173 | # Training schedule 174 | epochs_nr: 1000 175 | batch_size_train: 8 176 | batch_size_inference: 1 177 | lr: 0.00001 178 | momentum: 0.9 179 | gamma: 1.0 180 | patience: 100 181 | lr_factor: 0.3 182 | lr_patience: 30 183 | training_sample_size: 10000 184 | validation_sample_size: 2000 185 | 186 | # Regularization 187 | use_batch_norm: 1 188 | l2_reg_conv: 0.0001 189 | l2_reg_dense: 0.0 190 | dropout_conv: 0.1 191 | dropout_dense: 0.0 192 | 193 | # Postprocessing 194 | classification_threshold: 0.05 195 | nms_threshold: 0.5 196 | -------------------------------------------------------------------------------- /src/object_detection/utils/test_case.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A convenience wrapper around tf.test.TestCase to enable TPU tests.""" 16 | 17 | import tensorflow as tf 18 | from tensorflow.contrib import tpu 19 | 20 | flags = tf.app.flags 21 | 22 | flags.DEFINE_bool('tpu_test', False, 'Whether to configure test for TPU.') 23 | FLAGS = flags.FLAGS 24 | 25 | 26 | class TestCase(tf.test.TestCase): 27 | """Extends tf.test.TestCase to optionally allow running tests on TPU.""" 28 | 29 | def execute_tpu(self, graph_fn, inputs): 30 | """Constructs the graph, executes it on TPU and returns the result. 31 | 32 | Args: 33 | graph_fn: a callable that constructs the tensorflow graph to test. The 34 | arguments of this function should correspond to `inputs`. 35 | inputs: a list of numpy arrays to feed input to the computation graph. 
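    Example (editorial sketch, not in the original file; it illustrates the
    graph_fn/inputs calling convention with a hypothetical add_fn and assumes
    numpy is imported as np):

      def add_fn(a, b):
        return a + b
      result = self.execute_tpu(add_fn, [np.ones((2, 2), np.float32),
                                         np.ones((2, 2), np.float32)])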
36 | 37 | Returns: 38 | A list of numpy arrays or a scalar returned from executing the tensorflow 39 | graph. 40 | """ 41 | with self.test_session(graph=tf.Graph()) as sess: 42 | placeholders = [tf.placeholder_with_default(v, v.shape) for v in inputs] 43 | tpu_computation = tpu.rewrite(graph_fn, placeholders) 44 | sess.run(tpu.initialize_system()) 45 | sess.run([tf.global_variables_initializer(), tf.tables_initializer(), 46 | tf.local_variables_initializer()]) 47 | materialized_results = sess.run(tpu_computation, 48 | feed_dict=dict(list(zip(placeholders, inputs)))) 49 | sess.run(tpu.shutdown_system()) 50 | if (len(materialized_results) == 1 51 | and (isinstance(materialized_results, list) 52 | or isinstance(materialized_results, tuple))): 53 | materialized_results = materialized_results[0] 54 | return materialized_results 55 | 56 | def execute_cpu(self, graph_fn, inputs): 57 | """Constructs the graph, executes it on CPU and returns the result. 58 | 59 | Args: 60 | graph_fn: a callable that constructs the tensorflow graph to test. The 61 | arguments of this function should correspond to `inputs`. 62 | inputs: a list of numpy arrays to feed input to the computation graph. 63 | 64 | Returns: 65 | A list of numpy arrays or a scalar returned from executing the tensorflow 66 | graph. 67 | """ 68 | with self.test_session(graph=tf.Graph()) as sess: 69 | placeholders = [tf.placeholder_with_default(v, v.shape) for v in inputs] 70 | results = graph_fn(*placeholders) 71 | sess.run([tf.global_variables_initializer(), tf.tables_initializer(), 72 | tf.local_variables_initializer()]) 73 | materialized_results = sess.run(results, feed_dict=dict(list(zip(placeholders, 74 | inputs)))) 75 | if (len(materialized_results) == 1 76 | and (isinstance(materialized_results, list) 77 | or isinstance(materialized_results, tuple))): 78 | materialized_results = materialized_results[0] 79 | return materialized_results 80 | 81 | def execute(self, graph_fn, inputs): 82 | """Constructs the graph, creates a test session and returns the results. 83 | 84 | The graph is executed either on TPU or CPU based on the `tpu_test` flag. 85 | 86 | Args: 87 | graph_fn: a callable that constructs the tensorflow graph to test. The 88 | arguments of this function should correspond to `inputs`. 89 | inputs: a list of numpy arrays to feed input to the computation graph. 90 | 91 | Returns: 92 | A list of numpy arrays or a scalar returned from executing the tensorflow 93 | graph. 94 | """ 95 | if FLAGS.tpu_test: 96 | return self.execute_tpu(graph_fn, inputs) 97 | else: 98 | return self.execute_cpu(graph_fn, inputs) 99 | -------------------------------------------------------------------------------- /src/object_detection/metrics/oid_od_challenge_evaluation_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | r"""Converts data from CSV to the OpenImagesDetectionChallengeEvaluator format. 16 | """ 17 | 18 | import os 19 | import sys 20 | 21 | cwd = os.getcwd() 22 | sys.path.append(os.path.join(cwd, 'src')) 23 | 24 | from object_detection.core import standard_fields 25 | 26 | 27 | def build_groundtruth_boxes_dictionary(data, class_label_map): 28 | """Builds a groundtruth dictionary from groundtruth data in CSV file. 29 | 30 | Args: 31 | data: Pandas DataFrame with the groundtruth data for a single image. 32 | class_label_map: Class labelmap from string label name to an integer. 33 | 34 | Returns: 35 | A dictionary with keys suitable for passing to 36 | OpenImagesDetectionChallengeEvaluator.add_single_ground_truth_image_info: 37 | standard_fields.InputDataFields.groundtruth_boxes: float32 numpy array 38 | of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of 39 | the format [ymin, xmin, ymax, xmax] in absolute image coordinates. 40 | standard_fields.InputDataFields.groundtruth_classes: integer numpy array 41 | of shape [num_boxes] containing 1-indexed groundtruth classes for the 42 | boxes. 43 | standard_fields.InputDataFields.verified_labels: integer 1D numpy array 44 | containing all classes for which labels are verified. 45 | standard_fields.InputDataFields.groundtruth_group_of: Optional length 46 | M numpy boolean array denoting whether a groundtruth box contains a 47 | group of instances. 48 | """ 49 | 50 | data_boxes = data[data.ConfidenceImageLabel.isnull()] 51 | data_labels = data[data.XMin.isnull()] 52 | 53 | return { 54 | standard_fields.InputDataFields.groundtruth_boxes: 55 | data_boxes[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(), 56 | standard_fields.InputDataFields.groundtruth_classes: 57 | data_boxes['LabelName'].map(lambda x: class_label_map[x]).as_matrix(), 58 | standard_fields.InputDataFields.groundtruth_group_of: 59 | data_boxes['IsGroupOf'].as_matrix().astype(int), 60 | standard_fields.InputDataFields.groundtruth_image_classes: 61 | data_labels['LabelName'].map(lambda x: class_label_map[x]).as_matrix(), 62 | } 63 | 64 | 65 | def build_predictions_dictionary(data, class_label_map): 66 | """Builds a predictions dictionary from predictions data in CSV file. 67 | 68 | Args: 69 | data: Pandas DataFrame with the predictions data for a single image. 70 | class_label_map: Class labelmap from string label name to an integer. 71 | 72 | Returns: 73 | Dictionary with keys suitable for passing to 74 | OpenImagesDetectionChallengeEvaluator.add_single_detected_image_info: 75 | standard_fields.DetectionResultFields.detection_boxes: float32 numpy 76 | array of shape [num_boxes, 4] containing `num_boxes` detection boxes 77 | of the format [ymin, xmin, ymax, xmax] in absolute image coordinates. 78 | standard_fields.DetectionResultFields.detection_scores: float32 numpy 79 | array of shape [num_boxes] containing detection scores for the boxes. 80 | standard_fields.DetectionResultFields.detection_classes: integer numpy 81 | array of shape [num_boxes] containing 1-indexed detection classes for 82 | the boxes. 
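  Example (editorial sketch; the column layout mirrors the accompanying unit
  test and assumes pandas is imported as pd):

    data = pd.DataFrame(
        [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 0.1]],
        columns=['ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'Score'])
    class_label_map = {'/m/04bcr3': 1}
    prediction_dict = build_predictions_dictionary(data, class_label_map)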
83 | 84 | """ 85 | return { 86 | standard_fields.DetectionResultFields.detection_boxes: 87 | data[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(), 88 | standard_fields.DetectionResultFields.detection_classes: 89 | data['LabelName'].map(lambda x: class_label_map[x]).as_matrix(), 90 | standard_fields.DetectionResultFields.detection_scores: 91 | data['Score'].as_matrix() 92 | } 93 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_mask_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Operations for [N, height, width] numpy arrays representing masks. 17 | 18 | Example mask operations that are supported: 19 | * Areas: compute mask areas 20 | * IOU: pairwise intersection-over-union scores 21 | """ 22 | import numpy as np 23 | 24 | EPSILON = 1e-7 25 | 26 | 27 | def area(masks): 28 | """Computes area of masks. 29 | 30 | Args: 31 | masks: Numpy array with shape [N, height, width] holding N masks. Masks 32 | values are of type np.uint8 and values are in {0,1}. 33 | 34 | Returns: 35 | a numpy array with shape [N*1] representing mask areas. 36 | 37 | Raises: 38 | ValueError: If masks.dtype is not np.uint8 39 | """ 40 | if masks.dtype != np.uint8: 41 | raise ValueError('Masks type should be np.uint8') 42 | return np.sum(masks, axis=(1, 2), dtype=np.float32) 43 | 44 | 45 | def intersection(masks1, masks2): 46 | """Compute pairwise intersection areas between masks. 47 | 48 | Args: 49 | masks1: a numpy array with shape [N, height, width] holding N masks. Masks 50 | values are of type np.uint8 and values are in {0,1}. 51 | masks2: a numpy array with shape [M, height, width] holding M masks. Masks 52 | values are of type np.uint8 and values are in {0,1}. 53 | 54 | Returns: 55 | a numpy array with shape [N*M] representing pairwise intersection area. 56 | 57 | Raises: 58 | ValueError: If masks1 and masks2 are not of type np.uint8. 59 | """ 60 | if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: 61 | raise ValueError('masks1 and masks2 should be of type np.uint8') 62 | n = masks1.shape[0] 63 | m = masks2.shape[0] 64 | answer = np.zeros([n, m], dtype=np.float32) 65 | for i in np.arange(n): 66 | for j in np.arange(m): 67 | answer[i, j] = np.sum(np.minimum(masks1[i], masks2[j]), dtype=np.float32) 68 | return answer 69 | 70 | 71 | def iou(masks1, masks2): 72 | """Computes pairwise intersection-over-union between mask collections. 73 | 74 | Args: 75 | masks1: a numpy array with shape [N, height, width] holding N masks. Masks 76 | values are of type np.uint8 and values are in {0,1}. 77 | masks2: a numpy array with shape [M, height, width] holding N masks. Masks 78 | values are of type np.uint8 and values are in {0,1}. 
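  Example (editorial sketch; masks must be 0/1 valued np.uint8 arrays, as noted
  above):

    m1 = np.zeros((1, 5, 8), dtype=np.uint8); m1[0, 3:, :4] = 1  # area 8
    m2 = np.zeros((1, 5, 8), dtype=np.uint8); m2[0, :, :4] = 1   # area 20
    iou(m1, m2)  # -> [[8. / 20.]], intersection 8 over union 8 + 20 - 8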
79 | 80 | Returns: 81 | a numpy array with shape [N, M] representing pairwise iou scores. 82 | 83 | Raises: 84 | ValueError: If masks1 and masks2 are not of type np.uint8. 85 | """ 86 | if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: 87 | raise ValueError('masks1 and masks2 should be of type np.uint8') 88 | intersect = intersection(masks1, masks2) 89 | area1 = area(masks1) 90 | area2 = area(masks2) 91 | union = np.expand_dims(area1, axis=1) + np.expand_dims( 92 | area2, axis=0) - intersect 93 | return intersect / np.maximum(union, EPSILON) 94 | 95 | 96 | def ioa(masks1, masks2): 97 | """Computes pairwise intersection-over-area between box collections. 98 | 99 | Intersection-over-area (ioa) between two masks, mask1 and mask2 is defined as 100 | their intersection area over mask2's area. Note that ioa is not symmetric, 101 | that is, IOA(mask1, mask2) != IOA(mask2, mask1). 102 | 103 | Args: 104 | masks1: a numpy array with shape [N, height, width] holding N masks. Masks 105 | values are of type np.uint8 and values are in {0,1}. 106 | masks2: a numpy array with shape [M, height, width] holding N masks. Masks 107 | values are of type np.uint8 and values are in {0,1}. 108 | 109 | Returns: 110 | a numpy array with shape [N, M] representing pairwise ioa scores. 111 | 112 | Raises: 113 | ValueError: If masks1 and masks2 are not of type np.uint8. 114 | """ 115 | if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: 116 | raise ValueError('masks1 and masks2 should be of type np.uint8') 117 | intersect = intersection(masks1, masks2) 118 | areas = np.expand_dims(area(masks2), axis=0) 119 | return intersect / (areas + EPSILON) 120 | -------------------------------------------------------------------------------- /src/object_detection/protos/string_int_label_map_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
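# Editorial note (assumption): files like this are typically regenerated from
# the .proto definition with protoc, e.g.
#   protoc object_detection/protos/string_int_label_map.proto --python_out=.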
2 | # source: object_detection/protos/string_int_label_map.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='object_detection/protos/string_int_label_map.proto', 20 | package='object_detection.protos', 21 | syntax='proto2', 22 | serialized_pb=_b('\n2object_detection/protos/string_int_label_map.proto\x12\x17object_detection.protos\"G\n\x15StringIntLabelMapItem\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\x05\x12\x14\n\x0c\x64isplay_name\x18\x03 \x01(\t\"Q\n\x11StringIntLabelMap\x12<\n\x04item\x18\x01 \x03(\x0b\x32..object_detection.protos.StringIntLabelMapItem') 23 | ) 24 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 25 | 26 | 27 | 28 | 29 | _STRINGINTLABELMAPITEM = _descriptor.Descriptor( 30 | name='StringIntLabelMapItem', 31 | full_name='object_detection.protos.StringIntLabelMapItem', 32 | filename=None, 33 | file=DESCRIPTOR, 34 | containing_type=None, 35 | fields=[ 36 | _descriptor.FieldDescriptor( 37 | name='name', full_name='object_detection.protos.StringIntLabelMapItem.name', index=0, 38 | number=1, type=9, cpp_type=9, label=1, 39 | has_default_value=False, default_value=_b("").decode('utf-8'), 40 | message_type=None, enum_type=None, containing_type=None, 41 | is_extension=False, extension_scope=None, 42 | options=None), 43 | _descriptor.FieldDescriptor( 44 | name='id', full_name='object_detection.protos.StringIntLabelMapItem.id', index=1, 45 | number=2, type=5, cpp_type=1, label=1, 46 | has_default_value=False, default_value=0, 47 | message_type=None, enum_type=None, containing_type=None, 48 | is_extension=False, extension_scope=None, 49 | options=None), 50 | _descriptor.FieldDescriptor( 51 | name='display_name', full_name='object_detection.protos.StringIntLabelMapItem.display_name', index=2, 52 | number=3, type=9, cpp_type=9, label=1, 53 | has_default_value=False, default_value=_b("").decode('utf-8'), 54 | message_type=None, enum_type=None, containing_type=None, 55 | is_extension=False, extension_scope=None, 56 | options=None), 57 | ], 58 | extensions=[ 59 | ], 60 | nested_types=[], 61 | enum_types=[ 62 | ], 63 | options=None, 64 | is_extendable=False, 65 | syntax='proto2', 66 | extension_ranges=[], 67 | oneofs=[ 68 | ], 69 | serialized_start=79, 70 | serialized_end=150, 71 | ) 72 | 73 | 74 | _STRINGINTLABELMAP = _descriptor.Descriptor( 75 | name='StringIntLabelMap', 76 | full_name='object_detection.protos.StringIntLabelMap', 77 | filename=None, 78 | file=DESCRIPTOR, 79 | containing_type=None, 80 | fields=[ 81 | _descriptor.FieldDescriptor( 82 | name='item', full_name='object_detection.protos.StringIntLabelMap.item', index=0, 83 | number=1, type=11, cpp_type=10, label=3, 84 | has_default_value=False, default_value=[], 85 | message_type=None, enum_type=None, containing_type=None, 86 | is_extension=False, extension_scope=None, 87 | options=None), 88 | ], 89 | extensions=[ 90 | ], 91 | nested_types=[], 92 | enum_types=[ 93 | ], 94 | options=None, 95 | is_extendable=False, 96 | syntax='proto2', 97 | extension_ranges=[], 98 | oneofs=[ 99 | ], 100 | serialized_start=152, 101 | 
serialized_end=233, 102 | ) 103 | 104 | _STRINGINTLABELMAP.fields_by_name['item'].message_type = _STRINGINTLABELMAPITEM 105 | DESCRIPTOR.message_types_by_name['StringIntLabelMapItem'] = _STRINGINTLABELMAPITEM 106 | DESCRIPTOR.message_types_by_name['StringIntLabelMap'] = _STRINGINTLABELMAP 107 | 108 | StringIntLabelMapItem = _reflection.GeneratedProtocolMessageType('StringIntLabelMapItem', (_message.Message,), dict( 109 | DESCRIPTOR = _STRINGINTLABELMAPITEM, 110 | __module__ = 'object_detection.protos.string_int_label_map_pb2' 111 | # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMapItem) 112 | )) 113 | _sym_db.RegisterMessage(StringIntLabelMapItem) 114 | 115 | StringIntLabelMap = _reflection.GeneratedProtocolMessageType('StringIntLabelMap', (_message.Message,), dict( 116 | DESCRIPTOR = _STRINGINTLABELMAP, 117 | __module__ = 'object_detection.protos.string_int_label_map_pb2' 118 | # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMap) 119 | )) 120 | _sym_db.RegisterMessage(StringIntLabelMap) 121 | 122 | 123 | # @@protoc_insertion_point(module_scope) 124 | -------------------------------------------------------------------------------- /src/object_detection/metrics/oid_od_challenge_evaluation_utils_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for oid_od_challenge_evaluation_util.""" 16 | 17 | 18 | 19 | 20 | 21 | import numpy as np 22 | import pandas as pd 23 | import tensorflow as tf 24 | from object_detection.core import standard_fields 25 | from object_detection.metrics import oid_od_challenge_evaluation_utils as utils 26 | 27 | 28 | class OidOdChallengeEvaluationUtilTest(tf.test.TestCase): 29 | 30 | def testBuildGroundtruthDictionary(self): 31 | np_data = pd.DataFrame( 32 | [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 1, None], [ 33 | 'fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0, None 34 | ], ['fe58ec1b06db2bb7', '/m/04bcr3', None, None, None, None, None, 1], [ 35 | 'fe58ec1b06db2bb7', '/m/083vt', None, None, None, None, None, 0 36 | ], ['fe58ec1b06db2bb7', '/m/02gy9n', None, None, None, None, None, 1]], 37 | columns=[ 38 | 'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'IsGroupOf', 39 | 'ConfidenceImageLabel' 40 | ]) 41 | class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3} 42 | groundtruth_dictionary = utils.build_groundtruth_boxes_dictionary( 43 | np_data, class_label_map) 44 | 45 | self.assertTrue(standard_fields.InputDataFields.groundtruth_boxes in 46 | groundtruth_dictionary) 47 | self.assertTrue(standard_fields.InputDataFields.groundtruth_classes in 48 | groundtruth_dictionary) 49 | self.assertTrue(standard_fields.InputDataFields.groundtruth_group_of in 50 | groundtruth_dictionary) 51 | self.assertTrue(standard_fields.InputDataFields.groundtruth_image_classes in 52 | groundtruth_dictionary) 53 | 54 | self.assertAllEqual( 55 | np.array([1, 3]), groundtruth_dictionary[ 56 | standard_fields.InputDataFields.groundtruth_classes]) 57 | self.assertAllEqual( 58 | np.array([1, 0]), groundtruth_dictionary[ 59 | standard_fields.InputDataFields.groundtruth_group_of]) 60 | 61 | expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2]]) 62 | 63 | self.assertNDArrayNear( 64 | expected_boxes_data, groundtruth_dictionary[ 65 | standard_fields.InputDataFields.groundtruth_boxes], 1e-5) 66 | self.assertAllEqual( 67 | np.array([1, 2, 3]), groundtruth_dictionary[ 68 | standard_fields.InputDataFields.groundtruth_image_classes]) 69 | 70 | def testBuildPredictionDictionary(self): 71 | np_data = pd.DataFrame( 72 | [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 0.1], [ 73 | 'fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0.2 74 | ], ['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.1, 0.2, 0.3, 0.3]], 75 | columns=[ 76 | 'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'Score' 77 | ]) 78 | class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3} 79 | prediction_dictionary = utils.build_predictions_dictionary( 80 | np_data, class_label_map) 81 | 82 | self.assertTrue(standard_fields.DetectionResultFields.detection_boxes in 83 | prediction_dictionary) 84 | self.assertTrue(standard_fields.DetectionResultFields.detection_classes in 85 | prediction_dictionary) 86 | self.assertTrue(standard_fields.DetectionResultFields.detection_scores in 87 | prediction_dictionary) 88 | 89 | self.assertAllEqual( 90 | np.array([1, 3, 1]), prediction_dictionary[ 91 | standard_fields.DetectionResultFields.detection_classes]) 92 | expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2], 93 | [0.2, 0.0, 0.3, 0.1]]) 94 | self.assertNDArrayNear( 95 | expected_boxes_data, prediction_dictionary[ 96 | standard_fields.DetectionResultFields.detection_boxes], 1e-5) 97 | 
self.assertNDArrayNear( 98 | np.array([0.1, 0.2, 0.3]), prediction_dictionary[ 99 | standard_fields.DetectionResultFields.detection_scores], 1e-5) 100 | 101 | 102 | if __name__ == '__main__': 103 | tf.test.main() 104 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_box_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Numpy BoxList classes and functions.""" 17 | 18 | import numpy as np 19 | 20 | 21 | class BoxList(object): 22 | """Box collection. 23 | 24 | BoxList represents a list of bounding boxes as numpy array, where each 25 | bounding box is represented as a row of 4 numbers, 26 | [y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes within a 27 | given list correspond to a single image. 28 | 29 | Optionally, users can add additional related fields (such as 30 | objectness/classification scores). 31 | """ 32 | 33 | def __init__(self, data): 34 | """Constructs box collection. 35 | 36 | Args: 37 | data: a numpy array of shape [N, 4] representing box coordinates 38 | 39 | Raises: 40 | ValueError: if bbox data is not a numpy array 41 | ValueError: if invalid dimensions for bbox data 42 | """ 43 | if not isinstance(data, np.ndarray): 44 | raise ValueError('data must be a numpy array.') 45 | if len(data.shape) != 2 or data.shape[1] != 4: 46 | raise ValueError('Invalid dimensions for box data.') 47 | if data.dtype != np.float32 and data.dtype != np.float64: 48 | raise ValueError('Invalid data type for box data: float is required.') 49 | if not self._is_valid_boxes(data): 50 | raise ValueError('Invalid box data. data must be a numpy array of ' 51 | 'N*[y_min, x_min, y_max, x_max]') 52 | self.data = {'boxes': data} 53 | 54 | def num_boxes(self): 55 | """Return number of boxes held in collections.""" 56 | return self.data['boxes'].shape[0] 57 | 58 | def get_extra_fields(self): 59 | """Return all non-box fields.""" 60 | return [k for k in list(self.data.keys()) if k != 'boxes'] 61 | 62 | def has_field(self, field): 63 | return field in self.data 64 | 65 | def add_field(self, field, field_data): 66 | """Add data to a specified field. 67 | 68 | Args: 69 | field: a string parameter used to speficy a related field to be accessed. 70 | field_data: a numpy array of [N, ...] representing the data associated 71 | with the field. 72 | Raises: 73 | ValueError: if the field is already exist or the dimension of the field 74 | data does not matches the number of boxes. 
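    Example (editorial sketch using only this class; numpy is imported as np in
    this module):

      boxlist = BoxList(np.array([[0., 0., 1., 1.]], dtype=np.float32))
      boxlist.add_field('scores', np.array([0.9], dtype=np.float32))
      boxlist.get_field('scores')  # -> array([0.9], dtype=float32)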
75 | """ 76 | if self.has_field(field): 77 | raise ValueError('Field ' + field + 'already exists') 78 | if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes(): 79 | raise ValueError('Invalid dimensions for field data') 80 | self.data[field] = field_data 81 | 82 | def get(self): 83 | """Convenience function for accesssing box coordinates. 84 | 85 | Returns: 86 | a numpy array of shape [N, 4] representing box corners 87 | """ 88 | return self.get_field('boxes') 89 | 90 | def get_field(self, field): 91 | """Accesses data associated with the specified field in the box collection. 92 | 93 | Args: 94 | field: a string parameter used to speficy a related field to be accessed. 95 | 96 | Returns: 97 | a numpy 1-d array representing data of an associated field 98 | 99 | Raises: 100 | ValueError: if invalid field 101 | """ 102 | if not self.has_field(field): 103 | raise ValueError('field {} does not exist'.format(field)) 104 | return self.data[field] 105 | 106 | def get_coordinates(self): 107 | """Get corner coordinates of boxes. 108 | 109 | Returns: 110 | a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max] 111 | """ 112 | box_coordinates = self.get() 113 | y_min = box_coordinates[:, 0] 114 | x_min = box_coordinates[:, 1] 115 | y_max = box_coordinates[:, 2] 116 | x_max = box_coordinates[:, 3] 117 | return [y_min, x_min, y_max, x_max] 118 | 119 | def _is_valid_boxes(self, data): 120 | """Check whether data fullfills the format of N*[ymin, xmin, ymax, xmin]. 121 | 122 | Args: 123 | data: a numpy array of shape [N, 4] representing box coordinates 124 | 125 | Returns: 126 | a boolean indicating whether all ymax of boxes are equal or greater than 127 | ymin, and all xmax of boxes are equal or greater than xmin. 128 | """ 129 | if data.shape[0] > 0: 130 | for i in range(data.shape[0]): 131 | if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]: 132 | return False 133 | return True 134 | -------------------------------------------------------------------------------- /src/object_detection/dataset_tools/oid_tfrecord_creation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""Utilities for creating TFRecords of TF examples for the Open Images dataset. 16 | """ 17 | 18 | 19 | 20 | 21 | import tensorflow as tf 22 | 23 | from object_detection.core import standard_fields 24 | from object_detection.utils import dataset_util 25 | 26 | 27 | def tf_example_from_annotations_data_frame(annotations_data_frame, label_map, 28 | encoded_image): 29 | """Populates a TF Example message with image annotations from a data frame. 30 | 31 | Args: 32 | annotations_data_frame: Data frame containing the annotations for a single 33 | image. 34 | label_map: String to integer label map. 
35 | encoded_image: The encoded image string 36 | 37 | Returns: 38 | The populated TF Example, if the label of at least one object is present in 39 | label_map. Otherwise, returns None. 40 | """ 41 | 42 | filtered_data_frame = annotations_data_frame[ 43 | annotations_data_frame.LabelName.isin(label_map)] 44 | filtered_data_frame_boxes = filtered_data_frame[ 45 | ~filtered_data_frame.YMin.isnull()] 46 | filtered_data_frame_labels = filtered_data_frame[ 47 | filtered_data_frame.YMin.isnull()] 48 | image_id = annotations_data_frame.ImageID.iloc[0] 49 | 50 | feature_map = { 51 | standard_fields.TfExampleFields.object_bbox_ymin: 52 | dataset_util.float_list_feature( 53 | filtered_data_frame_boxes.YMin.as_matrix()), 54 | standard_fields.TfExampleFields.object_bbox_xmin: 55 | dataset_util.float_list_feature( 56 | filtered_data_frame_boxes.XMin.as_matrix()), 57 | standard_fields.TfExampleFields.object_bbox_ymax: 58 | dataset_util.float_list_feature( 59 | filtered_data_frame_boxes.YMax.as_matrix()), 60 | standard_fields.TfExampleFields.object_bbox_xmax: 61 | dataset_util.float_list_feature( 62 | filtered_data_frame_boxes.XMax.as_matrix()), 63 | standard_fields.TfExampleFields.object_class_text: 64 | dataset_util.bytes_list_feature( 65 | filtered_data_frame_boxes.LabelName.as_matrix()), 66 | standard_fields.TfExampleFields.object_class_label: 67 | dataset_util.int64_list_feature( 68 | filtered_data_frame_boxes.LabelName.map(lambda x: label_map[x]) 69 | .as_matrix()), 70 | standard_fields.TfExampleFields.filename: 71 | dataset_util.bytes_feature('{}.jpg'.format(image_id)), 72 | standard_fields.TfExampleFields.source_id: 73 | dataset_util.bytes_feature(image_id), 74 | standard_fields.TfExampleFields.image_encoded: 75 | dataset_util.bytes_feature(encoded_image), 76 | } 77 | 78 | if 'IsGroupOf' in filtered_data_frame.columns: 79 | feature_map[standard_fields.TfExampleFields. 80 | object_group_of] = dataset_util.int64_list_feature( 81 | filtered_data_frame_boxes.IsGroupOf.as_matrix().astype(int)) 82 | if 'IsOccluded' in filtered_data_frame.columns: 83 | feature_map[standard_fields.TfExampleFields. 84 | object_occluded] = dataset_util.int64_list_feature( 85 | filtered_data_frame_boxes.IsOccluded.as_matrix().astype( 86 | int)) 87 | if 'IsTruncated' in filtered_data_frame.columns: 88 | feature_map[standard_fields.TfExampleFields. 89 | object_truncated] = dataset_util.int64_list_feature( 90 | filtered_data_frame_boxes.IsTruncated.as_matrix().astype( 91 | int)) 92 | if 'IsDepiction' in filtered_data_frame.columns: 93 | feature_map[standard_fields.TfExampleFields. 94 | object_depiction] = dataset_util.int64_list_feature( 95 | filtered_data_frame_boxes.IsDepiction.as_matrix().astype( 96 | int)) 97 | 98 | if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns: 99 | feature_map[standard_fields.TfExampleFields. 100 | image_class_label] = dataset_util.int64_list_feature( 101 | filtered_data_frame_labels.LabelName.map( 102 | lambda x: label_map[x]).as_matrix()) 103 | feature_map[standard_fields.TfExampleFields. 104 | image_class_text] = dataset_util.bytes_list_feature( 105 | filtered_data_frame_labels.LabelName.as_matrix()), 106 | return tf.train.Example(features=tf.train.Features(feature=feature_map)) 107 | -------------------------------------------------------------------------------- /src/object_detection/dataset_tools/create_oid_tf_record.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""Creates TFRecords of Open Images dataset for object detection. 16 | 17 | Example usage: 18 | python object_detection/dataset_tools/create_oid_tf_record.py \ 19 | --input_box_annotations_csv=/path/to/input/annotations-human-bbox.csv \ 20 | --input_image_label_annotations_csv=/path/to/input/annotations-label.csv \ 21 | --input_images_directory=/path/to/input/image_pixels_directory \ 22 | --input_label_map=/path/to/input/labels_bbox_545.labelmap \ 23 | --output_tf_record_path_prefix=/path/to/output/prefix.tfrecord 24 | 25 | CSVs with bounding box annotations and image metadata (including the image URLs) 26 | can be downloaded from the Open Images GitHub repository: 27 | https://github.com/openimages/dataset 28 | 29 | This script will include every image found in the input_images_directory in the 30 | output TFRecord, even if the image has no corresponding bounding box annotations 31 | in the input_annotations_csv. If input_image_label_annotations_csv is specified, 32 | it will add image-level labels as well. Note that the information of whether a 33 | label is positivelly or negativelly verified is NOT added to tfrecord. 34 | """ 35 | 36 | 37 | 38 | 39 | import os 40 | 41 | import contextlib2 42 | import pandas as pd 43 | import tensorflow as tf 44 | 45 | from object_detection.dataset_tools import oid_tfrecord_creation 46 | from object_detection.dataset_tools import tf_record_creation_util 47 | from object_detection.utils import label_map_util 48 | 49 | tf.flags.DEFINE_string('input_box_annotations_csv', None, 50 | 'Path to CSV containing image bounding box annotations') 51 | tf.flags.DEFINE_string('input_images_directory', None, 52 | 'Directory containing the image pixels ' 53 | 'downloaded from the OpenImages GitHub repository.') 54 | tf.flags.DEFINE_string('input_image_label_annotations_csv', None, 55 | 'Path to CSV containing image-level labels annotations') 56 | tf.flags.DEFINE_string('input_label_map', None, 'Path to the label map proto') 57 | tf.flags.DEFINE_string( 58 | 'output_tf_record_path_prefix', None, 59 | 'Path to the output TFRecord. 
The shard index and the number of shards ' 60 | 'will be appended for each output shard.') 61 | tf.flags.DEFINE_integer('num_shards', 100, 'Number of TFRecord shards') 62 | 63 | FLAGS = tf.flags.FLAGS 64 | 65 | 66 | def main(_): 67 | tf.logging.set_verbosity(tf.logging.INFO) 68 | 69 | required_flags = [ 70 | 'input_box_annotations_csv', 'input_images_directory', 'input_label_map', 71 | 'output_tf_record_path_prefix' 72 | ] 73 | for flag_name in required_flags: 74 | if not getattr(FLAGS, flag_name): 75 | raise ValueError('Flag --{} is required'.format(flag_name)) 76 | 77 | label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map) 78 | all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv) 79 | if FLAGS.input_image_label_annotations_csv: 80 | all_label_annotations = pd.read_csv(FLAGS.input_image_label_annotations_csv) 81 | all_label_annotations.rename( 82 | columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True) 83 | else: 84 | all_label_annotations = None 85 | all_images = tf.gfile.Glob( 86 | os.path.join(FLAGS.input_images_directory, '*.jpg')) 87 | all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images] 88 | all_image_ids = pd.DataFrame({'ImageID': all_image_ids}) 89 | all_annotations = pd.concat( 90 | [all_box_annotations, all_image_ids, all_label_annotations]) 91 | 92 | tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids)) 93 | 94 | with contextlib2.ExitStack() as tf_record_close_stack: 95 | output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords( 96 | tf_record_close_stack, FLAGS.output_tf_record_path_prefix, 97 | FLAGS.num_shards) 98 | 99 | for counter, image_data in enumerate(all_annotations.groupby('ImageID')): 100 | tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000, 101 | counter) 102 | 103 | image_id, image_annotations = image_data 104 | # In OID image file names are formed by appending ".jpg" to the image ID. 105 | image_path = os.path.join(FLAGS.input_images_directory, image_id + '.jpg') 106 | with tf.gfile.Open(image_path) as image_file: 107 | encoded_image = image_file.read() 108 | 109 | tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( 110 | image_annotations, label_map, encoded_image) 111 | if tf_example: 112 | shard_idx = int(image_id, 16) % FLAGS.num_shards 113 | output_tfrecords[shard_idx].write(tf_example.SerializeToString()) 114 | 115 | 116 | if __name__ == '__main__': 117 | tf.app.run() 118 | -------------------------------------------------------------------------------- /src/object_detection/metrics/oid_od_challenge_evaluation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""Runs evaluation using OpenImages groundtruth and predictions. 
16 | 17 | Example usage: 18 | python models/research/object_detection/metrics/oid_od_challenge_evaluation.py \ 19 | --input_annotations_boxes=/path/to/input/annotations-human-bbox.csv \ 20 | --input_annotations_labels=/path/to/input/annotations-label.csv \ 21 | --input_class_labelmap=/path/to/input/class_labelmap.pbtxt \ 22 | --input_predictions=/path/to/input/predictions.csv \ 23 | --output_metrics=/path/to/output/metric.csv \ 24 | 25 | CSVs with bounding box annotations and image label (including the image URLs) 26 | can be downloaded from the Open Images Challenge website: 27 | https://storage.googleapis.com/openimages/web/challenge.html 28 | The format of the input csv and the metrics itself are described on the 29 | challenge website. 30 | """ 31 | 32 | import os 33 | import sys 34 | 35 | cwd = os.getcwd() 36 | sys.path.append(os.path.join(cwd, 'src')) 37 | 38 | import argparse 39 | import pandas as pd 40 | from google.protobuf import text_format 41 | 42 | from object_detection.metrics import io_utils 43 | from object_detection.metrics import oid_od_challenge_evaluation_utils as utils 44 | from object_detection.protos import string_int_label_map_pb2 45 | from object_detection.utils import object_detection_evaluation 46 | 47 | 48 | def _load_labelmap(labelmap_path): 49 | """Loads labelmap from the labelmap path. 50 | 51 | Args: 52 | labelmap_path: Path to the labelmap. 53 | 54 | Returns: 55 | A dictionary mapping class name to class numerical id 56 | A list with dictionaries, one dictionary per category. 57 | """ 58 | 59 | label_map = string_int_label_map_pb2.StringIntLabelMap() 60 | with open(labelmap_path, 'r') as fid: 61 | label_map_string = fid.read() 62 | text_format.Merge(label_map_string, label_map) 63 | labelmap_dict = {} 64 | categories = [] 65 | for item in label_map.item: 66 | labelmap_dict[item.name] = item.id 67 | categories.append({'id': item.id, 'name': item.name}) 68 | return labelmap_dict, categories 69 | 70 | 71 | def main(parsed_args): 72 | all_box_annotations = pd.read_csv(parsed_args.input_annotations_boxes) 73 | all_label_annotations = pd.read_csv(parsed_args.input_annotations_labels) 74 | 75 | all_label_annotations.rename( 76 | columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True) 77 | all_annotations = pd.concat([all_box_annotations, all_label_annotations]) 78 | 79 | class_label_map, categories = _load_labelmap(parsed_args.input_class_labelmap) 80 | challenge_evaluator = ( 81 | object_detection_evaluation.OpenImagesDetectionChallengeEvaluator( 82 | categories)) 83 | 84 | for _, groundtruth in enumerate(all_annotations.groupby('ImageID')): 85 | image_id, image_groundtruth = groundtruth 86 | groundtruth_dictionary = utils.build_groundtruth_boxes_dictionary( 87 | image_groundtruth, class_label_map) 88 | challenge_evaluator.add_single_ground_truth_image_info( 89 | image_id, groundtruth_dictionary) 90 | 91 | all_predictions = pd.read_csv(parsed_args.input_predictions) 92 | for _, prediction_data in enumerate(all_predictions.groupby('ImageID')): 93 | image_id, image_predictions = prediction_data 94 | prediction_dictionary = utils.build_predictions_dictionary( 95 | image_predictions, class_label_map) 96 | challenge_evaluator.add_single_detected_image_info(image_id, 97 | prediction_dictionary) 98 | 99 | metrics = challenge_evaluator.evaluate() 100 | 101 | with open(parsed_args.output_metrics, 'w') as fid: 102 | io_utils.write_csv(fid, metrics) 103 | 104 | 105 | if __name__ == '__main__': 106 | parser = argparse.ArgumentParser( 107 | description='Evaluate Open 
Images Object Detection Challenge predictions.' 108 | ) 109 | parser.add_argument( 110 | '--input_annotations_boxes', 111 | required=True, 112 | help='File with groundtruth boxes annotations.') 113 | parser.add_argument( 114 | '--input_annotations_labels', 115 | required=True, 116 | help='File with groundtruth labels annotations') 117 | parser.add_argument( 118 | '--input_predictions', 119 | required=True, 120 | help="""File with detection predictions; NOTE: no postprocessing is 121 | applied in the evaluation script.""") 122 | parser.add_argument( 123 | '--input_class_labelmap', 124 | required=True, 125 | help='Open Images Challenge labelmap.') 126 | parser.add_argument( 127 | '--output_metrics', required=True, help='Output file with csv metrics') 128 | 129 | args = parser.parse_args() 130 | main(args) 131 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_box_list_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.np_box_list_test.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import np_box_list 22 | 23 | 24 | class BoxListTest(tf.test.TestCase): 25 | 26 | def test_invalid_box_data(self): 27 | with self.assertRaises(ValueError): 28 | np_box_list.BoxList([0, 0, 1, 1]) 29 | 30 | with self.assertRaises(ValueError): 31 | np_box_list.BoxList(np.array([[0, 0, 1, 1]], dtype=int)) 32 | 33 | with self.assertRaises(ValueError): 34 | np_box_list.BoxList(np.array([0, 1, 1, 3, 4], dtype=float)) 35 | 36 | with self.assertRaises(ValueError): 37 | np_box_list.BoxList(np.array([[0, 1, 1, 3], [3, 1, 1, 5]], dtype=float)) 38 | 39 | def test_has_field_with_existed_field(self): 40 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 41 | [0.0, 0.0, 20.0, 20.0]], 42 | dtype=float) 43 | boxlist = np_box_list.BoxList(boxes) 44 | self.assertTrue(boxlist.has_field('boxes')) 45 | 46 | def test_has_field_with_nonexisted_field(self): 47 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 48 | [0.0, 0.0, 20.0, 20.0]], 49 | dtype=float) 50 | boxlist = np_box_list.BoxList(boxes) 51 | self.assertFalse(boxlist.has_field('scores')) 52 | 53 | def test_get_field_with_existed_field(self): 54 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 55 | [0.0, 0.0, 20.0, 20.0]], 56 | dtype=float) 57 | boxlist = np_box_list.BoxList(boxes) 58 | self.assertTrue(np.allclose(boxlist.get_field('boxes'), boxes)) 59 | 60 | def test_get_field_with_nonexited_field(self): 61 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 62 | [0.0, 0.0, 20.0, 20.0]], 63 | dtype=float) 64 | boxlist = np_box_list.BoxList(boxes) 65 | with 
self.assertRaises(ValueError): 66 | boxlist.get_field('scores') 67 | 68 | 69 | class AddExtraFieldTest(tf.test.TestCase): 70 | 71 | def setUp(self): 72 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 73 | [0.0, 0.0, 20.0, 20.0]], 74 | dtype=float) 75 | self.boxlist = np_box_list.BoxList(boxes) 76 | 77 | def test_add_already_existed_field(self): 78 | with self.assertRaises(ValueError): 79 | self.boxlist.add_field('boxes', np.array([[0, 0, 0, 1, 0]], dtype=float)) 80 | 81 | def test_add_invalid_field_data(self): 82 | with self.assertRaises(ValueError): 83 | self.boxlist.add_field('scores', np.array([0.5, 0.7], dtype=float)) 84 | with self.assertRaises(ValueError): 85 | self.boxlist.add_field('scores', 86 | np.array([0.5, 0.7, 0.9, 0.1], dtype=float)) 87 | 88 | def test_add_single_dimensional_field_data(self): 89 | boxlist = self.boxlist 90 | scores = np.array([0.5, 0.7, 0.9], dtype=float) 91 | boxlist.add_field('scores', scores) 92 | self.assertTrue(np.allclose(scores, self.boxlist.get_field('scores'))) 93 | 94 | def test_add_multi_dimensional_field_data(self): 95 | boxlist = self.boxlist 96 | labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]], 97 | dtype=int) 98 | boxlist.add_field('labels', labels) 99 | self.assertTrue(np.allclose(labels, self.boxlist.get_field('labels'))) 100 | 101 | def test_get_extra_fields(self): 102 | boxlist = self.boxlist 103 | self.assertItemsEqual(boxlist.get_extra_fields(), []) 104 | 105 | scores = np.array([0.5, 0.7, 0.9], dtype=float) 106 | boxlist.add_field('scores', scores) 107 | self.assertItemsEqual(boxlist.get_extra_fields(), ['scores']) 108 | 109 | labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]], 110 | dtype=int) 111 | boxlist.add_field('labels', labels) 112 | self.assertItemsEqual(boxlist.get_extra_fields(), ['scores', 'labels']) 113 | 114 | def test_get_coordinates(self): 115 | y_min, x_min, y_max, x_max = self.boxlist.get_coordinates() 116 | 117 | expected_y_min = np.array([3.0, 14.0, 0.0], dtype=float) 118 | expected_x_min = np.array([4.0, 14.0, 0.0], dtype=float) 119 | expected_y_max = np.array([6.0, 15.0, 20.0], dtype=float) 120 | expected_x_max = np.array([8.0, 15.0, 20.0], dtype=float) 121 | 122 | self.assertTrue(np.allclose(y_min, expected_y_min)) 123 | self.assertTrue(np.allclose(x_min, expected_x_min)) 124 | self.assertTrue(np.allclose(y_max, expected_y_max)) 125 | self.assertTrue(np.allclose(x_max, expected_x_max)) 126 | 127 | def test_num_boxes(self): 128 | boxes = np.array([[0., 0., 100., 100.], [10., 30., 50., 70.]], dtype=float) 129 | boxlist = np_box_list.BoxList(boxes) 130 | expected_num_boxes = 2 131 | self.assertEqual(boxlist.num_boxes(), expected_num_boxes) 132 | 133 | 134 | if __name__ == '__main__': 135 | tf.test.main() 136 | -------------------------------------------------------------------------------- /notebooks/submission_merge.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "\n", 11 | "from tqdm import tqdm_notebook as tqdm\n", 12 | "import glob\n", 13 | "from collections import defaultdict\n", 14 | "from typing import Sequence\n", 15 | "import pandas as pd\n", 16 | "\n", 17 | "EXPERIMENT_DIRPATH = 'PATH/TO/models'\n", 18 | "SAMPLE_SUBMISSION_FILEPATH = 'PATH/TO/data/sample_submission.csv'" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 
| "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "def merge_submissions(submissions: Sequence[pd.DataFrame]) -> pd.DataFrame:\n", 28 | " sub_dict = defaultdict(lambda: [])\n", 29 | " merged_sub = defaultdict(lambda: [])\n", 30 | " for sub in submissions:\n", 31 | " for _, row in sub.iterrows():\n", 32 | " pred_string = str(row['PredictionString'])\n", 33 | " sub_dict[row['ImageId']]\n", 34 | " if 'nan' not in pred_string:\n", 35 | " sub_dict[row['ImageId']].append(pred_string)\n", 36 | " for key, value in sub_dict.items():\n", 37 | " merged_sub['ImageId'].append(key)\n", 38 | " merged_sub['PredictionString'].append(' '.join(value))\n", 39 | " final_sub = pd.DataFrame.from_dict(merged_sub)\n", 40 | " return final_sub" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "batch_submissions = []\n", 50 | "for filepath in glob.glob('{}/batch_*/submission.csv'.format(EXPERIMENT_DIRPATH)):\n", 51 | " print('Processing {}'.format(filepath))\n", 52 | " batch_submission = pd.read_csv(filepath)\n", 53 | " batch_submissions.append(batch_submission)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "submission = merge_submissions(batch_submissions)\n", 63 | "submission.head()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "# Fix those that have bbox with xmin <= xmax and stuff" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "def chunker(seq, size):\n", 80 | " return (seq[pos:pos + size] for pos in range(0, len(seq), size))\n", 81 | "\n", 82 | "def clean_submission(submission):\n", 83 | " ids_cleaned , predictions_cleaned = [], []\n", 84 | " for i, row in tqdm(submission.iterrows(), total = len(submission)):\n", 85 | " img_predictions = row.PredictionString.split(' ')\n", 86 | " if img_predictions == ['']:\n", 87 | " continue\n", 88 | " else:\n", 89 | " img_predictions_cleaned = []\n", 90 | " for pred in chunker(img_predictions,size=6):\n", 91 | " label,score,x1,y1,x2,y2 = pred\n", 92 | " x1,y1,x2,y2 = float(x1),float(y1),float(x2),float(y2)\n", 93 | " if x2 <= x1 or y2 <= y1:\n", 94 | " continue\n", 95 | " else:\n", 96 | " img_predictions_cleaned.extend(pred)\n", 97 | " img_predictions_cleaned = ' '.join(img_predictions_cleaned)\n", 98 | " predictions_cleaned.append(img_predictions_cleaned)\n", 99 | " ids_cleaned.append(row.ImageId)\n", 100 | " submission_fixed = pd.DataFrame({'ImageId': ids_cleaned, \n", 101 | " 'PredictionString':predictions_cleaned})\n", 102 | " return submission_fixed" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "submission = clean_submission(submission)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "def add_missing_image_ids(submission, sample_submission):\n", 121 | " submission['ImageId'] = submission['ImageId'].astype(str)\n", 122 | " sample_submission['ImageId'] = sample_submission['ImageId'].astype(str)\n", 123 | " fixed_submission = pd.merge(sample_submission[['ImageId']], submission, on=['ImageId'], how='outer')\n", 124 | " return fixed_submission" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | 
"metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "sample_submission = pd.read_csv(SAMPLE_SUBMISSION_FILEPATH)\n", 134 | "submission = add_missing_image_ids(submission, sample_submission)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "submission.to_csv(os.path.join(EXPERIMENT_DIRPATH,'merged_submission.csv'), index=False)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [] 152 | } 153 | ], 154 | "metadata": { 155 | "kernelspec": { 156 | "display_name": "cpu py3", 157 | "language": "python", 158 | "name": "cpu_py3" 159 | }, 160 | "language_info": { 161 | "codemirror_mode": { 162 | "name": "ipython", 163 | "version": 3 164 | }, 165 | "file_extension": ".py", 166 | "mimetype": "text/x-python", 167 | "name": "python", 168 | "nbconvert_exporter": "python", 169 | "pygments_lexer": "ipython3", 170 | "version": "3.5.2" 171 | } 172 | }, 173 | "nbformat": 4, 174 | "nbformat_minor": 2 175 | } 176 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import click 2 | from src.pipeline_manager import PipelineManager 3 | 4 | pipeline_manager = PipelineManager() 5 | 6 | 7 | @click.group() 8 | def main(): 9 | pass 10 | 11 | 12 | @main.command() 13 | def prepare_metadata(): 14 | pipeline_manager.prepare_metadata() 15 | 16 | 17 | @main.command() 18 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 19 | @click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False) 20 | def train(pipeline_name, dev_mode): 21 | pipeline_manager.train(pipeline_name, dev_mode) 22 | 23 | 24 | @main.command() 25 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 26 | @click.option('-d', '--image_dir', help='path to image folder with test files', required=False) 27 | @click.option('-s', '--single_image', help='predict single image and visualize', required=False) 28 | @click.option('-n', '--n_files', help='number of files to visualize', type=int, default=16) 29 | @click.option('-sp', '--show_popups', help="if showing images in a popup window") 30 | @click.option('-cl', '--classes_to_visualize', 31 | help="Reduce the vis to subset of classes (string with comma separated classes)") 32 | @click.option('-clsthr', '--classification_threshold', type=float) 33 | @click.option('-nmsthr', '--nms_threshold', type=float) 34 | def visualize(pipeline_name, image_dir=None, single_image=None, n_files=16, show_popups=False, 35 | classes_to_visualize=None, nms_threshold=None, classification_threshold=None): 36 | """ 37 | Makes predictions on test_images (or images in image_dir) draw bounding boxes on them and sends to neptune. 
38 | If show pop_ups it will display the predictions locally in a window popup 39 | Example: 40 | neptune run --config "./configs/neptune_config_local.yaml" main.py -- \ 41 | visualize --pipeline_name retinanet --classes_to_visualize='Picture frame,Cat' 42 | """ 43 | if classes_to_visualize: 44 | classes_to_visualize = classes_to_visualize.split(',') 45 | classes_to_visualize = list(filter(None, classes_to_visualize)) # filter out empty strings 46 | 47 | pipeline_manager.visualize(pipeline_name, image_dir, single_image, 48 | n_files, show_popups, classes_to_visualize, 49 | nms_threshold, classification_threshold) 50 | 51 | 52 | @main.command() 53 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 54 | @click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False) 55 | @click.option('-c', '--chunk_size', help='size of the chunks to run evaluation on', type=int, default=None, 56 | required=False) 57 | def evaluate(pipeline_name, dev_mode, chunk_size): 58 | pipeline_manager.evaluate(pipeline_name, dev_mode, chunk_size) 59 | 60 | 61 | @main.command() 62 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 63 | @click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False) 64 | @click.option('-s', '--submit_predictions', help='submit predictions if true', is_flag=True, required=False) 65 | @click.option('-c', '--chunk_size', help='size of the chunks to run prediction on', type=int, default=None, 66 | required=False) 67 | def predict(pipeline_name, dev_mode, submit_predictions, chunk_size): 68 | pipeline_manager.predict(pipeline_name, dev_mode, submit_predictions, chunk_size) 69 | 70 | 71 | @main.command() 72 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 73 | @click.option('-s', '--submit_predictions', help='submit predictions if true', is_flag=True, required=False) 74 | @click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False) 75 | @click.option('-c', '--chunk_size', help='size of the chunks to run evaluation and prediction on', type=int, 76 | default=None, required=False) 77 | def train_evaluate_predict(pipeline_name, submit_predictions, dev_mode, chunk_size): 78 | pipeline_manager.train(pipeline_name, dev_mode) 79 | pipeline_manager.evaluate(pipeline_name, dev_mode, chunk_size) 80 | pipeline_manager.predict(pipeline_name, dev_mode, submit_predictions, chunk_size) 81 | 82 | 83 | @main.command() 84 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 85 | @click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False) 86 | @click.option('-c', '--chunk_size', help='size of the chunks to run evaluation and prediction on', type=int, 87 | default=None, required=False) 88 | def train_evaluate(pipeline_name, dev_mode, chunk_size): 89 | pipeline_manager.train(pipeline_name, dev_mode) 90 | pipeline_manager.evaluate(pipeline_name, dev_mode, chunk_size) 91 | 92 | 93 | @main.command() 94 | @click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True) 95 | @click.option('-s', '--submit_predictions', help='submit predictions if true', is_flag=True, required=False) 96 | @click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False) 97 | 
@click.option('-c', '--chunk_size', help='size of the chunks to run prediction on', type=int, default=None, 98 | required=False) 99 | def evaluate_predict(pipeline_name, submit_predictions, dev_mode, chunk_size): 100 | pipeline_manager.evaluate(pipeline_name, dev_mode, chunk_size) 101 | pipeline_manager.predict(pipeline_name, dev_mode, submit_predictions, chunk_size) 102 | 103 | 104 | @main.command() 105 | @click.option('-f', '--submission_filepath', help='filepath to json submission file', required=True) 106 | def submit_predictions(submission_filepath): 107 | pipeline_manager.make_submission(submission_filepath) 108 | 109 | 110 | if __name__ == "__main__": 111 | main() 112 | -------------------------------------------------------------------------------- /src/object_detection/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Contains functions which are convenient for unit testing.""" 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | from object_detection.core import anchor_generator 21 | from object_detection.core import box_coder 22 | from object_detection.core import box_list 23 | from object_detection.core import box_predictor 24 | from object_detection.core import matcher 25 | from object_detection.utils import shape_utils 26 | 27 | 28 | class MockBoxCoder(box_coder.BoxCoder): 29 | """Simple `difference` BoxCoder.""" 30 | 31 | @property 32 | def code_size(self): 33 | return 4 34 | 35 | def _encode(self, boxes, anchors): 36 | return boxes.get() - anchors.get() 37 | 38 | def _decode(self, rel_codes, anchors): 39 | return box_list.BoxList(rel_codes + anchors.get()) 40 | 41 | 42 | class MockBoxPredictor(box_predictor.BoxPredictor): 43 | """Simple box predictor that ignores inputs and outputs all zeros.""" 44 | 45 | def __init__(self, is_training, num_classes): 46 | super(MockBoxPredictor, self).__init__(is_training, num_classes) 47 | 48 | def _predict(self, image_features, num_predictions_per_location): 49 | image_feature = image_features[0] 50 | combined_feature_shape = shape_utils.combined_static_and_dynamic_shape( 51 | image_feature) 52 | batch_size = combined_feature_shape[0] 53 | num_anchors = (combined_feature_shape[1] * combined_feature_shape[2]) 54 | code_size = 4 55 | zero = tf.reduce_sum(0 * image_feature) 56 | box_encodings = zero + tf.zeros( 57 | (batch_size, num_anchors, 1, code_size), dtype=tf.float32) 58 | class_predictions_with_background = zero + tf.zeros( 59 | (batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32) 60 | return {box_predictor.BOX_ENCODINGS: box_encodings, 61 | box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND: 62 | class_predictions_with_background} 63 | 64 | 65 | class MockAnchorGenerator(anchor_generator.AnchorGenerator): 66 | """Mock 
anchor generator.""" 67 | 68 | def name_scope(self): 69 | return 'MockAnchorGenerator' 70 | 71 | def num_anchors_per_location(self): 72 | return [1] 73 | 74 | def _generate(self, feature_map_shape_list): 75 | num_anchors = sum([shape[0] * shape[1] for shape in feature_map_shape_list]) 76 | return box_list.BoxList(tf.zeros((num_anchors, 4), dtype=tf.float32)) 77 | 78 | 79 | class MockMatcher(matcher.Matcher): 80 | """Simple matcher that matches first anchor to first groundtruth box.""" 81 | 82 | def _match(self, similarity_matrix): 83 | return tf.constant([0, -1, -1, -1], dtype=tf.int32) 84 | 85 | 86 | def create_diagonal_gradient_image(height, width, depth): 87 | """Creates pyramid image. Useful for testing. 88 | 89 | For example, pyramid_image(5, 6, 1) looks like: 90 | # [[[ 5. 4. 3. 2. 1. 0.] 91 | # [ 6. 5. 4. 3. 2. 1.] 92 | # [ 7. 6. 5. 4. 3. 2.] 93 | # [ 8. 7. 6. 5. 4. 3.] 94 | # [ 9. 8. 7. 6. 5. 4.]]] 95 | 96 | Args: 97 | height: height of image 98 | width: width of image 99 | depth: depth of image 100 | 101 | Returns: 102 | pyramid image 103 | """ 104 | row = np.arange(height) 105 | col = np.arange(width)[::-1] 106 | image_layer = np.expand_dims(row, 1) + col 107 | image_layer = np.expand_dims(image_layer, 2) 108 | 109 | image = image_layer 110 | for i in range(1, depth): 111 | image = np.concatenate((image, image_layer * pow(10, i)), 2) 112 | 113 | return image.astype(np.float32) 114 | 115 | 116 | def create_random_boxes(num_boxes, max_height, max_width): 117 | """Creates random bounding boxes of specific maximum height and width. 118 | 119 | Args: 120 | num_boxes: number of boxes. 121 | max_height: maximum height of boxes. 122 | max_width: maximum width of boxes. 123 | 124 | Returns: 125 | boxes: numpy array of shape [num_boxes, 4]. Each row is in form 126 | [y_min, x_min, y_max, x_max]. 127 | """ 128 | 129 | y_1 = np.random.uniform(size=(1, num_boxes)) * max_height 130 | y_2 = np.random.uniform(size=(1, num_boxes)) * max_height 131 | x_1 = np.random.uniform(size=(1, num_boxes)) * max_width 132 | x_2 = np.random.uniform(size=(1, num_boxes)) * max_width 133 | 134 | boxes = np.zeros(shape=(num_boxes, 4)) 135 | boxes[:, 0] = np.minimum(y_1, y_2) 136 | boxes[:, 1] = np.minimum(x_1, x_2) 137 | boxes[:, 2] = np.maximum(y_1, y_2) 138 | boxes[:, 3] = np.maximum(x_1, x_2) 139 | 140 | return boxes.astype(np.float32) 141 | 142 | 143 | def first_rows_close_as_set(a, b, k=None, rtol=1e-6, atol=1e-6): 144 | """Checks if first K entries of two lists are close, up to permutation. 145 | 146 | Inputs to this assert are lists of items which can be compared via 147 | numpy.allclose(...) and can be sorted. 148 | 149 | Args: 150 | a: list of items which can be compared via numpy.allclose(...) and are 151 | sortable. 152 | b: list of items which can be compared via numpy.allclose(...) and are 153 | sortable. 154 | k: a non-negative integer. If not provided, k is set to be len(a). 155 | rtol: relative tolerance. 156 | atol: absolute tolerance. 157 | 158 | Returns: 159 | boolean, True if input lists a and b have the same length and 160 | the first k entries of the inputs satisfy numpy.allclose() after 161 | sorting entries. 
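  Example (illustrative; uses plain floats, which are sortable and can be
  compared via numpy.allclose):

    >>> first_rows_close_as_set([1.0, 2.0, 3.0], [3.0, 1.0, 2.0])
    True
    >>> first_rows_close_as_set([1.0, 2.0, 3.0], [1.0, 2.0, 2.5])
    False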
162 | """ 163 | if not isinstance(a, list) or not isinstance(b, list) or len(a) != len(b): 164 | return False 165 | if not k: 166 | k = len(a) 167 | k = min(k, len(a)) 168 | a_sorted = sorted(a[:k]) 169 | b_sorted = sorted(b[:k]) 170 | return all([ 171 | np.allclose(entry_a, entry_b, rtol, atol) 172 | for (entry_a, entry_b) in zip(a_sorted, b_sorted) 173 | ]) 174 | -------------------------------------------------------------------------------- /src/pipelines.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from steppy.base import Step, IdentityOperation 4 | from steppy.adapter import Adapter, E 5 | 6 | from .loaders import ImageDetectionLoader 7 | from .models import Retina 8 | from .retinanet import DataDecoder 9 | from .postprocessing import PredictionFormatter, Visualizer 10 | from .preprocessing import GoogleAiLabelEncoder, GoogleAiLabelDecoder 11 | 12 | 13 | def retinanet(config, train_mode, visualize=False): 14 | persist_output = False 15 | load_persisted_output = False 16 | 17 | loader = preprocessing_generator(config, is_train=train_mode) 18 | 19 | retinanet = Step(name='retinanet', 20 | transformer=Retina(**config.retinanet, train_mode=train_mode), 21 | input_steps=[loader], 22 | experiment_directory=config.env.cache_dirpath, 23 | persist_output=persist_output, 24 | is_trainable=True, 25 | load_persisted_output=load_persisted_output) 26 | 27 | if train_mode: 28 | return retinanet 29 | 30 | if visualize: 31 | return visualizer(retinanet, loader.get_step('label_encoder'), config) 32 | 33 | postprocessor = postprocessing(retinanet, loader.get_step('label_encoder'), config) 34 | 35 | output = Step(name='output', 36 | transformer=IdentityOperation(), 37 | input_steps=[postprocessor], 38 | adapter=Adapter({'y_pred': E(postprocessor.name, 'submission')}), 39 | experiment_directory=config.env.cache_dirpath, 40 | persist_output=persist_output, 41 | load_persisted_output=load_persisted_output) 42 | return output 43 | 44 | 45 | def preprocessing_generator(config, is_train): 46 | label_encoder = Step(name='label_encoder', 47 | transformer=GoogleAiLabelEncoder(**config.label_encoder), 48 | input_data=['metadata'], 49 | adapter=Adapter({'annotations': E('metadata', 'annotations'), 50 | 'annotations_human_labels': E('metadata', 'annotations_human_labels') 51 | }), 52 | is_trainable=True, 53 | experiment_directory=config.env.cache_dirpath) 54 | 55 | if is_train: 56 | loader = Step(name='loader', 57 | transformer=ImageDetectionLoader(train_mode=True, **config.loader), 58 | input_data=['input', 'validation_input'], 59 | input_steps=[label_encoder], 60 | adapter=Adapter({'images_data': E('input', 'images_data'), 61 | 'valid_images_data': E('validation_input', 'valid_images_data'), 62 | 'annotations': E(label_encoder.name, 'annotations'), 63 | 'annotations_human_labels': E(label_encoder.name, 'annotations_human_labels'), 64 | }), 65 | experiment_directory=config.env.cache_dirpath) 66 | 67 | else: 68 | loader = Step(name='loader', 69 | transformer=ImageDetectionLoader(train_mode=False, **config.loader), 70 | input_data=['input'], 71 | input_steps=[label_encoder], 72 | adapter=Adapter({'images_data': E('input', 'images_data'), 73 | 'annotations': None, 74 | 'annotations_human_labels': None, 75 | }), 76 | experiment_directory=config.env.cache_dirpath) 77 | return loader 78 | 79 | 80 | def visualizer(model, label_encoder, config): 81 | label_decoder = Step(name='label_decoder', 82 | transformer=GoogleAiLabelDecoder(), 83 | 
input_steps=[label_encoder, ], 84 | experiment_directory=config.env.cache_dirpath) 85 | 86 | decoder = Step(name='decoder', 87 | transformer=DataDecoder(**config.postprocessing.data_decoder), 88 | input_data=['input'], 89 | input_steps=[model, ], 90 | experiment_directory=config.env.cache_dirpath) 91 | 92 | visualize = Step(name='visualizer', 93 | transformer=Visualizer(), 94 | input_steps=[label_decoder, decoder], 95 | input_data=['input'], 96 | adapter=Adapter({'images_data': E('input', 'images_data'), 97 | 'results': E(decoder.name, 'results'), 98 | 'decoder_dict': E(label_decoder.name, 'inverse_mapping')}), 99 | experiment_directory=config.env.cache_dirpath) 100 | 101 | return visualize 102 | 103 | 104 | def postprocessing(model, label_encoder, config): 105 | label_decoder = Step(name='label_decoder', 106 | transformer=GoogleAiLabelDecoder(), 107 | input_steps=[label_encoder, ], 108 | experiment_directory=config.env.cache_dirpath) 109 | 110 | decoder = Step(name='decoder', 111 | transformer=DataDecoder(**config.postprocessing.data_decoder), 112 | input_data=['input'], 113 | input_steps=[model, ], 114 | experiment_directory=config.env.cache_dirpath) 115 | 116 | submission_producer = Step(name='submission_producer', 117 | transformer=PredictionFormatter(), 118 | input_steps=[label_decoder, decoder], 119 | input_data=['input'], 120 | adapter=Adapter({'images_data': E('input', 'images_data'), 121 | 'results': E(decoder.name, 'results'), 122 | 'decoder_dict': E(label_decoder.name, 'inverse_mapping')}), 123 | experiment_directory=config.env.cache_dirpath) 124 | return submission_producer 125 | 126 | 127 | PIPELINES = {'retinanet': {'train': partial(retinanet, train_mode=True), 128 | 'inference': partial(retinanet, train_mode=False), 129 | 'visualize': partial(retinanet, train_mode=False, visualize=True) 130 | }, 131 | 132 | } 133 | -------------------------------------------------------------------------------- /src/object_detection/utils/metrics_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for object_detection.metrics.""" 16 | 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | from object_detection.utils import metrics 21 | 22 | 23 | class MetricsTest(tf.test.TestCase): 24 | 25 | def test_compute_cor_loc(self): 26 | num_gt_imgs_per_class = np.array([100, 1, 5, 1, 1], dtype=int) 27 | num_images_correctly_detected_per_class = np.array( 28 | [10, 0, 1, 0, 0], dtype=int) 29 | corloc = metrics.compute_cor_loc(num_gt_imgs_per_class, 30 | num_images_correctly_detected_per_class) 31 | expected_corloc = np.array([0.1, 0, 0.2, 0, 0], dtype=float) 32 | self.assertTrue(np.allclose(corloc, expected_corloc)) 33 | 34 | def test_compute_cor_loc_nans(self): 35 | num_gt_imgs_per_class = np.array([100, 0, 0, 1, 1], dtype=int) 36 | num_images_correctly_detected_per_class = np.array( 37 | [10, 0, 1, 0, 0], dtype=int) 38 | corloc = metrics.compute_cor_loc(num_gt_imgs_per_class, 39 | num_images_correctly_detected_per_class) 40 | expected_corloc = np.array([0.1, np.nan, np.nan, 0, 0], dtype=float) 41 | self.assertAllClose(corloc, expected_corloc) 42 | 43 | def test_compute_precision_recall(self): 44 | num_gt = 10 45 | scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float) 46 | labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool) 47 | labels_float_type = np.array([0, 1, 1, 0, 0, 1], dtype=float) 48 | accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float) 49 | expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6]) 50 | expected_recall = accumulated_tp_count / num_gt 51 | 52 | precision, recall = metrics.compute_precision_recall(scores, labels, num_gt) 53 | precision_float_type, recall_float_type = metrics.compute_precision_recall( 54 | scores, labels_float_type, num_gt) 55 | 56 | self.assertAllClose(precision, expected_precision) 57 | self.assertAllClose(recall, expected_recall) 58 | self.assertAllClose(precision_float_type, expected_precision) 59 | self.assertAllClose(recall_float_type, expected_recall) 60 | 61 | def test_compute_precision_recall_float(self): 62 | num_gt = 10 63 | scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float) 64 | labels_float = np.array([0, 1, 1, 0.5, 0, 1], dtype=float) 65 | expected_precision = np.array( 66 | [0., 0.5, 0.33333333, 0.5, 0.55555556, 0.63636364], dtype=float) 67 | expected_recall = np.array([0., 0.1, 0.1, 0.2, 0.25, 0.35], dtype=float) 68 | precision, recall = metrics.compute_precision_recall( 69 | scores, labels_float, num_gt) 70 | self.assertAllClose(precision, expected_precision) 71 | self.assertAllClose(recall, expected_recall) 72 | 73 | def test_compute_average_precision(self): 74 | precision = np.array([0.8, 0.76, 0.9, 0.65, 0.7, 0.5, 0.55, 0], dtype=float) 75 | recall = np.array([0.3, 0.3, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5], dtype=float) 76 | processed_precision = np.array( 77 | [0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0], dtype=float) 78 | recall_interval = np.array([0.3, 0, 0.1, 0, 0.05, 0, 0.05, 0], dtype=float) 79 | expected_mean_ap = np.sum(recall_interval * processed_precision) 80 | mean_ap = metrics.compute_average_precision(precision, recall) 81 | self.assertAlmostEqual(expected_mean_ap, mean_ap) 82 | 83 | def test_compute_precision_recall_and_ap_no_groundtruth(self): 84 | num_gt = 0 85 | scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float) 86 | labels = np.array([0, 0, 0, 0, 0, 0], dtype=bool) 87 | expected_precision = None 88 | expected_recall = None 89 | precision, recall = 
metrics.compute_precision_recall(scores, labels, num_gt) 90 | self.assertEqual(precision, expected_precision) 91 | self.assertEqual(recall, expected_recall) 92 | ap = metrics.compute_average_precision(precision, recall) 93 | self.assertTrue(np.isnan(ap)) 94 | 95 | def test_compute_recall_at_k(self): 96 | num_gt = 4 97 | tp_fp = [ 98 | np.array([1, 0, 0], dtype=float), 99 | np.array([0, 1], dtype=float), 100 | np.array([0, 0, 0, 0, 0], dtype=float) 101 | ] 102 | tp_fp_bool = [ 103 | np.array([True, False, False], dtype=bool), 104 | np.array([False, True], dtype=float), 105 | np.array([False, False, False, False, False], dtype=float) 106 | ] 107 | 108 | recall_1 = metrics.compute_recall_at_k(tp_fp, num_gt, 1) 109 | recall_3 = metrics.compute_recall_at_k(tp_fp, num_gt, 3) 110 | recall_5 = metrics.compute_recall_at_k(tp_fp, num_gt, 5) 111 | 112 | recall_3_bool = metrics.compute_recall_at_k(tp_fp_bool, num_gt, 3) 113 | 114 | self.assertAlmostEqual(recall_1, 0.25) 115 | self.assertAlmostEqual(recall_3, 0.5) 116 | self.assertAlmostEqual(recall_3_bool, 0.5) 117 | self.assertAlmostEqual(recall_5, 0.5) 118 | 119 | def test_compute_median_rank_at_k(self): 120 | tp_fp = [ 121 | np.array([1, 0, 0], dtype=float), 122 | np.array([0, 0.1], dtype=float), 123 | np.array([0, 0, 0, 0, 0], dtype=float) 124 | ] 125 | tp_fp_bool = [ 126 | np.array([True, False, False], dtype=bool), 127 | np.array([False, True], dtype=float), 128 | np.array([False, False, False, False, False], dtype=float) 129 | ] 130 | 131 | median_ranks_1 = metrics.compute_median_rank_at_k(tp_fp, 1) 132 | median_ranks_3 = metrics.compute_median_rank_at_k(tp_fp, 3) 133 | median_ranks_5 = metrics.compute_median_rank_at_k(tp_fp, 5) 134 | median_ranks_3_bool = metrics.compute_median_rank_at_k(tp_fp_bool, 3) 135 | 136 | self.assertEqual(median_ranks_1, 0) 137 | self.assertEqual(median_ranks_3, 0.5) 138 | self.assertEqual(median_ranks_3_bool, 0.5) 139 | self.assertEqual(median_ranks_5, 0.5) 140 | 141 | 142 | if __name__ == '__main__': 143 | tf.test.main() 144 | -------------------------------------------------------------------------------- /src/object_detection/utils/label_map_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Label map utility functions.""" 17 | 18 | import logging 19 | 20 | import tensorflow as tf 21 | from google.protobuf import text_format 22 | from object_detection.protos import string_int_label_map_pb2 23 | 24 | 25 | def _validate_label_map(label_map): 26 | """Checks if a label map is valid. 27 | 28 | Args: 29 | label_map: StringIntLabelMap to validate. 30 | 31 | Raises: 32 | ValueError: if label map is invalid. 
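  For reference, a minimal valid label map entry in text format looks like the
  following (illustrative snippet; id 0 is reserved for the background label):

    item {
      name: "/m/01g317"
      id: 1
      display_name: "Person"
    }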
33 | """ 34 | for item in label_map.item: 35 | if item.id < 0: 36 | raise ValueError('Label map ids should be >= 0.') 37 | if (item.id == 0 and item.name != 'background' and 38 | item.display_name != 'background'): 39 | raise ValueError('Label map id 0 is reserved for the background label') 40 | 41 | 42 | def create_category_index(categories): 43 | """Creates dictionary of COCO compatible categories keyed by category id. 44 | 45 | Args: 46 | categories: a list of dicts, each of which has the following keys: 47 | 'id': (required) an integer id uniquely identifying this category. 48 | 'name': (required) string representing category name 49 | e.g., 'cat', 'dog', 'pizza'. 50 | 51 | Returns: 52 | category_index: a dict containing the same entries as categories, but keyed 53 | by the 'id' field of each category. 54 | """ 55 | category_index = {} 56 | for cat in categories: 57 | category_index[cat['id']] = cat 58 | return category_index 59 | 60 | 61 | def get_max_label_map_index(label_map): 62 | """Get maximum index in label map. 63 | 64 | Args: 65 | label_map: a StringIntLabelMapProto 66 | 67 | Returns: 68 | an integer 69 | """ 70 | return max([item.id for item in label_map.item]) 71 | 72 | 73 | def convert_label_map_to_categories(label_map, 74 | max_num_classes, 75 | use_display_name=True): 76 | """Loads label map proto and returns categories list compatible with eval. 77 | 78 | This function loads a label map and returns a list of dicts, each of which 79 | has the following keys: 80 | 'id': (required) an integer id uniquely identifying this category. 81 | 'name': (required) string representing category name 82 | e.g., 'cat', 'dog', 'pizza'. 83 | We only allow class into the list if its id-label_id_offset is 84 | between 0 (inclusive) and max_num_classes (exclusive). 85 | If there are several items mapping to the same id in the label map, 86 | we will only keep the first one in the categories list. 87 | 88 | Args: 89 | label_map: a StringIntLabelMapProto or None. If None, a default categories 90 | list is created with max_num_classes categories. 91 | max_num_classes: maximum number of (consecutive) label indices to include. 92 | use_display_name: (boolean) choose whether to load 'display_name' field 93 | as category name. If False or if the display_name field does not exist, 94 | uses 'name' field as category names instead. 95 | Returns: 96 | categories: a list of dictionaries representing all possible categories. 97 | """ 98 | categories = [] 99 | list_of_ids_already_added = [] 100 | if not label_map: 101 | label_id_offset = 1 102 | for class_id in range(max_num_classes): 103 | categories.append({ 104 | 'id': class_id + label_id_offset, 105 | 'name': 'category_{}'.format(class_id + label_id_offset) 106 | }) 107 | return categories 108 | for item in label_map.item: 109 | if not 0 < item.id <= max_num_classes: 110 | logging.info('Ignore item %d since it falls outside of requested ' 111 | 'label range.', item.id) 112 | continue 113 | if use_display_name and item.HasField('display_name'): 114 | name = item.display_name 115 | else: 116 | name = item.name 117 | if item.id not in list_of_ids_already_added: 118 | list_of_ids_already_added.append(item.id) 119 | categories.append({'id': item.id, 'name': name}) 120 | return categories 121 | 122 | 123 | def load_labelmap(path): 124 | """Loads label map proto. 125 | 126 | Args: 127 | path: path to StringIntLabelMap proto text file. 
128 | Returns: 129 | a StringIntLabelMapProto 130 | """ 131 | with tf.gfile.GFile(path, 'r') as fid: 132 | label_map_string = fid.read() 133 | label_map = string_int_label_map_pb2.StringIntLabelMap() 134 | try: 135 | text_format.Merge(label_map_string, label_map) 136 | except text_format.ParseError: 137 | label_map.ParseFromString(label_map_string) 138 | _validate_label_map(label_map) 139 | return label_map 140 | 141 | 142 | def get_label_map_dict(label_map_path, use_display_name=False): 143 | """Reads a label map and returns a dictionary of label names to id. 144 | 145 | Args: 146 | label_map_path: path to label_map. 147 | use_display_name: whether to use the label map items' display names as keys. 148 | 149 | Returns: 150 | A dictionary mapping label names to id. 151 | """ 152 | label_map = load_labelmap(label_map_path) 153 | label_map_dict = {} 154 | for item in label_map.item: 155 | if use_display_name: 156 | label_map_dict[item.display_name] = item.id 157 | else: 158 | label_map_dict[item.name] = item.id 159 | return label_map_dict 160 | 161 | 162 | def create_category_index_from_labelmap(label_map_path): 163 | """Reads a label map and returns a category index. 164 | 165 | Args: 166 | label_map_path: Path to `StringIntLabelMap` proto text file. 167 | 168 | Returns: 169 | A category index, which is a dictionary that maps integer ids to dicts 170 | containing categories, e.g. 171 | {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...} 172 | """ 173 | label_map = load_labelmap(label_map_path) 174 | max_num_classes = max(item.id for item in label_map.item) 175 | categories = convert_label_map_to_categories(label_map, max_num_classes) 176 | return create_category_index(categories) 177 | 178 | 179 | def create_class_agnostic_category_index(): 180 | """Creates a category index with a single `object` class.""" 181 | return {1: {'id': 1, 'name': 'object'}} 182 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Google AI Open Images - Object Detection Track: Open Solution 2 | 3 | [![license](https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000)](https://github.com/neptune-ml/open-solution-googleai-object-detection/blob/master/LICENSE) 4 | [![Join the chat at https://gitter.im/neptune-ml/open-solution-googleai-object-detection](https://badges.gitter.im/neptune-ml/open-solution-googleai-object-detection.svg)](https://gitter.im/neptune-ml/open-solution-googleai-object-detection?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 5 | 6 | This is an open solution to the [Google AI Open Images - Object Detection Track](https://www.kaggle.com/c/google-ai-open-images-object-detection-track) :smiley: 7 | 8 | ## More competitions :sparkler: 9 | Check our collection of [public projects :gift:](https://app.neptune.ml/-/explore), where you can find multiple Kaggle competitions with code, experiments and outputs. 10 | 11 | ## Our goals 12 | We are building an entirely open solution to this competition. Specifically: 13 | 1. **Learning from the process** - updates about new ideas, code and experiments are the best way to learn data science. Our activity is especially useful for people who want to enter the competition but lack the appropriate experience. 14 | 1. Encourage more Kagglers to start working on this competition. 15 | 1. Deliver an open source solution with no strings attached.
Code is available on our [GitHub repository :computer:](https://github.com/neptune-ml/open-solution-googleai-object-detection). This solution should establish a solid benchmark, as well as provide a good base for your custom ideas and experiments. We care about clean code :smiley: 16 | 1. We are opening our experiments as well: everybody can have a **live preview** of our experiments, parameters, code, etc. Check: [Google-AI-Object-Detection-Challenge :chart_with_upwards_trend:](https://app.neptune.ml/neptune-ml/Google-AI-Object-Detection-Challenge) and images below: 17 | 18 | | UNet training monitor :bar_chart: | Predicted bounding boxes :bar_chart: | 19 | |:---|:---| 20 | |[![unet-training-monitor](https://gist.githubusercontent.com/kamil-kaczmarek/b3b939797fb39752c45fdadfedba3ed9/raw/19272701575bca235473adaabb7b7c54b2416a54/gai-1.png)](https://app.neptune.ml/-/dashboard/experiment/f945da64-6dd3-459b-94c5-58bc6a83f590)|[![predicted-bounding-boxes](https://gist.githubusercontent.com/kamil-kaczmarek/b3b939797fb39752c45fdadfedba3ed9/raw/19272701575bca235473adaabb7b7c54b2416a54/gai-2.png)](https://app.neptune.ml/-/dashboard/experiment/c779468e-d3f7-44b8-a3a4-43a012315708)| 21 | 22 | ## Disclaimer 23 | In this open source solution you will find references to [neptune.ml](https://neptune.ml). It is a free platform for community users, which we use daily to keep track of our experiments. Please note that using neptune.ml is not necessary to proceed with this solution. You may run it as a plain Python script :snake:. 24 | 25 | # How to start? 26 | ## Learn about our solutions 27 | 1. Check the [Kaggle forum](https://www.kaggle.com/c/google-ai-open-images-object-detection-track/discussion/62895) and participate in the discussions. 28 | 1. Check our [Wiki pages :dolphin:](https://github.com/neptune-ml/open-solution-googleai-object-detection/wiki), where we describe our work. Below are links to specific solutions: 29 | 30 | | link to code| link to description | 31 | |:---:|:---:| 32 | |[solution-1](https://github.com/neptune-ml/open-solution-googleai-object-detection/tree/solution-1)|[palm-tree :palm_tree:](https://github.com/neptune-ml/open-solution-googleai-object-detection/wiki/RetinaNet-with-sampler)| 33 | 34 | ## Dataset for this competition 35 | This competition is special because it used [Open Images Dataset V4](https://storage.googleapis.com/openimages/web/index.html), which is quite large: `>1.8M` images and `>0.5TB` :astonished: To make it more approachable, we are hosting the entire dataset in neptune's public directory :sunglasses:. **You can use this dataset in [neptune.ml](https://neptune.ml) with no additional setup :+1:.** 36 | 37 | ## Start experimenting with ready-to-use code 38 | You can jump start your participation in the competition by using our starter pack. The installation instructions below will guide you through the setup. 39 | 40 | ## Installation 41 | ### Fast Track 42 | 1. Clone the repository and install requirements (check requirements.txt) 43 | 44 | ```bash 45 | pip3 install -r requirements.txt 46 | ``` 47 | 48 | 2. Register at [neptune.ml](https://neptune.ml/login) _(if you wish to use it)_ and create your project, for example Google-AI-Object-Detection-Challenge. 49 | 3.
Train RetinaNet: 50 | 51 | :hamster: 52 | ```bash 53 | neptune send --worker m-4p100 \ 54 | --environment pytorch-0.3.1-gpu-py3 \ 55 | --config configs/neptune.yaml \ 56 | main.py train --pipeline_name retinanet 57 | ``` 58 | 59 | :trident: 60 | ```bash 61 | neptune run main.py train --pipeline_name retinanet 62 | ``` 63 | 64 | :snake: 65 | ```bash 66 | python main.py -- train --pipeline_name retinanet 67 | ``` 68 | 69 | 4. Evaluate/Predict RetinaNet: 70 | 71 | **Note:** in case of memory trouble, go to `neptune.yaml` and change `batch_size_inference: 1` 72 | 73 | :hamster: 74 | With the cloud environment you need to point the experiment directory to the one you have just trained. Let's assume that your experiment id was `GAI-14`. You should go to `neptune.yaml` and change: 75 | 76 | ```yaml 77 | experiment_dir: /output/experiment 78 | clone_experiment_dir_from: /input/GAI-14/output/experiment 79 | ``` 80 | 81 | ```bash 82 | neptune send --worker m-4p100 \ 83 | --environment pytorch-0.3.1-gpu-py3 \ 84 | --config configs/neptune.yaml \ 85 | --input /GAI-14 \ 86 | main.py evaluate_predict --pipeline_name retinanet --chunk_size 100 87 | ``` 88 | 89 | :trident: 90 | ```bash 91 | neptune run main.py evaluate_predict --pipeline_name retinanet --chunk_size 100 92 | ``` 93 | 94 | :snake: 95 | ```bash 96 | python main.py -- evaluate_predict --pipeline_name retinanet --chunk_size 100 97 | ``` 98 | 99 | ## Get involved 100 | You are welcome to contribute your code and ideas to this open solution. To get started: 101 | 1. Check the [competition project](https://github.com/neptune-ml/open-solution-googleai-object-detection/projects/1) on GitHub to see what we are working on right now. 102 | 1. Express your interest in a particular task by writing a comment in that task, or by creating a new one with your fresh idea. 103 | 1. We will get back to you quickly in order to start working together. 104 | 1. Check [CONTRIBUTING](CONTRIBUTING.md) for some more information. 105 | 106 | ## User support 107 | There are several ways to seek help: 108 | 1. [Kaggle discussion](https://www.kaggle.com/c/google-ai-open-images-object-detection-track/discussion/62895) is our primary way of communication. 109 | 1. Read the project's [Wiki](https://github.com/neptune-ml/open-solution-googleai-object-detection/wiki), where we publish descriptions of the code, pipelines and supporting tools such as [neptune.ml](https://neptune.ml). 110 | 1. Submit an [issue](https://github.com/neptune-ml/open-solution-googleai-object-detection/issues) directly in this repo.
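
## Running the pipeline from Python
The CLI commands above are thin wrappers around `PipelineManager`. If you prefer to drive the pipeline from Python, the minimal sketch below mirrors the call order of `main.py train_evaluate_predict` (it assumes the same `neptune.yaml` parameters are available, because `src/pipeline_config.py` reads them through `neptune.Context()` at import time):

```python
from src.pipeline_manager import PipelineManager

pipeline_manager = PipelineManager()

# same call order as `main.py train_evaluate_predict --pipeline_name retinanet --chunk_size 100`
pipeline_manager.train('retinanet', False)                # pipeline_name, dev_mode
pipeline_manager.evaluate('retinanet', False, 100)        # pipeline_name, dev_mode, chunk_size
pipeline_manager.predict('retinanet', False, False, 100)  # pipeline_name, dev_mode, submit_predictions, chunk_size
```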
111 | -------------------------------------------------------------------------------- /src/pipeline_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import neptune 4 | from attrdict import AttrDict 5 | from .utils import read_params, parameter_eval, get_class_mappings 6 | 7 | ctx = neptune.Context() 8 | params = read_params(ctx) 9 | 10 | ID_COLUMN = 'ImageID' 11 | LABEL_COLUMN = 'LabelName' 12 | SEED = 1234 13 | MEAN = [0.485, 0.456, 0.406] 14 | STD = [0.229, 0.224, 0.225] 15 | 16 | DESIRED_CLASS_SUBSET = parameter_eval(params.desired_class_subset) 17 | N_SUB_CLASSES = len(DESIRED_CLASS_SUBSET) 18 | 19 | ASPECT_RATIOS = parameter_eval(params.aspect_ratios) 20 | SCALE_RATIOS = parameter_eval(params.scale_ratios) 21 | 22 | CODES2NAMES, NAMES2CODES = get_class_mappings(mappings_file=params.class_mappings_filepath) 23 | 24 | GLOBAL_CONFIG = {'exp_root': params.experiment_dir, 25 | 'load_in_memory': params.load_in_memory, 26 | 'num_workers': params.num_workers, 27 | 'num_classes': N_SUB_CLASSES if N_SUB_CLASSES else params.num_classes, 28 | 'batch_size_train': params.batch_size_train, 29 | 'batch_size_inference': params.batch_size_inference, 30 | 'loader_mode': params.loader_mode, 31 | 'stream_mode': params.stream_mode, 32 | 'max_annotation_per_class': params.max_annotation_per_class, 33 | 'use_suppression': params.use_suppression, 34 | } 35 | 36 | SOLUTION_CONFIG = AttrDict({ 37 | 'env': {'cache_dirpath': params.experiment_dir}, 38 | 'execution': GLOBAL_CONFIG, 39 | 40 | 'label_encoder': {'colname': LABEL_COLUMN 41 | }, 42 | 'loader': {'dataset_params': {'images_dir': None, 43 | 'short_dim': params.short_dim, 44 | 'long_dim': params.long_dim, 45 | 'fixed_h': params.fixed_h, 46 | 'fixed_w': params.fixed_w, 47 | 'sampler_name': params.sampler_name, 48 | 'pad_method': params.pad_method, 49 | 'sample_size': params.training_sample_size, 50 | 'valid_sample_size': params.validation_sample_size, 51 | 'even_class_sampling': params.even_class_sampling, 52 | 'use_suppression': params.use_suppression, 53 | 'data_encoder': {'aspect_ratios': ASPECT_RATIOS, 54 | 'scale_ratios': SCALE_RATIOS, 55 | 'num_anchors': len(ASPECT_RATIOS) * len(SCALE_RATIOS)} 56 | }, 57 | 'loader_params': {'training': {'batch_size': params.batch_size_train, 58 | 'shuffle': False, 59 | 'num_workers': params.num_workers, 60 | 'pin_memory': params.pin_memory 61 | }, 62 | 'inference': {'batch_size': params.batch_size_inference, 63 | 'shuffle': False, 64 | 'num_workers': params.num_workers, 65 | 'pin_memory': params.pin_memory 66 | }, 67 | }, 68 | }, 69 | 70 | 'retinanet': { 71 | 'architecture_config': {'model_params': {'encoder_depth': params.encoder_depth, 72 | 'num_classes': N_SUB_CLASSES if N_SUB_CLASSES else params.num_classes, 73 | # we change the model output size if subclasses used 74 | # fallback to config file 75 | 'num_anchors': len(ASPECT_RATIOS) * len(SCALE_RATIOS), 76 | 'pretrained_encoder': params.pretrained_encoder 77 | }, 78 | 'optimizer_params': {'lr': params.lr, 79 | }, 80 | 'regularizer_params': {'regularize': True, 81 | 'weight_decay_conv2d': params.l2_reg_conv, 82 | }, 83 | 'weights_init': {'function': 'he', 84 | 'pi': params.pi 85 | } 86 | }, 87 | 'training_config': {'epochs': params.epochs_nr, 88 | }, 89 | 'callbacks_config': { 90 | 'model_checkpoint': { 91 | 'filepath': os.path.join(GLOBAL_CONFIG['exp_root'], 'checkpoints', 'retinanet', 'best.torch'), 92 | 'epoch_every': 1, 93 | # 'minimize': not params.validate_with_map 94 | }, 95 | 
'exp_lr_scheduler': {'gamma': params.gamma, 96 | 'epoch_every': 1}, 97 | 'plateau_lr_scheduler': {'lr_factor': params.lr_factor, 98 | 'lr_patience': params.lr_patience, 99 | 'epoch_every': 1}, 100 | 'training_monitor': {'batch_every': 1, 101 | 'epoch_every': 1}, 102 | 'experiment_timing': {'batch_every': 10, 103 | 'epoch_every': 1}, 104 | 'validation_monitor': { 105 | 'epoch_every': 1, 106 | # 'data_dir': params.train_imgs_dir, 107 | # 'validate_with_map': params.validate_with_map, 108 | # 'small_annotations_size': params.small_annotations_size, 109 | }, 110 | 'neptune_monitor': {'model_name': 'unet', 111 | # 'image_nr': 16, 112 | # 'image_resize': 0.2, 113 | # 'outputs_to_plot': params.unet_outputs_to_plot 114 | }, 115 | 'early_stopping': {'patience': params.patience, 116 | # 'minimize': not params.validate_with_map 117 | }, 118 | }, 119 | }, 120 | 'postprocessing': { 121 | 'data_decoder': { 122 | 'short_dim': params.short_dim, 123 | 'long_dim': params.long_dim, 124 | 'fixed_h': params.fixed_h, 125 | 'fixed_w': params.fixed_w, 126 | 'sampler_name': params.sampler_name, 127 | 'num_threads': params.num_threads, 128 | 'aspect_ratios': ASPECT_RATIOS, 129 | 'scale_ratios': SCALE_RATIOS, 130 | 'num_anchors': len(ASPECT_RATIOS) * len(SCALE_RATIOS), 131 | 'cls_thrs': params.classification_threshold, 132 | 'nms_thrs': params.nms_threshold 133 | } 134 | }, 135 | }) 136 | -------------------------------------------------------------------------------- /src/models.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from math import log 3 | import torch 4 | from torch.autograd import Variable 5 | from torch import optim 6 | from toolkit.pytorch_transformers.models import Model 7 | from toolkit.pytorch_transformers.callbacks import CallbackList, TrainingMonitor, ExperimentTiming, \ 8 | ExponentialLRScheduler, ModelCheckpoint, EarlyStopping, NeptuneMonitor, ValidationMonitor 9 | 10 | from .parallel import DataParallelCriterion, DataParallelModel as DataParallel 11 | from .retinanet import RetinaNet, RetinaLoss 12 | 13 | 14 | class ModelParallel(Model): 15 | def fit(self, datagen, validation_datagen=None): 16 | self._initialize_model_weights() 17 | 18 | self.model = DataParallel(self.model) 19 | 20 | if torch.cuda.is_available(): 21 | self.model = self.model.cuda() 22 | 23 | self.callbacks.set_params(self, validation_datagen=validation_datagen) 24 | self.callbacks.on_train_begin() 25 | 26 | batch_gen, steps = datagen 27 | for epoch_id in range(self.training_config['epochs']): 28 | self.callbacks.on_epoch_begin() 29 | for batch_id, data in enumerate(batch_gen): 30 | self.callbacks.on_batch_begin() 31 | metrics = self._fit_loop(data) 32 | self.callbacks.on_batch_end(metrics=metrics) 33 | if batch_id == steps: 34 | break 35 | self.callbacks.on_epoch_end() 36 | if self.callbacks.training_break(): 37 | break 38 | self.callbacks.on_train_end() 39 | return self 40 | 41 | def _fit_loop(self, data): 42 | X = data[0] 43 | targets_tensors = data[1:] 44 | 45 | if torch.cuda.is_available(): 46 | X = Variable(X).cuda() 47 | targets_var = [] 48 | for target_tensor in targets_tensors: 49 | targets_var.append(Variable(target_tensor).cuda()) 50 | else: 51 | X = Variable(X) 52 | targets_var = [] 53 | for target_tensor in targets_tensors: 54 | targets_var.append(Variable(target_tensor)) 55 | 56 | self.optimizer.zero_grad() 57 | outputs_batch = self.model(X) 58 | partial_batch_losses = {} 59 | 60 | if len(self.output_names) == 1: 61 | for (name, 
loss_function, weight), target in zip(self.loss_function, targets_var): 62 | batch_loss = loss_function(outputs_batch, target) * weight 63 | else: 64 | for (name, loss_function, weight), output, target in zip(self.loss_function, outputs_batch, targets_var): 65 | partial_batch_losses[name] = loss_function(output, target) * weight 66 | batch_loss = sum(partial_batch_losses.values()) 67 | partial_batch_losses['sum'] = batch_loss 68 | batch_loss.backward() 69 | self.optimizer.step() 70 | 71 | return partial_batch_losses 72 | 73 | def load(self, filepath): 74 | self.model.eval() 75 | 76 | if not isinstance(self.model, DataParallel): 77 | self.model = DataParallel(self.model) 78 | 79 | if torch.cuda.is_available(): 80 | self.model.cpu() 81 | self.model.load_state_dict(torch.load(filepath)) 82 | self.model = self.model.cuda() 83 | else: 84 | self.model.load_state_dict(torch.load(filepath, map_location=lambda storage, loc: storage)) 85 | 86 | self.model.train() 87 | 88 | return self 89 | 90 | 91 | class Retina(ModelParallel): 92 | def __init__(self, architecture_config, training_config, callbacks_config, train_mode=False): 93 | """ 94 | """ 95 | super().__init__(architecture_config, training_config, callbacks_config) 96 | self.train_mode = train_mode 97 | self.num_classes = self.architecture_config['model_params']['num_classes'] 98 | self.pi = self.architecture_config['weights_init']['pi'] 99 | 100 | self.set_model() 101 | self.weight_regularization = weight_regularization 102 | self.optimizer = optim.Adam(self.weight_regularization(self.model, **architecture_config['regularizer_params']), 103 | **architecture_config['optimizer_params']) 104 | self.loss_function = [('FocalLoss', DataParallelCriterion(RetinaLoss(num_classes=self.num_classes)), 1.0)] 105 | self.callbacks = callbacks(self.callbacks_config) 106 | 107 | def transform(self, datagen, *args, **kwargs): 108 | if self.train_mode: 109 | return self 110 | 111 | self.model.eval() 112 | 113 | batch_gen, steps = datagen 114 | boxes = [] 115 | labels = [] 116 | for batch_id, data in enumerate(batch_gen): 117 | if isinstance(data, list): 118 | X = data[0] 119 | else: 120 | X = data 121 | 122 | if torch.cuda.is_available(): 123 | X = Variable(X, volatile=True).cuda() 124 | else: 125 | X = Variable(X, volatile=True) 126 | 127 | outputs = self.model(X) 128 | 129 | if isinstance(outputs, list): 130 | outputs = [output.data.cpu() for output in outputs] 131 | outputs = torch.cat(outputs, dim=0) 132 | else: 133 | outputs = outputs.data.cpu() 134 | 135 | boxes_batch, labels_batch = outputs[:, :, :4], outputs[:, :, 4:] 136 | boxes.extend([box for box in boxes_batch]) 137 | labels.extend([label for label in labels_batch]) 138 | 139 | if batch_id == steps: 140 | break 141 | 142 | self.model.train() 143 | 144 | outputs = {'box_predictions': boxes, 145 | 'class_predictions': labels} 146 | return outputs 147 | 148 | def set_model(self): 149 | self.model = RetinaNet(**self.architecture_config['model_params']) 150 | 151 | def _initialize_model_weights(self): 152 | self.model.apply(partial(init_weights_retina, pi=self.pi)) 153 | self.model.freeze_bn() 154 | 155 | 156 | def weight_regularization(model, regularize, weight_decay_conv2d): 157 | if regularize: 158 | parameter_list = [{'params': model.parameters(), 'weight_decay': weight_decay_conv2d}] 159 | else: 160 | parameter_list = [model.parameters()] 161 | return parameter_list 162 | 163 | 164 | def callbacks(callbacks_config): 165 | experiment_timing = ExperimentTiming(**callbacks_config['experiment_timing']) 
166 | model_checkpoints = ModelCheckpoint(**callbacks_config['model_checkpoint']) 167 | lr_scheduler = ExponentialLRScheduler(**callbacks_config['exp_lr_scheduler']) 168 | training_monitor = TrainingMonitor(**callbacks_config['training_monitor']) 169 | validation_monitor = ValidationMonitor(**callbacks_config['validation_monitor']) 170 | neptune_monitor = NeptuneMonitor(**callbacks_config['neptune_monitor']) 171 | early_stopping = EarlyStopping(**callbacks_config['early_stopping']) 172 | 173 | return CallbackList( 174 | callbacks=[experiment_timing, training_monitor, validation_monitor, 175 | model_checkpoints, lr_scheduler, early_stopping, neptune_monitor, 176 | ]) 177 | 178 | 179 | def init_weights_retina(module, pi): 180 | if hasattr(module, 'name'): 181 | b = -log((1 - pi) / pi) 182 | if module.name == 'final_layer': 183 | module.bias.data.fill_(b) 184 | elif module.name == 'head_layer': 185 | module.weight.data.normal_(0, pi) 186 | module.bias.data.fill_(0) 187 | -------------------------------------------------------------------------------- /src/object_detection/utils/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Functions for computing metrics like precision, recall, CorLoc and etc.""" 16 | 17 | 18 | import numpy as np 19 | 20 | 21 | def compute_precision_recall(scores, labels, num_gt): 22 | """Compute precision and recall. 23 | 24 | Args: 25 | scores: A float numpy array representing detection score 26 | labels: A float numpy array representing weighted true/false positive labels 27 | num_gt: Number of ground truth instances 28 | 29 | Raises: 30 | ValueError: if the input is not of the correct format 31 | 32 | Returns: 33 | precision: Fraction of positive instances over detected ones. This value is 34 | None if no ground truth labels are present. 35 | recall: Fraction of detected positive instance over all positive instances. 36 | This value is None if no ground truth labels are present. 
37 | 38 | """ 39 | if not isinstance(labels, np.ndarray) or len(labels.shape) != 1: 40 | raise ValueError("labels must be single dimension numpy array") 41 | 42 | if labels.dtype != np.float and labels.dtype != np.bool: 43 | raise ValueError("labels type must be either bool or float") 44 | 45 | if not isinstance(scores, np.ndarray) or len(scores.shape) != 1: 46 | raise ValueError("scores must be single dimension numpy array") 47 | 48 | if num_gt < np.sum(labels): 49 | raise ValueError("Number of true positives must be smaller than num_gt.") 50 | 51 | if len(scores) != len(labels): 52 | raise ValueError("scores and labels must be of the same size.") 53 | 54 | if num_gt == 0: 55 | return None, None 56 | 57 | sorted_indices = np.argsort(scores) 58 | sorted_indices = sorted_indices[::-1] 59 | true_positive_labels = labels[sorted_indices] 60 | false_positive_labels = (true_positive_labels <= 0).astype(float) 61 | cum_true_positives = np.cumsum(true_positive_labels) 62 | cum_false_positives = np.cumsum(false_positive_labels) 63 | precision = cum_true_positives.astype(float) / ( 64 | cum_true_positives + cum_false_positives) 65 | recall = cum_true_positives.astype(float) / num_gt 66 | return precision, recall 67 | 68 | 69 | def compute_average_precision(precision, recall): 70 | """Compute Average Precision according to the definition in VOCdevkit. 71 | 72 | Precision is modified to ensure that it does not decrease as recall 73 | decrease. 74 | 75 | Args: 76 | precision: A float [N, 1] numpy array of precisions 77 | recall: A float [N, 1] numpy array of recalls 78 | 79 | Raises: 80 | ValueError: if the input is not of the correct format 81 | 82 | Returns: 83 | average_precison: The area under the precision recall curve. NaN if 84 | precision and recall are None. 85 | 86 | """ 87 | if precision is None: 88 | if recall is not None: 89 | raise ValueError("If precision is None, recall must also be None") 90 | return np.NAN 91 | 92 | if not isinstance(precision, np.ndarray) or not isinstance( 93 | recall, np.ndarray): 94 | raise ValueError("precision and recall must be numpy array") 95 | if precision.dtype != np.float or recall.dtype != np.float: 96 | raise ValueError("input must be float numpy array.") 97 | if len(precision) != len(recall): 98 | raise ValueError("precision and recall must be of the same size.") 99 | if not precision.size: 100 | return 0.0 101 | if np.amin(precision) < 0 or np.amax(precision) > 1: 102 | raise ValueError("Precision must be in the range of [0, 1].") 103 | if np.amin(recall) < 0 or np.amax(recall) > 1: 104 | raise ValueError("recall must be in the range of [0, 1].") 105 | if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)): 106 | raise ValueError("recall must be a non-decreasing array") 107 | 108 | recall = np.concatenate([[0], recall, [1]]) 109 | precision = np.concatenate([[0], precision, [0]]) 110 | 111 | # Preprocess precision to be a non-decreasing array 112 | for i in range(len(precision) - 2, -1, -1): 113 | precision[i] = np.maximum(precision[i], precision[i + 1]) 114 | 115 | indices = np.where(recall[1:] != recall[:-1])[0] + 1 116 | average_precision = np.sum( 117 | (recall[indices] - recall[indices - 1]) * precision[indices]) 118 | return average_precision 119 | 120 | 121 | def compute_cor_loc(num_gt_imgs_per_class, 122 | num_images_correctly_detected_per_class): 123 | """Compute CorLoc according to the definition in the following paper. 
124 | 125 | https://www.robots.ox.ac.uk/~vgg/rg/papers/deselaers-eccv10.pdf 126 | 127 | Returns nans if there are no ground truth images for a class. 128 | 129 | Args: 130 | num_gt_imgs_per_class: 1D array, representing number of images containing 131 | at least one object instance of a particular class 132 | num_images_correctly_detected_per_class: 1D array, representing number of 133 | images that are correctly detected at least one object instance of a 134 | particular class 135 | 136 | Returns: 137 | corloc_per_class: A float numpy array represents the corloc score of each 138 | class 139 | """ 140 | return np.where( 141 | num_gt_imgs_per_class == 0, np.nan, 142 | num_images_correctly_detected_per_class / num_gt_imgs_per_class) 143 | 144 | 145 | def compute_median_rank_at_k(tp_fp_list, k): 146 | """Computes MedianRank@k, where k is the top-scoring labels. 147 | 148 | Args: 149 | tp_fp_list: a list of numpy arrays; each numpy array corresponds to the all 150 | detection on a single image, where the detections are sorted by score in 151 | descending order. Further, each numpy array element can have boolean or 152 | float values. True positive elements have either value >0.0 or True; 153 | any other value is considered false positive. 154 | k: number of top-scoring proposals to take. 155 | 156 | Returns: 157 | median_rank: median rank of all true positive proposals among top k by 158 | score. 159 | """ 160 | ranks = [] 161 | for i in range(len(tp_fp_list)): 162 | ranks.append( 163 | np.where(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])] > 0)[0]) 164 | concatenated_ranks = np.concatenate(ranks) 165 | return np.median(concatenated_ranks) 166 | 167 | 168 | def compute_recall_at_k(tp_fp_list, num_gt, k): 169 | """Computes Recall@k, MedianRank@k, where k is the top-scoring labels. 170 | 171 | Args: 172 | tp_fp_list: a list of numpy arrays; each numpy array corresponds to the all 173 | detection on a single image, where the detections are sorted by score in 174 | descending order. Further, each numpy array element can have boolean or 175 | float values. True positive elements have either value >0.0 or True; 176 | any other value is considered false positive. 177 | num_gt: number of groundtruth anotations. 178 | k: number of top-scoring proposals to take. 179 | 180 | Returns: 181 | recall: recall evaluated on the top k by score detections. 182 | """ 183 | 184 | tp_fp_eval = [] 185 | for i in range(len(tp_fp_list)): 186 | tp_fp_eval.append(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])]) 187 | 188 | tp_fp_eval = np.concatenate(tp_fp_eval) 189 | 190 | return np.sum(tp_fp_eval) / num_gt 191 | -------------------------------------------------------------------------------- /src/object_detection/utils/label_map_util_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.label_map_util.""" 17 | 18 | import os 19 | import tensorflow as tf 20 | 21 | from google.protobuf import text_format 22 | from object_detection.protos import string_int_label_map_pb2 23 | from object_detection.utils import label_map_util 24 | 25 | 26 | class LabelMapUtilTest(tf.test.TestCase): 27 | 28 | def _generate_label_map(self, num_classes): 29 | label_map_proto = string_int_label_map_pb2.StringIntLabelMap() 30 | for i in range(1, num_classes + 1): 31 | item = label_map_proto.item.add() 32 | item.id = i 33 | item.name = 'label_' + str(i) 34 | item.display_name = str(i) 35 | return label_map_proto 36 | 37 | def test_get_label_map_dict(self): 38 | label_map_string = """ 39 | item { 40 | id:2 41 | name:'cat' 42 | } 43 | item { 44 | id:1 45 | name:'dog' 46 | } 47 | """ 48 | label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') 49 | with tf.gfile.Open(label_map_path, 'wb') as f: 50 | f.write(label_map_string) 51 | 52 | label_map_dict = label_map_util.get_label_map_dict(label_map_path) 53 | self.assertEqual(label_map_dict['dog'], 1) 54 | self.assertEqual(label_map_dict['cat'], 2) 55 | 56 | def test_get_label_map_dict_display(self): 57 | label_map_string = """ 58 | item { 59 | id:2 60 | display_name:'cat' 61 | } 62 | item { 63 | id:1 64 | display_name:'dog' 65 | } 66 | """ 67 | label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') 68 | with tf.gfile.Open(label_map_path, 'wb') as f: 69 | f.write(label_map_string) 70 | 71 | label_map_dict = label_map_util.get_label_map_dict( 72 | label_map_path, use_display_name=True) 73 | self.assertEqual(label_map_dict['dog'], 1) 74 | self.assertEqual(label_map_dict['cat'], 2) 75 | 76 | def test_load_bad_label_map(self): 77 | label_map_string = """ 78 | item { 79 | id:0 80 | name:'class that should not be indexed at zero' 81 | } 82 | item { 83 | id:2 84 | name:'cat' 85 | } 86 | item { 87 | id:1 88 | name:'dog' 89 | } 90 | """ 91 | label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') 92 | with tf.gfile.Open(label_map_path, 'wb') as f: 93 | f.write(label_map_string) 94 | 95 | with self.assertRaises(ValueError): 96 | label_map_util.load_labelmap(label_map_path) 97 | 98 | def test_load_label_map_with_background(self): 99 | label_map_string = """ 100 | item { 101 | id:0 102 | name:'background' 103 | } 104 | item { 105 | id:2 106 | name:'cat' 107 | } 108 | item { 109 | id:1 110 | name:'dog' 111 | } 112 | """ 113 | label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') 114 | with tf.gfile.Open(label_map_path, 'wb') as f: 115 | f.write(label_map_string) 116 | 117 | label_map_dict = label_map_util.get_label_map_dict(label_map_path) 118 | self.assertEqual(label_map_dict['background'], 0) 119 | self.assertEqual(label_map_dict['dog'], 1) 120 | self.assertEqual(label_map_dict['cat'], 2) 121 | 122 | def test_keep_categories_with_unique_id(self): 123 | label_map_proto = string_int_label_map_pb2.StringIntLabelMap() 124 | label_map_string = """ 125 | item { 126 | id:2 127 | name:'cat' 128 | } 129 | item { 130 | id:1 131 | name:'child' 132 | } 133 | item { 134 | id:1 135 | name:'person' 136 | } 137 | item { 138 | id:1 139 | name:'n00007846' 140 | } 141 | """ 142 | text_format.Merge(label_map_string, label_map_proto) 143 | categories = label_map_util.convert_label_map_to_categories( 144 | label_map_proto, max_num_classes=3) 145 | self.assertListEqual([{ 146 | 'id': 2, 147 | 'name': 
'cat' 148 | }, { 149 | 'id': 1, 150 | 'name': 'child' 151 | }], categories) 152 | 153 | def test_convert_label_map_to_categories_no_label_map(self): 154 | categories = label_map_util.convert_label_map_to_categories( 155 | None, max_num_classes=3) 156 | expected_categories_list = [{ 157 | 'name': 'category_1', 158 | 'id': 1 159 | }, { 160 | 'name': 'category_2', 161 | 'id': 2 162 | }, { 163 | 'name': 'category_3', 164 | 'id': 3 165 | }] 166 | self.assertListEqual(expected_categories_list, categories) 167 | 168 | def test_convert_label_map_to_coco_categories(self): 169 | label_map_proto = self._generate_label_map(num_classes=4) 170 | categories = label_map_util.convert_label_map_to_categories( 171 | label_map_proto, max_num_classes=3) 172 | expected_categories_list = [{ 173 | 'name': '1', 174 | 'id': 1 175 | }, { 176 | 'name': '2', 177 | 'id': 2 178 | }, { 179 | 'name': '3', 180 | 'id': 3 181 | }] 182 | self.assertListEqual(expected_categories_list, categories) 183 | 184 | def test_convert_label_map_to_coco_categories_with_few_classes(self): 185 | label_map_proto = self._generate_label_map(num_classes=4) 186 | cat_no_offset = label_map_util.convert_label_map_to_categories( 187 | label_map_proto, max_num_classes=2) 188 | expected_categories_list = [{ 189 | 'name': '1', 190 | 'id': 1 191 | }, { 192 | 'name': '2', 193 | 'id': 2 194 | }] 195 | self.assertListEqual(expected_categories_list, cat_no_offset) 196 | 197 | def test_get_max_label_map_index(self): 198 | num_classes = 4 199 | label_map_proto = self._generate_label_map(num_classes=num_classes) 200 | max_index = label_map_util.get_max_label_map_index(label_map_proto) 201 | self.assertEqual(num_classes, max_index) 202 | 203 | def test_create_category_index(self): 204 | categories = [{'name': '1', 'id': 1}, {'name': '2', 'id': 2}] 205 | category_index = label_map_util.create_category_index(categories) 206 | self.assertDictEqual({ 207 | 1: { 208 | 'name': '1', 209 | 'id': 1 210 | }, 211 | 2: { 212 | 'name': '2', 213 | 'id': 2 214 | } 215 | }, category_index) 216 | 217 | def test_create_category_index_from_labelmap(self): 218 | label_map_string = """ 219 | item { 220 | id:2 221 | name:'cat' 222 | } 223 | item { 224 | id:1 225 | name:'dog' 226 | } 227 | """ 228 | label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') 229 | with tf.gfile.Open(label_map_path, 'wb') as f: 230 | f.write(label_map_string) 231 | 232 | category_index = label_map_util.create_category_index_from_labelmap( 233 | label_map_path) 234 | self.assertDictEqual({ 235 | 1: { 236 | 'name': 'dog', 237 | 'id': 1 238 | }, 239 | 2: { 240 | 'name': 'cat', 241 | 'id': 2 242 | } 243 | }, category_index) 244 | 245 | 246 | if __name__ == '__main__': 247 | tf.test.main() 248 | -------------------------------------------------------------------------------- /src/object_detection/dataset_tools/oid_tfrecord_creation_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for oid_tfrecord_creation.py.""" 16 | 17 | import pandas as pd 18 | import tensorflow as tf 19 | 20 | from object_detection.dataset_tools import oid_tfrecord_creation 21 | 22 | 23 | def create_test_data(): 24 | data = { 25 | 'ImageID': ['i1', 'i1', 'i1', 'i1', 'i1', 'i2', 'i2'], 26 | 'LabelName': ['a', 'a', 'b', 'b', 'c', 'b', 'c'], 27 | 'YMin': [0.3, 0.6, 0.8, 0.1, None, 0.0, 0.0], 28 | 'XMin': [0.1, 0.3, 0.7, 0.0, None, 0.1, 0.1], 29 | 'XMax': [0.2, 0.3, 0.8, 0.5, None, 0.9, 0.9], 30 | 'YMax': [0.3, 0.6, 1, 0.8, None, 0.8, 0.8], 31 | 'IsOccluded': [0, 1, 1, 0, None, 0, 0], 32 | 'IsTruncated': [0, 0, 0, 1, None, 0, 0], 33 | 'IsGroupOf': [0, 0, 0, 0, None, 0, 1], 34 | 'IsDepiction': [1, 0, 0, 0, None, 0, 0], 35 | 'ConfidenceImageLabel': [None, None, None, None, 0, None, None], 36 | } 37 | df = pd.DataFrame(data=data) 38 | label_map = {'a': 0, 'b': 1, 'c': 2} 39 | return label_map, df 40 | 41 | 42 | class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase): 43 | 44 | def test_simple(self): 45 | label_map, df = create_test_data() 46 | 47 | tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( 48 | df[df.ImageID == 'i1'], label_map, 'encoded_image_test') 49 | self.assertProtoEquals( 50 | """ 51 | features { 52 | feature { 53 | key: "image/encoded" 54 | value { bytes_list { value: "encoded_image_test" } } } 55 | feature { 56 | key: "image/filename" 57 | value { bytes_list { value: "i1.jpg" } } } 58 | feature { 59 | key: "image/object/bbox/ymin" 60 | value { float_list { value: [0.3, 0.6, 0.8, 0.1] } } } 61 | feature { 62 | key: "image/object/bbox/xmin" 63 | value { float_list { value: [0.1, 0.3, 0.7, 0.0] } } } 64 | feature { 65 | key: "image/object/bbox/ymax" 66 | value { float_list { value: [0.3, 0.6, 1.0, 0.8] } } } 67 | feature { 68 | key: "image/object/bbox/xmax" 69 | value { float_list { value: [0.2, 0.3, 0.8, 0.5] } } } 70 | feature { 71 | key: "image/object/class/label" 72 | value { int64_list { value: [0, 0, 1, 1] } } } 73 | feature { 74 | key: "image/object/class/text" 75 | value { bytes_list { value: ["a", "a", "b", "b"] } } } 76 | feature { 77 | key: "image/source_id" 78 | value { bytes_list { value: "i1" } } } 79 | feature { 80 | key: "image/object/depiction" 81 | value { int64_list { value: [1, 0, 0, 0] } } } 82 | feature { 83 | key: "image/object/group_of" 84 | value { int64_list { value: [0, 0, 0, 0] } } } 85 | feature { 86 | key: "image/object/occluded" 87 | value { int64_list { value: [0, 1, 1, 0] } } } 88 | feature { 89 | key: "image/object/truncated" 90 | value { int64_list { value: [0, 0, 0, 1] } } } 91 | feature { 92 | key: "image/class/label" 93 | value { int64_list { value: [2] } } } 94 | feature { 95 | key: "image/class/text" 96 | value { bytes_list { value: ["c"] } } } } 97 | """, tf_example) 98 | 99 | def test_no_attributes(self): 100 | label_map, df = create_test_data() 101 | 102 | del df['IsDepiction'] 103 | del df['IsGroupOf'] 104 | del df['IsOccluded'] 105 | del df['IsTruncated'] 106 | del df['ConfidenceImageLabel'] 107 | 108 | tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( 109 | df[df.ImageID == 'i2'], label_map, 'encoded_image_test') 110 | self.assertProtoEquals(""" 111 | features { 112 | feature { 113 | key: "image/encoded" 114 | value { bytes_list { value: "encoded_image_test" } } } 115 | feature 
{ 116 | key: "image/filename" 117 | value { bytes_list { value: "i2.jpg" } } } 118 | feature { 119 | key: "image/object/bbox/ymin" 120 | value { float_list { value: [0.0, 0.0] } } } 121 | feature { 122 | key: "image/object/bbox/xmin" 123 | value { float_list { value: [0.1, 0.1] } } } 124 | feature { 125 | key: "image/object/bbox/ymax" 126 | value { float_list { value: [0.8, 0.8] } } } 127 | feature { 128 | key: "image/object/bbox/xmax" 129 | value { float_list { value: [0.9, 0.9] } } } 130 | feature { 131 | key: "image/object/class/label" 132 | value { int64_list { value: [1, 2] } } } 133 | feature { 134 | key: "image/object/class/text" 135 | value { bytes_list { value: ["b", "c"] } } } 136 | feature { 137 | key: "image/source_id" 138 | value { bytes_list { value: "i2" } } } } 139 | """, tf_example) 140 | 141 | def test_label_filtering(self): 142 | label_map, df = create_test_data() 143 | 144 | label_map = {'a': 0} 145 | 146 | tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( 147 | df[df.ImageID == 'i1'], label_map, 'encoded_image_test') 148 | self.assertProtoEquals( 149 | """ 150 | features { 151 | feature { 152 | key: "image/encoded" 153 | value { bytes_list { value: "encoded_image_test" } } } 154 | feature { 155 | key: "image/filename" 156 | value { bytes_list { value: "i1.jpg" } } } 157 | feature { 158 | key: "image/object/bbox/ymin" 159 | value { float_list { value: [0.3, 0.6] } } } 160 | feature { 161 | key: "image/object/bbox/xmin" 162 | value { float_list { value: [0.1, 0.3] } } } 163 | feature { 164 | key: "image/object/bbox/ymax" 165 | value { float_list { value: [0.3, 0.6] } } } 166 | feature { 167 | key: "image/object/bbox/xmax" 168 | value { float_list { value: [0.2, 0.3] } } } 169 | feature { 170 | key: "image/object/class/label" 171 | value { int64_list { value: [0, 0] } } } 172 | feature { 173 | key: "image/object/class/text" 174 | value { bytes_list { value: ["a", "a"] } } } 175 | feature { 176 | key: "image/source_id" 177 | value { bytes_list { value: "i1" } } } 178 | feature { 179 | key: "image/object/depiction" 180 | value { int64_list { value: [1, 0] } } } 181 | feature { 182 | key: "image/object/group_of" 183 | value { int64_list { value: [0, 0] } } } 184 | feature { 185 | key: "image/object/occluded" 186 | value { int64_list { value: [0, 1] } } } 187 | feature { 188 | key: "image/object/truncated" 189 | value { int64_list { value: [0, 0] } } } 190 | feature { 191 | key: "image/class/label" 192 | value { int64_list { } } } 193 | feature { 194 | key: "image/class/text" 195 | value { bytes_list { } } } } 196 | """, tf_example) 197 | 198 | 199 | if __name__ == '__main__': 200 | tf.test.main() 201 | -------------------------------------------------------------------------------- /src/object_detection/utils/np_box_mask_list_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.utils.np_box_mask_list_test.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from object_detection.utils import np_box_mask_list 22 | 23 | 24 | class BoxMaskListTest(tf.test.TestCase): 25 | 26 | def test_invalid_box_mask_data(self): 27 | with self.assertRaises(ValueError): 28 | np_box_mask_list.BoxMaskList( 29 | box_data=[0, 0, 1, 1], 30 | mask_data=np.zeros([1, 3, 3], dtype=np.uint8)) 31 | 32 | with self.assertRaises(ValueError): 33 | np_box_mask_list.BoxMaskList( 34 | box_data=np.array([[0, 0, 1, 1]], dtype=int), 35 | mask_data=np.zeros([1, 3, 3], dtype=np.uint8)) 36 | 37 | with self.assertRaises(ValueError): 38 | np_box_mask_list.BoxMaskList( 39 | box_data=np.array([0, 1, 1, 3, 4], dtype=float), 40 | mask_data=np.zeros([1, 3, 3], dtype=np.uint8)) 41 | 42 | with self.assertRaises(ValueError): 43 | np_box_mask_list.BoxMaskList( 44 | box_data=np.array([[0, 1, 1, 3], [3, 1, 1, 5]], dtype=float), 45 | mask_data=np.zeros([2, 3, 3], dtype=np.uint8)) 46 | 47 | with self.assertRaises(ValueError): 48 | np_box_mask_list.BoxMaskList( 49 | box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float), 50 | mask_data=np.zeros([3, 5, 5], dtype=np.uint8)) 51 | 52 | with self.assertRaises(ValueError): 53 | np_box_mask_list.BoxMaskList( 54 | box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float), 55 | mask_data=np.zeros([2, 5], dtype=np.uint8)) 56 | 57 | with self.assertRaises(ValueError): 58 | np_box_mask_list.BoxMaskList( 59 | box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float), 60 | mask_data=np.zeros([2, 5, 5, 5], dtype=np.uint8)) 61 | 62 | with self.assertRaises(ValueError): 63 | np_box_mask_list.BoxMaskList( 64 | box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float), 65 | mask_data=np.zeros([2, 5, 5], dtype=np.int32)) 66 | 67 | def test_has_field_with_existed_field(self): 68 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 69 | [0.0, 0.0, 20.0, 20.0]], 70 | dtype=float) 71 | box_mask_list = np_box_mask_list.BoxMaskList( 72 | box_data=boxes, mask_data=np.zeros([3, 5, 5], dtype=np.uint8)) 73 | self.assertTrue(box_mask_list.has_field('boxes')) 74 | self.assertTrue(box_mask_list.has_field('masks')) 75 | 76 | def test_has_field_with_nonexisted_field(self): 77 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 78 | [0.0, 0.0, 20.0, 20.0]], 79 | dtype=float) 80 | box_mask_list = np_box_mask_list.BoxMaskList( 81 | box_data=boxes, mask_data=np.zeros([3, 3, 3], dtype=np.uint8)) 82 | self.assertFalse(box_mask_list.has_field('scores')) 83 | 84 | def test_get_field_with_existed_field(self): 85 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 86 | [0.0, 0.0, 20.0, 20.0]], 87 | dtype=float) 88 | masks = np.zeros([3, 3, 3], dtype=np.uint8) 89 | box_mask_list = np_box_mask_list.BoxMaskList( 90 | box_data=boxes, mask_data=masks) 91 | self.assertTrue(np.allclose(box_mask_list.get_field('boxes'), boxes)) 92 | self.assertTrue(np.allclose(box_mask_list.get_field('masks'), masks)) 93 | 94 | def test_get_field_with_nonexited_field(self): 95 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 96 | [0.0, 0.0, 20.0, 20.0]], 97 | dtype=float) 98 | masks = np.zeros([3, 3, 3], dtype=np.uint8) 99 | box_mask_list = np_box_mask_list.BoxMaskList( 100 | box_data=boxes, mask_data=masks) 
101 | with self.assertRaises(ValueError): 102 | box_mask_list.get_field('scores') 103 | 104 | 105 | class AddExtraFieldTest(tf.test.TestCase): 106 | 107 | def setUp(self): 108 | boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], 109 | [0.0, 0.0, 20.0, 20.0]], 110 | dtype=float) 111 | masks = np.zeros([3, 3, 3], dtype=np.uint8) 112 | self.box_mask_list = np_box_mask_list.BoxMaskList( 113 | box_data=boxes, mask_data=masks) 114 | 115 | def test_add_already_existed_field_bbox(self): 116 | with self.assertRaises(ValueError): 117 | self.box_mask_list.add_field('boxes', 118 | np.array([[0, 0, 0, 1, 0]], dtype=float)) 119 | 120 | def test_add_already_existed_field_mask(self): 121 | with self.assertRaises(ValueError): 122 | self.box_mask_list.add_field('masks', 123 | np.zeros([3, 3, 3], dtype=np.uint8)) 124 | 125 | def test_add_invalid_field_data(self): 126 | with self.assertRaises(ValueError): 127 | self.box_mask_list.add_field('scores', np.array([0.5, 0.7], dtype=float)) 128 | with self.assertRaises(ValueError): 129 | self.box_mask_list.add_field('scores', 130 | np.array([0.5, 0.7, 0.9, 0.1], dtype=float)) 131 | 132 | def test_add_single_dimensional_field_data(self): 133 | box_mask_list = self.box_mask_list 134 | scores = np.array([0.5, 0.7, 0.9], dtype=float) 135 | box_mask_list.add_field('scores', scores) 136 | self.assertTrue(np.allclose(scores, self.box_mask_list.get_field('scores'))) 137 | 138 | def test_add_multi_dimensional_field_data(self): 139 | box_mask_list = self.box_mask_list 140 | labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]], 141 | dtype=int) 142 | box_mask_list.add_field('labels', labels) 143 | self.assertTrue(np.allclose(labels, self.box_mask_list.get_field('labels'))) 144 | 145 | def test_get_extra_fields(self): 146 | box_mask_list = self.box_mask_list 147 | self.assertItemsEqual(box_mask_list.get_extra_fields(), ['masks']) 148 | 149 | scores = np.array([0.5, 0.7, 0.9], dtype=float) 150 | box_mask_list.add_field('scores', scores) 151 | self.assertItemsEqual(box_mask_list.get_extra_fields(), ['masks', 'scores']) 152 | 153 | labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]], 154 | dtype=int) 155 | box_mask_list.add_field('labels', labels) 156 | self.assertItemsEqual(box_mask_list.get_extra_fields(), 157 | ['masks', 'scores', 'labels']) 158 | 159 | def test_get_coordinates(self): 160 | y_min, x_min, y_max, x_max = self.box_mask_list.get_coordinates() 161 | 162 | expected_y_min = np.array([3.0, 14.0, 0.0], dtype=float) 163 | expected_x_min = np.array([4.0, 14.0, 0.0], dtype=float) 164 | expected_y_max = np.array([6.0, 15.0, 20.0], dtype=float) 165 | expected_x_max = np.array([8.0, 15.0, 20.0], dtype=float) 166 | 167 | self.assertTrue(np.allclose(y_min, expected_y_min)) 168 | self.assertTrue(np.allclose(x_min, expected_x_min)) 169 | self.assertTrue(np.allclose(y_max, expected_y_max)) 170 | self.assertTrue(np.allclose(x_max, expected_x_max)) 171 | 172 | def test_num_boxes(self): 173 | boxes = np.array([[0., 0., 100., 100.], [10., 30., 50., 70.]], dtype=float) 174 | masks = np.zeros([2, 5, 5], dtype=np.uint8) 175 | box_mask_list = np_box_mask_list.BoxMaskList( 176 | box_data=boxes, mask_data=masks) 177 | expected_num_boxes = 2 178 | self.assertEqual(box_mask_list.num_boxes(), expected_num_boxes) 179 | 180 | 181 | if __name__ == '__main__': 182 | tf.test.main() 183 | -------------------------------------------------------------------------------- /src/object_detection/dataset_tools/oid_hierarchical_labels_expansion.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""An executable to expand hierarchically image-level labels and boxes. 16 | 17 | Example usage: 18 | python models/research/object_detection/dataset_tools/\ 19 | oid_hierarchical_labels_expansion.py \ 20 | --json_hierarchy_file= \ 21 | --input_annotations= \ 22 | --output_annotations= \ 23 | --annotation_type=<1 (for boxes) or 2 (for image-level labels)> 24 | """ 25 | 26 | import argparse 27 | import json 28 | 29 | 30 | def _update_dict(initial_dict, update): 31 | """Updates dictionary with update content. 32 | 33 | Args: 34 | initial_dict: initial dictionary. 35 | update: updated dictionary. 36 | """ 37 | 38 | for key, value_list in update.items(): 39 | if key in initial_dict: 40 | initial_dict[key].extend(value_list) 41 | else: 42 | initial_dict[key] = value_list 43 | 44 | 45 | def _build_plain_hierarchy(hierarchy, skip_root=False): 46 | """Expands tree hierarchy representation to parent-child dictionary. 47 | 48 | Args: 49 | hierarchy: labels hierarchy as JSON file. 50 | skip_root: if true skips root from the processing (done for the case when all 51 | classes under hierarchy are collected under virtual node). 52 | 53 | Returns: 54 | keyed_parent - dictionary of parent - all its children nodes. 55 | keyed_child - dictionary of children - all its parent nodes 56 | children - all children of the current node. 57 | """ 58 | all_children = [] 59 | all_keyed_parent = {} 60 | all_keyed_child = {} 61 | if 'Subcategory' in hierarchy: 62 | for node in hierarchy['Subcategory']: 63 | keyed_parent, keyed_child, children = _build_plain_hierarchy(node) 64 | # Update is not done through dict.update() since some children have multi- 65 | # ple parents in the hiearchy. 66 | _update_dict(all_keyed_parent, keyed_parent) 67 | _update_dict(all_keyed_child, keyed_child) 68 | all_children.extend(children) 69 | 70 | if not skip_root: 71 | all_keyed_parent[hierarchy['LabelName']] = all_children 72 | all_children = [hierarchy['LabelName']] + all_children 73 | for child, _ in all_keyed_child.items(): 74 | all_keyed_child[child].append(hierarchy['LabelName']) 75 | all_keyed_child[hierarchy['LabelName']] = [] 76 | 77 | return all_keyed_parent, all_keyed_child, all_children 78 | 79 | 80 | class OIDHierarchicalLabelsExpansion(object): 81 | """ Main class to perform labels hierachical expansion.""" 82 | 83 | def __init__(self, hierarchy): 84 | """Constructor. 85 | 86 | Args: 87 | hierarchy: labels hierarchy as JSON object. 88 | """ 89 | 90 | self._hierarchy_keyed_parent, self._hierarchy_keyed_child, _ = ( 91 | _build_plain_hierarchy(hierarchy, skip_root=True)) 92 | 93 | def expand_boxes_from_csv(self, csv_row): 94 | """Expands a row containing bounding boxes from CSV file. 
95 | 96 | Args: 97 | csv_row: a single row of Open Images released groundtruth file. 98 | 99 | Returns: 100 | a list of strings (including the initial row) corresponding to the ground 101 | truth expanded to multiple annotation for evaluation with Open Images 102 | Challenge 2018 metric. 103 | """ 104 | # Row header is expected to be exactly: 105 | # ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded, 106 | # IsTruncated,IsGroupOf,IsDepiction,IsInside 107 | cvs_row_splitted = csv_row.split(',') 108 | assert len(cvs_row_splitted) == 13 109 | result = [csv_row] 110 | assert cvs_row_splitted[2] in self._hierarchy_keyed_child 111 | parent_nodes = self._hierarchy_keyed_child[cvs_row_splitted[2]] 112 | for parent_node in parent_nodes: 113 | cvs_row_splitted[2] = parent_node 114 | result.append(','.join(cvs_row_splitted)) 115 | return result 116 | 117 | def expand_labels_from_csv(self, csv_row): 118 | """Expands a row containing bounding boxes from CSV file. 119 | 120 | Args: 121 | csv_row: a single row of Open Images released groundtruth file. 122 | 123 | Returns: 124 | a list of strings (including the initial row) corresponding to the ground 125 | truth expanded to multiple annotation for evaluation with Open Images 126 | Challenge 2018 metric. 127 | """ 128 | # Row header is expected to be exactly: 129 | # ImageID,Source,LabelName,Confidence 130 | cvs_row_splited = csv_row.split(',') 131 | assert len(cvs_row_splited) == 4 132 | result = [csv_row] 133 | if int(cvs_row_splited[3]) == 1: 134 | assert cvs_row_splited[2] in self._hierarchy_keyed_child 135 | parent_nodes = self._hierarchy_keyed_child[cvs_row_splited[2]] 136 | for parent_node in parent_nodes: 137 | cvs_row_splited[2] = parent_node 138 | result.append(','.join(cvs_row_splited)) 139 | else: 140 | assert cvs_row_splited[2] in self._hierarchy_keyed_parent 141 | child_nodes = self._hierarchy_keyed_parent[cvs_row_splited[2]] 142 | for child_node in child_nodes: 143 | cvs_row_splited[2] = child_node 144 | result.append(','.join(cvs_row_splited)) 145 | return result 146 | 147 | 148 | def main(parsed_args): 149 | with open(parsed_args.json_hierarchy_file) as f: 150 | hierarchy = json.load(f) 151 | expansion_generator = OIDHierarchicalLabelsExpansion(hierarchy) 152 | labels_file = False 153 | if parsed_args.annotation_type == 2: 154 | labels_file = True 155 | elif parsed_args.annotation_type != 1: 156 | print('--annotation_type expected value is 1 or 2.') 157 | return -1 158 | with open(parsed_args.input_annotations, 'r') as source: 159 | with open(parsed_args.output_annotations, 'w') as target: 160 | header = None 161 | for line in source: 162 | if not header: 163 | target.writelines(line) 164 | header = line 165 | continue 166 | if labels_file: 167 | expanded_lines = expansion_generator.expand_labels_from_csv(line) 168 | else: 169 | expanded_lines = expansion_generator.expand_boxes_from_csv(line) 170 | target.writelines(expanded_lines) 171 | 172 | 173 | if __name__ == '__main__': 174 | parser = argparse.ArgumentParser( 175 | description='Hierarchically expand annotations (excluding root node).') 176 | parser.add_argument( 177 | '--json_hierarchy_file', 178 | required=True, 179 | help='Path to the file containing label hierarchy in JSON format.') 180 | parser.add_argument( 181 | '--input_annotations', 182 | required=True, 183 | help="""Path to Open Images annotations file (either bounding boxes or 184 | image-level labels).""") 185 | parser.add_argument( 186 | '--output_annotations', 187 | required=True, 188 | help="""Path to 
the output file.""") 189 | parser.add_argument( 190 | '--annotation_type', 191 | type=int, 192 | required=True, 193 | help="""Type of the input annotations: 1 - boxes, 2 - image-level 194 | labels""" 195 | ) 196 | args = parser.parse_args() 197 | main(args) 198 | --------------------------------------------------------------------------------