├── .gitattributes ├── .github └── stale.yml ├── .gitignore ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md ├── LICENSE.txt ├── README.md ├── __init__.py ├── bounding_box_utils ├── __init__.py └── bounding_box_utils.py ├── data_generator ├── __init__.py ├── data_augmentation_chain_constant_input_size.py ├── data_augmentation_chain_original_ssd.py ├── data_augmentation_chain_satellite.py ├── data_augmentation_chain_variable_input_size.py ├── object_detection_2d_data_generator.py ├── object_detection_2d_geometric_ops.py ├── object_detection_2d_image_boxes_validation_utils.py ├── object_detection_2d_misc_utils.py ├── object_detection_2d_patch_sampling_ops.py └── object_detection_2d_photometric_ops.py ├── eval_utils ├── __init__.py ├── average_precision_evaluator.py └── coco_utils.py ├── examples ├── fish-bike.jpg ├── fish_bike.jpg ├── ssd300_pascalVOC_pred_01.png ├── ssd300_pascalVOC_pred_02.png ├── ssd300_pascalVOC_pred_03.png ├── ssd300_pascalVOC_pred_04.png ├── ssd300_pascalVOC_pred_05.png ├── ssd300_pascalVOC_pred_06.png ├── ssd300_pascalVOC_pred_07.png ├── ssd300_pascalVOC_pred_08.png ├── ssd300_pascalVOC_pred_09.png ├── ssd7_udacity_traffic_pred_01.png ├── ssd7_udacity_traffic_pred_02.png ├── ssd7_udacity_traffic_pred_03.png ├── ssd7_udacity_traffic_pred_04.png ├── ssd7_udacity_traffic_pred_05.png ├── trained_ssd300_pascalVOC2007_test_pred_01.png ├── trained_ssd300_pascalVOC2007_test_pred_01_no_gt.png ├── trained_ssd300_pascalVOC2007_test_pred_02.png ├── trained_ssd300_pascalVOC2007_test_pred_02_no_gt.png ├── trained_ssd300_pascalVOC2007_test_pred_03.png ├── trained_ssd300_pascalVOC2007_test_pred_03_no_gt.png ├── trained_ssd300_pascalVOC2007_test_pred_04.png ├── trained_ssd300_pascalVOC2007_test_pred_04_no_gt.png ├── trained_ssd300_pascalVOC2007_test_pred_05.png ├── trained_ssd300_pascalVOC2007_test_pred_05_no_gt.png └── trained_ssd300_pascalVOC2007_test_pred_06.png ├── keras_layers ├── __init__.py ├── keras_layer_AnchorBoxes.py ├── keras_layer_DecodeDetections.py ├── keras_layer_DecodeDetectionsFast.py └── keras_layer_L2Normalization.py ├── keras_loss_function ├── __init__.py └── keras_ssd_loss.py ├── misc_utils ├── __init__.py └── tensor_sampling_utils.py ├── models ├── __init__.py ├── keras_ssd300.py ├── keras_ssd512.py └── keras_ssd7.py ├── ssd300_evaluation.ipynb ├── ssd300_evaluation_COCO.ipynb ├── ssd300_inference.ipynb ├── ssd300_training.ipynb ├── ssd512_inference.ipynb ├── ssd7_training.ipynb ├── ssd_encoder_decoder ├── __init__.py ├── matching_utils.py ├── ssd_input_encoder.py └── ssd_output_decoder.py ├── training_summaries ├── ssd300_pascal_07+12_loss_history.png └── ssd300_pascal_07+12_training_summary.md └── weight_sampling_tutorial.ipynb /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-language=Python 2 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Configuration for probot-stale - https://github.com/probot/stale 2 | 3 | # Number of days of inactivity before an Issue or Pull Request becomes stale 4 | daysUntilStale: 7 5 | # Number of days of inactivity before a stale Issue or Pull Request is closed 6 | daysUntilClose: 7 7 | # Issues or Pull Requests with these labels will never be considered stale. 
Set to `[]` to disable 8 | exemptLabels: 9 | - pinned 10 | - security 11 | - "[Status] Maybe Later" 12 | # Label to use when marking as stale 13 | staleLabel: stale 14 | # Comment to post when marking as stale. Set to `false` to disable 15 | markComment: > 16 | This issue has been automatically marked as stale because it has not had 17 | recent activity. It will be closed if no further activity occurs. Thank you 18 | for your contributions. 19 | # Comment to post when removing the stale label. Set to `false` to disable 20 | unmarkComment: false 21 | # Comment to post when closing a stale Issue or Pull Request. Set to `false` to disable 22 | closeComment: false 23 | # Limit to only `issues` or `pulls` 24 | # only: issues 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | .ipynb_checkpoints/ 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # Ignore any files and directories that begin with the word "local" 98 | local* 99 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | --- 3 | 4 | Contributions to this repository are welcome, but before you create a pull request, consider the following guidelines: 5 | 6 | 1. The To-do list in the README of this repository defines the main topics for which contributions are welcome. If you want to contribute, ideally contribute to one of the topics listed there. 7 | 2. If you'd like to contribute features that are not mentioned on the to-do list in the README, make sure to explain why your proposed change adds value, i.e. what relevant use case it solves. The benefit of any new feature will be compared against the cost of maintaining it and your contribution will be accepted or rejected based on this trade-off. 8 | 3. One pull request should be about one specific feature or improvement, i.e.
it should not contain multiple unrelated changes. If you want to contribute multiple features and/or improvements, create a separate pull request for every individual feature or improvement. 9 | 4. When you create a pull request, make sure to explain properly 10 | * why your proposed change adds value, i.e. what problem or use case it solves, 11 | * all the API changes it will introduce, if any, 12 | * all behavioral changes in any existing parts of the project it will introduce, if any. 13 | 5. This should go without saying, but you are responsible for updating any parts of the code or the tutorial notebooks that are affected by your introduced changes. 14 | 6. Any submitted code must conform to the coding standards and style of this repository. There is no formal guide for coding standards and style, but here are a few things to note: 15 | * Any new modules, classes or functions must provide proper docstrings unless they are trivial. These docstrings must have sections for Arguments, Returns, and Raises (if applicable). For every argument of a function, the docstring must explain precisely what the argument does, what data type it expects, whether or not it is optional, and any requirements for the range of values it expects. The same goes for the returns. Use existing docstrings as templates. 16 | * Naming: 17 | * `ClassNames` consist of capitalized words without underscores. 18 | * `module_names.py` consist of lower case words connected with underscores. 19 | * `function_names` consist of lower case words connected with underscores. 20 | * `variable_names` consist of lower case words connected with underscores. 21 | * All module, class, function, and variable names must be descriptive in order to meet the goal that all code should always be as self-explanatory as possible. A longer and descriptive name is always preferable to a shorter and non-descriptive name. Abbreviations are generally to be avoided unless the full words would really make the name too long. 22 | * More in-line comments are better than fewer in-line comments and all comments should be precise and succinct. 23 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### If you open a GitHub issue, here is the policy: 2 | 3 | Your issue must be about one of the following: 4 | 5 | 1. a bug, 6 | 2. a feature request, 7 | 3. a documentation issue, or 8 | 4. a question that is **specific to this SSD implementation**. 9 | 10 | You will only get help if you adhere to the following guidelines: 11 | 12 | * Before you open an issue, search the open **and closed** issues first. Your problem/question might already have been solved/answered before. 13 | * If you're getting unexpected behavior from code I wrote, open an issue and I'll try to help. If you're getting unexpected behavior from code **you** wrote, you'll have to fix it yourself. E.g. if you made a ton of changes to the code or the tutorials and now it doesn't work anymore, that's your own problem. I don't want to spend my time debugging your code. 14 | * Make sure you're using the latest master. If you're 30 commits behind and have a problem, the only answer you'll likely get is to pull the latest master and try again. 15 | * Read the documentation. All of it. If the answer to your problem/question can be found in the documentation, you might not get an answer, because, seriously, you could really have figured this out yourself.
16 | * If you're asking a question, it must be specific to this SSD implementation. General deep learning or object detection questions will likely get closed without an answer. E.g. a question like "How do I get the mAP of an SSD for my own dataset?" has nothing to do with this particular SSD implementation, because computing the mAP works the same way for any object detection model. You should ask such a question in an appropriate forum or on the [Data Science section of StackOverflow](https://datascience.stackexchange.com/) instead. 17 | * If you get an error: 18 | * Provide the full stack trace of the error you're getting, not just the error message itself. 19 | * Make sure any code you post is properly formatted as such. 20 | * Provide any useful information about your environment, e.g.: 21 | * Operating System 22 | * Which commit of this repository you're on 23 | * Keras version 24 | * TensorFlow version 25 | * Provide a minimal reproducible example, i.e. post code and explain clearly how you ended up with this error. 26 | * Provide any useful information about your specific use case and parameters: 27 | * What model are you trying to use/train? 28 | * Describe the dataset you're using. 29 | * List the values of any parameters you changed that might be relevant. 30 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2018 Pierluigi Ferrari. 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 
41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. 
You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/__init__.py -------------------------------------------------------------------------------- /bounding_box_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/bounding_box_utils/__init__.py -------------------------------------------------------------------------------- /data_generator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/data_generator/__init__.py -------------------------------------------------------------------------------- /data_generator/data_augmentation_chain_constant_input_size.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A data augmentation chain suitable for constant-size input images. 3 | 4 | Copyright (C) 2018 Pierluigi Ferrari 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | ''' 18 | 19 | from __future__ import division 20 | import numpy as np 21 | 22 | from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation 23 | from data_generator.object_detection_2d_geometric_ops import RandomFlip, RandomTranslate, RandomScale 24 | from data_generator.object_detection_2d_image_boxes_validation_utils import BoundGenerator, BoxFilter, ImageValidator 25 | 26 | class DataAugmentationConstantInputSize: 27 | ''' 28 | Applies a chain of photometric and geometric image transformations. For documentation, please refer 29 | to the documentation of the individual transformations involved. 30 | 31 | Important: This augmentation chain is suitable for constant-size images only. 32 | ''' 33 | 34 | def __init__(self, 35 | random_brightness=(-48, 48, 0.5), 36 | random_contrast=(0.5, 1.8, 0.5), 37 | random_saturation=(0.5, 1.8, 0.5), 38 | random_hue=(18, 0.5), 39 | random_flip=0.5, 40 | random_translate=((0.03,0.5), (0.03,0.5), 0.5), 41 | random_scale=(0.5, 2.0, 0.5), 42 | n_trials_max=3, 43 | clip_boxes=True, 44 | overlap_criterion='area', 45 | bounds_box_filter=(0.3, 1.0), 46 | bounds_validator=(0.5, 1.0), 47 | n_boxes_min=1, 48 | background=(0,0,0), 49 | labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): 50 | 51 | if (random_scale[0] >= 1) or (random_scale[1] <= 1): 52 | raise ValueError("This sequence of transformations only makes sense if the minimum scaling factor is <1 and the maximum scaling factor is >1.") 53 | 54 | self.n_trials_max = n_trials_max 55 | self.clip_boxes = clip_boxes 56 | self.overlap_criterion = overlap_criterion 57 | self.bounds_box_filter = bounds_box_filter 58 | self.bounds_validator = bounds_validator 59 | self.n_boxes_min = n_boxes_min 60 | self.background = background 61 | self.labels_format = labels_format 62 | 63 | # Determines which boxes are kept in an image after the transformations have been applied. 64 | self.box_filter = BoxFilter(check_overlap=True, 65 | check_min_area=True, 66 | check_degenerate=True, 67 | overlap_criterion=self.overlap_criterion, 68 | overlap_bounds=self.bounds_box_filter, 69 | min_area=16, 70 | labels_format=self.labels_format) 71 | 72 | # Determines whether the result of the transformations is a valid training image. 73 | self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion, 74 | bounds=self.bounds_validator, 75 | n_boxes_min=self.n_boxes_min, 76 | labels_format=self.labels_format) 77 | 78 | # Utility distortions 79 | self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV') 80 | self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB') 81 | self.convert_to_float32 = ConvertDataType(to='float32') 82 | self.convert_to_uint8 = ConvertDataType(to='uint8') 83 | self.convert_to_3_channels = ConvertTo3Channels() # Make sure all images end up having 3 channels. 
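        # A note on the ordering in the sequences defined further down: the random photometric ops add to or scale pixel values, which could wrap around in uint8 arithmetic, so they are applied to float32 images, while the RGB<->HSV conversions are performed on uint8 images so that the channel values stay within OpenCV's uint8 value ranges. This is why `sequence1` and `sequence2` below repeatedly alternate between `convert_to_float32` and `convert_to_uint8`.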
84 | 85 | # Photometric transformations 86 | self.random_brightness = RandomBrightness(lower=random_brightness[0], upper=random_brightness[1], prob=random_brightness[2]) 87 | self.random_contrast = RandomContrast(lower=random_contrast[0], upper=random_contrast[1], prob=random_contrast[2]) 88 | self.random_saturation = RandomSaturation(lower=random_saturation[0], upper=random_saturation[1], prob=random_saturation[2]) 89 | self.random_hue = RandomHue(max_delta=random_hue[0], prob=random_hue[1]) 90 | 91 | # Geometric transformations 92 | self.random_flip = RandomFlip(dim='horizontal', prob=random_flip, labels_format=self.labels_format) 93 | self.random_translate = RandomTranslate(dy_minmax=random_translate[0], 94 | dx_minmax=random_translate[1], 95 | prob=random_translate[2], 96 | clip_boxes=self.clip_boxes, 97 | box_filter=self.box_filter, 98 | image_validator=self.image_validator, 99 | n_trials_max=self.n_trials_max, 100 | background=self.background, 101 | labels_format=self.labels_format) 102 | self.random_zoom_in = RandomScale(min_factor=1.0, 103 | max_factor=random_scale[1], 104 | prob=random_scale[2], 105 | clip_boxes=self.clip_boxes, 106 | box_filter=self.box_filter, 107 | image_validator=self.image_validator, 108 | n_trials_max=self.n_trials_max, 109 | background=self.background, 110 | labels_format=self.labels_format) 111 | self.random_zoom_out = RandomScale(min_factor=random_scale[0], 112 | max_factor=1.0, 113 | prob=random_scale[2], 114 | clip_boxes=self.clip_boxes, 115 | box_filter=self.box_filter, 116 | image_validator=self.image_validator, 117 | n_trials_max=self.n_trials_max, 118 | background=self.background, 119 | labels_format=self.labels_format) 120 | 121 | # If we zoom in, do translation before scaling. 122 | self.sequence1 = [self.convert_to_3_channels, 123 | self.convert_to_float32, 124 | self.random_brightness, 125 | self.random_contrast, 126 | self.convert_to_uint8, 127 | self.convert_RGB_to_HSV, 128 | self.convert_to_float32, 129 | self.random_saturation, 130 | self.random_hue, 131 | self.convert_to_uint8, 132 | self.convert_HSV_to_RGB, 133 | self.random_translate, 134 | self.random_zoom_in, 135 | self.random_flip] 136 | 137 | # If we zoom out, do scaling before translation. 138 | self.sequence2 = [self.convert_to_3_channels, 139 | self.convert_to_float32, 140 | self.random_brightness, 141 | self.convert_to_uint8, 142 | self.convert_RGB_to_HSV, 143 | self.convert_to_float32, 144 | self.random_saturation, 145 | self.random_hue, 146 | self.convert_to_uint8, 147 | self.convert_HSV_to_RGB, 148 | self.convert_to_float32, 149 | self.random_contrast, 150 | self.convert_to_uint8, 151 | self.random_zoom_out, 152 | self.random_translate, 153 | self.random_flip] 154 | 155 | def __call__(self, image, labels=None): 156 | 157 | self.random_translate.labels_format = self.labels_format 158 | self.random_zoom_in.labels_format = self.labels_format 159 | self.random_zoom_out.labels_format = self.labels_format 160 | self.random_flip.labels_format = self.labels_format 161 | 162 | # Choose sequence 1 with probability 0.5. 163 | if np.random.choice(2): 164 | 165 | if not (labels is None): 166 | for transform in self.sequence1: 167 | image, labels = transform(image, labels) 168 | return image, labels 169 | else: 170 | for transform in self.sequence1: 171 | image = transform(image) 172 | return image 173 | # Choose sequence 2 with probability 0.5. 
174 | else: 175 | 176 | if not (labels is None): 177 | for transform in self.sequence2: 178 | image, labels = transform(image, labels) 179 | return image, labels 180 | else: 181 | for transform in self.sequence2: 182 | image = transform(image) 183 | return image 184 | -------------------------------------------------------------------------------- /data_generator/data_augmentation_chain_original_ssd.py: -------------------------------------------------------------------------------- 1 | ''' 2 | The data augmentation operations of the original SSD implementation. 3 | 4 | Copyright (C) 2018 Pierluigi Ferrari 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | ''' 18 | 19 | from __future__ import division 20 | import numpy as np 21 | import cv2 22 | import inspect 23 | 24 | from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation, RandomChannelSwap 25 | from data_generator.object_detection_2d_patch_sampling_ops import PatchCoordinateGenerator, RandomPatch, RandomPatchInf 26 | from data_generator.object_detection_2d_geometric_ops import ResizeRandomInterp, RandomFlip 27 | from data_generator.object_detection_2d_image_boxes_validation_utils import BoundGenerator, BoxFilter, ImageValidator 28 | 29 | class SSDRandomCrop: 30 | ''' 31 | Performs the same random crops as defined by the `batch_sampler` instructions 32 | of the original Caffe implementation of SSD. A description of this random cropping 33 | strategy can also be found in the data augmentation section of the paper: 34 | https://arxiv.org/abs/1512.02325 35 | ''' 36 | 37 | def __init__(self, labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): 38 | ''' 39 | Arguments: 40 | labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels 41 | of an image contains which bounding box coordinate. The dictionary maps at least the keywords 42 | 'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array. 43 | ''' 44 | 45 | self.labels_format = labels_format 46 | 47 | # This randomly samples one of the lower IoU bounds defined 48 | # by the `sample_space` every time it is called. 49 | self.bound_generator = BoundGenerator(sample_space=((None, None), 50 | (0.1, None), 51 | (0.3, None), 52 | (0.5, None), 53 | (0.7, None), 54 | (0.9, None)), 55 | weights=None) 56 | 57 | # Produces coordinates for candidate patches such that the height 58 | # and width of the patches are between 0.3 and 1.0 of the height 59 | # and width of the respective image and the aspect ratio of the 60 | # patches is between 0.5 and 2.0. 61 | self.patch_coord_generator = PatchCoordinateGenerator(must_match='h_w', 62 | min_scale=0.3, 63 | max_scale=1.0, 64 | scale_uniformly=False, 65 | min_aspect_ratio = 0.5, 66 | max_aspect_ratio = 2.0) 67 | 68 | # Filters out boxes whose center point does not lie within the 69 | # chosen patches. 
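        # For example (hypothetical numbers): a ground truth box with corners (xmin, ymin, xmax, ymax) = (10, 10, 110, 60) survives a sampled patch of (0, 0, 80, 80), because its center point (60, 35) lies inside the patch, even though the box itself is partially cut off by it.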
70 | self.box_filter = BoxFilter(check_overlap=True, 71 | check_min_area=False, 72 | check_degenerate=False, 73 | overlap_criterion='center_point', 74 | labels_format=self.labels_format) 75 | 76 | # Determines whether a given patch is considered a valid patch. 77 | # Defines a patch to be valid if at least one ground truth bounding box 78 | # (n_boxes_min == 1) has an IoU overlap with the patch that 79 | # meets the requirements defined by `bound_generator`. 80 | self.image_validator = ImageValidator(overlap_criterion='iou', 81 | n_boxes_min=1, 82 | labels_format=self.labels_format, 83 | border_pixels='half') 84 | 85 | # Performs crops according to the parameters set in the objects above. 86 | # Runs until either a valid patch is found or the original input image 87 | # is returned unaltered. Runs a maximum of 50 trials to find a valid 88 | # patch for each new sampled IoU threshold. Every 50 trials, the original 89 | # image is returned as is with probability (1 - prob) = 0.143. 90 | self.random_crop = RandomPatchInf(patch_coord_generator=self.patch_coord_generator, 91 | box_filter=self.box_filter, 92 | image_validator=self.image_validator, 93 | bound_generator=self.bound_generator, 94 | n_trials_max=50, 95 | clip_boxes=True, 96 | prob=0.857, 97 | labels_format=self.labels_format) 98 | 99 | def __call__(self, image, labels=None, return_inverter=False): 100 | self.random_crop.labels_format = self.labels_format 101 | return self.random_crop(image, labels, return_inverter) 102 | 103 | class SSDExpand: 104 | ''' 105 | Performs the random image expansion as defined by the `train_transform_param` instructions 106 | of the original Caffe implementation of SSD. A description of this expansion strategy 107 | can also be found in section 3.6 ("Data Augmentation for Small Object Accuracy") of the paper: 108 | https://arxiv.org/abs/1512.02325 109 | ''' 110 | 111 | def __init__(self, background=(123, 117, 104), labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): 112 | ''' 113 | Arguments: 114 | background (list/tuple, optional): A 3-tuple specifying the RGB color value of the 115 | background pixels of the translated images. 116 | labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels 117 | of an image contains which bounding box coordinate. The dictionary maps at least the keywords 118 | 'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array. 119 | ''' 120 | 121 | self.labels_format = labels_format 122 | 123 | # Generate coordinates for patches that are between 1.0 and 4.0 times 124 | # the size of the input image in both spatial dimensions. 125 | self.patch_coord_generator = PatchCoordinateGenerator(must_match='h_w', 126 | min_scale=1.0, 127 | max_scale=4.0, 128 | scale_uniformly=True) 129 | 130 | # With probability 0.5, place the input image randomly on a canvas filled with 131 | # mean color values according to the parameters set above. With probability 0.5, 132 | # return the input image unaltered. 
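        # Since the canvas can measure up to four times the input image in each spatial dimension, the original image content can end up covering as little as 1/16 of the output area, which is what produces the additional small-object training examples referenced in the class docstring above.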
self.expand = RandomPatch(patch_coord_generator=self.patch_coord_generator, 134 | box_filter=None, 135 | image_validator=None, 136 | n_trials_max=1, 137 | clip_boxes=False, 138 | prob=0.5, 139 | background=background, 140 | labels_format=self.labels_format) 141 | 142 | def __call__(self, image, labels=None, return_inverter=False): 143 | self.expand.labels_format = self.labels_format 144 | return self.expand(image, labels, return_inverter) 145 | 146 | class SSDPhotometricDistortions: 147 | ''' 148 | Performs the photometric distortions defined by the `train_transform_param` instructions 149 | of the original Caffe implementation of SSD. 150 | ''' 151 | 152 | def __init__(self): 153 | 154 | self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV') 155 | self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB') 156 | self.convert_to_float32 = ConvertDataType(to='float32') 157 | self.convert_to_uint8 = ConvertDataType(to='uint8') 158 | self.convert_to_3_channels = ConvertTo3Channels() 159 | self.random_brightness = RandomBrightness(lower=-32, upper=32, prob=0.5) 160 | self.random_contrast = RandomContrast(lower=0.5, upper=1.5, prob=0.5) 161 | self.random_saturation = RandomSaturation(lower=0.5, upper=1.5, prob=0.5) 162 | self.random_hue = RandomHue(max_delta=18, prob=0.5) 163 | self.random_channel_swap = RandomChannelSwap(prob=0.0) 164 | 165 | self.sequence1 = [self.convert_to_3_channels, 166 | self.convert_to_float32, 167 | self.random_brightness, 168 | self.random_contrast, 169 | self.convert_to_uint8, 170 | self.convert_RGB_to_HSV, 171 | self.convert_to_float32, 172 | self.random_saturation, 173 | self.random_hue, 174 | self.convert_to_uint8, 175 | self.convert_HSV_to_RGB, 176 | self.random_channel_swap] 177 | 178 | self.sequence2 = [self.convert_to_3_channels, 179 | self.convert_to_float32, 180 | self.random_brightness, 181 | self.convert_to_uint8, 182 | self.convert_RGB_to_HSV, 183 | self.convert_to_float32, 184 | self.random_saturation, 185 | self.random_hue, 186 | self.convert_to_uint8, 187 | self.convert_HSV_to_RGB, 188 | self.convert_to_float32, 189 | self.random_contrast, 190 | self.convert_to_uint8, 191 | self.random_channel_swap] 192 | 193 | def __call__(self, image, labels): 194 | 195 | # Choose sequence 1 with probability 0.5. 196 | if np.random.choice(2): 197 | 198 | for transform in self.sequence1: 199 | image, labels = transform(image, labels) 200 | return image, labels 201 | # Choose sequence 2 with probability 0.5. 202 | else: 203 | 204 | for transform in self.sequence2: 205 | image, labels = transform(image, labels) 206 | return image, labels 207 | 208 | class SSDDataAugmentation: 209 | ''' 210 | Reproduces the data augmentation pipeline used in the training of the original 211 | Caffe implementation of SSD. 212 | ''' 213 | 214 | def __init__(self, 215 | img_height=300, 216 | img_width=300, 217 | background=(123, 117, 104), 218 | labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): 219 | ''' 220 | Arguments: 221 | img_height (int): The desired height of the output images in pixels. 222 | img_width (int): The desired width of the output images in pixels. 223 | background (list/tuple, optional): A 3-tuple specifying the RGB color value of the 224 | background pixels of the translated images. 225 | labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels 226 | of an image contains which bounding box coordinate.
The dictionary maps at least the keywords 227 | 'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array. 228 | ''' 229 | 230 | self.labels_format = labels_format 231 | 232 | self.photometric_distortions = SSDPhotometricDistortions() 233 | self.expand = SSDExpand(background=background, labels_format=self.labels_format) 234 | self.random_crop = SSDRandomCrop(labels_format=self.labels_format) 235 | self.random_flip = RandomFlip(dim='horizontal', prob=0.5, labels_format=self.labels_format) 236 | 237 | # This box filter makes sure that the resized images don't contain any degenerate boxes. 238 | # Resizing the images could lead the boxes to become smaller. For boxes that are already 239 | # pretty small, that might result in boxes with height and/or width zero, which we obviously 240 | # cannot allow. 241 | self.box_filter = BoxFilter(check_overlap=False, 242 | check_min_area=False, 243 | check_degenerate=True, 244 | labels_format=self.labels_format) 245 | 246 | self.resize = ResizeRandomInterp(height=img_height, 247 | width=img_width, 248 | interpolation_modes=[cv2.INTER_NEAREST, 249 | cv2.INTER_LINEAR, 250 | cv2.INTER_CUBIC, 251 | cv2.INTER_AREA, 252 | cv2.INTER_LANCZOS4], 253 | box_filter=self.box_filter, 254 | labels_format=self.labels_format) 255 | 256 | self.sequence = [self.photometric_distortions, 257 | self.expand, 258 | self.random_crop, 259 | self.random_flip, 260 | self.resize] 261 | 262 | def __call__(self, image, labels, return_inverter=False): 263 | self.expand.labels_format = self.labels_format 264 | self.random_crop.labels_format = self.labels_format 265 | self.random_flip.labels_format = self.labels_format 266 | self.resize.labels_format = self.labels_format 267 | 268 | inverters = [] 269 | 270 | for transform in self.sequence: 271 | if return_inverter and ('return_inverter' in inspect.signature(transform).parameters): 272 | image, labels, inverter = transform(image, labels, return_inverter=True) 273 | inverters.append(inverter) 274 | else: 275 | image, labels = transform(image, labels) 276 | 277 | if return_inverter: 278 | return image, labels, inverters[::-1] 279 | else: 280 | return image, labels 281 | -------------------------------------------------------------------------------- /data_generator/data_augmentation_chain_satellite.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A data augmentation pipeline for datasets in bird's eye view, i.e. where there is 3 | no "up" or "down" in the images. 4 | 5 | Copyright (C) 2018 Pierluigi Ferrari 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License.
18 | ''' 19 | 20 | from __future__ import division 21 | import numpy as np 22 | 23 | from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation 24 | from data_generator.object_detection_2d_geometric_ops import Resize, RandomFlip, RandomRotate 25 | from data_generator.object_detection_2d_patch_sampling_ops import PatchCoordinateGenerator, RandomPatch 26 | from data_generator.object_detection_2d_image_boxes_validation_utils import BoxFilter, ImageValidator 27 | 28 | class DataAugmentationSatellite: 29 | ''' 30 | A data augmentation pipeline for datasets in bird's eye view, i.e. where there is 31 | no "up" or "down" in the images. 32 | 33 | Applies a chain of photometric and geometric image transformations. For documentation, please refer 34 | to the documentation of the individual transformations involved. 35 | ''' 36 | 37 | def __init__(self, 38 | resize_height, 39 | resize_width, 40 | random_brightness=(-48, 48, 0.5), 41 | random_contrast=(0.5, 1.8, 0.5), 42 | random_saturation=(0.5, 1.8, 0.5), 43 | random_hue=(18, 0.5), 44 | random_flip=0.5, 45 | random_rotate=([90, 180, 270], 0.5), 46 | min_scale=0.3, 47 | max_scale=2.0, 48 | min_aspect_ratio = 0.8, 49 | max_aspect_ratio = 1.25, 50 | n_trials_max=3, 51 | clip_boxes=True, 52 | overlap_criterion='area', 53 | bounds_box_filter=(0.3, 1.0), 54 | bounds_validator=(0.5, 1.0), 55 | n_boxes_min=1, 56 | background=(0,0,0), 57 | labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): 58 | 59 | self.n_trials_max = n_trials_max 60 | self.clip_boxes = clip_boxes 61 | self.overlap_criterion = overlap_criterion 62 | self.bounds_box_filter = bounds_box_filter 63 | self.bounds_validator = bounds_validator 64 | self.n_boxes_min = n_boxes_min 65 | self.background = background 66 | self.labels_format = labels_format 67 | 68 | # Determines which boxes are kept in an image after the transformations have been applied. 69 | self.box_filter_patch = BoxFilter(check_overlap=True, 70 | check_min_area=False, 71 | check_degenerate=False, 72 | overlap_criterion=self.overlap_criterion, 73 | overlap_bounds=self.bounds_box_filter, 74 | labels_format=self.labels_format) 75 | 76 | self.box_filter_resize = BoxFilter(check_overlap=False, 77 | check_min_area=True, 78 | check_degenerate=True, 79 | min_area=16, 80 | labels_format=self.labels_format) 81 | 82 | # Determines whether the result of the transformations is a valid training image. 83 | self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion, 84 | bounds=self.bounds_validator, 85 | n_boxes_min=self.n_boxes_min, 86 | labels_format=self.labels_format) 87 | 88 | # Utility transformations 89 | self.convert_to_3_channels = ConvertTo3Channels() # Make sure all images end up having 3 channels. 
90 | self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV') 91 | self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB') 92 | self.convert_to_float32 = ConvertDataType(to='float32') 93 | self.convert_to_uint8 = ConvertDataType(to='uint8') 94 | self.resize = Resize(height=resize_height, 95 | width=resize_width, 96 | box_filter=self.box_filter_resize, 97 | labels_format=self.labels_format) 98 | 99 | # Photometric transformations 100 | self.random_brightness = RandomBrightness(lower=random_brightness[0], upper=random_brightness[1], prob=random_brightness[2]) 101 | self.random_contrast = RandomContrast(lower=random_contrast[0], upper=random_contrast[1], prob=random_contrast[2]) 102 | self.random_saturation = RandomSaturation(lower=random_saturation[0], upper=random_saturation[1], prob=random_saturation[2]) 103 | self.random_hue = RandomHue(max_delta=random_hue[0], prob=random_hue[1]) 104 | 105 | # Geometric transformations 106 | self.random_horizontal_flip = RandomFlip(dim='horizontal', prob=random_flip, labels_format=self.labels_format) 107 | self.random_vertical_flip = RandomFlip(dim='vertical', prob=random_flip, labels_format=self.labels_format) 108 | self.random_rotate = RandomRotate(angles=random_rotate[0], prob=random_rotate[1], labels_format=self.labels_format) 109 | self.patch_coord_generator = PatchCoordinateGenerator(must_match='w_ar', 110 | min_scale=min_scale, 111 | max_scale=max_scale, 112 | scale_uniformly=False, 113 | min_aspect_ratio = min_aspect_ratio, 114 | max_aspect_ratio = max_aspect_ratio) 115 | self.random_patch = RandomPatch(patch_coord_generator=self.patch_coord_generator, 116 | box_filter=self.box_filter_patch, 117 | image_validator=self.image_validator, 118 | n_trials_max=self.n_trials_max, 119 | clip_boxes=self.clip_boxes, 120 | prob=1.0, 121 | can_fail=False, 122 | labels_format=self.labels_format) 123 | 124 | # Define the processing chain. 125 | self.transformations = [self.convert_to_3_channels, 126 | self.convert_to_float32, 127 | self.random_brightness, 128 | self.random_contrast, 129 | self.convert_to_uint8, 130 | self.convert_RGB_to_HSV, 131 | self.convert_to_float32, 132 | self.random_saturation, 133 | self.random_hue, 134 | self.convert_to_uint8, 135 | self.convert_HSV_to_RGB, 136 | self.random_horizontal_flip, 137 | self.random_vertical_flip, 138 | self.random_rotate, 139 | self.random_patch, 140 | self.resize] 141 | 142 | def __call__(self, image, labels=None): 143 | 144 | self.random_patch.labels_format = self.labels_format 145 | self.random_horizontal_flip.labels_format = self.labels_format 146 | self.random_vertical_flip.labels_format = self.labels_format 147 | self.random_rotate.labels_format = self.labels_format 148 | self.resize.labels_format = self.labels_format 149 | 150 | if not (labels is None): 151 | for transform in self.transformations: 152 | image, labels = transform(image, labels) 153 | return image, labels 154 | else: 155 | for transform in self.transformations: 156 | image = transform(image) 157 | return image 158 | -------------------------------------------------------------------------------- /data_generator/data_augmentation_chain_variable_input_size.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A data augmentation pipeline suitable for variable-size images that produces effects 3 | that are similar (but not identical) to those of the original SSD data augmentation 4 | pipeline while being faster.
5 | 6 | Copyright (C) 2018 Pierluigi Ferrari 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | ''' 20 | 21 | from __future__ import division 22 | import numpy as np 23 | 24 | from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation 25 | from data_generator.object_detection_2d_geometric_ops import Resize, RandomFlip 26 | from data_generator.object_detection_2d_patch_sampling_ops import PatchCoordinateGenerator, RandomPatch 27 | from data_generator.object_detection_2d_image_boxes_validation_utils import BoxFilter, ImageValidator 28 | 29 | class DataAugmentationVariableInputSize: 30 | ''' 31 | A data augmentation pipeline suitable for variable-size images that produces effects 32 | that are similar (but not identical!) to those of the original SSD data augmentation 33 | pipeline while being faster. 34 | 35 | Applies a chain of photometric and geometric image transformations. For documentation, please refer 36 | to the documentation of the individual transformations involved. 37 | ''' 38 | 39 | def __init__(self, 40 | resize_height, 41 | resize_width, 42 | random_brightness=(-48, 48, 0.5), 43 | random_contrast=(0.5, 1.8, 0.5), 44 | random_saturation=(0.5, 1.8, 0.5), 45 | random_hue=(18, 0.5), 46 | random_flip=0.5, 47 | min_scale=0.3, 48 | max_scale=2.0, 49 | min_aspect_ratio = 0.5, 50 | max_aspect_ratio = 2.0, 51 | n_trials_max=3, 52 | clip_boxes=True, 53 | overlap_criterion='area', 54 | bounds_box_filter=(0.3, 1.0), 55 | bounds_validator=(0.5, 1.0), 56 | n_boxes_min=1, 57 | background=(0,0,0), 58 | labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): 59 | 60 | self.n_trials_max = n_trials_max 61 | self.clip_boxes = clip_boxes 62 | self.overlap_criterion = overlap_criterion 63 | self.bounds_box_filter = bounds_box_filter 64 | self.bounds_validator = bounds_validator 65 | self.n_boxes_min = n_boxes_min 66 | self.background = background 67 | self.labels_format = labels_format 68 | 69 | # Determines which boxes are kept in an image after the transformations have been applied. 70 | self.box_filter_patch = BoxFilter(check_overlap=True, 71 | check_min_area=False, 72 | check_degenerate=False, 73 | overlap_criterion=self.overlap_criterion, 74 | overlap_bounds=self.bounds_box_filter, 75 | labels_format=self.labels_format) 76 | 77 | self.box_filter_resize = BoxFilter(check_overlap=False, 78 | check_min_area=True, 79 | check_degenerate=True, 80 | min_area=16, 81 | labels_format=self.labels_format) 82 | 83 | # Determines whether the result of the transformations is a valid training image. 84 | self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion, 85 | bounds=self.bounds_validator, 86 | n_boxes_min=self.n_boxes_min, 87 | labels_format=self.labels_format) 88 | 89 | # Utility transformations 90 | self.convert_to_3_channels = ConvertTo3Channels() # Make sure all images end up having 3 channels. 
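        # Note that `self.resize`, defined just below, runs as the last step of the processing chain, so images of arbitrary input size all leave the pipeline at the fixed size (resize_height, resize_width).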
91 | self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV') 92 | self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB') 93 | self.convert_to_float32 = ConvertDataType(to='float32') 94 | self.convert_to_uint8 = ConvertDataType(to='uint8') 95 | self.resize = Resize(height=resize_height, 96 | width=resize_width, 97 | box_filter=self.box_filter_resize, 98 | labels_format=self.labels_format) 99 | 100 | # Photometric transformations 101 | self.random_brightness = RandomBrightness(lower=random_brightness[0], upper=random_brightness[1], prob=random_brightness[2]) 102 | self.random_contrast = RandomContrast(lower=random_contrast[0], upper=random_contrast[1], prob=random_contrast[2]) 103 | self.random_saturation = RandomSaturation(lower=random_saturation[0], upper=random_saturation[1], prob=random_saturation[2]) 104 | self.random_hue = RandomHue(max_delta=random_hue[0], prob=random_hue[1]) 105 | 106 | # Geometric transformations 107 | self.random_flip = RandomFlip(dim='horizontal', prob=random_flip, labels_format=self.labels_format) 108 | self.patch_coord_generator = PatchCoordinateGenerator(must_match='w_ar', 109 | min_scale=min_scale, 110 | max_scale=max_scale, 111 | scale_uniformly=False, 112 | min_aspect_ratio = min_aspect_ratio, 113 | max_aspect_ratio = max_aspect_ratio) 114 | self.random_patch = RandomPatch(patch_coord_generator=self.patch_coord_generator, 115 | box_filter=self.box_filter_patch, 116 | image_validator=self.image_validator, 117 | n_trials_max=self.n_trials_max, 118 | clip_boxes=self.clip_boxes, 119 | prob=1.0, 120 | can_fail=False, 121 | labels_format=self.labels_format) 122 | 123 | # Define the processing chain 124 | self.transformations = [self.convert_to_3_channels, 125 | self.convert_to_float32, 126 | self.random_brightness, 127 | self.random_contrast, 128 | self.convert_to_uint8, 129 | self.convert_RGB_to_HSV, 130 | self.convert_to_float32, 131 | self.random_saturation, 132 | self.random_hue, 133 | self.convert_to_uint8, 134 | self.convert_HSV_to_RGB, 135 | self.random_patch, 136 | self.random_flip, 137 | self.resize] 138 | 139 | def __call__(self, image, labels=None): 140 | 141 | self.random_patch.labels_format = self.labels_format 142 | self.random_flip.labels_format = self.labels_format 143 | self.resize.labels_format = self.labels_format 144 | 145 | if not (labels is None): 146 | for transform in self.transformations: 147 | image, labels = transform(image, labels) 148 | return image, labels 149 | else: 150 | for transform in self.transformations: 151 | image = transform(image) 152 | return image 153 | -------------------------------------------------------------------------------- /data_generator/object_detection_2d_image_boxes_validation_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Utilities for 2D object detection related to answering the following questions: 3 | 1. Given an image size and bounding boxes, which bounding boxes meet certain 4 | requirements with respect to the image size? 5 | 2. Given an image size and bounding boxes, is an image of that size valid with 6 | respect to the bounding boxes according to certain requirements? 7 | 8 | Copyright (C) 2018 Pierluigi Ferrari 9 | 10 | Licensed under the Apache License, Version 2.0 (the "License"); 11 | you may not use this file except in compliance with the License.
12 | You may obtain a copy of the License at 13 | 14 | http://www.apache.org/licenses/LICENSE-2.0 15 | 16 | Unless required by applicable law or agreed to in writing, software 17 | distributed under the License is distributed on an "AS IS" BASIS, 18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | See the License for the specific language governing permissions and 20 | limitations under the License. 21 | ''' 22 | 23 | from __future__ import division 24 | import numpy as np 25 | 26 | from bounding_box_utils.bounding_box_utils import iou 27 | 28 | class BoundGenerator: 29 | ''' 30 | Generates pairs of floating point values that represent lower and upper bounds 31 | from a given sample space. 32 | ''' 33 | def __init__(self, 34 | sample_space=((0.1, None), 35 | (0.3, None), 36 | (0.5, None), 37 | (0.7, None), 38 | (0.9, None), 39 | (None, None)), 40 | weights=None): 41 | ''' 42 | Arguments: 43 | sample_space (list or tuple): A list, tuple, or array-like object of shape 44 | `(n, 2)` that contains `n` samples to choose from, where each sample 45 | is a 2-tuple of scalars and/or `None` values. 46 | weights (list or tuple, optional): A list or tuple representing the distribution 47 | over the sample space. If `None`, a uniform distribution will be assumed. 48 | ''' 49 | 50 | if (not (weights is None)) and len(weights) != len(sample_space): 51 | raise ValueError("`weights` must either be `None` for uniform distribution or have the same length as `sample_space`.") 52 | 53 | self.sample_space = [] 54 | for bound_pair in sample_space: 55 | if len(bound_pair) != 2: 56 | raise ValueError("All elements of the sample space must be 2-tuples.") 57 | bound_pair = list(bound_pair) 58 | if bound_pair[0] is None: bound_pair[0] = 0.0 59 | if bound_pair[1] is None: bound_pair[1] = 1.0 60 | if bound_pair[0] > bound_pair[1]: 61 | raise ValueError("For all sample space elements, the lower bound cannot be greater than the upper bound.") 62 | self.sample_space.append(bound_pair) 63 | 64 | self.sample_space_size = len(self.sample_space) 65 | 66 | if weights is None: 67 | self.weights = [1.0/self.sample_space_size] * self.sample_space_size 68 | else: 69 | self.weights = weights 70 | 71 | def __call__(self): 72 | ''' 73 | Returns: 74 | An item of the sample space, i.e. a 2-tuple of scalars. 75 | ''' 76 | i = np.random.choice(self.sample_space_size, p=self.weights) 77 | return self.sample_space[i] 78 | 79 | class BoxFilter: 80 | ''' 81 | Returns all bounding boxes that are valid with respect to the defined criteria. 82 | ''' 83 | 84 | def __init__(self, 85 | check_overlap=True, 86 | check_min_area=True, 87 | check_degenerate=True, 88 | overlap_criterion='center_point', 89 | overlap_bounds=(0.3, 1.0), 90 | min_area=16, 91 | labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}, 92 | border_pixels='half'): 93 | ''' 94 | Arguments: 95 | check_overlap (bool, optional): Whether or not to enforce the overlap requirements defined by 96 | `overlap_criterion` and `overlap_bounds`. Sometimes you might want to use the box filter only 97 | to enforce a certain minimum area for all boxes (see the next argument); in such cases you can 98 | turn the overlap requirements off. 99 | check_min_area (bool, optional): Whether or not to enforce the minimum area requirement defined 100 | by `min_area`. If `True`, any boxes that have an area (in pixels) that is smaller than `min_area` 101 | will be removed from the labels of an image.
Bounding boxes below a certain area aren't useful
102 |                 training examples. An object that takes up only, say, 5 pixels in an image is probably not
103 |                 recognizable anymore, neither for a human, nor for an object detection model. It makes sense
104 |                 to remove such boxes.
105 |             check_degenerate (bool, optional): Whether or not to check for and remove degenerate bounding boxes.
106 |                 Degenerate bounding boxes are boxes that have `xmax <= xmin` and/or `ymax <= ymin`. In particular,
107 |                 boxes with a width and/or height of zero are degenerate. It is obviously important to filter out
108 |                 such boxes, so you should only set this option to `False` if you are certain that degenerate
109 |                 boxes are not possible in your data and processing chain.
110 |             overlap_criterion (str, optional): Can be either of 'center_point', 'iou', or 'area'. Determines
111 |                 which boxes are considered valid with respect to a given image. If set to 'center_point',
112 |                 a given bounding box is considered valid if its center point lies within the image.
113 |                 If set to 'area', a given bounding box is considered valid if the quotient of its intersection
114 |                 area with the image and its own area is within the given `overlap_bounds`. If set to 'iou', a given
115 |                 bounding box is considered valid if its IoU with the image is within the given `overlap_bounds`.
116 |             overlap_bounds (list or BoundGenerator, optional): Only relevant if `overlap_criterion` is 'area' or 'iou'.
117 |                 Determines the lower and upper bounds for `overlap_criterion`. Can be either a 2-tuple of scalars
118 |                 representing a lower bound and an upper bound, or a `BoundGenerator` object, which provides
119 |                 the possibility to generate bounds randomly.
120 |             min_area (int, optional): Only relevant if `check_min_area` is `True`. Defines the minimum area in
121 |                 pixels that a bounding box must have in order to be valid. Boxes with an area smaller than this
122 |                 will be removed.
123 |             labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
124 |                 of an image contains which bounding box coordinate. The dictionary maps at least the keywords
125 |                 'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within the last axis of the labels array.
126 |             border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
127 |                 Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
128 |                 to the boxes. If 'exclude', the border pixels do not belong to the boxes.
129 |                 If 'half', then one of each of the two horizontal and vertical borders belongs
130 |                 to the boxes, but not the other.
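
        A minimal usage sketch (hypothetical values; `labels` is an array with one
        row per box in the format defined by `labels_format`):

            box_filter = BoxFilter(check_overlap=True,
                                   check_min_area=True,
                                   overlap_criterion='center_point',
                                   min_area=16)
            valid_labels = box_filter(labels, image_height=300, image_width=300)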
131 |         '''
132 |         if not isinstance(overlap_bounds, (list, tuple, BoundGenerator)):
133 |             raise ValueError("`overlap_bounds` must be either a 2-tuple of scalars or a `BoundGenerator` object.")
134 |         if isinstance(overlap_bounds, (list, tuple)) and (overlap_bounds[0] > overlap_bounds[1]):
135 |             raise ValueError("The lower bound must not be greater than the upper bound.")
136 |         if not (overlap_criterion in {'iou', 'area', 'center_point'}):
137 |             raise ValueError("`overlap_criterion` must be one of 'iou', 'area', or 'center_point'.")
138 |         self.overlap_criterion = overlap_criterion
139 |         self.overlap_bounds = overlap_bounds
140 |         self.min_area = min_area
141 |         self.check_overlap = check_overlap
142 |         self.check_min_area = check_min_area
143 |         self.check_degenerate = check_degenerate
144 |         self.labels_format = labels_format
145 |         self.border_pixels = border_pixels
146 | 
147 |     def __call__(self,
148 |                  labels,
149 |                  image_height=None,
150 |                  image_width=None):
151 |         '''
152 |         Arguments:
153 |             labels (array): The labels to be filtered. This is an array with shape `(m,n)`, where
154 |                 `m` is the number of bounding boxes and `n` is the number of elements that defines
155 |                 each bounding box (box coordinates, class ID, etc.). The box coordinates are expected
156 |                 to be in the image's coordinate system.
157 |             image_height (int): Only relevant if `check_overlap == True`. The height of the image
158 |                 (in pixels) to compare the box coordinates to.
159 |             image_width (int): Only relevant if `check_overlap == True`. The width of the image (in pixels) to compare
160 |                 the box coordinates to.
161 | 
162 |         Returns:
163 |             An array containing the labels of all boxes that are valid.
164 |         '''
165 | 
166 |         labels = np.copy(labels)
167 | 
168 |         xmin = self.labels_format['xmin']
169 |         ymin = self.labels_format['ymin']
170 |         xmax = self.labels_format['xmax']
171 |         ymax = self.labels_format['ymax']
172 | 
173 |         # Record the boxes that pass all checks here.
174 |         requirements_met = np.ones(shape=labels.shape[0], dtype=bool) # `bool` rather than the deprecated `np.bool`.
175 | 
176 |         if self.check_degenerate:
177 | 
178 |             non_degenerate = (labels[:,xmax] > labels[:,xmin]) * (labels[:,ymax] > labels[:,ymin])
179 |             requirements_met *= non_degenerate
180 | 
181 |         if self.check_min_area:
182 | 
183 |             min_area_met = (labels[:,xmax] - labels[:,xmin]) * (labels[:,ymax] - labels[:,ymin]) >= self.min_area
184 |             requirements_met *= min_area_met
185 | 
186 |         if self.check_overlap:
187 | 
188 |             # Get the lower and upper bounds.
189 |             if isinstance(self.overlap_bounds, BoundGenerator):
190 |                 lower, upper = self.overlap_bounds()
191 |             else:
192 |                 lower, upper = self.overlap_bounds
193 | 
194 |             # Compute which boxes are valid.
195 | 
196 |             if self.overlap_criterion == 'iou':
197 |                 # Compute the patch coordinates.
198 |                 image_coords = np.array([0, 0, image_width, image_height])
199 |                 # Compute the IoU between the patch and all of the ground truth boxes.
200 |                 image_boxes_iou = iou(image_coords, labels[:, [xmin, ymin, xmax, ymax]], coords='corners', mode='element-wise', border_pixels=self.border_pixels)
201 |                 requirements_met *= (image_boxes_iou > lower) * (image_boxes_iou <= upper)
202 | 
203 |             elif self.overlap_criterion == 'area':
204 |                 if self.border_pixels == 'half':
205 |                     d = 0
206 |                 elif self.border_pixels == 'include':
207 |                     d = 1 # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`.
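                    # (For example, a box with `xmin == 10` and `xmax == 19` covers the ten pixel
                    # columns 10 through 19, so its width is `19 - 10 + 1 = 10` in this convention.)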
208 |                 elif self.border_pixels == 'exclude':
209 |                     d = -1 # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`.
210 |                 # Compute the areas of the boxes.
211 |                 box_areas = (labels[:,xmax] - labels[:,xmin] + d) * (labels[:,ymax] - labels[:,ymin] + d)
212 |                 # Compute the intersection area between the patch and all of the ground truth boxes.
213 |                 clipped_boxes = np.copy(labels)
214 |                 clipped_boxes[:,[ymin,ymax]] = np.clip(labels[:,[ymin,ymax]], a_min=0, a_max=image_height-1)
215 |                 clipped_boxes[:,[xmin,xmax]] = np.clip(labels[:,[xmin,xmax]], a_min=0, a_max=image_width-1)
216 |                 intersection_areas = (clipped_boxes[:,xmax] - clipped_boxes[:,xmin] + d) * (clipped_boxes[:,ymax] - clipped_boxes[:,ymin] + d) # `d` accounts for the border pixel convention chosen above.
217 |                 # Check which boxes meet the overlap requirements.
218 |                 if lower == 0.0:
219 |                     mask_lower = intersection_areas > lower * box_areas # If `lower == 0`, we want to make sure that boxes with area 0 don't count, hence the ">" sign instead of the ">=" sign.
220 |                 else:
221 |                     mask_lower = intersection_areas >= lower * box_areas # Especially for the case `lower == 1` we want the ">=" sign, otherwise no boxes would count at all.
222 |                 mask_upper = intersection_areas <= upper * box_areas
223 |                 requirements_met *= mask_lower * mask_upper
224 | 
225 |             elif self.overlap_criterion == 'center_point':
226 |                 # Compute the center points of the boxes.
227 |                 cy = (labels[:,ymin] + labels[:,ymax]) / 2
228 |                 cx = (labels[:,xmin] + labels[:,xmax]) / 2
229 |                 # Check which of the boxes have center points within the cropped patch and remove those that don't.
230 |                 requirements_met *= (cy >= 0.0) * (cy <= image_height-1) * (cx >= 0.0) * (cx <= image_width-1)
231 | 
232 |         return labels[requirements_met]
233 | 
234 | class ImageValidator:
235 |     '''
236 |     Returns `True` if a given minimum number of bounding boxes meets given overlap
237 |     requirements with an image of a given height and width.
238 |     '''
239 | 
240 |     def __init__(self,
241 |                  overlap_criterion='center_point',
242 |                  bounds=(0.3, 1.0),
243 |                  n_boxes_min=1,
244 |                  labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4},
245 |                  border_pixels='half'):
246 |         '''
247 |         Arguments:
248 |             overlap_criterion (str, optional): Can be either of 'center_point', 'iou', or 'area'. Determines
249 |                 which boxes are considered valid with respect to a given image. If set to 'center_point',
250 |                 a given bounding box is considered valid if its center point lies within the image.
251 |                 If set to 'area', a given bounding box is considered valid if the quotient of its intersection
252 |                 area with the image and its own area is within `lower` and `upper`. If set to 'iou', a given
253 |                 bounding box is considered valid if its IoU with the image is within `lower` and `upper`.
254 |             bounds (list or BoundGenerator, optional): Only relevant if `overlap_criterion` is 'area' or 'iou'.
255 |                 Determines the lower and upper bounds for `overlap_criterion`. Can be either a 2-tuple of scalars
256 |                 representing a lower bound and an upper bound, or a `BoundGenerator` object, which provides
257 |                 the possibility to generate bounds randomly.
258 |             n_boxes_min (int or str, optional): Either a positive integer or the string 'all'.
259 |                 Determines the minimum number of boxes that must meet the `overlap_criterion` with respect to
260 |                 an image of the given height and width in order for the image to be a valid image.
261 |                 If set to 'all', an image is considered valid if all given boxes meet the `overlap_criterion`.
262 |             labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
263 |                 of an image contains which bounding box coordinate. The dictionary maps at least the keywords
264 |                 'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within the last axis of the labels array.
265 |             border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
266 |                 Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
267 |                 to the boxes. If 'exclude', the border pixels do not belong to the boxes.
268 |                 If 'half', then one of each of the two horizontal and vertical borders belongs
269 |                 to the boxes, but not the other.
270 |         '''
271 |         if not ((isinstance(n_boxes_min, int) and n_boxes_min > 0) or n_boxes_min == 'all'):
272 |             raise ValueError("`n_boxes_min` must be a positive integer or 'all'.")
273 |         self.overlap_criterion = overlap_criterion
274 |         self.bounds = bounds
275 |         self.n_boxes_min = n_boxes_min
276 |         self.labels_format = labels_format
277 |         self.border_pixels = border_pixels
278 |         self.box_filter = BoxFilter(check_overlap=True,
279 |                                     check_min_area=False,
280 |                                     check_degenerate=False,
281 |                                     overlap_criterion=self.overlap_criterion,
282 |                                     overlap_bounds=self.bounds,
283 |                                     labels_format=self.labels_format,
284 |                                     border_pixels=self.border_pixels)
285 | 
286 |     def __call__(self,
287 |                  labels,
288 |                  image_height,
289 |                  image_width):
290 |         '''
291 |         Arguments:
292 |             labels (array): The labels to be tested. The box coordinates are expected
293 |                 to be in the image's coordinate system.
294 |             image_height (int): The height of the image to compare the box coordinates to.
295 |             image_width (int): The width of the image to compare the box coordinates to.
296 | 
297 |         Returns:
298 |             A boolean indicating whether an image of the given height and width is
299 |             valid with respect to the given bounding boxes.
300 |         '''
301 | 
302 |         self.box_filter.overlap_bounds = self.bounds
303 |         self.box_filter.labels_format = self.labels_format
304 | 
305 |         # Get all boxes that meet the overlap requirements.
306 |         valid_labels = self.box_filter(labels=labels,
307 |                                        image_height=image_height,
308 |                                        image_width=image_width)
309 | 
310 |         # Check whether enough boxes meet the requirements.
311 |         if isinstance(self.n_boxes_min, int):
312 |             # The image is valid if at least `self.n_boxes_min` ground truth boxes meet the requirements.
313 |             if len(valid_labels) >= self.n_boxes_min:
314 |                 return True
315 |             else:
316 |                 return False
317 |         elif self.n_boxes_min == 'all':
318 |             # The image is valid if all ground truth boxes meet the requirements.
319 |             if len(valid_labels) == len(labels):
320 |                 return True
321 |             else:
322 |                 return False
323 | 
--------------------------------------------------------------------------------
/data_generator/object_detection_2d_misc_utils.py:
--------------------------------------------------------------------------------
1 | '''
2 | Miscellaneous data generator utilities.
3 | 
4 | Copyright (C) 2018 Pierluigi Ferrari
5 | 
6 | Licensed under the Apache License, Version 2.0 (the "License");
7 | you may not use this file except in compliance with the License.
8 | You may obtain a copy of the License at
9 | 
10 |     http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | '''
18 | 
19 | from __future__ import division
20 | import numpy as np
21 | 
22 | def apply_inverse_transforms(y_pred_decoded, inverse_transforms):
23 |     '''
24 |     Takes a list or Numpy array of decoded predictions and applies a given list of
25 |     transforms to them. The list of inverse transforms would usually contain the
26 |     inverter functions that are returned by some of the image transformations that
27 |     come with this data generator. This function would normally be used to transform
28 |     predictions that were made on a transformed image back to the original image.
29 | 
30 |     Arguments:
31 |         y_pred_decoded (list or array): Either a list of length `batch_size` that
32 |             contains Numpy arrays that contain the predictions for each batch item
33 |             or a Numpy array. If this is a list of Numpy arrays, the arrays would
34 |             usually have the shape `(num_predictions, 6)`, where `num_predictions`
35 |             is different for each batch item. If this is a Numpy array, it would
36 |             usually have the shape `(batch_size, num_predictions, 6)`. The last axis
37 |             would usually contain the class ID, confidence score, and four bounding
38 |             box coordinates for each prediction.
39 |         inverse_transforms (list): A nested list of length `batch_size` that contains
40 |             for each batch item a list of functions that take one argument (one element
41 |             of `y_pred_decoded` if it is a list or one slice along the first axis of
42 |             `y_pred_decoded` if it is an array) and return an output of the same shape
43 |             and data type.
44 | 
45 |     Returns:
46 |         The transformed predictions, which have the same structure as `y_pred_decoded`.
47 |     '''
48 | 
49 |     if isinstance(y_pred_decoded, list):
50 | 
51 |         y_pred_decoded_inv = []
52 | 
53 |         for i in range(len(y_pred_decoded)):
54 |             y_pred_decoded_inv.append(np.copy(y_pred_decoded[i]))
55 |             if y_pred_decoded_inv[i].size > 0: # If there are any predictions for this batch item.
56 |                 for inverter in inverse_transforms[i]:
57 |                     if not (inverter is None):
58 |                         y_pred_decoded_inv[i] = inverter(y_pred_decoded_inv[i])
59 | 
60 |     elif isinstance(y_pred_decoded, np.ndarray):
61 | 
62 |         y_pred_decoded_inv = np.copy(y_pred_decoded)
63 | 
64 |         for i in range(len(y_pred_decoded)):
65 |             if y_pred_decoded_inv[i].size > 0: # If there are any predictions for this batch item.
66 |                 for inverter in inverse_transforms[i]:
67 |                     if not (inverter is None):
68 |                         y_pred_decoded_inv[i] = inverter(y_pred_decoded_inv[i])
69 | 
70 |     else:
71 |         raise ValueError("`y_pred_decoded` must be either a list or a Numpy array.")
72 | 
73 |     return y_pred_decoded_inv
74 | 
--------------------------------------------------------------------------------
/data_generator/object_detection_2d_photometric_ops.py:
--------------------------------------------------------------------------------
1 | '''
2 | Various photometric image transformations, both deterministic and probabilistic.
3 | 
4 | Copyright (C) 2018 Pierluigi Ferrari
5 | 
6 | Licensed under the Apache License, Version 2.0 (the "License");
7 | you may not use this file except in compliance with the License.
8 | You may obtain a copy of the License at
9 | 
10 |     http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | '''
18 | 
19 | from __future__ import division
20 | import numpy as np
21 | import cv2
22 | 
23 | class ConvertColor:
24 |     '''
25 |     Converts images between RGB, HSV and grayscale color spaces. This is just a wrapper
26 |     around `cv2.cvtColor()`.
27 |     '''
28 |     def __init__(self, current='RGB', to='HSV', keep_3ch=True):
29 |         '''
30 |         Arguments:
31 |             current (str, optional): The current color space of the images. Can be
32 |                 one of 'RGB' and 'HSV'.
33 |             to (str, optional): The target color space of the images. Can be one of
34 |                 'RGB', 'HSV', and 'GRAY'.
35 |             keep_3ch (bool, optional): Only relevant if `to == 'GRAY'`.
36 |                 If `True`, the resulting grayscale images will have three channels.
37 |         '''
38 |         if not ((current in {'RGB', 'HSV'}) and (to in {'RGB', 'HSV', 'GRAY'})):
39 |             raise NotImplementedError("Only conversions between 'RGB', 'HSV', and 'GRAY' are supported.")
40 |         self.current = current
41 |         self.to = to
42 |         self.keep_3ch = keep_3ch
43 | 
44 |     def __call__(self, image, labels=None):
45 |         if self.current == 'RGB' and self.to == 'HSV':
46 |             image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
47 |         elif self.current == 'RGB' and self.to == 'GRAY':
48 |             image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
49 |             if self.keep_3ch:
50 |                 image = np.stack([image] * 3, axis=-1)
51 |         elif self.current == 'HSV' and self.to == 'RGB':
52 |             image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
53 |         elif self.current == 'HSV' and self.to == 'GRAY':
54 |             image = cv2.cvtColor(cv2.cvtColor(image, cv2.COLOR_HSV2RGB), cv2.COLOR_RGB2GRAY) # OpenCV has no direct HSV-to-grayscale conversion code, so convert via RGB.
55 |             if self.keep_3ch:
56 |                 image = np.stack([image] * 3, axis=-1)
57 |         if labels is None:
58 |             return image
59 |         else:
60 |             return image, labels
61 | 
62 | class ConvertDataType:
63 |     '''
64 |     Converts images represented as Numpy arrays between `uint8` and `float32`.
65 |     Serves as a helper for certain photometric distortions. This is just a wrapper
66 |     around `np.ndarray.astype()`.
67 |     '''
68 |     def __init__(self, to='uint8'):
69 |         '''
70 |         Arguments:
71 |             to (string, optional): To which datatype to convert the input images.
72 |                 Can be either 'uint8' or 'float32'.
73 |         '''
74 |         if not (to == 'uint8' or to == 'float32'):
75 |             raise ValueError("`to` can be either of 'uint8' or 'float32'.")
76 |         self.to = to
77 | 
78 |     def __call__(self, image, labels=None):
79 |         if self.to == 'uint8':
80 |             image = np.round(image, decimals=0).astype(np.uint8)
81 |         else:
82 |             image = image.astype(np.float32)
83 |         if labels is None:
84 |             return image
85 |         else:
86 |             return image, labels
87 | 
88 | class ConvertTo3Channels:
89 |     '''
90 |     Converts 1-channel and 4-channel images to 3-channel images. Does nothing to images that
91 |     already have 3 channels. In the case of 4-channel images, the fourth channel will be
92 |     discarded.
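
    A minimal usage sketch (hypothetical array, assuming Numpy is imported as `np`):

        convert_to_3_channels = ConvertTo3Channels()
        gray_image = np.zeros((300, 300), dtype=np.uint8) # A single-channel image.
        rgb_image = convert_to_3_channels(gray_image)     # Now has shape (300, 300, 3).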
93 | ''' 94 | def __init__(self): 95 | pass 96 | 97 | def __call__(self, image, labels=None): 98 | if image.ndim == 2: 99 | image = np.stack([image] * 3, axis=-1) 100 | elif image.ndim == 3: 101 | if image.shape[2] == 1: 102 | image = np.concatenate([image] * 3, axis=-1) 103 | elif image.shape[2] == 4: 104 | image = image[:,:,:3] 105 | if labels is None: 106 | return image 107 | else: 108 | return image, labels 109 | 110 | class Hue: 111 | ''' 112 | Changes the hue of HSV images. 113 | 114 | Important: 115 | - Expects HSV input. 116 | - Expects input array to be of `dtype` `float`. 117 | ''' 118 | def __init__(self, delta): 119 | ''' 120 | Arguments: 121 | delta (int): An integer in the closed interval `[-180, 180]` that determines the hue change, where 122 | a change by integer `delta` means a change by `2 * delta` degrees. Read up on the HSV color format 123 | if you need more information. 124 | ''' 125 | if not (-180 <= delta <= 180): raise ValueError("`delta` must be in the closed interval `[-180, 180]`.") 126 | self.delta = delta 127 | 128 | def __call__(self, image, labels=None): 129 | image[:, :, 0] = (image[:, :, 0] + self.delta) % 180.0 130 | if labels is None: 131 | return image 132 | else: 133 | return image, labels 134 | 135 | class RandomHue: 136 | ''' 137 | Randomly changes the hue of HSV images. 138 | 139 | Important: 140 | - Expects HSV input. 141 | - Expects input array to be of `dtype` `float`. 142 | ''' 143 | def __init__(self, max_delta=18, prob=0.5): 144 | ''' 145 | Arguments: 146 | max_delta (int): An integer in the closed interval `[0, 180]` that determines the maximal absolute 147 | hue change. 148 | prob (float, optional): `(1 - prob)` determines the probability with which the original, 149 | unaltered image is returned. 150 | ''' 151 | if not (0 <= max_delta <= 180): raise ValueError("`max_delta` must be in the closed interval `[0, 180]`.") 152 | self.max_delta = max_delta 153 | self.prob = prob 154 | self.change_hue = Hue(delta=0) 155 | 156 | def __call__(self, image, labels=None): 157 | p = np.random.uniform(0,1) 158 | if p >= (1.0-self.prob): 159 | self.change_hue.delta = np.random.uniform(-self.max_delta, self.max_delta) 160 | return self.change_hue(image, labels) 161 | elif labels is None: 162 | return image 163 | else: 164 | return image, labels 165 | 166 | class Saturation: 167 | ''' 168 | Changes the saturation of HSV images. 169 | 170 | Important: 171 | - Expects HSV input. 172 | - Expects input array to be of `dtype` `float`. 173 | ''' 174 | def __init__(self, factor): 175 | ''' 176 | Arguments: 177 | factor (float): A float greater than zero that determines saturation change, where 178 | values less than one result in less saturation and values greater than one result 179 | in more saturation. 180 | ''' 181 | if factor <= 0.0: raise ValueError("It must be `factor > 0`.") 182 | self.factor = factor 183 | 184 | def __call__(self, image, labels=None): 185 | image[:,:,1] = np.clip(image[:,:,1] * self.factor, 0, 255) 186 | if labels is None: 187 | return image 188 | else: 189 | return image, labels 190 | 191 | class RandomSaturation: 192 | ''' 193 | Randomly changes the saturation of HSV images. 194 | 195 | Important: 196 | - Expects HSV input. 197 | - Expects input array to be of `dtype` `float`. 198 | ''' 199 | def __init__(self, lower=0.3, upper=2.0, prob=0.5): 200 | ''' 201 | Arguments: 202 | lower (float, optional): A float greater than zero, the lower bound for the random 203 | saturation change. 
204 | upper (float, optional): A float greater than zero, the upper bound for the random 205 | saturation change. Must be greater than `lower`. 206 | prob (float, optional): `(1 - prob)` determines the probability with which the original, 207 | unaltered image is returned. 208 | ''' 209 | if lower >= upper: raise ValueError("`upper` must be greater than `lower`.") 210 | self.lower = lower 211 | self.upper = upper 212 | self.prob = prob 213 | self.change_saturation = Saturation(factor=1.0) 214 | 215 | def __call__(self, image, labels=None): 216 | p = np.random.uniform(0,1) 217 | if p >= (1.0-self.prob): 218 | self.change_saturation.factor = np.random.uniform(self.lower, self.upper) 219 | return self.change_saturation(image, labels) 220 | elif labels is None: 221 | return image 222 | else: 223 | return image, labels 224 | 225 | class Brightness: 226 | ''' 227 | Changes the brightness of RGB images. 228 | 229 | Important: 230 | - Expects RGB input. 231 | - Expects input array to be of `dtype` `float`. 232 | ''' 233 | def __init__(self, delta): 234 | ''' 235 | Arguments: 236 | delta (int): An integer, the amount to add to or subtract from the intensity 237 | of every pixel. 238 | ''' 239 | self.delta = delta 240 | 241 | def __call__(self, image, labels=None): 242 | image = np.clip(image + self.delta, 0, 255) 243 | if labels is None: 244 | return image 245 | else: 246 | return image, labels 247 | 248 | class RandomBrightness: 249 | ''' 250 | Randomly changes the brightness of RGB images. 251 | 252 | Important: 253 | - Expects RGB input. 254 | - Expects input array to be of `dtype` `float`. 255 | ''' 256 | def __init__(self, lower=-84, upper=84, prob=0.5): 257 | ''' 258 | Arguments: 259 | lower (int, optional): An integer, the lower bound for the random brightness change. 260 | upper (int, optional): An integer, the upper bound for the random brightness change. 261 | Must be greater than `lower`. 262 | prob (float, optional): `(1 - prob)` determines the probability with which the original, 263 | unaltered image is returned. 264 | ''' 265 | if lower >= upper: raise ValueError("`upper` must be greater than `lower`.") 266 | self.lower = float(lower) 267 | self.upper = float(upper) 268 | self.prob = prob 269 | self.change_brightness = Brightness(delta=0) 270 | 271 | def __call__(self, image, labels=None): 272 | p = np.random.uniform(0,1) 273 | if p >= (1.0-self.prob): 274 | self.change_brightness.delta = np.random.uniform(self.lower, self.upper) 275 | return self.change_brightness(image, labels) 276 | elif labels is None: 277 | return image 278 | else: 279 | return image, labels 280 | 281 | class Contrast: 282 | ''' 283 | Changes the contrast of RGB images. 284 | 285 | Important: 286 | - Expects RGB input. 287 | - Expects input array to be of `dtype` `float`. 288 | ''' 289 | def __init__(self, factor): 290 | ''' 291 | Arguments: 292 | factor (float): A float greater than zero that determines contrast change, where 293 | values less than one result in less contrast and values greater than one result 294 | in more contrast. 295 | ''' 296 | if factor <= 0.0: raise ValueError("It must be `factor > 0`.") 297 | self.factor = factor 298 | 299 | def __call__(self, image, labels=None): 300 | image = np.clip(127.5 + self.factor * (image - 127.5), 0, 255) 301 | if labels is None: 302 | return image 303 | else: 304 | return image, labels 305 | 306 | class RandomContrast: 307 | ''' 308 | Randomly changes the contrast of RGB images. 309 | 310 | Important: 311 | - Expects RGB input. 
312 |     - Expects input array to be of `dtype` `float`.
313 |     '''
314 |     def __init__(self, lower=0.5, upper=1.5, prob=0.5):
315 |         '''
316 |         Arguments:
317 |             lower (float, optional): A float greater than zero, the lower bound for the random
318 |                 contrast change.
319 |             upper (float, optional): A float greater than zero, the upper bound for the random
320 |                 contrast change. Must be greater than `lower`.
321 |             prob (float, optional): `(1 - prob)` determines the probability with which the original,
322 |                 unaltered image is returned.
323 |         '''
324 |         if lower >= upper: raise ValueError("`upper` must be greater than `lower`.")
325 |         self.lower = lower
326 |         self.upper = upper
327 |         self.prob = prob
328 |         self.change_contrast = Contrast(factor=1.0)
329 | 
330 |     def __call__(self, image, labels=None):
331 |         p = np.random.uniform(0,1)
332 |         if p >= (1.0-self.prob):
333 |             self.change_contrast.factor = np.random.uniform(self.lower, self.upper)
334 |             return self.change_contrast(image, labels)
335 |         elif labels is None:
336 |             return image
337 |         else:
338 |             return image, labels
339 | 
340 | class Gamma:
341 |     '''
342 |     Changes the gamma value of RGB images.
343 | 
344 |     Important: Expects RGB input of `dtype` `uint8` (required by `cv2.LUT()`).
345 |     '''
346 |     def __init__(self, gamma):
347 |         '''
348 |         Arguments:
349 |             gamma (float): A float greater than zero that determines gamma change.
350 |         '''
351 |         if gamma <= 0.0: raise ValueError("It must be `gamma > 0`.")
352 |         self.gamma = gamma
353 |         self.gamma_inv = 1.0 / gamma
354 |         # Build a lookup table mapping the pixel values [0, 255] to
355 |         # their adjusted gamma values.
356 |         self.table = np.array([((i / 255.0) ** self.gamma_inv) * 255 for i in np.arange(0, 256)]).astype("uint8")
357 | 
358 |     def __call__(self, image, labels=None):
359 |         image = cv2.LUT(image, self.table) # `self.table`, not the undefined name `table`.
360 |         if labels is None:
361 |             return image
362 |         else:
363 |             return image, labels
364 | 
365 | class RandomGamma:
366 |     '''
367 |     Randomly changes the gamma value of RGB images.
368 | 
369 |     Important: Expects RGB input.
370 |     '''
371 |     def __init__(self, lower=0.25, upper=2.0, prob=0.5):
372 |         '''
373 |         Arguments:
374 |             lower (float, optional): A float greater than zero, the lower bound for the random
375 |                 gamma change.
376 |             upper (float, optional): A float greater than zero, the upper bound for the random
377 |                 gamma change. Must be greater than `lower`.
378 |             prob (float, optional): `(1 - prob)` determines the probability with which the original,
379 |                 unaltered image is returned.
380 |         '''
381 |         if lower >= upper: raise ValueError("`upper` must be greater than `lower`.")
382 |         self.lower = lower
383 |         self.upper = upper
384 |         self.prob = prob
385 | 
386 |     def __call__(self, image, labels=None):
387 |         p = np.random.uniform(0,1)
388 |         if p >= (1.0-self.prob):
389 |             gamma = np.random.uniform(self.lower, self.upper)
390 |             change_gamma = Gamma(gamma=gamma)
391 |             return change_gamma(image, labels)
392 |         elif labels is None:
393 |             return image
394 |         else:
395 |             return image, labels
396 | 
397 | class HistogramEqualization:
398 |     '''
399 |     Performs histogram equalization on HSV images.
400 | 
401 |     Important: Expects HSV input.
402 |     '''
403 |     def __init__(self):
404 |         pass
405 | 
406 |     def __call__(self, image, labels=None):
407 |         image[:,:,2] = cv2.equalizeHist(image[:,:,2])
408 |         if labels is None:
409 |             return image
410 |         else:
411 |             return image, labels
412 | 
413 | class RandomHistogramEqualization:
414 |     '''
415 |     Randomly performs histogram equalization on HSV images.
The randomness only refers
416 |     to whether or not the equalization is performed.
417 | 
418 |     Important: Expects HSV input.
419 |     '''
420 |     def __init__(self, prob=0.5):
421 |         '''
422 |         Arguments:
423 |             prob (float, optional): `(1 - prob)` determines the probability with which the original,
424 |                 unaltered image is returned.
425 |         '''
426 |         self.prob = prob
427 |         self.equalize = HistogramEqualization()
428 | 
429 |     def __call__(self, image, labels=None):
430 |         p = np.random.uniform(0,1)
431 |         if p >= (1.0-self.prob):
432 |             return self.equalize(image, labels)
433 |         elif labels is None:
434 |             return image
435 |         else:
436 |             return image, labels
437 | 
438 | class ChannelSwap:
439 |     '''
440 |     Swaps the channels of images.
441 |     '''
442 |     def __init__(self, order):
443 |         '''
444 |         Arguments:
445 |             order (tuple): A tuple of integers that defines the desired channel order
446 |                 of the input images after the channel swap.
447 |         '''
448 |         self.order = order
449 | 
450 |     def __call__(self, image, labels=None):
451 |         image = image[:,:,self.order]
452 |         if labels is None:
453 |             return image
454 |         else:
455 |             return image, labels
456 | 
457 | class RandomChannelSwap:
458 |     '''
459 |     Randomly swaps the channels of RGB images.
460 | 
461 |     Important: Expects RGB input.
462 |     '''
463 |     def __init__(self, prob=0.5):
464 |         '''
465 |         Arguments:
466 |             prob (float, optional): `(1 - prob)` determines the probability with which the original,
467 |                 unaltered image is returned.
468 |         '''
469 |         self.prob = prob
470 |         # All possible permutations of the three image channels except the original order.
471 |         self.permutations = ((0, 2, 1),
472 |                              (1, 0, 2), (1, 2, 0),
473 |                              (2, 0, 1), (2, 1, 0))
474 |         self.swap_channels = ChannelSwap(order=(0, 1, 2))
475 | 
476 |     def __call__(self, image, labels=None):
477 |         p = np.random.uniform(0,1)
478 |         if p >= (1.0-self.prob):
479 |             i = np.random.randint(5) # Pick one of the five permutations that differ from the original channel order.
480 |             self.swap_channels.order = self.permutations[i]
481 |             return self.swap_channels(image, labels)
482 |         elif labels is None:
483 |             return image
484 |         else:
485 |             return image, labels
486 | 
--------------------------------------------------------------------------------
/eval_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/eval_utils/__init__.py
--------------------------------------------------------------------------------
/eval_utils/coco_utils.py:
--------------------------------------------------------------------------------
1 | '''
2 | A few utilities that are useful when working with the MS COCO datasets.
3 | 
4 | Copyright (C) 2018 Pierluigi Ferrari
5 | 
6 | Licensed under the Apache License, Version 2.0 (the "License");
7 | you may not use this file except in compliance with the License.
8 | You may obtain a copy of the License at
9 | 
10 |     http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | ''' 18 | 19 | import json 20 | from tqdm import trange 21 | from math import ceil 22 | import sys 23 | 24 | from data_generator.object_detection_2d_geometric_ops import Resize 25 | from data_generator.object_detection_2d_patch_sampling_ops import RandomPadFixedAR 26 | from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels 27 | from ssd_encoder_decoder.ssd_output_decoder import decode_detections 28 | from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms 29 | 30 | def get_coco_category_maps(annotations_file): 31 | ''' 32 | Builds dictionaries that map between MS COCO category IDs, transformed category IDs, and category names. 33 | The original MS COCO category IDs are not consecutive unfortunately: The 80 category IDs are spread 34 | across the integers 1 through 90 with some integers skipped. Since we usually use a one-hot 35 | class representation in neural networks, we need to map these non-consecutive original COCO category 36 | IDs (let's call them 'cats') to consecutive category IDs (let's call them 'classes'). 37 | 38 | Arguments: 39 | annotations_file (str): The filepath to any MS COCO annotations JSON file. 40 | 41 | Returns: 42 | 1) cats_to_classes: A dictionary that maps between the original (keys) and the transformed category IDs (values). 43 | 2) classes_to_cats: A dictionary that maps between the transformed (keys) and the original category IDs (values). 44 | 3) cats_to_names: A dictionary that maps between original category IDs (keys) and the respective category names (values). 45 | 4) classes_to_names: A list of the category names (values) with their indices representing the transformed IDs. 46 | ''' 47 | with open(annotations_file, 'r') as f: 48 | annotations = json.load(f) 49 | cats_to_classes = {} 50 | classes_to_cats = {} 51 | cats_to_names = {} 52 | classes_to_names = [] 53 | classes_to_names.append('background') # Need to add the background class first so that the indexing is right. 54 | for i, cat in enumerate(annotations['categories']): 55 | cats_to_classes[cat['id']] = i + 1 56 | classes_to_cats[i + 1] = cat['id'] 57 | cats_to_names[cat['id']] = cat['name'] 58 | classes_to_names.append(cat['name']) 59 | 60 | return cats_to_classes, classes_to_cats, cats_to_names, classes_to_names 61 | 62 | def predict_all_to_json(out_file, 63 | model, 64 | img_height, 65 | img_width, 66 | classes_to_cats, 67 | data_generator, 68 | batch_size, 69 | data_generator_mode='resize', 70 | model_mode='training', 71 | confidence_thresh=0.01, 72 | iou_threshold=0.45, 73 | top_k=200, 74 | pred_coords='centroids', 75 | normalize_coords=True): 76 | ''' 77 | Runs detection predictions over the whole dataset given a model and saves them in a JSON file 78 | in the MS COCO detection results format. 79 | 80 | Arguments: 81 | out_file (str): The file name (full path) under which to save the results JSON file. 82 | model (Keras model): A Keras SSD model object. 83 | img_height (int): The input image height for the model. 84 | img_width (int): The input image width for the model. 85 | classes_to_cats (dict): A dictionary that maps the consecutive class IDs predicted by the model 86 | to the non-consecutive original MS COCO category IDs. 87 | data_generator (DataGenerator): A `DataGenerator` object with the evaluation dataset. 88 | batch_size (int): The batch size for the evaluation. 89 | data_generator_mode (str, optional): Either of 'resize' or 'pad'. If 'resize', the input images will 90 | be resized (i.e. 
warped) to `(img_height, img_width)`. This mode does not preserve the aspect ratios of the images.
91 |             If 'pad', the input images will be first padded so that they have the aspect ratio defined by `img_height`
92 |             and `img_width` and then resized to `(img_height, img_width)`. This mode preserves the aspect ratios of the images.
93 |         model_mode (str, optional): The mode in which the model was created, i.e. 'training', 'inference' or 'inference_fast'.
94 |             This is needed in order to know whether the model output is already decoded or still needs to be decoded. Refer to
95 |             the model documentation for the meaning of the individual modes.
96 |         confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
97 |             positive class in order to be considered for the non-maximum suppression stage for the respective class.
98 |             A lower value will result in a larger part of the selection process being done by the non-maximum suppression
99 |             stage, while a larger value will result in a larger part of the selection process happening in the confidence
100 |             thresholding stage.
101 |         iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold`
102 |             with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
103 |             to the box score.
104 |         top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
105 |             non-maximum suppression stage. Defaults to 200, following the paper.
106 |         pred_coords (str, optional): The box coordinate format that the model outputs. Can be either 'centroids'
107 |             for the format `(cx, cy, w, h)` (box center coordinates, width, and height), 'minmax' for the format
108 |             `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
109 |         normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1])
110 |             and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs
111 |             relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`.
112 |             Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect
113 |             coordinates. Requires `img_height` and `img_width` if set to `True`.
114 | 
115 |     Returns:
116 |         None.
117 |     '''
118 | 
119 |     convert_to_3_channels = ConvertTo3Channels()
120 |     resize = Resize(height=img_height,width=img_width)
121 |     if data_generator_mode == 'resize':
122 |         transformations = [convert_to_3_channels,
123 |                            resize]
124 |     elif data_generator_mode == 'pad':
125 |         random_pad = RandomPadFixedAR(patch_aspect_ratio=img_width/img_height, clip_boxes=False)
126 |         transformations = [convert_to_3_channels,
127 |                            random_pad,
128 |                            resize]
129 |     else:
130 |         raise ValueError("Unexpected argument value: `data_generator_mode` can be either of 'resize' or 'pad', but received '{}'.".format(data_generator_mode))
131 | 
132 |     # Set the generator parameters.
133 |     generator = data_generator.generate(batch_size=batch_size,
134 |                                         shuffle=False,
135 |                                         transformations=transformations,
136 |                                         label_encoder=None,
137 |                                         returns={'processed_images',
138 |                                                  'image_ids',
139 |                                                  'inverse_transform'},
140 |                                         keep_images_without_gt=True)
141 |     # Put the results in this list.
142 |     results = []
143 |     # Compute the number of batches to iterate over the entire dataset.
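    # (Using `ceil` ensures that a final batch smaller than `batch_size` is still processed:
    # e.g. a hypothetical 4952 images at `batch_size == 32` yield ceil(4952/32) = 155 batches.)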
144 |     n_images = data_generator.get_dataset_size()
145 |     print("Number of images in the evaluation dataset: {}".format(n_images))
146 |     n_batches = int(ceil(n_images / batch_size))
147 |     # Loop over all batches.
148 |     tr = trange(n_batches, file=sys.stdout)
149 |     tr.set_description('Producing results file')
150 |     for i in tr:
151 |         # Generate batch.
152 |         batch_X, batch_image_ids, batch_inverse_transforms = next(generator)
153 |         # Predict.
154 |         y_pred = model.predict(batch_X)
155 |         # If the model was created in 'training' mode, the raw predictions need to
156 |         # be decoded and filtered, otherwise that's already taken care of.
157 |         if model_mode == 'training':
158 |             # Decode.
159 |             y_pred = decode_detections(y_pred,
160 |                                        confidence_thresh=confidence_thresh,
161 |                                        iou_threshold=iou_threshold,
162 |                                        top_k=top_k,
163 |                                        input_coords=pred_coords,
164 |                                        normalize_coords=normalize_coords,
165 |                                        img_height=img_height,
166 |                                        img_width=img_width)
167 |         else:
168 |             # Filter out the all-zeros dummy elements of `y_pred`.
169 |             y_pred_filtered = []
170 |             for j in range(len(y_pred)): # `j`, so as not to shadow the batch index `i`.
171 |                 y_pred_filtered.append(y_pred[j][y_pred[j,:,0] != 0])
172 |             y_pred = y_pred_filtered
173 |         # Convert the predicted box coordinates for the original images.
174 |         y_pred = apply_inverse_transforms(y_pred, batch_inverse_transforms)
175 | 
176 |         # Convert each predicted box into the results format.
177 |         for k, batch_item in enumerate(y_pred):
178 |             for box in batch_item:
179 |                 class_id = box[0]
180 |                 # Transform the consecutive class IDs back to the original COCO category IDs.
181 |                 cat_id = classes_to_cats[class_id]
182 |                 # Round the box coordinates to reduce the JSON file size.
183 |                 xmin = float(round(box[2], 1))
184 |                 ymin = float(round(box[3], 1))
185 |                 xmax = float(round(box[4], 1))
186 |                 ymax = float(round(box[5], 1))
187 |                 width = xmax - xmin
188 |                 height = ymax - ymin
189 |                 bbox = [xmin, ymin, width, height]
190 |                 result = {}
191 |                 result['image_id'] = batch_image_ids[k]
192 |                 result['category_id'] = cat_id
193 |                 result['score'] = float(round(box[1], 3))
194 |                 result['bbox'] = bbox
195 |                 results.append(result)
196 | 
197 |     with open(out_file, 'w') as f:
198 |         json.dump(results, f)
199 | 
200 |     print("Prediction results saved in '{}'".format(out_file))
201 | 
--------------------------------------------------------------------------------
/examples/fish-bike.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/fish-bike.jpg
--------------------------------------------------------------------------------
/examples/fish_bike.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/fish_bike.jpg
--------------------------------------------------------------------------------
/examples/ssd300_pascalVOC_pred_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd300_pascalVOC_pred_01.png
--------------------------------------------------------------------------------
/examples/ssd300_pascalVOC_pred_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd300_pascalVOC_pred_02.png -------------------------------------------------------------------------------- /examples/ssd300_pascalVOC_pred_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd300_pascalVOC_pred_03.png -------------------------------------------------------------------------------- /examples/ssd300_pascalVOC_pred_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd300_pascalVOC_pred_04.png -------------------------------------------------------------------------------- /examples/ssd300_pascalVOC_pred_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd300_pascalVOC_pred_05.png -------------------------------------------------------------------------------- /examples/ssd300_pascalVOC_pred_06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd300_pascalVOC_pred_06.png -------------------------------------------------------------------------------- /examples/ssd300_pascalVOC_pred_07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd300_pascalVOC_pred_07.png -------------------------------------------------------------------------------- /examples/ssd300_pascalVOC_pred_08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd300_pascalVOC_pred_08.png -------------------------------------------------------------------------------- /examples/ssd300_pascalVOC_pred_09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd300_pascalVOC_pred_09.png -------------------------------------------------------------------------------- /examples/ssd7_udacity_traffic_pred_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd7_udacity_traffic_pred_01.png -------------------------------------------------------------------------------- /examples/ssd7_udacity_traffic_pred_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd7_udacity_traffic_pred_02.png -------------------------------------------------------------------------------- /examples/ssd7_udacity_traffic_pred_03.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd7_udacity_traffic_pred_03.png -------------------------------------------------------------------------------- /examples/ssd7_udacity_traffic_pred_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd7_udacity_traffic_pred_04.png -------------------------------------------------------------------------------- /examples/ssd7_udacity_traffic_pred_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/ssd7_udacity_traffic_pred_05.png -------------------------------------------------------------------------------- /examples/trained_ssd300_pascalVOC2007_test_pred_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/trained_ssd300_pascalVOC2007_test_pred_01.png -------------------------------------------------------------------------------- /examples/trained_ssd300_pascalVOC2007_test_pred_01_no_gt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/trained_ssd300_pascalVOC2007_test_pred_01_no_gt.png -------------------------------------------------------------------------------- /examples/trained_ssd300_pascalVOC2007_test_pred_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/trained_ssd300_pascalVOC2007_test_pred_02.png -------------------------------------------------------------------------------- /examples/trained_ssd300_pascalVOC2007_test_pred_02_no_gt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/trained_ssd300_pascalVOC2007_test_pred_02_no_gt.png -------------------------------------------------------------------------------- /examples/trained_ssd300_pascalVOC2007_test_pred_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/trained_ssd300_pascalVOC2007_test_pred_03.png -------------------------------------------------------------------------------- /examples/trained_ssd300_pascalVOC2007_test_pred_03_no_gt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/trained_ssd300_pascalVOC2007_test_pred_03_no_gt.png -------------------------------------------------------------------------------- /examples/trained_ssd300_pascalVOC2007_test_pred_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/trained_ssd300_pascalVOC2007_test_pred_04.png 
-------------------------------------------------------------------------------- /examples/trained_ssd300_pascalVOC2007_test_pred_04_no_gt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/trained_ssd300_pascalVOC2007_test_pred_04_no_gt.png -------------------------------------------------------------------------------- /examples/trained_ssd300_pascalVOC2007_test_pred_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/trained_ssd300_pascalVOC2007_test_pred_05.png -------------------------------------------------------------------------------- /examples/trained_ssd300_pascalVOC2007_test_pred_05_no_gt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/trained_ssd300_pascalVOC2007_test_pred_05_no_gt.png -------------------------------------------------------------------------------- /examples/trained_ssd300_pascalVOC2007_test_pred_06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/examples/trained_ssd300_pascalVOC2007_test_pred_06.png -------------------------------------------------------------------------------- /keras_layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/keras_layers/__init__.py -------------------------------------------------------------------------------- /keras_layers/keras_layer_AnchorBoxes.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A custom Keras layer to generate anchor boxes. 3 | 4 | Copyright (C) 2018 Pierluigi Ferrari 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | ''' 18 | 19 | from __future__ import division 20 | import numpy as np 21 | import keras.backend as K 22 | from keras.engine.topology import InputSpec 23 | from keras.engine.topology import Layer 24 | 25 | from bounding_box_utils.bounding_box_utils import convert_coordinates 26 | 27 | class AnchorBoxes(Layer): 28 | ''' 29 | A Keras layer to create an output tensor containing anchor box coordinates 30 | and variances based on the input tensor and the passed arguments. 31 | 32 | A set of 2D anchor boxes of different aspect ratios is created for each spatial unit of 33 | the input tensor. The number of anchor boxes created per unit depends on the arguments 34 | `aspect_ratios` and `two_boxes_for_ar1`, in the default case it is 4. The boxes 35 | are parameterized by the coordinate tuple `(xmin, xmax, ymin, ymax)`. 
36 | 
37 |     The logic implemented by this layer is identical to the logic in the module
38 |     `ssd_box_encode_decode_utils.py`.
39 | 
40 |     The purpose of having this layer in the network is to make the model self-sufficient
41 |     at inference time. Since the model is predicting offsets to the anchor boxes
42 |     (rather than predicting absolute box coordinates directly), one needs to know the anchor
43 |     box coordinates in order to construct the final prediction boxes from the predicted offsets.
44 |     If the model's output tensor did not contain the anchor box coordinates, the necessary
45 |     information to convert the predicted offsets back to absolute coordinates would be missing
46 |     in the model output. The reason why it is necessary to predict offsets to the anchor boxes
47 |     rather than to predict absolute box coordinates directly is explained in `README.md`.
48 | 
49 |     Input shape:
50 |         4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'`
51 |         or `(batch, height, width, channels)` if `dim_ordering = 'tf'`.
52 | 
53 |     Output shape:
54 |         5D tensor of shape `(batch, height, width, n_boxes, 8)`. The last axis contains
55 |         the four anchor box coordinates and the four variance values for each box.
56 |     '''
57 | 
58 |     def __init__(self,
59 |                  img_height,
60 |                  img_width,
61 |                  this_scale,
62 |                  next_scale,
63 |                  aspect_ratios=[0.5, 1.0, 2.0],
64 |                  two_boxes_for_ar1=True,
65 |                  this_steps=None,
66 |                  this_offsets=None,
67 |                  clip_boxes=False,
68 |                  variances=[0.1, 0.1, 0.2, 0.2],
69 |                  coords='centroids',
70 |                  normalize_coords=False,
71 |                  **kwargs):
72 |         '''
73 |         All arguments need to be set to the same values as in the box encoding process, otherwise the behavior is undefined.
74 |         Some of these arguments are explained in more detail in the documentation of the `SSDBoxEncoder` class.
75 | 
76 |         Arguments:
77 |             img_height (int): The height of the input images.
78 |             img_width (int): The width of the input images.
79 |             this_scale (float): A float in [0, 1], the scaling factor for the size of the generated anchor boxes
80 |                 as a fraction of the shorter side of the input image.
81 |             next_scale (float): A float in [0, 1], the next larger scaling factor. Only relevant if
82 |                 `self.two_boxes_for_ar1 == True`.
83 |             aspect_ratios (list, optional): The list of aspect ratios for which default boxes are to be
84 |                 generated for this layer.
85 |             two_boxes_for_ar1 (bool, optional): Only relevant if `aspect_ratios` contains 1.
86 |                 If `True`, two default boxes will be generated for aspect ratio 1. The first will be generated
87 |                 using the scaling factor for the respective layer, the second one will be generated using the
88 |                 geometric mean of said scaling factor and the next bigger scaling factor.
89 |             clip_boxes (bool, optional): If `True`, clips the anchor box coordinates to stay within image boundaries.
90 |             variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate will be divided by
91 |                 its respective variance value.
92 |             coords (str, optional): The box coordinate format to be used internally in the model (i.e. this is not the input format
93 |                 of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width, and height),
94 |                 'corners' for the format `(xmin, ymin, xmax, ymax)`, or 'minmax' for the format `(xmin, xmax, ymin, ymax)`.
95 |             normalize_coords (bool, optional): Set to `True` if the model uses relative instead of absolute coordinates,
96 |                 i.e.
if the model predicts box coordinates within [0,1] instead of absolute coordinates. 97 | ''' 98 | if K.backend() != 'tensorflow': 99 | raise TypeError("This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend())) 100 | 101 | if (this_scale < 0) or (next_scale < 0) or (this_scale > 1): 102 | raise ValueError("`this_scale` must be in [0, 1] and `next_scale` must be >0, but `this_scale` == {}, `next_scale` == {}".format(this_scale, next_scale)) 103 | 104 | if len(variances) != 4: 105 | raise ValueError("4 variance values must be passed, but {} values were received.".format(len(variances))) 106 | variances = np.array(variances) 107 | if np.any(variances <= 0): 108 | raise ValueError("All variances must be >0, but the variances given are {}".format(variances)) 109 | 110 | self.img_height = img_height 111 | self.img_width = img_width 112 | self.this_scale = this_scale 113 | self.next_scale = next_scale 114 | self.aspect_ratios = aspect_ratios 115 | self.two_boxes_for_ar1 = two_boxes_for_ar1 116 | self.this_steps = this_steps 117 | self.this_offsets = this_offsets 118 | self.clip_boxes = clip_boxes 119 | self.variances = variances 120 | self.coords = coords 121 | self.normalize_coords = normalize_coords 122 | # Compute the number of boxes per cell 123 | if (1 in aspect_ratios) and two_boxes_for_ar1: 124 | self.n_boxes = len(aspect_ratios) + 1 125 | else: 126 | self.n_boxes = len(aspect_ratios) 127 | super(AnchorBoxes, self).__init__(**kwargs) 128 | 129 | def build(self, input_shape): 130 | self.input_spec = [InputSpec(shape=input_shape)] 131 | super(AnchorBoxes, self).build(input_shape) 132 | 133 | def call(self, x, mask=None): 134 | ''' 135 | Return an anchor box tensor based on the shape of the input tensor. 136 | 137 | The logic implemented here is identical to the logic in the module `ssd_box_encode_decode_utils.py`. 138 | 139 | Note that this tensor does not participate in any graph computations at runtime. It is being created 140 | as a constant once during graph creation and is just being output along with the rest of the model output 141 | during runtime. Because of this, all logic is implemented as Numpy array operations and it is sufficient 142 | to convert the resulting Numpy array into a Keras tensor at the very end before outputting it. 143 | 144 | Arguments: 145 | x (tensor): 4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'` 146 | or `(batch, height, width, channels)` if `dim_ordering = 'tf'`. The input for this 147 | layer must be the output of the localization predictor layer. 148 | ''' 149 | 150 | # Compute box width and height for each aspect ratio 151 | # The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`. 152 | size = min(self.img_height, self.img_width) 153 | # Compute the box widths and heights for all aspect ratios 154 | wh_list = [] 155 | for ar in self.aspect_ratios: 156 | if (ar == 1): 157 | # Compute the regular anchor box for aspect ratio 1. 158 | box_height = box_width = self.this_scale * size 159 | wh_list.append((box_width, box_height)) 160 | if self.two_boxes_for_ar1: 161 | # Compute one slightly larger version using the geometric mean of this scale value and the next.
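# (Worked example with assumed values: for `this_scale=0.2`, `next_scale=0.37` and `size=300`, this extra box is sqrt(0.2 * 0.37) * 300 ≈ 81.6 pixels on each side, in between the 60-pixel box of this layer and the 111-pixel box of the next.)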
162 | box_height = box_width = np.sqrt(self.this_scale * self.next_scale) * size 163 | wh_list.append((box_width, box_height)) 164 | else: 165 | box_height = self.this_scale * size / np.sqrt(ar) 166 | box_width = self.this_scale * size * np.sqrt(ar) 167 | wh_list.append((box_width, box_height)) 168 | wh_list = np.array(wh_list) 169 | 170 | # We need the shape of the input tensor 171 | if K.image_dim_ordering() == 'tf': 172 | batch_size, feature_map_height, feature_map_width, feature_map_channels = x._keras_shape 173 | else: # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future 174 | batch_size, feature_map_channels, feature_map_height, feature_map_width = x._keras_shape 175 | 176 | # Compute the grid of box center points. They are identical for all aspect ratios. 177 | 178 | # Compute the step sizes, i.e. how far apart the anchor box center points will be vertically and horizontally. 179 | if (self.this_steps is None): 180 | step_height = self.img_height / feature_map_height 181 | step_width = self.img_width / feature_map_width 182 | else: 183 | if isinstance(self.this_steps, (list, tuple)) and (len(self.this_steps) == 2): 184 | step_height = self.this_steps[0] 185 | step_width = self.this_steps[1] 186 | elif isinstance(self.this_steps, (int, float)): 187 | step_height = self.this_steps 188 | step_width = self.this_steps 189 | # Compute the offsets, i.e. at what pixel values the first anchor box center point will be from the top and from the left of the image. 190 | if (self.this_offsets is None): 191 | offset_height = 0.5 192 | offset_width = 0.5 193 | else: 194 | if isinstance(self.this_offsets, (list, tuple)) and (len(self.this_offsets) == 2): 195 | offset_height = self.this_offsets[0] 196 | offset_width = self.this_offsets[1] 197 | elif isinstance(self.this_offsets, (int, float)): 198 | offset_height = self.this_offsets 199 | offset_width = self.this_offsets 200 | # Now that we have the offsets and step sizes, compute the grid of anchor box center points. 
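# (Illustration with assumed values: for `img_height=300` and `feature_map_height=10`, we get `step_height=30` and `offset_height=0.5`, so `cy` below becomes [15, 45, ..., 285], i.e. one center point in the middle of each 30-pixel cell.)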
201 | cy = np.linspace(offset_height * step_height, (offset_height + feature_map_height - 1) * step_height, feature_map_height) 202 | cx = np.linspace(offset_width * step_width, (offset_width + feature_map_width - 1) * step_width, feature_map_width) 203 | cx_grid, cy_grid = np.meshgrid(cx, cy) 204 | cx_grid = np.expand_dims(cx_grid, -1) # This is necessary for np.tile() to do what we want further down 205 | cy_grid = np.expand_dims(cy_grid, -1) # This is necessary for np.tile() to do what we want further down 206 | 207 | # Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)` 208 | # where the last dimension will contain `(cx, cy, w, h)` 209 | boxes_tensor = np.zeros((feature_map_height, feature_map_width, self.n_boxes, 4)) 210 | 211 | boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes)) # Set cx 212 | boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes)) # Set cy 213 | boxes_tensor[:, :, :, 2] = wh_list[:, 0] # Set w 214 | boxes_tensor[:, :, :, 3] = wh_list[:, 1] # Set h 215 | 216 | # Convert `(cx, cy, w, h)` to `(xmin, ymin, xmax, ymax)` 217 | boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='centroids2corners') 218 | 219 | # If `clip_boxes` is enabled, clip the coordinates to lie within the image boundaries 220 | if self.clip_boxes: 221 | x_coords = boxes_tensor[:,:,:,[0, 2]] 222 | x_coords[x_coords >= self.img_width] = self.img_width - 1 223 | x_coords[x_coords < 0] = 0 224 | boxes_tensor[:,:,:,[0, 2]] = x_coords 225 | y_coords = boxes_tensor[:,:,:,[1, 3]] 226 | y_coords[y_coords >= self.img_height] = self.img_height - 1 227 | y_coords[y_coords < 0] = 0 228 | boxes_tensor[:,:,:,[1, 3]] = y_coords 229 | 230 | # If `normalize_coords` is enabled, normalize the coordinates to be within [0,1] 231 | if self.normalize_coords: 232 | boxes_tensor[:, :, :, [0, 2]] /= self.img_width 233 | boxes_tensor[:, :, :, [1, 3]] /= self.img_height 234 | 235 | # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth. 236 | if self.coords == 'centroids': 237 | # Convert `(xmin, ymin, xmax, ymax)` back to `(cx, cy, w, h)`. 238 | boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2centroids', border_pixels='half') 239 | elif self.coords == 'minmax': 240 | # Convert `(xmin, ymin, xmax, ymax)` to `(xmin, xmax, ymin, ymax)`. 241 | boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2minmax', border_pixels='half') 242 | 243 | # Create a tensor to contain the variances and append it to `boxes_tensor`. This tensor has the same shape 244 | # as `boxes_tensor` and simply contains the same 4 variance values for every position in the last axis.
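# (E.g. with the default `variances=[0.1, 0.1, 0.2, 0.2]` and `coords='centroids'`, every box ends up as `[cx, cy, w, h, 0.1, 0.1, 0.2, 0.2]` along the last axis.)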
245 | variances_tensor = np.zeros_like(boxes_tensor) # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)` 246 | variances_tensor += self.variances # Long live broadcasting 247 | # Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)` 248 | boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1) 249 | 250 | # Now prepend one dimension to `boxes_tensor` to account for the batch size and tile it along the batch dimension. 251 | # The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)` 252 | boxes_tensor = np.expand_dims(boxes_tensor, axis=0) 253 | boxes_tensor = K.tile(K.constant(boxes_tensor, dtype='float32'), (K.shape(x)[0], 1, 1, 1, 1)) 254 | 255 | return boxes_tensor 256 | 257 | def compute_output_shape(self, input_shape): 258 | if K.image_dim_ordering() == 'tf': 259 | batch_size, feature_map_height, feature_map_width, feature_map_channels = input_shape 260 | else: # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future 261 | batch_size, feature_map_channels, feature_map_height, feature_map_width = input_shape 262 | return (batch_size, feature_map_height, feature_map_width, self.n_boxes, 8) 263 | 264 | def get_config(self): 265 | config = { 266 | 'img_height': self.img_height, 267 | 'img_width': self.img_width, 268 | 'this_scale': self.this_scale, 269 | 'next_scale': self.next_scale, 270 | 'aspect_ratios': list(self.aspect_ratios), 271 | 'two_boxes_for_ar1': self.two_boxes_for_ar1, 272 | 'this_steps': self.this_steps, # Needed so that models built with custom step sizes can be reloaded from their config. 273 | 'this_offsets': self.this_offsets, # Likewise for custom offsets. 274 | 'clip_boxes': self.clip_boxes, 275 | 'variances': list(self.variances), 276 | 'coords': self.coords, 277 | 'normalize_coords': self.normalize_coords 278 | } 279 | base_config = super(AnchorBoxes, self).get_config() 280 | return dict(list(base_config.items()) + list(config.items())) 281 | -------------------------------------------------------------------------------- /keras_layers/keras_layer_DecodeDetections.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A custom Keras layer to decode the raw SSD prediction output. Corresponds to the 3 | `DetectionOutput` layer type in the original Caffe implementation of SSD. 4 | 5 | Copyright (C) 2018 Pierluigi Ferrari 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | ''' 19 | 20 | from __future__ import division 21 | import numpy as np 22 | import tensorflow as tf 23 | import keras.backend as K 24 | from keras.engine.topology import InputSpec 25 | from keras.engine.topology import Layer 26 | 27 | class DecodeDetections(Layer): 28 | ''' 29 | A Keras layer to decode the raw SSD prediction output. 30 | 31 | Input shape: 32 | 3D tensor of shape `(batch_size, n_boxes, n_classes + 12)`. 33 | 34 | Output shape: 35 | 3D tensor of shape `(batch_size, top_k, 6)`.
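The last axis of the output holds `[class_id, confidence, xmin, ymin, xmax, ymax]` for each kept box; see `call()` below for details.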
36 | ''' 37 | 38 | def __init__(self, 39 | confidence_thresh=0.01, 40 | iou_threshold=0.45, 41 | top_k=200, 42 | nms_max_output_size=400, 43 | coords='centroids', 44 | normalize_coords=True, 45 | img_height=None, 46 | img_width=None, 47 | **kwargs): 48 | ''' 49 | All default argument values follow the Caffe implementation. 50 | 51 | Arguments: 52 | confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific 53 | positive class in order to be considered for the non-maximum suppression stage for the respective class. 54 | A lower value will result in a larger part of the selection process being done by the non-maximum suppression 55 | stage, while a larger value will result in a larger part of the selection process happening in the confidence 56 | thresholding stage. 57 | iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold` 58 | with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers 59 | to the box score. 60 | top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the 61 | non-maximum suppression stage. 62 | nms_max_output_size (int, optional): The maximum number of predictions that will be left after performing non-maximum 63 | suppression. 64 | coords (str, optional): The box coordinate format that the model outputs. Must be 'centroids' 65 | i.e. the format `(cx, cy, w, h)` (box center coordinates, width, and height). Other coordinate formats are 66 | currently not supported. 67 | normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1]) 68 | and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs 69 | relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`. 70 | Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect 71 | coordinates. Requires `img_height` and `img_width` if set to `True`. 72 | img_height (int, optional): The height of the input images. Only needed if `normalize_coords` is `True`. 73 | img_width (int, optional): The width of the input images. Only needed if `normalize_coords` is `True`. 74 | ''' 75 | if K.backend() != 'tensorflow': 76 | raise TypeError("This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend())) 77 | 78 | if normalize_coords and ((img_height is None) or (img_width is None)): 79 | raise ValueError("If relative box coordinates are supposed to be converted to absolute coordinates, the decoder needs the image size in order to decode the predictions, but `img_height == {}` and `img_width == {}`".format(img_height, img_width)) 80 | 81 | if coords != 'centroids': 82 | raise ValueError("The DetectionOutput layer currently only supports the 'centroids' coordinate format.") 83 | 84 | # We need these members for the config. 85 | self.confidence_thresh = confidence_thresh 86 | self.iou_threshold = iou_threshold 87 | self.top_k = top_k 88 | self.normalize_coords = normalize_coords 89 | self.img_height = img_height 90 | self.img_width = img_width 91 | self.coords = coords 92 | self.nms_max_output_size = nms_max_output_size 93 | 94 | # We need these members for TensorFlow. 
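# (These are stored as TensorFlow constants so that they can be consumed by the graph ops in `call()` below, e.g. `self.tf_normalize_coords` serves as the predicate of a `tf.cond()`.)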
95 | self.tf_confidence_thresh = tf.constant(self.confidence_thresh, name='confidence_thresh') 96 | self.tf_iou_threshold = tf.constant(self.iou_threshold, name='iou_threshold') 97 | self.tf_top_k = tf.constant(self.top_k, name='top_k') 98 | self.tf_normalize_coords = tf.constant(self.normalize_coords, name='normalize_coords') 99 | self.tf_img_height = tf.constant(self.img_height, dtype=tf.float32, name='img_height') 100 | self.tf_img_width = tf.constant(self.img_width, dtype=tf.float32, name='img_width') 101 | self.tf_nms_max_output_size = tf.constant(self.nms_max_output_size, name='nms_max_output_size') 102 | 103 | super(DecodeDetections, self).__init__(**kwargs) 104 | 105 | def build(self, input_shape): 106 | self.input_spec = [InputSpec(shape=input_shape)] 107 | super(DecodeDetections, self).build(input_shape) 108 | 109 | def call(self, y_pred, mask=None): 110 | ''' 111 | Returns: 112 | 3D tensor of shape `(batch_size, top_k, 6)`. The second axis is zero-padded 113 | to always yield `top_k` predictions per batch item. The last axis contains 114 | the coordinates for each predicted box in the format 115 | `[class_id, confidence, xmin, ymin, xmax, ymax]`. 116 | ''' 117 | 118 | ##################################################################################### 119 | # 1. Convert the box coordinates from predicted anchor box offsets to predicted 120 | # absolute coordinates 121 | ##################################################################################### 122 | 123 | # Convert anchor box offsets to image offsets. 124 | cx = y_pred[...,-12] * y_pred[...,-4] * y_pred[...,-6] + y_pred[...,-8] # cx = cx_pred * cx_variance * w_anchor + cx_anchor 125 | cy = y_pred[...,-11] * y_pred[...,-3] * y_pred[...,-5] + y_pred[...,-7] # cy = cy_pred * cy_variance * h_anchor + cy_anchor 126 | w = tf.exp(y_pred[...,-10] * y_pred[...,-2]) * y_pred[...,-6] # w = exp(w_pred * variance_w) * w_anchor 127 | h = tf.exp(y_pred[...,-9] * y_pred[...,-1]) * y_pred[...,-5] # h = exp(h_pred * variance_h) * h_anchor 128 | 129 | # Convert 'centroids' to 'corners'. 130 | xmin = cx - 0.5 * w 131 | ymin = cy - 0.5 * h 132 | xmax = cx + 0.5 * w 133 | ymax = cy + 0.5 * h 134 | 135 | # If the model predicts box coordinates relative to the image dimensions and they are supposed 136 | # to be converted back to absolute coordinates, do that. 137 | def normalized_coords(): 138 | xmin1 = tf.expand_dims(xmin * self.tf_img_width, axis=-1) 139 | ymin1 = tf.expand_dims(ymin * self.tf_img_height, axis=-1) 140 | xmax1 = tf.expand_dims(xmax * self.tf_img_width, axis=-1) 141 | ymax1 = tf.expand_dims(ymax * self.tf_img_height, axis=-1) 142 | return xmin1, ymin1, xmax1, ymax1 143 | def non_normalized_coords(): 144 | return tf.expand_dims(xmin, axis=-1), tf.expand_dims(ymin, axis=-1), tf.expand_dims(xmax, axis=-1), tf.expand_dims(ymax, axis=-1) 145 | 146 | xmin, ymin, xmax, ymax = tf.cond(self.tf_normalize_coords, normalized_coords, non_normalized_coords) 147 | 148 | # Concatenate the one-hot class confidences and the converted box coordinates to form the decoded predictions tensor. 149 | y_pred = tf.concat(values=[y_pred[...,:-12], xmin, ymin, xmax, ymax], axis=-1) 150 | 151 | ##################################################################################### 152 | # 2. Perform confidence thresholding, per-class non-maximum suppression, and 153 | # top-k filtering. 
154 | ##################################################################################### 155 | 156 | batch_size = tf.shape(y_pred)[0] # Output dtype: tf.int32 157 | n_boxes = tf.shape(y_pred)[1] 158 | n_classes = y_pred.shape[2] - 4 159 | class_indices = tf.range(1, n_classes) 160 | 161 | # Create a function that filters the predictions for the given batch item. Specifically, it performs: 162 | # - confidence thresholding 163 | # - non-maximum suppression (NMS) 164 | # - top-k filtering 165 | def filter_predictions(batch_item): 166 | 167 | # Create a function that filters the predictions for one single class. 168 | def filter_single_class(index): 169 | 170 | # From a tensor of shape (n_boxes, n_classes + 4 coordinates) extract 171 | # a tensor of shape (n_boxes, 1 + 4 coordinates) that contains the 172 | # confidence values for just one class, determined by `index`. 173 | confidences = tf.expand_dims(batch_item[..., index], axis=-1) 174 | class_id = tf.fill(dims=tf.shape(confidences), value=tf.to_float(index)) 175 | box_coordinates = batch_item[...,-4:] 176 | 177 | single_class = tf.concat([class_id, confidences, box_coordinates], axis=-1) 178 | 179 | # Apply confidence thresholding with respect to the class defined by `index`. 180 | threshold_met = single_class[:,1] > self.tf_confidence_thresh 181 | single_class = tf.boolean_mask(tensor=single_class, 182 | mask=threshold_met) 183 | 184 | # If any boxes made the threshold, perform NMS. 185 | def perform_nms(): 186 | scores = single_class[...,1] 187 | 188 | # `tf.image.non_max_suppression()` needs the box coordinates in the format `(ymin, xmin, ymax, xmax)`. 189 | xmin = tf.expand_dims(single_class[...,-4], axis=-1) 190 | ymin = tf.expand_dims(single_class[...,-3], axis=-1) 191 | xmax = tf.expand_dims(single_class[...,-2], axis=-1) 192 | ymax = tf.expand_dims(single_class[...,-1], axis=-1) 193 | boxes = tf.concat(values=[ymin, xmin, ymax, xmax], axis=-1) 194 | 195 | maxima_indices = tf.image.non_max_suppression(boxes=boxes, 196 | scores=scores, 197 | max_output_size=self.tf_nms_max_output_size, 198 | iou_threshold=self.iou_threshold, 199 | name='non_maximum_suppresion') 200 | maxima = tf.gather(params=single_class, 201 | indices=maxima_indices, 202 | axis=0) 203 | return maxima 204 | 205 | def no_confident_predictions(): 206 | return tf.constant(value=0.0, shape=(1,6)) 207 | 208 | single_class_nms = tf.cond(tf.equal(tf.size(single_class), 0), no_confident_predictions, perform_nms) 209 | 210 | # Make sure `single_class` is exactly `self.nms_max_output_size` elements long. 211 | padded_single_class = tf.pad(tensor=single_class_nms, 212 | paddings=[[0, self.tf_nms_max_output_size - tf.shape(single_class_nms)[0]], [0, 0]], 213 | mode='CONSTANT', 214 | constant_values=0.0) 215 | 216 | return padded_single_class 217 | 218 | # Iterate `filter_single_class()` over all class indices. 219 | filtered_single_classes = tf.map_fn(fn=lambda i: filter_single_class(i), 220 | elems=tf.range(1,n_classes), 221 | dtype=tf.float32, 222 | parallel_iterations=128, 223 | back_prop=False, 224 | swap_memory=False, 225 | infer_shape=True, 226 | name='loop_over_classes') 227 | 228 | # Concatenate the filtered results for all individual classes to one tensor. 229 | filtered_predictions = tf.reshape(tensor=filtered_single_classes, shape=(-1,6)) 230 | 231 | # Perform top-k filtering for this batch item or pad it in case there are 232 | # fewer than `self.top_k` boxes left at this point. Either way, produce a 233 | # tensor of length `self.top_k`.
By the time we return the final results tensor 234 | # for the whole batch, all batch items must have the same number of predicted 235 | # boxes so that the tensor dimensions are homogenous. If fewer than `self.top_k` 236 | # predictions are left after the filtering process above, we pad the missing 237 | # predictions with zeros as dummy entries. 238 | def top_k(): 239 | return tf.gather(params=filtered_predictions, 240 | indices=tf.nn.top_k(filtered_predictions[:, 1], k=self.tf_top_k, sorted=True).indices, 241 | axis=0) 242 | def pad_and_top_k(): 243 | padded_predictions = tf.pad(tensor=filtered_predictions, 244 | paddings=[[0, self.tf_top_k - tf.shape(filtered_predictions)[0]], [0, 0]], 245 | mode='CONSTANT', 246 | constant_values=0.0) 247 | return tf.gather(params=padded_predictions, 248 | indices=tf.nn.top_k(padded_predictions[:, 1], k=self.tf_top_k, sorted=True).indices, 249 | axis=0) 250 | 251 | top_k_boxes = tf.cond(tf.greater_equal(tf.shape(filtered_predictions)[0], self.tf_top_k), top_k, pad_and_top_k) 252 | 253 | return top_k_boxes 254 | 255 | # Iterate `filter_predictions()` over all batch items. 256 | output_tensor = tf.map_fn(fn=lambda x: filter_predictions(x), 257 | elems=y_pred, 258 | dtype=None, 259 | parallel_iterations=128, 260 | back_prop=False, 261 | swap_memory=False, 262 | infer_shape=True, 263 | name='loop_over_batch') 264 | 265 | return output_tensor 266 | 267 | def compute_output_shape(self, input_shape): 268 | batch_size, n_boxes, last_axis = input_shape 269 | return (batch_size, self.tf_top_k, 6) # Last axis: (class_ID, confidence, 4 box coordinates) 270 | 271 | def get_config(self): 272 | config = { 273 | 'confidence_thresh': self.confidence_thresh, 274 | 'iou_threshold': self.iou_threshold, 275 | 'top_k': self.top_k, 276 | 'nms_max_output_size': self.nms_max_output_size, 277 | 'coords': self.coords, 278 | 'normalize_coords': self.normalize_coords, 279 | 'img_height': self.img_height, 280 | 'img_width': self.img_width, 281 | } 282 | base_config = super(DecodeDetections, self).get_config() 283 | return dict(list(base_config.items()) + list(config.items())) 284 | -------------------------------------------------------------------------------- /keras_layers/keras_layer_DecodeDetectionsFast.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A custom Keras layer to decode the raw SSD prediction output. This is a modified 3 | and more efficient version of the `DetectionOutput` layer type in the original Caffe 4 | implementation of SSD. For a faithful replication of the original layer, please 5 | refer to the `DecodeDetections` layer. 6 | 7 | Copyright (C) 2018 Pierluigi Ferrari 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | http://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | ''' 21 | 22 | from __future__ import division 23 | import numpy as np 24 | import tensorflow as tf 25 | import keras.backend as K 26 | from keras.engine.topology import InputSpec 27 | from keras.engine.topology import Layer 28 | 29 | class DecodeDetectionsFast(Layer): 30 | ''' 31 | A Keras layer to decode the raw SSD prediction output. 32 | 33 | Input shape: 34 | 3D tensor of shape `(batch_size, n_boxes, n_classes + 12)`. 35 | 36 | Output shape: 37 | 3D tensor of shape `(batch_size, top_k, 6)`. 38 | ''' 39 | 40 | def __init__(self, 41 | confidence_thresh=0.01, 42 | iou_threshold=0.45, 43 | top_k=200, 44 | nms_max_output_size=400, 45 | coords='centroids', 46 | normalize_coords=True, 47 | img_height=None, 48 | img_width=None, 49 | **kwargs): 50 | ''' 51 | All default argument values follow the Caffe implementation. 52 | 53 | Arguments: 54 | confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific 55 | positive class in order to be considered for the non-maximum suppression stage for the respective class. 56 | A lower value will result in a larger part of the selection process being done by the non-maximum suppression 57 | stage, while a larger value will result in a larger part of the selection process happening in the confidence 58 | thresholding stage. 59 | iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold` 60 | with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers 61 | to the box score. 62 | top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the 63 | non-maximum suppression stage. 64 | nms_max_output_size (int, optional): The maximum number of predictions that will be left after performing non-maximum 65 | suppression. 66 | coords (str, optional): The box coordinate format that the model outputs. Must be 'centroids' 67 | i.e. the format `(cx, cy, w, h)` (box center coordinates, width, and height). Other coordinate formats are 68 | currently not supported. 69 | normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1]) 70 | and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs 71 | relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`. 72 | Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect 73 | coordinates. Requires `img_height` and `img_width` if set to `True`. 74 | img_height (int, optional): The height of the input images. Only needed if `normalize_coords` is `True`. 75 | img_width (int, optional): The width of the input images. Only needed if `normalize_coords` is `True`. 
76 | ''' 77 | if K.backend() != 'tensorflow': 78 | raise TypeError("This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend())) 79 | 80 | if normalize_coords and ((img_height is None) or (img_width is None)): 81 | raise ValueError("If relative box coordinates are supposed to be converted to absolute coordinates, the decoder needs the image size in order to decode the predictions, but `img_height == {}` and `img_width == {}`".format(img_height, img_width)) 82 | 83 | if coords != 'centroids': 84 | raise ValueError("The DetectionOutput layer currently only supports the 'centroids' coordinate format.") 85 | 86 | # We need these members for the config. 87 | self.confidence_thresh = confidence_thresh 88 | self.iou_threshold = iou_threshold 89 | self.top_k = top_k 90 | self.normalize_coords = normalize_coords 91 | self.img_height = img_height 92 | self.img_width = img_width 93 | self.coords = coords 94 | self.nms_max_output_size = nms_max_output_size 95 | 96 | # We need these members for TensorFlow. 97 | self.tf_confidence_thresh = tf.constant(self.confidence_thresh, name='confidence_thresh') 98 | self.tf_iou_threshold = tf.constant(self.iou_threshold, name='iou_threshold') 99 | self.tf_top_k = tf.constant(self.top_k, name='top_k') 100 | self.tf_normalize_coords = tf.constant(self.normalize_coords, name='normalize_coords') 101 | self.tf_img_height = tf.constant(self.img_height, dtype=tf.float32, name='img_height') 102 | self.tf_img_width = tf.constant(self.img_width, dtype=tf.float32, name='img_width') 103 | self.tf_nms_max_output_size = tf.constant(self.nms_max_output_size, name='nms_max_output_size') 104 | 105 | super(DecodeDetectionsFast, self).__init__(**kwargs) 106 | 107 | def build(self, input_shape): 108 | self.input_spec = [InputSpec(shape=input_shape)] 109 | super(DecodeDetectionsFast, self).build(input_shape) 110 | 111 | def call(self, y_pred, mask=None): 112 | ''' 113 | Returns: 114 | 3D tensor of shape `(batch_size, top_k, 6)`. The second axis is zero-padded 115 | to always yield `top_k` predictions per batch item. The last axis contains 116 | the coordinates for each predicted box in the format 117 | `[class_id, confidence, xmin, ymin, xmax, ymax]`. 118 | ''' 119 | 120 | ##################################################################################### 121 | # 1. Convert the box coordinates from predicted anchor box offsets to predicted 122 | # absolute coordinates 123 | ##################################################################################### 124 | 125 | # Extract the predicted class IDs as the indices of the highest confidence values. 126 | class_ids = tf.expand_dims(tf.to_float(tf.argmax(y_pred[...,:-12], axis=-1)), axis=-1) 127 | # Extract the confidences of the maximal classes. 128 | confidences = tf.reduce_max(y_pred[...,:-12], axis=-1, keep_dims=True) 129 | 130 | # Convert anchor box offsets to image offsets. 131 | cx = y_pred[...,-12] * y_pred[...,-4] * y_pred[...,-6] + y_pred[...,-8] # cx = cx_pred * cx_variance * w_anchor + cx_anchor 132 | cy = y_pred[...,-11] * y_pred[...,-3] * y_pred[...,-5] + y_pred[...,-7] # cy = cy_pred * cy_variance * h_anchor + cy_anchor 133 | w = tf.exp(y_pred[...,-10] * y_pred[...,-2]) * y_pred[...,-6] # w = exp(w_pred * variance_w) * w_anchor 134 | h = tf.exp(y_pred[...,-9] * y_pred[...,-1]) * y_pred[...,-5] # h = exp(h_pred * variance_h) * h_anchor 135 | 136 | # Convert 'centroids' to 'corners'. 
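# (I.e. the box center and size become top-left and bottom-right corners. For example, assuming cx=100, cy=60, w=40, h=20, this yields (xmin, ymin, xmax, ymax) = (80, 50, 120, 70).)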
137 | xmin = cx - 0.5 * w 138 | ymin = cy - 0.5 * h 139 | xmax = cx + 0.5 * w 140 | ymax = cy + 0.5 * h 141 | 142 | # If the model predicts box coordinates relative to the image dimensions and they are supposed 143 | # to be converted back to absolute coordinates, do that. 144 | def normalized_coords(): 145 | xmin1 = tf.expand_dims(xmin * self.tf_img_width, axis=-1) 146 | ymin1 = tf.expand_dims(ymin * self.tf_img_height, axis=-1) 147 | xmax1 = tf.expand_dims(xmax * self.tf_img_width, axis=-1) 148 | ymax1 = tf.expand_dims(ymax * self.tf_img_height, axis=-1) 149 | return xmin1, ymin1, xmax1, ymax1 150 | def non_normalized_coords(): 151 | return tf.expand_dims(xmin, axis=-1), tf.expand_dims(ymin, axis=-1), tf.expand_dims(xmax, axis=-1), tf.expand_dims(ymax, axis=-1) 152 | 153 | xmin, ymin, xmax, ymax = tf.cond(self.tf_normalize_coords, normalized_coords, non_normalized_coords) 154 | 155 | # Concatenate the one-hot class confidences and the converted box coordinates to form the decoded predictions tensor. 156 | y_pred = tf.concat(values=[class_ids, confidences, xmin, ymin, xmax, ymax], axis=-1) 157 | 158 | ##################################################################################### 159 | # 2. Perform confidence thresholding, non-maximum suppression, and top-k filtering. 160 | ##################################################################################### 161 | 162 | batch_size = tf.shape(y_pred)[0] # Output dtype: tf.int32 163 | n_boxes = tf.shape(y_pred)[1] 164 | n_classes = y_pred.shape[2] - 4 165 | class_indices = tf.range(1, n_classes) 166 | 167 | # Create a function that filters the predictions for the given batch item. Specifically, it performs: 168 | # - confidence thresholding 169 | # - non-maximum suppression (NMS) 170 | # - top-k filtering 171 | def filter_predictions(batch_item): 172 | 173 | # Keep only the non-background boxes. 174 | positive_boxes = tf.not_equal(batch_item[...,0], 0.0) 175 | predictions = tf.boolean_mask(tensor=batch_item, 176 | mask=positive_boxes) 177 | 178 | def perform_confidence_thresholding(): 179 | # Apply confidence thresholding. 180 | threshold_met = predictions[:,1] > self.tf_confidence_thresh 181 | return tf.boolean_mask(tensor=predictions, 182 | mask=threshold_met) 183 | def no_positive_boxes(): 184 | return tf.constant(value=0.0, shape=(1,6)) 185 | 186 | # If there are any positive predictions, perform confidence thresholding. 187 | predictions_conf_thresh = tf.cond(tf.equal(tf.size(predictions), 0), no_positive_boxes, perform_confidence_thresholding) 188 | 189 | def perform_nms(): 190 | scores = predictions_conf_thresh[...,1] 191 | 192 | # `tf.image.non_max_suppression()` needs the box coordinates in the format `(ymin, xmin, ymax, xmax)`. 
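# (Hence the four coordinate columns are sliced out below and re-concatenated in that order; the class ID and confidence in columns 0 and 1 are not passed to the NMS op, which only needs boxes and scores.)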
193 | xmin = tf.expand_dims(predictions_conf_thresh[...,-4], axis=-1) 194 | ymin = tf.expand_dims(predictions_conf_thresh[...,-3], axis=-1) 195 | xmax = tf.expand_dims(predictions_conf_thresh[...,-2], axis=-1) 196 | ymax = tf.expand_dims(predictions_conf_thresh[...,-1], axis=-1) 197 | boxes = tf.concat(values=[ymin, xmin, ymax, xmax], axis=-1) 198 | 199 | maxima_indices = tf.image.non_max_suppression(boxes=boxes, 200 | scores=scores, 201 | max_output_size=self.tf_nms_max_output_size, 202 | iou_threshold=self.iou_threshold, 203 | name='non_maximum_suppresion') 204 | maxima = tf.gather(params=predictions_conf_thresh, 205 | indices=maxima_indices, 206 | axis=0) 207 | return maxima 208 | def no_confident_predictions(): 209 | return tf.constant(value=0.0, shape=(1,6)) 210 | 211 | # If any boxes made the threshold, perform NMS. 212 | predictions_nms = tf.cond(tf.equal(tf.size(predictions_conf_thresh), 0), no_confident_predictions, perform_nms) 213 | 214 | # Perform top-k filtering for this batch item or pad it in case there are 215 | # fewer than `self.top_k` boxes left at this point. Either way, produce a 216 | # tensor of length `self.top_k`. By the time we return the final results tensor 217 | # for the whole batch, all batch items must have the same number of predicted 218 | # boxes so that the tensor dimensions are homogenous. If fewer than `self.top_k` 219 | # predictions are left after the filtering process above, we pad the missing 220 | # predictions with zeros as dummy entries. 221 | def top_k(): 222 | return tf.gather(params=predictions_nms, 223 | indices=tf.nn.top_k(predictions_nms[:, 1], k=self.tf_top_k, sorted=True).indices, 224 | axis=0) 225 | def pad_and_top_k(): 226 | padded_predictions = tf.pad(tensor=predictions_nms, 227 | paddings=[[0, self.tf_top_k - tf.shape(predictions_nms)[0]], [0, 0]], 228 | mode='CONSTANT', 229 | constant_values=0.0) 230 | return tf.gather(params=padded_predictions, 231 | indices=tf.nn.top_k(padded_predictions[:, 1], k=self.tf_top_k, sorted=True).indices, 232 | axis=0) 233 | 234 | top_k_boxes = tf.cond(tf.greater_equal(tf.shape(predictions_nms)[0], self.tf_top_k), top_k, pad_and_top_k) 235 | 236 | return top_k_boxes 237 | 238 | # Iterate `filter_predictions()` over all batch items. 239 | output_tensor = tf.map_fn(fn=lambda x: filter_predictions(x), 240 | elems=y_pred, 241 | dtype=None, 242 | parallel_iterations=128, 243 | back_prop=False, 244 | swap_memory=False, 245 | infer_shape=True, 246 | name='loop_over_batch') 247 | 248 | return output_tensor 249 | 250 | def compute_output_shape(self, input_shape): 251 | batch_size, n_boxes, last_axis = input_shape 252 | return (batch_size, self.tf_top_k, 6) # Last axis: (class_ID, confidence, 4 box coordinates) 253 | 254 | def get_config(self): 255 | config = { 256 | 'confidence_thresh': self.confidence_thresh, 257 | 'iou_threshold': self.iou_threshold, 258 | 'top_k': self.top_k, 259 | 'nms_max_output_size': self.nms_max_output_size, 260 | 'coords': self.coords, 261 | 'normalize_coords': self.normalize_coords, 262 | 'img_height': self.img_height, 263 | 'img_width': self.img_width, 264 | } 265 | base_config = super(DecodeDetectionsFast, self).get_config() 266 | return dict(list(base_config.items()) + list(config.items())) 267 | -------------------------------------------------------------------------------- /keras_layers/keras_layer_L2Normalization.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A custom Keras layer to perform L2-normalization. 
3 | 4 | Copyright (C) 2018 Pierluigi Ferrari 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | ''' 18 | 19 | from __future__ import division 20 | import numpy as np 21 | import keras.backend as K 22 | from keras.engine.topology import InputSpec 23 | from keras.engine.topology import Layer 24 | 25 | class L2Normalization(Layer): 26 | ''' 27 | Performs L2 normalization on the input tensor with a learnable scaling parameter 28 | as described in the paper "Parsenet: Looking Wider to See Better" (see references) 29 | and as used in the original SSD model. 30 | 31 | Arguments: 32 | gamma_init (int): The initial scaling parameter. Defaults to 20 following the 33 | SSD paper. 34 | 35 | Input shape: 36 | 4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'` 37 | or `(batch, height, width, channels)` if `dim_ordering = 'tf'`. 38 | 39 | Returns: 40 | The scaled tensor. Same shape as the input tensor. 41 | 42 | References: 43 | http://cs.unc.edu/~wliu/papers/parsenet.pdf 44 | ''' 45 | 46 | def __init__(self, gamma_init=20, **kwargs): 47 | if K.image_dim_ordering() == 'tf': 48 | self.axis = 3 49 | else: 50 | self.axis = 1 51 | self.gamma_init = gamma_init 52 | super(L2Normalization, self).__init__(**kwargs) 53 | 54 | def build(self, input_shape): 55 | self.input_spec = [InputSpec(shape=input_shape)] 56 | gamma = self.gamma_init * np.ones((input_shape[self.axis],)) 57 | self.gamma = K.variable(gamma, name='{}_gamma'.format(self.name)) 58 | self.trainable_weights = [self.gamma] 59 | super(L2Normalization, self).build(input_shape) 60 | 61 | def call(self, x, mask=None): 62 | output = K.l2_normalize(x, self.axis) 63 | return output * self.gamma 64 | 65 | def get_config(self): 66 | config = { 67 | 'gamma_init': self.gamma_init 68 | } 69 | base_config = super(L2Normalization, self).get_config() 70 | return dict(list(base_config.items()) + list(config.items())) 71 | -------------------------------------------------------------------------------- /keras_loss_function/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/keras_loss_function/__init__.py -------------------------------------------------------------------------------- /keras_loss_function/keras_ssd_loss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | The Keras-compatible loss function for the SSD model. Currently supports TensorFlow only. 3 | 4 | Copyright (C) 2018 Pierluigi Ferrari 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 
8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | ''' 18 | 19 | from __future__ import division 20 | import tensorflow as tf 21 | 22 | class SSDLoss: 23 | ''' 24 | The SSD loss, see https://arxiv.org/abs/1512.02325. 25 | ''' 26 | 27 | def __init__(self, 28 | neg_pos_ratio=3, 29 | n_neg_min=0, 30 | alpha=1.0): 31 | ''' 32 | Arguments: 33 | neg_pos_ratio (int, optional): The maximum ratio of negative (i.e. background) 34 | to positive ground truth boxes to include in the loss computation. 35 | There are no actual background ground truth boxes of course, but `y_true` 36 | contains anchor boxes labeled with the background class. Since 37 | the number of background boxes in `y_true` will usually exceed 38 | the number of positive boxes by far, it is necessary to balance 39 | their influence on the loss. Defaults to 3 following the paper. 40 | n_neg_min (int, optional): The minimum number of negative ground truth boxes to 41 | enter the loss computation *per batch*. This argument can be used to make 42 | sure that the model learns from a minimum number of negatives in batches 43 | in which there are very few, or even none at all, positive ground truth 44 | boxes. It defaults to 0 and if used, it should be set to a value that 45 | stands in reasonable proportion to the batch size used for training. 46 | alpha (float, optional): A factor to weight the localization loss in the 47 | computation of the total loss. Defaults to 1.0 following the paper. 48 | ''' 49 | self.neg_pos_ratio = neg_pos_ratio 50 | self.n_neg_min = n_neg_min 51 | self.alpha = alpha 52 | 53 | def smooth_L1_loss(self, y_true, y_pred): 54 | ''' 55 | Compute smooth L1 loss, see references. 56 | 57 | Arguments: 58 | y_true (nD tensor): A TensorFlow tensor of any shape containing the ground truth data. 59 | In this context, the expected tensor has shape `(batch_size, #boxes, 4)` and 60 | contains the ground truth bounding box coordinates, where the last dimension 61 | contains `(xmin, xmax, ymin, ymax)`. 62 | y_pred (nD tensor): A TensorFlow tensor of identical structure to `y_true` containing 63 | the predicted data, in this context the predicted bounding box coordinates. 64 | 65 | Returns: 66 | The smooth L1 loss, a nD-1 Tensorflow tensor. In this context a 2D tensor 67 | of shape (batch, n_boxes_total). 68 | 69 | References: 70 | https://arxiv.org/abs/1504.08083 71 | ''' 72 | absolute_loss = tf.abs(y_true - y_pred) 73 | square_loss = 0.5 * (y_true - y_pred)**2 74 | l1_loss = tf.where(tf.less(absolute_loss, 1.0), square_loss, absolute_loss - 0.5) 75 | return tf.reduce_sum(l1_loss, axis=-1) 76 | 77 | def log_loss(self, y_true, y_pred): 78 | ''' 79 | Compute the softmax log loss. 80 | 81 | Arguments: 82 | y_true (nD tensor): A TensorFlow tensor of any shape containing the ground truth data. 83 | In this context, the expected tensor has shape (batch_size, #boxes, #classes) 84 | and contains the ground truth bounding box categories. 85 | y_pred (nD tensor): A TensorFlow tensor of identical structure to `y_true` containing 86 | the predicted data, in this context the predicted bounding box categories. 
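Since `y_true` is one-hot, this amounts to the cross-entropy `-sum_k y_true[k] * log(y_pred[k])` per box, as implemented below.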
87 | 88 | Returns: 89 | The softmax log loss, a nD-1 Tensorflow tensor. In this context a 2D tensor 90 | of shape (batch, n_boxes_total). 91 | ''' 92 | # Make sure that `y_pred` doesn't contain any zeros (which would break the log function) 93 | y_pred = tf.maximum(y_pred, 1e-15) 94 | # Compute the log loss 95 | log_loss = -tf.reduce_sum(y_true * tf.log(y_pred), axis=-1) 96 | return log_loss 97 | 98 | def compute_loss(self, y_true, y_pred): 99 | ''' 100 | Compute the loss of the SSD model prediction against the ground truth. 101 | 102 | Arguments: 103 | y_true (array): A Numpy array of shape `(batch_size, #boxes, #classes + 12)`, 104 | where `#boxes` is the total number of boxes that the model predicts 105 | per image. Be careful to make sure that the index of each given 106 | box in `y_true` is the same as the index for the corresponding 107 | box in `y_pred`. The last axis must have length `#classes + 12` and contain 108 | `[classes one-hot encoded, 4 ground truth box coordinate offsets, 8 arbitrary entries]` 109 | in this order, including the background class. The last eight entries of the 110 | last axis are not used by this function and therefore their contents are 111 | irrelevant, they only exist so that `y_true` has the same shape as `y_pred`, 112 | where the last four entries of the last axis contain the anchor box 113 | coordinates, which are needed during inference. Important: Boxes that 114 | you want the cost function to ignore need to have a one-hot 115 | class vector of all zeros. 116 | y_pred (Keras tensor): The model prediction. The shape is identical 117 | to that of `y_true`, i.e. `(batch_size, #boxes, #classes + 12)`. 118 | The last axis must contain entries in the format 119 | `[classes one-hot encoded, 4 predicted box coordinate offsets, 8 arbitrary entries]`. 120 | 121 | Returns: 122 | A scalar, the total multitask loss for classification and localization. 123 | ''' 124 | self.neg_pos_ratio = tf.constant(self.neg_pos_ratio) 125 | self.n_neg_min = tf.constant(self.n_neg_min) 126 | self.alpha = tf.constant(self.alpha) 127 | 128 | batch_size = tf.shape(y_pred)[0] # Output dtype: tf.int32 129 | n_boxes = tf.shape(y_pred)[1] # Output dtype: tf.int32, note that `n_boxes` in this context denotes the total number of boxes per image, not the number of boxes per cell. 130 | 131 | # 1: Compute the losses for class and box predictions for every box. 132 | 133 | classification_loss = tf.to_float(self.log_loss(y_true[:,:,:-12], y_pred[:,:,:-12])) # Output shape: (batch_size, n_boxes) 134 | localization_loss = tf.to_float(self.smooth_L1_loss(y_true[:,:,-12:-8], y_pred[:,:,-12:-8])) # Output shape: (batch_size, n_boxes) 135 | 136 | # 2: Compute the classification losses for the positive and negative targets. 137 | 138 | # Create masks for the positive and negative ground truth classes. 139 | negatives = y_true[:,:,0] # Tensor of shape (batch_size, n_boxes) 140 | positives = tf.to_float(tf.reduce_max(y_true[:,:,1:-12], axis=-1)) # Tensor of shape (batch_size, n_boxes) 141 | 142 | # Count the number of positive boxes (classes 1 to n) in y_true across the whole batch. 143 | n_positive = tf.reduce_sum(positives) 144 | 145 | # Now mask all negative boxes and sum up the losses for the positive boxes PER batch item 146 | # (Keras loss functions must output one scalar loss value PER batch item, rather than just 147 | # one scalar for the entire batch, that's why we're not summing across all axes). 
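# (Illustration with made-up numbers: if one batch item has classification losses [0.2, 1.5, 0.7] for its three boxes and `positives` is [0., 1., 1.], then its positive class loss is 1.5 + 0.7 = 2.2; the background box contributes nothing here.)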
148 | pos_class_loss = tf.reduce_sum(classification_loss * positives, axis=-1) # Tensor of shape (batch_size,) 149 | 150 | # Compute the classification loss for the negative default boxes (if there are any). 151 | 152 | # First, compute the classification loss for all negative boxes. 153 | neg_class_loss_all = classification_loss * negatives # Tensor of shape (batch_size, n_boxes) 154 | n_neg_losses = tf.count_nonzero(neg_class_loss_all, dtype=tf.int32) # The number of non-zero loss entries in `neg_class_loss_all` 155 | # What's the point of `n_neg_losses`? For the next step, which will be to compute which negative boxes enter the classification 156 | # loss, we don't just want to know how many negative ground truth boxes there are, but for how many of those there actually is 157 | # a positive (i.e. non-zero) loss. This is necessary because `tf.nn.top_k()` in the function below will pick the top k boxes with 158 | # the highest losses no matter what, even if it receives a vector where all losses are zero. In the unlikely event that all negative 159 | # classification losses ARE actually zero though, this behavior might lead to `tf.nn.top_k()` returning the indices of positive 160 | # boxes, leading to an incorrect negative classification loss computation, and hence an incorrect overall loss computation. 161 | # We therefore need to make sure that `n_negative_keep`, which assumes the role of the `k` argument in `tf.nn.top_k()`, 162 | # is at most the number of negative boxes for which there is a positive classification loss. 163 | 164 | # Compute the number of negative examples we want to account for in the loss. 165 | # We'll keep at most `self.neg_pos_ratio` times the number of positives in `y_true`, but at least `self.n_neg_min` (unless `n_neg_losses` is smaller). 166 | n_negative_keep = tf.minimum(tf.maximum(self.neg_pos_ratio * tf.to_int32(n_positive), self.n_neg_min), n_neg_losses) 167 | 168 | # In the unlikely case when either (1) there are no negative ground truth boxes at all 169 | # or (2) the classification loss for all negative boxes is zero, return zero as the `neg_class_loss`. 170 | def f1(): 171 | return tf.zeros([batch_size]) 172 | # Otherwise compute the negative loss. 173 | def f2(): 174 | # Now we'll identify the top-k (where k == `n_negative_keep`) boxes with the highest confidence loss that 175 | # belong to the background class in the ground truth data. Note that this doesn't necessarily mean that the model 176 | # predicted the wrong class for those boxes, it just means that the loss for those boxes is the highest. 177 | 178 | # To do this, we reshape `neg_class_loss_all` to 1D... 179 | neg_class_loss_all_1D = tf.reshape(neg_class_loss_all, [-1]) # Tensor of shape (batch_size * n_boxes,) 180 | # ...and then we get the indices for the `n_negative_keep` boxes with the highest loss out of those... 181 | values, indices = tf.nn.top_k(neg_class_loss_all_1D, 182 | k=n_negative_keep, 183 | sorted=False) # We don't need them sorted. 184 | # ...and with these indices we'll create a mask...
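# (For example, assuming `batch_size * n_boxes == 6` and `indices == [1, 4]`, `tf.scatter_nd()` below yields the mask [0, 1, 0, 0, 1, 0], which is then reshaped back to shape `(batch_size, n_boxes)`.)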
185 | negatives_keep = tf.scatter_nd(indices=tf.expand_dims(indices, axis=1), 186 | updates=tf.ones_like(indices, dtype=tf.int32), 187 | shape=tf.shape(neg_class_loss_all_1D)) # Tensor of shape (batch_size * n_boxes,) 188 | negatives_keep = tf.to_float(tf.reshape(negatives_keep, [batch_size, n_boxes])) # Tensor of shape (batch_size, n_boxes) 189 | # ...and use it to keep only those boxes and mask all other classification losses 190 | neg_class_loss = tf.reduce_sum(classification_loss * negatives_keep, axis=-1) # Tensor of shape (batch_size,) 191 | return neg_class_loss 192 | 193 | neg_class_loss = tf.cond(tf.equal(n_neg_losses, tf.constant(0)), f1, f2) 194 | 195 | class_loss = pos_class_loss + neg_class_loss # Tensor of shape (batch_size,) 196 | 197 | # 3: Compute the localization loss for the positive targets. 198 | # We don't compute a localization loss for negative predicted boxes (obviously: there are no ground truth boxes they would correspond to). 199 | 200 | loc_loss = tf.reduce_sum(localization_loss * positives, axis=-1) # Tensor of shape (batch_size,) 201 | 202 | # 4: Compute the total loss. 203 | 204 | total_loss = (class_loss + self.alpha * loc_loss) / tf.maximum(1.0, n_positive) # In case `n_positive == 0` 205 | # Keras has the annoying habit of dividing the loss by the batch size, which sucks in our case 206 | # because the relevant criterion to average our loss over is the number of positive boxes in the batch 207 | # (by which we're dividing in the line above), not the batch size. So in order to revert Keras' averaging 208 | # over the batch size, we'll have to multiply by it. 209 | total_loss = total_loss * tf.to_float(batch_size) 210 | 211 | return total_loss 212 | -------------------------------------------------------------------------------- /misc_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/misc_utils/__init__.py -------------------------------------------------------------------------------- /misc_utils/tensor_sampling_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Utilities that are useful to sub- or up-sample weights tensors. 3 | 4 | Copyright (C) 2018 Pierluigi Ferrari 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | ''' 18 | 19 | import numpy as np 20 | 21 | def sample_tensors(weights_list, sampling_instructions, axes=None, init=None, mean=0.0, stddev=0.005): 22 | ''' 23 | Can sub-sample and/or up-sample individual dimensions of the tensors in the given list 24 | of input tensors. 25 | 26 | It is possible to sub-sample some dimensions and up-sample other dimensions at the same time. 27 | 28 | The tensors in the list will be sampled consistently, i.e. for any given dimension that 29 | corresponds among all tensors in the list, the same elements will be picked for every tensor 30 | along that dimension. 
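(For example, if the last axis of a convolution kernel and the only axis of its bias vector correspond to each other, then whichever channel indices are kept for the kernel's last axis will also be the indices kept from the bias.)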
31 | 32 | For dimensions that are being sub-sampled, you can either provide a list of the indices 33 | that should be picked, or you can provide the number of elements to be sub-sampled, in which 34 | case the elements will be chosen at random. 35 | 36 | For dimensions that are being up-sampled, "filler" elements will be inserted at random 37 | positions along the respective dimension. These filler elements will be initialized either 38 | with zero or from a normal distribution with selectable mean and standard deviation. 39 | 40 | Arguments: 41 | weights_list (list): A list of Numpy arrays. Each array represents one of the tensors 42 | to be sampled. The tensor with the greatest number of dimensions must be the first 43 | element in the list. For example, in the case of the weights of a 2D convolutional 44 | layer, the kernel must be the first element in the list and the bias the second, 45 | not the other way around. For all tensors in the list after the first tensor, the 46 | lengths of each of their axes must be identical to the length of some axis of the 47 | first tensor. 48 | sampling_instructions (list): A list that contains the sampling instructions for each 49 | dimension of the first tensor. If the first tensor has `n` dimensions, then this 50 | must be a list of length `n`. That means sampling instructions for every dimension 51 | of the first tensor must still be given even if not all dimensions should be changed. 52 | The elements of this list can be either lists of integers or integers. If the sampling 53 | instruction for a given dimension is a list of integers, then these integers represent 54 | the indices of the elements of that dimension that will be sub-sampled. If the sampling 55 | instruction for a given dimension is an integer, then that number of elements will be 56 | sampled along said dimension. If the integer is greater than the number of elements 57 | of the input tensors in that dimension, that dimension will be up-sampled. If the integer 58 | is smaller than the number of elements of the input tensors in that dimension, that 59 | dimension will be sub-sampled. If the integer is equal to the number of elements 60 | of the input tensors in that dimension, that dimension will remain the same. 61 | axes (list, optional): Only relevant if `weights_list` contains more than one tensor. 62 | This list contains a list for each additional tensor in `weights_list` beyond the first. 63 | Each of these lists contains integers that determine to which axes of the first tensor 64 | the axes of the respective tensor correspond. For example, let the first tensor be a 65 | 4D tensor and the second tensor in the list be a 2D tensor. If the first element of 66 | `axes` is the list `[2,3]`, then that means that the two axes of the second tensor 67 | correspond to the last two axes of the first tensor, in the same order. The point of 68 | this list is for the program to know, if a given dimension of the first tensor is to 69 | be sub- or up-sampled, which dimensions of the other tensors in the list must be 70 | sub- or up-sampled accordingly. 71 | init (list, optional): Only relevant for up-sampling. Must be `None` or a list of strings 72 | that determines for each tensor in `weights_list` how the newly inserted values should 73 | be initialized. The possible values are 'gaussian' for initialization from a normal 74 | distribution with the selected mean and standard deviation (see the following two arguments), 75 | or 'zeros' for zero-initialization.
If `None`, all initializations default to
76 |             'gaussian'.
77 |         mean (float, optional): Only relevant for up-sampling. The mean of the values that will
78 |             be inserted into the tensors at random in the case of up-sampling.
79 |         stddev (float, optional): Only relevant for up-sampling. The standard deviation of the
80 |             values that will be inserted into the tensors at random in the case of up-sampling.
81 | 
82 |     Returns:
83 |         A list containing the sampled tensors in the same order in which they were given.
84 |     '''
85 | 
86 |     first_tensor = weights_list[0]
87 | 
88 |     if (not isinstance(sampling_instructions, (list, tuple))) or (len(sampling_instructions) != first_tensor.ndim):
89 |         raise ValueError("The sampling instructions must be a list whose length is the number of dimensions of the first tensor in `weights_list`.")
90 | 
91 |     if (not init is None) and len(init) != len(weights_list):
92 |         raise ValueError("`init` must either be `None` or a list of strings that has the same length as `weights_list`.")
93 | 
94 |     up_sample = [] # Store the dimensions along which we need to up-sample.
95 |     out_shape = [] # Store the shape of the output tensor here.
96 |     # Store two stages of the new (sub-sampled and/or up-sampled) weights tensors in the following two lists.
97 |     subsampled_weights_list = [] # Tensors after sub-sampling, but before up-sampling (if any).
98 |     upsampled_weights_list = [] # Sub-sampled tensors after up-sampling (if any), i.e. the final output tensors.
99 | 
100 |     # Create the slicing arrays from the sampling instructions.
101 |     sampling_slices = []
102 |     for i, sampling_inst in enumerate(sampling_instructions):
103 |         if isinstance(sampling_inst, (list, tuple)):
104 |             amax = np.amax(np.array(sampling_inst))
105 |             if amax >= first_tensor.shape[i]:
106 |                 raise ValueError("The sampling instructions for dimension {} contain index {}, which is out of range for that dimension.".format(i, amax))
107 |             sampling_slices.append(np.array(sampling_inst))
108 |             out_shape.append(len(sampling_inst))
109 |         elif isinstance(sampling_inst, int):
110 |             out_shape.append(sampling_inst)
111 |             if sampling_inst == first_tensor.shape[i]:
112 |                 # Nothing to sample here, we're keeping the original number of elements along this axis.
113 |                 sampling_slice = np.arange(sampling_inst)
114 |                 sampling_slices.append(sampling_slice)
115 |             elif sampling_inst < first_tensor.shape[i]:
116 |                 # We want to SUB-sample this dimension. Randomly pick `sampling_inst` elements from it.
117 |                 sampling_slice1 = np.array([0]) # We will always sample class 0, the background class.
118 |                 # Sample the rest of the classes.
119 |                 sampling_slice2 = np.sort(np.random.choice(np.arange(1, first_tensor.shape[i]), sampling_inst - 1, replace=False))
120 |                 sampling_slice = np.concatenate([sampling_slice1, sampling_slice2])
121 |                 sampling_slices.append(sampling_slice)
122 |             else:
123 |                 # We want to UP-sample. Pick all elements from this dimension.
124 |                 sampling_slice = np.arange(first_tensor.shape[i])
125 |                 sampling_slices.append(sampling_slice)
126 |                 up_sample.append(i)
127 |         else:
128 |             raise ValueError("Each element of the sampling instructions must be either an integer or a list/tuple of integers, but received `{}`.".format(type(sampling_inst)))
129 | 
130 |     # Process the first tensor.
131 |     subsampled_first_tensor = np.copy(first_tensor[np.ix_(*sampling_slices)])
132 |     subsampled_weights_list.append(subsampled_first_tensor)
133 | 
134 |     # Process the other tensors.
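    # (Hypothetical illustration, not from the original code: if `weights_list[0]` is a conv
    # kernel of shape (3, 3, 512, 324) and `weights_list[1]` is its bias of shape (324,), then
    # `axes=[[3]]` maps the bias's single axis to axis 3 of the kernel, so the bias is sliced
    # below with the same indices that were just used to sub-sample the kernel's last axis.)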
135 | if len(weights_list) > 1: 136 | for j in range(1, len(weights_list)): 137 | this_sampling_slices = [sampling_slices[i] for i in axes[j-1]] # Get the sampling slices for this tensor. 138 | subsampled_weights_list.append(np.copy(weights_list[j][np.ix_(*this_sampling_slices)])) 139 | 140 | if up_sample: 141 | # Take care of the dimensions that are to be up-sampled. 142 | 143 | out_shape = np.array(out_shape) 144 | 145 | # Process the first tensor. 146 | if init is None or init[0] == 'gaussian': 147 | upsampled_first_tensor = np.random.normal(loc=mean, scale=stddev, size=out_shape) 148 | elif init[0] == 'zeros': 149 | upsampled_first_tensor = np.zeros(out_shape) 150 | else: 151 | raise ValueError("Valid initializations are 'gaussian' and 'zeros', but received '{}'.".format(init[0])) 152 | # Pick the indices of the elements in `upsampled_first_tensor` that should be occupied by `subsampled_first_tensor`. 153 | up_sample_slices = [np.arange(k) for k in subsampled_first_tensor.shape] 154 | for i in up_sample: 155 | # Randomly select across which indices of this dimension to scatter the elements of `new_weights_tensor` in this dimension. 156 | up_sample_slice1 = np.array([0]) 157 | up_sample_slice2 = np.sort(np.random.choice(np.arange(1, upsampled_first_tensor.shape[i]), subsampled_first_tensor.shape[i] - 1, replace=False)) 158 | up_sample_slices[i] = np.concatenate([up_sample_slice1, up_sample_slice2]) 159 | upsampled_first_tensor[np.ix_(*up_sample_slices)] = subsampled_first_tensor 160 | upsampled_weights_list.append(upsampled_first_tensor) 161 | 162 | # Process the other tensors 163 | if len(weights_list) > 1: 164 | for j in range(1, len(weights_list)): 165 | if init is None or init[j] == 'gaussian': 166 | upsampled_tensor = np.random.normal(loc=mean, scale=stddev, size=out_shape[axes[j-1]]) 167 | elif init[j] == 'zeros': 168 | upsampled_tensor = np.zeros(out_shape[axes[j-1]]) 169 | else: 170 | raise ValueError("Valid initializations are 'gaussian' and 'zeros', but received '{}'.".format(init[j])) 171 | this_up_sample_slices = [up_sample_slices[i] for i in axes[j-1]] # Get the up-sampling slices for this tensor. 172 | upsampled_tensor[np.ix_(*this_up_sample_slices)] = subsampled_weights_list[j] 173 | upsampled_weights_list.append(upsampled_tensor) 174 | 175 | return upsampled_weights_list 176 | else: 177 | return subsampled_weights_list 178 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/models/__init__.py -------------------------------------------------------------------------------- /ssd300_evaluation_COCO.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SSD300 MS COCO Evaluation Tutorial\n", 8 | "\n", 9 | "This is a brief tutorial that goes over how to evaluate a trained SSD300 on one of the MS COCO datasets using the official MS COCO Python tools available here:\n", 10 | "\n", 11 | "https://github.com/cocodataset/cocoapi\n", 12 | "\n", 13 | "Follow the instructions in the GitHub repository above to install the `pycocotools`. 
Note that you will need to set the path to your local copy of the PythonAPI directory in the subsequent code cell.\n",
14 |     "\n",
15 |     "Of course the evaluation procedure described here is identical for SSD512; you just need to build a different model."
16 |    ]
17 |   },
18 |   {
19 |    "cell_type": "code",
20 |    "execution_count": null,
21 |    "metadata": {
22 |     "collapsed": true
23 |    },
24 |    "outputs": [],
25 |    "source": [
26 |     "from keras import backend as K\n",
27 |     "from keras.models import load_model\n",
28 |     "from keras.optimizers import Adam\n",
29 |     "from scipy.misc import imread\n",
30 |     "import numpy as np\n",
31 |     "from matplotlib import pyplot as plt\n",
32 |     "import sys\n",
33 |     "\n",
34 |     "# TODO: Specify the directory that contains the `pycocotools` here.\n",
35 |     "pycocotools_dir = '../cocoapi/PythonAPI/'\n",
36 |     "if pycocotools_dir not in sys.path:\n",
37 |     "    sys.path.insert(0, pycocotools_dir)\n",
38 |     "\n",
39 |     "from pycocotools.coco import COCO\n",
40 |     "from pycocotools.cocoeval import COCOeval\n",
41 |     "\n",
42 |     "from models.keras_ssd300 import ssd_300\n",
43 |     "from keras_loss_function.keras_ssd_loss import SSDLoss\n",
44 |     "from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes\n",
45 |     "from keras_layers.keras_layer_DecodeDetections import DecodeDetections\n",
46 |     "from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast\n",
47 |     "from keras_layers.keras_layer_L2Normalization import L2Normalization\n",
48 |     "from data_generator.object_detection_2d_data_generator import DataGenerator\n",
49 |     "from eval_utils.coco_utils import get_coco_category_maps, predict_all_to_json\n",
50 |     "\n",
51 |     "%matplotlib inline"
52 |    ]
53 |   },
54 |   {
55 |    "cell_type": "code",
56 |    "execution_count": 2,
57 |    "metadata": {
58 |     "collapsed": true
59 |    },
60 |    "outputs": [],
61 |    "source": [
62 |     "# Set the input image size for the model.\n",
63 |     "img_height = 300\n",
64 |     "img_width = 300"
65 |    ]
66 |   },
67 |   {
68 |    "cell_type": "markdown",
69 |    "metadata": {},
70 |    "source": [
71 |     "## 1. Load a trained SSD\n",
72 |     "\n",
73 |     "Either load a trained model or build a model and load trained weights into it. Since the HDF5 files I'm providing contain only the weights for the various SSD versions, not the complete models, you'll have to go with the latter option when using this implementation for the first time. You can then of course save the model and next time load the full model directly, without having to build it.\n",
74 |     "\n",
75 |     "You can find the download links to all the trained model weights in the README."
76 |    ]
77 |   },
78 |   {
79 |    "cell_type": "markdown",
80 |    "metadata": {},
81 |    "source": [
82 |     "### 1.1. 
Build the model and load trained weights into it" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 4, 88 | "metadata": { 89 | "collapsed": true 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "# 1: Build the Keras model\n", 94 | "\n", 95 | "K.clear_session() # Clear previous models from memory.\n", 96 | "\n", 97 | "model = ssd_300(image_size=(img_height, img_width, 3),\n", 98 | " n_classes=80,\n", 99 | " mode='inference',\n", 100 | " l2_regularization=0.0005,\n", 101 | " scales=[0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05], # The scales for Pascal VOC are [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]\n", 102 | " aspect_ratios_per_layer=[[1.0, 2.0, 0.5],\n", 103 | " [1.0, 2.0, 0.5, 3.0, 1.0/3.0],\n", 104 | " [1.0, 2.0, 0.5, 3.0, 1.0/3.0],\n", 105 | " [1.0, 2.0, 0.5, 3.0, 1.0/3.0],\n", 106 | " [1.0, 2.0, 0.5],\n", 107 | " [1.0, 2.0, 0.5]],\n", 108 | " two_boxes_for_ar1=True,\n", 109 | " steps=[8, 16, 32, 64, 100, 300],\n", 110 | " offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n", 111 | " clip_boxes=False,\n", 112 | " variances=[0.1, 0.1, 0.2, 0.2],\n", 113 | " normalize_coords=True,\n", 114 | " subtract_mean=[123, 117, 104],\n", 115 | " swap_channels=[2, 1, 0],\n", 116 | " confidence_thresh=0.01,\n", 117 | " iou_threshold=0.45,\n", 118 | " top_k=200,\n", 119 | " nms_max_output_size=400)\n", 120 | "\n", 121 | "# 2: Load the trained weights into the model.\n", 122 | "\n", 123 | "# TODO: Set the path of the trained weights.\n", 124 | "weights_path = 'path/to/trained/weights/VGG_coco_SSD_300x300_iter_400000.h5'\n", 125 | "\n", 126 | "model.load_weights(weights_path, by_name=True)\n", 127 | "\n", 128 | "# 3: Compile the model so that Keras won't complain the next time you load it.\n", 129 | "\n", 130 | "adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)\n", 131 | "\n", 132 | "ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)\n", 133 | "\n", 134 | "model.compile(optimizer=adam, loss=ssd_loss.compute_loss)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "Or" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "### 1.2. Load a trained model" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": { 155 | "collapsed": true 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "# TODO: Set the path to the `.h5` file of the model to be loaded.\n", 160 | "model_path = 'path/to/trained/model.h5'\n", 161 | "\n", 162 | "# We need to create an SSDLoss object in order to pass that to the model loader.\n", 163 | "ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)\n", 164 | "\n", 165 | "K.clear_session() # Clear previous models from memory.\n", 166 | "\n", 167 | "model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,\n", 168 | " 'L2Normalization': L2Normalization,\n", 169 | " 'DecodeDetections': DecodeDetections,\n", 170 | " 'compute_loss': ssd_loss.compute_loss})" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "## 2. Create a data generator for the evaluation dataset\n", 178 | "\n", 179 | "Instantiate a `DataGenerator` that will serve the evaluation dataset during the prediction phase." 
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "code",
184 |    "execution_count": 5,
185 |    "metadata": {
186 |     "collapsed": true
187 |    },
188 |    "outputs": [],
189 |    "source": [
190 |     "dataset = DataGenerator()\n",
191 |     "\n",
192 |     "# TODO: Set the paths to the dataset here.\n",
193 |     "MS_COCO_dataset_images_dir = '../../datasets/MicrosoftCOCO/val2017/'\n",
194 |     "MS_COCO_dataset_annotations_filename = '../../datasets/MicrosoftCOCO/annotations/instances_val2017.json'\n",
195 |     "\n",
196 |     "dataset.parse_json(images_dirs=[MS_COCO_dataset_images_dir],\n",
197 |     "                   annotations_filenames=[MS_COCO_dataset_annotations_filename],\n",
198 |     "                   ground_truth_available=False, # It doesn't matter whether you set this `True` or `False` because the ground truth won't be used anyway, but the parsing goes faster if you don't load the ground truth.\n",
199 |     "                   include_classes='all',\n",
200 |     "                   ret=False)\n",
201 |     "\n",
202 |     "# We need the `classes_to_cats` dictionary. Read the documentation of this function to understand why.\n",
203 |     "cats_to_classes, classes_to_cats, cats_to_names, classes_to_names = get_coco_category_maps(MS_COCO_dataset_annotations_filename)"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "markdown",
208 |    "metadata": {},
209 |    "source": [
210 |     "## 3. Run the predictions over the evaluation dataset\n",
211 |     "\n",
212 |     "Now that we have instantiated a model and a data generator to serve the dataset, we can make predictions on the entire dataset and save those predictions in a JSON file in the format in which COCOeval needs them for the evaluation.\n",
213 |     "\n",
214 |     "Read the documentation to learn what the arguments mean, but the arguments as preset below are the parameters that were used in the evaluation of the original Caffe models."
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 6,
220 |    "metadata": {
221 |     "collapsed": true
222 |    },
223 |    "outputs": [],
224 |    "source": [
225 |     "# TODO: Set the desired output file name and the batch size.\n",
226 |     "results_file = 'detections_val2017_ssd300_results.json'\n",
227 |     "batch_size = 20 # Ideally, choose a batch size that divides the number of images in the dataset."
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": 7,
233 |    "metadata": {},
234 |    "outputs": [
235 |     {
236 |      "name": "stdout",
237 |      "output_type": "stream",
238 |      "text": [
239 |       "Number of images in the evaluation dataset: 5000\n",
240 |       "Producing results file: 100%|██████████| 250/250 [04:11<00:00, 1.05s/it]\n",
241 |       "Prediction results saved in 'detections_val2017_ssd300_results.json'\n"
242 |      ]
243 |     }
244 |    ],
245 |    "source": [
246 |     "predict_all_to_json(out_file=results_file,\n",
247 |     "                    model=model,\n",
248 |     "                    img_height=img_height,\n",
249 |     "                    img_width=img_width,\n",
250 |     "                    classes_to_cats=classes_to_cats,\n",
251 |     "                    data_generator=dataset,\n",
252 |     "                    batch_size=batch_size,\n",
253 |     "                    data_generator_mode='resize',\n",
254 |     "                    model_mode='inference',\n",
255 |     "                    confidence_thresh=0.01,\n",
256 |     "                    iou_threshold=0.45,\n",
257 |     "                    top_k=200,\n",
258 |     "                    normalize_coords=True)"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "markdown",
263 |    "metadata": {},
264 |    "source": [
265 |     "## 4. Run the evaluation\n",
266 |     "\n",
267 |     "Now we'll load the JSON file containing all the predictions that we produced in the last step and feed it to `COCOeval`. Note that the evaluation may take a while."
268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 8, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "name": "stdout", 277 | "output_type": "stream", 278 | "text": [ 279 | "loading annotations into memory...\n", 280 | "Done (t=0.46s)\n", 281 | "creating index...\n", 282 | "index created!\n", 283 | "Loading and preparing results...\n", 284 | "DONE (t=5.87s)\n", 285 | "creating index...\n", 286 | "index created!\n" 287 | ] 288 | } 289 | ], 290 | "source": [ 291 | "coco_gt = COCO(MS_COCO_dataset_annotations_filename)\n", 292 | "coco_dt = coco_gt.loadRes(results_file)\n", 293 | "image_ids = sorted(coco_gt.getImgIds())" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 9, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "name": "stdout", 303 | "output_type": "stream", 304 | "text": [ 305 | "Running per image evaluation...\n", 306 | "Evaluate annotation type *bbox*\n", 307 | "DONE (t=64.15s).\n", 308 | "Accumulating evaluation results...\n", 309 | "DONE (t=10.58s).\n", 310 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.247\n", 311 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.424\n", 312 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.253\n", 313 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.059\n", 314 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.264\n", 315 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.414\n", 316 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.232\n", 317 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.341\n", 318 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.362\n", 319 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.102\n", 320 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.401\n", 321 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.577\n" 322 | ] 323 | } 324 | ], 325 | "source": [ 326 | "cocoEval = COCOeval(cocoGt=coco_gt,\n", 327 | " cocoDt=coco_dt,\n", 328 | " iouType='bbox')\n", 329 | "cocoEval.params.imgIds = image_ids\n", 330 | "cocoEval.evaluate()\n", 331 | "cocoEval.accumulate()\n", 332 | "cocoEval.summarize()" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": { 339 | "collapsed": true 340 | }, 341 | "outputs": [], 342 | "source": [] 343 | } 344 | ], 345 | "metadata": { 346 | "kernelspec": { 347 | "display_name": "Python 3", 348 | "language": "python", 349 | "name": "python3" 350 | }, 351 | "language_info": { 352 | "codemirror_mode": { 353 | "name": "ipython", 354 | "version": 3 355 | }, 356 | "file_extension": ".py", 357 | "mimetype": "text/x-python", 358 | "name": "python", 359 | "nbconvert_exporter": "python", 360 | "pygments_lexer": "ipython3", 361 | "version": "3.5.3" 362 | } 363 | }, 364 | "nbformat": 4, 365 | "nbformat_minor": 2 366 | } 367 | -------------------------------------------------------------------------------- /ssd_encoder_decoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/ssd_encoder_decoder/__init__.py -------------------------------------------------------------------------------- /ssd_encoder_decoder/matching_utils.py: 
-------------------------------------------------------------------------------- 1 | ''' 2 | Utilities to match ground truth boxes to anchor boxes. 3 | 4 | Copyright (C) 2018 Pierluigi Ferrari 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | ''' 18 | 19 | from __future__ import division 20 | import numpy as np 21 | 22 | def match_bipartite_greedy(weight_matrix): 23 | ''' 24 | Returns a bipartite matching according to the given weight matrix. 25 | 26 | The algorithm works as follows: 27 | 28 | Let the first axis of `weight_matrix` represent ground truth boxes 29 | and the second axis anchor boxes. 30 | The ground truth box that has the greatest similarity with any 31 | anchor box will be matched first, then out of the remaining ground 32 | truth boxes, the ground truth box that has the greatest similarity 33 | with any of the remaining anchor boxes will be matched second, and 34 | so on. That is, the ground truth boxes will be matched in descending 35 | order by maximum similarity with any of the respectively remaining 36 | anchor boxes. 37 | The runtime complexity is O(m^2 * n), where `m` is the number of 38 | ground truth boxes and `n` is the number of anchor boxes. 39 | 40 | Arguments: 41 | weight_matrix (array): A 2D Numpy array that represents the weight matrix 42 | for the matching process. If `(m,n)` is the shape of the weight matrix, 43 | it must be `m <= n`. The weights can be integers or floating point 44 | numbers. The matching process will maximize, i.e. larger weights are 45 | preferred over smaller weights. 46 | 47 | Returns: 48 | A 1D Numpy array of length `weight_matrix.shape[0]` that represents 49 | the matched index along the second axis of `weight_matrix` for each index 50 | along the first axis. 51 | ''' 52 | 53 | weight_matrix = np.copy(weight_matrix) # We'll modify this array. 54 | num_ground_truth_boxes = weight_matrix.shape[0] 55 | all_gt_indices = list(range(num_ground_truth_boxes)) # Only relevant for fancy-indexing below. 56 | 57 | # This 1D array will contain for each ground truth box the index of 58 | # the matched anchor box. 59 | matches = np.zeros(num_ground_truth_boxes, dtype=np.int) 60 | 61 | # In each iteration of the loop below, exactly one ground truth box 62 | # will be matched to one anchor box. 63 | for _ in range(num_ground_truth_boxes): 64 | 65 | # Find the maximal anchor-ground truth pair in two steps: First, reduce 66 | # over the anchor boxes and then reduce over the ground truth boxes. 67 | anchor_indices = np.argmax(weight_matrix, axis=1) # Reduce along the anchor box axis. 68 | overlaps = weight_matrix[all_gt_indices, anchor_indices] 69 | ground_truth_index = np.argmax(overlaps) # Reduce along the ground truth box axis. 70 | anchor_index = anchor_indices[ground_truth_index] 71 | matches[ground_truth_index] = anchor_index # Set the match. 72 | 73 | # Set the row of the matched ground truth box and the column of the matched 74 | # anchor box to all zeros. 
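        # (For a hypothetical illustration with made-up numbers: if ground truth box 2 was just
        # matched to anchor box 1749, then row 2 and column 1749 of `weight_matrix` are zeroed
        # out in the two lines below.)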
This ensures that those boxes will not be matched again,
75 |         # because they will never be the best matches for any other boxes.
76 |         weight_matrix[ground_truth_index] = 0
77 |         weight_matrix[:,anchor_index] = 0
78 | 
79 |     return matches
80 | 
81 | def match_multi(weight_matrix, threshold):
82 |     '''
83 |     Matches all elements along the second axis of `weight_matrix` to their best
84 |     matches along the first axis, subject to the constraint that the weight of a match
85 |     must be greater than or equal to `threshold` in order to produce a match.
86 | 
87 |     If the weight matrix contains elements that should be ignored, the row or column
88 |     representing the respective element should be set to a value below `threshold`.
89 | 
90 |     Arguments:
91 |         weight_matrix (array): A 2D Numpy array that represents the weight matrix
92 |             for the matching process. If `(m,n)` is the shape of the weight matrix,
93 |             it must be `m <= n`. The weights can be integers or floating point
94 |             numbers. The matching process will maximize, i.e. larger weights are
95 |             preferred over smaller weights.
96 |         threshold (float): A float that represents the threshold (i.e. lower bound)
97 |             that must be met by a pair of elements to produce a match.
98 | 
99 |     Returns:
100 |         Two 1D Numpy arrays of equal length that represent the matched indices. The first
101 |         array contains the indices along the first axis of `weight_matrix`, the second array
102 |         contains the indices along the second axis.
103 |     '''
104 | 
105 |     num_anchor_boxes = weight_matrix.shape[1]
106 |     all_anchor_indices = list(range(num_anchor_boxes)) # Only relevant for fancy-indexing below.
107 | 
108 |     # Find the best ground truth match for every anchor box.
109 |     ground_truth_indices = np.argmax(weight_matrix, axis=0) # Array of shape (weight_matrix.shape[1],)
110 |     overlaps = weight_matrix[ground_truth_indices, all_anchor_indices] # Array of shape (weight_matrix.shape[1],)
111 | 
112 |     # Filter out the matches with a weight below the threshold.
113 |     anchor_indices_thresh_met = np.nonzero(overlaps >= threshold)[0]
114 |     gt_indices_thresh_met = ground_truth_indices[anchor_indices_thresh_met]
115 | 
116 |     return gt_indices_thresh_met, anchor_indices_thresh_met
117 | -------------------------------------------------------------------------------- /training_summaries/ssd300_pascal_07+12_loss_history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierluigiferrari/ssd_keras/3ac9adaf3889f1020d74b0eeefea281d5e82f353/training_summaries/ssd300_pascal_07+12_loss_history.png -------------------------------------------------------------------------------- /training_summaries/ssd300_pascal_07+12_training_summary.md: --------------------------------------------------------------------------------
1 | ## SSD300 Pascal VOC 07+12 Training Summary
2 | ---
3 | 
4 | This is a summary of the training of an SSD300 on the Pascal VOC 2007 `trainval` and 2012 `trainval` image sets using the same configuration as in the original Caffe implementation for that same model.
5 | 
6 | Since neither the SSD paper nor the GitHub repository of the original Caffe SSD implementation provides details on the training progress, only the final evaluation results, some may find the loss curves and intermediate mAP evaluation results provided here helpful for comparison with their own training.
7 | 
8 | What you see below are the training results of running the [`ssd300_training.ipynb`](../ssd300_training.ipynb) notebook as is, in which all parameters are already preset to replicate the training configuration of the original SSD300 "07+12" model. I made just one small change: I occasionally ran into `OOM` errors at batch size 32, so I trained with batch size 31.
9 | 
10 | An important note about the data shown below:
11 | 
12 | SGD is inherently unstable at the beginning of the training. Remember that the optimization is stochastic, i.e. if you start a fresh training ten times, the loss pattern over the first training steps can look different each time, and in the case of SGD, very different. One time the loss might decrease smoothly right from the start, which is what happened in my case below. Another time the loss might get temporarily stuck on a plateau very early on, so that nothing seems to be happening for a couple of hundred training steps. Yet another time the loss might blow up right at the start and become `NaN`. As long as the loss doesn't become `NaN`, the final convergence loss does not, in my experience, strongly depend on the loss progression in the very early phase of the training. In other words, even if the loss doesn't decrease as fast in the beginning, you will likely still end up with the same convergence loss; it will just take longer to get there. As a benchmark, after the first 1,000 training steps I've seen training loss values anywhere between roughly 10 and 15. The Adam optimizer doesn't suffer from this variability to the same extent and is evidently the superior optimizer, but since the original Caffe models were trained with SGD, I used SGD to reproduce the original results.
13 | 
14 | ### Training and Validation Loss
15 | 
16 | What you see below are the training and validation losses, recorded every 1,000 training steps. The validation loss is computed on the Pascal VOC 2007 `test` image set. In my case it took only around 105,000 instead of the expected 120,000 iterations for the validation loss to converge, but as explained above, it may well take longer. The drop you're seeing at 56,000 training steps is where I reduced the learning rate from 0.001 to 0.0001. The original learning rate schedule calls for this reduction only after 80,000 training steps, but since the loss decreased so quickly in the beginning in my case, I had to decrease the learning rate earlier. I reduced the learning rate to 0.00001 after 76,000 training steps and kept it constant from there.
17 | 
18 | ![loss_history](ssd300_pascal_07+12_loss_history.png)
19 | 
20 | ### Mean Average Precision
21 | 
22 | Here are the intermediate and final mAP values on Pascal VOC 2007 `test`, evaluated using the official Pascal VOCdevkit 2007 Matlab evaluation code. The table shows the best values after every 20,000 training steps. Once again, the progress may be slower depending on how the early phase of the training goes. In another training run that I started with the same configuration, I got an mAP of only 0.665 after the first 20,000 training steps. The full model after 102,000 training steps can be downloaded [here](https://drive.google.com/open?id=1-MYYaZbIHNPtI2zzklgVBAjssbP06BeA). 
23 | 24 | | | Steps | 20k | 40k | 60k | 80k | 100k | 102k | 25 | |-------------|-------|----------|----------|----------|----------|----------|----------| 26 | |aeroplane | AP | 0.6874 | 0.7401 | 0.7679 | 0.7827 | 0.7912 | 0.7904 | 27 | |bicycle | AP | 0.7786 | 0.8203 | 0.795 | 0.8436 | 0.8453 | 0.8466 | 28 | |bird | AP | 0.6855 | 0.6939 | 0.7191 | 0.7564 | 0.7655 | 0.7672 | 29 | |boat | AP | 0.5804 | 0.6173 | 0.6258 | 0.6866 | 0.6896 | 0.6952 | 30 | |bottle | AP | 0.3449 | 0.4288 | 0.453 | 0.4681 | 0.4896 | 0.4844 | 31 | |bus | AP | 0.7771 | 0.8332 | 0.8343 | 0.8525 | 0.8537 | 0.8554 | 32 | |car | AP | 0.8048 | 0.8435 | 0.8345 | 0.848 | 0.8546 | 0.8543 | 33 | |cat | AP | 0.852 | 0.7989 | 0.8551 | 0.8759 | 0.8727 | 0.8746 | 34 | |chair | AP | 0.5085 | 0.5548 | 0.5287 | 0.5873 | 0.5895 | 0.5911 | 35 | |cow | AP | 0.7359 | 0.7821 | 0.791 | 0.8278 | 0.8271 | 0.8243 | 36 | |diningtable | AP | 0.6805 | 0.7181 | 0.7502 | 0.7543 | 0.7733 | 0.7614 | 37 | |dog | AP | 0.8118 | 0.7898 | 0.8222 | 0.8546 | 0.8544 | 0.8552 | 38 | |horse | AP | 0.823 | 0.8501 | 0.8532 | 0.8586 | 0.8688 | 0.867 | 39 | |motorbike | AP | 0.7725 | 0.7935 | 0.8081 | 0.845 | 0.8471 | 0.8509 | 40 | |person | AP | 0.73 | 0.7514 | 0.7634 | 0.7851 | 0.7869 | 0.7862 | 41 | |pottedplant | AP | 0.4112 | 0.4335 | 0.4982 | 0.5051 | 0.5131 | 0.5182 | 42 | |sheep | AP | 0.6821 | 0.7324 | 0.7283 | 0.7717 | 0.7783 | 0.7799 | 43 | |sofa | AP | 0.7417 | 0.7824 | 0.7663 | 0.7928 | 0.7911 | 0.794 | 44 | |train | AP | 0.7942 | 0.8169 | 0.8326 | 0.867 | 0.862 | 0.8596 | 45 | |tvmonitor | AP | 0.725 | 0.7301 | 0.7259 | 0.7589 | 0.7649 | 0.7651 | 46 | | |**mAP**|**0.696** |**0.726** |**0.738** |**0.766** |**0.7709**|**0.7711**| 47 | --------------------------------------------------------------------------------