├── .gitignore ├── LICENSE ├── README.md ├── augmentation.py ├── blazeface.py ├── environment.yml ├── predictor.py ├── ssd_loss.py ├── trainer.py └── utils ├── __init__.py ├── bbox_utils.py ├── data_utils.py ├── drawing_utils.py ├── io_utils.py ├── landmark_utils.py └── train_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | trained/* 3 | data/* 4 | logs/* 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # BlazeFace
2 |
3 | This is an **unofficial** TensorFlow BlazeFace implementation from scratch.
4 | This repo includes the entire BlazeFace training pipeline.
5 | However, since the dataset used for training is a modified version of several datasets, it is not shared at this stage.
6 | Anchor / prior box hyperparameters were taken from the [MediaPipe](https://github.com/google/mediapipe) implementation.
7 | Loss calculation and augmentation methods were implemented as in [SSD](https://github.com/FurkanOM/tf-ssd).
8 |
9 | It is implemented and tested with **TensorFlow 2.0, 2.1, and 2.2**.
10 |
11 | ## Usage
12 |
13 | Project models were created in a virtual environment using [miniconda](https://docs.conda.io/en/latest/miniconda.html).
14 | You can also create the required virtual environment with [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file).
15 |
16 | To create the virtual environment (a TensorFlow 2 GPU environment):
17 |
18 | ```sh
19 | conda env create -f environment.yml
20 | ```
21 |
22 | To train and test the BlazeFace model:
23 |
24 | ```sh
25 | python trainer.py
26 | python predictor.py
27 | ```
28 |
29 | If you have GPU issues, you can use the **-handle-gpu** flag with these commands:
30 |
31 | ```sh
32 | python trainer.py -handle-gpu
33 | ```
34 |
35 | ### References
36 |
37 | * BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs [[paper]](https://arxiv.org/abs/1907.05047)
38 | * SSD: Single Shot MultiBox Detector [[paper]](https://arxiv.org/abs/1512.02325)
39 | * MediaPipe [[code]](https://github.com/google/mediapipe)
40 | * BlazeFace-PyTorch [[code]](https://github.com/hollance/BlazeFace-PyTorch)
41 |
--------------------------------------------------------------------------------
/augmentation.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from utils import bbox_utils, landmark_utils
3 |
4 | def apply(img, gt_boxes, gt_landmarks):
5 |     """Randomly applying data augmentation methods to image and ground truth boxes.
6 |     inputs:
7 |         img = (height, width, depth)
8 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
9 |             in normalized form [0, 1]
10 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
11 |             in normalized form [0, 1]
12 |     outputs:
13 |         modified_img = (final_height, final_width, depth)
14 |         modified_gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
15 |             in normalized form [0, 1]
16 |         modified_gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
17 |             in normalized form [0, 1]
18 |     """
19 |     # Color operations
20 |     # Randomly change hue, saturation, brightness and contrast of image
21 |     color_methods = [random_brightness, random_contrast, random_hue, random_saturation]
22 |     # Geometric operations
23 |     # Randomly sample a patch image and ground truth boxes
24 |     geometric_methods = [patch]
25 |     #
26 |     for augmentation_method in geometric_methods + color_methods:
27 |         img, gt_boxes, gt_landmarks = randomly_apply_operation(augmentation_method, img, gt_boxes, gt_landmarks)
28 |     #
29 |     img = tf.clip_by_value(img, 0., 1.)
30 |     return img, gt_boxes, gt_landmarks
31 |
32 | def get_random_bool():
33 |     """Generating random boolean.
34 |     outputs:
35 |         random boolean 0d tensor
36 |     """
37 |     return tf.greater(tf.random.uniform((), dtype=tf.float32), 0.5)
38 |
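For context, the `apply` pipeline above chains one geometric and four color operations, each applied with probability 0.5 through `randomly_apply_operation` (defined next). A minimal usage sketch with dummy normalized inputs (the shapes follow the docstrings above; the example values are not from the repo):

```python
import tensorflow as tf
import augmentation

# One fake RGB image in [0, 1] with a single face box and 6 landmarks.
img = tf.random.uniform((128, 128, 3))
gt_boxes = tf.constant([[0.2, 0.3, 0.6, 0.7]], dtype=tf.float32)     # (1, [y1, x1, y2, x2])
gt_landmarks = tf.random.uniform((1, 6, 2), minval=0.3, maxval=0.7)  # (1, 6, [x, y])

aug_img, aug_boxes, aug_landmarks = augmentation.apply(img, gt_boxes, gt_landmarks)
print(aug_img.shape, aug_boxes.shape, aug_landmarks.shape)  # (128, 128, 3) (1, 4) (1, 6, 2)
```

Because every operation is wrapped in `tf.cond`, the same call works both eagerly and inside a `tf.data` pipeline.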
39 | def randomly_apply_operation(operation, img, gt_boxes, gt_landmarks, *args):
40 |     """Randomly applying the given method to the image and ground truth boxes.
41 |     inputs:
42 |         operation = callable method
43 |         img = (height, width, depth)
44 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
45 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
46 |     outputs:
47 |         modified_or_not_img = (final_height, final_width, depth)
48 |         modified_or_not_gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
49 |         modified_or_not_gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
50 |     """
51 |     return tf.cond(
52 |         get_random_bool(),
53 |         lambda: operation(img, gt_boxes, gt_landmarks, *args),
54 |         lambda: (img, gt_boxes, gt_landmarks)
55 |     )
56 |
57 | def random_brightness(img, gt_boxes, gt_landmarks, max_delta=0.12):
58 |     """Randomly change brightness of the image.
59 |     inputs:
60 |         img = (height, width, depth)
61 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
62 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
63 |     outputs:
64 |         modified_img = (height, width, depth)
65 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
66 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
67 |     """
68 |     return tf.image.random_brightness(img, max_delta), gt_boxes, gt_landmarks
69 |
70 | def random_contrast(img, gt_boxes, gt_landmarks, lower=0.5, upper=1.5):
71 |     """Randomly change contrast of the image.
72 |     inputs:
73 |         img = (height, width, depth)
74 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
75 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
76 |     outputs:
77 |         modified_img = (height, width, depth)
78 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
79 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
80 |     """
81 |     return tf.image.random_contrast(img, lower, upper), gt_boxes, gt_landmarks
82 |
83 | def random_hue(img, gt_boxes, gt_landmarks, max_delta=0.08):
84 |     """Randomly change hue of the image.
85 |     inputs:
86 |         img = (height, width, depth)
87 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
88 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
89 |     outputs:
90 |         modified_img = (height, width, depth)
91 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
92 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
93 |     """
94 |     return tf.image.random_hue(img, max_delta), gt_boxes, gt_landmarks
95 |
96 | def random_saturation(img, gt_boxes, gt_landmarks, lower=0.5, upper=1.5):
97 |     """Randomly change saturation of the image.
98 |     inputs:
99 |         img = (height, width, depth)
100 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
101 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
102 |     outputs:
103 |         modified_img = (height, width, depth)
104 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
105 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
106 |     """
107 |     return tf.image.random_saturation(img, lower, upper), gt_boxes, gt_landmarks
108 |
109 | ##############################################################################
110 | ## Sample patch start
111 | ##############################################################################
112 |
113 | def get_random_min_overlap():
114 |     """Generating random minimum overlap value.
115 |     outputs:
116 |         min_overlap = random minimum overlap value 0d tensor
117 |     """
118 |     overlaps = tf.constant([0.1, 0.3, 0.5, 0.7, 0.9], dtype=tf.float32)
119 |     i = tf.random.uniform((), minval=0, maxval=tf.shape(overlaps)[0], dtype=tf.int32)
120 |     return overlaps[i]
121 |
122 | def expand_image(img, gt_boxes, gt_landmarks, height, width):
123 |     """Randomly expanding image and adjusting ground truth object coordinates.
124 |     inputs:
125 |         img = (height, width, depth)
126 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
127 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
128 |         height = height of the image
129 |         width = width of the image
130 |     outputs:
131 |         modified_img = (final_height, final_width, depth)
132 |         modified_gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
133 |         modified_gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
134 |     """
135 |     expansion_ratio = tf.random.uniform((), minval=1, maxval=4, dtype=tf.float32)
136 |     final_height, final_width = tf.round(height * expansion_ratio), tf.round(width * expansion_ratio)
137 |     pad_left = tf.round(tf.random.uniform((), minval=0, maxval=final_width - width, dtype=tf.float32))
138 |     pad_top = tf.round(tf.random.uniform((), minval=0, maxval=final_height - height, dtype=tf.float32))
139 |     pad_right = final_width - (width + pad_left)
140 |     pad_bottom = final_height - (height + pad_top)
141 |     #
142 |     mean, _ = tf.nn.moments(img, [0, 1])
143 |     expanded_image = tf.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0,0)), constant_values=-1)
144 |     expanded_image = tf.where(expanded_image == -1, mean, expanded_image)
145 |     #
146 |     min_max = tf.stack([-pad_top, -pad_left, pad_bottom+height, pad_right+width], -1) / [height, width, height, width]
147 |     modified_gt_boxes = bbox_utils.renormalize_bboxes_with_min_max(gt_boxes, min_max)
148 |     modified_gt_landmarks = landmark_utils.renormalize_landmarks_with_min_max(gt_landmarks, min_max)
149 |     #
150 |     return expanded_image, modified_gt_boxes, modified_gt_landmarks
151 |
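The `patch` function below drives its random crop with `tf.image.sample_distorted_bounding_box`, which returns a crop window whose overlap with the ground truth boxes is at least `min_object_covered`. A standalone sketch of that call (example values only, not taken from the repo):

```python
import tensorflow as tf

img = tf.random.uniform((300, 300, 3))
gt_boxes = tf.constant([[0.1, 0.1, 0.5, 0.5]], dtype=tf.float32)  # normalized [y1, x1, y2, x2]

begin, size, new_boundaries = tf.image.sample_distorted_bounding_box(
    tf.shape(img),
    bounding_boxes=tf.expand_dims(gt_boxes, 0),  # add the required batch dimension
    min_object_covered=0.5)

patch_img = tf.slice(img, begin, size)
# new_boundaries[0, 0] is the crop window as normalized [y1, x1, y2, x2];
# the repo feeds it to the renormalize helpers to remap boxes and landmarks.
print(patch_img.shape, new_boundaries[0, 0].numpy())
```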
152 | def patch(img, gt_boxes, gt_landmarks):
153 |     """Generating a random patch and adjusting the image and ground truth objects to this patch.
154 |     After this operation some of the ground truth boxes / objects could be removed from the image.
155 |     However, these objects are not excluded from the output; only their coordinates are set to zero.
156 |     inputs:
157 |         img = (height, width, depth)
158 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
159 |             in normalized form [0, 1]
160 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
161 |             in normalized form [0, 1]
162 |     outputs:
163 |         modified_img = (final_height, final_width, depth)
164 |         modified_gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
165 |             in normalized form [0, 1]
166 |         modified_gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
167 |             in normalized form [0, 1]
168 |     """
169 |     img_shape = tf.cast(tf.shape(img), dtype=tf.float32)
170 |     org_height, org_width = img_shape[0], img_shape[1]
171 |     # Randomly expand image and adjust bounding boxes
172 |     img, gt_boxes, gt_landmarks = randomly_apply_operation(expand_image, img, gt_boxes, gt_landmarks, org_height, org_width)
173 |     # Get random minimum overlap value
174 |     min_overlap = get_random_min_overlap()
175 |     #
176 |     begin, size, new_boundaries = tf.image.sample_distorted_bounding_box(
177 |         tf.shape(img),
178 |         bounding_boxes=tf.expand_dims(gt_boxes, 0),
179 |         min_object_covered=min_overlap)
180 |     #
181 |     img = tf.slice(img, begin, size)
182 |     img = tf.image.resize(img, (org_height, org_width))
183 |     gt_boxes = bbox_utils.renormalize_bboxes_with_min_max(gt_boxes, new_boundaries[0, 0])
184 |     gt_landmarks = landmark_utils.renormalize_landmarks_with_min_max(gt_landmarks, new_boundaries[0, 0])
185 |     #
186 |     return img, gt_boxes, gt_landmarks
187 |
--------------------------------------------------------------------------------
/blazeface.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras.models import Model
3 | from tensorflow.keras.layers import Layer, Input, DepthwiseConv2D, Conv2D, MaxPool2D, Add, Activation
4 |
5 | class HeadWrapper(Layer):
6 |     """Merging all feature maps for detections.
7 | inputs: 8 | conv4_3 = (batch_size, (layer_shape x aspect_ratios), last_dimension) 9 | ssd300 conv4_3 shape => (38 x 38 x 4) = 5776 10 | conv7 = (batch_size, (layer_shape x aspect_ratios), last_dimension) 11 | ssd300 conv7 shape => (19 x 19 x 6) = 2166 12 | conv8_2 = (batch_size, (layer_shape x aspect_ratios), last_dimension) 13 | ssd300 conv8_2 shape => (10 x 10 x 6) = 600 14 | conv9_2 = (batch_size, (layer_shape x aspect_ratios), last_dimension) 15 | ssd300 conv9_2 shape => (5 x 5 x 6) = 150 16 | conv10_2 = (batch_size, (layer_shape x aspect_ratios), last_dimension) 17 | ssd300 conv10_2 shape => (3 x 3 x 4) = 36 18 | conv11_2 = (batch_size, (layer_shape x aspect_ratios), last_dimension) 19 | ssd300 conv11_2 shape => (1 x 1 x 4) = 4 20 | Total = 8732 default box 21 | 22 | outputs: 23 | merged_head = (batch_size, total_bboxes, last_dimension) 24 | """ 25 | 26 | def __init__(self, last_dimension, **kwargs): 27 | super(HeadWrapper, self).__init__(**kwargs) 28 | self.last_dimension = last_dimension 29 | 30 | def get_config(self): 31 | config = super(HeadWrapper, self).get_config() 32 | config.update({"last_dimension": self.last_dimension}) 33 | return config 34 | 35 | def call(self, inputs): 36 | last_dimension = self.last_dimension 37 | batch_size = tf.shape(inputs[0])[0] 38 | outputs = [] 39 | for conv_layer in inputs: 40 | outputs.append(tf.reshape(conv_layer, (batch_size, -1, last_dimension))) 41 | # 42 | return tf.concat(outputs, axis=1) 43 | 44 | def blaze_block(input, filters, stride=1): 45 | y = input 46 | x = DepthwiseConv2D((5,5), strides=stride, padding="same")(input) 47 | x = Conv2D(filters, (1,1), padding="same")(x) 48 | if stride == 2: 49 | y = MaxPool2D((2,2))(y) 50 | y = Conv2D(filters, (1,1), padding="same")(y) 51 | output = Add()([x, y]) 52 | return Activation("relu")(output) 53 | 54 | def double_blaze_block(input, filters, stride=1): 55 | y = input 56 | x = DepthwiseConv2D((5,5), strides=stride, padding="same")(input) 57 | x = Conv2D(filters[0], (1,1), padding="same")(x) 58 | x = Activation("relu")(x) 59 | x = DepthwiseConv2D((5,5), padding="same")(x) 60 | x = Conv2D(filters[1], (1,1), padding="same")(x) 61 | if stride == 2: 62 | y = MaxPool2D((2,2))(y) 63 | y = Conv2D(filters[1], (1,1), padding="same")(y) 64 | output = Add()([x, y]) 65 | return Activation("relu")(output) 66 | 67 | def get_model(hyper_params): 68 | detections_per_layer = hyper_params["detections_per_layer"] 69 | img_size = hyper_params["img_size"] 70 | total_reg_points = hyper_params["total_landmarks"] * 2 + 4 71 | # 72 | input = Input(shape=(None, None, 3)) 73 | # First conv layer 74 | first_conv = Conv2D(24, (5,5), strides=2, padding="same", activation="relu")(input) 75 | # First blaze block 76 | single_1 = blaze_block(first_conv, 24) 77 | # Second blaze block 78 | single_2 = blaze_block(single_1, 24) 79 | # Third blaze block 80 | single_3 = blaze_block(single_2, 48, 2) 81 | # Fourth blaze block 82 | single_4 = blaze_block(single_3, 48) 83 | # Fifth blaze block 84 | single_5 = blaze_block(single_4, 48) 85 | # First double blaze block 86 | double_1 = double_blaze_block(single_5, [24, 96], 2) 87 | # Second double blaze block 88 | double_2 = double_blaze_block(double_1, [24, 96]) 89 | # Third double blaze block 90 | double_3 = double_blaze_block(double_2, [24, 96]) 91 | # Fourth double blaze block 92 | double_4 = double_blaze_block(double_3, [24, 96], 2) 93 | # Fifth double blaze block 94 | double_5 = double_blaze_block(double_4, [24, 96]) 95 | # Sixth double blaze block 96 | double_6 = 
double_blaze_block(double_5, [24, 96])
97 |     #
98 |     double_3_labels = Conv2D(detections_per_layer[0], (3, 3), padding="same")(double_3)
99 |     double_6_labels = Conv2D(detections_per_layer[1], (3, 3), padding="same")(double_6)
100 |     #
101 |     double_3_boxes = Conv2D(detections_per_layer[0] * total_reg_points, (3, 3), padding="same")(double_3)
102 |     double_6_boxes = Conv2D(detections_per_layer[1] * total_reg_points, (3, 3), padding="same")(double_6)
103 |     #
104 |     pred_labels = HeadWrapper(1, name="conf_head")([double_3_labels, double_6_labels])
105 |     pred_labels = Activation("sigmoid", name="conf")(pred_labels)
106 |     pred_deltas = HeadWrapper(total_reg_points, name="loc")([double_3_boxes, double_6_boxes])
107 |     #
108 |     return Model(inputs=input, outputs=[pred_deltas, pred_labels])
109 |
110 | def init_model(model):
111 |     """Initializing the model with dummy data, so that weights (together with the optimizer state) can be loaded and the graph can be constructed.
112 |     inputs:
113 |         model = tf.keras.model
114 |
115 |     """
116 |     model(tf.random.uniform((1, 512, 512, 3)))
117 |
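For orientation, here is how the two detection heads combine into the 896 prior boxes of the original BlazeFace. The hyperparameter values below are illustrative assumptions (`img_size=128`, `detections_per_layer=[2, 6]`, `total_landmarks=6`); the repo's actual values come from `train_utils.get_hyper_params()`, which is not shown in this section:

```python
import tensorflow as tf
import blazeface

hyper_params = {
    "img_size": 128,                 # assumed value, see above
    "detections_per_layer": [2, 6],  # anchors per cell on the 16x16 and 8x8 maps
    "total_landmarks": 6,
}
model = blazeface.get_model(hyper_params)
pred_deltas, pred_labels = model(tf.random.uniform((1, 128, 128, 3)))
# 16*16*2 + 8*8*6 = 512 + 384 = 896 prior boxes
print(pred_deltas.shape)  # (1, 896, 16) -> 4 box deltas + 6 landmarks * 2 coordinates
print(pred_labels.shape)  # (1, 896, 1)  -> face / no-face score
```

`get_model` only reads these three keys, so the sketch runs without the full hyperparameter dictionary.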
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: blazeface-env
2 | channels:
3 |   - defaults
4 | dependencies:
5 |   - _libgcc_mutex=0.1=main
6 |   - _tflow_select=2.1.0=gpu
7 |   - absl-py=0.9.0=py37_0
8 |   - asn1crypto=1.3.0=py37_0
9 |   - astor=0.8.0=py37_0
10 |   - blas=1.0=mkl
11 |   - blinker=1.4=py37_0
12 |   - c-ares=1.15.0=h7b6447c_1001
13 |   - ca-certificates=2020.1.1=0
14 |   - cachetools=3.1.1=py_0
15 |   - certifi=2019.11.28=py37_1
16 |   - cffi=1.14.0=py37h2e261b9_0
17 |   - chardet=3.0.4=py37_1003
18 |   - click=7.1.1=py_0
19 |   - cryptography=2.8=py37h1ba5d50_0
20 |   - cudatoolkit=10.0.130=0
21 |   - cudnn=7.6.5=cuda10.0_0
22 |   - cupti=10.0.130=0
23 |   - gast=0.2.2=py37_0
24 |   - google-auth=1.11.2=py_0
25 |   - google-auth-oauthlib=0.4.1=py_2
26 |   - google-pasta=0.2.0=py_0
27 |   - grpcio=1.27.2=py37hf8bcb03_0
28 |   - h5py=2.10.0=py37h7918eee_0
29 |   - hdf5=1.10.4=hb1b8bf9_0
30 |   - idna=2.9=py_1
31 |   - intel-openmp=2020.0=166
32 |   - keras-applications=1.0.8=py_0
33 |   - keras-preprocessing=1.1.0=py_1
34 |   - ld_impl_linux-64=2.33.1=h53a641e_7
35 |   - libedit=3.1.20181209=hc058e9b_0
36 |   - libffi=3.2.1=hd88cf55_4
37 |   - libgcc-ng=9.1.0=hdf63c60_0
38 |   - libgfortran-ng=7.3.0=hdf63c60_0
39 |   - libprotobuf=3.11.4=hd408876_0
40 |   - libstdcxx-ng=9.1.0=hdf63c60_0
41 |   - markdown=3.1.1=py37_0
42 |   - mkl=2020.0=166
43 |   - mkl-service=2.3.0=py37he904b0f_0
44 |   - mkl_fft=1.0.15=py37ha843d7b_0
45 |   - mkl_random=1.1.0=py37hd6b4f25_0
46 |   - ncurses=6.2=he6710b0_0
47 |   - numpy=1.18.1=py37h4f9e942_0
48 |   - numpy-base=1.18.1=py37hde5b4d6_1
49 |   - oauthlib=3.1.0=py_0
50 |   - openssl=1.1.1f=h7b6447c_0
51 |   - opt_einsum=3.1.0=py_0
52 |   - pip=20.0.2=py37_1
53 |   - protobuf=3.11.4=py37he6710b0_0
54 |   - pyasn1=0.4.8=py_0
55 |   - pyasn1-modules=0.2.7=py_0
56 |   - pycparser=2.20=py_0
57 |   - pyjwt=1.7.1=py37_0
58 |   - pyopenssl=19.1.0=py37_0
59 |   - pysocks=1.7.1=py37_0
60 |   - python=3.7.7=hcf32534_0_cpython
61 |   - readline=8.0=h7b6447c_0
62 |   - requests=2.23.0=py37_0
63 |   - requests-oauthlib=1.3.0=py_0
64 |   - rsa=4.0=py_0
65 |   - scipy=1.4.1=py37h0b6359f_0
66 |   - setuptools=46.1.3=py37_0
67 |   - six=1.14.0=py37_0
68 |   - sqlite=3.31.1=h7b6447c_0
69 |   - tensorboard=2.1.0=py3_0
70 |   - tensorflow=2.0.0=gpu_py37h768510d_0
71 |   - tensorflow-base=2.0.0=gpu_py37h0ec5d1f_0
72 |   - tensorflow-estimator=2.0.0=pyh2649769_0
73 |   - tensorflow-gpu=2.0.0=h0d30ee6_0
74 |   - termcolor=1.1.0=py37_1
75 |   - tk=8.6.8=hbc83047_0
76 |   - urllib3=1.25.8=py37_0
77 |   - werkzeug=0.16.1=py_0
78 |   - wheel=0.34.2=py37_0
79 |   - wrapt=1.12.1=py37h7b6447c_1
80 |   - xz=5.2.4=h14c3975_4
81 |   - zlib=1.2.11=h7b6447c_3
82 |   - pip:
83 |     - attrs==19.3.0
84 |     - cycler==0.10.0
85 |     - dill==0.3.1.1
86 |     - future==0.18.2
87 |     - googleapis-common-protos==1.51.0
88 |     - kiwisolver==1.2.0
89 |     - matplotlib==3.2.1
90 |     - pillow==7.1.1
91 |     - promise==2.3
92 |     - pyparsing==2.4.6
93 |     - python-dateutil==2.8.1
94 |     - tensorflow-datasets==2.1.0
95 |     - tensorflow-metadata==0.21.1
96 |     - tqdm==4.45.0
97 | prefix: /home/furkan/miniconda3/envs/blazeface-env
98 |
--------------------------------------------------------------------------------
/predictor.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from utils import bbox_utils, data_utils, drawing_utils, io_utils, train_utils, landmark_utils
3 | import blazeface
4 |
5 | args = io_utils.handle_args()
6 | if args.handle_gpu:
7 |     io_utils.handle_gpu_compatibility()
8 |
9 | batch_size = 1
10 | use_custom_images = False
11 | custom_image_path = "data/images/"
12 | hyper_params = train_utils.get_hyper_params()
13 | img_size = hyper_params["img_size"]
14 |
15 | data_types = data_utils.get_data_types()
16 | data_shapes = data_utils.get_data_shapes()
17 | padding_values = data_utils.get_padding_values()
18 |
19 | if use_custom_images:
20 |     img_paths = data_utils.get_custom_imgs(custom_image_path)
21 |     total_items = len(img_paths)
22 |     test_data = tf.data.Dataset.from_generator(lambda: data_utils.custom_data_generator(
23 |         img_paths, img_size, img_size), data_types, data_shapes)
24 | else:
25 |     test_split = "train[80%:]"
26 |     test_data, info = data_utils.get_dataset("the300w_lp", test_split)
27 |     total_items = data_utils.get_total_item_size(info, test_split)
28 |     test_data = test_data.map(lambda x: data_utils.preprocessing(x, img_size, img_size))
29 | #
30 | test_data = test_data.padded_batch(batch_size, padded_shapes=data_shapes, padding_values=padding_values)
31 |
32 | model = blazeface.get_model(hyper_params)
33 | model_path = io_utils.get_model_path()
34 | model.load_weights(model_path)
35 |
36 | prior_boxes = bbox_utils.generate_prior_boxes(hyper_params["feature_map_shapes"], hyper_params["aspect_ratios"])
37 |
38 | variances = hyper_params["variances"]
39 | total_landmarks = hyper_params["total_landmarks"]
40 | landmark_variances = total_landmarks * variances[0:2]
41 | variances += landmark_variances
42 |
43 | for image_data in test_data:
44 |     img, _, _ = image_data
45 |     pred_deltas, pred_scores = model.predict_on_batch(img)
46 |     pred_deltas *= variances
47 |     #
48 |     pred_bboxes_and_landmarks = bbox_utils.get_bboxes_and_landmarks_from_deltas(prior_boxes, pred_deltas)
49 |     pred_bboxes_and_landmarks = tf.clip_by_value(pred_bboxes_and_landmarks, 0, 1)
50 |     #
51 |     pred_scores = tf.cast(pred_scores, tf.float32)
52 |     #
53 |     weighted_suppressed_data = bbox_utils.weighted_suppression(pred_scores[0], pred_bboxes_and_landmarks[0])
54 |     #
55 |     weighted_bboxes = weighted_suppressed_data[..., 0:4]
56 |     weighted_landmarks = weighted_suppressed_data[..., 4:]
57 |     #
58 |     denormalized_bboxes = bbox_utils.denormalize_bboxes(weighted_bboxes, img_size, img_size)
59 |     weighted_landmarks = tf.reshape(weighted_landmarks, (-1, total_landmarks, 2))
60 |     denormalized_landmarks = landmark_utils.denormalize_landmarks(weighted_landmarks, img_size, img_size)
61 |     drawing_utils.draw_bboxes_with_landmarks(img[0], denormalized_bboxes,
denormalized_landmarks)
62 |
--------------------------------------------------------------------------------
/ssd_loss.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | class CustomLoss(object):
4 |     def __init__(self, neg_pos_ratio, loc_loss_alpha):
5 |         self.neg_pos_ratio = tf.constant(neg_pos_ratio, dtype=tf.float32)
6 |         self.loc_loss_alpha = tf.constant(loc_loss_alpha, dtype=tf.float32)
7 |
8 |     def loc_loss_fn(self, actual_bbox_deltas, pred_bbox_deltas):
9 |         """Calculating SSD localization loss value for only positive samples.
10 |         inputs:
11 |             actual_bbox_deltas = (batch_size, total_bboxes, [delta_bbox_y, delta_bbox_x, delta_bbox_h, delta_bbox_w, delta_landmark_x0, delta_landmark_y0, ..., delta_landmark_xN, delta_landmark_yN])
12 |             pred_bbox_deltas = (batch_size, total_bboxes, [delta_bbox_y, delta_bbox_x, delta_bbox_h, delta_bbox_w, delta_landmark_x0, delta_landmark_y0, ..., delta_landmark_xN, delta_landmark_yN])
13 |
14 |         outputs:
15 |             loc_loss = localization / regression / bounding box loss value
16 |         """
17 |         total_reg_points = tf.shape(actual_bbox_deltas)[-1]
18 |         # Localization / bbox / regression loss calculation for all bboxes
19 |         loc_loss_fn = tf.losses.Huber(reduction=tf.losses.Reduction.NONE)
20 |         loc_loss_for_all = loc_loss_fn(actual_bbox_deltas, pred_bbox_deltas)
21 |         # Since TF 2.2, Huber loss takes the mean over the last axis
22 |         loc_loss_for_all = tf.cond(tf.greater(tf.rank(loc_loss_for_all), tf.constant(2)),
23 |             lambda: tf.reduce_sum(loc_loss_for_all, axis=-1),
24 |             lambda: loc_loss_for_all * tf.cast(total_reg_points, dtype=tf.float32))
25 |         #
26 |         pos_cond = tf.reduce_any(tf.not_equal(actual_bbox_deltas, tf.constant(0.0)), axis=2)
27 |         pos_mask = tf.cast(pos_cond, dtype=tf.float32)
28 |         total_pos_bboxes = tf.reduce_sum(pos_mask, axis=1)
29 |         #
30 |         loc_loss = tf.reduce_sum(pos_mask * loc_loss_for_all, axis=-1)
31 |         total_pos_bboxes = tf.where(tf.equal(total_pos_bboxes, tf.constant(0.0)), tf.constant(1.0), total_pos_bboxes)
32 |         loc_loss = loc_loss / total_pos_bboxes
33 |         #
34 |         return loc_loss * self.loc_loss_alpha
35 |
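The version check above exists because the reduction behavior of `tf.losses.Huber` changed across TF 2.x releases. A quick sketch showing what `loc_loss_fn` has to normalize for (the shapes are illustrative):

```python
import tensorflow as tf

huber = tf.losses.Huber(reduction=tf.losses.Reduction.NONE)
y_true = tf.random.uniform((2, 8, 16))  # (batch, boxes, regression points)
y_pred = tf.random.uniform((2, 8, 16))
per_box = huber(y_true, y_pred)
# On TF >= 2.2 this prints (2, 8): the mean over the 16 regression points.
# Earlier 2.x versions kept an extra axis, which is why loc_loss_fn checks
# tf.rank before either summing the last axis or rescaling by total_reg_points.
print(per_box.shape)
```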
36 |     def conf_loss_fn(self, actual_labels, pred_labels):
37 |         """Calculating SSD confidence loss value by performing hard negative mining as mentioned in the paper.
38 |         inputs:
39 |             actual_labels = (batch_size, total_bboxes, 1)
40 |             pred_labels = (batch_size, total_bboxes, 1)
41 |
42 |         outputs:
43 |             conf_loss = confidence / class / label loss value
44 |         """
45 |         # Confidence / Label loss calculation for all labels
46 |         conf_loss_fn = tf.losses.BinaryCrossentropy(reduction=tf.losses.Reduction.NONE)
47 |         conf_loss_for_all = conf_loss_fn(actual_labels, pred_labels)
48 |         #
49 |         squeezed_actual_labels = tf.squeeze(actual_labels, -1)
50 |         pos_cond = tf.not_equal(squeezed_actual_labels, tf.constant(0.0))
51 |         pos_mask = tf.cast(pos_cond, dtype=tf.float32)
52 |         total_pos_bboxes = tf.reduce_sum(pos_mask, axis=1)
53 |         # Hard negative mining
54 |         total_neg_bboxes = tf.cast(total_pos_bboxes * self.neg_pos_ratio, tf.int32)
55 |         #
56 |         masked_loss = tf.where(tf.equal(squeezed_actual_labels, tf.constant(0.0)), conf_loss_for_all, tf.zeros_like(conf_loss_for_all, dtype=tf.float32))
57 |         sorted_loss = tf.argsort(masked_loss, direction="DESCENDING")
58 |         sorted_loss = tf.argsort(sorted_loss)
59 |         neg_cond = tf.less(sorted_loss, tf.expand_dims(total_neg_bboxes, axis=1))
60 |         neg_mask = tf.cast(neg_cond, dtype=tf.float32)
61 |         #
62 |         final_mask = pos_mask + neg_mask
63 |         conf_loss = tf.reduce_sum(final_mask * conf_loss_for_all, axis=-1)
64 |         total_pos_bboxes = tf.where(tf.equal(total_pos_bboxes, tf.constant(0.0)), tf.constant(1.0), total_pos_bboxes)
65 |         conf_loss = conf_loss / total_pos_bboxes
66 |         #
67 |         return conf_loss
68 |
--------------------------------------------------------------------------------
/trainer.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, LearningRateScheduler
3 | from tensorflow.keras.optimizers import SGD, Adam
4 | import augmentation
5 | from ssd_loss import CustomLoss
6 | from utils import bbox_utils, data_utils, io_utils, train_utils, drawing_utils, landmark_utils
7 | import blazeface
8 | import random
9 |
10 | args = io_utils.handle_args()
11 | if args.handle_gpu:
12 |     io_utils.handle_gpu_compatibility()
13 |
14 | batch_size = 32
15 | epochs = 150
16 | load_weights = False
17 | hyper_params = train_utils.get_hyper_params()
18 |
19 | train_split = "train[:80%]"
20 | val_split = "train[80%:]"
21 | train_data, info = data_utils.get_dataset("the300w_lp", train_split)
22 | val_data, _ = data_utils.get_dataset("the300w_lp", val_split)
23 | train_total_items = data_utils.get_total_item_size(info, train_split)
24 | val_total_items = data_utils.get_total_item_size(info, val_split)
25 | #
26 | img_size = hyper_params["img_size"]
27 |
28 | train_data = train_data.map(lambda x : data_utils.preprocessing(x, img_size, img_size, augmentation.apply))
29 | val_data = val_data.map(lambda x : data_utils.preprocessing(x, img_size, img_size))
30 | #
31 | data_shapes = data_utils.get_data_shapes()
32 | padding_values = data_utils.get_padding_values()
33 | train_data = train_data.shuffle(batch_size*12).padded_batch(batch_size, padded_shapes=data_shapes, padding_values=padding_values)
34 | val_data = val_data.padded_batch(batch_size, padded_shapes=data_shapes, padding_values=padding_values)
35 | #
36 | model = blazeface.get_model(hyper_params)
37 | custom_losses = CustomLoss(hyper_params["neg_pos_ratio"], hyper_params["loc_loss_alpha"])
38 | model.compile(optimizer=Adam(learning_rate=1e-3),
39 |               loss=[custom_losses.loc_loss_fn, custom_losses.conf_loss_fn])
40 | blazeface.init_model(model)
41 | #
42 | model_path =
io_utils.get_model_path()
43 | if load_weights:
44 |     model.load_weights(model_path)
45 | log_path = io_utils.get_log_path("blazeface/")
46 | # We calculate the prior boxes once and reuse them for all operations, because all images have the same size
47 | prior_boxes = bbox_utils.generate_prior_boxes(hyper_params["feature_map_shapes"], hyper_params["aspect_ratios"])
48 | #
49 | train_feed = train_utils.generator(train_data, prior_boxes, hyper_params)
50 | val_feed = train_utils.generator(val_data, prior_boxes, hyper_params)
51 |
52 | checkpoint_callback = ModelCheckpoint(model_path, monitor="val_loss", save_best_only=True, save_weights_only=True)
53 | tensorboard_callback = TensorBoard(log_dir=log_path)
54 | learning_rate_callback = LearningRateScheduler(train_utils.scheduler, verbose=0)
55 |
56 | step_size_train = train_utils.get_step_size(train_total_items, batch_size)
57 | step_size_val = train_utils.get_step_size(val_total_items, batch_size)
58 | model.fit(train_feed,
59 |           steps_per_epoch=step_size_train,
60 |           validation_data=val_feed,
61 |           validation_steps=step_size_val,
62 |           epochs=epochs,
63 |           callbacks=[checkpoint_callback, tensorboard_callback, learning_rate_callback])
64 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FurkanOM/tf-blazeface/dce7aa2b9750b63a0e3611ccc18150e59f220645/utils/__init__.py
--------------------------------------------------------------------------------
/utils/bbox_utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | def get_weighted_boxes_and_landmarks(scores, bboxes_and_landmarks, mask):
4 |     """Calculating weighted mean of given bboxes and landmarks according to the mask.
5 |     inputs:
6 |         scores = (total_bboxes, [probability])
7 |         bboxes_and_landmarks = (total_bboxes, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
8 |         mask = (total_bboxes,)
9 |
10 |     outputs:
11 |         weighted_bbox_and_landmark = (1, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
12 |     """
13 |     selected_scores = scores[mask]
14 |     selected_bboxes_and_landmarks = bboxes_and_landmarks[mask]
15 |     weighted_sum = tf.reduce_sum(selected_bboxes_and_landmarks * selected_scores, 0)
16 |     sum_selected_scores = tf.reduce_sum(selected_scores, 0)
17 |     sum_selected_scores = tf.where(tf.equal(sum_selected_scores, 0.0), 1.0, sum_selected_scores)
18 |     return tf.expand_dims(weighted_sum / sum_selected_scores, 0)
19 |
20 | def weighted_suppression_body(counter, iou_threshold, scores, bboxes_and_landmarks, weighted_suppressed_data):
21 |     """Body of the weighted-mean suppression while loop.
22 |     inputs:
23 |         counter = while body counter
24 |         iou_threshold = threshold value for overlapping bounding boxes
25 |         scores = (total_bboxes, [probability])
26 |         bboxes_and_landmarks = (total_bboxes, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
27 |         weighted_suppressed_data = (M, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
28 |
29 |     outputs:
30 |         counter = while body counter
31 |         iou_threshold = threshold value for overlapping bounding boxes
32 |         scores = (total_bboxes - N, [probability])
33 |         bboxes_and_landmarks = (total_bboxes - N, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
34 |         weighted_suppressed_data = (M + 1, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
35 |     """
36 |     counter = tf.add(counter, 1)
37 |     first_box = bboxes_and_landmarks[0, 0:4]
38 |     iou_map = generate_iou_map(first_box, bboxes_and_landmarks[..., 0:4], transpose_perm=[1, 0])
39 |     overlapped_mask = tf.reshape(tf.greater(iou_map, iou_threshold), (-1,))
40 |     weighted_bbox_and_landmark = get_weighted_boxes_and_landmarks(scores, bboxes_and_landmarks, overlapped_mask)
41 |     weighted_suppressed_data = tf.concat([weighted_suppressed_data, weighted_bbox_and_landmark], axis=0)
42 |     not_overlapped_mask = tf.logical_not(overlapped_mask)
43 |     scores = scores[not_overlapped_mask]
44 |     bboxes_and_landmarks = bboxes_and_landmarks[not_overlapped_mask]
45 |     return counter, iou_threshold, scores, bboxes_and_landmarks, weighted_suppressed_data
46 |
47 | def weighted_suppression(scores, bboxes_and_landmarks, max_total_size=50, score_threshold=0.75, iou_threshold=0.3):
48 |     """BlazeFace weighted-mean suppression algorithm.
49 |     inputs:
50 |         scores = (total_bboxes, [probability])
51 |         bboxes_and_landmarks = (total_bboxes, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
52 |         max_total_size = maximum returned bounding boxes and landmarks
53 |         score_threshold = threshold value for bounding boxes and landmarks selection
54 |         iou_threshold = threshold value for overlapping bounding boxes
55 |
56 |     outputs:
57 |         weighted_bboxes_and_landmarks = (dynamic_size, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
58 |     """
59 |     score_mask = tf.squeeze(tf.greater(scores, score_threshold), -1)
60 |     scores = scores[score_mask]
61 |     bboxes_and_landmarks = bboxes_and_landmarks[score_mask]
62 |     sorted_indices = tf.argsort(scores, axis=0, direction="DESCENDING")
63 |     sorted_scores = tf.gather_nd(scores, sorted_indices)
64 |     sorted_bboxes_and_landmarks = tf.gather_nd(bboxes_and_landmarks, sorted_indices)
65 |     counter = tf.constant(0, tf.int32)
66 |     weighted_data = tf.zeros(tf.shape(bboxes_and_landmarks[0:1]), dtype=tf.float32)
67 |     cond = lambda counter, iou_threshold, scores, data, weighted: tf.logical_and(tf.less(counter, max_total_size), tf.greater(tf.shape(scores)[0], 0))
68 |     _, _, _, _, weighted_data = tf.while_loop(cond, weighted_suppression_body,
69 |         [counter, iou_threshold, sorted_scores, sorted_bboxes_and_landmarks, weighted_data])
70 |     #
71 |     weighted_data = weighted_data[1:]
72 |     pad_size = max_total_size - weighted_data.shape[0]
73 |     weighted_data = tf.pad(weighted_data, ((0, pad_size),(0, 0)))
74 |     return weighted_data
75 |
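Unlike hard NMS, the suppression loop above blends every box that overlaps the current top-scoring box into a single score-weighted detection, which is the tie-resolution strategy described in the BlazeFace paper. A shape-level usage sketch (example values only, not from the repo):

```python
import tensorflow as tf
from utils import bbox_utils

total_landmarks = 6
scores = tf.random.uniform((896, 1))                           # (total_bboxes, [probability])
boxes_lms = tf.random.uniform((896, 4 + total_landmarks * 2))  # boxes + landmarks

fused = bbox_utils.weighted_suppression(scores, boxes_lms,
                                        max_total_size=50,
                                        score_threshold=0.75,
                                        iou_threshold=0.3)
print(fused.shape)  # (50, 16); rows beyond the surviving detections are zero padding
```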
76 | def non_max_suppression(pred_bboxes, pred_labels, **kwargs):
77 |     """Applying non-maximum suppression.
78 |     Details can be found in the TensorFlow documentation:
79 |     https://www.tensorflow.org/api_docs/python/tf/image/combined_non_max_suppression
80 |     inputs:
81 |         pred_bboxes = (batch_size, total_bboxes, total_labels, [y1, x1, y2, x2])
82 |             total_labels should be 1 for binary operations like in rpn
83 |         pred_labels = (batch_size, total_bboxes, total_labels)
84 |         **kwargs = other parameters
85 |
86 |     outputs:
87 |         nms_boxes = (batch_size, max_detections, [y1, x1, y2, x2])
88 |         nmsed_scores = (batch_size, max_detections)
89 |         nmsed_classes = (batch_size, max_detections)
90 |         valid_detections = (batch_size)
91 |             Only the top valid_detections[i] entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid.
92 |             The rest of the entries are zero paddings.
93 |     """
94 |     return tf.image.combined_non_max_suppression(
95 |         pred_bboxes,
96 |         pred_labels,
97 |         **kwargs
98 |     )
99 |
100 | def generate_iou_map(bboxes, gt_boxes, transpose_perm=[0, 2, 1]):
101 |     """Calculating intersection over union values for each ground truth box in a dynamic manner.
102 |     Bounding boxes with 1 to 3 dimensions are supported.
103 |     It works even if bboxes and gt_boxes have different ranks.
104 |     inputs:
105 |         bboxes = (dynamic_dimension, [y1, x1, y2, x2])
106 |         gt_boxes = (dynamic_dimension, [y1, x1, y2, x2])
107 |         transpose_perm = (transpose_perm_order)
108 |             for 3d gt_boxes => [0, 2, 1]
109 |
110 |     outputs:
111 |         iou_map = (dynamic_dimension, total_gt_boxes)
112 |             same rank with the gt_boxes
113 |     """
114 |     gt_rank = tf.rank(gt_boxes)
115 |     gt_expand_axis = gt_rank - 2
116 |     #
117 |     bbox_y1, bbox_x1, bbox_y2, bbox_x2 = tf.split(bboxes, 4, axis=-1)
118 |     gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(gt_boxes, 4, axis=-1)
119 |     # Calculate bbox and ground truth boxes areas
120 |     gt_area = tf.squeeze((gt_y2 - gt_y1) * (gt_x2 - gt_x1), axis=-1)
121 |     bbox_area = tf.squeeze((bbox_y2 - bbox_y1) * (bbox_x2 - bbox_x1), axis=-1)
122 |     #
123 |     x_top = tf.maximum(bbox_x1, tf.transpose(gt_x1, transpose_perm))
124 |     y_top = tf.maximum(bbox_y1, tf.transpose(gt_y1, transpose_perm))
125 |     x_bottom = tf.minimum(bbox_x2, tf.transpose(gt_x2, transpose_perm))
126 |     y_bottom = tf.minimum(bbox_y2, tf.transpose(gt_y2, transpose_perm))
127 |     ### Calculate intersection area
128 |     intersection_area = tf.maximum(x_bottom - x_top, 0) * tf.maximum(y_bottom - y_top, 0)
129 |     ### Calculate union area
130 |     union_area = (tf.expand_dims(bbox_area, -1) + tf.expand_dims(gt_area, gt_expand_axis) - intersection_area)
131 |     # Intersection over Union
132 |     return intersection_area / union_area
133 |
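A quick numeric check of the IoU helper above (values chosen for illustration): a unit box against a box covering its top half overlaps by 0.5:

```python
import tensorflow as tf
from utils import bbox_utils

boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]])  # (1, [y1, x1, y2, x2])
gt = tf.constant([[0.0, 0.0, 0.5, 1.0]])     # covers the top half

iou = bbox_utils.generate_iou_map(boxes, gt, transpose_perm=[1, 0])
print(iou.numpy())  # [[0.5]] -> intersection 0.5 / union 1.0
```

For 2D inputs the `transpose_perm=[1, 0]` form is used, exactly as in `weighted_suppression_body` above.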
134 | def get_bboxes_and_landmarks_from_deltas(prior_boxes, deltas):
135 |     """Calculating bounding boxes and landmarks for given delta values.
136 |     inputs:
137 |         prior_boxes = (total_bboxes, [center_x, center_y, width, height])
138 |         deltas = (batch_size, total_bboxes, [delta_bbox_y, delta_bbox_x, delta_bbox_h, delta_bbox_w, delta_landmark_x0, delta_landmark_y0, ..., delta_landmark_xN, delta_landmark_yN])
139 |
140 |     outputs:
141 |         bboxes_and_landmarks = (batch_size, total_bboxes, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
142 |     """
143 |     #
144 |     bbox_width = deltas[..., 3] * prior_boxes[..., 2]
145 |     bbox_height = deltas[..., 2] * prior_boxes[..., 3]
146 |     bbox_ctr_x = (deltas[..., 1] * prior_boxes[..., 2]) + prior_boxes[..., 0]
147 |     bbox_ctr_y = (deltas[..., 0] * prior_boxes[..., 3]) + prior_boxes[..., 1]
148 |     #
149 |     y1 = bbox_ctr_y - (0.5 * bbox_height)
150 |     x1 = bbox_ctr_x - (0.5 * bbox_width)
151 |     y2 = bbox_height + y1
152 |     x2 = bbox_width + x1
153 |     #
154 |     total_landmarks = tf.shape(deltas[..., 4:])[-1] // 2
155 |     xy_pairs = tf.tile(prior_boxes[..., 0:2], (1, total_landmarks))
156 |     wh_pairs = tf.tile(prior_boxes[..., 2:4], (1, total_landmarks))
157 |     landmarks = (deltas[..., 4:] * wh_pairs) + xy_pairs
158 |     #
159 |     return tf.concat([tf.stack([y1, x1, y2, x2], axis=-1), landmarks], -1)
160 |
161 | def get_deltas_from_bboxes_and_landmarks(prior_boxes, bboxes_and_landmarks):
162 |     """Calculating bounding box and landmark deltas for given ground truth boxes and landmarks.
163 |     inputs:
164 |         prior_boxes = (total_bboxes, [center_x, center_y, width, height])
165 |         bboxes_and_landmarks = (batch_size, total_bboxes, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
166 |
167 |     outputs:
168 |         deltas = (batch_size, total_bboxes, [delta_bbox_y, delta_bbox_x, delta_bbox_h, delta_bbox_w, delta_landmark_x0, delta_landmark_y0, ..., delta_landmark_xN, delta_landmark_yN])
169 |     """
170 |     #
171 |     gt_width = bboxes_and_landmarks[..., 3] - bboxes_and_landmarks[..., 1]
172 |     gt_height = bboxes_and_landmarks[..., 2] - bboxes_and_landmarks[..., 0]
173 |     gt_ctr_x = bboxes_and_landmarks[..., 1] + 0.5 * gt_width
174 |     gt_ctr_y = bboxes_and_landmarks[..., 0] + 0.5 * gt_height
175 |     #
176 |     delta_x = (gt_ctr_x - prior_boxes[..., 0]) / prior_boxes[..., 2]
177 |     delta_y = (gt_ctr_y - prior_boxes[..., 1]) / prior_boxes[..., 3]
178 |     delta_w = gt_width / prior_boxes[..., 2]
179 |     delta_h = gt_height / prior_boxes[..., 3]
180 |     #
181 |     total_landmarks = tf.shape(bboxes_and_landmarks[..., 4:])[-1] // 2
182 |     xy_pairs = tf.tile(prior_boxes[..., 0:2], (1, total_landmarks))
183 |     wh_pairs = tf.tile(prior_boxes[..., 2:4], (1, total_landmarks))
184 |     landmark_deltas = (bboxes_and_landmarks[..., 4:] - xy_pairs) / wh_pairs
185 |     #
186 |     return tf.concat([tf.stack([delta_y, delta_x, delta_h, delta_w], -1), landmark_deltas], -1)
187 |
188 | def get_scale_for_nth_feature_map(k, m=4, scale_min=0.1484375, scale_max=0.75):
189 |     """Calculating scale value for the nth feature map, using the method given in the paper.
190 |     inputs:
191 |         k = index of the feature map the scale is calculated for
192 |         m = number of feature maps used for detection, 6 for ssd300, 4 for blazeface
193 |
194 |     outputs:
195 |         scale = calculated scale value for given index
196 |     """
197 |     return scale_min + ((scale_max - scale_min) / (m - 1)) * (k - 1)
198 |
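With the defaults above, the feature map scales interpolate linearly between `scale_min` and `scale_max`; a worked check of the formula:

```python
from utils import bbox_utils

# scale(k) = 0.1484375 + ((0.75 - 0.1484375) / 3) * (k - 1)
for k in range(1, 5):
    print(k, bbox_utils.get_scale_for_nth_feature_map(k, m=4))
# 1 0.1484375
# 2 0.3489583...
# 3 0.5494791...
# 4 0.75
```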
199 | def get_wh_pairs(aspect_ratios, feature_map_index, total_feature_map):
200 |     """Generating width and height pairs for different aspect ratios and feature map shapes.
201 |     inputs:
202 |         aspect_ratios = aspect ratio list of the given feature map (1 extra pair is generated for ratio 1)
203 |         feature_map_index = index of the feature map the scale is calculated for
204 |         total_feature_map = number of feature maps used for detection, 6 for ssd300
205 |
206 |     outputs:
207 |         wh_pairs = [(width1, height1), ..., (widthN, heightN)]
208 |     """
209 |     current_scale = get_scale_for_nth_feature_map(feature_map_index, m=total_feature_map)
210 |     next_scale = get_scale_for_nth_feature_map(feature_map_index + 1, m=total_feature_map)
211 |     wh_pairs = []
212 |     for aspect_ratio in aspect_ratios:
213 |         height = current_scale / tf.sqrt(aspect_ratio)
214 |         width = current_scale * tf.sqrt(aspect_ratio)
215 |         wh_pairs.append([width, height])
216 |     # 1 extra pair for ratio 1
217 |     height = width = tf.sqrt(current_scale * next_scale)
218 |     wh_pairs.append([width, height])
219 |     return tf.cast(wh_pairs, dtype=tf.float32)
220 |
221 | def generate_prior_boxes(feature_map_shapes, aspect_ratios):
222 |     """Generating prior boxes for the given feature map shapes and the width-height pairs of their aspect ratios.
223 |     These prior boxes are the same as the anchors in Faster-RCNN.
224 |     inputs:
225 |         feature_map_shapes = output sizes of all feature maps used for detection
226 |         aspect_ratios = aspect ratio lists for all feature map shapes (1 extra pair is generated for ratio 1)
227 |
228 |     outputs:
229 |         prior_boxes = (total_bboxes, [center_x, center_y, width, height])
230 |             these values are in normalized form [0, 1]
231 |     """
232 |     prior_boxes = []
233 |     for i, feature_map_shape in enumerate(feature_map_shapes):
234 |         wh_pairs = get_wh_pairs(aspect_ratios[i], i+1, len(feature_map_shapes))
235 |         #
236 |         stride = 1 / feature_map_shape
237 |         grid_coords = tf.cast(tf.range(0, feature_map_shape) / feature_map_shape + stride / 2, dtype=tf.float32)
238 |         grid_x, grid_y = tf.meshgrid(grid_coords, grid_coords)
239 |         flat_grid_x, flat_grid_y = tf.reshape(grid_x, (-1, )), tf.reshape(grid_y, (-1, ))
240 |         #
241 |         grid_map = tf.stack([flat_grid_x, flat_grid_y], axis=-1)
242 |         grid_map = tf.pad(grid_map, ((0,0), (0,2)))
243 |         wh_pairs = tf.pad(wh_pairs, ((0,0), (2,0)))
244 |         #
245 |         prior_boxes_for_feature_map = tf.reshape(wh_pairs, (1, -1, 4)) + tf.reshape(grid_map, (-1, 1, 4))
246 |         prior_boxes_for_feature_map = tf.reshape(prior_boxes_for_feature_map, (-1, 4))
247 |         #
248 |         prior_boxes.append(prior_boxes_for_feature_map)
249 |     prior_boxes = tf.concat(prior_boxes, axis=0)
250 |     return tf.clip_by_value(prior_boxes, 0, 1)
251 |
252 | def convert_xywh_to_bboxes(xywh):
253 |     """Converting center x, y and width height format to bounding boxes.
254 |     inputs:
255 |         xywh = (M, N, [center_x, center_y, width, height])
256 |
257 |     outputs:
258 |         bboxes = (M, N, [y1, x1, y2, x2])
259 |     """
260 |     y1 = xywh[..., 1] - (0.5 * xywh[..., 3])
261 |     x1 = xywh[..., 0] - (0.5 * xywh[..., 2])
262 |     y2 = xywh[..., 3] + y1
263 |     x2 = xywh[..., 2] + x1
264 |     bboxes = tf.stack([y1, x1, y2, x2], axis=-1)
265 |     return tf.clip_by_value(bboxes, 0, 1)
266 |
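A small sanity check of the prior box generator above. The feature map shapes and ratios here are illustrative assumptions; the repo's real values come from `train_utils.get_hyper_params()`, which is not shown in this section:

```python
from utils import bbox_utils

# Two feature maps (16x16 and 8x8) with a single aspect ratio each;
# each cell then gets len(ratios) + 1 = 2 prior boxes.
priors = bbox_utils.generate_prior_boxes([16, 8], [[1.0], [1.0]])
print(priors.shape)  # (640, 4) -> 16*16*2 + 8*8*2, as [center_x, center_y, width, height]
```

To reach the 896 boxes of the original BlazeFace you would instead use 2 anchors per cell on the 16x16 map and 6 on the 8x8 map; with this generator that corresponds to 1 and 5 aspect ratios respectively.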
267 | def renormalize_bboxes_with_min_max(bboxes, min_max):
268 |     """Renormalizing given bounding boxes to the new boundaries.
269 |     r = (x - min) / (max - min)
270 |     inputs:
271 |         bboxes = (total_bboxes, [y1, x1, y2, x2])
272 |         min_max = ([y_min, x_min, y_max, x_max])
273 |     """
274 |     y_min, x_min, y_max, x_max = tf.split(min_max, 4)
275 |     renormalized_bboxes = bboxes - tf.concat([y_min, x_min, y_min, x_min], -1)
276 |     renormalized_bboxes /= tf.concat([y_max-y_min, x_max-x_min, y_max-y_min, x_max-x_min], -1)
277 |     return tf.clip_by_value(renormalized_bboxes, 0, 1)
278 |
279 | def normalize_bboxes(bboxes, height, width):
280 |     """Normalizing bounding boxes.
281 |     inputs:
282 |         bboxes = (M, N, [y1, x1, y2, x2])
283 |         height = image height
284 |         width = image width
285 |
286 |     outputs:
287 |         normalized_bboxes = (M, N, [y1, x1, y2, x2])
288 |             in normalized form [0, 1]
289 |     """
290 |     y1 = bboxes[..., 0] / height
291 |     x1 = bboxes[..., 1] / width
292 |     y2 = bboxes[..., 2] / height
293 |     x2 = bboxes[..., 3] / width
294 |     return tf.stack([y1, x1, y2, x2], axis=-1)
295 |
296 | def denormalize_bboxes(bboxes, height, width):
297 |     """Denormalizing bounding boxes.
298 |     inputs:
299 |         bboxes = (M, N, [y1, x1, y2, x2])
300 |             in normalized form [0, 1]
301 |         height = image height
302 |         width = image width
303 |
304 |     outputs:
305 |         denormalized_bboxes = (M, N, [y1, x1, y2, x2])
306 |     """
307 |     y1 = bboxes[..., 0] * height
308 |     x1 = bboxes[..., 1] * width
309 |     y2 = bboxes[..., 2] * height
310 |     x2 = bboxes[..., 3] * width
311 |     return tf.round(tf.stack([y1, x1, y2, x2], axis=-1))
312 |
--------------------------------------------------------------------------------
/utils/data_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tensorflow as tf
3 | import tensorflow_datasets as tfds
4 | from PIL import Image
5 | from utils import landmark_utils
6 | import numpy as np
7 |
8 | def filter_landmarks(landmarks):
9 |     """Filtering landmarks from 68 points down to the 6 points used by blazeface.
10 |     inputs:
11 |         landmarks = (M, 68, [x, y])
12 |
13 |     outputs:
14 |         filtered_landmarks = (M, 6, [x, y])
15 |     """
16 |     # Left eye
17 |     left_eye_coords = tf.reduce_mean(landmarks[..., 36:42, :], -2)
18 |     # Right eye
19 |     right_eye_coords = tf.reduce_mean(landmarks[..., 42:48, :], -2)
20 |     # Left ear
21 |     left_ear_coords = tf.reduce_mean(landmarks[..., 0:2, :], -2)
22 |     # Right ear
23 |     right_ear_coords = tf.reduce_mean(landmarks[..., 15:17, :], -2)
24 |     # Nose
25 |     nose_coords = tf.reduce_mean(landmarks[..., 27:36, :], -2)
26 |     # Mouth
27 |     mouth_coords = tf.reduce_mean(landmarks[..., 48:68, :], -2)
28 |     return tf.stack([
29 |         left_eye_coords,
30 |         right_eye_coords,
31 |         left_ear_coords,
32 |         right_ear_coords,
33 |         nose_coords,
34 |         mouth_coords,
35 |     ], -2)
36 |
37 | def generate_bboxes_from_landmarks(landmarks):
38 |     """Generating bounding boxes from landmarks.
39 |     inputs:
40 |         landmarks = (M, total_landmarks, [x, y])
41 |
42 |     outputs:
43 |         bboxes = (M, [y1, x1, y2, x2])
44 |     """
45 |     padding = 5e-3
46 |     x1 = tf.reduce_min(landmarks[..., 0], -1) - padding
47 |     x2 = tf.reduce_max(landmarks[..., 0], -1) + padding
48 |     y1 = tf.reduce_min(landmarks[..., 1], -1) - padding
49 |     y2 = tf.reduce_max(landmarks[..., 1], -1) + padding
50 |     #
51 |     gt_boxes = tf.stack([y1, x1, y2, x2], -1)
52 |     return tf.clip_by_value(gt_boxes, 0, 1)
53 |
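The two helpers above derive the training targets directly from the landmark annotations: the box is the landmark extent plus a small padding, and `filter_landmarks` averages the 68-point annotation down to the 6 BlazeFace keypoints (eyes, ears, nose, mouth). A numeric sketch with made-up coordinates:

```python
import tensorflow as tf
from utils import data_utils

# Three normalized [x, y] landmarks for one face.
landmarks = tf.constant([[[0.40, 0.30], [0.60, 0.32], [0.50, 0.55]]])
boxes = data_utils.generate_bboxes_from_landmarks(landmarks)
print(boxes.numpy())  # [[0.295 0.395 0.555 0.605]] -> min/max per axis +- 0.005 padding
```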
54 | def preprocessing(image_data, final_height, final_width, augmentation_fn=None):
55 |     """Image resizing operation handled before batch operations.
56 |     inputs:
57 |         image_data = tensorflow dataset image_data
58 |         final_height, final_width = final image height and width after resizing
59 |         augmentation_fn = optional augmentation function applied to the image, boxes and landmarks
60 | 
61 |     outputs:
62 |         img = (final_height, final_width, channels)
63 |         gt_boxes = (gt_box_size, [y1, x1, y2, x2])
64 |         gt_landmarks = (gt_box_size, total_landmarks, [x, y])
65 |     """
66 |     img = image_data["image"]
67 |     img = tf.image.convert_image_dtype(img, tf.float32)
68 |     gt_landmarks = tf.expand_dims(image_data["landmarks_2d"], 0)
69 |     gt_boxes = generate_bboxes_from_landmarks(gt_landmarks)
70 |     gt_landmarks = filter_landmarks(gt_landmarks)
71 |     img = tf.image.resize(img, (final_height, final_width))
72 |     if augmentation_fn:
73 |         img, gt_boxes, gt_landmarks = augmentation_fn(img, gt_boxes, gt_landmarks)
74 |     img = (img - 0.5) / 0.5  # scale pixels to [-1, 1]
75 |     return img, gt_boxes, gt_landmarks
76 | 
77 | def get_dataset(name, split, data_dir="~/tensorflow_datasets"):
78 |     """Get tensorflow dataset split and info.
79 |     inputs:
80 |         name = name of the dataset, e.g. the300w_lp
81 |         split = data split string
82 |         data_dir = read/write path for tensorflow datasets
83 | 
84 |     outputs:
85 |         dataset = tensorflow dataset split
86 |         info = tensorflow dataset info
87 |     """
88 |     dataset, info = tfds.load(name, split=split, data_dir=data_dir, with_info=True)
89 |     return dataset, info
90 | 
91 | def get_total_item_size(info, split):
92 |     """Get the total number of items for the given split.
93 |     inputs:
94 |         info = tensorflow dataset info
95 |         split = data split string
96 | 
97 |     outputs:
98 |         total_item_size = number of total items
99 |     """
100 |     return info.splits[split].num_examples
101 | 
102 | def get_labels(info):
103 |     """Get label names list.
104 |     inputs:
105 |         info = tensorflow dataset info
106 | 
107 |     outputs:
108 |         labels = [labels list]
109 |     """
110 |     return info.features["labels"].names
111 | 
112 | def get_custom_imgs(custom_image_path):
113 |     """Generating a list of image paths for the given folder.
114 |     inputs:
115 |         custom_image_path = folder of the custom images
116 |     outputs:
117 |         custom image list = [path1, path2, ...]
118 |     """
119 |     img_paths = []
120 |     for path, dirs, filenames in os.walk(custom_image_path):
121 |         for filename in filenames:
122 |             img_paths.append(os.path.join(path, filename))
123 |         break  # only the top-level folder is scanned
124 |     return img_paths
125 | 
126 | def custom_data_generator(img_paths, final_height, final_width):
127 |     """Yielding custom entities as dataset.
128 |     inputs:
129 |         img_paths = custom image paths
130 |         final_height = final image height after resizing
131 |         final_width = final image width after resizing
132 |     outputs:
133 |         img = (final_height, final_width, depth)
134 |         dummy_gt_boxes = (None, None)
135 |         dummy_gt_landmarks = (None, None, None)
136 |     """
137 |     for img_path in img_paths:
138 |         image = Image.open(img_path)
139 |         resized_image = image.resize((final_width, final_height), Image.LANCZOS)
140 |         img = np.array(resized_image)
141 |         img = tf.image.convert_image_dtype(img, tf.float32)
142 |         img = (img - 0.5) / 0.5
143 |         yield img, tf.constant([[]], dtype=tf.float32), tf.constant([[[]]], dtype=tf.float32)
144 | 
145 | def get_data_types():
146 |     """Generating dataset parameter dtypes for tensorflow datasets.
147 |     outputs:
148 |         dtypes = output dtypes for (images, ground truth boxes, ground truth landmarks)
149 |     """
150 |     return (tf.float32, tf.float32, tf.float32)
151 | 
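A hedged wiring sketch (assumed usage, not part of the repo) that turns the generator above into a tf.data pipeline; "data/images" is a hypothetical folder:

    import tensorflow as tf
    from utils import data_utils

    img_paths = data_utils.get_custom_imgs("data/images")
    custom_dataset = tf.data.Dataset.from_generator(
        lambda: data_utils.custom_data_generator(img_paths, 128, 128),
        data_utils.get_data_types())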
152 | def get_data_shapes():
153 |     """Generating dataset parameter shapes for tensorflow datasets.
154 |     outputs:
155 |         shapes = output shapes for (images, ground truth boxes, ground truth landmarks)
156 |     """
157 |     return ([None, None, None], [None, None], [None, None, None])
158 | 
159 | def get_padding_values():
160 |     """Generating padding values for missing values in batch for tensorflow datasets.
161 |     outputs:
162 |         paddings = padding values with dtypes for (images, ground truth boxes, ground truth landmarks)
163 |     """
164 |     return (tf.constant(0, tf.float32), tf.constant(0, tf.float32), tf.constant(0, tf.float32))
165 | 
--------------------------------------------------------------------------------
/utils/drawing_utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from PIL import Image, ImageDraw
3 | import matplotlib.pyplot as plt
4 | 
5 | def draw_bboxes(imgs, bboxes):
6 |     """Drawing bounding boxes on given images.
7 |     inputs:
8 |         imgs = (batch_size, height, width, channels)
9 |         bboxes = (batch_size, total_bboxes, [y1, x1, y2, x2])
10 |             in normalized form [0, 1]
11 |     """
12 |     colors = tf.constant([[1, 0, 0, 1]], dtype=tf.float32)
13 |     imgs_with_bb = tf.image.draw_bounding_boxes(imgs, bboxes, colors)
14 |     for img_with_bb in imgs_with_bb:
15 |         plt.figure()
16 |         plt.imshow(img_with_bb)
17 |     plt.show()
18 | 
19 | def draw_bboxes_with_landmarks(img, bboxes, landmarks):
20 |     """Drawing bounding boxes and landmarks on the given image.
21 |     inputs:
22 |         img = (height, width, channels)
23 |         bboxes = (total_bboxes, [y1, x1, y2, x2])
24 |         landmarks = (total_bboxes, total_landmarks, [x, y])
25 |     """
26 |     image = tf.keras.preprocessing.image.array_to_img(img)
27 |     draw = ImageDraw.Draw(image)
28 |     color = (255, 0, 0, 255)
29 |     for bbox in bboxes:
30 |         y1, x1, y2, x2 = tf.split(bbox, 4)
31 |         bbox_width = x2 - x1
32 |         bbox_height = y2 - y1
33 |         # skip degenerate boxes
34 |         if bbox_width <= 0 or bbox_height <= 0:
35 |             continue
36 |         draw.rectangle((x1, y1, x2, y2), outline=color, width=1)
37 |     for landmark in landmarks:
38 |         # skip padded landmark rows
39 |         if tf.reduce_max(landmark) <= 0:
40 |             continue
41 |         rects = tf.concat([landmark - 1, landmark + 1], -1)
42 |         for rect in rects:
43 |             draw.ellipse(rect, fill=color)
44 |     plt.figure()
45 |     plt.imshow(image)
46 |     plt.show()
47 | 
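An illustrative call with dummy data (a sketch, not a repo excerpt); model outputs are normalized to [0, 1], so they are denormalized to pixel coordinates before drawing:

    import tensorflow as tf
    from utils import bbox_utils, drawing_utils, landmark_utils

    img = tf.random.uniform((128, 128, 3))
    boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]])      # [y1, x1, y2, x2], normalized
    landmarks = tf.constant([[[0.4, 0.4], [0.6, 0.4]]])  # [x, y] pairs, normalized

    drawing_utils.draw_bboxes_with_landmarks(
        img,
        bbox_utils.denormalize_bboxes(boxes, 128, 128),
        landmark_utils.denormalize_landmarks(landmarks, 128, 128))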
--------------------------------------------------------------------------------
/utils/io_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import tensorflow as tf
4 | from datetime import datetime
5 | 
6 | def get_log_path(custom_prefix=""):
7 |     """Generating log path for tensorboard.
8 |     inputs:
9 |         custom_prefix = any custom string for log folder name
10 | 
11 |     outputs:
12 |         log_path = tensorboard log path, for example: "logs/{custom_prefix}{date}"
13 |     """
14 |     return "logs/{}{}".format(custom_prefix, datetime.now().strftime("%Y%m%d-%H%M%S"))
15 | 
16 | def get_model_path():
17 |     """Generating model path for save/load model weights.
18 | 
19 |     outputs:
20 |         model_path = os model path, for example: "trained/blazeface_model_weights.h5"
21 |     """
22 |     main_path = "trained"
23 |     if not os.path.exists(main_path):
24 |         os.makedirs(main_path)
25 |     model_path = os.path.join(main_path, "blazeface_model_weights.h5")
26 |     return model_path
27 | 
28 | def handle_args():
29 |     """Handling of command line arguments using the argparse library.
30 | 
31 |     outputs:
32 |         args = parsed command line arguments
33 |     """
34 |     parser = argparse.ArgumentParser(description="BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs Implementation")
35 |     parser.add_argument("-handle-gpu", action="store_true", help="Tensorflow 2 GPU compatibility flag")
36 |     args = parser.parse_args()
37 |     return args
38 | 
39 | def handle_gpu_compatibility():
40 |     """Handling of GPU issues for the cuDNN initialization error and memory growth."""
41 |     try:
42 |         gpus = tf.config.experimental.list_physical_devices("GPU")
43 |         for gpu in gpus:
44 |             tf.config.experimental.set_memory_growth(gpu, True)
45 |     except Exception as e:
46 |         print(e)
47 | 
--------------------------------------------------------------------------------
/utils/landmark_utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | def renormalize_landmarks_with_min_max(landmarks, min_max):
4 |     """Renormalizing given landmarks to the new boundaries, r = (x - min) / (max - min).
5 |     inputs:
6 |         landmarks = (total_count, total_landmarks, [x, y])
7 |         min_max = ([y_min, x_min, y_max, x_max])
8 |     outputs: renormalized_landmarks = (total_count, total_landmarks, [x, y]), clipped to [0, 1]
9 |     """
10 |     y_min, x_min, y_max, x_max = tf.split(min_max, 4)
11 |     renormalized_landmarks = landmarks - tf.concat([x_min, y_min], -1)
12 |     renormalized_landmarks /= tf.concat([x_max-x_min, y_max-y_min], -1)
13 |     return tf.clip_by_value(renormalized_landmarks, 0, 1)
14 | 
15 | def normalize_landmarks(landmarks, height, width):
16 |     """Normalizing landmarks.
17 |     inputs:
18 |         landmarks = (M, N, [x, y])
19 |         height = image height
20 |         width = image width
21 | 
22 |     outputs:
23 |         normalized_landmarks = (M, N, [x, y])
24 |             in normalized form [0, 1]
25 |     """
26 |     return landmarks / tf.cast([width, height], tf.float32)
27 | 
28 | def denormalize_landmarks(landmarks, height, width):
29 |     """Denormalizing landmarks.
30 |     inputs:
31 |         landmarks = (M, N, [x, y])
32 |             in normalized form [0, 1]
33 |         height = image height
34 |         width = image width
35 | 
36 |     outputs:
37 |         denormalized_landmarks = (M, N, [x, y]), rounded to the nearest pixel
38 |     """
39 |     return tf.round(landmarks * tf.cast([width, height], tf.float32))
40 | 
--------------------------------------------------------------------------------
/utils/train_utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import math
3 | from utils import bbox_utils
4 | 
5 | def get_hyper_params(**kwargs):
6 |     """Generating hyper params in a dynamic way.
7 |     inputs:
8 |         **kwargs = any hyper param to override; falsy values are ignored
9 | 
10 |     outputs:
11 |         hyper_params = dictionary
12 |     """
13 |     hyper_params = {
14 |         "img_size": 128,
15 |         "feature_map_shapes": [16, 8, 8, 8],
16 |         "aspect_ratios": [[1.], [1.], [1.], [1.]],
17 |         "detections_per_layer": [2, 6],
18 |         "total_landmarks": 6,
19 |         "iou_threshold": 0.5,
20 |         "neg_pos_ratio": 3,
21 |         "loc_loss_alpha": 1,
22 |         "variances": [0.1, 0.1, 0.2, 0.2],
23 |     }
24 |     for key, value in kwargs.items():
25 |         if key in hyper_params and value:
26 |             hyper_params[key] = value
27 |     #
28 |     return hyper_params
29 | 
30 | def scheduler(epoch):
31 |     """Generating learning rate value for a given epoch.
32 |     inputs:
33 |         epoch = number of current epoch
34 | 
35 |     outputs:
36 |         learning_rate = float learning rate value
37 |     """
38 |     if epoch < 100:
39 |         return 1e-3
40 |     elif epoch < 125:
41 |         return 1e-4
42 |     else:
43 |         return 1e-5
44 | 
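A short sketch (assumed usage, not a repo excerpt) of plugging the scheduler above into Keras training as a callback, together with a hyper param override:

    import tensorflow as tf
    from utils import train_utils

    hyper_params = train_utils.get_hyper_params(iou_threshold=0.45)  # override one default
    lr_callback = tf.keras.callbacks.LearningRateScheduler(train_utils.scheduler)
    # later: model.fit(..., callbacks=[lr_callback])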
45 | def get_step_size(total_items, batch_size):
46 |     """Get step size for given total item size and batch size.
47 |     inputs:
48 |         total_items = number of total items
49 |         batch_size = number of items in a batch during training or validation
50 | 
51 |     outputs:
52 |         step_size = number of steps per epoch for model training
53 |     """
54 |     return math.ceil(total_items / batch_size)
55 | 
56 | def generator(dataset, prior_boxes, hyper_params):
57 |     """Tensorflow data generator for the fit method, yielding inputs and outputs.
58 |     inputs:
59 |         dataset = tf.data.Dataset, PaddedBatchDataset
60 |         prior_boxes = (total_bboxes, [center_x, center_y, width, height])
61 |             these values in normalized format between [0, 1]
62 |         hyper_params = dictionary
63 | 
64 |     outputs:
65 |         yield inputs, outputs
66 |     """
67 |     while True:
68 |         for image_data in dataset:
69 |             img, gt_boxes, gt_landmarks = image_data
70 |             actual_deltas, actual_labels = calculate_actual_outputs(prior_boxes, gt_boxes, gt_landmarks, hyper_params)
71 |             yield img, (actual_deltas, actual_labels)
72 | 
73 | def calculate_actual_outputs(prior_boxes, gt_boxes, gt_landmarks, hyper_params):
74 |     """Calculate ssd actual output values.
75 |     Batch operations supported.
76 |     inputs:
77 |         prior_boxes = (total_bboxes, [center_x, center_y, width, height])
78 |             these values in normalized format between [0, 1]
79 |         gt_boxes = (batch_size, gt_box_size, [y1, x1, y2, x2])
80 |             these values in normalized format between [0, 1]
81 |         gt_landmarks = (batch_size, gt_box_size, total_landmarks, [x, y])
82 |             these values in normalized format between [0, 1]
83 |         hyper_params = dictionary
84 | 
85 |     outputs:
86 |         actual_deltas = (batch_size, total_bboxes, [delta_bbox_y, delta_bbox_x, delta_bbox_h, delta_bbox_w, delta_landmark_x0, delta_landmark_y0, ..., delta_landmark_xN, delta_landmark_yN])
87 |         actual_labels = (batch_size, total_bboxes, [1 or 0])
88 |     """
89 |     batch_size = tf.shape(gt_boxes)[0]
90 |     iou_threshold = hyper_params["iou_threshold"]
91 |     variances = hyper_params["variances"]
92 |     total_landmarks = hyper_params["total_landmarks"]
93 |     landmark_variances = total_landmarks * variances[0:2]  # [0.1, 0.1] repeated once per landmark
94 |     # Calculate iou values between each prior box and the ground truth boxes
95 |     iou_map = bbox_utils.generate_iou_map(bbox_utils.convert_xywh_to_bboxes(prior_boxes), gt_boxes)
96 |     # For each prior box, get the index of the ground truth box with the highest iou
97 |     max_indices_each_prior = tf.argmax(iou_map, axis=2, output_type=tf.int32)
98 |     # Merge the iou values column wise, keeping the best iou of each prior box
99 |     merged_iou_map = tf.reduce_max(iou_map, axis=2)
100 |     # Positive priors are those whose best iou exceeds the threshold
101 |     pos_cond = tf.greater(merged_iou_map, iou_threshold)
102 |     #
103 |     gt_landmarks = tf.reshape(gt_landmarks, (batch_size, -1, total_landmarks * 2))
104 |     gt_boxes_and_landmarks = tf.concat([gt_boxes, gt_landmarks], -1)
105 |     gt_boxes_and_landmarks_map = tf.gather(gt_boxes_and_landmarks, max_indices_each_prior, batch_dims=1)
106 |     expanded_gt_boxes_and_landmarks = tf.where(tf.expand_dims(pos_cond, -1), gt_boxes_and_landmarks_map, tf.zeros_like(gt_boxes_and_landmarks_map))
107 |     actual_deltas = bbox_utils.get_deltas_from_bboxes_and_landmarks(prior_boxes, expanded_gt_boxes_and_landmarks) / (variances + landmark_variances)
108 |     #
109 |     actual_labels = tf.expand_dims(tf.cast(pos_cond, dtype=tf.float32), -1)
110 |     #
111 |     return actual_deltas, actual_labels
112 | 
--------------------------------------------------------------------------------
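An end-to-end sketch of how the utilities above might be wired together for training; this is an assumption-laden illustration, not a verbatim excerpt from trainer.py, and the split string and batch size are illustrative:

    import tensorflow as tf
    from utils import bbox_utils, data_utils, train_utils

    hyper_params = train_utils.get_hyper_params()
    prior_boxes = bbox_utils.generate_prior_boxes(
        hyper_params["feature_map_shapes"], hyper_params["aspect_ratios"])

    batch_size = 32
    img_size = hyper_params["img_size"]
    train_data, info = data_utils.get_dataset("the300w_lp", "train[:80%]")
    train_total = data_utils.get_total_item_size(info, "train[:80%]")
    train_data = train_data.map(lambda x: data_utils.preprocessing(x, img_size, img_size))
    train_data = train_data.padded_batch(
        batch_size,
        padded_shapes=data_utils.get_data_shapes(),
        padding_values=data_utils.get_padding_values())

    step_size = train_utils.get_step_size(train_total, batch_size)
    train_gen = train_utils.generator(train_data, prior_boxes, hyper_params)
    # then: model.fit(train_gen, steps_per_epoch=step_size, ...)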