├── .gitignore ├── LICENSE ├── README.md ├── augmentation.py ├── blazeface.py ├── environment.yml ├── predictor.py ├── ssd_loss.py ├── trainer.py └── utils ├── __init__.py ├── bbox_utils.py ├── data_utils.py ├── drawing_utils.py ├── io_utils.py ├── landmark_utils.py └── train_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | trained/* 3 | data/* 4 | logs/* 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # BlazeFace
2 |
3 | This is an **unofficial** TensorFlow BlazeFace implementation from scratch.
4 | This repo includes the entire BlazeFace training pipeline.
5 | However, since the dataset used for training is a modified version of several datasets, it is not shared at this stage.
6 | Anchor / prior box hyperparameters were taken from the [MediaPipe](https://github.com/google/mediapipe) implementation.
7 | Loss calculation and augmentation methods were implemented as in [SSD](https://github.com/FurkanOM/tf-ssd).
8 |
9 | It is implemented and tested with **TensorFlow 2.0, 2.1, and 2.2**.
10 |
11 | ## Usage
12 |
13 | Project models were created in a virtual environment using [miniconda](https://docs.conda.io/en/latest/miniconda.html).
14 | You can also create the required virtual environment with [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file).
15 |
16 | To create the virtual environment (a TensorFlow 2 GPU environment):
17 |
18 | ```sh
19 | conda env create -f environment.yml
20 | ```
21 |
22 | To train and test the BlazeFace model:
23 |
24 | ```sh
25 | python trainer.py
26 | python predictor.py
27 | ```
28 |
29 | If you have GPU issues, you can use the **-handle-gpu** flag with these commands:
30 |
31 | ```sh
32 | python trainer.py -handle-gpu
33 | ```
34 |
35 | ### References
36 |
37 | * BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs [[paper]](https://arxiv.org/abs/1907.05047)
38 | * SSD: Single Shot MultiBox Detector [[paper]](https://arxiv.org/abs/1512.02325)
39 | * MediaPipe [[code]](https://github.com/google/mediapipe)
40 | * BlazeFace-PyTorch [[code]](https://github.com/hollance/BlazeFace-PyTorch)
41 |
--------------------------------------------------------------------------------
/augmentation.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from utils import bbox_utils, landmark_utils
3 |
4 | def apply(img, gt_boxes, gt_landmarks):
5 |     """Randomly applying data augmentation methods to image and ground truth boxes.
6 |     inputs:
7 |         img = (height, width, depth)
8 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
9 |             in normalized form [0, 1]
10 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
11 |             in normalized form [0, 1]
12 |     outputs:
13 |         modified_img = (final_height, final_width, depth)
14 |         modified_gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
15 |             in normalized form [0, 1]
16 |         modified_gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
17 |             in normalized form [0, 1]
18 |     """
19 |     # Color operations
20 |     # Randomly change hue, saturation, brightness and contrast of image
21 |     color_methods = [random_brightness, random_contrast, random_hue, random_saturation]
22 |     # Geometric operations
23 |     # Randomly sample a patch image and ground truth boxes
24 |     geometric_methods = [patch]
25 |     #
26 |     for augmentation_method in geometric_methods + color_methods:
27 |         img, gt_boxes, gt_landmarks = randomly_apply_operation(augmentation_method, img, gt_boxes, gt_landmarks)
28 |     #
29 |     img = tf.clip_by_value(img, 0., 1.)
30 |     return img, gt_boxes, gt_landmarks
31 |
32 | def get_random_bool():
33 |     """Generating random boolean.
34 |     outputs:
35 |         random boolean 0d tensor
36 |     """
37 |     return tf.greater(tf.random.uniform((), dtype=tf.float32), 0.5)
38 |
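For context, the `apply` pipeline above chains one geometric and four color operations, each applied with probability 0.5 through `randomly_apply_operation` (defined next). A minimal usage sketch with dummy normalized inputs (the shapes follow the docstrings above; the example values are not from the repo):

```python
import tensorflow as tf
import augmentation

# One fake RGB image in [0, 1] with a single face box and 6 landmarks.
img = tf.random.uniform((128, 128, 3))
gt_boxes = tf.constant([[0.2, 0.3, 0.6, 0.7]], dtype=tf.float32)     # (1, [y1, x1, y2, x2])
gt_landmarks = tf.random.uniform((1, 6, 2), minval=0.3, maxval=0.7)  # (1, 6, [x, y])

aug_img, aug_boxes, aug_landmarks = augmentation.apply(img, gt_boxes, gt_landmarks)
print(aug_img.shape, aug_boxes.shape, aug_landmarks.shape)  # (128, 128, 3) (1, 4) (1, 6, 2)
```

Because every operation is wrapped in `tf.cond`, the same call works both eagerly and inside a `tf.data` pipeline.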
39 | def randomly_apply_operation(operation, img, gt_boxes, gt_landmarks, *args):
40 |     """Randomly applying the given method to the image and ground truth boxes.
41 |     inputs:
42 |         operation = callable method
43 |         img = (height, width, depth)
44 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
45 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
46 |     outputs:
47 |         modified_or_not_img = (final_height, final_width, depth)
48 |         modified_or_not_gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
49 |         modified_or_not_gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
50 |     """
51 |     return tf.cond(
52 |         get_random_bool(),
53 |         lambda: operation(img, gt_boxes, gt_landmarks, *args),
54 |         lambda: (img, gt_boxes, gt_landmarks)
55 |     )
56 |
57 | def random_brightness(img, gt_boxes, gt_landmarks, max_delta=0.12):
58 |     """Randomly change brightness of the image.
59 |     inputs:
60 |         img = (height, width, depth)
61 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
62 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
63 |     outputs:
64 |         modified_img = (height, width, depth)
65 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
66 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
67 |     """
68 |     return tf.image.random_brightness(img, max_delta), gt_boxes, gt_landmarks
69 |
70 | def random_contrast(img, gt_boxes, gt_landmarks, lower=0.5, upper=1.5):
71 |     """Randomly change contrast of the image.
72 |     inputs:
73 |         img = (height, width, depth)
74 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
75 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
76 |     outputs:
77 |         modified_img = (height, width, depth)
78 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
79 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
80 |     """
81 |     return tf.image.random_contrast(img, lower, upper), gt_boxes, gt_landmarks
82 |
83 | def random_hue(img, gt_boxes, gt_landmarks, max_delta=0.08):
84 |     """Randomly change hue of the image.
85 |     inputs:
86 |         img = (height, width, depth)
87 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
88 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
89 |     outputs:
90 |         modified_img = (height, width, depth)
91 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
92 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
93 |     """
94 |     return tf.image.random_hue(img, max_delta), gt_boxes, gt_landmarks
95 |
96 | def random_saturation(img, gt_boxes, gt_landmarks, lower=0.5, upper=1.5):
97 |     """Randomly change saturation of the image.
98 |     inputs:
99 |         img = (height, width, depth)
100 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
101 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
102 |     outputs:
103 |         modified_img = (height, width, depth)
104 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
105 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
106 |     """
107 |     return tf.image.random_saturation(img, lower, upper), gt_boxes, gt_landmarks
108 |
109 | ##############################################################################
110 | ## Sample patch start
111 | ##############################################################################
112 |
113 | def get_random_min_overlap():
114 |     """Generating random minimum overlap value.
115 |     outputs:
116 |         min_overlap = random minimum overlap value 0d tensor
117 |     """
118 |     overlaps = tf.constant([0.1, 0.3, 0.5, 0.7, 0.9], dtype=tf.float32)
119 |     i = tf.random.uniform((), minval=0, maxval=tf.shape(overlaps)[0], dtype=tf.int32)
120 |     return overlaps[i]
121 |
122 | def expand_image(img, gt_boxes, gt_landmarks, height, width):
123 |     """Randomly expanding image and adjusting ground truth object coordinates.
124 |     inputs:
125 |         img = (height, width, depth)
126 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
127 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
128 |         height = height of the image
129 |         width = width of the image
130 |     outputs:
131 |         modified_img = (final_height, final_width, depth)
132 |         modified_gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
133 |         modified_gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
134 |     """
135 |     expansion_ratio = tf.random.uniform((), minval=1, maxval=4, dtype=tf.float32)
136 |     final_height, final_width = tf.round(height * expansion_ratio), tf.round(width * expansion_ratio)
137 |     pad_left = tf.round(tf.random.uniform((), minval=0, maxval=final_width - width, dtype=tf.float32))
138 |     pad_top = tf.round(tf.random.uniform((), minval=0, maxval=final_height - height, dtype=tf.float32))
139 |     pad_right = final_width - (width + pad_left)
140 |     pad_bottom = final_height - (height + pad_top)
141 |     #
142 |     mean, _ = tf.nn.moments(img, [0, 1])
143 |     expanded_image = tf.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0,0)), constant_values=-1)
144 |     expanded_image = tf.where(expanded_image == -1, mean, expanded_image)
145 |     #
146 |     min_max = tf.stack([-pad_top, -pad_left, pad_bottom+height, pad_right+width], -1) / [height, width, height, width]
147 |     modified_gt_boxes = bbox_utils.renormalize_bboxes_with_min_max(gt_boxes, min_max)
148 |     modified_gt_landmarks = landmark_utils.renormalize_landmarks_with_min_max(gt_landmarks, min_max)
149 |     #
150 |     return expanded_image, modified_gt_boxes, modified_gt_landmarks
151 |
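The `patch` function below drives its random crop with `tf.image.sample_distorted_bounding_box`, which returns a crop window whose overlap with the ground truth boxes is at least `min_object_covered`. A standalone sketch of that call (example values only, not taken from the repo):

```python
import tensorflow as tf

img = tf.random.uniform((300, 300, 3))
gt_boxes = tf.constant([[0.1, 0.1, 0.5, 0.5]], dtype=tf.float32)  # normalized [y1, x1, y2, x2]

begin, size, new_boundaries = tf.image.sample_distorted_bounding_box(
    tf.shape(img),
    bounding_boxes=tf.expand_dims(gt_boxes, 0),  # add the required batch dimension
    min_object_covered=0.5)

patch_img = tf.slice(img, begin, size)
# new_boundaries[0, 0] is the crop window as normalized [y1, x1, y2, x2];
# the repo feeds it to the renormalize helpers to remap boxes and landmarks.
print(patch_img.shape, new_boundaries[0, 0].numpy())
```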
152 | def patch(img, gt_boxes, gt_landmarks):
153 |     """Generating a random patch and adjusting the image and ground truth objects to this patch.
154 |     After this operation some of the ground truth boxes / objects could be removed from the image.
155 |     However, these objects are not excluded from the output; only their coordinates are set to zero.
156 |     inputs:
157 |         img = (height, width, depth)
158 |         gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
159 |             in normalized form [0, 1]
160 |         gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
161 |             in normalized form [0, 1]
162 |     outputs:
163 |         modified_img = (final_height, final_width, depth)
164 |         modified_gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
165 |             in normalized form [0, 1]
166 |         modified_gt_landmarks = (ground_truth_object_count, total_landmarks, [x, y])
167 |             in normalized form [0, 1]
168 |     """
169 |     img_shape = tf.cast(tf.shape(img), dtype=tf.float32)
170 |     org_height, org_width = img_shape[0], img_shape[1]
171 |     # Randomly expand image and adjust bounding boxes
172 |     img, gt_boxes, gt_landmarks = randomly_apply_operation(expand_image, img, gt_boxes, gt_landmarks, org_height, org_width)
173 |     # Get random minimum overlap value
174 |     min_overlap = get_random_min_overlap()
175 |     #
176 |     begin, size, new_boundaries = tf.image.sample_distorted_bounding_box(
177 |         tf.shape(img),
178 |         bounding_boxes=tf.expand_dims(gt_boxes, 0),
179 |         min_object_covered=min_overlap)
180 |     #
181 |     img = tf.slice(img, begin, size)
182 |     img = tf.image.resize(img, (org_height, org_width))
183 |     gt_boxes = bbox_utils.renormalize_bboxes_with_min_max(gt_boxes, new_boundaries[0, 0])
184 |     gt_landmarks = landmark_utils.renormalize_landmarks_with_min_max(gt_landmarks, new_boundaries[0, 0])
185 |     #
186 |     return img, gt_boxes, gt_landmarks
187 |
--------------------------------------------------------------------------------
/blazeface.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras.models import Model
3 | from tensorflow.keras.layers import Layer, Input, DepthwiseConv2D, Conv2D, MaxPool2D, Add, Activation
4 |
5 | class HeadWrapper(Layer):
6 |     """Merging all feature maps for detections.
7 | inputs: 8 | conv4_3 = (batch_size, (layer_shape x aspect_ratios), last_dimension) 9 | ssd300 conv4_3 shape => (38 x 38 x 4) = 5776 10 | conv7 = (batch_size, (layer_shape x aspect_ratios), last_dimension) 11 | ssd300 conv7 shape => (19 x 19 x 6) = 2166 12 | conv8_2 = (batch_size, (layer_shape x aspect_ratios), last_dimension) 13 | ssd300 conv8_2 shape => (10 x 10 x 6) = 600 14 | conv9_2 = (batch_size, (layer_shape x aspect_ratios), last_dimension) 15 | ssd300 conv9_2 shape => (5 x 5 x 6) = 150 16 | conv10_2 = (batch_size, (layer_shape x aspect_ratios), last_dimension) 17 | ssd300 conv10_2 shape => (3 x 3 x 4) = 36 18 | conv11_2 = (batch_size, (layer_shape x aspect_ratios), last_dimension) 19 | ssd300 conv11_2 shape => (1 x 1 x 4) = 4 20 | Total = 8732 default box 21 | 22 | outputs: 23 | merged_head = (batch_size, total_bboxes, last_dimension) 24 | """ 25 | 26 | def __init__(self, last_dimension, **kwargs): 27 | super(HeadWrapper, self).__init__(**kwargs) 28 | self.last_dimension = last_dimension 29 | 30 | def get_config(self): 31 | config = super(HeadWrapper, self).get_config() 32 | config.update({"last_dimension": self.last_dimension}) 33 | return config 34 | 35 | def call(self, inputs): 36 | last_dimension = self.last_dimension 37 | batch_size = tf.shape(inputs[0])[0] 38 | outputs = [] 39 | for conv_layer in inputs: 40 | outputs.append(tf.reshape(conv_layer, (batch_size, -1, last_dimension))) 41 | # 42 | return tf.concat(outputs, axis=1) 43 | 44 | def blaze_block(input, filters, stride=1): 45 | y = input 46 | x = DepthwiseConv2D((5,5), strides=stride, padding="same")(input) 47 | x = Conv2D(filters, (1,1), padding="same")(x) 48 | if stride == 2: 49 | y = MaxPool2D((2,2))(y) 50 | y = Conv2D(filters, (1,1), padding="same")(y) 51 | output = Add()([x, y]) 52 | return Activation("relu")(output) 53 | 54 | def double_blaze_block(input, filters, stride=1): 55 | y = input 56 | x = DepthwiseConv2D((5,5), strides=stride, padding="same")(input) 57 | x = Conv2D(filters[0], (1,1), padding="same")(x) 58 | x = Activation("relu")(x) 59 | x = DepthwiseConv2D((5,5), padding="same")(x) 60 | x = Conv2D(filters[1], (1,1), padding="same")(x) 61 | if stride == 2: 62 | y = MaxPool2D((2,2))(y) 63 | y = Conv2D(filters[1], (1,1), padding="same")(y) 64 | output = Add()([x, y]) 65 | return Activation("relu")(output) 66 | 67 | def get_model(hyper_params): 68 | detections_per_layer = hyper_params["detections_per_layer"] 69 | img_size = hyper_params["img_size"] 70 | total_reg_points = hyper_params["total_landmarks"] * 2 + 4 71 | # 72 | input = Input(shape=(None, None, 3)) 73 | # First conv layer 74 | first_conv = Conv2D(24, (5,5), strides=2, padding="same", activation="relu")(input) 75 | # First blaze block 76 | single_1 = blaze_block(first_conv, 24) 77 | # Second blaze block 78 | single_2 = blaze_block(single_1, 24) 79 | # Third blaze block 80 | single_3 = blaze_block(single_2, 48, 2) 81 | # Fourth blaze block 82 | single_4 = blaze_block(single_3, 48) 83 | # Fifth blaze block 84 | single_5 = blaze_block(single_4, 48) 85 | # First double blaze block 86 | double_1 = double_blaze_block(single_5, [24, 96], 2) 87 | # Second double blaze block 88 | double_2 = double_blaze_block(double_1, [24, 96]) 89 | # Third double blaze block 90 | double_3 = double_blaze_block(double_2, [24, 96]) 91 | # Fourth double blaze block 92 | double_4 = double_blaze_block(double_3, [24, 96], 2) 93 | # Fifth double blaze block 94 | double_5 = double_blaze_block(double_4, [24, 96]) 95 | # Sixth double blaze block 96 | double_6 = 
double_blaze_block(double_5, [24, 96])
97 |     #
98 |     double_3_labels = Conv2D(detections_per_layer[0], (3, 3), padding="same")(double_3)
99 |     double_6_labels = Conv2D(detections_per_layer[1], (3, 3), padding="same")(double_6)
100 |     #
101 |     double_3_boxes = Conv2D(detections_per_layer[0] * total_reg_points, (3, 3), padding="same")(double_3)
102 |     double_6_boxes = Conv2D(detections_per_layer[1] * total_reg_points, (3, 3), padding="same")(double_6)
103 |     #
104 |     pred_labels = HeadWrapper(1, name="conf_head")([double_3_labels, double_6_labels])
105 |     pred_labels = Activation("sigmoid", name="conf")(pred_labels)
106 |     pred_deltas = HeadWrapper(total_reg_points, name="loc")([double_3_boxes, double_6_boxes])
107 |     #
108 |     return Model(inputs=input, outputs=[pred_deltas, pred_labels])
109 |
110 | def init_model(model):
111 |     """Initializing the model with dummy data, so that weights (together with the optimizer state) can be loaded and the graph can be constructed.
112 |     inputs:
113 |         model = tf.keras.model
114 |
115 |     """
116 |     model(tf.random.uniform((1, 512, 512, 3)))
117 |
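For orientation, here is how the two detection heads combine into the 896 prior boxes of the original BlazeFace. The hyperparameter values below are illustrative assumptions (`img_size=128`, `detections_per_layer=[2, 6]`, `total_landmarks=6`); the repo's actual values come from `train_utils.get_hyper_params()`, which is not shown in this section:

```python
import tensorflow as tf
import blazeface

hyper_params = {
    "img_size": 128,                 # assumed value, see above
    "detections_per_layer": [2, 6],  # anchors per cell on the 16x16 and 8x8 maps
    "total_landmarks": 6,
}
model = blazeface.get_model(hyper_params)
pred_deltas, pred_labels = model(tf.random.uniform((1, 128, 128, 3)))
# 16*16*2 + 8*8*6 = 512 + 384 = 896 prior boxes
print(pred_deltas.shape)  # (1, 896, 16) -> 4 box deltas + 6 landmarks * 2 coordinates
print(pred_labels.shape)  # (1, 896, 1)  -> face / no-face score
```

`get_model` only reads these three keys, so the sketch runs without the full hyperparameter dictionary.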
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: blazeface-env
2 | channels:
3 |   - defaults
4 | dependencies:
5 |   - _libgcc_mutex=0.1=main
6 |   - _tflow_select=2.1.0=gpu
7 |   - absl-py=0.9.0=py37_0
8 |   - asn1crypto=1.3.0=py37_0
9 |   - astor=0.8.0=py37_0
10 |   - blas=1.0=mkl
11 |   - blinker=1.4=py37_0
12 |   - c-ares=1.15.0=h7b6447c_1001
13 |   - ca-certificates=2020.1.1=0
14 |   - cachetools=3.1.1=py_0
15 |   - certifi=2019.11.28=py37_1
16 |   - cffi=1.14.0=py37h2e261b9_0
17 |   - chardet=3.0.4=py37_1003
18 |   - click=7.1.1=py_0
19 |   - cryptography=2.8=py37h1ba5d50_0
20 |   - cudatoolkit=10.0.130=0
21 |   - cudnn=7.6.5=cuda10.0_0
22 |   - cupti=10.0.130=0
23 |   - gast=0.2.2=py37_0
24 |   - google-auth=1.11.2=py_0
25 |   - google-auth-oauthlib=0.4.1=py_2
26 |   - google-pasta=0.2.0=py_0
27 |   - grpcio=1.27.2=py37hf8bcb03_0
28 |   - h5py=2.10.0=py37h7918eee_0
29 |   - hdf5=1.10.4=hb1b8bf9_0
30 |   - idna=2.9=py_1
31 |   - intel-openmp=2020.0=166
32 |   - keras-applications=1.0.8=py_0
33 |   - keras-preprocessing=1.1.0=py_1
34 |   - ld_impl_linux-64=2.33.1=h53a641e_7
35 |   - libedit=3.1.20181209=hc058e9b_0
36 |   - libffi=3.2.1=hd88cf55_4
37 |   - libgcc-ng=9.1.0=hdf63c60_0
38 |   - libgfortran-ng=7.3.0=hdf63c60_0
39 |   - libprotobuf=3.11.4=hd408876_0
40 |   - libstdcxx-ng=9.1.0=hdf63c60_0
41 |   - markdown=3.1.1=py37_0
42 |   - mkl=2020.0=166
43 |   - mkl-service=2.3.0=py37he904b0f_0
44 |   - mkl_fft=1.0.15=py37ha843d7b_0
45 |   - mkl_random=1.1.0=py37hd6b4f25_0
46 |   - ncurses=6.2=he6710b0_0
47 |   - numpy=1.18.1=py37h4f9e942_0
48 |   - numpy-base=1.18.1=py37hde5b4d6_1
49 |   - oauthlib=3.1.0=py_0
50 |   - openssl=1.1.1f=h7b6447c_0
51 |   - opt_einsum=3.1.0=py_0
52 |   - pip=20.0.2=py37_1
53 |   - protobuf=3.11.4=py37he6710b0_0
54 |   - pyasn1=0.4.8=py_0
55 |   - pyasn1-modules=0.2.7=py_0
56 |   - pycparser=2.20=py_0
57 |   - pyjwt=1.7.1=py37_0
58 |   - pyopenssl=19.1.0=py37_0
59 |   - pysocks=1.7.1=py37_0
60 |   - python=3.7.7=hcf32534_0_cpython
61 |   - readline=8.0=h7b6447c_0
62 |   - requests=2.23.0=py37_0
63 |   - requests-oauthlib=1.3.0=py_0
64 |   - rsa=4.0=py_0
65 |   - scipy=1.4.1=py37h0b6359f_0
66 |   - setuptools=46.1.3=py37_0
67 |   - six=1.14.0=py37_0
68 |   - sqlite=3.31.1=h7b6447c_0
69 |   - tensorboard=2.1.0=py3_0
70 |   - tensorflow=2.0.0=gpu_py37h768510d_0
71 |   - tensorflow-base=2.0.0=gpu_py37h0ec5d1f_0
72 |   - tensorflow-estimator=2.0.0=pyh2649769_0
73 |   - tensorflow-gpu=2.0.0=h0d30ee6_0
74 |   - termcolor=1.1.0=py37_1
75 |   - tk=8.6.8=hbc83047_0
76 |   - urllib3=1.25.8=py37_0
77 |   - werkzeug=0.16.1=py_0
78 |   - wheel=0.34.2=py37_0
79 |   - wrapt=1.12.1=py37h7b6447c_1
80 |   - xz=5.2.4=h14c3975_4
81 |   - zlib=1.2.11=h7b6447c_3
82 |   - pip:
83 |     - attrs==19.3.0
84 |     - cycler==0.10.0
85 |     - dill==0.3.1.1
86 |     - future==0.18.2
87 |     - googleapis-common-protos==1.51.0
88 |     - kiwisolver==1.2.0
89 |     - matplotlib==3.2.1
90 |     - pillow==7.1.1
91 |     - promise==2.3
92 |     - pyparsing==2.4.6
93 |     - python-dateutil==2.8.1
94 |     - tensorflow-datasets==2.1.0
95 |     - tensorflow-metadata==0.21.1
96 |     - tqdm==4.45.0
97 | prefix: /home/furkan/miniconda3/envs/blazeface-env
98 |
--------------------------------------------------------------------------------
/predictor.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from utils import bbox_utils, data_utils, drawing_utils, io_utils, train_utils, landmark_utils
3 | import blazeface
4 |
5 | args = io_utils.handle_args()
6 | if args.handle_gpu:
7 |     io_utils.handle_gpu_compatibility()
8 |
9 | batch_size = 1
10 | use_custom_images = False
11 | custom_image_path = "data/images/"
12 | hyper_params = train_utils.get_hyper_params()
13 | img_size = hyper_params["img_size"]
14 |
15 | data_types = data_utils.get_data_types()
16 | data_shapes = data_utils.get_data_shapes()
17 | padding_values = data_utils.get_padding_values()
18 |
19 | if use_custom_images:
20 |     img_paths = data_utils.get_custom_imgs(custom_image_path)
21 |     total_items = len(img_paths)
22 |     test_data = tf.data.Dataset.from_generator(lambda: data_utils.custom_data_generator(
23 |         img_paths, img_size, img_size), data_types, data_shapes)
24 | else:
25 |     test_split = "train[80%:]"
26 |     test_data, info = data_utils.get_dataset("the300w_lp", test_split)
27 |     total_items = data_utils.get_total_item_size(info, test_split)
28 |     test_data = test_data.map(lambda x: data_utils.preprocessing(x, img_size, img_size))
29 | #
30 | test_data = test_data.padded_batch(batch_size, padded_shapes=data_shapes, padding_values=padding_values)
31 |
32 | model = blazeface.get_model(hyper_params)
33 | model_path = io_utils.get_model_path()
34 | model.load_weights(model_path)
35 |
36 | prior_boxes = bbox_utils.generate_prior_boxes(hyper_params["feature_map_shapes"], hyper_params["aspect_ratios"])
37 |
38 | variances = hyper_params["variances"]
39 | total_landmarks = hyper_params["total_landmarks"]
40 | landmark_variances = total_landmarks * variances[0:2]
41 | variances += landmark_variances
42 |
43 | for image_data in test_data:
44 |     img, _, _ = image_data
45 |     pred_deltas, pred_scores = model.predict_on_batch(img)
46 |     pred_deltas *= variances
47 |     #
48 |     pred_bboxes_and_landmarks = bbox_utils.get_bboxes_and_landmarks_from_deltas(prior_boxes, pred_deltas)
49 |     pred_bboxes_and_landmarks = tf.clip_by_value(pred_bboxes_and_landmarks, 0, 1)
50 |     #
51 |     pred_scores = tf.cast(pred_scores, tf.float32)
52 |     #
53 |     weighted_suppressed_data = bbox_utils.weighted_suppression(pred_scores[0], pred_bboxes_and_landmarks[0])
54 |     #
55 |     weighted_bboxes = weighted_suppressed_data[..., 0:4]
56 |     weighted_landmarks = weighted_suppressed_data[..., 4:]
57 |     #
58 |     denormalized_bboxes = bbox_utils.denormalize_bboxes(weighted_bboxes, img_size, img_size)
59 |     weighted_landmarks = tf.reshape(weighted_landmarks, (-1, total_landmarks, 2))
60 |     denormalized_landmarks = landmark_utils.denormalize_landmarks(weighted_landmarks, img_size, img_size)
61 |     drawing_utils.draw_bboxes_with_landmarks(img[0], denormalized_bboxes,
denormalized_landmarks)
62 |
--------------------------------------------------------------------------------
/ssd_loss.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | class CustomLoss(object):
4 |     def __init__(self, neg_pos_ratio, loc_loss_alpha):
5 |         self.neg_pos_ratio = tf.constant(neg_pos_ratio, dtype=tf.float32)
6 |         self.loc_loss_alpha = tf.constant(loc_loss_alpha, dtype=tf.float32)
7 |
8 |     def loc_loss_fn(self, actual_bbox_deltas, pred_bbox_deltas):
9 |         """Calculating SSD localization loss value for only positive samples.
10 |         inputs:
11 |             actual_bbox_deltas = (batch_size, total_bboxes, [delta_bbox_y, delta_bbox_x, delta_bbox_h, delta_bbox_w, delta_landmark_x0, delta_landmark_y0, ..., delta_landmark_xN, delta_landmark_yN])
12 |             pred_bbox_deltas = (batch_size, total_bboxes, [delta_bbox_y, delta_bbox_x, delta_bbox_h, delta_bbox_w, delta_landmark_x0, delta_landmark_y0, ..., delta_landmark_xN, delta_landmark_yN])
13 |
14 |         outputs:
15 |             loc_loss = localization / regression / bounding box loss value
16 |         """
17 |         total_reg_points = tf.shape(actual_bbox_deltas)[-1]
18 |         # Localization / bbox / regression loss calculation for all bboxes
19 |         loc_loss_fn = tf.losses.Huber(reduction=tf.losses.Reduction.NONE)
20 |         loc_loss_for_all = loc_loss_fn(actual_bbox_deltas, pred_bbox_deltas)
21 |         # Since TF 2.2, Huber loss takes the mean over the last axis
22 |         loc_loss_for_all = tf.cond(tf.greater(tf.rank(loc_loss_for_all), tf.constant(2)),
23 |             lambda: tf.reduce_sum(loc_loss_for_all, axis=-1),
24 |             lambda: loc_loss_for_all * tf.cast(total_reg_points, dtype=tf.float32))
25 |         #
26 |         pos_cond = tf.reduce_any(tf.not_equal(actual_bbox_deltas, tf.constant(0.0)), axis=2)
27 |         pos_mask = tf.cast(pos_cond, dtype=tf.float32)
28 |         total_pos_bboxes = tf.reduce_sum(pos_mask, axis=1)
29 |         #
30 |         loc_loss = tf.reduce_sum(pos_mask * loc_loss_for_all, axis=-1)
31 |         total_pos_bboxes = tf.where(tf.equal(total_pos_bboxes, tf.constant(0.0)), tf.constant(1.0), total_pos_bboxes)
32 |         loc_loss = loc_loss / total_pos_bboxes
33 |         #
34 |         return loc_loss * self.loc_loss_alpha
35 |
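The version check above exists because the reduction behavior of `tf.losses.Huber` changed across TF 2.x releases. A quick sketch showing what `loc_loss_fn` has to normalize for (the shapes are illustrative):

```python
import tensorflow as tf

huber = tf.losses.Huber(reduction=tf.losses.Reduction.NONE)
y_true = tf.random.uniform((2, 8, 16))  # (batch, boxes, regression points)
y_pred = tf.random.uniform((2, 8, 16))
per_box = huber(y_true, y_pred)
# On TF >= 2.2 this prints (2, 8): the mean over the 16 regression points.
# Earlier 2.x versions kept an extra axis, which is why loc_loss_fn checks
# tf.rank before either summing the last axis or rescaling by total_reg_points.
print(per_box.shape)
```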
36 |     def conf_loss_fn(self, actual_labels, pred_labels):
37 |         """Calculating SSD confidence loss value by performing hard negative mining as mentioned in the paper.
38 |         inputs:
39 |             actual_labels = (batch_size, total_bboxes, 1)
40 |             pred_labels = (batch_size, total_bboxes, 1)
41 |
42 |         outputs:
43 |             conf_loss = confidence / class / label loss value
44 |         """
45 |         # Confidence / Label loss calculation for all labels
46 |         conf_loss_fn = tf.losses.BinaryCrossentropy(reduction=tf.losses.Reduction.NONE)
47 |         conf_loss_for_all = conf_loss_fn(actual_labels, pred_labels)
48 |         #
49 |         squeezed_actual_labels = tf.squeeze(actual_labels, -1)
50 |         pos_cond = tf.not_equal(squeezed_actual_labels, tf.constant(0.0))
51 |         pos_mask = tf.cast(pos_cond, dtype=tf.float32)
52 |         total_pos_bboxes = tf.reduce_sum(pos_mask, axis=1)
53 |         # Hard negative mining
54 |         total_neg_bboxes = tf.cast(total_pos_bboxes * self.neg_pos_ratio, tf.int32)
55 |         #
56 |         masked_loss = tf.where(tf.equal(squeezed_actual_labels, tf.constant(0.0)), conf_loss_for_all, tf.zeros_like(conf_loss_for_all, dtype=tf.float32))
57 |         sorted_loss = tf.argsort(masked_loss, direction="DESCENDING")
58 |         sorted_loss = tf.argsort(sorted_loss)
59 |         neg_cond = tf.less(sorted_loss, tf.expand_dims(total_neg_bboxes, axis=1))
60 |         neg_mask = tf.cast(neg_cond, dtype=tf.float32)
61 |         #
62 |         final_mask = pos_mask + neg_mask
63 |         conf_loss = tf.reduce_sum(final_mask * conf_loss_for_all, axis=-1)
64 |         total_pos_bboxes = tf.where(tf.equal(total_pos_bboxes, tf.constant(0.0)), tf.constant(1.0), total_pos_bboxes)
65 |         conf_loss = conf_loss / total_pos_bboxes
66 |         #
67 |         return conf_loss
68 |
--------------------------------------------------------------------------------
/trainer.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, LearningRateScheduler
3 | from tensorflow.keras.optimizers import SGD, Adam
4 | import augmentation
5 | from ssd_loss import CustomLoss
6 | from utils import bbox_utils, data_utils, io_utils, train_utils, drawing_utils, landmark_utils
7 | import blazeface
8 | import random
9 |
10 | args = io_utils.handle_args()
11 | if args.handle_gpu:
12 |     io_utils.handle_gpu_compatibility()
13 |
14 | batch_size = 32
15 | epochs = 150
16 | load_weights = False
17 | hyper_params = train_utils.get_hyper_params()
18 |
19 | train_split = "train[:80%]"
20 | val_split = "train[80%:]"
21 | train_data, info = data_utils.get_dataset("the300w_lp", train_split)
22 | val_data, _ = data_utils.get_dataset("the300w_lp", val_split)
23 | train_total_items = data_utils.get_total_item_size(info, train_split)
24 | val_total_items = data_utils.get_total_item_size(info, val_split)
25 | #
26 | img_size = hyper_params["img_size"]
27 |
28 | train_data = train_data.map(lambda x : data_utils.preprocessing(x, img_size, img_size, augmentation.apply))
29 | val_data = val_data.map(lambda x : data_utils.preprocessing(x, img_size, img_size))
30 | #
31 | data_shapes = data_utils.get_data_shapes()
32 | padding_values = data_utils.get_padding_values()
33 | train_data = train_data.shuffle(batch_size*12).padded_batch(batch_size, padded_shapes=data_shapes, padding_values=padding_values)
34 | val_data = val_data.padded_batch(batch_size, padded_shapes=data_shapes, padding_values=padding_values)
35 | #
36 | model = blazeface.get_model(hyper_params)
37 | custom_losses = CustomLoss(hyper_params["neg_pos_ratio"], hyper_params["loc_loss_alpha"])
38 | model.compile(optimizer=Adam(learning_rate=1e-3),
39 |               loss=[custom_losses.loc_loss_fn, custom_losses.conf_loss_fn])
40 | blazeface.init_model(model)
41 | #
42 | model_path =
io_utils.get_model_path()
43 | if load_weights:
44 |     model.load_weights(model_path)
45 | log_path = io_utils.get_log_path("blazeface/")
46 | # We calculate the prior boxes once and reuse them for all operations, because all images have the same size
47 | prior_boxes = bbox_utils.generate_prior_boxes(hyper_params["feature_map_shapes"], hyper_params["aspect_ratios"])
48 | #
49 | train_feed = train_utils.generator(train_data, prior_boxes, hyper_params)
50 | val_feed = train_utils.generator(val_data, prior_boxes, hyper_params)
51 |
52 | checkpoint_callback = ModelCheckpoint(model_path, monitor="val_loss", save_best_only=True, save_weights_only=True)
53 | tensorboard_callback = TensorBoard(log_dir=log_path)
54 | learning_rate_callback = LearningRateScheduler(train_utils.scheduler, verbose=0)
55 |
56 | step_size_train = train_utils.get_step_size(train_total_items, batch_size)
57 | step_size_val = train_utils.get_step_size(val_total_items, batch_size)
58 | model.fit(train_feed,
59 |           steps_per_epoch=step_size_train,
60 |           validation_data=val_feed,
61 |           validation_steps=step_size_val,
62 |           epochs=epochs,
63 |           callbacks=[checkpoint_callback, tensorboard_callback, learning_rate_callback])
64 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FurkanOM/tf-blazeface/dce7aa2b9750b63a0e3611ccc18150e59f220645/utils/__init__.py
--------------------------------------------------------------------------------
/utils/bbox_utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | def get_weighted_boxes_and_landmarks(scores, bboxes_and_landmarks, mask):
4 |     """Calculating weighted mean of given bboxes and landmarks according to the mask.
5 |     inputs:
6 |         scores = (total_bboxes, [probability])
7 |         bboxes_and_landmarks = (total_bboxes, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
8 |         mask = (total_bboxes,)
9 |
10 |     outputs:
11 |         weighted_bbox_and_landmark = (1, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
12 |     """
13 |     selected_scores = scores[mask]
14 |     selected_bboxes_and_landmarks = bboxes_and_landmarks[mask]
15 |     weighted_sum = tf.reduce_sum(selected_bboxes_and_landmarks * selected_scores, 0)
16 |     sum_selected_scores = tf.reduce_sum(selected_scores, 0)
17 |     sum_selected_scores = tf.where(tf.equal(sum_selected_scores, 0.0), 1.0, sum_selected_scores)
18 |     return tf.expand_dims(weighted_sum / sum_selected_scores, 0)
19 |
20 | def weighted_suppression_body(counter, iou_threshold, scores, bboxes_and_landmarks, weighted_suppressed_data):
21 |     """Body of the weighted-mean suppression while loop.
22 |     inputs:
23 |         counter = while body counter
24 |         iou_threshold = threshold value for overlapping bounding boxes
25 |         scores = (total_bboxes, [probability])
26 |         bboxes_and_landmarks = (total_bboxes, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
27 |         weighted_suppressed_data = (M, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
28 |
29 |     outputs:
30 |         counter = while body counter
31 |         iou_threshold = threshold value for overlapping bounding boxes
32 |         scores = (total_bboxes - N, [probability])
33 |         bboxes_and_landmarks = (total_bboxes - N, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
34 |         weighted_suppressed_data = (M + 1, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
35 |     """
36 |     counter = tf.add(counter, 1)
37 |     first_box = bboxes_and_landmarks[0, 0:4]
38 |     iou_map = generate_iou_map(first_box, bboxes_and_landmarks[..., 0:4], transpose_perm=[1, 0])
39 |     overlapped_mask = tf.reshape(tf.greater(iou_map, iou_threshold), (-1,))
40 |     weighted_bbox_and_landmark = get_weighted_boxes_and_landmarks(scores, bboxes_and_landmarks, overlapped_mask)
41 |     weighted_suppressed_data = tf.concat([weighted_suppressed_data, weighted_bbox_and_landmark], axis=0)
42 |     not_overlapped_mask = tf.logical_not(overlapped_mask)
43 |     scores = scores[not_overlapped_mask]
44 |     bboxes_and_landmarks = bboxes_and_landmarks[not_overlapped_mask]
45 |     return counter, iou_threshold, scores, bboxes_and_landmarks, weighted_suppressed_data
46 |
47 | def weighted_suppression(scores, bboxes_and_landmarks, max_total_size=50, score_threshold=0.75, iou_threshold=0.3):
48 |     """BlazeFace weighted-mean suppression algorithm.
49 |     inputs:
50 |         scores = (total_bboxes, [probability])
51 |         bboxes_and_landmarks = (total_bboxes, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
52 |         max_total_size = maximum returned bounding boxes and landmarks
53 |         score_threshold = threshold value for bounding boxes and landmarks selection
54 |         iou_threshold = threshold value for overlapping bounding boxes
55 |
56 |     outputs:
57 |         weighted_bboxes_and_landmarks = (dynamic_size, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
58 |     """
59 |     score_mask = tf.squeeze(tf.greater(scores, score_threshold), -1)
60 |     scores = scores[score_mask]
61 |     bboxes_and_landmarks = bboxes_and_landmarks[score_mask]
62 |     sorted_indices = tf.argsort(scores, axis=0, direction="DESCENDING")
63 |     sorted_scores = tf.gather_nd(scores, sorted_indices)
64 |     sorted_bboxes_and_landmarks = tf.gather_nd(bboxes_and_landmarks, sorted_indices)
65 |     counter = tf.constant(0, tf.int32)
66 |     weighted_data = tf.zeros(tf.shape(bboxes_and_landmarks[0:1]), dtype=tf.float32)
67 |     cond = lambda counter, iou_threshold, scores, data, weighted: tf.logical_and(tf.less(counter, max_total_size), tf.greater(tf.shape(scores)[0], 0))
68 |     _, _, _, _, weighted_data = tf.while_loop(cond, weighted_suppression_body,
69 |         [counter, iou_threshold, sorted_scores, sorted_bboxes_and_landmarks, weighted_data])
70 |     #
71 |     weighted_data = weighted_data[1:]
72 |     pad_size = max_total_size - weighted_data.shape[0]
73 |     weighted_data = tf.pad(weighted_data, ((0, pad_size),(0, 0)))
74 |     return weighted_data
75 |
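Unlike hard NMS, the suppression loop above blends every box that overlaps the current top-scoring box into a single score-weighted detection, which is the tie-resolution strategy described in the BlazeFace paper. A shape-level usage sketch (example values only, not from the repo):

```python
import tensorflow as tf
from utils import bbox_utils

total_landmarks = 6
scores = tf.random.uniform((896, 1))                           # (total_bboxes, [probability])
boxes_lms = tf.random.uniform((896, 4 + total_landmarks * 2))  # boxes + landmarks

fused = bbox_utils.weighted_suppression(scores, boxes_lms,
                                        max_total_size=50,
                                        score_threshold=0.75,
                                        iou_threshold=0.3)
print(fused.shape)  # (50, 16); rows beyond the surviving detections are zero padding
```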
76 | def non_max_suppression(pred_bboxes, pred_labels, **kwargs):
77 |     """Applying non-maximum suppression.
78 |     Details can be found in the TensorFlow documentation:
79 |     https://www.tensorflow.org/api_docs/python/tf/image/combined_non_max_suppression
80 |     inputs:
81 |         pred_bboxes = (batch_size, total_bboxes, total_labels, [y1, x1, y2, x2])
82 |             total_labels should be 1 for binary operations like in rpn
83 |         pred_labels = (batch_size, total_bboxes, total_labels)
84 |         **kwargs = other parameters
85 |
86 |     outputs:
87 |         nms_boxes = (batch_size, max_detections, [y1, x1, y2, x2])
88 |         nmsed_scores = (batch_size, max_detections)
89 |         nmsed_classes = (batch_size, max_detections)
90 |         valid_detections = (batch_size)
91 |             Only the top valid_detections[i] entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid.
92 |             The rest of the entries are zero paddings.
93 |     """
94 |     return tf.image.combined_non_max_suppression(
95 |         pred_bboxes,
96 |         pred_labels,
97 |         **kwargs
98 |     )
99 |
100 | def generate_iou_map(bboxes, gt_boxes, transpose_perm=[0, 2, 1]):
101 |     """Calculating intersection over union values for each ground truth box in a dynamic manner.
102 |     Bounding boxes with 1 to 3 dimensions are supported.
103 |     It works even if bboxes and gt_boxes have different ranks.
104 |     inputs:
105 |         bboxes = (dynamic_dimension, [y1, x1, y2, x2])
106 |         gt_boxes = (dynamic_dimension, [y1, x1, y2, x2])
107 |         transpose_perm = (transpose_perm_order)
108 |             for 3d gt_boxes => [0, 2, 1]
109 |
110 |     outputs:
111 |         iou_map = (dynamic_dimension, total_gt_boxes)
112 |             same rank with the gt_boxes
113 |     """
114 |     gt_rank = tf.rank(gt_boxes)
115 |     gt_expand_axis = gt_rank - 2
116 |     #
117 |     bbox_y1, bbox_x1, bbox_y2, bbox_x2 = tf.split(bboxes, 4, axis=-1)
118 |     gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(gt_boxes, 4, axis=-1)
119 |     # Calculate bbox and ground truth boxes areas
120 |     gt_area = tf.squeeze((gt_y2 - gt_y1) * (gt_x2 - gt_x1), axis=-1)
121 |     bbox_area = tf.squeeze((bbox_y2 - bbox_y1) * (bbox_x2 - bbox_x1), axis=-1)
122 |     #
123 |     x_top = tf.maximum(bbox_x1, tf.transpose(gt_x1, transpose_perm))
124 |     y_top = tf.maximum(bbox_y1, tf.transpose(gt_y1, transpose_perm))
125 |     x_bottom = tf.minimum(bbox_x2, tf.transpose(gt_x2, transpose_perm))
126 |     y_bottom = tf.minimum(bbox_y2, tf.transpose(gt_y2, transpose_perm))
127 |     ### Calculate intersection area
128 |     intersection_area = tf.maximum(x_bottom - x_top, 0) * tf.maximum(y_bottom - y_top, 0)
129 |     ### Calculate union area
130 |     union_area = (tf.expand_dims(bbox_area, -1) + tf.expand_dims(gt_area, gt_expand_axis) - intersection_area)
131 |     # Intersection over Union
132 |     return intersection_area / union_area
133 |
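A quick numeric check of the IoU helper above (values chosen for illustration): a unit box against a box covering its top half overlaps by 0.5:

```python
import tensorflow as tf
from utils import bbox_utils

boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]])  # (1, [y1, x1, y2, x2])
gt = tf.constant([[0.0, 0.0, 0.5, 1.0]])     # covers the top half

iou = bbox_utils.generate_iou_map(boxes, gt, transpose_perm=[1, 0])
print(iou.numpy())  # [[0.5]] -> intersection 0.5 / union 1.0
```

For 2D inputs the `transpose_perm=[1, 0]` form is used, exactly as in `weighted_suppression_body` above.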
134 | def get_bboxes_and_landmarks_from_deltas(prior_boxes, deltas):
135 |     """Calculating bounding boxes and landmarks for given delta values.
136 |     inputs:
137 |         prior_boxes = (total_bboxes, [center_x, center_y, width, height])
138 |         deltas = (batch_size, total_bboxes, [delta_bbox_y, delta_bbox_x, delta_bbox_h, delta_bbox_w, delta_landmark_x0, delta_landmark_y0, ..., delta_landmark_xN, delta_landmark_yN])
139 |
140 |     outputs:
141 |         bboxes_and_landmarks = (batch_size, total_bboxes, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
142 |     """
143 |     #
144 |     bbox_width = deltas[..., 3] * prior_boxes[..., 2]
145 |     bbox_height = deltas[..., 2] * prior_boxes[..., 3]
146 |     bbox_ctr_x = (deltas[..., 1] * prior_boxes[..., 2]) + prior_boxes[..., 0]
147 |     bbox_ctr_y = (deltas[..., 0] * prior_boxes[..., 3]) + prior_boxes[..., 1]
148 |     #
149 |     y1 = bbox_ctr_y - (0.5 * bbox_height)
150 |     x1 = bbox_ctr_x - (0.5 * bbox_width)
151 |     y2 = bbox_height + y1
152 |     x2 = bbox_width + x1
153 |     #
154 |     total_landmarks = tf.shape(deltas[..., 4:])[-1] // 2
155 |     xy_pairs = tf.tile(prior_boxes[..., 0:2], (1, total_landmarks))
156 |     wh_pairs = tf.tile(prior_boxes[..., 2:4], (1, total_landmarks))
157 |     landmarks = (deltas[..., 4:] * wh_pairs) + xy_pairs
158 |     #
159 |     return tf.concat([tf.stack([y1, x1, y2, x2], axis=-1), landmarks], -1)
160 |
161 | def get_deltas_from_bboxes_and_landmarks(prior_boxes, bboxes_and_landmarks):
162 |     """Calculating bounding box and landmark deltas for given ground truth boxes and landmarks.
163 |     inputs:
164 |         prior_boxes = (total_bboxes, [center_x, center_y, width, height])
165 |         bboxes_and_landmarks = (batch_size, total_bboxes, [y1, x1, y2, x2, landmark_x0, landmark_y0, ..., landmark_xN, landmark_yN])
166 |
167 |     outputs:
168 |         deltas = (batch_size, total_bboxes, [delta_bbox_y, delta_bbox_x, delta_bbox_h, delta_bbox_w, delta_landmark_x0, delta_landmark_y0, ..., delta_landmark_xN, delta_landmark_yN])
169 |     """
170 |     #
171 |     gt_width = bboxes_and_landmarks[..., 3] - bboxes_and_landmarks[..., 1]
172 |     gt_height = bboxes_and_landmarks[..., 2] - bboxes_and_landmarks[..., 0]
173 |     gt_ctr_x = bboxes_and_landmarks[..., 1] + 0.5 * gt_width
174 |     gt_ctr_y = bboxes_and_landmarks[..., 0] + 0.5 * gt_height
175 |     #
176 |     delta_x = (gt_ctr_x - prior_boxes[..., 0]) / prior_boxes[..., 2]
177 |     delta_y = (gt_ctr_y - prior_boxes[..., 1]) / prior_boxes[..., 3]
178 |     delta_w = gt_width / prior_boxes[..., 2]
179 |     delta_h = gt_height / prior_boxes[..., 3]
180 |     #
181 |     total_landmarks = tf.shape(bboxes_and_landmarks[..., 4:])[-1] // 2
182 |     xy_pairs = tf.tile(prior_boxes[..., 0:2], (1, total_landmarks))
183 |     wh_pairs = tf.tile(prior_boxes[..., 2:4], (1, total_landmarks))
184 |     landmark_deltas = (bboxes_and_landmarks[..., 4:] - xy_pairs) / wh_pairs
185 |     #
186 |     return tf.concat([tf.stack([delta_y, delta_x, delta_h, delta_w], -1), landmark_deltas], -1)
187 |
188 | def get_scale_for_nth_feature_map(k, m=4, scale_min=0.1484375, scale_max=0.75):
189 |     """Calculating scale value for the nth feature map, using the method given in the paper.
190 |     inputs:
191 |         k = index of the feature map the scale is calculated for
192 |         m = number of feature maps used for detection, 6 for ssd300, 4 for blazeface
193 |
194 |     outputs:
195 |         scale = calculated scale value for given index
196 |     """
197 |     return scale_min + ((scale_max - scale_min) / (m - 1)) * (k - 1)
198 |
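With the defaults above, the feature map scales interpolate linearly between `scale_min` and `scale_max`; a worked check of the formula:

```python
from utils import bbox_utils

# scale(k) = 0.1484375 + ((0.75 - 0.1484375) / 3) * (k - 1)
for k in range(1, 5):
    print(k, bbox_utils.get_scale_for_nth_feature_map(k, m=4))
# 1 0.1484375
# 2 0.3489583...
# 3 0.5494791...
# 4 0.75
```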
199 | def get_wh_pairs(aspect_ratios, feature_map_index, total_feature_map):
200 |     """Generating width and height pairs for different aspect ratios and feature map shapes.
201 |     inputs:
202 |         aspect_ratios = aspect ratio list of the given feature map (1 extra pair is generated for ratio 1)
203 |         feature_map_index = index of the feature map the scale is calculated for
204 |         total_feature_map = number of feature maps used for detection, 6 for ssd300
205 |
206 |     outputs:
207 |         wh_pairs = [(width1, height1), ..., (widthN, heightN)]
208 |     """
209 |     current_scale = get_scale_for_nth_feature_map(feature_map_index, m=total_feature_map)
210 |     next_scale = get_scale_for_nth_feature_map(feature_map_index + 1, m=total_feature_map)
211 |     wh_pairs = []
212 |     for aspect_ratio in aspect_ratios:
213 |         height = current_scale / tf.sqrt(aspect_ratio)
214 |         width = current_scale * tf.sqrt(aspect_ratio)
215 |         wh_pairs.append([width, height])
216 |     # 1 extra pair for ratio 1
217 |     height = width = tf.sqrt(current_scale * next_scale)
218 |     wh_pairs.append([width, height])
219 |     return tf.cast(wh_pairs, dtype=tf.float32)
220 |
221 | def generate_prior_boxes(feature_map_shapes, aspect_ratios):
222 |     """Generating prior boxes for the given feature map shapes and the width-height pairs of their aspect ratios.
223 |     These prior boxes are the same as the anchors in Faster-RCNN.
224 |     inputs:
225 |         feature_map_shapes = output sizes of all feature maps used for detection
226 |         aspect_ratios = aspect ratio lists for all feature map shapes (1 extra pair is generated for ratio 1)
227 |
228 |     outputs:
229 |         prior_boxes = (total_bboxes, [center_x, center_y, width, height])
230 |             these values are in normalized form [0, 1]
231 |     """
232 |     prior_boxes = []
233 |     for i, feature_map_shape in enumerate(feature_map_shapes):
234 |         wh_pairs = get_wh_pairs(aspect_ratios[i], i+1, len(feature_map_shapes))
235 |         #
236 |         stride = 1 / feature_map_shape
237 |         grid_coords = tf.cast(tf.range(0, feature_map_shape) / feature_map_shape + stride / 2, dtype=tf.float32)
238 |         grid_x, grid_y = tf.meshgrid(grid_coords, grid_coords)
239 |         flat_grid_x, flat_grid_y = tf.reshape(grid_x, (-1, )), tf.reshape(grid_y, (-1, ))
240 |         #
241 |         grid_map = tf.stack([flat_grid_x, flat_grid_y], axis=-1)
242 |         grid_map = tf.pad(grid_map, ((0,0), (0,2)))
243 |         wh_pairs = tf.pad(wh_pairs, ((0,0), (2,0)))
244 |         #
245 |         prior_boxes_for_feature_map = tf.reshape(wh_pairs, (1, -1, 4)) + tf.reshape(grid_map, (-1, 1, 4))
246 |         prior_boxes_for_feature_map = tf.reshape(prior_boxes_for_feature_map, (-1, 4))
247 |         #
248 |         prior_boxes.append(prior_boxes_for_feature_map)
249 |     prior_boxes = tf.concat(prior_boxes, axis=0)
250 |     return tf.clip_by_value(prior_boxes, 0, 1)
251 |
252 | def convert_xywh_to_bboxes(xywh):
253 |     """Converting center x, y and width height format to bounding boxes.
254 |     inputs:
255 |         xywh = (M, N, [center_x, center_y, width, height])
256 |
257 |     outputs:
258 |         bboxes = (M, N, [y1, x1, y2, x2])
259 |     """
260 |     y1 = xywh[..., 1] - (0.5 * xywh[..., 3])
261 |     x1 = xywh[..., 0] - (0.5 * xywh[..., 2])
262 |     y2 = xywh[..., 3] + y1
263 |     x2 = xywh[..., 2] + x1
264 |     bboxes = tf.stack([y1, x1, y2, x2], axis=-1)
265 |     return tf.clip_by_value(bboxes, 0, 1)
266 |
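A small sanity check of the prior box generator above. The feature map shapes and ratios here are illustrative assumptions; the repo's real values come from `train_utils.get_hyper_params()`, which is not shown in this section:

```python
from utils import bbox_utils

# Two feature maps (16x16 and 8x8) with a single aspect ratio each;
# each cell then gets len(ratios) + 1 = 2 prior boxes.
priors = bbox_utils.generate_prior_boxes([16, 8], [[1.0], [1.0]])
print(priors.shape)  # (640, 4) -> 16*16*2 + 8*8*2, as [center_x, center_y, width, height]
```

To reach the 896 boxes of the original BlazeFace you would instead use 2 anchors per cell on the 16x16 map and 6 on the 8x8 map; with this generator that corresponds to 1 and 5 aspect ratios respectively.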
267 | def renormalize_bboxes_with_min_max(bboxes, min_max):
268 |     """Renormalizing given bounding boxes to the new boundaries.
269 |     r = (x - min) / (max - min)
270 |     inputs:
271 |         bboxes = (total_bboxes, [y1, x1, y2, x2])
272 |         min_max = ([y_min, x_min, y_max, x_max])
273 |     """
274 |     y_min, x_min, y_max, x_max = tf.split(min_max, 4)
275 |     renormalized_bboxes = bboxes - tf.concat([y_min, x_min, y_min, x_min], -1)
276 |     renormalized_bboxes /= tf.concat([y_max-y_min, x_max-x_min, y_max-y_min, x_max-x_min], -1)
277 |     return tf.clip_by_value(renormalized_bboxes, 0, 1)
278 |
279 | def normalize_bboxes(bboxes, height, width):
280 |     """Normalizing bounding boxes.
281 |     inputs:
282 |         bboxes = (M, N, [y1, x1, y2, x2])
283 |         height = image height
284 |         width = image width
285 |
286 |     outputs:
287 |         normalized_bboxes = (M, N, [y1, x1, y2, x2])
288 |             in normalized form [0, 1]
289 |     """
290 |     y1 = bboxes[..., 0] / height
291 |     x1 = bboxes[..., 1] / width
292 |     y2 = bboxes[..., 2] / height
293 |     x2 = bboxes[..., 3] / width
294 |     return tf.stack([y1, x1, y2, x2], axis=-1)
295 |
296 | def denormalize_bboxes(bboxes, height, width):
297 |     """Denormalizing bounding boxes.
298 |     inputs:
299 |         bboxes = (M, N, [y1, x1, y2, x2])
300 |             in normalized form [0, 1]
301 |         height = image height
302 |         width = image width
303 |
304 |     outputs:
305 |         denormalized_bboxes = (M, N, [y1, x1, y2, x2])
306 |     """
307 |     y1 = bboxes[..., 0] * height
308 |     x1 = bboxes[..., 1] * width
309 |     y2 = bboxes[..., 2] * height
310 |     x2 = bboxes[..., 3] * width
311 |     return tf.round(tf.stack([y1, x1, y2, x2], axis=-1))
312 |
--------------------------------------------------------------------------------
/utils/data_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tensorflow as tf
3 | import tensorflow_datasets as tfds
4 | from PIL import Image
5 | from utils import landmark_utils
6 | import numpy as np
7 |
8 | def filter_landmarks(landmarks):
9 |     """Filtering landmarks from 68 points down to the 6 points used by blazeface.
10 |     inputs:
11 |         landmarks = (M, 68, [x, y])
12 |
13 |     outputs:
14 |         filtered_landmarks = (M, 6, [x, y])
15 |     """
16 |     # Left eye
17 |     left_eye_coords = tf.reduce_mean(landmarks[..., 36:42, :], -2)
18 |     # Right eye
19 |     right_eye_coords = tf.reduce_mean(landmarks[..., 42:48, :], -2)
20 |     # Left ear
21 |     left_ear_coords = tf.reduce_mean(landmarks[..., 0:2, :], -2)
22 |     # Right ear
23 |     right_ear_coords = tf.reduce_mean(landmarks[..., 15:17, :], -2)
24 |     # Nose
25 |     nose_coords = tf.reduce_mean(landmarks[..., 27:36, :], -2)
26 |     # Mouth
27 |     mouth_coords = tf.reduce_mean(landmarks[..., 48:68, :], -2)
28 |     return tf.stack([
29 |         left_eye_coords,
30 |         right_eye_coords,
31 |         left_ear_coords,
32 |         right_ear_coords,
33 |         nose_coords,
34 |         mouth_coords,
35 |     ], -2)
36 |
37 | def generate_bboxes_from_landmarks(landmarks):
38 |     """Generating bounding boxes from landmarks.
39 |     inputs:
40 |         landmarks = (M, total_landmarks, [x, y])
41 |
42 |     outputs:
43 |         bboxes = (M, [y1, x1, y2, x2])
44 |     """
45 |     padding = 5e-3
46 |     x1 = tf.reduce_min(landmarks[..., 0], -1) - padding
47 |     x2 = tf.reduce_max(landmarks[..., 0], -1) + padding
48 |     y1 = tf.reduce_min(landmarks[..., 1], -1) - padding
49 |     y2 = tf.reduce_max(landmarks[..., 1], -1) + padding
50 |     #
51 |     gt_boxes = tf.stack([y1, x1, y2, x2], -1)
52 |     return tf.clip_by_value(gt_boxes, 0, 1)
53 |
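The two helpers above derive the training targets directly from the landmark annotations: the box is the landmark extent plus a small padding, and `filter_landmarks` averages the 68-point annotation down to the 6 BlazeFace keypoints (eyes, ears, nose, mouth). A numeric sketch with made-up coordinates:

```python
import tensorflow as tf
from utils import data_utils

# Three normalized [x, y] landmarks for one face.
landmarks = tf.constant([[[0.40, 0.30], [0.60, 0.32], [0.50, 0.55]]])
boxes = data_utils.generate_bboxes_from_landmarks(landmarks)
print(boxes.numpy())  # [[0.295 0.395 0.555 0.605]] -> min/max per axis +- 0.005 padding
```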
54 | def preprocessing(image_data, final_height, final_width, augmentation_fn=None):
55 |     """Image resizing operation handled before batch operations.
56 |     inputs:
57 |         image_data = tensorflow dataset image_data
58 |         final_height, final_width = final image height and width after resizing
59 |         augmentation_fn = optional augmentation function applied to the image, boxes and landmarks
60 | 
61 |     outputs:
62 |         img = (final_height, final_width, channels)
63 |         gt_boxes = (gt_box_size, [y1, x1, y2, x2])
64 |         gt_landmarks = (gt_box_size, total_landmarks, [x, y])
65 |     """
66 |     img = image_data["image"]
67 |     img = tf.image.convert_image_dtype(img, tf.float32)
68 |     gt_landmarks = tf.expand_dims(image_data["landmarks_2d"], 0)
69 |     gt_boxes = generate_bboxes_from_landmarks(gt_landmarks)
70 |     gt_landmarks = filter_landmarks(gt_landmarks)
71 |     img = tf.image.resize(img, (final_height, final_width))
72 |     if augmentation_fn:
73 |         img, gt_boxes, gt_landmarks = augmentation_fn(img, gt_boxes, gt_landmarks)
74 |     img = (img - 0.5) / 0.5  # scale pixels to [-1, 1]
75 |     return img, gt_boxes, gt_landmarks
76 | 
77 | def get_dataset(name, split, data_dir="~/tensorflow_datasets"):
78 |     """Get tensorflow dataset split and info.
79 |     inputs:
80 |         name = name of the dataset, e.g. the300w_lp
81 |         split = data split string
82 |         data_dir = read/write path for tensorflow datasets
83 | 
84 |     outputs:
85 |         dataset = tensorflow dataset split
86 |         info = tensorflow dataset info
87 |     """
88 |     dataset, info = tfds.load(name, split=split, data_dir=data_dir, with_info=True)
89 |     return dataset, info
90 | 
91 | def get_total_item_size(info, split):
92 |     """Get the total number of items for the given split.
93 |     inputs:
94 |         info = tensorflow dataset info
95 |         split = data split string
96 | 
97 |     outputs:
98 |         total_item_size = number of total items
99 |     """
100 |     return info.splits[split].num_examples
101 | 
102 | def get_labels(info):
103 |     """Get label names list.
104 |     inputs:
105 |         info = tensorflow dataset info
106 | 
107 |     outputs:
108 |         labels = [labels list]
109 |     """
110 |     return info.features["labels"].names
111 | 
112 | def get_custom_imgs(custom_image_path):
113 |     """Generating a list of image paths for the given folder.
114 |     inputs:
115 |         custom_image_path = folder of the custom images
116 |     outputs:
117 |         custom image list = [path1, path2, ...]
118 |     """
119 |     img_paths = []
120 |     for path, dirs, filenames in os.walk(custom_image_path):
121 |         for filename in filenames:
122 |             img_paths.append(os.path.join(path, filename))
123 |         break  # only the top-level folder is scanned
124 |     return img_paths
125 | 
126 | def custom_data_generator(img_paths, final_height, final_width):
127 |     """Yielding custom entities as dataset.
128 |     inputs:
129 |         img_paths = custom image paths
130 |         final_height = final image height after resizing
131 |         final_width = final image width after resizing
132 |     outputs:
133 |         img = (final_height, final_width, depth)
134 |         dummy_gt_boxes = (None, None)
135 |         dummy_gt_landmarks = (None, None, None)
136 |     """
137 |     for img_path in img_paths:
138 |         image = Image.open(img_path)
139 |         resized_image = image.resize((final_width, final_height), Image.LANCZOS)
140 |         img = np.array(resized_image)
141 |         img = tf.image.convert_image_dtype(img, tf.float32)
142 |         img = (img - 0.5) / 0.5
143 |         yield img, tf.constant([[]], dtype=tf.float32), tf.constant([[[]]], dtype=tf.float32)
144 | 
145 | def get_data_types():
146 |     """Generating dataset parameter dtypes for tensorflow datasets.
147 |     outputs:
148 |         dtypes = output dtypes for (images, ground truth boxes, ground truth landmarks)
149 |     """
150 |     return (tf.float32, tf.float32, tf.float32)
151 | 
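A hedged wiring sketch (assumed usage, not part of the repo) that turns the generator above into a tf.data pipeline; "data/images" is a hypothetical folder:

    import tensorflow as tf
    from utils import data_utils

    img_paths = data_utils.get_custom_imgs("data/images")
    custom_dataset = tf.data.Dataset.from_generator(
        lambda: data_utils.custom_data_generator(img_paths, 128, 128),
        data_utils.get_data_types())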
152 | def get_data_shapes():
153 |     """Generating dataset parameter shapes for tensorflow datasets.
154 |     outputs:
155 |         shapes = output shapes for (images, ground truth boxes, ground truth landmarks)
156 |     """
157 |     return ([None, None, None], [None, None], [None, None, None])
158 | 
159 | def get_padding_values():
160 |     """Generating padding values for missing values in batch for tensorflow datasets.
161 |     outputs:
162 |         paddings = padding values with dtypes for (images, ground truth boxes, ground truth landmarks)
163 |     """
164 |     return (tf.constant(0, tf.float32), tf.constant(0, tf.float32), tf.constant(0, tf.float32))
165 | 
--------------------------------------------------------------------------------
/utils/drawing_utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from PIL import Image, ImageDraw
3 | import matplotlib.pyplot as plt
4 | 
5 | def draw_bboxes(imgs, bboxes):
6 |     """Drawing bounding boxes on given images.
7 |     inputs:
8 |         imgs = (batch_size, height, width, channels)
9 |         bboxes = (batch_size, total_bboxes, [y1, x1, y2, x2])
10 |             in normalized form [0, 1]
11 |     """
12 |     colors = tf.constant([[1, 0, 0, 1]], dtype=tf.float32)
13 |     imgs_with_bb = tf.image.draw_bounding_boxes(imgs, bboxes, colors)
14 |     for img_with_bb in imgs_with_bb:
15 |         plt.figure()
16 |         plt.imshow(img_with_bb)
17 |     plt.show()
18 | 
19 | def draw_bboxes_with_landmarks(img, bboxes, landmarks):
20 |     """Drawing bounding boxes and landmarks on the given image.
21 |     inputs:
22 |         img = (height, width, channels)
23 |         bboxes = (total_bboxes, [y1, x1, y2, x2])
24 |         landmarks = (total_bboxes, total_landmarks, [x, y])
25 |     """
26 |     image = tf.keras.preprocessing.image.array_to_img(img)
27 |     draw = ImageDraw.Draw(image)
28 |     color = (255, 0, 0, 255)
29 |     for bbox in bboxes:
30 |         y1, x1, y2, x2 = tf.split(bbox, 4)
31 |         bbox_width = x2 - x1
32 |         bbox_height = y2 - y1
33 |         # skip degenerate boxes
34 |         if bbox_width <= 0 or bbox_height <= 0:
35 |             continue
36 |         draw.rectangle((x1, y1, x2, y2), outline=color, width=1)
37 |     for landmark in landmarks:
38 |         # skip padded landmark rows
39 |         if tf.reduce_max(landmark) <= 0:
40 |             continue
41 |         rects = tf.concat([landmark - 1, landmark + 1], -1)
42 |         for rect in rects:
43 |             draw.ellipse(rect, fill=color)
44 |     plt.figure()
45 |     plt.imshow(image)
46 |     plt.show()
47 | 
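An illustrative call with dummy data (a sketch, not a repo excerpt); model outputs are normalized to [0, 1], so they are denormalized to pixel coordinates before drawing:

    import tensorflow as tf
    from utils import bbox_utils, drawing_utils, landmark_utils

    img = tf.random.uniform((128, 128, 3))
    boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]])      # [y1, x1, y2, x2], normalized
    landmarks = tf.constant([[[0.4, 0.4], [0.6, 0.4]]])  # [x, y] pairs, normalized

    drawing_utils.draw_bboxes_with_landmarks(
        img,
        bbox_utils.denormalize_bboxes(boxes, 128, 128),
        landmark_utils.denormalize_landmarks(landmarks, 128, 128))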
--------------------------------------------------------------------------------
/utils/io_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import tensorflow as tf
4 | from datetime import datetime
5 | 
6 | def get_log_path(custom_prefix=""):
7 |     """Generating log path for tensorboard.
8 |     inputs:
9 |         custom_prefix = any custom string for log folder name
10 | 
11 |     outputs:
12 |         log_path = tensorboard log path, for example: "logs/{custom_prefix}{date}"
13 |     """
14 |     return "logs/{}{}".format(custom_prefix, datetime.now().strftime("%Y%m%d-%H%M%S"))
15 | 
16 | def get_model_path():
17 |     """Generating model path for save/load model weights.
18 | 
19 |     outputs:
20 |         model_path = os model path, for example: "trained/blazeface_model_weights.h5"
21 |     """
22 |     main_path = "trained"
23 |     if not os.path.exists(main_path):
24 |         os.makedirs(main_path)
25 |     model_path = os.path.join(main_path, "blazeface_model_weights.h5")
26 |     return model_path
27 | 
28 | def handle_args():
29 |     """Handling of command line arguments using the argparse library.
30 | 
31 |     outputs:
32 |         args = parsed command line arguments
33 |     """
34 |     parser = argparse.ArgumentParser(description="BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs Implementation")
35 |     parser.add_argument("-handle-gpu", action="store_true", help="Tensorflow 2 GPU compatibility flag")
36 |     args = parser.parse_args()
37 |     return args
38 | 
39 | def handle_gpu_compatibility():
40 |     """Handling of GPU issues for the cuDNN initialization error and memory growth."""
41 |     try:
42 |         gpus = tf.config.experimental.list_physical_devices("GPU")
43 |         for gpu in gpus:
44 |             tf.config.experimental.set_memory_growth(gpu, True)
45 |     except Exception as e:
46 |         print(e)
47 | 
--------------------------------------------------------------------------------
/utils/landmark_utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | def renormalize_landmarks_with_min_max(landmarks, min_max):
4 |     """Renormalizing given landmarks to the new boundaries, r = (x - min) / (max - min).
5 |     inputs:
6 |         landmarks = (total_count, total_landmarks, [x, y])
7 |         min_max = ([y_min, x_min, y_max, x_max])
8 |     outputs: renormalized_landmarks = (total_count, total_landmarks, [x, y]), clipped to [0, 1]
9 |     """
10 |     y_min, x_min, y_max, x_max = tf.split(min_max, 4)
11 |     renormalized_landmarks = landmarks - tf.concat([x_min, y_min], -1)
12 |     renormalized_landmarks /= tf.concat([x_max-x_min, y_max-y_min], -1)
13 |     return tf.clip_by_value(renormalized_landmarks, 0, 1)
14 | 
15 | def normalize_landmarks(landmarks, height, width):
16 |     """Normalizing landmarks.
17 |     inputs:
18 |         landmarks = (M, N, [x, y])
19 |         height = image height
20 |         width = image width
21 | 
22 |     outputs:
23 |         normalized_landmarks = (M, N, [x, y])
24 |             in normalized form [0, 1]
25 |     """
26 |     return landmarks / tf.cast([width, height], tf.float32)
27 | 
28 | def denormalize_landmarks(landmarks, height, width):
29 |     """Denormalizing landmarks.
30 |     inputs:
31 |         landmarks = (M, N, [x, y])
32 |             in normalized form [0, 1]
33 |         height = image height
34 |         width = image width
35 | 
36 |     outputs:
37 |         denormalized_landmarks = (M, N, [x, y]), rounded to the nearest pixel
38 |     """
39 |     return tf.round(landmarks * tf.cast([width, height], tf.float32))
40 | 
--------------------------------------------------------------------------------
/utils/train_utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import math
3 | from utils import bbox_utils
4 | 
5 | def get_hyper_params(**kwargs):
6 |     """Generating hyper params in a dynamic way.
7 |     inputs:
8 |         **kwargs = any hyper param to override; falsy values are ignored
9 | 
10 |     outputs:
11 |         hyper_params = dictionary
12 |     """
13 |     hyper_params = {
14 |         "img_size": 128,
15 |         "feature_map_shapes": [16, 8, 8, 8],
16 |         "aspect_ratios": [[1.], [1.], [1.], [1.]],
17 |         "detections_per_layer": [2, 6],
18 |         "total_landmarks": 6,
19 |         "iou_threshold": 0.5,
20 |         "neg_pos_ratio": 3,
21 |         "loc_loss_alpha": 1,
22 |         "variances": [0.1, 0.1, 0.2, 0.2],
23 |     }
24 |     for key, value in kwargs.items():
25 |         if key in hyper_params and value:
26 |             hyper_params[key] = value
27 |     #
28 |     return hyper_params
29 | 
30 | def scheduler(epoch):
31 |     """Generating learning rate value for a given epoch.
32 |     inputs:
33 |         epoch = number of current epoch
34 | 
35 |     outputs:
36 |         learning_rate = float learning rate value
37 |     """
38 |     if epoch < 100:
39 |         return 1e-3
40 |     elif epoch < 125:
41 |         return 1e-4
42 |     else:
43 |         return 1e-5
44 | 
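A short sketch (assumed usage, not a repo excerpt) of plugging the scheduler above into Keras training as a callback, together with a hyper param override:

    import tensorflow as tf
    from utils import train_utils

    hyper_params = train_utils.get_hyper_params(iou_threshold=0.45)  # override one default
    lr_callback = tf.keras.callbacks.LearningRateScheduler(train_utils.scheduler)
    # later: model.fit(..., callbacks=[lr_callback])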
45 | def get_step_size(total_items, batch_size):
46 |     """Get step size for given total item size and batch size.
47 |     inputs:
48 |         total_items = number of total items
49 |         batch_size = number of items in a batch during training or validation
50 | 
51 |     outputs:
52 |         step_size = number of steps per epoch for model training
53 |     """
54 |     return math.ceil(total_items / batch_size)
55 | 
56 | def generator(dataset, prior_boxes, hyper_params):
57 |     """Tensorflow data generator for the fit method, yielding inputs and outputs.
58 |     inputs:
59 |         dataset = tf.data.Dataset, PaddedBatchDataset
60 |         prior_boxes = (total_bboxes, [center_x, center_y, width, height])
61 |             these values in normalized format between [0, 1]
62 |         hyper_params = dictionary
63 | 
64 |     outputs:
65 |         yield inputs, outputs
66 |     """
67 |     while True:
68 |         for image_data in dataset:
69 |             img, gt_boxes, gt_landmarks = image_data
70 |             actual_deltas, actual_labels = calculate_actual_outputs(prior_boxes, gt_boxes, gt_landmarks, hyper_params)
71 |             yield img, (actual_deltas, actual_labels)
72 | 
73 | def calculate_actual_outputs(prior_boxes, gt_boxes, gt_landmarks, hyper_params):
74 |     """Calculate ssd actual output values.
75 |     Batch operations supported.
76 |     inputs:
77 |         prior_boxes = (total_bboxes, [center_x, center_y, width, height])
78 |             these values in normalized format between [0, 1]
79 |         gt_boxes = (batch_size, gt_box_size, [y1, x1, y2, x2])
80 |             these values in normalized format between [0, 1]
81 |         gt_landmarks = (batch_size, gt_box_size, total_landmarks, [x, y])
82 |             these values in normalized format between [0, 1]
83 |         hyper_params = dictionary
84 | 
85 |     outputs:
86 |         actual_deltas = (batch_size, total_bboxes, [delta_bbox_y, delta_bbox_x, delta_bbox_h, delta_bbox_w, delta_landmark_x0, delta_landmark_y0, ..., delta_landmark_xN, delta_landmark_yN])
87 |         actual_labels = (batch_size, total_bboxes, [1 or 0])
88 |     """
89 |     batch_size = tf.shape(gt_boxes)[0]
90 |     iou_threshold = hyper_params["iou_threshold"]
91 |     variances = hyper_params["variances"]
92 |     total_landmarks = hyper_params["total_landmarks"]
93 |     landmark_variances = total_landmarks * variances[0:2]  # [0.1, 0.1] repeated once per landmark
94 |     # Calculate iou values between each prior box and the ground truth boxes
95 |     iou_map = bbox_utils.generate_iou_map(bbox_utils.convert_xywh_to_bboxes(prior_boxes), gt_boxes)
96 |     # For each prior box, get the index of the ground truth box with the highest iou
97 |     max_indices_each_prior = tf.argmax(iou_map, axis=2, output_type=tf.int32)
98 |     # Merge the iou values column wise, keeping the best iou of each prior box
99 |     merged_iou_map = tf.reduce_max(iou_map, axis=2)
100 |     # Positive priors are those whose best iou exceeds the threshold
101 |     pos_cond = tf.greater(merged_iou_map, iou_threshold)
102 |     #
103 |     gt_landmarks = tf.reshape(gt_landmarks, (batch_size, -1, total_landmarks * 2))
104 |     gt_boxes_and_landmarks = tf.concat([gt_boxes, gt_landmarks], -1)
105 |     gt_boxes_and_landmarks_map = tf.gather(gt_boxes_and_landmarks, max_indices_each_prior, batch_dims=1)
106 |     expanded_gt_boxes_and_landmarks = tf.where(tf.expand_dims(pos_cond, -1), gt_boxes_and_landmarks_map, tf.zeros_like(gt_boxes_and_landmarks_map))
107 |     actual_deltas = bbox_utils.get_deltas_from_bboxes_and_landmarks(prior_boxes, expanded_gt_boxes_and_landmarks) / (variances + landmark_variances)
108 |     #
109 |     actual_labels = tf.expand_dims(tf.cast(pos_cond, dtype=tf.float32), -1)
110 |     #
111 |     return actual_deltas, actual_labels
112 | 
--------------------------------------------------------------------------------
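An end-to-end sketch of how the utilities above might be wired together for training; this is an assumption-laden illustration, not a verbatim excerpt from trainer.py, and the split string and batch size are illustrative:

    import tensorflow as tf
    from utils import bbox_utils, data_utils, train_utils

    hyper_params = train_utils.get_hyper_params()
    prior_boxes = bbox_utils.generate_prior_boxes(
        hyper_params["feature_map_shapes"], hyper_params["aspect_ratios"])

    batch_size = 32
    img_size = hyper_params["img_size"]
    train_data, info = data_utils.get_dataset("the300w_lp", "train[:80%]")
    train_total = data_utils.get_total_item_size(info, "train[:80%]")
    train_data = train_data.map(lambda x: data_utils.preprocessing(x, img_size, img_size))
    train_data = train_data.padded_batch(
        batch_size,
        padded_shapes=data_utils.get_data_shapes(),
        padding_values=data_utils.get_padding_values())

    step_size = train_utils.get_step_size(train_total, batch_size)
    train_gen = train_utils.generator(train_data, prior_boxes, hyper_params)
    # then: model.fit(train_gen, steps_per_epoch=step_size, ...)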