├── LICENSE.md
├── README.md
├── data2kitti.py
├── data_utils
│   ├── argparser_data2kitti.py
│   ├── check_labels.py
│   ├── data-tree.txt
│   ├── fddb2kitti.py
│   ├── kaggle2kitti.py
│   ├── mafa2kitti.py
│   └── widerface2kitti.py
├── ds_configs
│   ├── config_infer_primary_masknet_gpu.txt
│   ├── deepstream_app_source1_camera_masknet_gpu.txt
│   ├── deepstream_app_source1_video_masknet_gpu.txt
│   └── labels_masknet.txt
├── face-mask-detection.ipynb
├── images
│   └── face-mask-detect-output.png
├── requirements.txt
└── tlt_specs
    ├── detectnet_v2_inference_kitti_etlt.txt
    ├── detectnet_v2_inference_kitti_tlt.txt
    ├── detectnet_v2_retrain_resnet18_kitti.txt
    ├── detectnet_v2_tfrecords_kitti_trainval.txt
    ├── detectnet_v2_tfrecords_kitti_val.txt
    └── detectnet_v2_train_resnet18_kitti.txt
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------
2 | # This sample application is no longer maintained
3 | # ------------------------------------------------------
4 | 
5 | # face_mask_detection
6 | 
7 | [NVIDIA Developer Blog](https://developer.nvidia.com/blog/implementing-a-real-time-ai-based-face-mask-detector-application-for-covid-19/)
8 | 
9 | This project is a tutorial for NVIDIA's Transfer Learning Toolkit (TLT) and the DeepStream (DS) SDK, i.e., the training and inference flow for detecting faces with and without masks on the Jetson platform.
10 | 
11 | By the end of this project, you will be able to build a DeepStream app on the Jetson platform to detect faces with and without masks.
12 | 
13 | ![alt text](images/face-mask-detect-output.png "Output from Face Mask Detection Application")
14 | 
15 | ### What this project includes
16 | - Transfer Learning Toolkit (TLT) scripts:
17 |   - Dataset processing scripts to convert the datasets into KITTI format
18 |   - Specification files for configuring tlt-train, tlt-prune, tlt-evaluate
19 | - DeepStream (DS) scripts:
20 |   - deepstream-app config files (for a demo on a single camera stream and detection on a stored video file)
21 | 
22 | ### What this project does not provide
23 | - A trained model for face-mask detection; we go through the steps to produce a DetectNet_v2 (ResNet-18 backbone) model for face-mask detection.
24 | - An NVIDIA-specific dataset of faces with and without masks; we suggest the following datasets based on our experiments.
25 | 
26 | 
27 | ### Preferred Datasets
28 | - Faces with Mask
29 |   - Kaggle Medical Mask Dataset [Download Link](https://www.kaggle.com/ivandanilovich/medical-masks-dataset-images-tfrecords)
30 |   - MAFA - MAsked FAces [Download Link](https://drive.google.com/drive/folders/1nbtM1n0--iZ3VVbNGhocxbnBGhMau_OG)
31 | - Faces without Mask
32 |   - FDDB Dataset [Download Link](http://vis-www.cs.umass.edu/fddb/)
33 |   - WiderFace Dataset [Download Link](http://shuoyang1213.me/WIDERFACE/)
34 | 
35 | *Note: We do not use all the images from MAFA and WiderFace. Combined, we use about 6,000 faces each with and without masks.*
36 | 
37 | ## Steps to perform Face Detection with Mask:
38 | 
39 | - Install dependencies and the Docker container
40 |   - On the training machine with an NVIDIA GPU:
41 |     - Install the NVIDIA Docker container: [installation instructions](https://developer.nvidia.com/blog/gpu-containers-runtime/); see also the [TLT Toolkit Requirements](https://docs.nvidia.com/metropolis/TLT/tlt-getting-started-guide/index.html#requirements)
42 |     - [Running Transfer Learning Toolkit using Docker](https://ngc.nvidia.com/catalog/containers/nvidia:tlt-streamanalytics)
43 |     - Pull the docker container:
44 |       ```docker pull nvcr.io/nvidia/tlt-streamanalytics:v2.0_py3```
45 |     - Run the docker image:
46 |       ```
47 |       docker run --gpus all -it -v "/path/to/dir/on/host":"/path/to/dir/in/docker" \
48 |                  -p 8888:8888 nvcr.io/nvidia/tlt-streamanalytics:v2.0_py3 /bin/bash
49 |       ```
50 |     - Clone the Git repo in the TLT container:
51 |       ```
52 |       git clone https://github.com/NVIDIA-AI-IOT/face-mask-detection.git
53 |       ```
54 |     - Install the data conversion dependencies:
55 |       ```
56 |       cd face-mask-detection
57 |       python3 -m pip install -r requirements.txt
58 |       ```
59 |   - On NVIDIA Jetson:
60 |     - [Install DeepStream](https://docs.nvidia.com/metropolis/deepstream/dev-guide/index.html#page/DeepStream_Development_Guide/deepstream_quick_start.html#wwpID0E0GI0HA)
61 | 
62 | - Prepare the input dataset (on the training machine)
63 |   - We expect the downloaded data in [this](https://github.com/NVIDIA-AI-IOT/face-mask-detection/blob/master/data_utils/data-tree.txt) structure (an abridged view follows).
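    For reference, here is an abridged view of that layout; the full tree is in ```data_utils/data-tree.txt``` later in this repo:
    ```
    face-mask-detection-data
    ├── MAFA Dataset
    │   ├── MAFA-Label-Train/        (LabelTrainAll.mat)
    │   └── train-images/images/
    ├── FDDB Dataset
    │   ├── 2002/ and 2003/ image folders
    │   └── FDDB-folds/              (FDDB-fold-XX-ellipseList.txt files)
    ├── Kaggle Medical Mask Dataset
    │   ├── images/
    │   └── labels/
    └── Wider Face Dataset
        ├── wider_face_split/        (wider_face_train.mat, wider_face_val.mat)
        └── WIDER_train/images/ and WIDER_val/images/
    ```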
64 |   - Convert the dataset to KITTI format:
65 | ```
66 | cd face-mask-detection
67 | python3 data2kitti.py --kaggle-dataset-path <kaggle dataset absolute directory path> \
68 |                       --mafa-dataset-path <MAFA dataset absolute directory path> \
69 |                       --fddb-dataset-path <FDDB dataset absolute directory path> \
70 |                       --widerface-dataset-path <WiderFace dataset absolute directory path> \
71 |                       --kitti-base-path <output directory for storing KITTI-formatted annotations> \
72 |                       --category-limit <category limit for Masked and No-Mask faces> \
73 |                       --tlt-input-dims_width <TLT input width> \
74 |                       --tlt-input-dims_height <TLT input height> \
75 |                       --train
76 | ```
77 | 
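For example, with the flag names and defaults taken from `data_utils/argparser_data2kitti.py`; the paths below are hypothetical placeholders for your own download locations:

```
python3 data2kitti.py --kaggle-dataset-path /data/medical-masks-dataset \
                      --mafa-dataset-path /data/MAFA \
                      --fddb-dataset-path /data/FDDB \
                      --widerface-dataset-path /data/WiderFace-Dataset \
                      --kitti-base-path /data/KITTI_960 \
                      --category-limit 6000 \
                      --tlt-input-dims_width 960 \
                      --tlt-input-dims_height 544 \
                      --train
```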
78 | You will see the following output log:
79 | 
80 | ```
81 | Kaggle Dataset: Total Mask faces: 4154 and No-Mask faces:790
82 | Total Mask Labelled:4154 and No-Mask Labelled:790
83 | 
84 | MAFA Dataset: Total Mask faces: 1846 and No-Mask faces:232
85 | Total Mask Labelled:6000 and No-Mask Labelled:1022
86 | 
87 | FDDB Dataset: Mask Labelled:0 and No-Mask Labelled:2845
88 | Total Mask Labelled:6000 and No-Mask Labelled:3867
89 | 
90 | WideFace: Total Mask Labelled:0 and No-Mask Labelled:2134
91 | ----------------------------
92 | Final: Total Mask Labelled:6000
93 | Total No-Mask Labelled:6001
94 | ----------------------------
95 | ```
96 | *Note: You might get warnings; you can safely ignore them.*
97 | 
98 | 
99 | - Perform training using the [TLT training flow](https://github.com/NVIDIA-AI-IOT/face-mask-detection#nvidia-transfer-learning-toolkit-tlt-training-flow-)
100 |   - Use the ['face-mask-detection'](https://github.com/NVIDIA-AI-IOT/face-mask-detection/blob/master/face-mask-detection.ipynb) Jupyter notebook provided with this repository.
101 |   - Follow the TLT training flow
102 | 
103 | - Perform inference using the DeepStream SDK on Jetson
104 |   - Transfer the exported model file (.etlt) and, if using INT8, the calibration file (calibration.bin) to the Jetson
105 |   - Use the config files from ```/ds_configs/*```
106 |     ``` $ vi config_infer_primary_masknet_gpu.txt ```
107 |   - Modify the model and label paths according to your directory locations
108 | - Look for ``` tlt-encoded-model, labelfile-path, model-engine-file, int8-calib-file ```
109 |   - Modify ```classifier-threshold``` and the class attributes according to your training
110 |     - Look for ``` classifier-threshold, class-attrs ```
111 |   - Use the deepstream-app config files:
112 |     ``` $ vi deepstream_app_source1_video_masknet_gpu.txt ```
113 |   - Modify the model file and config file paths:
114 |     - Look for ``` model-engine-file, config-file ``` under ```primary-gie```
115 |   - Use deepstream-app to deploy in real time:
116 |     ```$ deepstream-app -c deepstream_app_source1_video_masknet_gpu.txt```
117 |   - We provide two different config files:
118 |     - DS running on GPU only with camera input: ```deepstream_app_source1_camera_masknet_gpu.txt```
119 |     - DS running on GPU only with saved video input: ```deepstream_app_source1_video_masknet_gpu.txt```
120 | 
121 | 
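Concretely, the keys to edit in ```config_infer_primary_masknet_gpu.txt``` look like the sketch below. The key names match the config file shipped in ```ds_configs/```; the paths are placeholders for your own model artifacts:

```
[property]
tlt-encoded-model=/path/to/resnet18_detector_int8.etlt
labelfile-path=labels_masknet.txt
model-engine-file=/path/to/resnet18_detector_int8.etlt_b1_gpu0_int8.engine
int8-calib-file=/path/to/calibration.bin
classifier-threshold=0.9
```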
122 | *Note:*
123 | - ```model-engine-file``` is generated on the first run; once generated, you can find it in the same directory as the ```.etlt``` file
124 | - If you want to generate the ```model-engine-file``` before the first run, use [tlt-converter](https://docs.nvidia.com/metropolis/TLT/tlt-getting-started-guide/index.html#gen_eng_tlt_converter)
125 | 
126 | 
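As a rough sketch, a tlt-converter invocation for this model could look like the following. The flag names are recalled from the TLT 2.0 documentation and should be verified against ```tlt-converter -h```; the output blob names and the 960x544 input size match ```config_infer_primary_masknet_gpu.txt```, while the key and file paths are placeholders:

```
tlt-converter -k tlt_encode \
              -d 3,544,960 \
              -o output_cov/Sigmoid,output_bbox/BiasAdd \
              -t int8 -c calibration.bin \
              -e resnet18_detector_int8.engine \
              resnet18_detector_int8.etlt
```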
127 | ## Evaluation Results on NVIDIA Jetson Platform
128 | 
129 | | Pruned | mAP (Mask/No-Mask) (%) | Nano GPU (FPS) | Xavier NX GPU (FPS) | Xavier NX DLA (FPS) | Xavier GPU (FPS) | Xavier DLA (FPS) |
130 | | --- | --- | --- | --- | --- | --- | --- |
131 | | No | 86.12 (87.59, 84.65) | 6.5 | 125.36 | 30.31 | 269.04 | 61.96 |
132 | | Yes (12%**) | 85.50 (86.72, 84.27) | 21.25 | 279 | 116.2 | 508.32 | 155.5 |
164 | 165 | ## NVIDIA Transfer Learning Toolkit (TLT) Training Flow
166 | 1. Download the pre-trained model (for the mask-detection application, we experimented with DetectNet_v2 with a ResNet-18 backbone)
167 | 2. Convert the dataset to KITTI format
168 | 3. Train the model (tlt-train)
169 | 4. Evaluate on validation data or infer on test images (tlt-evaluate, tlt-infer)
170 | 5. Prune the trained model (tlt-prune)
171 |    Pruning the model reduces the parameter count, improving FPS performance
172 | 6. Retrain the pruned model (tlt-train)
173 | 7. Evaluate the retrained model on validation data (tlt-evaluate)
174 | 8. If the accuracy in (7) is not within a satisfactory range, repeat steps (5), (6), (7) with a different pruning threshold; otherwise go to step (9)
175 | 9. Export the trained model from step (6) (tlt-export)
176 |    Choose INT8 or FP16 based on your platform needs; for example, Jetson Xavier and Jetson Xavier NX have INT8 DLA support
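In command form, this flow looks roughly like the sketch below. The spec file names come from ```tlt_specs/``` in this repository, but the result directories, model names, and pruning threshold are placeholders, and the authoritative, tested commands live in ```face-mask-detection.ipynb```:

```
# Inside the TLT container; $KEY set as in the notebook
tlt-train detectnet_v2 -e tlt_specs/detectnet_v2_train_resnet18_kitti.txt \
          -r results/unpruned -k $KEY -n resnet18_detector                   # steps 3-4
tlt-prune -m results/unpruned/weights/resnet18_detector.tlt \
          -o results/pruned/resnet18_detector_pruned.tlt -pth 0.01 -k $KEY   # step 5
tlt-train detectnet_v2 -e tlt_specs/detectnet_v2_retrain_resnet18_kitti.txt \
          -r results/retrain -k $KEY -n resnet18_detector_pruned             # step 6
tlt-evaluate detectnet_v2 -e tlt_specs/detectnet_v2_retrain_resnet18_kitti.txt \
          -m results/retrain/weights/resnet18_detector_pruned.tlt -k $KEY    # step 7
tlt-export detectnet_v2 -m results/retrain/weights/resnet18_detector_pruned.tlt \
          -o resnet18_detector.etlt -k $KEY --data_type int8                 # step 9
```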
177 | 
178 | ### Interesting Resources
179 | - [Transfer Learning Toolkit (TLT) Getting Started](https://developer.nvidia.com/tlt-getting-started)
180 | - [Pruning Models with NVIDIA Transfer Learning Toolkit](https://developer.nvidia.com/blog/transfer-learning-toolkit-pruning-intelligent-video-analytics/)
181 | 
182 | ### References
183 | - Evan Danilovich (March 2020). Medical Masks Dataset. Version 1. Retrieved May 14, 2020 from https://www.kaggle.com/ivandanilovich/medical-masks-dataset
184 | - Shiming Ge, Jia Li, Qiting Ye, Zhao Luo; "Detecting Masked Faces in the Wild with LLE-CNNs", Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017, pp. 2682-2690
185 | - Vidit Jain and Erik Learned-Miller; "FDDB: A Benchmark for Face Detection in Unconstrained Settings", Technical Report UM-CS-2010-009, Dept. of Computer Science, University of Massachusetts, Amherst, 2010
186 | - Yang, Shuo; Luo, Ping; Loy, Chen Change; Tang, Xiaoou; "WIDER FACE: A Face Detection Benchmark", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016
187 | - MAFA Dataset Google Drive link: courtesy of [aome510](https://github.com/aome510/Mask-Classifier)
188 | 
189 | 
--------------------------------------------------------------------------------
/data2kitti.py:
--------------------------------------------------------------------------------
1 | from data_utils.widerface2kitti import widerFace2kitti
2 | from data_utils.mafa2kitti import mafa2kitti
3 | from data_utils.fddb2kitti import fddb2kitti
4 | from data_utils.kaggle2kitti import kaggle2kitti
5 | from data_utils.argparser_data2kitti import argparser_data2kitti
6 | from data_utils.check_labels import test_labels
7 | import os
8 | 
9 | def main():
10 |     # Set parameters
11 |     arg_parser = argparser_data2kitti()
12 |     args = arg_parser.make_args()
13 |     # Datasets for masked faces
14 |     kaggle_base_dir = args.kaggle_dataset_path  # Use all data from Kaggle
15 |     mafa_base_dir = args.mafa_dataset_path  # Use only about 4000 images from MAFA
16 |     # Datasets for no-mask faces
17 |     fddb_base_dir = args.fddb_dataset_path  # Use all data from FDDB
18 |     widerface_base_dir = args.widerface_dataset_path  # Use only selected sub-folders
19 |     ''' Note: The Kaggle and FDDB datasets do not have validation data, so we use all of their data for training '''
20 |     # Store converted annotations in KITTI format
21 |     kitti_base_dir = args.kitti_base_path
22 |     category_limit = [args.category_limit, args.category_limit]  # Mask / No-Mask limits
23 |     kitti_resize_dims = (args.tlt_input_dims_width, args.tlt_input_dims_height)  # Default for DetectNet_v2; see TLT model requirements
24 | 
25 |     total_masks, total_no_masks = 0, 0
26 |     count_masks, count_no_masks = 0, 0
27 |     # Check whether labels are converted in the right format
28 |     if args.check_labels:
29 |         # Check from the train directory
30 |         test_labels(kitti_base_dir=kitti_base_dir + '/train/', file_name=args.label_filename)
31 |     else:
32 |         # ----------------------------------------
33 |         # Kaggle Dataset Conversion
34 |         # ----------------------------------------
35 |         if args.train:
36 |             images_dir = os.path.join(kaggle_base_dir, 'images')  #r'C:\Users\ameykulkarni\Downloads\527030_966454_bundle_archive\images'
37 |             labels_dir = os.path.join(kaggle_base_dir, 'labels')  #r'C:\Users\ameykulkarni\Downloads\527030_966454_bundle_archive\labels'
38 |             medical_mask2kitti = 
kaggle2kitti(images_dir=images_dir, labels_dir=labels_dir, 39 | category_limit=category_limit, 40 | kitti_base_dir=kitti_base_dir, kitti_resize_dims=kitti_resize_dims) 41 | count_masks, count_no_masks = medical_mask2kitti.get_data_attributes() 42 | # ---------------------------------------- 43 | # MAFA Dataset Conversion 44 | # ---------------------------------------- 45 | if args.train: 46 | annotation_file = os.path.join(mafa_base_dir, 'MAFA-Label-Train/LabelTrainAll.mat') 47 | mafa_base_dir = os.path.join(mafa_base_dir, 'train-images/images') 48 | if args.val: 49 | annotation_file = os.path.join(mafa_base_dir, 'MAFA-Label-Test/LabelTestAll.mat') 50 | mafa_base_dir = os.path.join(mafa_base_dir, 'test-images/images') 51 | 52 | total_masks += count_masks 53 | total_no_masks += count_no_masks 54 | print("Total Mask Labelled:{} and No-Mask Labelled:{}".format(total_masks, total_no_masks)) 55 | category_limit_mod = [category_limit[0] - total_masks, category_limit[1] - total_no_masks] 56 | 57 | kitti_label = mafa2kitti(annotation_file=annotation_file, mafa_base_dir=mafa_base_dir, 58 | kitti_base_dir=kitti_base_dir, kitti_resize_dims=kitti_resize_dims, 59 | category_limit=category_limit_mod, train=args.train) 60 | count_masks, count_no_masks = kitti_label.mat2data() 61 | 62 | # ---------------------------------------- 63 | # FDDB Dataset Conversion 64 | # ---------------------------------------- 65 | if args.train: 66 | # Modifying category limit based on FDDB 67 | total_masks += count_masks 68 | total_no_masks += count_no_masks 69 | print("Total Mask Labelled:{} and No-Mask Labelled:{}".format(total_masks, total_no_masks)) 70 | category_limit_mod = [category_limit[0]-total_masks, category_limit[1]-total_no_masks] 71 | annotation_path = os.path.join(fddb_base_dir, 'FDDB-folds') #r'C:\Users\ameykulkarni\Downloads\FDDB-folds\FDDB-folds' 72 | kitti_label = fddb2kitti(annotation_path=annotation_path, fddb_base_dir=fddb_base_dir, 73 | kitti_base_dir=kitti_base_dir, kitti_resize_dims=kitti_resize_dims, 74 | category_limit=category_limit_mod) 75 | count_masks, count_no_masks = kitti_label.fddb_data() 76 | 77 | # ---------------------------------------- 78 | # Wider-Face Dataset Conversion 79 | # ---------------------------------------- 80 | total_masks += count_masks 81 | total_no_masks += count_no_masks 82 | print("Total Mask Labelled:{} and No-Mask Labelled:{}".format(total_masks, total_no_masks)) 83 | category_limit_mod = [category_limit[0] - total_masks, category_limit[1] - total_no_masks] 84 | 85 | if args.train: 86 | annotation_file = os.path.join(widerface_base_dir, 'wider_face_split/wider_face_train.mat') 87 | widerFace_base_dir = os.path.join(widerface_base_dir, 'WIDER_train/images') 88 | if args.val: 89 | # Modify this 90 | annotation_file = os.path.join(widerface_base_dir, 'wider_face_split/wider_face_val.mat') 91 | widerFace_base_dir = os.path.join(widerface_base_dir, 'WIDER_val/images') 92 | 93 | kitti_label = widerFace2kitti(annotation_file=annotation_file, widerFace_base_dir=widerFace_base_dir, 94 | kitti_base_dir=kitti_base_dir, kitti_resize_dims=kitti_resize_dims, 95 | category_limit=category_limit_mod, train=args.train) 96 | count_masks, count_no_masks = kitti_label.mat2data() 97 | total_masks += count_masks 98 | total_no_masks += count_no_masks 99 | print("----------------------------") 100 | print("Final: Total Mask Labelled:{}\nTotal No-Mask Labelled:{}".format(total_masks, total_no_masks)) 101 | print("----------------------------") 102 | 103 | 104 | if __name__ == '__main__': 
105 | main() 106 | -------------------------------------------------------------------------------- /data_utils/argparser_data2kitti.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | class argparser_data2kitti(): 4 | def __init__(self): 5 | self.parser = argparse.ArgumentParser(description='') 6 | self.parser.add_argument('--kaggle-dataset-path', dest='kaggle_dataset_path', 7 | help='path to kaggle dataset train and validation images', type=str) 8 | self.parser.add_argument('--mafa-dataset-path', dest='mafa_dataset_path', 9 | help='path to MAFA dataset train and validation images', type=str) 10 | self.parser.add_argument('--fddb-dataset-path', dest='fddb_dataset_path', help='path to fddb dataset train and validation images', type=str) 11 | self.parser.add_argument('--widerface-dataset-path', dest='widerface_dataset_path', help='path to widerface dataset train and validation images', type=str) 12 | self.parser.add_argument('--kitti-base-path', dest='kitti_base_path', 13 | help='path to save converted data set', type=str) 14 | self.parser.add_argument('--category-limit', dest='category_limit', default=6000, 15 | help='data limit for TLT', type=int) 16 | self.parser.add_argument('--tlt-input-dims_width', dest='tlt_input_dims_width', default=960, 17 | help = 'TLT input dimensions', type = int) 18 | self.parser.add_argument('--tlt-input-dims_height', dest='tlt_input_dims_height', default=544, 19 | help='TLT input dimensions', type=int) 20 | self.parser.add_argument('--label_filename', dest='label_filename', default='000_1OC3DT', 21 | help='File name for label checking', type=str) 22 | data_group = self.parser.add_mutually_exclusive_group() 23 | data_group.add_argument('--train', dest='train', help='Convert Training dataset to KITTI', action='store_true') 24 | data_group.add_argument('--val', dest='val', help='Convert validation dataset to KITTI', action='store_true') 25 | data_group.add_argument('--check_labels', dest='check_labels', help='Check if Converted dataset is right', action='store_true') 26 | 27 | def make_args(self): 28 | return self.parser.parse_args() -------------------------------------------------------------------------------- /data_utils/check_labels.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PIL import Image, ImageDraw 3 | 4 | def test_labels(kitti_base_dir, file_name): 5 | img = Image.open(os.path.join(kitti_base_dir+'images/', file_name + '.jpg')) 6 | text_file = open(os.path.join(kitti_base_dir+'labels/', file_name + '.txt'), 'r') 7 | bbox = [] 8 | category = [] 9 | for line in text_file: 10 | features = line.split() 11 | bbox.append([float(features[4]), float(features[5]), float(features[6]), float(features[7])]) 12 | category.append(features[0]) 13 | print("Bounding Box", bbox) 14 | print("Category:", category) 15 | i = 0 16 | for bb in bbox: 17 | draw_img = ImageDraw.Draw(img) 18 | shape = ((bb[0], bb[1]), (bb[2], bb[3])) 19 | if category[i] == 'No-Mask': 20 | outline_clr = "red" 21 | elif category[i] == 'Mask': 22 | outline_clr = "green" 23 | draw_img.rectangle(shape, fill=None, outline=outline_clr, width=4) 24 | i += 1 25 | img.show() -------------------------------------------------------------------------------- /data_utils/data-tree.txt: -------------------------------------------------------------------------------- 1 | face-mask-detection-data 2 | ├───MAFA Dataset 3 | │ ├───MAFA-Label-Test 4 | │ │ LabelTestAll.mat 5 | │ │ readme-test.txt 
6 | │ │ 7 | │ ├───MAFA-Label-Train 8 | │ │ LabelTrainAll.mat 9 | │ │ readme-train.txt 10 | │ │ 11 | │ │ 12 | │ ├───test-images 13 | │ │ └───images 14 | │ │ test_00000001.jpg 15 | │ │ ......... 16 | │ │ ......... 17 | │ │ test_00004935.jpg 18 | │ │ 19 | │ ├───train-images 20 | │ └───images 21 | │ train_00000001.jpg 22 | │ ......... 23 | │ ......... 24 | │ train_00025876.jpg 25 | │ 26 | │ 27 | ├───FDDB Dataset 28 | │ ├───2002 29 | │ ├───2003 30 | │ ├───FDDB-folds 31 | │ FDDB-fold-01-ellipseList.txt 32 | │ FDDB-fold-01.txt 33 | │ .......... 34 | │ .......... 35 | │ FDDB-fold-10-ellipseList.txt 36 | │ FDDB-fold-10.txt 37 | │ 38 | │ 39 | ├───Kaggle Medical Mask Dataset 40 | │ ├───images 41 | │ │ -1x-1.jpg 42 | │ │ ......... 43 | │ │ ......... 44 | │ │ w1240-p16x9-fa978043deff83fed485af12d16e39c61398fc30.jpg 45 | │ │ W37H3GWNO5EV5HTJQNP4KLIRGI.jpg 46 | │ │ 47 | │ └───labels 48 | │ -1x-1.xml 49 | │ ......... 50 | │ ......... 51 | │ w1240-p16x9-fa978043deff83fed485af12d16e39c61398fc30.xml 52 | │ W37H3GWNO5EV5HTJQNP4KLIRGI.xml 53 | │ 54 | ├───Wider Face Dataset 55 | ├───wider_face_split 56 | │ readme.txt 57 | │ wider_face_test.mat 58 | │ wider_face_test_filelist.txt 59 | │ wider_face_train.mat 60 | │ wider_face_train_bbx_gt.txt 61 | │ wider_face_val.mat 62 | │ wider_face_val_bbx_gt.txt 63 | │ 64 | ├───WIDER_train 65 | │ └───images 66 | ├───WIDER_val 67 | │ └───images -------------------------------------------------------------------------------- /data_utils/fddb2kitti.py: -------------------------------------------------------------------------------- 1 | import scipy.io 2 | import os 3 | from PIL import Image 4 | import math 5 | import re 6 | import numpy as np 7 | 8 | class fddb2kitti(): 9 | def __init__(self, annotation_path, fddb_base_dir, kitti_base_dir, kitti_resize_dims, category_limit): 10 | self.annot_path = annotation_path 11 | self.kitti_base_dir = kitti_base_dir 12 | self.fddb_base_dir = fddb_base_dir 13 | self.count_mask = category_limit[0] 14 | self.count_no_mask = category_limit[1] 15 | self.kitti_resize_dims = kitti_resize_dims 16 | try: 17 | os.makedirs(self.kitti_base_dir+'/train/images',mode=0o777) 18 | except FileExistsError: 19 | print("Directory Already Exists") 20 | self.kitti_images = os.path.join(self.kitti_base_dir, 'train/images') 21 | try: 22 | os.makedirs(self.kitti_base_dir+ '/train/labels',mode=0o777) 23 | except FileExistsError: 24 | print("Directory Already Exists") 25 | self.kitti_labels = os.path.join(self.kitti_base_dir, 'train/labels') 26 | 27 | def ellipese2bbox(self, face_annotations): 28 | major_axis_radius = int(float(face_annotations[0])) 29 | minor_axis_radius = int(float(face_annotations[1])) 30 | angle = int(float(face_annotations[2])) 31 | center_x = int(float(face_annotations[3])) 32 | center_y = int(float(face_annotations[4])) 33 | 34 | cosin = math.cos(math.radians(-angle)) 35 | sin = math.sin(math.radians(-angle)) 36 | 37 | x1 = cosin * (-minor_axis_radius) - sin * (-major_axis_radius) + center_x 38 | y1 = sin * (-minor_axis_radius) + cosin * (-major_axis_radius) + center_y 39 | x2 = cosin * (minor_axis_radius) - sin * (-major_axis_radius) + center_x 40 | y2 = sin * (minor_axis_radius) + cosin * (-major_axis_radius) + center_y 41 | x3 = cosin * (minor_axis_radius) - sin * (major_axis_radius) + center_x 42 | y3 = sin * (minor_axis_radius) + cosin * (major_axis_radius) + center_y 43 | x4 = cosin * (-minor_axis_radius) - sin * (major_axis_radius) + center_x 44 | y4 = sin * (-minor_axis_radius) + cosin * (major_axis_radius) + center_y 45 | 46 | '''pts 
= cv.ellipse2Poly((center_x, center_y), (major_axis_radius, minor_axis_radius), angle, 0, 360, 10) 47 | rect = cv.boundingRect(pts)''' 48 | x_cords = [x1, x2, x3, x4] 49 | y_cords = [y1, y2, y3, y4] 50 | x_min = min(x_cords) 51 | x_max = max(x_cords) 52 | y_min = min(y_cords) 53 | y_max = max(y_cords) 54 | left = x_min 55 | top = y_min 56 | right = x_max 57 | bottom = y_max 58 | width = right - left + 1 59 | height = bottom - top + 1 60 | return left, top, width, height 61 | 62 | def make_labels(self, image_name, category_names, bboxes): 63 | # Process image 64 | file_image = os.path.splitext(os.path.split(image_name)[1])[0] 65 | img = Image.open(os.path.join(self.fddb_base_dir, image_name)).convert("RGB") 66 | resize_img = img.resize(self.kitti_resize_dims) 67 | resize_img.save(os.path.join(self.kitti_images, file_image + '.jpg'), 'JPEG') 68 | # Process labels 69 | with open(os.path.join(self.kitti_labels, file_image + '.txt'), 'w') as label_file: 70 | for i in range(0, len(bboxes)): 71 | resized_bbox = self.resize_bbox(img=img, bbox=bboxes[i], dims=self.kitti_resize_dims) 72 | out_str = [category_names[i].replace(" ", "") 73 | + ' ' + ' '.join(['0'] * 1) 74 | + ' ' + ' '.join(['0'] * 2) 75 | + ' ' + ' '.join([b for b in resized_bbox]) 76 | + ' ' + ' '.join(['0'] * 7) 77 | + '\n'] 78 | label_file.write(out_str[0]) 79 | 80 | def resize_bbox(self, img, bbox, dims): 81 | img_w, img_h = img.size 82 | x_min, y_min, x_max, y_max = bbox 83 | ratio_w, ratio_h = dims[0] / img_w, dims[1] / img_h 84 | new_bbox = [str(int(np.round(x_min * ratio_w))), str(int(np.round(y_min * ratio_h))), str(int(np.round(x_max * ratio_w))), 85 | str(int(np.round(y_max * ratio_h)))] 86 | return new_bbox 87 | 88 | def fddb_data(self): 89 | _count_mask, _count_no_mask = 0, 0 90 | for root, dirs, files in os.walk(self.annot_path): 91 | for file in files: 92 | if file.endswith('ellipseList.txt'): 93 | file_name = os.path.join(root, file) 94 | _count_mask, _count_no_mask = self.mat2data(read_file=file_name, 95 | _count_no_mask=_count_no_mask, 96 | _count_mask = _count_mask) 97 | print("FDDB Dataset: Mask Labelled:{} and No-Mask Labelled:{}".format(_count_mask, _count_no_mask)) 98 | return _count_mask, _count_no_mask 99 | 100 | def mat2data(self, read_file, _count_no_mask, _count_mask): 101 | strings = ("2002/", "2003/") 102 | with open(read_file, 'r') as f: 103 | lines = f.readlines() 104 | for i in range(0, len(lines)): 105 | line = lines[i] 106 | if any(s in line for s in strings) and _count_no_mask < self.count_no_mask: 107 | image_file_location = line.strip('\n') 108 | num_faces_line = re.search(r"(\d+).*?", lines[i + 1]) 109 | num_faces = int(num_faces_line.group(1)) 110 | image_name = image_file_location + '.jpg' 111 | category_name = 'No-Mask' 112 | bboxes = [] 113 | category_names = [] 114 | for j in range(1, num_faces + 1): 115 | annot_line = str(lines[i + j + 1]) 116 | faces = annot_line.split() 117 | left, top, width, height = self.ellipese2bbox(face_annotations=faces[0:5]) 118 | bbox = [left, top, width+left, top+height] 119 | bboxes.append(bbox) 120 | category_names.append(category_name) 121 | if bboxes: 122 | self.make_labels(image_name=image_name, category_names=category_names, bboxes=bboxes) 123 | _count_no_mask+=1 124 | return _count_mask, _count_no_mask 125 | 126 | def main(): 127 | fddb_base_dir = '/home/nvidia/face-mask-detection/datasets/fddb' 128 | annotation_path = '/home/nvidia/face-mask-detection/datasets/fddb/FDDB-folds' 129 | kitti_base_dir = '/home/nvidia/face-mask-detection/datasets/KITTI_1024' 
130 | 131 | category_limit = [1000, 1000] # Mask / No-Mask Limits 132 | kitti_resize_dims = (960, 544) # Look at TLT model requirements 133 | kitti_label = fddb2kitti(annotation_path=annotation_path, fddb_base_dir=fddb_base_dir, 134 | kitti_base_dir=kitti_base_dir, kitti_resize_dims=kitti_resize_dims, 135 | category_limit=category_limit) 136 | count_masks, count_no_masks = kitti_label.fddb_data() 137 | 138 | if __name__ == '__main__': 139 | main() 140 | -------------------------------------------------------------------------------- /data_utils/kaggle2kitti.py: -------------------------------------------------------------------------------- 1 | from xml.etree import ElementTree 2 | from PIL import Image, ImageDraw, ImageFont 3 | import os 4 | import numpy as np 5 | 6 | class kaggle2kitti(): 7 | def __init__(self, images_dir, labels_dir, kitti_base_dir, kitti_resize_dims, category_limit): 8 | self.images_dir = images_dir 9 | self.labels_dir = labels_dir 10 | self.count_mask = category_limit[0] 11 | self.count_no_mask = category_limit[1] 12 | self.kitti_base_dir = kitti_base_dir 13 | self.kitti_resize_dims = kitti_resize_dims 14 | try: 15 | os.makedirs(self.kitti_base_dir+'/train/images',mode=0o777) 16 | except: 17 | print("Directory Already Exists") 18 | self.kitti_images = os.path.join(self.kitti_base_dir, 'train/images') 19 | try: 20 | os.makedirs(self.kitti_base_dir+ '/train/labels',mode=0o777) 21 | except: 22 | print("Directory Already Exists") 23 | self.kitti_labels = os.path.join(self.kitti_base_dir, 'train/labels') 24 | def get_image_metafile(self, image_file): 25 | image_name = os.path.splitext(image_file)[0] 26 | return os.path.join(self.labels_dir, str(image_name+'.xml')) 27 | 28 | def make_labels(self, image_name, category_names, bboxes): 29 | # Process image 30 | file_image = os.path.splitext(image_name)[0] 31 | img = Image.open(os.path.join(self.images_dir, image_name)).convert("RGB") 32 | resize_img = img.resize(self.kitti_resize_dims) 33 | resize_img.save(os.path.join(self.kitti_images, file_image + '.jpg'), 'JPEG') 34 | # Process labels 35 | with open(os.path.join(self.kitti_labels, file_image + '.txt'), 'w') as label_file: 36 | for i in range(0, len(bboxes)): 37 | resized_bbox = self.resize_bbox(img=img, bbox=bboxes[i], dims=self.kitti_resize_dims) 38 | out_str = [category_names[i].replace(" ", "") 39 | + ' ' + ' '.join(['0'] * 1) 40 | + ' ' + ' '.join(['0'] * 2) 41 | + ' ' + ' '.join([b for b in resized_bbox]) 42 | + ' ' + ' '.join(['0'] * 7) 43 | + '\n'] 44 | label_file.write(out_str[0]) 45 | 46 | def resize_bbox(self, img, bbox, dims): 47 | img_w, img_h = img.size 48 | x_min, y_min, x_max, y_max = bbox 49 | ratio_w, ratio_h = dims[0] / img_w, dims[1] / img_h 50 | new_bbox = [str(int(np.round(x_min * ratio_w))), str(int(np.round(y_min * ratio_h))), str(int(np.round(x_max * ratio_w))), 51 | str(int(np.round(y_max * ratio_h)))] 52 | return new_bbox 53 | 54 | def get_data_attributes(self): 55 | image_extensions = ['.jpeg', '.jpg', '.png'] 56 | _count_mask = 0 57 | _count_no_mask = 0 58 | for image_name in os.listdir(self.images_dir): 59 | if image_name.endswith('.jpeg') or image_name.endswith('.jpg') or image_name.endswith('.png'): 60 | labels_xml = self.get_image_metafile(image_file=image_name) 61 | if os.path.isfile(labels_xml): 62 | labels = ElementTree.parse(labels_xml).getroot() 63 | bboxes = [] 64 | categories = [] 65 | for object_tag in labels.findall("object"): 66 | cat_name = object_tag.find("name").text 67 | 68 | if (cat_name == 'mask'): 69 | category = 'Mask' 70 | 
xmin = int(object_tag.find("bndbox/xmin").text) 71 | xmax = int(object_tag.find("bndbox/xmax").text) 72 | ymin = int(object_tag.find("bndbox/ymin").text) 73 | ymax = int(object_tag.find("bndbox/ymax").text) 74 | bbox = [xmin, ymin, xmax, ymax] 75 | categories.append(category) 76 | bboxes.append(bbox) 77 | _count_mask += 1 78 | elif cat_name == 'none': 79 | category = 'No-Mask' 80 | xmin = int(object_tag.find("bndbox/xmin").text) 81 | xmax = int(object_tag.find("bndbox/xmax").text) 82 | ymin = int(object_tag.find("bndbox/ymin").text) 83 | ymax = int(object_tag.find("bndbox/ymax").text) 84 | bbox = [xmin, ymin, xmax, ymax] 85 | categories.append(category) 86 | bboxes.append(bbox) 87 | _count_no_mask += 1 88 | if bboxes: 89 | self.make_labels(image_name=image_name, category_names=categories, bboxes=bboxes) 90 | print("Kaggle Dataset: Total Mask faces: {} and No-Mask faces:{}".format(_count_mask, _count_no_mask)) 91 | return _count_mask, _count_no_mask 92 | 93 | 94 | def main(): 95 | images_dir = '/home/nvidia/face-mask-detection/datasets/medical-masks-dataset/images' 96 | labels_dir = '/home/nvidia/face-mask-detection/datasets/medical-masks-dataset/labels' 97 | kitti_base_dir = '/home/nvidia/face-mask-detection/datasets/medical-masks-dataset/KITTI_1024' 98 | kitti_resize_dims = (960,544) 99 | category_limit = [10,10] 100 | medical_mask2kitti = kaggle2kitti(images_dir=images_dir, labels_dir=labels_dir, 101 | category_limit=category_limit, 102 | kitti_base_dir=kitti_base_dir, kitti_resize_dims=kitti_resize_dims) 103 | medical_mask2kitti.get_data_attributes() 104 | 105 | 106 | if __name__ == '__main__': 107 | main() 108 | -------------------------------------------------------------------------------- /data_utils/mafa2kitti.py: -------------------------------------------------------------------------------- 1 | import scipy.io 2 | import os 3 | from PIL import Image, ImageDraw 4 | import numpy as np 5 | 6 | class mafa2kitti(): 7 | def __init__(self, annotation_file, mafa_base_dir, kitti_base_dir, kitti_resize_dims, category_limit, train): 8 | self.annotation_file = annotation_file 9 | self.data = scipy.io.loadmat(self.annotation_file) 10 | self.kitti_base_dir = kitti_base_dir 11 | self.mafa_base_dir = mafa_base_dir 12 | self.count_mask = category_limit[0] 13 | self.count_no_mask = category_limit[1] 14 | self.kitti_resize_dims = kitti_resize_dims 15 | self.train = train 16 | if self.train: 17 | self.len_dataset = len(self.data["label_train"][0]) 18 | try: 19 | os.makedirs(self.kitti_base_dir+'/train/images',mode=0o777) 20 | except FileExistsError: 21 | print("Directory Already Exists") 22 | self.kitti_images = os.path.join(self.kitti_base_dir, 'train/images') 23 | try: 24 | os.makedirs(self.kitti_base_dir+ '/train/labels',mode=0o777) 25 | except FileExistsError: 26 | print("Directory Already Exists") 27 | self.kitti_labels = os.path.join(self.kitti_base_dir, 'train/labels') 28 | else: 29 | self.len_dataset = len(self.data["LabelTest"][0]) 30 | try: 31 | os.makedirs(self.kitti_base_dir+'/test/images',mode=0o777) 32 | except FileExistsError: 33 | print("Directory Already Exists") 34 | self.kitti_images = os.path.join(self.kitti_base_dir, 'test/images') 35 | try: 36 | os.makedirs(self.kitti_base_dir+'/test/labels',mode=0o777) 37 | except FileExistsError: 38 | print("Directory Already Exists") 39 | self.kitti_labels = os.path.join(self.kitti_base_dir, 'test/labels') 40 | 41 | def extract_labels(self, i, train_flag, _count_mask, _count_no_mask): 42 | if train_flag: 43 | train_image = 
self.data["label_train"][0][i] 44 | train_image_name = str(train_image[1]).strip("['']") # Test [0] 45 | categories = [] 46 | bboxes = [] 47 | for i in range(0, len(train_image[2])): 48 | _bbox_label = train_image[2][i] # Test[1][0] 49 | _category_id = _bbox_label[12] # Occ_Type: For Train: 13th, 10th in Test 50 | _occulution_degree = _bbox_label[13] 51 | bbox = [_bbox_label[0], _bbox_label[1], _bbox_label[0]+_bbox_label[2], _bbox_label[1]+_bbox_label[3]] 52 | if (_category_id != 3 and _occulution_degree > 2) and (_count_mask < self.count_mask): 53 | category_name = 'Mask' # Faces with Mask 54 | _count_mask += 1 55 | count = 0 56 | categories.append(category_name) 57 | bboxes.append(bbox) 58 | elif (_category_id==3 and _occulution_degree<2) and (_count_no_mask < self.count_no_mask): 59 | category_name = 'No-Mask' # Faces with Mask 60 | _count_no_mask += 1 61 | count = 0 62 | categories.append(category_name) 63 | bboxes.append(bbox) 64 | if bboxes: 65 | if not self.check_image_dims(image_name=train_image_name): 66 | self.make_labels(image_name=train_image_name, category_names=categories, 67 | bboxes=bboxes) 68 | 69 | else: 70 | test_image = self.data["LabelTest"][0][i] 71 | test_image_name = str(test_image[0]).strip("['']") # Test [0] 72 | categories = [] 73 | bboxes = [] 74 | for i in range(0, len(test_image[1])): 75 | _bbox_label = test_image[1][i] # Test[1][0] 76 | # Occ_Type: For Train: 13th, 10th in Test 77 | # In test Data: refer to Face_type, 5th 78 | _face_type = _bbox_label[4] # Face Type 79 | _occ_type = _bbox_label[9] 80 | _occ_degree = _bbox_label[10] 81 | bbox = [_bbox_label[0], _bbox_label[1], _bbox_label[0] + _bbox_label[2], _bbox_label[1] + _bbox_label[3]] 82 | if (_face_type==1 and _occ_type!=3 and _occ_degree > 2) and _count_mask < self.count_mask: 83 | category_name = 'Mask' 84 | bboxes.append(bbox) 85 | categories.append(category_name) 86 | _count_mask+=1 87 | elif (_face_type==2) and _count_mask < self.count_mask: 88 | category_name = 'No-Mask' 89 | bboxes.append(bbox) 90 | categories.append(category_name) 91 | _count_no_mask+1 92 | if bboxes: 93 | if not self.check_image_dims(image_name=test_image_name): 94 | self.make_labels(image_name=test_image_name, category_names=categories, bboxes=bboxes) 95 | return _count_mask, _count_no_mask 96 | 97 | def check_image_dims(self, image_name): 98 | file_name=os.path.join(self.mafa_base_dir, image_name) 99 | img = Image.open(file_name).convert("RGB") 100 | img_w, img_h = img.size 101 | if img_w < img_h: 102 | return True 103 | return False 104 | 105 | def make_labels(self, image_name, category_names, bboxes): 106 | # Process image 107 | file_image = os.path.splitext(image_name)[0] 108 | img = Image.open(os.path.join(self.mafa_base_dir, image_name)).convert("RGB") 109 | resize_img = img.resize(self.kitti_resize_dims) 110 | resize_img.save(os.path.join(self.kitti_images, file_image + '.jpg'), 'JPEG') 111 | # Process labels 112 | with open(os.path.join(self.kitti_labels, file_image + '.txt'), 'w') as label_file: 113 | for i in range(0, len(bboxes)): 114 | resized_bbox = self.resize_bbox(img=img, bbox=bboxes[i], dims=self.kitti_resize_dims) 115 | out_str = [category_names[i].replace(" ", "") 116 | + ' ' + ' '.join(['0'] * 1) 117 | + ' ' + ' '.join(['0'] * 2) 118 | + ' ' + ' '.join([b for b in resized_bbox]) 119 | + ' ' + ' '.join(['0'] * 7) 120 | + '\n'] 121 | label_file.write(out_str[0]) 122 | 123 | def resize_bbox(self, img, bbox, dims): 124 | img_w, img_h = img.size 125 | x_min, y_min, x_max, y_max = bbox 126 | ratio_w, ratio_h = 
dims[0] / img_w, dims[1]/img_h 127 | new_bbox = [str(int(np.round(x_min*ratio_w))), str(int(np.round(y_min*ratio_h))), str(int(np.round(x_max*ratio_w))), str(int(np.round(y_max *ratio_h)))] 128 | return new_bbox 129 | 130 | def mat2data(self): 131 | _count_mask, _count_no_mask = 0,0 132 | for i in range(0, self.len_dataset): 133 | _count_mask, _count_no_mask = self.extract_labels(i=i, train_flag=self.train, 134 | _count_mask=_count_mask, 135 | _count_no_mask=_count_no_mask) 136 | print("MAFA Dataset: Total Mask faces: {} and No-Mask faces:{}".format(_count_mask, _count_no_mask)) 137 | return _count_mask, _count_no_mask 138 | 139 | def test_labels(self, file_name): 140 | img = Image.open(os.path.join(self.kitti_images, file_name + '.jpg')) 141 | text_file = open(os .path.join(self.kitti_labels, file_name + '.txt'), 'r') 142 | features = [] 143 | bbox = [] 144 | category = [] 145 | for line in text_file: 146 | features = line.split() 147 | bbox.append([float(features[4]), float(features[5]), float(features[6]), float(features[7])]) 148 | category.append(features[0]) 149 | print("Bounding Box", bbox) 150 | print("Category:", category) 151 | i = 0 152 | for bb in bbox: 153 | cc = category[i] 154 | if cc == 'Mask': 155 | outline_box = 'red' 156 | elif cc == "No-Mask": 157 | outline_box = 'green' 158 | draw_img = ImageDraw.Draw(img) 159 | shape = ((bb[0], bb[1]), (bb[2], bb[3])) 160 | draw_img.rectangle(shape, fill=None, outline=outline_box) 161 | draw_img.text((bb[0], bb[1]), cc, (255,255,255)) 162 | 163 | i+=1 164 | 165 | img.show() 166 | 167 | def main(): 168 | mafa_base_dir = r'C:\Users\ameykulkarni\Downloads\MAFA\MAFA' 169 | kitti_base_dir = r'C:\Users\ameykulkarni\Downloads\MAFA\KITTI_test' 170 | train = True 171 | if train: 172 | annotation_file = os.path.join(mafa_base_dir, 'MAFA-Label-Train/LabelTrainAll.mat') 173 | mafa_base_dir = os.path.join(mafa_base_dir, 'train-images\images') 174 | else: 175 | annotation_file = os.path.join(mafa_base_dir, 'MAFA-Label-Test/LabelTestAll.mat') 176 | mafa_base_dir = os.path.join(mafa_base_dir, 'test-images\images') 177 | 178 | category_limit = [25000, 25000] # Mask / No-Mask Limits 179 | kitti_resize_dims = (480, 272) # Look at TLT model requirements 180 | kitti_label = mafa2kitti(annotation_file=annotation_file, mafa_base_dir=mafa_base_dir, 181 | kitti_base_dir=kitti_base_dir, kitti_resize_dims=kitti_resize_dims, 182 | category_limit=category_limit, train=train) 183 | count_masks, count_no_masks = kitti_label.mat2data() 184 | kitti_label.test_labels(file_name='train_00006597') 185 | 186 | if __name__ == '__main__': 187 | main() 188 | -------------------------------------------------------------------------------- /data_utils/widerface2kitti.py: -------------------------------------------------------------------------------- 1 | import scipy.io 2 | import os 3 | from PIL import Image, ImageDraw 4 | 5 | class widerFace2kitti(): 6 | def __init__(self, annotation_file, widerFace_base_dir, kitti_base_dir, kitti_resize_dims, category_limit, train): 7 | self.annotation_file = annotation_file 8 | self.data = scipy.io.loadmat(self.annotation_file) 9 | self.file_names = self.data.get('file_list') # File Name 10 | self.event_list = self.data.get('event_list') # Folder Name 11 | self.bbox_list = self.data.get('face_bbx_list') # Bounding Boxes 12 | self.label_list = self.data.get('occlusion_label_list') 13 | self.kitti_base_dir = kitti_base_dir 14 | self.widerFace_base_dir = widerFace_base_dir 15 | self.count_mask = category_limit[0] 16 | self.count_no_mask = 
category_limit[1] 17 | self.kitti_resize_dims = kitti_resize_dims 18 | self.train = train 19 | self.len_dataset = len(self.file_names) 20 | if self.train: 21 | try: 22 | os.makedirs(self.kitti_base_dir+'/train/images',mode=0o777) 23 | except FileExistsError: 24 | print("Directory Already Exists") 25 | 26 | self.kitti_images = os.path.join(self.kitti_base_dir, 'train/images') 27 | try: 28 | os.makedirs(self.kitti_base_dir+ '/train/labels',mode=0o777) 29 | except FileExistsError: 30 | print("Directory Already Exists") 31 | 32 | self.kitti_labels = os.path.join(self.kitti_base_dir, 'train/labels') 33 | else: 34 | try: 35 | os.makedirs(self.kitti_base_dir+'/test/images',mode=0o777) 36 | except FileExistsError: 37 | print("Directory Already Exists") 38 | 39 | self.kitti_images = os.path.join(self.kitti_base_dir, 'test/images') 40 | try: 41 | os.makedirs(self.kitti_base_dir+'/test/labels',mode=0o777) 42 | except FileExistsError: 43 | print("Directory Already Exists") 44 | 45 | self.kitti_labels = os.path.join(self.kitti_base_dir, 'test/labels') 46 | 47 | def make_labels(self, image_name, category_names, bboxes): 48 | # Process image 49 | file_image = os.path.split(os.path.splitext(image_name)[0])[1] 50 | img = Image.open(os.path.join(self.widerFace_base_dir, image_name)).convert("RGB") 51 | resize_img = img.resize(self.kitti_resize_dims) 52 | resize_img.save(os.path.join(self.kitti_images, file_image+'.jpg'), 'JPEG') 53 | # Process labels 54 | with open(os.path.join(self.kitti_labels, file_image+ '.txt'), 'w') as label_file: 55 | for i in range (0, len(bboxes)): 56 | resized_bbox = self.resize_bbox(img=img, bbox=bboxes[i], dims=self.kitti_resize_dims) 57 | out_str = [category_names[i].replace(" ", "") 58 | + ' ' + ' '.join(['0'] * 1) 59 | + ' ' + ' '.join(['0'] * 2) 60 | + ' ' + ' '.join([b for b in resized_bbox]) 61 | + ' ' + ' '.join(['0'] * 7) 62 | + '\n'] 63 | label_file.write(out_str[0]) 64 | 65 | def resize_bbox(self, img, bbox, dims): 66 | img_w, img_h = img.size 67 | x_min, y_min, x_max, y_max = bbox 68 | ratio_w, ratio_h = img_w / dims[0], img_h / dims[1] 69 | new_bbox = [str(x_min / ratio_w), str(y_min / ratio_h), str(x_max / ratio_w), str(y_max / ratio_h)] 70 | return new_bbox 71 | 72 | def mat2data(self): 73 | count = 0 74 | _count_mask, _count_no_mask = 0,0 75 | #pick_list = ['19--Couple', '13--Interview', '16--Award_Ceremony','2--Demonstration', '22--Picnic'] 76 | # Use following pick list for more image data 77 | pick_list = ['2--Demonstration', '4--Dancing', '5--Car_Accident', '15--Stock_Market', '23--Shoppers', 78 | '27--Spa', '32--Worker_Laborer', '33--Running', '37--Soccer', 79 | '47--Matador_Bullfighter','57--Angler', '51--Dresses', '46--Jockey', 80 | '9--Press_Conference','16--Award_Ceremony', '17--Ceremony', 81 | '20--Family_Group', '22--Picnic', '25--Soldier_Patrol', '31--Waiter_Waitress', 82 | '49--Greeting', '38--Tennis', '43--Row_Boat', '29--Students_Schoolkids'] 83 | for event_idx, event in enumerate(self.event_list): 84 | directory = event[0][0] 85 | if any(ele in directory for ele in pick_list): 86 | for im_idx, im in enumerate(self.file_names[event_idx][0]): 87 | _t_count_no_mask = 0 88 | im_name = im[0][0] 89 | read_im_file = os.path.join(directory, im_name+'.jpg') 90 | face_bbx = self.bbox_list[event_idx][0][im_idx][0] 91 | category_id = self.label_list[event_idx][0][im_idx][0] 92 | # print face_bbx.shape 93 | bboxes = [] 94 | category_names = [] 95 | if _count_no_mask < self.count_no_mask: 96 | for i in range(face_bbx.shape[0]): 97 | xmin = int(face_bbx[i][0]) 98 
| ymin = int(face_bbx[i][1]) 99 | xmax = int(face_bbx[i][2]) + xmin 100 | ymax = int(face_bbx[i][3]) + ymin 101 | # Consider only Occlusion Free masks 102 | if category_id[i][0] ==0: 103 | category_name = 'No-Mask' 104 | bboxes.append((xmin, ymin, xmax, ymax)) 105 | category_names.append(category_name) 106 | _t_count_no_mask+=1 107 | 108 | if bboxes and len(bboxes)<4: 109 | _count_no_mask += _t_count_no_mask 110 | print("Len of BBox:{} in Image:{}".format(len(bboxes),im_name)) 111 | self.make_labels(image_name=read_im_file, category_names= category_names, bboxes=bboxes) 112 | 113 | print("WideFace: Total Mask Labelled:{} and No-Mask Labelled:{}".format(_count_mask, _count_no_mask)) 114 | return _count_mask, _count_no_mask 115 | 116 | def test_labels(self, file_name): 117 | img = Image.open(os.path.join(self.kitti_images, file_name + '.jpg')) 118 | text_file = open(os .path.join(self.kitti_labels, file_name + '.txt'), 'r') 119 | features = [] 120 | bbox = [] 121 | category = [] 122 | for line in text_file: 123 | features = line.split() 124 | bbox.append([float(features[4]), float(features[5]), float(features[6]), float(features[7])]) 125 | category.append(features[0]) 126 | print("Bounding Box", bbox) 127 | print("Category:", category) 128 | i=0 129 | for bb in bbox: 130 | draw_img = ImageDraw.Draw(img) 131 | shape = ((bb[0], bb[1]), (bb[2], bb[3])) 132 | if category[i] == 'No-Mask': 133 | outline_clr = "red" 134 | elif category[i] == 'Mask': 135 | outline_clr = "green" 136 | draw_img.rectangle(shape, fill=None, outline=outline_clr, width=4) 137 | i+=1 138 | 139 | img.show() 140 | 141 | def main(): 142 | widerFace_base_dir = '/home/nvidia/tlt-ds-face_mask_detect/dataset/WiderFace-Dataset' # Update According to dataset location 143 | kitti_base_dir = '/home/nvidia/tlt-ds-face_mask_detect/dataset/KITTI_960' # Update According to KITTI output dataset location 144 | train = True # For generating validation dataset; select False 145 | if train: 146 | annotation_file = os.path.join(widerFace_base_dir, 'wider_face_split/wider_face_train.mat') 147 | widerFace_base_dir = os.path.join(widerFace_base_dir, 'WIDER_train/images') 148 | else: 149 | # Modify this 150 | annotation_file = os.path.join(widerFace_base_dir, 'MAFA-Label-Test/LabelTestAll.mat') 151 | widerFace_base_dir = os.path.join(widerFace_base_dir, 'test-images\images') 152 | 153 | category_limit = [1000, 1000] # Mask / No-Mask Limits 154 | kitti_resize_dims = (960, 544) # Look at TLT model requirements 155 | kitti_label = widerFace2kitti(annotation_file=annotation_file, widerFace_base_dir=widerFace_base_dir, 156 | kitti_base_dir=kitti_base_dir, kitti_resize_dims=kitti_resize_dims, 157 | category_limit=category_limit, train=train) 158 | count_masks, count_no_masks = kitti_label.mat2data() 159 | # kitti_label.test_labels(file_name='0_Parade_Parade_0_371') 160 | if __name__ == '__main__': 161 | main() 162 | -------------------------------------------------------------------------------- /ds_configs/config_infer_primary_masknet_gpu.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # 3 | # NVIDIA Corporation and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. 
Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA Corporation is strictly prohibited. 8 | 9 | [property] 10 | gpu-id=0 11 | net-scale-factor=0.0039215697906911373 12 | tlt-model-key=tlt_encode 13 | tlt-encoded-model=/mnt/8c3f68c9-a08a-400b-8c80-99c5fee26a06/detectnet_v2_models/detectnet_4K-fddb-12/resnet18_RGB960_detector_fddb_12_int8.etlt 14 | labelfile-path=labels_masknet.txt 15 | # GPU Engine File 16 | model-engine-file=/mnt/8c3f68c9-a08a-400b-8c80-99c5fee26a06/detectnet_v2_models/detectnet_4K-fddb-12/resnet18_RGB960_detector_fddb_12_int8.etlt_b1_gpu0_int8.engine 17 | # DLA Engine File 18 | # model-engine-file=/home/nvidia/detectnet_v2_models/detectnet_4K-fddb-12/resnet18_RGB960_detector_fddb_12_int8.etlt_b1_dla0_int8.engine 19 | input-dims=3;960;544;0 20 | uff-input-blob-name=input_1 21 | batch-size=1 22 | model-color-format=0 23 | ## 0=FP32, 1=INT8, 2=FP16 mode 24 | network-mode=1 25 | int8-calib-file=/mnt/8c3f68c9-a08a-400b-8c80-99c5fee26a06/detectnet_v2_models/detectnet_4K-fddb-12/calibration.bin 26 | num-detected-classes=2 27 | cluster-mode=1 28 | interval=0 29 | gie-unique-id=1 30 | is-classifier=0 31 | classifier-threshold=0.9 32 | output-blob-names=output_bbox/BiasAdd;output_cov/Sigmoid 33 | 34 | [class-attrs-0] 35 | pre-cluster-threshold=0.3 36 | group-threshold=1 37 | eps=0.5 38 | #minBoxes=1 39 | detected-min-w=0 40 | detected-min-h=0 41 | detected-max-w=0 42 | detected-max-h=0 43 | 44 | 45 | [class-attrs-1] 46 | pre-cluster-threshold=0.3 47 | group-threshold=1 48 | eps=0.3 49 | #minBoxes=1 50 | detected-min-w=0 51 | detected-min-h=0 52 | detected-max-w=0 53 | detected-max-h=0 54 | 55 | -------------------------------------------------------------------------------- /ds_configs/deepstream_app_source1_camera_masknet_gpu.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # 3 | # NVIDIA Corporation and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA Corporation is strictly prohibited. 
8 | 9 | [application] 10 | enable-perf-measurement=1 11 | perf-measurement-interval-sec=1 12 | 13 | [tiled-display] 14 | enable=1 15 | rows=1 16 | columns=1 17 | width=1280 #640 18 | height=960 #480 19 | gpu-id=0 20 | 21 | [source0] 22 | enable=1 23 | #Type - 1=CameraV4L2 2=URI 3=MultiURI 24 | type=1 25 | camera-width=640 26 | camera-height=480 27 | camera-fps-n=30 28 | camera-fps-d=1 29 | camera-v4l2-dev-node=0 30 | 31 | [streammux] 32 | gpu-id=0 33 | batch-size=1 34 | batched-push-timeout=40000 35 | ## Set muxer output width and height 36 | width=1920 37 | height=1080 38 | 39 | 40 | [sink0] 41 | enable=1 42 | #Type - 1=FakeSink 2=EglSink 3=File 43 | type=2 44 | sync=1 45 | source-id=0 46 | gpu-id=0 47 | container=2 48 | codec=1 49 | bitrate=2000000 50 | output-file=/mnt/8c3f68c9-a08a-400b-8c80-99c5fee26a06/video_file.mp4 51 | 52 | [osd] 53 | enable=1 54 | gpu-id=0 55 | border-width=4 56 | text-size=18 57 | text-color=1;1;1;1; 58 | text-bg-color=0.3;0.3;0.3;1 59 | font=Arial 60 | 61 | [primary-gie] 62 | enable=1 63 | gpu-id=0 64 | # Modify as necessary 65 | # GPU engine file 66 | model-engine-file=/mnt/8c3f68c9-a08a-400b-8c80-99c5fee26a06/detectnet_v2_models/detectnet_4K-fddb-12/resnet18_RGB960_detector_fddb_12_int8.etlt_b1_gpu0_int8.engine 67 | batch-size=1 68 | #Required by the app for OSD, not a plugin property 69 | bbox-border-color0=0;1;0;1 70 | bbox-border-color1=1;0;0;1 71 | #bbox-border-color2=0;0;1;1 # Blue 72 | #bbox-border-color3=0;1;0;1 73 | gie-unique-id=1 74 | config-file=config_infer_primary_masknet_gpu.txt 75 | 76 | [tracker] 77 | enable=0 78 | tracker-width=640 79 | tracker-height=384 80 | #ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_mot_iou.so 81 | #ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_nvdcf.so 82 | ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_mot_klt.so 83 | #ll-config-file required for DCF/IOU only 84 | #ll-config-file=../deepstream-app/tracker_config.yml 85 | #ll-config-file=iou_config.txt 86 | gpu-id=0 87 | #enable-batch-process applicable to DCF only 88 | enable-batch-process=1 89 | 90 | [tests] 91 | file-loop=1 92 | -------------------------------------------------------------------------------- /ds_configs/deepstream_app_source1_video_masknet_gpu.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # 3 | # NVIDIA Corporation and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA Corporation is strictly prohibited. 
8 | 9 | [application] 10 | enable-perf-measurement=1 11 | perf-measurement-interval-sec=1 12 | 13 | [tiled-display] 14 | enable=1 15 | rows=1 16 | columns=1 17 | width=1280 #640 18 | height=960 #480 19 | gpu-id=0 20 | 21 | 22 | [source0] 23 | enable=1 24 | #Type - 1=CameraV4L2 2=URI 3=MultiURI 25 | type=3 26 | num-sources=1 27 | uri=file:/mnt/8c3f68c9-a08a-400b-8c80-99c5fee26a06/face-mask-videos/C0074.MP4 28 | gpu-id=0 29 | 30 | 31 | [streammux] 32 | gpu-id=0 33 | batch-size=1 34 | batched-push-timeout=40000 35 | ## Set muxer output width and height 36 | width=1920 37 | height=1080 38 | 39 | 40 | [sink0] 41 | enable=1 42 | #Type - 1=FakeSink 2=EglSink 3=File 43 | type=2 44 | sync=1 45 | source-id=0 46 | gpu-id=0 47 | container=2 48 | codec=1 49 | bitrate=2000000 50 | output-file=/mnt/8c3f68c9-a08a-400b-8c80-99c5fee26a06/video_file.mp4 51 | 52 | [osd] 53 | enable=1 54 | gpu-id=0 55 | border-width=4 56 | text-size=18 57 | text-color=1;1;1;1; 58 | text-bg-color=0.3;0.3;0.3;1 59 | font=Arial 60 | 61 | [primary-gie] 62 | enable=1 63 | gpu-id=0 64 | # Modify as necessary 65 | # GPU engine file 66 | model-engine-file=/mnt/8c3f68c9-a08a-400b-8c80-99c5fee26a06/detectnet_v2_models/detectnet_4K-fddb-12/resnet18_RGB960_detector_fddb_12_int8.etlt_b1_gpu0_int8.engine 67 | batch-size=1 68 | #Required by the app for OSD, not a plugin property 69 | bbox-border-color0=0;1;0;1 70 | bbox-border-color1=1;0;0;1 71 | #bbox-border-color2=0;0;1;1 # Blue 72 | #bbox-border-color3=0;1;0;1 73 | gie-unique-id=1 74 | config-file=config_infer_primary_masknet_gpu.txt 75 | 76 | [tracker] 77 | enable=0 78 | tracker-width=640 79 | tracker-height=384 80 | #ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_mot_iou.so 81 | #ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_nvdcf.so 82 | ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_mot_klt.so 83 | #ll-config-file required for DCF/IOU only 84 | #ll-config-file=../deepstream-app/tracker_config.yml 85 | #ll-config-file=iou_config.txt 86 | gpu-id=0 87 | #enable-batch-process applicable to DCF only 88 | enable-batch-process=1 89 | 90 | [tests] 91 | file-loop=1 92 | -------------------------------------------------------------------------------- /ds_configs/labels_masknet.txt: -------------------------------------------------------------------------------- 1 | mask 2 | no-mask 3 | default 4 | -------------------------------------------------------------------------------- /face-mask-detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Face Mask Detection using NVIDIA TLT " 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The MIT License (MIT)\n", 15 | "\n", 16 | "Copyright (c) 2019-2020, NVIDIA CORPORATION.\n", 17 | "\n", 18 | "Permission is hereby granted, free of charge, to any person obtaining a copy of\n", 19 | "this software and associated documentation files (the \"Software\"), to deal in\n", 20 | "the Software without restriction, including without limitation the rights to\n", 21 | "use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of\n", 22 | "the Software, and to permit persons to whom the Software is furnished to do so,\n", 23 | "subject to the following conditions:\n", 24 | "\n", 25 | "The above copyright notice and this permission notice shall be included in all\n", 26 | "copies or substantial portions of the 
Software.\n", 27 | "\n", 28 | "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n", 29 | "IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS\n", 30 | "FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR\n", 31 | "COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER\n", 32 | "IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n", 33 | "CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## DetectNet_v2 with ResNet-18 example use case\n", 41 | "\n", 42 | "The goal of this notebook is to use NVIDIA TLT to train a face-mask detection model and make it ready for deployment.\n", 43 | "Along the way, it serves as an example use case of object detection with DetectNet_v2 in the Transfer Learning Toolkit.\n", 44 | "\n", 45 | "0. [Set up env variables](#head-0)\n", 46 | "1. [Prepare dataset and pre-trained model](#head-1)\n", 47 | " 1. [Download dataset and convert in KITTI Format](#head-1-1)\n", 48 | " 1. [Prepare tfrecords from kitti format dataset](#head-1-2)\n", 49 | " 2. [Download pre-trained model](#head-1-3)\n", 50 | "2. [Provide training specification](#head-2)\n", 51 | "3. [Run TLT training](#head-3)\n", 52 | "4. [Evaluate trained models](#head-4)\n", 53 | "5. [Prune trained models](#head-5)\n", 54 | "6. [Retrain pruned models](#head-6)\n", 55 | "7. [Evaluate retrained model](#head-7)\n", 56 | "8. [Visualize inferences](#head-8)\n", 57 | "9. [Deploy](#head-9)\n", 58 | " 1. [Int8 Optimization](#head-9-1)\n", 59 | " 2. [Generate TensorRT engine](#head-9-2)\n", 60 | "10. [Verify Deployed Model](#head-10)\n", 61 | " 1. [Inference using TensorRT engine](#head-10-1)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "![Face Mask Detection Output](https://raw.githubusercontent.com/NVIDIA-AI-IOT/face-mask-detection/master/images/face-mask-detect-output.png)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## 0. Set up env variables \n", 76 | "When using the purpose-built pretrained models from NGC, please make sure to set the `$KEY` environment variable to the key as mentioned in the model overview. Failing to do so can lead to errors when trying to load them as pretrained models.\n", 77 | "\n", 78 | "*Note: Please make sure to remove any stray artifacts/files from the `$USER_EXPERIMENT_DIR` or `$DATA_DOWNLOAD_DIR` paths mentioned below that may have been generated from previous experiments. 
Having checkpoint files etc. may interfere with creating a training graph for a new experiment.*" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "# Setting up env variables for cleaner command line commands.\n", 88 | "print(\"Update directory paths if needed\")\n", "# Note: avoid trailing spaces after the paths below - %env keeps them in the value.\n", 89 | "%env KEY=tlt_encode\n", 90 | "# User directory - pre-trained/unpruned/pruned/final models will be saved here\n", 91 | "%env USER_EXPERIMENT_DIR=/home/detectnet_v2\n", 92 | "# Download directory - tfrecords will be generated here\n", 93 | "%env DATA_DOWNLOAD_DIR=/home/data_fm_0916\n", 94 | "# Spec Directory\n", 95 | "%env SPECS_DIR=/home/detectnet_v2/specs\n", 96 | "# Number of GPUs used for training\n", 97 | "%env NUM_GPUS=1" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "## 1. Prepare dataset and pre-trained model " 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "### A. Download dataset and convert in KITTI Format \n", 112 | "\n", 113 | "In this experiment, we will be using 4 different datasets:\n", 114 | "\n", 115 | "1. Faces with Mask:\n", 116 | " - Kaggle Medical Mask Dataset [Download Link](https://www.kaggle.com/ivandanilovich/medical-masks-dataset-images-tfrecords)\n", 117 | " - MAFA - MAsked FAces [Download Link](https://drive.google.com/drive/folders/1nbtM1n0--iZ3VVbNGhocxbnBGhMau_OG)\n", 118 | "2. Faces without Mask:\n", 119 | " - FDDB Dataset [Download Link](http://vis-www.cs.umass.edu/fddb/)\n", 120 | " - WiderFace Dataset [Download Link](http://shuoyang1213.me/WIDERFACE/)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "- Download the data using the provided links, keeping all images and label files in the directory structure noted in the GitHub repo.\n", 128 | "- Convert the dataset to KITTI format.\n", 129 | "- Use the KITTI-format directory as \"$DATA_DOWNLOAD_DIR\"\n", 130 | "\n", 131 | "\n", 132 | "Note: We do not use all the images from MAFA and WiderFace. Combined, we will use about 6000 faces each, with and without mask" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "### B. 
Prepare tfrecords from KITTI-format dataset \n", 140 | "\n", 141 | "* Update the tfrecords spec file to take in your KITTI-format dataset\n", 142 | "* Create the tfrecords using `tlt-dataset-convert`\n", 143 | "\n", 144 | "*Note: The tfrecords only need to be generated once.*" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "print(\"TFrecords conversion spec file for kitti training\")\n", 154 | "!cat $SPECS_DIR/detectnet_v2_tfrecords_kitti_trainval.txt" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "# Creating a new directory for the output tfrecords dump.\n", 164 | "print(\"Converting Tfrecords for kitti trainval dataset\")\n", 165 | "!tlt-dataset-convert -d $SPECS_DIR/detectnet_v2_tfrecords_kitti_trainval.txt \\\n", 166 | " -o $DATA_DOWNLOAD_DIR/tfrecords/kitti_trainval/kitti_trainval" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "!ls -rlt $DATA_DOWNLOAD_DIR/tfrecords/kitti_trainval/" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "### C. Download pre-trained model \n", 183 | "Download the correct pretrained model from the NGC model registry for your experiment. Please note that for DetectNet_v2, the input is expected to be 0-1 normalized with input channels in RGB order. Therefore, for optimum results, please download models with `*_detectnet_v2` in their name string. All other models expect input preprocessing with mean subtraction and input channels in BGR order. Thus, using them as pretrained weights may result in suboptimal performance. " 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "# List models available in the model registry.\n", 193 | "!ngc registry model list nvidia/tlt_pretrained_detectnet_v2:*" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "# Create the target destination to download the model.\n", 203 | "!mkdir -p $USER_EXPERIMENT_DIR/pretrained_resnet18/" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "# Download the pretrained model from NGC\n", 213 | "!ngc registry model download-version nvidia/tlt_pretrained_detectnet_v2:resnet18 \\\n", 214 | " --dest $USER_EXPERIMENT_DIR/pretrained_resnet18" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "!ls -rlt $USER_EXPERIMENT_DIR/pretrained_resnet18/tlt_pretrained_detectnet_v2_vresnet18" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "## 2. Provide training specification \n", 231 | "* Tfrecords for the train datasets\n", 232 | " * In order to use the newly generated tfrecords, update the dataset_config parameter in the spec file at `$SPECS_DIR/detectnet_v2_train_resnet18_kitti.txt` \n", 233 | " * Update the fold number to use for evaluation. 
In case of random data split, please use fold `0` only\n", 234 | " * For sequence-wise split, you may use any fold generated from the dataset convert tool\n", 235 | "* Pre-trained models\n", 236 | "* Augmentation parameters for on-the-fly data augmentation\n", 237 | "* Other training (hyper-)parameters such as batch size, number of epochs, learning rate, etc." 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "!cat $SPECS_DIR/detectnet_v2_train_resnet18_kitti.txt" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "## 3. Run TLT training \n", 254 | "* Provide the sample spec file and the output directory location for models\n", 255 | "\n", 256 | "*Note: The training may take hours to complete. Also, the remainder of this notebook assumes that the training was done in single-GPU mode. When run in multi-GPU mode, please expect to update the pruning and inference steps with new pruning thresholds and updated parameters in the clusterfile.json accordingly for optimum performance.*\n", 257 | "\n", 258 | "*Detectnet_v2 now supports restart from checkpoint. In case the training job is killed prematurely, you may resume training from the closest checkpoint by simply re-running the same command line. Please do make sure to use the same number of GPUs when restarting the training.*" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "!tlt-train detectnet_v2 -e $SPECS_DIR/detectnet_v2_train_resnet18_kitti.txt \\\n", 268 | " -r $USER_EXPERIMENT_DIR/experiment_dir_unpruned \\\n", 269 | " -k $KEY \\\n", 270 | " -n resnet18_detector \\\n", 271 | " --gpus $NUM_GPUS" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "print('Model for each epoch:')\n", 281 | "print('---------------------')\n", 282 | "!ls -lh $USER_EXPERIMENT_DIR/experiment_dir_unpruned/weights" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "## 4. Evaluate the trained model " 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "!tlt-evaluate detectnet_v2 -e $SPECS_DIR/detectnet_v2_train_resnet18_kitti.txt \\\n", 299 | " -m $USER_EXPERIMENT_DIR/experiment_dir_unpruned/weights/resnet18_detector.tlt \\\n", 300 | " -k $KEY" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "## 5. Prune the trained model \n", 308 | "* Specify pre-trained model\n", 309 | "* Equalization criterion (`Applicable for resnets and mobilenets`)\n", 310 | "* Threshold for pruning.\n", 311 | "* A key to save and load the model\n", 312 | "* Output directory to store the model\n", 313 | "\n", 314 | "*Usually, you just need to adjust `-pth` (threshold) for the accuracy and model-size trade-off. A higher `pth` gives you a smaller model (and thus higher inference speed) but worse accuracy. The threshold to use depends on the dataset. A pth value of `5.2e-6` is just a starting point. If the retrain accuracy is good, you can increase this value to get smaller models. 
Otherwise, lower this value to get better accuracy.*\n", 315 | "\n", 316 | "*For some internal studies, we have noticed that a pth value of 0.01 is a good starting point for detectnet_v2 models.*" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "# Create an output directory if it doesn't exist.\n", 326 | "!mkdir -p $USER_EXPERIMENT_DIR/experiment_dir_pruned" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | "print(\"Change Threshold (-pth) value according to your experiments\")\n", 336 | "\n", 337 | "!tlt-prune -m $USER_EXPERIMENT_DIR/experiment_dir_unpruned/weights/resnet18_detector.tlt \\\n", 338 | " -o $USER_EXPERIMENT_DIR/experiment_dir_pruned/resnet18_nopool_bn_detectnet_v2_pruned.tlt \\\n", 339 | " -eq union \\\n", 340 | " -pth 0.8 \\\n", 341 | " -k $KEY" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [ 350 | "!ls -rlt $USER_EXPERIMENT_DIR/experiment_dir_pruned/" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "## 6. Retrain the pruned model \n", 358 | "* The model needs to be retrained to bring back accuracy after pruning\n", 359 | "* Specify the retraining specification, using the pruned model as the pretrained weights.\n", 360 | "\n", 361 | "*Note: For retraining, please set the `load_graph` option to `true` in the model_config to load the pruned model graph. Also, if the model shows some decrease in mAP after retraining, it could be that the originally trained model was pruned a little too much. Please try reducing the pruning threshold, thereby reducing the pruning ratio, and use the new model to retrain.*" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": {}, 368 | "outputs": [], 369 | "source": [ 370 | "# Printing the retrain experiment file.\n", 371 | "# Note: We have updated the experiment file to include the\n", 372 | "# newly pruned model as pretrained weights, and the\n", 373 | "# load_graph option is set to true.\n", 374 | "!cat $SPECS_DIR/detectnet_v2_retrain_resnet18_kitti.txt" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [ 383 | "# Retraining using the pruned model as pretrained weights \n", 384 | "!tlt-train detectnet_v2 -e $SPECS_DIR/detectnet_v2_retrain_resnet18_kitti.txt \\\n", 385 | " -r $USER_EXPERIMENT_DIR/experiment_dir_retrain \\\n", 386 | " -k $KEY \\\n", 387 | " -n resnet18_detector_pruned \\\n", 388 | " --gpus $NUM_GPUS" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [ 397 | "# Listing the newly retrained model.\n", 398 | "!ls -rlt $USER_EXPERIMENT_DIR/experiment_dir_retrain/weights" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": {}, 404 | "source": [ 405 | "## 7. Evaluate the retrained model " 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "This section evaluates the pruned and retrained model using `tlt-evaluate`.\n",
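"\n", "If the mAP reported here is noticeably lower than the result for the unpruned model in step 4, the model was likely pruned too aggressively; reduce the `-pth` value in step 5 and retrain (see the note in step 6)."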
413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": null, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "!tlt-evaluate detectnet_v2 -e $SPECS_DIR/detectnet_v2_retrain_resnet18_kitti.txt \\\n", 422 | " -m $USER_EXPERIMENT_DIR/experiment_dir_retrain/weights/resnet18_detector_pruned.tlt \\\n", 423 | " -k $KEY" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "## 8. Visualize inferences \n", 431 | "In this section, we run the `tlt-infer` tool to generate inferences on the trained models. To render bboxes from more classes, please edit the spec file `detectnet_v2_inference_kitti_tlt.txt` to include all the classes you would like to visualize and edit the rest of the file accordingly." 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "For this, you will need to create a `test_images` directory containing at least 8 images with masked and unmasked faces; these can come from the test data or simply from photos of your own. " 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": {}, 445 | "outputs": [], 446 | "source": [ 447 | "# Running inference for detection on n images\n", 448 | "!tlt-infer detectnet_v2 -e $SPECS_DIR/detectnet_v2_inference_kitti_tlt.txt \\\n", 449 | " -o $USER_EXPERIMENT_DIR/tlt_infer_testing \\\n", 450 | " -i $DATA_DOWNLOAD_DIR/test_images \\\n", 451 | " -k $KEY" 452 | ] 453 | }, 454 | { 455 | "cell_type": "markdown", 456 | "metadata": {}, 457 | "source": [ 458 | "The `tlt-infer` tool produces two outputs. \n", 459 | "1. Overlaid images in `$USER_EXPERIMENT_DIR/tlt_infer_testing/images_annotated`\n", 460 | "2. Frame-by-frame bbox labels in KITTI format located in `$USER_EXPERIMENT_DIR/tlt_infer_testing/labels`\n", 461 | "\n", 462 | "*Note: To run inference on a single image, simply replace the path passed to the `-i` flag in the `tlt-infer` command with the path to the image.*" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": null, 468 | "metadata": {}, 469 | "outputs": [], 470 | "source": [ 471 | "# Simple grid visualizer\n", 472 | "import matplotlib.pyplot as plt\n", 473 | "import os\n", 474 | "from math import ceil\n", 475 | "valid_image_ext = ['.jpg', '.png', '.jpeg', '.ppm']\n", 476 | "\n", 477 | "def visualize_images(image_dir, num_cols=4, num_images=10):\n", 478 | " output_path = os.path.join(os.environ['USER_EXPERIMENT_DIR'], image_dir)\n", 479 | " num_rows = int(ceil(float(num_images) / float(num_cols)))\n", 480 | " f, axarr = plt.subplots(num_rows, num_cols, figsize=[80,30])\n", 481 | " f.tight_layout()\n", 482 | " a = [os.path.join(output_path, image) for image in os.listdir(output_path) \n", 483 | " if os.path.splitext(image)[1].lower() in valid_image_ext]\n", 484 | " for idx, img_path in enumerate(a[:num_images]):\n", 485 | " col_id = idx % num_cols\n", 486 | " row_id = idx // num_cols # integer (floor) division; a float index would break axarr indexing\n", 487 | " img = plt.imread(img_path)\n", 488 | " axarr[row_id, col_id].imshow(img) " 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": { 495 | "scrolled": true 496 | }, 497 | "outputs": [], 498 | "source": [ 499 | "# Visualizing the first 8 images.\n", 500 | "OUTPUT_PATH = 'tlt_infer_testing/images_annotated' # relative path from $USER_EXPERIMENT_DIR.\n", 501 | "COLS = 4 # number of columns in the visualizer grid.\n", 502 | "IMAGES = 8 # number of images to visualize.\n", 503 | "\n", 504 | 
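"# visualize_images (defined in the cell above) resolves OUTPUT_PATH relative to\n", "# $USER_EXPERIMENT_DIR and tiles the annotated images in a grid.\n",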
"visualize_images(OUTPUT_PATH, num_cols=COLS, num_images=IMAGES)" 505 | ] 506 | }, 507 | { 508 | "cell_type": "markdown", 509 | "metadata": {}, 510 | "source": [ 511 | "## 9. Deploy! " 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": null, 517 | "metadata": {}, 518 | "outputs": [], 519 | "source": [ 520 | "!mkdir -p $USER_EXPERIMENT_DIR/experiment_dir_final\n", 521 | "# Removing a pre-existing copy of the etlt if there has been any.\n", 522 | "import os\n", 523 | "output_file=os.path.join(os.environ['USER_EXPERIMENT_DIR'],\n", 524 | " \"experiment_dir_final/resnet18_detector.etlt\")\n", 525 | "if os.path.exists(output_file):\n", 526 | " os.system(\"rm {}\".format(output_file))\n", 527 | "!tlt-export detectnet_v2 \\\n", 528 | " -m $USER_EXPERIMENT_DIR/experiment_dir_retrain/weights/resnet18_detector_pruned.tlt \\\n", 529 | " -o $USER_EXPERIMENT_DIR/experiment_dir_final/resnet18_detector.etlt \\\n", 530 | " -k $KEY" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": null, 536 | "metadata": {}, 537 | "outputs": [], 538 | "source": [ 539 | "print('Exported model:')\n", 540 | "print('------------')\n", 541 | "!ls -lh $USER_EXPERIMENT_DIR/experiment_dir_final" 542 | ] 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | "metadata": {}, 547 | "source": [ 548 | "### A. Int8 Optimization \n", 549 | "DetectNet_v2 model supports int8 inference mode in TRT. In order to use int8 mode, we must calibrate the model to run 8-bit inferences. This involves 2 steps\n", 550 | "\n", 551 | "* Generate calibration tensorfile from the training data using tlt-int8-tensorfile\n", 552 | "* Use tlt-export to generate int8 calibration table.\n", 553 | "\n", 554 | "*Note: For this example, we generate a calibration tensorfile containing 10 batches of training data.\n", 555 | "Ideally, it is best to use atleast 10-20% of the training data to calibrate the model. The more data provided during calibration, the closer int8 inferences are to fp32 inferences.*" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": null, 561 | "metadata": {}, 562 | "outputs": [], 563 | "source": [ 564 | "!tlt-int8-tensorfile detectnet_v2 -e $SPECS_DIR/detectnet_v2_retrain_resnet18_kitti.txt \\\n", 565 | " -m 40 \\\n", 566 | " -o $USER_EXPERIMENT_DIR/experiment_dir_final/calibration.tensor" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": null, 572 | "metadata": {}, 573 | "outputs": [], 574 | "source": [ 575 | "!rm -rf $USER_EXPERIMENT_DIR/experiment_dir_final/resnet18_detector.etlt\n", 576 | "!rm -rf $USER_EXPERIMENT_DIR/experiment_dir_final/calibration.bin\n", 577 | "!tlt-export detectnet_v2 \\\n", 578 | " -m $USER_EXPERIMENT_DIR/experiment_dir_retrain/weights/resnet18_detector_pruned.tlt \\\n", 579 | " -o $USER_EXPERIMENT_DIR/experiment_dir_final/resnet18_detector.etlt \\\n", 580 | " -k $KEY \\\n", 581 | " --cal_data_file $USER_EXPERIMENT_DIR/experiment_dir_final/calibration.tensor \\\n", 582 | " --data_type int8 \\\n", 583 | " --batches 20 \\\n", 584 | " --batch_size 4 \\\n", 585 | " --max_batch_size 4\\\n", 586 | " --engine_file $USER_EXPERIMENT_DIR/experiment_dir_final/resnet18_detector.trt.int8 \\\n", 587 | " --cal_cache_file $USER_EXPERIMENT_DIR/experiment_dir_final/calibration.bin \\\n", 588 | " --verbose" 589 | ] 590 | }, 591 | { 592 | "cell_type": "markdown", 593 | "metadata": {}, 594 | "source": [ 595 | "### B. 
Generate TensorRT engine \n", 596 | "Verify engine generation using the `tlt-converter` utility included with the docker.\n", 597 | "\n", 598 | "The `tlt-converter` produces optimized TensorRT engines for the platform that it resides on. Therefore, to get maximum performance, please instantiate this docker and execute the `tlt-converter` command with the exported `.etlt` file and calibration cache (for int8 mode) on your target device. The converter utility included in this docker only works for x86 devices with discrete NVIDIA GPUs. \n", 599 | "\n", 600 | "For Jetson devices, please download the Jetson version of the converter from the dev zone link [here](https://developer.nvidia.com/tlt-converter). \n", 601 | "\n", 602 | "If you choose to integrate your model into DeepStream directly, you may do so by simply copying the exported `.etlt` file along with the calibration cache to the target device and updating the spec file that configures the `gst-nvinfer` element to point to this newly exported model. Usually this file is called `config_infer_primary.txt` for detection models and `config_infer_secondary_*.txt` for classification models." 603 | ] 604 | }, 605 | { 606 | "cell_type": "code", 607 | "execution_count": null, 608 | "metadata": {}, 609 | "outputs": [], 610 | "source": [ 611 | "!tlt-converter $USER_EXPERIMENT_DIR/experiment_dir_final/resnet18_detector.etlt \\\n", 612 | " -k $KEY \\\n", 613 | " -c $USER_EXPERIMENT_DIR/experiment_dir_final/calibration.bin \\\n", 614 | " -o output_cov/Sigmoid,output_bbox/BiasAdd \\\n", 615 | " -d 3,544,960 \\\n", 616 | " -i nchw \\\n", 617 | " -m 64 \\\n", 618 | " -t int8 \\\n", 619 | " -e $USER_EXPERIMENT_DIR/experiment_dir_final/resnet18_detector.trt \\\n", 620 | " -b 4" 621 | ] 622 | }, 623 | { 624 | "cell_type": "markdown", 625 | "metadata": {}, 626 | "source": [ 627 | "## 10. Verify Deployed Model \n", 628 | "Verify the exported model by visualizing inferences on TensorRT.\n", 629 | "In addition to running inference on a `.tlt` model in [step 8](#head-8), the `tlt-infer` tool is also capable of consuming the converted `TensorRT engine` from [step 9.B](#head-9-2).\n", 630 | "\n", 631 | "*If the accuracy of the int8 inferences seems to degrade after int8 calibration, it could be because there wasn't enough data in the calibration tensorfile used to calibrate the model, or because the training data is not entirely representative of your test images, making the calibration incorrect. Therefore, you may either regenerate the calibration tensorfile with more batches of the training data and recalibrate the model, or calibrate the model on a few images from the test set. This may be done using the `--cal_image_dir` flag in the `tlt-export` tool. For more information, please follow the instructions in the USER GUIDE.*" 632 | ] 633 | }, 634 | { 635 | "cell_type": "markdown", 636 | "metadata": {}, 637 | "source": [ 638 | "### A. 
Inference using TensorRT engine " 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": null, 644 | "metadata": {}, 645 | "outputs": [], 646 | "source": [ 647 | "!tlt-infer detectnet_v2 -e $SPECS_DIR/detectnet_v2_inference_kitti_etlt.txt \\\n", 648 | " -o $USER_EXPERIMENT_DIR/etlt_infer_testing \\\n", 649 | " -i $DATA_DOWNLOAD_DIR/test_images \\\n", 650 | " -k $KEY" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": null, 656 | "metadata": {}, 657 | "outputs": [], 658 | "source": [ 659 | "# Visualize the first 8 inferred images.\n", 660 | "OUTPUT_PATH = 'etlt_infer_testing/images_annotated' # relative path from $USER_EXPERIMENT_DIR.\n", 661 | "COLS = 4 # number of columns in the visualizer grid.\n", 662 | "IMAGES = 8 # number of images to visualize.\n", 663 | "\n", 664 | "visualize_images(OUTPUT_PATH, num_cols=COLS, num_images=IMAGES)" 665 | ] 666 | } 667 | ], 668 | "metadata": { 669 | "kernelspec": { 670 | "display_name": "Python 3", 671 | "language": "python", 672 | "name": "python3" 673 | }, 674 | "language_info": { 675 | "codemirror_mode": { 676 | "name": "ipython", 677 | "version": 3 678 | }, 679 | "file_extension": ".py", 680 | "mimetype": "text/x-python", 681 | "name": "python", 682 | "nbconvert_exporter": "python", 683 | "pygments_lexer": "ipython3", 684 | "version": "3.6.11" 685 | } 686 | }, 687 | "nbformat": 4, 688 | "nbformat_minor": 2 689 | } 690 | -------------------------------------------------------------------------------- /images/face-mask-detect-output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/face-mask-detection/984ae615f730ff13cc0a8047870f6846e78f9ccb/images/face-mask-detect-output.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | Pillow -------------------------------------------------------------------------------- /tlt_specs/detectnet_v2_inference_kitti_etlt.txt: -------------------------------------------------------------------------------- 1 | inferencer_config{ 2 | # Defining input node of the model 3 | # defining target class names for the experiment. 4 | # Note: This must be mentioned in order of the network's classes. 5 | target_classes: "mask" 6 | target_classes: "no-mask" 7 | # Inference dimensions. 8 | image_width: 960 9 | image_height: 544 10 | # Must match what the model was trained for. 
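# (The 960x544 RGB input here matches the training resolution set in detectnet_v2_train_resnet18_kitti.txt.)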
11 | image_channels: 3 12 | batch_size: 16 13 | gpu_index: 0 14 | # model handler config 15 | tensorrt_config{ 16 | trt_engine: "/home/detectnet_v2/experiment_dir_final/resnet18_RGB960_detector_fddb_int8_v100.trt" 17 | } 18 | } 19 | bbox_handler_config{ 20 | kitti_dump: true 21 | disable_overlay: false 22 | overlay_linewidth: 4 23 | classwise_bbox_handler_config{ 24 | key:"mask" 25 | value: { 26 | confidence_model: "aggregate_cov" 27 | output_map: "mask" 28 | confidence_threshold: 0.9 29 | bbox_color{ 30 | R: 0 31 | G: 255 32 | B: 0 33 | } 34 | clustering_config{ 35 | coverage_threshold: 0.00 36 | dbscan_eps: 0.3 37 | dbscan_min_samples: 0.05 38 | minimum_bounding_box_height: 4 39 | } 40 | } 41 | } 42 | classwise_bbox_handler_config{ 43 | key:"no-mask" 44 | value: { 45 | confidence_model: "aggregate_cov" 46 | output_map: "no-mask" 47 | confidence_threshold: 0.9 48 | bbox_color{ 49 | R: 255 50 | G: 0 51 | B: 0 52 | } 53 | clustering_config{ 54 | coverage_threshold: 0.00 55 | dbscan_eps: 0.3 56 | dbscan_min_samples: 0.05 57 | minimum_bounding_box_height: 4 58 | } 59 | } 60 | } 61 | classwise_bbox_handler_config{ 62 | key:"default" 63 | value: { 64 | confidence_model: "aggregate_cov" 65 | confidence_threshold: 0.9 66 | bbox_color{ 67 | R: 255 68 | G: 255 69 | B: 0 70 | } 71 | clustering_config{ 72 | coverage_threshold: 0.00 73 | dbscan_eps: 0.3 74 | dbscan_min_samples: 0.05 75 | minimum_bounding_box_height: 4 76 | } 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /tlt_specs/detectnet_v2_inference_kitti_tlt.txt: -------------------------------------------------------------------------------- 1 | inferencer_config{ 2 | # defining target class names for the experiment. 3 | # Note: This must be mentioned in order of the network's classes. 4 | target_classes: "mask" 5 | target_classes: "no-mask" 6 | # Inference dimensions. 7 | image_width: 960 8 | image_height: 544 9 | # Must match what the model was trained for. 
10 | image_channels: 3 11 | batch_size: 16 12 | gpu_index: 0 13 | #model handler config 14 | tlt_config{ 15 | model: "/home/detectnet_v2/experiment_dir_retrain/weights/resnet18_detector_pruned.tlt" 16 | #model: "/home/detectnet_v2/experiment_dir_unpruned/weights/model.tlt" 17 | } 18 | } 19 | bbox_handler_config{ 20 | kitti_dump: true 21 | disable_overlay: false 22 | overlay_linewidth: 4 23 | classwise_bbox_handler_config{ 24 | key:"mask" 25 | value: { 26 | confidence_model: "aggregate_cov" 27 | output_map: "mask" 28 | confidence_threshold: 0.9 29 | bbox_color{ 30 | R: 0 31 | G: 255 32 | B: 0 33 | } 34 | clustering_config{ 35 | coverage_threshold: 0.00 36 | dbscan_eps: 0.3 37 | dbscan_min_samples: 0.05 38 | minimum_bounding_box_height: 4 39 | } 40 | } 41 | } 42 | classwise_bbox_handler_config{ 43 | key:"no-mask" 44 | value: { 45 | confidence_model: "aggregate_cov" 46 | output_map: "no-mask" 47 | confidence_threshold: 0.9 48 | bbox_color{ 49 | R: 255 50 | G: 0 51 | B: 0 52 | } 53 | clustering_config{ 54 | coverage_threshold: 0.00 55 | dbscan_eps: 0.3 56 | dbscan_min_samples: 0.05 57 | minimum_bounding_box_height: 4 58 | } 59 | } 60 | } 61 | classwise_bbox_handler_config{ 62 | key:"default" 63 | value: { 64 | confidence_model: "aggregate_cov" 65 | confidence_threshold: 0.9 66 | bbox_color{ 67 | R: 255 68 | G: 255 69 | B: 255 70 | } 71 | clustering_config{ 72 | coverage_threshold: 0.00 73 | dbscan_eps: 0.3 74 | dbscan_min_samples: 0.05 75 | minimum_bounding_box_height: 4 76 | } 77 | } 78 | } 79 | } 80 | 81 | -------------------------------------------------------------------------------- /tlt_specs/detectnet_v2_retrain_resnet18_kitti.txt: -------------------------------------------------------------------------------- 1 | random_seed: 42 2 | dataset_config { 3 | data_sources { 4 | tfrecords_path: "/home/data/tfrecords/kitti_trainval/*" 5 | image_directory_path: "/home/data/train" 6 | } 7 | image_extension: "jpg" 8 | target_class_mapping { 9 | key: "mask" 10 | value: "mask" 11 | } 12 | target_class_mapping { 13 | key: "no-mask" 14 | value: "no-mask" 15 | } 16 | validation_fold: 0 17 | #validation_data_source: { 18 | #tfrecords_path: "/home/data/tfrecords/kitti_val/*" 19 | #image_directory_path: "/home/data/test" 20 | #} 21 | } 22 | 23 | augmentation_config { 24 | preprocessing { 25 | output_image_width: 960 26 | output_image_height: 544 27 | min_bbox_width: 1.0 28 | min_bbox_height: 1.0 29 | output_image_channel: 3 30 | } 31 | spatial_augmentation { 32 | hflip_probability: 0.5 33 | zoom_min: 1.0 34 | zoom_max: 1.0 35 | translate_max_x: 8.0 36 | translate_max_y: 8.0 37 | } 38 | color_augmentation { 39 | hue_rotation_max: 25.0 40 | saturation_shift_max: 0.20000000298 41 | contrast_scale_max: 0.10000000149 42 | contrast_center: 0.5 43 | } 44 | } 45 | 46 | postprocessing_config { 47 | target_class_config { 48 | key: "mask" 49 | value { 50 | clustering_config { 51 | coverage_threshold: 0.00499999988824 52 | dbscan_eps: 0.20000000298 53 | dbscan_min_samples: 0.0500000007451 54 | minimum_bounding_box_height: 20 55 | } 56 | } 57 | } 58 | target_class_config { 59 | key: "no-mask" 60 | value { 61 | clustering_config { 62 | coverage_threshold: 0.00499999988824 63 | dbscan_eps: 0.15000000596 64 | dbscan_min_samples: 0.0500000007451 65 | minimum_bounding_box_height: 20 66 | } 67 | } 68 | } 69 | } 70 | 71 | model_config { 72 | pretrained_model_file: "/home/detectnet_v2/experiment_dir_pruned/resnet18_nopool_bn_detectnet_v2_pruned.tlt" 73 | num_layers: 18 74 | use_batch_norm: true 75 | load_graph: true 76 | 
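# load_graph: true loads the pruned graph structure as-is, as required when retraining a pruned model (see step 6 of the notebook).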
objective_set { 77 | bbox { 78 | scale: 35.0 79 | offset: 0.5 80 | } 81 | cov { 82 | } 83 | } 84 | training_precision { 85 | backend_floatx: FLOAT32 86 | } 87 | arch: "resnet" 88 | } 89 | 90 | evaluation_config { 91 | validation_period_during_training: 10 92 | first_validation_epoch: 10 93 | minimum_detection_ground_truth_overlap { 94 | key: "mask" 95 | value: 0.5 96 | } 97 | minimum_detection_ground_truth_overlap { 98 | key: "no-mask" 99 | value: 0.5 100 | } 101 | evaluation_box_config { 102 | key: "mask" 103 | value { 104 | minimum_height: 20 105 | maximum_height: 9999 106 | minimum_width: 10 107 | maximum_width: 9999 108 | } 109 | } 110 | evaluation_box_config { 111 | key: "no-mask" 112 | value { 113 | minimum_height: 20 114 | maximum_height: 9999 115 | minimum_width: 10 116 | maximum_width: 9999 117 | } 118 | } 119 | average_precision_mode: INTEGRATE 120 | } 121 | 122 | cost_function_config { 123 | target_classes { 124 | name: "mask" 125 | class_weight: 1.0 126 | coverage_foreground_weight: 0.0500000007451 127 | objectives { 128 | name: "cov" 129 | initial_weight: 1.0 130 | weight_target: 1.0 131 | } 132 | objectives { 133 | name: "bbox" 134 | initial_weight: 10.0 135 | weight_target: 10.0 136 | } 137 | } 138 | target_classes { 139 | name: "no-mask" 140 | class_weight: 8.0 141 | coverage_foreground_weight: 0.0500000007451 142 | objectives { 143 | name: "cov" 144 | initial_weight: 1.0 145 | weight_target: 1.0 146 | } 147 | objectives { 148 | name: "bbox" 149 | initial_weight: 10.0 150 | weight_target: 1.0 151 | } 152 | } 153 | enable_autoweighting: true 154 | max_objective_weight: 0.999899983406 155 | min_objective_weight: 9.99999974738e-05 156 | } 157 | 158 | training_config { 159 | batch_size_per_gpu: 24 160 | num_epochs: 120 161 | learning_rate { 162 | soft_start_annealing_schedule { 163 | min_learning_rate: 5e-06 164 | max_learning_rate: 5e-04 165 | soft_start: 0.10000000149 166 | annealing: 0.699999988079 167 | } 168 | } 169 | regularizer { 170 | type: L1 171 | weight: 3.00000002618e-09 172 | } 173 | optimizer { 174 | adam { 175 | epsilon: 9.99999993923e-09 176 | beta1: 0.899999976158 177 | beta2: 0.999000012875 178 | } 179 | } 180 | cost_scaling { 181 | initial_exponent: 20.0 182 | increment: 0.005 183 | decrement: 1.0 184 | } 185 | checkpoint_interval: 10 186 | } 187 | 188 | bbox_rasterizer_config { 189 | target_class_config { 190 | key: "mask" 191 | value { 192 | cov_center_x: 0.5 193 | cov_center_y: 0.5 194 | cov_radius_x: 0.40000000596 195 | cov_radius_y: 0.40000000596 196 | bbox_min_radius: 1.0 197 | } 198 | } 199 | target_class_config { 200 | key: "no-mask" 201 | value { 202 | cov_center_x: 0.5 203 | cov_center_y: 0.5 204 | cov_radius_x: 1.0 205 | cov_radius_y: 1.0 206 | bbox_min_radius: 1.0 207 | } 208 | } 209 | deadzone_radius: 0.400000154972 210 | } 211 | 212 | -------------------------------------------------------------------------------- /tlt_specs/detectnet_v2_tfrecords_kitti_trainval.txt: -------------------------------------------------------------------------------- 1 | kitti_config { 2 | root_directory_path: "/home/data/train" 3 | image_dir_name: "images" 4 | label_dir_name: "labels" 5 | image_extension: ".jpg" 6 | partition_mode: "random" 7 | num_partitions: 2 8 | val_split: 20 9 | num_shards: 10 } 10 | -------------------------------------------------------------------------------- /tlt_specs/detectnet_v2_tfrecords_kitti_val.txt: -------------------------------------------------------------------------------- 1 | kitti_config { 2 | root_directory_path: 
"/home/data/test" 3 | image_dir_name: "images" 4 | label_dir_name: "labels" 5 | image_extension: ".jpg" 6 | partition_mode: "random" 7 | num_partitions: 2 8 | val_split: 100 9 | num_shards: 10 } 10 | -------------------------------------------------------------------------------- /tlt_specs/detectnet_v2_train_resnet18_kitti.txt: -------------------------------------------------------------------------------- 1 | random_seed: 42 2 | dataset_config { 3 | data_sources { 4 | tfrecords_path: "/home/data/tfrecords/kitti_trainval/*" 5 | image_directory_path: "/home/data/train" 6 | } 7 | image_extension: "jpg" 8 | target_class_mapping { 9 | key: "mask" 10 | value: "mask" 11 | } 12 | target_class_mapping { 13 | key: "no-mask" 14 | value: "no-mask" 15 | } 16 | validation_fold: 0 17 | #validation_data_source: { 18 | #tfrecords_path: "/home/data/tfrecords/kitti_val/*" 19 | #image_directory_path: "/home/data/test" 20 | #} 21 | } 22 | 23 | 24 | augmentation_config { 25 | preprocessing { 26 | output_image_width: 960 27 | output_image_height: 544 28 | min_bbox_width: 1.0 29 | min_bbox_height: 1.0 30 | output_image_channel: 3 31 | } 32 | spatial_augmentation { 33 | hflip_probability: 0.5 34 | vflip_probability: 0.0 35 | zoom_min: 1.0 36 | zoom_max: 1.0 37 | translate_max_x: 8.0 38 | translate_max_y: 8.0 39 | } 40 | color_augmentation { 41 | hue_rotation_max: 25.0 42 | saturation_shift_max: 0.20000000298 43 | contrast_scale_max: 0.10000000149 44 | contrast_center: 0.5 45 | } 46 | } 47 | 48 | postprocessing_config { 49 | target_class_config { 50 | key: "mask" 51 | value { 52 | clustering_config { 53 | coverage_threshold: 0.00499999988824 54 | dbscan_eps: 0.20000000298 55 | dbscan_min_samples: 0.0500000007451 56 | minimum_bounding_box_height: 20 57 | } 58 | } 59 | } 60 | target_class_config { 61 | key: "no-mask" 62 | value { 63 | clustering_config { 64 | coverage_threshold: 0.00499999988824 65 | dbscan_eps: 0.15000000596 66 | dbscan_min_samples: 0.0500000007451 67 | minimum_bounding_box_height: 20 68 | } 69 | } 70 | } 71 | } 72 | 73 | model_config { 74 | pretrained_model_file: "/home/detectnet_v2/pretrained_resnet18/tlt_pretrained_detectnet_v2_vresnet18/resnet18.hdf5" 75 | num_layers: 18 76 | use_batch_norm: true 77 | objective_set { 78 | bbox { 79 | scale: 35.0 80 | offset: 0.5 81 | } 82 | cov { 83 | } 84 | } 85 | training_precision { 86 | backend_floatx: FLOAT32 87 | } 88 | arch: "resnet" 89 | } 90 | 91 | evaluation_config { 92 | validation_period_during_training: 10 93 | first_validation_epoch: 10 94 | minimum_detection_ground_truth_overlap { 95 | key: "mask" 96 | value: 0.5 97 | } 98 | minimum_detection_ground_truth_overlap { 99 | key: "no-mask" 100 | value: 0.5 101 | } 102 | evaluation_box_config { 103 | key: "mask" 104 | value { 105 | minimum_height: 20 106 | maximum_height: 9999 107 | minimum_width: 10 108 | maximum_width: 9999 109 | } 110 | } 111 | evaluation_box_config { 112 | key: "no-mask" 113 | value { 114 | minimum_height: 20 115 | maximum_height: 9999 116 | minimum_width: 10 117 | maximum_width: 9999 118 | } 119 | } 120 | average_precision_mode: INTEGRATE 121 | } 122 | 123 | cost_function_config { 124 | target_classes { 125 | name: "mask" 126 | class_weight: 1.0 127 | coverage_foreground_weight: 0.0500000007451 128 | objectives { 129 | name: "cov" 130 | initial_weight: 1.0 131 | weight_target: 1.0 132 | } 133 | objectives { 134 | name: "bbox" 135 | initial_weight: 10.0 136 | weight_target: 10.0 137 | } 138 | } 139 | target_classes { 140 | name: "no-mask" 141 | class_weight: 8.0 142 | 
coverage_foreground_weight: 0.0500000007451 143 | objectives { 144 | name: "cov" 145 | initial_weight: 1.0 146 | weight_target: 1.0 147 | } 148 | objectives { 149 | name: "bbox" 150 | initial_weight: 10.0 151 | weight_target: 1.0 152 | } 153 | } 154 | enable_autoweighting: true 155 | max_objective_weight: 0.999899983406 156 | min_objective_weight: 9.99999974738e-05 157 | } 158 | 159 | training_config { 160 | batch_size_per_gpu: 24 161 | num_epochs: 120 162 | learning_rate { 163 | soft_start_annealing_schedule { 164 | min_learning_rate: 5e-06 165 | max_learning_rate: 5e-04 166 | soft_start: 0.10000000149 167 | annealing: 0.699999988079 168 | } 169 | } 170 | regularizer { 171 | type: L1 172 | weight: 3.00000002618e-09 173 | } 174 | optimizer { 175 | adam { 176 | epsilon: 9.99999993923e-09 177 | beta1: 0.899999976158 178 | beta2: 0.999000012875 179 | } 180 | } 181 | cost_scaling { 182 | initial_exponent: 20.0 183 | increment: 0.005 184 | decrement: 1.0 185 | } 186 | checkpoint_interval: 10 187 | } 188 | 189 | bbox_rasterizer_config { 190 | target_class_config { 191 | key: "mask" 192 | value { 193 | cov_center_x: 0.5 194 | cov_center_y: 0.5 195 | cov_radius_x: 0.40000000596 196 | cov_radius_y: 0.40000000596 197 | bbox_min_radius: 1.0 198 | } 199 | } 200 | target_class_config { 201 | key: "no-mask" 202 | value { 203 | cov_center_x: 0.5 204 | cov_center_y: 0.5 205 | cov_radius_x: 1.0 206 | cov_radius_y: 1.0 207 | bbox_min_radius: 1.0 208 | } 209 | } 210 | deadzone_radius: 0.400000154972 211 | } 212 | 213 | --------------------------------------------------------------------------------