├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── YOLOV3MLNetSO ├── Assets │ ├── Images │ │ ├── cars road.jpg │ │ └── dog_cat.jpg │ └── Output │ │ ├── cars road._processed.jpg │ │ └── dog_cat._processed.jpg ├── DataStructures │ ├── YoloV3BitmapData.cs │ ├── YoloV3Prediction.cs │ └── YoloV3Result.cs ├── Program.cs ├── README.md └── YOLOV3MLNetSO.csproj ├── YOLOv3MLNet.sln ├── YOLOv3MLNet ├── Assets │ ├── Images │ │ └── PMC5055614_00001.jpg │ └── Output │ │ └── PMC5055614_00001._processed.jpg ├── DataStructures │ ├── YoloV3BitmapData.cs │ ├── YoloV3Prediction.cs │ └── YoloV3Result.cs ├── Program.cs └── YOLOv3MLNet.csproj └── netron.png /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 
53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Build results 17 | [Dd]ebug/ 18 | [Dd]ebugPublic/ 19 | [Rr]elease/ 20 | [Rr]eleases/ 21 | x64/ 22 | x86/ 23 | [Aa][Rr][Mm]/ 24 | [Aa][Rr][Mm]64/ 25 | bld/ 26 | [Bb]in/ 27 | [Oo]bj/ 28 | [Ll]og/ 29 | 30 | # Visual Studio 2015/2017 cache/options directory 31 | .vs/ 32 | # Uncomment if you have tasks that create the project's static files in wwwroot 33 | #wwwroot/ 34 | 35 | # Visual Studio 2017 auto generated files 36 | Generated\ Files/ 37 | 38 | # MSTest test Results 39 | [Tt]est[Rr]esult*/ 40 | [Bb]uild[Ll]og.* 41 | 42 | # NUNIT 43 | *.VisualState.xml 44 | TestResult.xml 45 | 46 | # Build Results of an ATL Project 47 | [Dd]ebugPS/ 48 | [Rr]eleasePS/ 49 | dlldata.c 50 | 51 | # Benchmark Results 52 | BenchmarkDotNet.Artifacts/ 53 | 54 | # .NET Core 55 | project.lock.json 56 | project.fragment.lock.json 57 | artifacts/ 58 | 59 | # StyleCop 60 | StyleCopReport.xml 61 | 62 | # Files built by Visual Studio 63 | *_i.c 64 | *_p.c 65 | *_h.h 66 | *.ilk 67 | *.meta 68 | *.obj 69 | *.iobj 70 | *.pch 71 | *.pdb 72 | *.ipdb 73 | *.pgc 74 | *.pgd 75 | *.rsp 76 | *.sbr 77 | *.tlb 78 | *.tli 79 | *.tlh 80 | *.tmp 81 | *.tmp_proj 82 | *_wpftmp.csproj 83 | *.log 84 | *.vspscc 85 | *.vssscc 86 | .builds 87 | *.pidb 88 | *.svclog 89 | *.scc 90 | 91 | # Chutzpah Test files 92 | _Chutzpah* 93 | 94 | # Visual C++ cache files 95 | ipch/ 96 | *.aps 97 | *.ncb 98 | *.opendb 99 | *.opensdf 100 | *.sdf 101 | *.cachefile 102 | *.VC.db 103 | *.VC.VC.opendb 104 | 105 | # Visual Studio profiler 106 | *.psess 107 | *.vsp 108 | *.vspx 109 | *.sap 110 | 111 | # Visual Studio Trace Files 112 | *.e2e 113 | 114 | # TFS 2012 Local Workspace 115 | $tf/ 116 | 117 | # Guidance Automation Toolkit 118 | *.gpState 119 | 120 | # ReSharper is a .NET coding add-in 121 | _ReSharper*/ 122 | *.[Rr]e[Ss]harper 123 | *.DotSettings.user 124 | 125 | # JustCode is a .NET coding add-in 126 | .JustCode 127 | 128 | # TeamCity is a build add-in 129 | _TeamCity* 130 | 131 | # DotCover is a Code Coverage Tool 132 | *.dotCover 133 | 134 | # AxoCover is a Code Coverage Tool 135 | .axoCover/* 136 | !.axoCover/settings.json 137 | 138 | # Visual Studio code coverage results 139 | *.coverage 140 | *.coveragexml 141 | 142 | # NCrunch 143 | _NCrunch_* 144 | .*crunch*.local.xml 145 | nCrunchTemp_* 146 | 147 | # MightyMoose 148 | *.mm.* 149 | AutoTest.Net/ 150 | 151 | # Web workbench (sass) 152 | .sass-cache/ 153 | 154 | # Installshield output folder 155 | [Ee]xpress/ 156 | 157 | # DocProject is a documentation generator add-in 158 | DocProject/buildhelp/ 159 | DocProject/Help/*.HxT 160 | 
DocProject/Help/*.HxC 161 | DocProject/Help/*.hhc 162 | DocProject/Help/*.hhk 163 | DocProject/Help/*.hhp 164 | DocProject/Help/Html2 165 | DocProject/Help/html 166 | 167 | # Click-Once directory 168 | publish/ 169 | 170 | # Publish Web Output 171 | *.[Pp]ublish.xml 172 | *.azurePubxml 173 | # Note: Comment the next line if you want to checkin your web deploy settings, 174 | # but database connection strings (with potential passwords) will be unencrypted 175 | *.pubxml 176 | *.publishproj 177 | 178 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 179 | # checkin your Azure Web App publish settings, but sensitive information contained 180 | # in these scripts will be unencrypted 181 | PublishScripts/ 182 | 183 | # NuGet Packages 184 | *.nupkg 185 | # The packages folder can be ignored because of Package Restore 186 | **/[Pp]ackages/* 187 | # except build/, which is used as an MSBuild target. 188 | !**/[Pp]ackages/build/ 189 | # Uncomment if necessary however generally it will be regenerated when needed 190 | #!**/[Pp]ackages/repositories.config 191 | # NuGet v3's project.json files produces more ignorable files 192 | *.nuget.props 193 | *.nuget.targets 194 | 195 | # Microsoft Azure Build Output 196 | csx/ 197 | *.build.csdef 198 | 199 | # Microsoft Azure Emulator 200 | ecf/ 201 | rcf/ 202 | 203 | # Windows Store app package directories and files 204 | AppPackages/ 205 | BundleArtifacts/ 206 | Package.StoreAssociation.xml 207 | _pkginfo.txt 208 | *.appx 209 | 210 | # Visual Studio cache files 211 | # files ending in .cache can be ignored 212 | *.[Cc]ache 213 | # but keep track of directories ending in .cache 214 | !?*.[Cc]ache/ 215 | 216 | # Others 217 | ClientBin/ 218 | ~$* 219 | *~ 220 | *.dbmdl 221 | *.dbproj.schemaview 222 | *.jfm 223 | *.pfx 224 | *.publishsettings 225 | orleans.codegen.cs 226 | 227 | # Including strong name files can present a security risk 228 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 229 | #*.snk 230 | 231 | # Since there are multiple workflows, uncomment next line to ignore bower_components 232 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 233 | #bower_components/ 234 | 235 | # RIA/Silverlight projects 236 | Generated_Code/ 237 | 238 | # Backup & report files from converting an old project file 239 | # to a newer Visual Studio version. Backup files are not needed, 240 | # because we have git ;-) 241 | _UpgradeReport_Files/ 242 | Backup*/ 243 | UpgradeLog*.XML 244 | UpgradeLog*.htm 245 | ServiceFabricBackup/ 246 | *.rptproj.bak 247 | 248 | # SQL Server files 249 | *.mdf 250 | *.ldf 251 | *.ndf 252 | 253 | # Business Intelligence projects 254 | *.rdl.data 255 | *.bim.layout 256 | *.bim_*.settings 257 | *.rptproj.rsuser 258 | *- Backup*.rdl 259 | 260 | # Microsoft Fakes 261 | FakesAssemblies/ 262 | 263 | # GhostDoc plugin setting file 264 | *.GhostDoc.xml 265 | 266 | # Node.js Tools for Visual Studio 267 | .ntvs_analysis.dat 268 | node_modules/ 269 | 270 | # Visual Studio 6 build log 271 | *.plg 272 | 273 | # Visual Studio 6 workspace options file 274 | *.opt 275 | 276 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
277 | *.vbw
278 |
279 | # Visual Studio LightSwitch build output
280 | **/*.HTMLClient/GeneratedArtifacts
281 | **/*.DesktopClient/GeneratedArtifacts
282 | **/*.DesktopClient/ModelManifest.xml
283 | **/*.Server/GeneratedArtifacts
284 | **/*.Server/ModelManifest.xml
285 | _Pvt_Extensions
286 |
287 | # Paket dependency manager
288 | .paket/paket.exe
289 | paket-files/
290 |
291 | # FAKE - F# Make
292 | .fake/
293 |
294 | # JetBrains Rider
295 | .idea/
296 | *.sln.iml
297 |
298 | # CodeRush personal settings
299 | .cr/personal
300 |
301 | # Python Tools for Visual Studio (PTVS)
302 | __pycache__/
303 | *.pyc
304 |
305 | # Cake - Uncomment if you are using it
306 | # tools/**
307 | # !tools/packages.config
308 |
309 | # Tabs Studio
310 | *.tss
311 |
312 | # Telerik's JustMock configuration file
313 | *.jmconfig
314 |
315 | # BizTalk build output
316 | *.btp.cs
317 | *.btm.cs
318 | *.odx.cs
319 | *.xsd.cs
320 |
321 | # OpenCover UI analysis results
322 | OpenCover/
323 |
324 | # Azure Stream Analytics local run output
325 | ASALocalRun/
326 |
327 | # MSBuild Binary and Structured Log
328 | *.binlog
329 |
330 | # NVidia Nsight GPU debugger configuration file
331 | *.nvuser
332 |
333 | # MFractors (Xamarin productivity tool) working folder
334 | .mfractor/
335 |
336 | # Local History for Visual Studio
337 | .localhistory/
338 |
339 | # BeatPulse healthcheck temp database
340 | healthchecksdb
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 BobLd
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | **Another case study, based on [this](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov3) YOLO v3 model, is available [here](https://github.com/BobLd/YOLOv3MLNet/tree/master/YOLOV3MLNetSO).**
2 |
3 | **See [here](https://github.com/BobLd/YOLOv4MLNet) for YOLO v4 use.**
4 |
5 | # YOLO v3 in ML.Net
6 | Use the YOLO v3 algorithm for object detection in C# using ML.Net. We start from a Torch model, convert it to ONNX format, and then use it in ML.Net.
7 |
8 | This is a case study on a YOLO model trained for document layout analysis.
The model can be found in the following Medium article: [Object Detection — Document Layout Analysis Using Monk AI](https://medium.com/towards-artificial-intelligence/object-detection-document-layout-analysis-using-monk-object-detection-toolkit-6c57200bde5).
9 |
10 | ## Main differences
11 | - The ONNX conversion removes one feature, the *objectness score* (pc). The original model has (5 + classes) features for each bounding box, while the ONNX model has (4 + classes) features per bounding box. We will use the class probability as a proxy for the *objectness score* when performing the Non-maximum Suppression (NMS) step. This is a known issue, more info [here](https://github.com/ultralytics/yolov3/issues/750).
12 | - Image resizing is not optimised and will always yield a 416x416 image. This is not the case in the original model (see this issue: [RECTANGULAR INFERENCE](https://github.com/ultralytics/yolov3/issues/232)).
13 |
14 | # Export to ONNX in Python
15 | This is based on the article [Object Detection — Document Layout Analysis Using Monk AI](https://medium.com/towards-artificial-intelligence/object-detection-document-layout-analysis-using-monk-object-detection-toolkit-6c57200bde5).
16 |
17 | ## Load the model
18 | ```python
19 | import os
20 | import sys
21 | from IPython.display import Image
22 | sys.path.append("../Monk_Object_Detection/7_yolov3/lib")
23 | from infer_detector import Infer
24 |
25 | gtf = Infer()
26 |
27 | f = open("dla_yolov3/classes.txt")
28 | class_list = f.readlines()
29 | f.close()
30 |
31 | model_name = "yolov3"
32 | weights = "dla_yolov3/dla_yolov3.pt"
33 | gtf.Model(model_name, class_list, weights, use_gpu=False, input_size=(416, 416))
34 | ```
35 |
36 | ## Test the model
37 | ```python
38 | img_path = "test_square.jpg"
39 | gtf.Predict(img_path, conf_thres=0.2, iou_thres=0.5)
40 | Image(filename='output/test_square.jpg')
41 | ```
42 |
43 | ## Export the model
44 | You need to set `ONNX_EXPORT = True` in `...\Monk_Object_Detection\7_yolov3\lib\models.py` before loading the model.
45 |
46 | We name the input layer `image` and the two output layers `classes` and `bboxes`. This is not required, but it helps with clarity.
47 |
48 | ```python
49 | import torch
50 | import torchvision.models as models
51 |
52 | dummy_input = torch.randn(1, 3, 416, 416) # Create the right input shape (e.g. for an image)
53 | dummy_input = torch.nn.Sigmoid()(dummy_input) # limit between 0 and 1 (superfluous?)
54 | torch.onnx.export(gtf.system_dict["local"]["model"],
55 | dummy_input,
56 | "dla_yolov3.onnx",
57 | input_names=["image"],
58 | output_names=["classes", "bboxes"],
59 | opset_version=9)
60 | ```
61 |
62 | # Check exported model with Netron
63 | The ONNX model can be viewed in [Netron](https://www.electronjs.org/apps/netron). Our model looks like this:
64 | ![netron](https://github.com/BobLd/YOLOv3MLNet/blob/master/netron.png)
65 |
66 | - The input layer size is [1 x 3 x 416 x 416]. This corresponds to a batch size of 1 x 3 color channels x 416 pixels in height x 416 pixels in width (more info about the fixed batch size [here](https://github.com/ultralytics/yolov3/issues/1030)).
67 |
68 | As per this [article](https://medium.com/analytics-vidhya/yolo-v3-theory-explained-33100f6d193):
69 | > For an image of size 416 x 416, YOLO predicts ((52 x 52) + (26 x 26) + (13 x 13)) x 3 = 10,647 bounding boxes.
70 | - The `bboxes` output layer is of size [10,647 x 4]. This corresponds to 10,647 bounding boxes x 4 bounding box coordinates (x, y, w, h).
71 | - The `classes` output layer is of size [10,647 x 18].
This corresponds to 10,647 bounding boxes x 18 classes (this model has only 18 classes).
72 |
73 | Hence, each bounding box has (4 + classes) = 22 features. The total number of predictions in this model is 22 x 10,647.
74 |
75 | **NB**: The ONNX conversion removes one feature, the *objectness score* (pc). The original model has (5 + classes) features for each bounding box. We will use the class probability as a proxy for the *objectness score*.
76 |
77 | ![medium-explanation](https://miro.medium.com/max/700/1*6KLkWAWCINb8kVNuPRaDMQ.png)
78 |
79 | More information can be found in this article: [YOLO v3 theory explained](https://medium.com/analytics-vidhya/yolo-v3-theory-explained-33100f6d193)
80 |
81 | # Load model in C#
The full code is available in [YOLOv3MLNet/Program.cs](YOLOv3MLNet/Program.cs). The ML.Net scoring pipeline resizes the input bitmap to 416x416 (`ResizingKind.IsoPad`), extracts the pixels scaled by 1/255, and applies the ONNX model with `image` as the input column and `bboxes`/`classes` as the output columns. The pipeline is fitted on an empty list to obtain the input data schema, and a `PredictionEngine` is then created from the fitted model.
82 |
83 | # Predict in C#
The raw `bboxes` and `classes` outputs are post-processed in `YoloV3Prediction.GetResults()`: predictions below the confidence threshold are discarded, boxes are converted from (x, y, w, h) to (x1, y1, x2, y2), Non-maximum Suppression is applied using the class probability as the confidence score, and the surviving boxes are scaled back to the original image coordinates.
84 | ![output](YOLOv3MLNet/Assets/Output/PMC5055614_00001._processed.jpg)
85 |
86 | # Resources
87 | - https://medium.com/towards-artificial-intelligence/object-detection-document-layout-analysis-using-monk-object-detection-toolkit-6c57200bde5
88 | - https://medium.com/analytics-vidhya/yolo-v3-theory-explained-33100f6d193
89 | - https://towardsdatascience.com/non-maximum-suppression-nms-93ce178e177c
90 | - https://michhar.github.io/convert-pytorch-onnx/
91 |
-------------------------------------------------------------------------------- /YOLOV3MLNetSO/Assets/Images/cars road.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/YOLOV3MLNetSO/Assets/Images/cars road.jpg
-------------------------------------------------------------------------------- /YOLOV3MLNetSO/Assets/Images/dog_cat.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/YOLOV3MLNetSO/Assets/Images/dog_cat.jpg
-------------------------------------------------------------------------------- /YOLOV3MLNetSO/Assets/Output/cars road._processed.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/YOLOV3MLNetSO/Assets/Output/cars road._processed.jpg
-------------------------------------------------------------------------------- /YOLOV3MLNetSO/Assets/Output/dog_cat._processed.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/YOLOV3MLNetSO/Assets/Output/dog_cat._processed.jpg
-------------------------------------------------------------------------------- /YOLOV3MLNetSO/DataStructures/YoloV3BitmapData.cs: --------------------------------------------------------------------------------
1 | using Microsoft.ML.Data;
2 | using Microsoft.ML.Transforms.Image;
3 | using System.Drawing;
4 |
5 | namespace YOLOV3MLNetSO.DataStructures
6 | {
7 | public class YoloV3BitmapData
8 | {
9 | [ColumnName("bitmap")]
10 | [ImageType(416, 416)]
11 | public Bitmap Image { get; set; }
12 |
13 | [ColumnName("width")]
14 | public float ImageWidth => Image.Width;
15 |
16 | [ColumnName("height")]
17 | public float ImageHeight => Image.Height;
18 | }
19 | }
20 |
-------------------------------------------------------------------------------- /YOLOV3MLNetSO/DataStructures/YoloV3Prediction.cs: --------------------------------------------------------------------------------
1 | using Microsoft.ML.Data;
2 |
3 | namespace
YOLOV3MLNetSO.DataStructures 4 | { 5 | public class YoloV3Prediction 6 | { 7 | /// 8 | /// ((52 x 52) + (26 x 26) + 13 x 13)) x 3 = 10,647. 9 | /// 10 | public const int YoloV3BboxPredictionCount = 10_647; 11 | 12 | /// 13 | /// Boxes 14 | /// Size is [1 x 'n_candidates' x 4] 15 | /// 16 | [VectorType(1, YoloV3BboxPredictionCount, 4)] 17 | [ColumnName("yolonms_layer_1/ExpandDims_1:0")] 18 | public float[] Boxes { get; set; } 19 | 20 | /// 21 | /// Scores 22 | /// Size is [1 x 80 x 'n_candidates'] 23 | /// 24 | [VectorType(1, 80, YoloV3BboxPredictionCount)] 25 | [ColumnName("yolonms_layer_1/ExpandDims_3:0")] 26 | public float[] Scores { get; set; } 27 | 28 | /// 29 | /// Concat 30 | /// Size is ['nbox' x 3] 31 | /// 32 | [VectorType(0, 3)] 33 | [ColumnName("yolonms_layer_1/concat_2:0")] 34 | public int[] Concat { get; set; } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /YOLOV3MLNetSO/DataStructures/YoloV3Result.cs: -------------------------------------------------------------------------------- 1 | namespace YOLOV3MLNetSO.DataStructures 2 | { 3 | public struct YoloV3Result 4 | { 5 | /// 6 | /// x1, y1, x2, y2 in page coordinates. 7 | /// 8 | public float[] BBox { get; } 9 | 10 | /// 11 | /// The Bbox category. 12 | /// 13 | public string Label { get; } 14 | 15 | /// 16 | /// Category's confidence level. 17 | /// 18 | public float Confidence { get; } 19 | 20 | public YoloV3Result(float[] bbox, string label, float confidence) 21 | { 22 | BBox = bbox; 23 | Label = label; 24 | Confidence = confidence; 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /YOLOV3MLNetSO/Program.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.ML; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Drawing; 5 | using System.IO; 6 | using System.Linq; 7 | using YOLOV3MLNetSO.DataStructures; 8 | using static Microsoft.ML.Transforms.Image.ImageResizingEstimator; 9 | 10 | namespace YOLOV3MLNetSO 11 | { 12 | /* 13 | * My answer to https://stackoverflow.com/questions/64407833/how-to-impelement-post-proccesing-for-yolo-v3-or-v4-onnx-models-in-ml-net 14 | */ 15 | class Program 16 | { 17 | const string modelPath = @"D:\yolov3-10.onnx"; 18 | 19 | const string imageFolder = @"Assets\Images"; 20 | 21 | const string imageOutputFolder = @"Assets\Output"; 22 | 23 | static readonly string[] classesNames = new string[] { "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" }; 24 | 25 | static void Main(string[] args) 26 | { 27 | Directory.CreateDirectory(imageOutputFolder); 28 | MLContext mlContext = new MLContext(); 29 | 30 | // model is available 
here: 31 | // https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov3 32 | 33 | // Define scoring pipeline 34 | var pipeline = mlContext.Transforms.ResizeImages(inputColumnName: "bitmap", outputColumnName: "input_1", imageWidth: 416, imageHeight: 416, resizing: ResizingKind.IsoPad) 35 | .Append(mlContext.Transforms.ExtractPixels(outputColumnName: "input_1", scaleImage: 1f / 255f)) 36 | .Append(mlContext.Transforms.Concatenate("image_shape", "height", "width")) 37 | .Append(mlContext.Transforms.ApplyOnnxModel(shapeDictionary: new Dictionary() { { "input_1", new[] { 1, 3, 416, 416 } } }, 38 | inputColumnNames: new[] 39 | { 40 | "input_1", 41 | "image_shape" 42 | }, 43 | outputColumnNames: new[] 44 | { 45 | "yolonms_layer_1/ExpandDims_1:0", 46 | "yolonms_layer_1/ExpandDims_3:0", 47 | "yolonms_layer_1/concat_2:0" 48 | }, 49 | modelFile: modelPath, recursionLimit: 100)); 50 | 51 | // Fit on empty list to obtain input data schema 52 | var model = pipeline.Fit(mlContext.Data.LoadFromEnumerable(new List())); 53 | 54 | // Create prediction engine 55 | var predictionEngine = mlContext.Model.CreatePredictionEngine(model); 56 | 57 | // load image 58 | string imageName = "cars road.jpg"; 59 | using (var bitmap = new Bitmap(Image.FromFile(Path.Combine(imageFolder, imageName)))) 60 | { 61 | // predict 62 | var predict = predictionEngine.Predict(new YoloV3BitmapData() { Image = bitmap }); 63 | var results = GetResults(predict, classesNames); 64 | 65 | // draw predictions 66 | using (var g = Graphics.FromImage(bitmap)) 67 | { 68 | foreach (var result in results) 69 | { 70 | var y1 = result.BBox[0]; 71 | var x1 = result.BBox[1]; 72 | var y2 = result.BBox[2]; 73 | var x2 = result.BBox[3]; 74 | 75 | g.DrawRectangle(Pens.Red, x1, y1, x2 - x1, y2 - y1); 76 | using (var brushes = new SolidBrush(Color.FromArgb(50, Color.Red))) 77 | { 78 | g.FillRectangle(brushes, x1, y1, x2 - x1, y2 - y1); 79 | } 80 | 81 | g.DrawString(result.Label + " " + result.Confidence.ToString("0.00"), 82 | new Font("Arial", 12), Brushes.Blue, new PointF(x1, y1)); 83 | } 84 | 85 | bitmap.Save(Path.Combine(imageOutputFolder, Path.ChangeExtension(imageName, "_processed" + Path.GetExtension(imageName)))); 86 | } 87 | } 88 | } 89 | 90 | public static IReadOnlyList GetResults(YoloV3Prediction prediction, string[] categories) 91 | { 92 | if (prediction.Concat == null || prediction.Concat.Length == 0) 93 | { 94 | return new List(); 95 | } 96 | 97 | if (prediction.Boxes.Length != YoloV3Prediction.YoloV3BboxPredictionCount * 4) 98 | { 99 | throw new ArgumentException(); 100 | } 101 | 102 | if (prediction.Scores.Length != YoloV3Prediction.YoloV3BboxPredictionCount * categories.Length) 103 | { 104 | throw new ArgumentException(); 105 | } 106 | 107 | List results = new List(); 108 | 109 | // Concat size is 'nbox'x3 (batch_index, class_index, box_index) 110 | int resulstCount = prediction.Concat.Length / 3; 111 | for (int c = 0; c < resulstCount; c++) 112 | { 113 | var res = prediction.Concat.Skip(c * 3).Take(3).ToArray(); 114 | 115 | var batch_index = res[0]; 116 | var class_index = res[1]; 117 | var box_index = res[2]; 118 | 119 | var label = categories[class_index]; 120 | var bbox = new float[] 121 | { 122 | prediction.Boxes[box_index * 4], 123 | prediction.Boxes[box_index * 4 + 1], 124 | prediction.Boxes[box_index * 4 + 2], 125 | prediction.Boxes[box_index * 4 + 3], 126 | }; 127 | var score = prediction.Scores[box_index + class_index * YoloV3Prediction.YoloV3BboxPredictionCount]; 128 | 129 | results.Add(new 
YoloV3Result(bbox, label, score));
130 | }
131 |
132 | return results;
133 | }
134 |
135 | }
136 | }
137 |
-------------------------------------------------------------------------------- /YOLOV3MLNetSO/README.md: --------------------------------------------------------------------------------
1 | ## Answer to [How to impelement post-proccesing for yolo v3 or v4 onnx models in ML.Net](https://stackoverflow.com/questions/64407833/how-to-impelement-post-proccesing-for-yolo-v3-or-v4-onnx-models-in-ml-net)
2 |
3 | I'll take the [YOLO v3 (available in the onnx/models repo)](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov3) as an example. A good explanation of the model can be found [here](https://medium.com/analytics-vidhya/yolo-v3-theory-explained-33100f6d193).
4 |
5 | My first advice would be to look at the model using [Netron](https://www.electronjs.org/apps/netron). Doing so, you will see the input and output layers. These layers are also described in the onnx/models documentation.
6 |
7 | [Netron screenshot][1]
8 |
9 | (I see in Netron that this particular YOLO v3 model also does some post-processing by performing the Non-maximum Suppression step.)
10 |
11 | - Input layer names: `input_1`, `image_shape`
12 | - Output layer names: `yolonms_layer_1/ExpandDims_1:0`, `yolonms_layer_1/ExpandDims_3:0`, `yolonms_layer_1/concat_2:0`
13 |
14 | As per the model documentation, the input shapes are:
15 | > Resized image (1x3x416x416) Original image size (1x2) which is [image.size[1], image.size[0]]
16 |
17 | We first need to define the ML.Net input and output classes as follows:
18 | ```csharp
19 | public class YoloV3BitmapData
20 | {
21 | [ColumnName("bitmap")]
22 | [ImageType(416, 416)]
23 | public Bitmap Image { get; set; }
24 |
25 | [ColumnName("width")]
26 | public float ImageWidth => Image.Width;
27 |
28 | [ColumnName("height")]
29 | public float ImageHeight => Image.Height;
30 | }
31 |
32 | public class YoloV3Prediction
33 | {
34 | ///
35 | /// ((52 x 52) + (26 x 26) + (13 x 13)) x 3 = 10,647.
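/// That is, 3 anchor boxes for each cell of the 52x52, 26x26 and 13x13 grids: (2,704 + 676 + 169) x 3 = 10,647 candidate boxes.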
36 | ///
37 | public const int YoloV3BboxPredictionCount = 10_647;
38 |
39 | ///
40 | /// Boxes
41 | ///
42 | [ColumnName("yolonms_layer_1/ExpandDims_1:0")]
43 | public float[] Boxes { get; set; }
44 |
45 | ///
46 | /// Scores
47 | ///
48 | [ColumnName("yolonms_layer_1/ExpandDims_3:0")]
49 | public float[] Scores { get; set; }
50 |
51 | ///
52 | /// Concat
53 | ///
54 | [ColumnName("yolonms_layer_1/concat_2:0")]
55 | public int[] Concat { get; set; }
56 | }
57 | ```
58 |
59 | We then create the ML.Net pipeline and load the prediction engine:
60 |
61 | ```csharp
62 | // Define scoring pipeline
63 | var pipeline = mlContext.Transforms.ResizeImages(inputColumnName: "bitmap", outputColumnName: "input_1", imageWidth: 416, imageHeight: 416, resizing: ResizingKind.IsoPad)
64 | .Append(mlContext.Transforms.ExtractPixels(outputColumnName: "input_1", outputAsFloatArray: true, scaleImage: 1f / 255f))
65 | .Append(mlContext.Transforms.Concatenate("image_shape", "height", "width"))
66 | .Append(mlContext.Transforms.ApplyOnnxModel(shapeDictionary: new Dictionary() { { "input_1", new[] { 1, 3, 416, 416 } } },
67 | inputColumnNames: new[]
68 | {
69 | "input_1",
70 | "image_shape"
71 | },
72 | outputColumnNames: new[]
73 | {
74 | "yolonms_layer_1/ExpandDims_1:0",
75 | "yolonms_layer_1/ExpandDims_3:0",
76 | "yolonms_layer_1/concat_2:0"
77 | },
78 | modelFile: @"D:\yolov3-10.onnx"));
79 |
80 | // Fit on empty list to obtain input data schema
81 | var model = pipeline.Fit(mlContext.Data.LoadFromEnumerable(new List()));
82 |
83 | // Create prediction engine
84 | var predictionEngine = mlContext.Model.CreatePredictionEngine(model);
85 | ```
86 |
87 | **NB**: We need to define the `shapeDictionary` parameter because the input shapes are not completely defined in the model.
88 |
89 | As per the model documentation, the output shapes are:
90 | > The model has 3 outputs. boxes: (1x'n_candidates'x4), the coordinates of all anchor boxes, scores: (1x80x'n_candidates'), the scores of all anchor boxes per class, indices: ('nbox'x3), selected indices from the boxes tensor. The selected index format is (batch_index, class_index, box_index).
91 |
92 | The function below will help you process the results; I leave it to you to fine-tune it.
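Note that `Scores` is the flattened (1 x 80 x 'n_candidates') tensor, so the confidence of a given (class_index, box_index) pair is read at index `class_index * YoloV3BboxPredictionCount + box_index`; this is how `score` is computed in the loop below.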
93 | 94 | ```csharp 95 | public IReadOnlyList GetResults(YoloV3Prediction prediction, string[] categories) 96 | { 97 | if (prediction.Concat == null || prediction.Concat.Length == 0) 98 | { 99 | return new List(); 100 | } 101 | 102 | if (prediction.Boxes.Length != YoloV3Prediction.YoloV3BboxPredictionCount * 4) 103 | { 104 | throw new ArgumentException(); 105 | } 106 | 107 | if (prediction.Scores.Length != YoloV3Prediction.YoloV3BboxPredictionCount * categories.Length) 108 | { 109 | throw new ArgumentException(); 110 | } 111 | 112 | List results = new List(); 113 | 114 | // Concat size is 'nbox'x3 (batch_index, class_index, box_index) 115 | int resulstCount = prediction.Concat.Length / 3; 116 | for (int c = 0; c < resulstCount; c++) 117 | { 118 | var res = prediction.Concat.Skip(c * 3).Take(3).ToArray(); 119 | 120 | var batch_index = res[0]; 121 | var class_index = res[1]; 122 | var box_index = res[2]; 123 | 124 | var label = categories[class_index]; 125 | var bbox = new float[] 126 | { 127 | prediction.Boxes[box_index * 4], 128 | prediction.Boxes[box_index * 4 + 1], 129 | prediction.Boxes[box_index * 4 + 2], 130 | prediction.Boxes[box_index * 4 + 3], 131 | }; 132 | var score = prediction.Scores[box_index + class_index * YoloV3Prediction.YoloV3BboxPredictionCount]; 133 | 134 | results.Add(new YoloV3Result(bbox, label, score)); 135 | } 136 | 137 | return results; 138 | } 139 | ``` 140 | 141 | In this version of the model, they are 80 classes (see the model's GitHub documentation for the link). 142 | 143 | You can use the above like this: 144 | ```csharp 145 | // load image 146 | string imageName = "dog_cat.jpg"; 147 | using (var bitmap = new Bitmap(Image.FromFile(Path.Combine(imageFolder, imageName)))) 148 | { 149 | // predict 150 | var predict = predictionEngine.Predict(new YoloV3BitmapData() { Image = bitmap }); 151 | 152 | var results = GetResults(predict, classesNames); 153 | 154 | // draw predictions 155 | using (var g = Graphics.FromImage(bitmap)) 156 | { 157 | foreach (var result in results) 158 | { 159 | var y1 = result.BBox[0]; 160 | var x1 = result.BBox[1]; 161 | var y2 = result.BBox[2]; 162 | var x2 = result.BBox[3]; 163 | 164 | g.DrawRectangle(Pens.Red, x1, y1, x2-x1, y2-y1); 165 | using (var brushes = new SolidBrush(Color.FromArgb(50, Color.Red))) 166 | { 167 | g.FillRectangle(brushes, x1, y1, x2 - x1, y2 - y1); 168 | } 169 | 170 | g.DrawString(result.Label + " " + result.Confidence.ToString("0.00"), 171 | new Font("Arial", 12), Brushes.Blue, new PointF(x1, y1)); 172 | } 173 | 174 | bitmap.Save(Path.Combine(imageOutputFolder, Path.ChangeExtension(imageName, "_processed" + Path.GetExtension(imageName)))); 175 | } 176 | } 177 | ``` 178 | 179 | ![example](https://github.com/BobLd/YOLOv3MLNet/blob/master/YOLOV3MLNetSO/Assets/Output/cars%20road._processed.jpg) 180 | -------------------------------------------------------------------------------- /YOLOV3MLNetSO/YOLOV3MLNetSO.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | netcoreapp3.1 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | PreserveNewest 18 | 19 | 20 | Always 21 | 22 | 23 | PreserveNewest 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /YOLOv3MLNet.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30225.117 5 | 
MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "YOLOv3MLNet", "YOLOv3MLNet\YOLOv3MLNet.csproj", "{829141C8-581A-4491-84F0-9D500DB921C3}" 7 | EndProject 8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "YOLOV3MLNetSO", "YOLOV3MLNetSO\YOLOV3MLNetSO.csproj", "{CC9A2611-3ABE-4994-991E-1722243743D3}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Release|Any CPU = Release|Any CPU 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {829141C8-581A-4491-84F0-9D500DB921C3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 17 | {829141C8-581A-4491-84F0-9D500DB921C3}.Debug|Any CPU.Build.0 = Debug|Any CPU 18 | {829141C8-581A-4491-84F0-9D500DB921C3}.Release|Any CPU.ActiveCfg = Release|Any CPU 19 | {829141C8-581A-4491-84F0-9D500DB921C3}.Release|Any CPU.Build.0 = Release|Any CPU 20 | {CC9A2611-3ABE-4994-991E-1722243743D3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 21 | {CC9A2611-3ABE-4994-991E-1722243743D3}.Debug|Any CPU.Build.0 = Debug|Any CPU 22 | {CC9A2611-3ABE-4994-991E-1722243743D3}.Release|Any CPU.ActiveCfg = Release|Any CPU 23 | {CC9A2611-3ABE-4994-991E-1722243743D3}.Release|Any CPU.Build.0 = Release|Any CPU 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {2F4D0364-1245-4F7A-905C-88CEEF697ED0} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /YOLOv3MLNet/Assets/Images/PMC5055614_00001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/YOLOv3MLNet/Assets/Images/PMC5055614_00001.jpg -------------------------------------------------------------------------------- /YOLOv3MLNet/Assets/Output/PMC5055614_00001._processed.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/YOLOv3MLNet/Assets/Output/PMC5055614_00001._processed.jpg -------------------------------------------------------------------------------- /YOLOv3MLNet/DataStructures/YoloV3BitmapData.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.ML.Data; 2 | using Microsoft.ML.Transforms.Image; 3 | using System.Drawing; 4 | 5 | namespace YOLOv3MLNet.DataStructures 6 | { 7 | public class YoloV3BitmapData 8 | { 9 | [ColumnName("bitmap")] 10 | [ImageType(416, 416)] 11 | public Bitmap Image { get; set; } 12 | 13 | [ColumnName("width")] 14 | public float ImageWidth => Image.Width; 15 | 16 | [ColumnName("height")] 17 | public float ImageHeight => Image.Height; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /YOLOv3MLNet/DataStructures/YoloV3Prediction.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.ML.Data; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Diagnostics; 5 | using System.Linq; 6 | 7 | namespace YOLOv3MLNet.DataStructures 8 | { 9 | public class YoloV3Prediction 10 | { 11 | /// 12 | /// ((52 x 52) + (26 x 26) + 13 x 13)) x 3 = 10,647. 
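/// That is, 3 anchor boxes for each cell of the 52x52, 26x26 and 13x13 grids: (2,704 + 676 + 169) x 3 = 10,647 candidate boxes.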
13 | /// 14 | private const int yoloV3BboxPredictionCount = 10_647; 15 | 16 | [ColumnName("width")] 17 | public float ImageWidth { get; set; } 18 | 19 | [ColumnName("height")] 20 | public float ImageHeight { get; set; } 21 | 22 | /// 23 | /// Bounding boxes raw prediction. 24 | /// 25 | [ColumnName("bboxes")] 26 | public float[] BBoxes; 27 | 28 | /// 29 | /// Classes raw prediction. 30 | /// 31 | [ColumnName("classes")] 32 | public float[] Classes; 33 | 34 | public IReadOnlyList GetResults(string[] categories, float confThres = 0.5f, float iouThres = 0.5f) 35 | { 36 | if (BBoxes.Length != yoloV3BboxPredictionCount * 4) 37 | { 38 | throw new ArgumentException($"Bounding box prediction size is not correct. Expected {yoloV3BboxPredictionCount * 4}, got {BBoxes.Length}.", nameof(BBoxes)); 39 | } 40 | 41 | if (Classes.Length != yoloV3BboxPredictionCount * categories.Length) 42 | { 43 | throw new ArgumentException($"Classes prediction size is not correct. Expected {yoloV3BboxPredictionCount * categories.Length}, got {Classes.Length}. You might want to check the {nameof(categories)}.", nameof(Classes)); 44 | } 45 | 46 | // compute scale and pad factors 47 | float heightScale = 1f; 48 | float widthScale = 1f; 49 | float heightPad = 0f; 50 | float widthPad = 0f; 51 | if (ImageWidth < ImageHeight) 52 | { 53 | widthScale = ImageHeight / ImageWidth; 54 | widthPad = ImageWidth * (1 - widthScale) / 2f; 55 | } 56 | else if (ImageWidth > ImageHeight) 57 | { 58 | heightScale = ImageWidth / ImageHeight; 59 | heightPad = ImageHeight * (1 - heightScale) / 2f; 60 | } 61 | 62 | // process raw results 63 | List results = new List(); 64 | for (int r = 0; r < yoloV3BboxPredictionCount; r++) 65 | { 66 | var scores = Classes.Skip(r * categories.Length).Take(categories.Length); 67 | 68 | // get the class' max confidence 69 | var conf = scores.Max(); 70 | if (conf < confThres) 71 | { 72 | continue; // if below conf threshold, skip it 73 | } 74 | 75 | var bboxAdj = Xywh2xyxy(BBoxes.Skip(r * 4).Take(4).ToArray()); 76 | 77 | //[x1, y1, x2, y2, conf, c_0, c_1, ...] 78 | results.Add(bboxAdj.Concat(new[] { conf }).Concat(scores).ToArray()); 79 | } 80 | 81 | // Non-maximum Suppression 82 | results = results.OrderByDescending(x => x[4]).ToList(); // sort by confidence 83 | List resultsNms = new List(); 84 | 85 | int f = 0; 86 | while (f < results.Count) 87 | { 88 | var res = results[f]; 89 | if (res == null) 90 | { 91 | f++; 92 | continue; 93 | } 94 | 95 | var conf = res[4]; 96 | var classes_int = res.Skip(5).ToList().IndexOf(conf); 97 | string label = classes_int > -1 ? categories[classes_int] : "unknown"; 98 | 99 | resultsNms.Add(new YoloV3Result(scaleCoords(res.Take(4).ToArray(), ImageHeight, ImageWidth, heightScale, widthScale, heightPad, widthPad), label, conf)); 100 | results[f] = null; 101 | 102 | var iou = results.Select(bbox => bbox == null ? float.NaN : BoxIoU(res, bbox)).ToList(); 103 | for (int i = 0; i < iou.Count; i++) 104 | { 105 | if (float.IsNaN(iou[i])) continue; 106 | if (iou[i] > iouThres) 107 | { 108 | results[i] = null; 109 | } 110 | } 111 | f++; 112 | } 113 | 114 | return resultsNms; 115 | } 116 | 117 | /// 118 | /// Scale coordinates to page. 
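/// Maps the (x1, y1, x2, y2) output back to the original image: each value is multiplied by the image dimension and the scale factor computed in GetResults, and the padding offset is added (undoing the IsoPad resize).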
119 | /// 120 | /// [x1, y1, x2, y2] 121 | private static float[] scaleCoords(float[] bbox, float imageHeight, float imageWidth, float heightScale, float widthScale, float heightPad, float widthPad) 122 | { 123 | float[] adjBbox = new float[4]; 124 | adjBbox[0] = bbox[0] * imageWidth * widthScale + widthPad; 125 | adjBbox[1] = bbox[1] * imageHeight * heightScale + heightPad; 126 | adjBbox[2] = bbox[2] * imageWidth * widthScale + widthPad; 127 | adjBbox[3] = bbox[3] * imageHeight * heightScale + heightPad; 128 | return adjBbox; 129 | } 130 | 131 | /// 132 | /// Return intersection-over-union (Jaccard index) of boxes. 133 | /// Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 134 | /// 135 | public static float BoxIoU(float[] boxes1, float[] boxes2) 136 | { 137 | static float box_area(float[] box) 138 | { 139 | return (box[2] - box[0]) * (box[3] - box[1]); 140 | } 141 | 142 | var area1 = box_area(boxes1); 143 | var area2 = box_area(boxes2); 144 | 145 | Debug.Assert(area1 >= 0); 146 | Debug.Assert(area2 >= 0); 147 | 148 | var dx = Math.Max(0, Math.Min(boxes1[2], boxes2[2]) - Math.Max(boxes1[0], boxes2[0])); 149 | var dy = Math.Max(0, Math.Min(boxes1[3], boxes2[3]) - Math.Max(boxes1[1], boxes2[1])); 150 | var inter = dx * dy; 151 | 152 | return inter / (area1 + area2 - inter); 153 | } 154 | 155 | /// 156 | /// Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2] 157 | /// Box (center x, center y, width, height) to (x1, y1, x2, y2) 158 | /// 159 | public static float[] Xywh2xyxy(float[] bbox) 160 | { 161 | var bboxAdj = new float[4]; 162 | bboxAdj[0] = bbox[0] - bbox[2] / 2f; 163 | bboxAdj[1] = bbox[1] - bbox[3] / 2f; 164 | bboxAdj[2] = bbox[0] + bbox[2] / 2f; 165 | bboxAdj[3] = bbox[1] + bbox[3] / 2f; 166 | return bboxAdj; 167 | } 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /YOLOv3MLNet/DataStructures/YoloV3Result.cs: -------------------------------------------------------------------------------- 1 | namespace YOLOv3MLNet.DataStructures 2 | { 3 | public struct YoloV3Result 4 | { 5 | /// 6 | /// x1, y1, x2, y2 in page coordinates. 7 | /// 8 | public float[] BBox { get; } 9 | 10 | /// 11 | /// The Bbox category. 12 | /// 13 | public string Label { get; } 14 | 15 | /// 16 | /// Category's confidence level. 
17 | /// 18 | public float Confidence { get; } 19 | 20 | public YoloV3Result(float[] bbox, string label, float confidence) 21 | { 22 | BBox = bbox; 23 | Label = label; 24 | Confidence = confidence; 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /YOLOv3MLNet/Program.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.ML; 2 | using System.Collections.Generic; 3 | using System.Drawing; 4 | using System.IO; 5 | using YOLOv3MLNet.DataStructures; 6 | using static Microsoft.ML.Transforms.Image.ImageResizingEstimator; 7 | 8 | namespace YOLOv3MLNet 9 | { 10 | class Program 11 | { 12 | const string modelLocation = @"D:\MachineLearning\Document Layout Analysis\notebooks\yolo\dla_yolov3.onnx"; 13 | 14 | const string imageFolder = @"Assets\Images"; 15 | 16 | const string imageOutputFolder = @"Assets\Output"; 17 | 18 | static readonly string[] classesNames = new string[] 19 | { 20 | "caption", "chart", "credit", 21 | "drop-capital", "floating", "footer", 22 | "frame", "graphics", "header", 23 | "heading", "image", "linedrawing", 24 | "maths", "noise", "page-number", 25 | "paragraph", "separator", "table" 26 | }; 27 | 28 | static float confidenceThreshold = 0.2f; 29 | 30 | static float confidenceIou = 0.5f; 31 | 32 | static void Main() 33 | { 34 | Directory.CreateDirectory(imageOutputFolder); 35 | MLContext mlContext = new MLContext(); 36 | 37 | // load prediction engine 38 | var predictionEngine = LoadPredictionEngine(mlContext, modelLocation); 39 | 40 | // load image 41 | string imageName = "PMC5055614_00001.jpg"; 42 | using (var bitmap = new Bitmap(Image.FromFile(Path.Combine(imageFolder, imageName)))) 43 | { 44 | // predict 45 | var results = predictionEngine.Predict(new YoloV3BitmapData() { Image = bitmap }) 46 | .GetResults(classesNames, confidenceThreshold, confidenceIou); 47 | 48 | // draw predictions 49 | using (var g = Graphics.FromImage(bitmap)) 50 | { 51 | foreach (var result in results) 52 | { 53 | var x1 = result.BBox[0]; 54 | var y1 = result.BBox[1]; 55 | var w = result.BBox[2] - x1; 56 | var h = result.BBox[3] - y1; 57 | 58 | g.DrawRectangle(Pens.Red, x1, y1, w, h); 59 | using (var brushes = new SolidBrush(Color.FromArgb(50, Color.Red))) 60 | { 61 | g.FillRectangle(brushes, x1, y1, w, h); 62 | } 63 | 64 | g.DrawString(result.Label + " " + result.Confidence.ToString("0.00"), 65 | new Font("Arial", 12), Brushes.Blue, new PointF(x1, y1)); 66 | } 67 | 68 | bitmap.Save(Path.Combine(imageOutputFolder, Path.ChangeExtension(imageName, "_processed" + Path.GetExtension(imageName)))); 69 | } 70 | } 71 | } 72 | 73 | public static PredictionEngine LoadPredictionEngine(MLContext mlContext, string modelPath) 74 | { 75 | // Define scoring pipeline 76 | var pipeline = mlContext.Transforms.ResizeImages(inputColumnName: "bitmap", outputColumnName: "image", imageWidth: 416, imageHeight: 416, resizing: ResizingKind.IsoPad) 77 | .Append(mlContext.Transforms.ExtractPixels(outputColumnName: "image", scaleImage: 1f / 255f)) 78 | .Append(mlContext.Transforms.ApplyOnnxModel(inputColumnNames: new[] { "image" }, outputColumnNames: new[] { "bboxes", "classes" }, modelFile: modelPath)); 79 | 80 | // Fit on empty list to obtain input data schema 81 | var model = pipeline.Fit(mlContext.Data.LoadFromEnumerable(new List())); 82 | 83 | // Create prediction engine 84 | return mlContext.Model.CreatePredictionEngine(model); 85 | } 86 | } 87 | } 88 | 
-------------------------------------------------------------------------------- /YOLOv3MLNet/YOLOv3MLNet.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | netcoreapp3.1 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | PreserveNewest 18 | 19 | 20 | PreserveNewest 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /netron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/netron.png --------------------------------------------------------------------------------