├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── YOLOV3MLNetSO
├── Assets
│ ├── Images
│ │ ├── cars road.jpg
│ │ └── dog_cat.jpg
│ └── Output
│ │ ├── cars road._processed.jpg
│ │ └── dog_cat._processed.jpg
├── DataStructures
│ ├── YoloV3BitmapData.cs
│ ├── YoloV3Prediction.cs
│ └── YoloV3Result.cs
├── Program.cs
├── README.md
└── YOLOV3MLNetSO.csproj
├── YOLOv3MLNet.sln
├── YOLOv3MLNet
├── Assets
│ ├── Images
│ │ └── PMC5055614_00001.jpg
│ └── Output
│ │ └── PMC5055614_00001._processed.jpg
├── DataStructures
│ ├── YoloV3BitmapData.cs
│ ├── YoloV3Prediction.cs
│ └── YoloV3Result.cs
├── Program.cs
└── YOLOv3MLNet.csproj
└── netron.png
/.gitattributes:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Set default behavior to automatically normalize line endings.
3 | ###############################################################################
4 | * text=auto
5 |
6 | ###############################################################################
7 | # Set default behavior for command prompt diff.
8 | #
9 | # This is needed for earlier builds of msysgit that do not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs diff=csharp
14 |
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln merge=binary
26 | #*.csproj merge=binary
27 | #*.vbproj merge=binary
28 | #*.vcxproj merge=binary
29 | #*.vcproj merge=binary
30 | #*.dbproj merge=binary
31 | #*.fsproj merge=binary
32 | #*.lsproj merge=binary
33 | #*.wixproj merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj merge=binary
36 | #*.wwaproj merge=binary
37 |
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg binary
44 | #*.png binary
45 | #*.gif binary
46 |
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | #
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the
52 | # entries below.
53 | ###############################################################################
54 | #*.doc diff=astextplain
55 | #*.DOC diff=astextplain
56 | #*.docx diff=astextplain
57 | #*.DOCX diff=astextplain
58 | #*.dot diff=astextplain
59 | #*.DOT diff=astextplain
60 | #*.pdf diff=astextplain
61 | #*.PDF diff=astextplain
62 | #*.rtf diff=astextplain
63 | #*.RTF diff=astextplain
64 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Build results
17 | [Dd]ebug/
18 | [Dd]ebugPublic/
19 | [Rr]elease/
20 | [Rr]eleases/
21 | x64/
22 | x86/
23 | [Aa][Rr][Mm]/
24 | [Aa][Rr][Mm]64/
25 | bld/
26 | [Bb]in/
27 | [Oo]bj/
28 | [Ll]og/
29 |
30 | # Visual Studio 2015/2017 cache/options directory
31 | .vs/
32 | # Uncomment if you have tasks that create the project's static files in wwwroot
33 | #wwwroot/
34 |
35 | # Visual Studio 2017 auto generated files
36 | Generated\ Files/
37 |
38 | # MSTest test Results
39 | [Tt]est[Rr]esult*/
40 | [Bb]uild[Ll]og.*
41 |
42 | # NUNIT
43 | *.VisualState.xml
44 | TestResult.xml
45 |
46 | # Build Results of an ATL Project
47 | [Dd]ebugPS/
48 | [Rr]eleasePS/
49 | dlldata.c
50 |
51 | # Benchmark Results
52 | BenchmarkDotNet.Artifacts/
53 |
54 | # .NET Core
55 | project.lock.json
56 | project.fragment.lock.json
57 | artifacts/
58 |
59 | # StyleCop
60 | StyleCopReport.xml
61 |
62 | # Files built by Visual Studio
63 | *_i.c
64 | *_p.c
65 | *_h.h
66 | *.ilk
67 | *.meta
68 | *.obj
69 | *.iobj
70 | *.pch
71 | *.pdb
72 | *.ipdb
73 | *.pgc
74 | *.pgd
75 | *.rsp
76 | *.sbr
77 | *.tlb
78 | *.tli
79 | *.tlh
80 | *.tmp
81 | *.tmp_proj
82 | *_wpftmp.csproj
83 | *.log
84 | *.vspscc
85 | *.vssscc
86 | .builds
87 | *.pidb
88 | *.svclog
89 | *.scc
90 |
91 | # Chutzpah Test files
92 | _Chutzpah*
93 |
94 | # Visual C++ cache files
95 | ipch/
96 | *.aps
97 | *.ncb
98 | *.opendb
99 | *.opensdf
100 | *.sdf
101 | *.cachefile
102 | *.VC.db
103 | *.VC.VC.opendb
104 |
105 | # Visual Studio profiler
106 | *.psess
107 | *.vsp
108 | *.vspx
109 | *.sap
110 |
111 | # Visual Studio Trace Files
112 | *.e2e
113 |
114 | # TFS 2012 Local Workspace
115 | $tf/
116 |
117 | # Guidance Automation Toolkit
118 | *.gpState
119 |
120 | # ReSharper is a .NET coding add-in
121 | _ReSharper*/
122 | *.[Rr]e[Ss]harper
123 | *.DotSettings.user
124 |
125 | # JustCode is a .NET coding add-in
126 | .JustCode
127 |
128 | # TeamCity is a build add-in
129 | _TeamCity*
130 |
131 | # DotCover is a Code Coverage Tool
132 | *.dotCover
133 |
134 | # AxoCover is a Code Coverage Tool
135 | .axoCover/*
136 | !.axoCover/settings.json
137 |
138 | # Visual Studio code coverage results
139 | *.coverage
140 | *.coveragexml
141 |
142 | # NCrunch
143 | _NCrunch_*
144 | .*crunch*.local.xml
145 | nCrunchTemp_*
146 |
147 | # MightyMoose
148 | *.mm.*
149 | AutoTest.Net/
150 |
151 | # Web workbench (sass)
152 | .sass-cache/
153 |
154 | # Installshield output folder
155 | [Ee]xpress/
156 |
157 | # DocProject is a documentation generator add-in
158 | DocProject/buildhelp/
159 | DocProject/Help/*.HxT
160 | DocProject/Help/*.HxC
161 | DocProject/Help/*.hhc
162 | DocProject/Help/*.hhk
163 | DocProject/Help/*.hhp
164 | DocProject/Help/Html2
165 | DocProject/Help/html
166 |
167 | # Click-Once directory
168 | publish/
169 |
170 | # Publish Web Output
171 | *.[Pp]ublish.xml
172 | *.azurePubxml
173 | # Note: Comment the next line if you want to checkin your web deploy settings,
174 | # but database connection strings (with potential passwords) will be unencrypted
175 | *.pubxml
176 | *.publishproj
177 |
178 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
179 | # checkin your Azure Web App publish settings, but sensitive information contained
180 | # in these scripts will be unencrypted
181 | PublishScripts/
182 |
183 | # NuGet Packages
184 | *.nupkg
185 | # The packages folder can be ignored because of Package Restore
186 | **/[Pp]ackages/*
187 | # except build/, which is used as an MSBuild target.
188 | !**/[Pp]ackages/build/
189 | # Uncomment if necessary however generally it will be regenerated when needed
190 | #!**/[Pp]ackages/repositories.config
191 | # NuGet v3's project.json files produces more ignorable files
192 | *.nuget.props
193 | *.nuget.targets
194 |
195 | # Microsoft Azure Build Output
196 | csx/
197 | *.build.csdef
198 |
199 | # Microsoft Azure Emulator
200 | ecf/
201 | rcf/
202 |
203 | # Windows Store app package directories and files
204 | AppPackages/
205 | BundleArtifacts/
206 | Package.StoreAssociation.xml
207 | _pkginfo.txt
208 | *.appx
209 |
210 | # Visual Studio cache files
211 | # files ending in .cache can be ignored
212 | *.[Cc]ache
213 | # but keep track of directories ending in .cache
214 | !?*.[Cc]ache/
215 |
216 | # Others
217 | ClientBin/
218 | ~$*
219 | *~
220 | *.dbmdl
221 | *.dbproj.schemaview
222 | *.jfm
223 | *.pfx
224 | *.publishsettings
225 | orleans.codegen.cs
226 |
227 | # Including strong name files can present a security risk
228 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
229 | #*.snk
230 |
231 | # Since there are multiple workflows, uncomment next line to ignore bower_components
232 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
233 | #bower_components/
234 |
235 | # RIA/Silverlight projects
236 | Generated_Code/
237 |
238 | # Backup & report files from converting an old project file
239 | # to a newer Visual Studio version. Backup files are not needed,
240 | # because we have git ;-)
241 | _UpgradeReport_Files/
242 | Backup*/
243 | UpgradeLog*.XML
244 | UpgradeLog*.htm
245 | ServiceFabricBackup/
246 | *.rptproj.bak
247 |
248 | # SQL Server files
249 | *.mdf
250 | *.ldf
251 | *.ndf
252 |
253 | # Business Intelligence projects
254 | *.rdl.data
255 | *.bim.layout
256 | *.bim_*.settings
257 | *.rptproj.rsuser
258 | *- Backup*.rdl
259 |
260 | # Microsoft Fakes
261 | FakesAssemblies/
262 |
263 | # GhostDoc plugin setting file
264 | *.GhostDoc.xml
265 |
266 | # Node.js Tools for Visual Studio
267 | .ntvs_analysis.dat
268 | node_modules/
269 |
270 | # Visual Studio 6 build log
271 | *.plg
272 |
273 | # Visual Studio 6 workspace options file
274 | *.opt
275 |
276 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
277 | *.vbw
278 |
279 | # Visual Studio LightSwitch build output
280 | **/*.HTMLClient/GeneratedArtifacts
281 | **/*.DesktopClient/GeneratedArtifacts
282 | **/*.DesktopClient/ModelManifest.xml
283 | **/*.Server/GeneratedArtifacts
284 | **/*.Server/ModelManifest.xml
285 | _Pvt_Extensions
286 |
287 | # Paket dependency manager
288 | .paket/paket.exe
289 | paket-files/
290 |
291 | # FAKE - F# Make
292 | .fake/
293 |
294 | # JetBrains Rider
295 | .idea/
296 | *.sln.iml
297 |
298 | # CodeRush personal settings
299 | .cr/personal
300 |
301 | # Python Tools for Visual Studio (PTVS)
302 | __pycache__/
303 | *.pyc
304 |
305 | # Cake - Uncomment if you are using it
306 | # tools/**
307 | # !tools/packages.config
308 |
309 | # Tabs Studio
310 | *.tss
311 |
312 | # Telerik's JustMock configuration file
313 | *.jmconfig
314 |
315 | # BizTalk build output
316 | *.btp.cs
317 | *.btm.cs
318 | *.odx.cs
319 | *.xsd.cs
320 |
321 | # OpenCover UI analysis results
322 | OpenCover/
323 |
324 | # Azure Stream Analytics local run output
325 | ASALocalRun/
326 |
327 | # MSBuild Binary and Structured Log
328 | *.binlog
329 |
330 | # NVidia Nsight GPU debugger configuration file
331 | *.nvuser
332 |
333 | # MFractors (Xamarin productivity tool) working folder
334 | .mfractor/
335 |
336 | # Local History for Visual Studio
337 | .localhistory/
338 |
339 | # BeatPulse healthcheck temp database
340 | healthchecksdb
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 BobLd
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | **Another case study, based on [this](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov3) YOLO v3 model is available [here](https://github.com/BobLd/YOLOv3MLNet/tree/master/YOLOV3MLNetSO).**
2 |
3 | **See [here](https://github.com/BobLd/YOLOv4MLNet) for YOLO v4 use.**
4 |
5 | # YOLO v3 in ML.Net
6 | Use the YOLO v3 algorithms for object detection in C# using ML.Net. We start with a Torch model, then convert it to ONNX format and use it in ML.Net.
7 |
8 | This is a case study on a document layout YOLO trained model. The model can be found in the following Medium article: [Object Detection — Document Layout Analysis Using Monk AI](https://medium.com/towards-artificial-intelligence/object-detection-document-layout-analysis-using-monk-object-detection-toolkit-6c57200bde5).
9 |
10 | ## Main differences
11 | - The ONNX conversion removes 1 feature which is the *objectness score*, pc. The original model has (5 + classes) features for each bounding box, the ONNX model has (4 + classes) features per bounding box. We will use the class probability as a proxy for the *objectness score* when performing the Non-maximum Suppression (NMS) step. This is a known issue, more info [here](https://github.com/ultralytics/yolov3/issues/750).
12 | - Image resizing is not optimised, and will always yield 416x416 size image. This is not the case in the original model (see this issue: [RECTANGULAR INFERENCE](https://github.com/ultralytics/yolov3/issues/232)).
13 |
14 | # Export to ONNX in Python
15 | This is based on this article [Object Detection — Document Layout Analysis Using Monk AI](https://medium.com/towards-artificial-intelligence/object-detection-document-layout-analysis-using-monk-object-detection-toolkit-6c57200bde5).
16 |
17 | ## Load the model
18 | ```python
19 | import os
20 | import sys
21 | from IPython.display import Image
22 | sys.path.append("../Monk_Object_Detection/7_yolov3/lib")
23 | from infer_detector import Infer
24 |
25 | gtf = Infer()
26 |
27 | f = open("dla_yolov3/classes.txt")
28 | class_list = f.readlines()
29 | f.close()
30 |
31 | model_name = "yolov3"
32 | weights = "dla_yolov3/dla_yolov3.pt"
33 | gtf.Model(model_name, class_list, weights, use_gpu=False, input_size=(416, 416))
34 | ```
35 |
36 | ## Test the model
37 | ```python
38 | img_path = "test_square.jpg"
39 | gtf.Predict(img_path, conf_thres=0.2, iou_thres=0.5)
40 | Image(filename='output/test_square.jpg')
41 | ```
42 |
43 | ## Export the model
44 | You need to set `ONNX_EXPORT = True` in `...\Monk_Object_Detection\7_yolov3\lib\models.py` before loading the model.
45 |
46 | We name the input layer `image` and the 2 output layers `classes`, `bboxes`. This is not needed but helps the clarity.
47 |
48 | ```python
49 | import torch
50 | import torchvision.models as models
51 |
52 | dummy_input = torch.randn(1, 3, 416, 416) # Create the right input shape (e.g. for an image)
53 | dummy_input = torch.nn.Sigmoid()(dummy_input) # limit between 0 and 1 (superfluous?)
54 | torch.onnx.export(gtf.system_dict["local"]["model"],
55 | dummy_input,
56 | "dla_yolov3.onnx",
57 | input_names=["image"],
58 | output_names=["classes", "bboxes"],
59 | opset_version=9)
60 | ```
61 |
62 | # Check exported model with Netron
63 | The ONNX model can be viewed in [Netron](https://www.electronjs.org/apps/netron). Our model looks like this:
64 | ![Netron view of the exported ONNX model](netron.png)
65 |
66 | - The input layer size is [1 x 3 x 416 x 416]. This corresponds to 1 batch size x 3 colors x 416 pixels height x 416 pixels width (more info about fixed batch size [here](https://github.com/ultralytics/yolov3/issues/1030)).
67 |
68 | As per this [article](https://medium.com/analytics-vidhya/yolo-v3-theory-explained-33100f6d193):
69 | > For an image of size 416 x 416, YOLO predicts ((52 x 52) + (26 x 26) + (13 x 13)) x 3 = 10,647 bounding boxes.
70 | - The `bboxes` output layer is of size [10,647 x 4]. This corresponds to 10,647 bounding boxes x 4 bounding box coordinates (x, y, h, w).
71 | - The `classes` output layer is of size [10,647 x 18]. This corresponds to 10,647 bounding boxes x 18 classes (this model has only 18 classes).
72 |
73 | Hence, each bounding box has (4 + classes) = 22 features. The total number of predictions in this model is 22 x 10,647.
74 |
75 | **NB**: The ONNX conversion removes 1 feature which is the *objectness score*, pc. The original model has (5 + classes) features for each bounding box. We will use the class probability as a proxy for the *objectness score*.
76 |
77 | 
78 |
79 | More information can be found in this article: [YOLO v3 theory explained](https://medium.com/analytics-vidhya/yolo-v3-theory-explained-33100f6d193)
80 |
81 | # Load model in C#
82 |
83 | # Predict in C#
84 | 
85 |
86 | # Resources
87 | - https://medium.com/towards-artificial-intelligence/object-detection-document-layout-analysis-using-monk-object-detection-toolkit-6c57200bde5
88 | - https://medium.com/analytics-vidhya/yolo-v3-theory-explained-33100f6d193
89 | - https://towardsdatascience.com/non-maximum-suppression-nms-93ce178e177c
90 | - https://michhar.github.io/convert-pytorch-onnx/
91 |
--------------------------------------------------------------------------------
/YOLOV3MLNetSO/Assets/Images/cars road.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/YOLOV3MLNetSO/Assets/Images/cars road.jpg
--------------------------------------------------------------------------------
/YOLOV3MLNetSO/Assets/Images/dog_cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/YOLOV3MLNetSO/Assets/Images/dog_cat.jpg
--------------------------------------------------------------------------------
/YOLOV3MLNetSO/Assets/Output/cars road._processed.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/YOLOV3MLNetSO/Assets/Output/cars road._processed.jpg
--------------------------------------------------------------------------------
/YOLOV3MLNetSO/Assets/Output/dog_cat._processed.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/YOLOV3MLNetSO/Assets/Output/dog_cat._processed.jpg
--------------------------------------------------------------------------------
/YOLOV3MLNetSO/DataStructures/YoloV3BitmapData.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.ML.Data;
2 | using Microsoft.ML.Transforms.Image;
3 | using System.Drawing;
4 |
5 | namespace YOLOV3MLNetSO.DataStructures
6 | {
/// <summary>
/// Input row of the scoring pipeline: the bitmap to analyse plus its dimensions,
/// each exposed under the column name given by its <c>ColumnName</c> attribute.
/// </summary>
public class YoloV3BitmapData
{
    /// <summary>
    /// The image to score, published as the "bitmap" column and declared as a 416 x 416 image type.
    /// </summary>
    [ColumnName("bitmap"), ImageType(416, 416)]
    public Bitmap Image { get; set; }

    /// <summary>
    /// Width of <see cref="Image"/>, published as the "width" column.
    /// Reads <see cref="Image"/> on every access, so the image must be set first.
    /// </summary>
    [ColumnName("width")]
    public float ImageWidth
    {
        get { return Image.Width; }
    }

    /// <summary>
    /// Height of <see cref="Image"/>, published as the "height" column.
    /// Reads <see cref="Image"/> on every access, so the image must be set first.
    /// </summary>
    [ColumnName("height")]
    public float ImageHeight
    {
        get { return Image.Height; }
    }
}
19 | }
20 |
--------------------------------------------------------------------------------
/YOLOV3MLNetSO/DataStructures/YoloV3Prediction.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.ML.Data;
2 |
3 | namespace YOLOV3MLNetSO.DataStructures
4 | {
/// <summary>
/// Output row of the scoring pipeline. Each property is bound to one output
/// column of the ONNX model through its <c>ColumnName</c> attribute.
/// </summary>
public class YoloV3Prediction
{
    /// <summary>
    /// Number of candidate bounding boxes:
    /// ((52 x 52) + (26 x 26) + (13 x 13)) x 3 = 10,647.
    /// </summary>
    public const int YoloV3BboxPredictionCount = 10_647;

    /// <summary>
    /// Candidate boxes, flattened.
    /// Declared size is [1 x 'n_candidates' x 4].
    /// </summary>
    [ColumnName("yolonms_layer_1/ExpandDims_1:0")]
    [VectorType(1, YoloV3BboxPredictionCount, 4)]
    public float[] Boxes { get; set; }

    /// <summary>
    /// Per-class scores of the candidate boxes, flattened.
    /// Declared size is [1 x 80 x 'n_candidates'].
    /// </summary>
    [ColumnName("yolonms_layer_1/ExpandDims_3:0")]
    [VectorType(1, 80, YoloV3BboxPredictionCount)]
    public float[] Scores { get; set; }

    /// <summary>
    /// Selected indices, flattened.
    /// Declared size is ['nbox' x 3].
    /// </summary>
    [ColumnName("yolonms_layer_1/concat_2:0")]
    [VectorType(0, 3)]
    public int[] Concat { get; set; }
}
36 | }
37 |
--------------------------------------------------------------------------------
/YOLOV3MLNetSO/DataStructures/YoloV3Result.cs:
--------------------------------------------------------------------------------
1 | namespace YOLOV3MLNetSO.DataStructures
2 | {
/// <summary>
/// One detection: a bounding box, the category it was assigned and the
/// confidence of that category.
/// </summary>
public struct YoloV3Result
{
    /// <summary>
    /// Stores the three values as given; the <paramref name="bbox"/> array is kept by reference, not copied.
    /// </summary>
    /// <param name="bbox">Bounding box coordinates.</param>
    /// <param name="label">Category of the bounding box.</param>
    /// <param name="confidence">Confidence level of the category.</param>
    public YoloV3Result(float[] bbox, string label, float confidence)
    {
        this.BBox = bbox;
        this.Label = label;
        this.Confidence = confidence;
    }

    /// <summary>
    /// Bounding box corner coordinates, in page coordinates.
    /// NOTE(review): this was documented as x1, y1, x2, y2, but Program reads the
    /// elements as y1, x1, y2, x2 when drawing - confirm the actual order.
    /// </summary>
    public float[] BBox { get; }

    /// <summary>
    /// The bounding box category.
    /// </summary>
    public string Label { get; }

    /// <summary>
    /// Confidence level of the category.
    /// </summary>
    public float Confidence { get; }
}
27 | }
28 |
--------------------------------------------------------------------------------
/YOLOV3MLNetSO/Program.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.ML;
2 | using System;
3 | using System.Collections.Generic;
4 | using System.Drawing;
5 | using System.IO;
6 | using System.Linq;
7 | using YOLOV3MLNetSO.DataStructures;
8 | using static Microsoft.ML.Transforms.Image.ImageResizingEstimator;
9 |
10 | namespace YOLOV3MLNetSO
11 | {
12 | /*
13 | * My answer to https://stackoverflow.com/questions/64407833/how-to-impelement-post-proccesing-for-yolo-v3-or-v4-onnx-models-in-ml-net
14 | */
/// <summary>
/// Console sample: scores one image with the YOLO v3 ONNX model through ML.Net,
/// draws the detections on the image and saves the annotated copy.
/// </summary>
class Program
{
    /// <summary>
    /// Default location of the ONNX model. It can be overridden by passing a
    /// path as the first command line argument.
    /// </summary>
    const string modelPath = @"D:\yolov3-10.onnx";

    /// <summary>Folder the input image is read from.</summary>
    const string imageFolder = @"Assets\Images";

    /// <summary>Folder the annotated image is written to.</summary>
    const string imageOutputFolder = @"Assets\Output";

    /// <summary>
    /// Category names, indexed by the class index found in the model output.
    /// </summary>
    static readonly string[] classesNames = new string[] { "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" };

    /// <summary>
    /// Builds the scoring pipeline, runs it on one image and saves the image
    /// with the detections drawn on it.
    /// </summary>
    /// <param name="args">
    /// Optional. When a first argument is given it is used as the ONNX model
    /// path instead of <see cref="modelPath"/>.
    /// </param>
    static void Main(string[] args)
    {
        string onnxPath = (args != null && args.Length > 0) ? args[0] : modelPath;

        Directory.CreateDirectory(imageOutputFolder);
        MLContext mlContext = new MLContext();

        // model is available here:
        // https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov3

        // Define scoring pipeline
        var pipeline = mlContext.Transforms.ResizeImages(inputColumnName: "bitmap", outputColumnName: "input_1", imageWidth: 416, imageHeight: 416, resizing: ResizingKind.IsoPad)
            .Append(mlContext.Transforms.ExtractPixels(outputColumnName: "input_1", scaleImage: 1f / 255f))
            .Append(mlContext.Transforms.Concatenate("image_shape", "height", "width"))
            .Append(mlContext.Transforms.ApplyOnnxModel(shapeDictionary: new Dictionary<string, int[]>() { { "input_1", new[] { 1, 3, 416, 416 } } },
                            inputColumnNames: new[]
                            {
                                "input_1",
                                "image_shape"
                            },
                            outputColumnNames: new[]
                            {
                                "yolonms_layer_1/ExpandDims_1:0",
                                "yolonms_layer_1/ExpandDims_3:0",
                                "yolonms_layer_1/concat_2:0"
                            },
                            modelFile: onnxPath, recursionLimit: 100));

        // Fit on empty list to obtain input data schema
        var model = pipeline.Fit(mlContext.Data.LoadFromEnumerable(new List<YoloV3BitmapData>()));

        // Create prediction engine (disposable, so release it when done)
        using (var predictionEngine = mlContext.Model.CreatePredictionEngine<YoloV3BitmapData, YoloV3Prediction>(model))
        {
            // load image
            string imageName = "cars road.jpg";

            // Image.FromFile returns a disposable image; the Bitmap built from it
            // is a separate object, so both have to be disposed.
            using (var source = Image.FromFile(Path.Combine(imageFolder, imageName)))
            using (var bitmap = new Bitmap(source))
            {
                // predict
                var predict = predictionEngine.Predict(new YoloV3BitmapData() { Image = bitmap });
                var results = GetResults(predict, classesNames);

                // draw predictions; the brush and font are shared by all
                // detections and disposed once drawing is over
                using (var g = Graphics.FromImage(bitmap))
                using (var fill = new SolidBrush(Color.FromArgb(50, Color.Red)))
                using (var font = new Font("Arial", 12))
                {
                    foreach (var result in results)
                    {
                        // coordinates are read in (y1, x1, y2, x2) order
                        var y1 = result.BBox[0];
                        var x1 = result.BBox[1];
                        var y2 = result.BBox[2];
                        var x2 = result.BBox[3];

                        g.DrawRectangle(Pens.Red, x1, y1, x2 - x1, y2 - y1);
                        g.FillRectangle(fill, x1, y1, x2 - x1, y2 - y1);

                        g.DrawString(result.Label + " " + result.Confidence.ToString("0.00"),
                                     font, Brushes.Blue, new PointF(x1, y1));
                    }
                }

                // Save once the Graphics object has been released.
                bitmap.Save(Path.Combine(imageOutputFolder, Path.ChangeExtension(imageName, "_processed" + Path.GetExtension(imageName))));
            }
        }
    }

    /// <summary>
    /// Converts the raw model output into a list of labelled detections.
    /// </summary>
    /// <param name="prediction">
    /// Model output. <c>Concat</c> holds one (batch_index, class_index, box_index)
    /// triplet per detection; <c>Boxes</c> and <c>Scores</c> hold the values
    /// those indices point into.
    /// </param>
    /// <param name="categories">Category names, indexed by class index.</param>
    /// <returns>
    /// One result per triplet in <c>Concat</c>; an empty list when
    /// <c>Concat</c> is null or empty.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="prediction"/> or <paramref name="categories"/> is null.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <c>Boxes</c> or <c>Scores</c> is null or does not have the expected length.
    /// </exception>
    public static IReadOnlyList<YoloV3Result> GetResults(YoloV3Prediction prediction, string[] categories)
    {
        if (prediction == null)
        {
            throw new ArgumentNullException(nameof(prediction));
        }

        if (categories == null)
        {
            throw new ArgumentNullException(nameof(categories));
        }

        int[] indices = prediction.Concat;
        if (indices == null || indices.Length == 0)
        {
            return new List<YoloV3Result>();
        }

        const int candidates = YoloV3Prediction.YoloV3BboxPredictionCount;

        if (prediction.Boxes == null || prediction.Boxes.Length != candidates * 4)
        {
            throw new ArgumentException(
                "Boxes must contain " + (candidates * 4) + " values (4 per candidate box).",
                nameof(prediction));
        }

        if (prediction.Scores == null || prediction.Scores.Length != candidates * categories.Length)
        {
            throw new ArgumentException(
                "Scores must contain " + (candidates * categories.Length) + " values (one per candidate box and category).",
                nameof(prediction));
        }

        // Concat size is 'nbox'x3 (batch_index, class_index, box_index)
        int resultsCount = indices.Length / 3;
        List<YoloV3Result> results = new List<YoloV3Result>(resultsCount);

        for (int c = 0; c < resultsCount; c++)
        {
            // Read the triplet in place. indices[offset] is the batch index,
            // which is not needed to locate the box or the score below.
            int offset = c * 3;
            int classIndex = indices[offset + 1];
            int boxIndex = indices[offset + 2];

            var label = categories[classIndex];
            var bbox = new float[]
            {
                prediction.Boxes[boxIndex * 4],
                prediction.Boxes[boxIndex * 4 + 1],
                prediction.Boxes[boxIndex * 4 + 2],
                prediction.Boxes[boxIndex * 4 + 3],
            };

            // Scores are laid out class by class, one value per candidate box.
            var score = prediction.Scores[boxIndex + classIndex * candidates];

            results.Add(new YoloV3Result(bbox, label, score));
        }

        return results;
    }
}
136 | }
137 |
--------------------------------------------------------------------------------
/YOLOV3MLNetSO/README.md:
--------------------------------------------------------------------------------
1 | ## Answer to [How to impelement post-proccesing for yolo v3 or v4 onnx models in ML.Net](https://stackoverflow.com/questions/64407833/how-to-impelement-post-proccesing-for-yolo-v3-or-v4-onnx-models-in-ml-net)
2 |
3 | I'll take the [YOLO v3 (available in the onnx/models repo)](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov3) as an example. A good explanation of the model can be found [here](https://medium.com/analytics-vidhya/yolo-v3-theory-explained-33100f6d193).
4 |
5 | First advice would be to look at the model using [Netron](https://www.electronjs.org/apps/netron). Doing so, you will see the input and output layers. They also describe these layers in the onnx/models documentation.
6 |
7 | [Netron screenshot][1]
8 |
9 | (I see in Netron that this particular YOLO v3 model also does some post-processing by doing the Non-maximum suppression step.)
10 |
11 | - Input layer names: `input_1`, `image_shape`
12 | - Output layer names: `yolonms_layer_1/ExpandDims_1:0`, `yolonms_layer_1/ExpandDims_3:0`, `yolonms_layer_1/concat_2:0`
13 |
14 | As per the model documentation, the input shapes are:
15 | > Resized image (1x3x416x416) Original image size (1x2) which is [image.size[1], image.size[0]]
16 |
17 | We first need to define the ML.Net input and output classes as follows:
18 | ```csharp
19 | public class YoloV3BitmapData
20 | {
21 | [ColumnName("bitmap")]
22 | [ImageType(416, 416)]
23 | public Bitmap Image { get; set; }
24 |
25 | [ColumnName("width")]
26 | public float ImageWidth => Image.Width;
27 |
28 | [ColumnName("height")]
29 | public float ImageHeight => Image.Height;
30 | }
31 |
32 | public class YoloV3Prediction
33 | {
34 | ///
35 | /// ((52 x 52) + (26 x 26) + 13 x 13)) x 3 = 10,647.
36 | ///
37 | public const int YoloV3BboxPredictionCount = 10_647;
38 |
39 | ///
40 | /// Boxes
41 | ///
42 | [ColumnName("yolonms_layer_1/ExpandDims_1:0")]
43 | public float[] Boxes { get; set; }
44 |
45 | ///
46 | /// Scores
47 | ///
48 | [ColumnName("yolonms_layer_1/ExpandDims_3:0")]
49 | public float[] Scores { get; set; }
50 |
51 | ///
52 | /// Concat
53 | ///
54 | [ColumnName("yolonms_layer_1/concat_2:0")]
55 | public int[] Concat { get; set; }
56 | }
57 | ```
58 |
59 | We then create the ML.Net pipeline and load the prediction engine:
60 |
61 | ```csharp
62 | // Define scoring pipeline
63 | var pipeline = mlContext.Transforms.ResizeImages(inputColumnName: "bitmap", outputColumnName: "input_1", imageWidth: 416, imageHeight: 416, resizing: ResizingKind.IsoPad)
64 | .Append(mlContext.Transforms.ExtractPixels(outputColumnName: "input_1", outputAsFloatArray: true, scaleImage: 1f / 255f))
65 | .Append(mlContext.Transforms.Concatenate("image_shape", "height", "width"))
66 | .Append(mlContext.Transforms.ApplyOnnxModel(shapeDictionary: new Dictionary() { { "input_1", new[] { 1, 3, 416, 416 } } },
67 | inputColumnNames: new[]
68 | {
69 | "input_1",
70 | "image_shape"
71 | },
72 | outputColumnNames: new[]
73 | {
74 | "yolonms_layer_1/ExpandDims_1:0",
75 | "yolonms_layer_1/ExpandDims_3:0",
76 | "yolonms_layer_1/concat_2:0"
77 | },
78 | modelFile: @"D:\yolov3-10.onnx"));
79 |
80 | // Fit on empty list to obtain input data schema
81 | var model = pipeline.Fit(mlContext.Data.LoadFromEnumerable(new List()));
82 |
83 | // Create prediction engine
84 | var predictionEngine = mlContext.Model.CreatePredictionEngine(model);
85 | ```
86 |
87 | **NB**: We need to define the `shapeDictionary` parameter because the input shapes are not completely defined in the model.
88 |
89 | As per the model documentation, the output shapes are:
90 | > The model has 3 outputs. boxes: (1x'n_candidates'x4), the coordinates of all anchor boxes, scores: (1x80x'n_candidates'), the scores of all anchor boxes per class, indices: ('nbox'x3), selected indices from the boxes tensor. The selected index format is (batch_index, class_index, box_index).
91 |
92 | The function below will help you process the results; I leave it to you to fine-tune it.
93 |
94 | ```csharp
95 | /// <summary>
96 | /// Converts the raw model outputs into a list of detections.
97 | /// </summary>
98 | /// <param name="prediction">Raw outputs: boxes, scores and the selected (batch, class, box) index triplets.</param>
99 | /// <param name="categories">Class names, indexed by the class index found in the triplets.</param>
100 | /// <returns>One result per selected index triplet; an empty list when nothing was selected.</returns>
101 | /// <exception cref="ArgumentException">The boxes or scores array does not have the expected length.</exception>
102 | public IReadOnlyList<YoloV3Result> GetResults(YoloV3Prediction prediction, string[] categories)
103 | {
104 |     if (prediction.Concat == null || prediction.Concat.Length == 0)
105 |     {
106 |         return new List<YoloV3Result>();
107 |     }
108 |
109 |     int boxCount = YoloV3Prediction.YoloV3BboxPredictionCount;
110 |     if (prediction.Boxes.Length != boxCount * 4)
111 |     {
112 |         throw new ArgumentException($"Boxes size is not correct. Expected {boxCount * 4}, got {prediction.Boxes.Length}.", nameof(prediction));
113 |     }
114 |
115 |     if (prediction.Scores.Length != boxCount * categories.Length)
116 |     {
117 |         throw new ArgumentException($"Scores size is not correct. Expected {boxCount * categories.Length}, got {prediction.Scores.Length}.", nameof(prediction));
118 |     }
119 |
120 |     // Concat size is 'nbox'x3 (batch_index, class_index, box_index)
121 |     int resultsCount = prediction.Concat.Length / 3;
122 |     var results = new List<YoloV3Result>(resultsCount);
123 |     for (int c = 0; c < resultsCount; c++)
124 |     {
125 |         // read the triplet in place; the batch index (offset 0) is not used
126 |         int classIndex = prediction.Concat[c * 3 + 1];
127 |         int boxIndex = prediction.Concat[c * 3 + 2];
128 |
129 |         var bbox = new float[4];
130 |         Array.Copy(prediction.Boxes, boxIndex * 4, bbox, 0, 4);
131 |
132 |         // scores are (1 x n_classes x n_candidates): all candidates of class 0 first, then class 1, ...
133 |         float score = prediction.Scores[boxIndex + classIndex * boxCount];
134 |
135 |         results.Add(new YoloV3Result(bbox, categories[classIndex], score));
136 |     }
137 |     return results;
138 | }
139 | ```
140 |
141 | In this version of the model, there are 80 classes (see the model's GitHub documentation for the link).
142 |
143 | You can use the above like this:
144 | ```csharp
145 | // load image
146 | string imageName = "dog_cat.jpg";
147 | using (var source = Image.FromFile(Path.Combine(imageFolder, imageName))) // the loaded Image is disposed too, not only its copy
148 | using (var bitmap = new Bitmap(source))
149 | {
150 |     // predict
151 |     var predict = predictionEngine.Predict(new YoloV3BitmapData() { Image = bitmap });
152 |
153 |     var results = GetResults(predict, classesNames);
154 |
155 |     // draw predictions; the font and brush are created once and disposed, instead of once per result
156 |     using (var g = Graphics.FromImage(bitmap))
157 |     using (var font = new Font("Arial", 12))
158 |     using (var fill = new SolidBrush(Color.FromArgb(50, Color.Red)))
159 |     {
160 |         foreach (var result in results)
161 |         {
162 |             // BBox is read here as (y1, x1, y2, x2)
163 |             var y1 = result.BBox[0];
164 |             var x1 = result.BBox[1];
165 |             var y2 = result.BBox[2];
166 |             var x2 = result.BBox[3];
167 |
168 |             g.DrawRectangle(Pens.Red, x1, y1, x2 - x1, y2 - y1);
169 |             g.FillRectangle(fill, x1, y1, x2 - x1, y2 - y1);
170 |             g.DrawString(result.Label + " " + result.Confidence.ToString("0.00"), font, Brushes.Blue, new PointF(x1, y1));
171 |         }
172 |     }
173 |
174 |     // save once all drawing is finished
175 |     bitmap.Save(Path.Combine(imageOutputFolder, Path.ChangeExtension(imageName, "_processed" + Path.GetExtension(imageName))));
176 | }
177 | ```
178 |
179 | 
180 |
--------------------------------------------------------------------------------
/YOLOV3MLNetSO/YOLOV3MLNetSO.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | netcoreapp3.1
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | PreserveNewest
18 |
19 |
20 | Always
21 |
22 |
23 | PreserveNewest
24 |
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/YOLOv3MLNet.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.30225.117
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "YOLOv3MLNet", "YOLOv3MLNet\YOLOv3MLNet.csproj", "{829141C8-581A-4491-84F0-9D500DB921C3}"
7 | EndProject
8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "YOLOV3MLNetSO", "YOLOV3MLNetSO\YOLOV3MLNetSO.csproj", "{CC9A2611-3ABE-4994-991E-1722243743D3}"
9 | EndProject
10 | Global
11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | Debug|Any CPU = Debug|Any CPU
13 | Release|Any CPU = Release|Any CPU
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {829141C8-581A-4491-84F0-9D500DB921C3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
17 | {829141C8-581A-4491-84F0-9D500DB921C3}.Debug|Any CPU.Build.0 = Debug|Any CPU
18 | {829141C8-581A-4491-84F0-9D500DB921C3}.Release|Any CPU.ActiveCfg = Release|Any CPU
19 | {829141C8-581A-4491-84F0-9D500DB921C3}.Release|Any CPU.Build.0 = Release|Any CPU
20 | {CC9A2611-3ABE-4994-991E-1722243743D3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21 | {CC9A2611-3ABE-4994-991E-1722243743D3}.Debug|Any CPU.Build.0 = Debug|Any CPU
22 | {CC9A2611-3ABE-4994-991E-1722243743D3}.Release|Any CPU.ActiveCfg = Release|Any CPU
23 | {CC9A2611-3ABE-4994-991E-1722243743D3}.Release|Any CPU.Build.0 = Release|Any CPU
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | GlobalSection(ExtensibilityGlobals) = postSolution
29 | SolutionGuid = {2F4D0364-1245-4F7A-905C-88CEEF697ED0}
30 | EndGlobalSection
31 | EndGlobal
32 |
--------------------------------------------------------------------------------
/YOLOv3MLNet/Assets/Images/PMC5055614_00001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/YOLOv3MLNet/Assets/Images/PMC5055614_00001.jpg
--------------------------------------------------------------------------------
/YOLOv3MLNet/Assets/Output/PMC5055614_00001._processed.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/YOLOv3MLNet/Assets/Output/PMC5055614_00001._processed.jpg
--------------------------------------------------------------------------------
/YOLOv3MLNet/DataStructures/YoloV3BitmapData.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.ML.Data;
2 | using Microsoft.ML.Transforms.Image;
3 | using System.Drawing;
4 |
5 | namespace YOLOv3MLNet.DataStructures
6 | {
7 |     /// <summary>Input row of the scoring pipeline: the image plus its size, as the "bitmap", "width" and "height" columns.</summary>
8 |     public class YoloV3BitmapData
9 |     {
10 |         [ColumnName("bitmap"), ImageType(416, 416)]
11 |         public Bitmap Image { get; set; }
12 |
13 |         // Width and height are not stored: both are read from Image on every access.
14 |         [ColumnName("width")]
15 |         public float ImageWidth { get { return Image.Width; } }
16 |
17 |         [ColumnName("height")]
18 |         public float ImageHeight { get { return Image.Height; } }
19 |     }
20 |
--------------------------------------------------------------------------------
/YOLOv3MLNet/DataStructures/YoloV3Prediction.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.ML.Data;
2 | using System;
3 | using System.Collections.Generic;
4 | using System.Diagnostics;
5 | using System.Linq;
6 |
7 | namespace YOLOv3MLNet.DataStructures
8 | {
9 |     /// <summary>
10 |     /// Raw outputs of the model for one image, plus the post-processing (confidence filtering,
11 |     /// non-maximum suppression and rescaling) that turns them into <see cref="YoloV3Result"/>s.
12 |     /// </summary>
13 |     public class YoloV3Prediction
14 |     {
15 |         /// <summary>
16 |         /// Number of candidate boxes: ((52 x 52) + (26 x 26) + (13 x 13)) x 3 = 10,647.
17 |         /// </summary>
18 |         private const int yoloV3BboxPredictionCount = 10_647;
19 |
20 |         /// <summary>
21 |         /// Value of the "width" column; used by <see cref="GetResults"/> as the width of the original image.
22 |         /// </summary>
23 |         [ColumnName("width")]
24 |         public float ImageWidth { get; set; }
25 |
26 |         /// <summary>
27 |         /// Value of the "height" column; used by <see cref="GetResults"/> as the height of the original image.
28 |         /// </summary>
29 |         [ColumnName("height")]
30 |         public float ImageHeight { get; set; }
31 |
32 |         /// <summary>
33 |         /// Bounding boxes raw prediction: 4 values (center x, center y, width, height) per candidate box.
34 |         /// </summary>
35 |         [ColumnName("bboxes")]
36 |         public float[] BBoxes;
37 |
38 |         /// <summary>
39 |         /// Classes raw prediction: for each candidate box, one score per category.
40 |         /// </summary>
41 |         [ColumnName("classes")]
42 |         public float[] Classes;
43 |
44 |         /// <summary>
45 |         /// Keeps the boxes whose best class score reaches the confidence threshold, removes
46 |         /// overlapping boxes (non-maximum suppression) and scales the survivors to image coordinates.
47 |         /// </summary>
48 |         /// <param name="categories">Category names; their count must match the scores per box in <see cref="Classes"/>.</param>
49 |         /// <param name="confThres">Boxes whose best class score is below this value are dropped.</param>
50 |         /// <param name="iouThres">A box is dropped when its IoU with an already kept box is above this value.</param>
51 |         /// <returns>The kept boxes, ordered by decreasing confidence.</returns>
52 |         /// <exception cref="ArgumentNullException"><paramref name="categories"/> is null.</exception>
53 |         /// <exception cref="ArgumentException"><paramref name="categories"/> is empty, or a raw output has an unexpected size.</exception>
54 |         public IReadOnlyList<YoloV3Result> GetResults(string[] categories, float confThres = 0.5f, float iouThres = 0.5f)
55 |         {
56 |             if (categories == null)
57 |             {
58 |                 throw new ArgumentNullException(nameof(categories));
59 |             }
60 |
61 |             if (categories.Length == 0)
62 |             {
63 |                 throw new ArgumentException("At least one category is required.", nameof(categories));
64 |             }
65 |
66 |             if (BBoxes.Length != yoloV3BboxPredictionCount * 4)
67 |             {
68 |                 throw new ArgumentException($"Bounding box prediction size is not correct. Expected {yoloV3BboxPredictionCount * 4}, got {BBoxes.Length}.", nameof(BBoxes));
69 |             }
70 |
71 |             if (Classes.Length != yoloV3BboxPredictionCount * categories.Length)
72 |             {
73 |                 throw new ArgumentException($"Classes prediction size is not correct. Expected {yoloV3BboxPredictionCount * categories.Length}, got {Classes.Length}. You might want to check the {nameof(categories)}.", nameof(Classes));
74 |             }
75 |
76 |             // compute scale and pad factors
77 |             float heightScale = 1f;
78 |             float widthScale = 1f;
79 |             float heightPad = 0f;
80 |             float widthPad = 0f;
81 |             if (ImageWidth < ImageHeight)
82 |             {
83 |                 widthScale = ImageHeight / ImageWidth;
84 |                 widthPad = ImageWidth * (1 - widthScale) / 2f;
85 |             }
86 |             else if (ImageWidth > ImageHeight)
87 |             {
88 |                 heightScale = ImageWidth / ImageHeight;
89 |                 heightPad = ImageHeight * (1 - heightScale) / 2f;
90 |             }
91 |
92 |             // process raw results
93 |             int classCount = categories.Length;
94 |             var candidates = new List<(float[] Box, float Confidence, int ClassIndex)>();
95 |             for (int r = 0; r < yoloV3BboxPredictionCount; r++)
96 |             {
97 |                 // index the flat array directly: Skip/Take would walk it from the start for every box
98 |                 int scoreOffset = r * classCount;
99 |                 int classIndex = 0;
100 |                 float conf = Classes[scoreOffset];
101 |                 for (int c = 1; c < classCount; c++)
102 |                 {
103 |                     float score = Classes[scoreOffset + c];
104 |                     // first maximum wins; a NaN is only kept when every score of the box is NaN
105 |                     if (score > conf || (float.IsNaN(conf) && !float.IsNaN(score)))
106 |                     {
107 |                         conf = score;
108 |                         classIndex = c;
109 |                     }
110 |                 }
111 |
112 |                 if (conf < confThres)
113 |                 {
114 |                     continue; // if below conf threshold, skip it
115 |                 }
116 |
117 |                 int boxOffset = r * 4;
118 |                 var box = Xywh2xyxy(new[] { BBoxes[boxOffset], BBoxes[boxOffset + 1], BBoxes[boxOffset + 2], BBoxes[boxOffset + 3] });
119 |                 candidates.Add((box, conf, classIndex));
120 |             }
121 |
122 |             // Non-maximum Suppression
123 |             var sorted = candidates.OrderByDescending(candidate => candidate.Confidence).ToList(); // sort by confidence
124 |             var suppressed = new bool[sorted.Count];
125 |             var resultsNms = new List<YoloV3Result>();
126 |             for (int i = 0; i < sorted.Count; i++)
127 |             {
128 |                 if (suppressed[i])
129 |                 {
130 |                     continue;
131 |                 }
132 |
133 |                 var (keptBox, keptConf, keptClass) = sorted[i];
134 |                 resultsNms.Add(new YoloV3Result(ScaleCoords(keptBox, ImageHeight, ImageWidth, heightScale, widthScale, heightPad, widthPad), categories[keptClass], keptConf));
135 |
136 |                 // every entry before i has already been kept or suppressed, so only later ones need checking
137 |                 for (int j = i + 1; j < sorted.Count; j++)
138 |                 {
139 |                     if (!suppressed[j] && BoxIoU(keptBox, sorted[j].Box) > iouThres)
140 |                     {
141 |                         suppressed[j] = true;
142 |                     }
143 |                 }
144 |             }
145 |
146 |             return resultsNms;
147 |         }
148 |
149 |         /// <summary>
150 |         /// Scale coordinates to page.
151 |         /// </summary>
152 |         /// <param name="bbox">[x1, y1, x2, y2]</param>
153 |         private static float[] ScaleCoords(float[] bbox, float imageHeight, float imageWidth, float heightScale, float widthScale, float heightPad, float widthPad)
154 |         {
155 |             float[] adjBbox = new float[4];
156 |             adjBbox[0] = bbox[0] * imageWidth * widthScale + widthPad;
157 |             adjBbox[1] = bbox[1] * imageHeight * heightScale + heightPad;
158 |             adjBbox[2] = bbox[2] * imageWidth * widthScale + widthPad;
159 |             adjBbox[3] = bbox[3] * imageHeight * heightScale + heightPad;
160 |             return adjBbox;
161 |         }
162 |
163 |         /// <summary>
164 |         /// Return intersection-over-union (Jaccard index) of boxes.
165 |         /// Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
166 |         /// </summary>
167 |         public static float BoxIoU(float[] boxes1, float[] boxes2)
168 |         {
169 |             static float BoxArea(float[] box)
170 |             {
171 |                 return (box[2] - box[0]) * (box[3] - box[1]);
172 |             }
173 |
174 |             var area1 = BoxArea(boxes1);
175 |             var area2 = BoxArea(boxes2);
176 |
177 |             Debug.Assert(area1 >= 0);
178 |             Debug.Assert(area2 >= 0);
179 |
180 |             var dx = Math.Max(0f, Math.Min(boxes1[2], boxes2[2]) - Math.Max(boxes1[0], boxes2[0]));
181 |             var dy = Math.Max(0f, Math.Min(boxes1[3], boxes2[3]) - Math.Max(boxes1[1], boxes2[1]));
182 |             var inter = dx * dy;
183 |
184 |             // two zero-area boxes give 0 / 0 = NaN, which never compares above a threshold
185 |             return inter / (area1 + area2 - inter);
186 |         }
187 |
188 |         /// <summary>
189 |         /// Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
190 |         /// Box (center x, center y, width, height) to (x1, y1, x2, y2)
191 |         /// </summary>
192 |         public static float[] Xywh2xyxy(float[] bbox)
193 |         {
194 |             var bboxAdj = new float[4];
195 |             bboxAdj[0] = bbox[0] - bbox[2] / 2f;
196 |             bboxAdj[1] = bbox[1] - bbox[3] / 2f;
197 |             bboxAdj[2] = bbox[0] + bbox[2] / 2f;
198 |             bboxAdj[3] = bbox[1] + bbox[3] / 2f;
199 |             return bboxAdj;
200 |         }
201 |     }
202 | }
203 |
--------------------------------------------------------------------------------
/YOLOv3MLNet/DataStructures/YoloV3Result.cs:
--------------------------------------------------------------------------------
1 | namespace YOLOv3MLNet.DataStructures
2 | {
3 |     /// <summary>
4 |     /// A single detection: bounding box, category label and confidence.
5 |     /// </summary>
6 |     public struct YoloV3Result
7 |     {
8 |         /// <summary>
9 |         /// Creates a result from its three components; the array is stored as given, not copied.
10 |         /// </summary>
11 |         public YoloV3Result(float[] bbox, string label, float confidence)
12 |         {
13 |             (BBox, Label, Confidence) = (bbox, label, confidence);
14 |         }
15 |
16 |         /// <summary>
17 |         /// x1, y1, x2, y2 in page coordinates.
18 |         /// </summary>
19 |         public float[] BBox { get; }
20 |
21 |         /// <summary>The Bbox category.</summary>
22 |         public string Label { get; }
23 |
24 |         /// <summary>Category's confidence level.</summary>
25 |         public float Confidence { get; }
26 |     }
27 | }
28 |
--------------------------------------------------------------------------------
/YOLOv3MLNet/Program.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.ML;
2 | using System.Collections.Generic;
3 | using System.Drawing;
4 | using System.IO;
5 | using YOLOv3MLNet.DataStructures;
6 | using static Microsoft.ML.Transforms.Image.ImageResizingEstimator;
7 |
8 | namespace YOLOv3MLNet
9 | {
10 |     /// <summary>
11 |     /// Sample program: runs the model on one image and saves a copy with the detections drawn on it.
12 |     /// </summary>
13 |     class Program
14 |     {
15 |         const string modelLocation = @"D:\MachineLearning\Document Layout Analysis\notebooks\yolo\dla_yolov3.onnx";
16 |
17 |         const string imageFolder = @"Assets\Images";
18 |
19 |         const string imageOutputFolder = @"Assets\Output";
20 |
21 |         static readonly string[] classesNames = new string[]
22 |         {
23 |             "caption", "chart", "credit",
24 |             "drop-capital", "floating", "footer",
25 |             "frame", "graphics", "header",
26 |             "heading", "image", "linedrawing",
27 |             "maths", "noise", "page-number",
28 |             "paragraph", "separator", "table"
29 |         };
30 |
31 |         static float confidenceThreshold = 0.2f;
32 |
33 |         static float confidenceIou = 0.5f;
34 |
35 |         /// <summary>
36 |         /// Loads the model, scores one image from <see cref="imageFolder"/> and writes the annotated
37 |         /// image to <see cref="imageOutputFolder"/>.
38 |         /// </summary>
39 |         static void Main()
40 |         {
41 |             Directory.CreateDirectory(imageOutputFolder);
42 |             MLContext mlContext = new MLContext();
43 |
44 |             // load prediction engine
45 |             var predictionEngine = LoadPredictionEngine(mlContext, modelLocation);
46 |
47 |             // load image; the Image returned by FromFile is disposed too, not only its Bitmap copy
48 |             string imageName = "PMC5055614_00001.jpg";
49 |             using (var source = Image.FromFile(Path.Combine(imageFolder, imageName)))
50 |             using (var bitmap = new Bitmap(source))
51 |             {
52 |                 // predict
53 |                 var results = predictionEngine.Predict(new YoloV3BitmapData() { Image = bitmap })
54 |                                               .GetResults(classesNames, confidenceThreshold, confidenceIou);
55 |
56 |                 // draw predictions; the font and brush are created once and disposed, instead of once per result
57 |                 using (var g = Graphics.FromImage(bitmap))
58 |                 using (var font = new Font("Arial", 12))
59 |                 using (var fill = new SolidBrush(Color.FromArgb(50, Color.Red)))
60 |                 {
61 |                     foreach (var result in results)
62 |                     {
63 |                         var x1 = result.BBox[0];
64 |                         var y1 = result.BBox[1];
65 |                         var w = result.BBox[2] - x1;
66 |                         var h = result.BBox[3] - y1;
67 |
68 |                         g.DrawRectangle(Pens.Red, x1, y1, w, h);
69 |                         g.FillRectangle(fill, x1, y1, w, h);
70 |                         g.DrawString(result.Label + " " + result.Confidence.ToString("0.00"), font, Brushes.Blue, new PointF(x1, y1));
71 |                     }
72 |                 }
73 |
74 |                 // save once all drawing is finished
75 |                 bitmap.Save(Path.Combine(imageOutputFolder, Path.ChangeExtension(imageName, "_processed" + Path.GetExtension(imageName))));
76 |             }
77 |         }
78 |
79 |         /// <summary>
80 |         /// Builds the scoring pipeline (resize to 416x416, extract pixels scaled by 1/255, apply the
81 |         /// ONNX model) and wraps it in a prediction engine.
82 |         /// </summary>
83 |         /// <param name="mlContext">Context used to create the transforms and the engine.</param>
84 |         /// <param name="modelPath">Path of the ONNX model file.</param>
85 |         /// <returns>An engine mapping a <see cref="YoloV3BitmapData"/> to a <see cref="YoloV3Prediction"/>.</returns>
86 |         public static PredictionEngine<YoloV3BitmapData, YoloV3Prediction> LoadPredictionEngine(MLContext mlContext, string modelPath)
87 |         {
88 |             // Define scoring pipeline
89 |             var pipeline = mlContext.Transforms.ResizeImages(inputColumnName: "bitmap", outputColumnName: "image", imageWidth: 416, imageHeight: 416, resizing: ResizingKind.IsoPad)
90 |                 .Append(mlContext.Transforms.ExtractPixels(outputColumnName: "image", scaleImage: 1f / 255f))
91 |                 .Append(mlContext.Transforms.ApplyOnnxModel(inputColumnNames: new[] { "image" }, outputColumnNames: new[] { "bboxes", "classes" }, modelFile: modelPath));
92 |
93 |             // Fit on empty list to obtain input data schema
94 |             var model = pipeline.Fit(mlContext.Data.LoadFromEnumerable(new List<YoloV3BitmapData>()));
95 |
96 |             // Create prediction engine
97 |             return mlContext.Model.CreatePredictionEngine<YoloV3BitmapData, YoloV3Prediction>(model);
98 |         }
99 |     }
100 | }
101 |
--------------------------------------------------------------------------------
/YOLOv3MLNet/YOLOv3MLNet.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | netcoreapp3.1
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | PreserveNewest
18 |
19 |
20 | PreserveNewest
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/netron.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BobLd/YOLOv3MLNet/e09caa384d9609961020413bf237923d5af31307/netron.png
--------------------------------------------------------------------------------