├── .gitignore ├── Common ├── AppDelegate.swift └── dog416.png ├── Converter ├── Convert_pb_coreml.ipynb ├── README.markdown └── yolo.meta ├── LICENSE.txt ├── README.markdown ├── YOLO-CoreML ├── YOLO-CoreML.xcodeproj │ ├── project.pbxproj │ └── project.xcworkspace │ │ └── contents.xcworkspacedata └── YOLO-CoreML │ ├── Assets.xcassets │ └── AppIcon.appiconset │ │ └── Contents.json │ ├── Base.lproj │ └── Main.storyboard │ ├── Helpers.swift │ ├── Helpers │ ├── BoundingBox.swift │ ├── Helpers.swift │ └── VideoCapture.swift │ ├── Info.plist │ ├── Main.storyboard │ ├── ViewController.swift │ └── YOLO.swift ├── YOLO.jpg └── download.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Xcode 2 | build/ 3 | DerivedData/ 4 | 5 | *.pbxuser 6 | !default.pbxuser 7 | *.mode1v3 8 | !default.mode1v3 9 | *.mode2v3 10 | !default.mode2v3 11 | *.perspectivev3 12 | !default.perspectivev3 13 | 14 | *.xcuserstate 15 | xcuserdata/ 16 | 17 | ## Other 18 | *.moved-aside 19 | *.xccheckout 20 | *.xcscmblueprint 21 | 22 | profile 23 | *.hmap 24 | *.ipa 25 | 26 | # CocoaPods 27 | Pods/ 28 | !Podfile.lock 29 | 30 | # Temporary files 31 | .DS_Store 32 | .Trashes 33 | .Spotlight-V100 34 | *.swp 35 | *.lock 36 | 37 | # Python 38 | __pycache__/ 39 | *.py[cod] 40 | *$py.class 41 | 42 | # Jupyter Notebook 43 | .ipynb_checkpoints 44 | 45 | *.mlmodel 46 | *.pb 47 | -------------------------------------------------------------------------------- /Common/AppDelegate.swift: -------------------------------------------------------------------------------- 1 | import UIKit 2 | 3 | @UIApplicationMain 4 | class AppDelegate: UIResponder, UIApplicationDelegate { 5 | 6 | var window: UIWindow? 7 | 8 | func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool { 9 | // Override point for customization after application launch. 
10 | return true 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /Common/dog416.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syshen/YOLO-CoreML/2c973fb2fa74ebdab7f3462d9c0f90f0ddcb3390/Common/dog416.png -------------------------------------------------------------------------------- /Converter/Convert_pb_coreml.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tfcoreml as tf_converter" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import tensorflow as tf" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Output the graph\n", 30 | "\n", 31 | "In this step we just want to know the exact name of input and output nodes in the tensorflow graph" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": { 38 | "scrolled": true 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "input\n", 46 | "Pad/paddings\n", 47 | "Pad\n", 48 | "0-convolutional/filter\n", 49 | "0-convolutional\n", 50 | "sub/y\n", 51 | "sub\n", 52 | "truediv/y\n", 53 | "truediv\n", 54 | "mul/y\n", 55 | "mul\n", 56 | "BiasAdd/bias\n", 57 | "BiasAdd\n", 58 | "mul_1/x\n", 59 | "mul_1\n", 60 | "1-leaky\n", 61 | "2-maxpool\n", 62 | "Pad_1/paddings\n", 63 | "Pad_1\n", 64 | "3-convolutional/filter\n", 65 | "3-convolutional\n", 66 | "sub_1/y\n", 67 | "sub_1\n", 68 | "truediv_1/y\n", 69 | "truediv_1\n", 70 | "mul_2/y\n", 71 | "mul_2\n", 72 | "BiasAdd_1/bias\n", 73 | "BiasAdd_1\n", 74 | "mul_3/x\n", 75 | "mul_3\n", 76 | "4-leaky\n", 77 | "5-maxpool\n", 78 | "Pad_2/paddings\n", 79 | "Pad_2\n", 80 | "6-convolutional/filter\n", 81 | "6-convolutional\n", 82 | "sub_2/y\n", 83 | "sub_2\n", 84 | "truediv_2/y\n", 85 | "truediv_2\n", 86 | "mul_4/y\n", 87 | "mul_4\n", 88 | "BiasAdd_2/bias\n", 89 | "BiasAdd_2\n", 90 | "mul_5/x\n", 91 | "mul_5\n", 92 | "7-leaky\n", 93 | "Pad_3/paddings\n", 94 | "Pad_3\n", 95 | "8-convolutional/filter\n", 96 | "8-convolutional\n", 97 | "sub_3/y\n", 98 | "sub_3\n", 99 | "truediv_3/y\n", 100 | "truediv_3\n", 101 | "mul_6/y\n", 102 | "mul_6\n", 103 | "BiasAdd_3/bias\n", 104 | "BiasAdd_3\n", 105 | "mul_7/x\n", 106 | "mul_7\n", 107 | "9-leaky\n", 108 | "Pad_4/paddings\n", 109 | "Pad_4\n", 110 | "10-convolutional/filter\n", 111 | "10-convolutional\n", 112 | "sub_4/y\n", 113 | "sub_4\n", 114 | "truediv_4/y\n", 115 | "truediv_4\n", 116 | "mul_8/y\n", 117 | "mul_8\n", 118 | "BiasAdd_4/bias\n", 119 | "BiasAdd_4\n", 120 | "mul_9/x\n", 121 | "mul_9\n", 122 | "11-leaky\n", 123 | "12-maxpool\n", 124 | "Pad_5/paddings\n", 125 | "Pad_5\n", 126 | "13-convolutional/filter\n", 127 | "13-convolutional\n", 128 | "sub_5/y\n", 129 | "sub_5\n", 130 | "truediv_5/y\n", 131 | "truediv_5\n", 132 | "mul_10/y\n", 133 | "mul_10\n", 134 | "BiasAdd_5/bias\n", 135 | "BiasAdd_5\n", 136 | "mul_11/x\n", 137 | "mul_11\n", 138 | "14-leaky\n", 139 | "Pad_6/paddings\n", 140 | "Pad_6\n", 141 | "15-convolutional/filter\n", 142 | "15-convolutional\n", 143 | "sub_6/y\n", 144 | "sub_6\n", 145 | "truediv_6/y\n", 146 | "truediv_6\n", 147 | "mul_12/y\n", 148 | "mul_12\n", 149 | 
"BiasAdd_6/bias\n", 150 | "BiasAdd_6\n", 151 | "mul_13/x\n", 152 | "mul_13\n", 153 | "16-leaky\n", 154 | "Pad_7/paddings\n", 155 | "Pad_7\n", 156 | "17-convolutional/filter\n", 157 | "17-convolutional\n", 158 | "sub_7/y\n", 159 | "sub_7\n", 160 | "truediv_7/y\n", 161 | "truediv_7\n", 162 | "mul_14/y\n", 163 | "mul_14\n", 164 | "BiasAdd_7/bias\n", 165 | "BiasAdd_7\n", 166 | "mul_15/x\n", 167 | "mul_15\n", 168 | "18-leaky\n", 169 | "19-maxpool\n", 170 | "Pad_8/paddings\n", 171 | "Pad_8\n", 172 | "20-convolutional/filter\n", 173 | "20-convolutional\n", 174 | "sub_8/y\n", 175 | "sub_8\n", 176 | "truediv_8/y\n", 177 | "truediv_8\n", 178 | "mul_16/y\n", 179 | "mul_16\n", 180 | "BiasAdd_8/bias\n", 181 | "BiasAdd_8\n", 182 | "mul_17/x\n", 183 | "mul_17\n", 184 | "21-leaky\n", 185 | "Pad_9/paddings\n", 186 | "Pad_9\n", 187 | "22-convolutional/filter\n", 188 | "22-convolutional\n", 189 | "sub_9/y\n", 190 | "sub_9\n", 191 | "truediv_9/y\n", 192 | "truediv_9\n", 193 | "mul_18/y\n", 194 | "mul_18\n", 195 | "BiasAdd_9/bias\n", 196 | "BiasAdd_9\n", 197 | "mul_19/x\n", 198 | "mul_19\n", 199 | "23-leaky\n", 200 | "Pad_10/paddings\n", 201 | "Pad_10\n", 202 | "24-convolutional/filter\n", 203 | "24-convolutional\n", 204 | "sub_10/y\n", 205 | "sub_10\n", 206 | "truediv_10/y\n", 207 | "truediv_10\n", 208 | "mul_20/y\n", 209 | "mul_20\n", 210 | "BiasAdd_10/bias\n", 211 | "BiasAdd_10\n", 212 | "mul_21/x\n", 213 | "mul_21\n", 214 | "25-leaky\n", 215 | "Pad_11/paddings\n", 216 | "Pad_11\n", 217 | "26-convolutional/filter\n", 218 | "26-convolutional\n", 219 | "sub_11/y\n", 220 | "sub_11\n", 221 | "truediv_11/y\n", 222 | "truediv_11\n", 223 | "mul_22/y\n", 224 | "mul_22\n", 225 | "BiasAdd_11/bias\n", 226 | "BiasAdd_11\n", 227 | "mul_23/x\n", 228 | "mul_23\n", 229 | "27-leaky\n", 230 | "Pad_12/paddings\n", 231 | "Pad_12\n", 232 | "28-convolutional/filter\n", 233 | "28-convolutional\n", 234 | "sub_12/y\n", 235 | "sub_12\n", 236 | "truediv_12/y\n", 237 | "truediv_12\n", 238 | "mul_24/y\n", 239 | "mul_24\n", 240 | "BiasAdd_12/bias\n", 241 | "BiasAdd_12\n", 242 | "mul_25/x\n", 243 | "mul_25\n", 244 | "29-leaky\n", 245 | "30-maxpool\n", 246 | "Pad_13/paddings\n", 247 | "Pad_13\n", 248 | "31-convolutional/filter\n", 249 | "31-convolutional\n", 250 | "sub_13/y\n", 251 | "sub_13\n", 252 | "truediv_13/y\n", 253 | "truediv_13\n", 254 | "mul_26/y\n", 255 | "mul_26\n", 256 | "BiasAdd_13/bias\n", 257 | "BiasAdd_13\n", 258 | "mul_27/x\n", 259 | "mul_27\n", 260 | "32-leaky\n", 261 | "Pad_14/paddings\n", 262 | "Pad_14\n", 263 | "33-convolutional/filter\n", 264 | "33-convolutional\n", 265 | "sub_14/y\n", 266 | "sub_14\n", 267 | "truediv_14/y\n", 268 | "truediv_14\n", 269 | "mul_28/y\n", 270 | "mul_28\n", 271 | "BiasAdd_14/bias\n", 272 | "BiasAdd_14\n", 273 | "mul_29/x\n", 274 | "mul_29\n", 275 | "34-leaky\n", 276 | "Pad_15/paddings\n", 277 | "Pad_15\n", 278 | "35-convolutional/filter\n", 279 | "35-convolutional\n", 280 | "sub_15/y\n", 281 | "sub_15\n", 282 | "truediv_15/y\n", 283 | "truediv_15\n", 284 | "mul_30/y\n", 285 | "mul_30\n", 286 | "BiasAdd_15/bias\n", 287 | "BiasAdd_15\n", 288 | "mul_31/x\n", 289 | "mul_31\n", 290 | "36-leaky\n", 291 | "Pad_16/paddings\n", 292 | "Pad_16\n", 293 | "37-convolutional/filter\n", 294 | "37-convolutional\n", 295 | "sub_16/y\n", 296 | "sub_16\n", 297 | "truediv_16/y\n", 298 | "truediv_16\n", 299 | "mul_32/y\n", 300 | "mul_32\n", 301 | "BiasAdd_16/bias\n", 302 | "BiasAdd_16\n", 303 | "mul_33/x\n", 304 | "mul_33\n", 305 | "38-leaky\n", 306 | "Pad_17/paddings\n", 307 | "Pad_17\n", 308 | 
"39-convolutional/filter\n", 309 | "39-convolutional\n", 310 | "sub_17/y\n", 311 | "sub_17\n", 312 | "truediv_17/y\n", 313 | "truediv_17\n", 314 | "mul_34/y\n", 315 | "mul_34\n", 316 | "BiasAdd_17/bias\n", 317 | "BiasAdd_17\n", 318 | "mul_35/x\n", 319 | "mul_35\n", 320 | "40-leaky\n", 321 | "Pad_18/paddings\n", 322 | "Pad_18\n", 323 | "41-convolutional/filter\n", 324 | "41-convolutional\n", 325 | "sub_18/y\n", 326 | "sub_18\n", 327 | "truediv_18/y\n", 328 | "truediv_18\n", 329 | "mul_36/y\n", 330 | "mul_36\n", 331 | "BiasAdd_18/bias\n", 332 | "BiasAdd_18\n", 333 | "mul_37/x\n", 334 | "mul_37\n", 335 | "42-leaky\n", 336 | "Pad_19/paddings\n", 337 | "Pad_19\n", 338 | "43-convolutional/filter\n", 339 | "43-convolutional\n", 340 | "sub_19/y\n", 341 | "sub_19\n", 342 | "truediv_19/y\n", 343 | "truediv_19\n", 344 | "mul_38/y\n", 345 | "mul_38\n", 346 | "BiasAdd_19/bias\n", 347 | "BiasAdd_19\n", 348 | "mul_39/x\n", 349 | "mul_39\n", 350 | "44-leaky\n", 351 | "concat/concat_dim\n", 352 | "concat\n", 353 | "Pad_20/paddings\n", 354 | "Pad_20\n", 355 | "46-convolutional/filter\n", 356 | "46-convolutional\n", 357 | "sub_20/y\n", 358 | "sub_20\n", 359 | "truediv_20/y\n", 360 | "truediv_20\n", 361 | "mul_40/y\n", 362 | "mul_40\n", 363 | "BiasAdd_20/bias\n", 364 | "BiasAdd_20\n", 365 | "mul_41/x\n", 366 | "mul_41\n", 367 | "47-leaky\n", 368 | "ExtractImagePatches\n", 369 | "concat_1/axis\n", 370 | "concat_1\n", 371 | "Pad_21/paddings\n", 372 | "Pad_21\n", 373 | "50-convolutional/filter\n", 374 | "50-convolutional\n", 375 | "sub_21/y\n", 376 | "sub_21\n", 377 | "truediv_21/y\n", 378 | "truediv_21\n", 379 | "mul_42/y\n", 380 | "mul_42\n", 381 | "BiasAdd_21/bias\n", 382 | "BiasAdd_21\n", 383 | "mul_43/x\n", 384 | "mul_43\n", 385 | "51-leaky\n", 386 | "Pad_22/paddings\n", 387 | "Pad_22\n", 388 | "52-convolutional/filter\n", 389 | "52-convolutional\n", 390 | "BiasAdd_22/bias\n", 391 | "BiasAdd_22\n", 392 | "output\n", 393 | "init\n" 394 | ] 395 | } 396 | ], 397 | "source": [ 398 | "def load_graph(frozen_graph_filename):\n", 399 | " # We load the protobuf file from the disk and parse it to retrieve the \n", 400 | " # unserialized graph_def\n", 401 | " with tf.gfile.GFile(frozen_graph_filename, \"rb\") as f:\n", 402 | " graph_def = tf.GraphDef()\n", 403 | " graph_def.ParseFromString(f.read())\n", 404 | "\n", 405 | " # Then, we import the graph_def into a new Graph and return it \n", 406 | " with tf.Graph().as_default() as graph:\n", 407 | " tf.import_graph_def(graph_def, name=\"\")\n", 408 | " return graph\n", 409 | "\n", 410 | "graph = load_graph('yolo.pb')\n", 411 | "for op in graph.get_operations(): \n", 412 | " print (op.name)" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "## Convert to mlmodel format\n", 420 | "\n", 421 | "In the previose step, we know the output and input node names. And we can also get the input shape size from the cfg file. We specify these in the convert procedure and save the mlmodel file as `yolo.mlmodel` ." 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 14, 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "name": "stdout", 431 | "output_type": "stream", 432 | "text": [ 433 | "(80, ' classes')\n", 434 | "Shapes not found for 189 tensors. Executing graph to determine shapes. 
\n", 435 | "Automatic shape interpretation succeeded for input blob input:0\n", 436 | "161/349: Converting op name: input ( type: Placeholder )\n", 437 | "Skipping name of placeholder\n", 438 | "162/349: Converting op name: Pad ( type: Pad )\n", 439 | "163/349: Converting op name: 0-convolutional ( type: Conv2D )\n", 440 | "164/349: Converting op name: sub ( type: Sub )\n", 441 | "165/349: Converting op name: truediv ( type: RealDiv )\n", 442 | "166/349: Converting op name: mul ( type: Mul )\n", 443 | "167/349: Converting op name: BiasAdd ( type: BiasAdd )\n", 444 | "168/349: Converting op name: mul_1 ( type: Mul )\n", 445 | "169/349: Converting op name: 1-leaky ( type: Maximum )\n", 446 | "170/349: Converting op name: 2-maxpool ( type: MaxPool )\n", 447 | "171/349: Converting op name: Pad_1 ( type: Pad )\n", 448 | "172/349: Converting op name: 3-convolutional ( type: Conv2D )\n", 449 | "173/349: Converting op name: sub_1 ( type: Sub )\n", 450 | "174/349: Converting op name: truediv_1 ( type: RealDiv )\n", 451 | "175/349: Converting op name: mul_2 ( type: Mul )\n", 452 | "176/349: Converting op name: BiasAdd_1 ( type: BiasAdd )\n", 453 | "177/349: Converting op name: mul_3 ( type: Mul )\n", 454 | "178/349: Converting op name: 4-leaky ( type: Maximum )\n", 455 | "179/349: Converting op name: 5-maxpool ( type: MaxPool )\n", 456 | "180/349: Converting op name: Pad_2 ( type: Pad )\n", 457 | "181/349: Converting op name: 6-convolutional ( type: Conv2D )\n", 458 | "182/349: Converting op name: sub_2 ( type: Sub )\n", 459 | "183/349: Converting op name: truediv_2 ( type: RealDiv )\n", 460 | "184/349: Converting op name: mul_4 ( type: Mul )\n", 461 | "185/349: Converting op name: BiasAdd_2 ( type: BiasAdd )\n", 462 | "186/349: Converting op name: mul_5 ( type: Mul )\n", 463 | "187/349: Converting op name: 7-leaky ( type: Maximum )\n", 464 | "188/349: Converting op name: Pad_3 ( type: Pad )\n", 465 | "189/349: Converting op name: 8-convolutional ( type: Conv2D )\n", 466 | "190/349: Converting op name: sub_3 ( type: Sub )\n", 467 | "191/349: Converting op name: truediv_3 ( type: RealDiv )\n", 468 | "192/349: Converting op name: mul_6 ( type: Mul )\n", 469 | "193/349: Converting op name: BiasAdd_3 ( type: BiasAdd )\n", 470 | "194/349: Converting op name: mul_7 ( type: Mul )\n", 471 | "195/349: Converting op name: 9-leaky ( type: Maximum )\n", 472 | "196/349: Converting op name: Pad_4 ( type: Pad )\n", 473 | "197/349: Converting op name: 10-convolutional ( type: Conv2D )\n", 474 | "198/349: Converting op name: sub_4 ( type: Sub )\n", 475 | "199/349: Converting op name: truediv_4 ( type: RealDiv )\n", 476 | "200/349: Converting op name: mul_8 ( type: Mul )\n", 477 | "201/349: Converting op name: BiasAdd_4 ( type: BiasAdd )\n", 478 | "202/349: Converting op name: mul_9 ( type: Mul )\n", 479 | "203/349: Converting op name: 11-leaky ( type: Maximum )\n", 480 | "204/349: Converting op name: 12-maxpool ( type: MaxPool )\n", 481 | "205/349: Converting op name: Pad_5 ( type: Pad )\n", 482 | "206/349: Converting op name: 13-convolutional ( type: Conv2D )\n", 483 | "207/349: Converting op name: sub_5 ( type: Sub )\n", 484 | "208/349: Converting op name: truediv_5 ( type: RealDiv )\n", 485 | "209/349: Converting op name: mul_10 ( type: Mul )\n", 486 | "210/349: Converting op name: BiasAdd_5 ( type: BiasAdd )\n", 487 | "211/349: Converting op name: mul_11 ( type: Mul )\n", 488 | "212/349: Converting op name: 14-leaky ( type: Maximum )\n", 489 | "213/349: Converting op name: Pad_6 ( type: Pad )\n", 490 | "214/349: 
Converting op name: 15-convolutional ( type: Conv2D )\n", 491 | "215/349: Converting op name: sub_6 ( type: Sub )\n", 492 | "216/349: Converting op name: truediv_6 ( type: RealDiv )\n", 493 | "217/349: Converting op name: mul_12 ( type: Mul )\n", 494 | "218/349: Converting op name: BiasAdd_6 ( type: BiasAdd )\n", 495 | "219/349: Converting op name: mul_13 ( type: Mul )\n", 496 | "220/349: Converting op name: 16-leaky ( type: Maximum )\n", 497 | "221/349: Converting op name: Pad_7 ( type: Pad )\n", 498 | "222/349: Converting op name: 17-convolutional ( type: Conv2D )\n", 499 | "223/349: Converting op name: sub_7 ( type: Sub )\n", 500 | "224/349: Converting op name: truediv_7 ( type: RealDiv )\n", 501 | "225/349: Converting op name: mul_14 ( type: Mul )\n", 502 | "226/349: Converting op name: BiasAdd_7 ( type: BiasAdd )\n", 503 | "227/349: Converting op name: mul_15 ( type: Mul )\n", 504 | "228/349: Converting op name: 18-leaky ( type: Maximum )\n", 505 | "229/349: Converting op name: 19-maxpool ( type: MaxPool )\n", 506 | "230/349: Converting op name: Pad_8 ( type: Pad )\n", 507 | "231/349: Converting op name: 20-convolutional ( type: Conv2D )\n", 508 | "232/349: Converting op name: sub_8 ( type: Sub )\n", 509 | "233/349: Converting op name: truediv_8 ( type: RealDiv )\n", 510 | "234/349: Converting op name: mul_16 ( type: Mul )\n", 511 | "235/349: Converting op name: BiasAdd_8 ( type: BiasAdd )\n", 512 | "236/349: Converting op name: mul_17 ( type: Mul )\n", 513 | "237/349: Converting op name: 21-leaky ( type: Maximum )\n", 514 | "238/349: Converting op name: Pad_9 ( type: Pad )\n", 515 | "239/349: Converting op name: 22-convolutional ( type: Conv2D )\n", 516 | "240/349: Converting op name: sub_9 ( type: Sub )\n", 517 | "241/349: Converting op name: truediv_9 ( type: RealDiv )\n", 518 | "242/349: Converting op name: mul_18 ( type: Mul )\n", 519 | "243/349: Converting op name: BiasAdd_9 ( type: BiasAdd )\n", 520 | "244/349: Converting op name: mul_19 ( type: Mul )\n", 521 | "245/349: Converting op name: 23-leaky ( type: Maximum )\n", 522 | "246/349: Converting op name: Pad_10 ( type: Pad )\n", 523 | "247/349: Converting op name: 24-convolutional ( type: Conv2D )\n", 524 | "248/349: Converting op name: sub_10 ( type: Sub )\n", 525 | "249/349: Converting op name: truediv_10 ( type: RealDiv )\n", 526 | "250/349: Converting op name: mul_20 ( type: Mul )\n", 527 | "251/349: Converting op name: BiasAdd_10 ( type: BiasAdd )\n", 528 | "252/349: Converting op name: mul_21 ( type: Mul )\n", 529 | "253/349: Converting op name: 25-leaky ( type: Maximum )\n", 530 | "254/349: Converting op name: Pad_11 ( type: Pad )\n", 531 | "255/349: Converting op name: 26-convolutional ( type: Conv2D )\n", 532 | "256/349: Converting op name: sub_11 ( type: Sub )\n", 533 | "257/349: Converting op name: truediv_11 ( type: RealDiv )\n", 534 | "258/349: Converting op name: mul_22 ( type: Mul )\n", 535 | "259/349: Converting op name: BiasAdd_11 ( type: BiasAdd )\n", 536 | "260/349: Converting op name: mul_23 ( type: Mul )\n", 537 | "261/349: Converting op name: 27-leaky ( type: Maximum )\n", 538 | "262/349: Converting op name: Pad_12 ( type: Pad )\n", 539 | "263/349: Converting op name: 28-convolutional ( type: Conv2D )\n", 540 | "264/349: Converting op name: sub_12 ( type: Sub )\n", 541 | "265/349: Converting op name: truediv_12 ( type: RealDiv )\n", 542 | "266/349: Converting op name: mul_24 ( type: Mul )\n", 543 | "267/349: Converting op name: BiasAdd_12 ( type: BiasAdd )\n", 544 | "268/349: Converting op name: mul_25 ( 
type: Mul )\n", 545 | "269/349: Converting op name: 29-leaky ( type: Maximum )\n", 546 | "270/349: Converting op name: concat ( type: Identity )\n", 547 | "271/349: Converting op name: Pad_20 ( type: Pad )\n", 548 | "272/349: Converting op name: 46-convolutional ( type: Conv2D )\n", 549 | "273/349: Converting op name: sub_20 ( type: Sub )\n", 550 | "274/349: Converting op name: truediv_20 ( type: RealDiv )\n", 551 | "275/349: Converting op name: mul_40 ( type: Mul )\n", 552 | "276/349: Converting op name: BiasAdd_20 ( type: BiasAdd )\n", 553 | "277/349: Converting op name: mul_41 ( type: Mul )\n", 554 | "278/349: Converting op name: 47-leaky ( type: Maximum )\n", 555 | "279/349: Converting op name: ExtractImagePatches ( type: ExtractImagePatches )\n", 556 | "280/349: Converting op name: 30-maxpool ( type: MaxPool )\n", 557 | "281/349: Converting op name: Pad_13 ( type: Pad )\n", 558 | "282/349: Converting op name: 31-convolutional ( type: Conv2D )\n", 559 | "283/349: Converting op name: sub_13 ( type: Sub )\n", 560 | "284/349: Converting op name: truediv_13 ( type: RealDiv )\n", 561 | "285/349: Converting op name: mul_26 ( type: Mul )\n", 562 | "286/349: Converting op name: BiasAdd_13 ( type: BiasAdd )\n", 563 | "287/349: Converting op name: mul_27 ( type: Mul )\n", 564 | "288/349: Converting op name: 32-leaky ( type: Maximum )\n", 565 | "289/349: Converting op name: Pad_14 ( type: Pad )\n", 566 | "290/349: Converting op name: 33-convolutional ( type: Conv2D )\n", 567 | "291/349: Converting op name: sub_14 ( type: Sub )\n", 568 | "292/349: Converting op name: truediv_14 ( type: RealDiv )\n", 569 | "293/349: Converting op name: mul_28 ( type: Mul )\n", 570 | "294/349: Converting op name: BiasAdd_14 ( type: BiasAdd )\n", 571 | "295/349: Converting op name: mul_29 ( type: Mul )\n", 572 | "296/349: Converting op name: 34-leaky ( type: Maximum )\n", 573 | "297/349: Converting op name: Pad_15 ( type: Pad )\n", 574 | "298/349: Converting op name: 35-convolutional ( type: Conv2D )\n", 575 | "299/349: Converting op name: sub_15 ( type: Sub )\n", 576 | "300/349: Converting op name: truediv_15 ( type: RealDiv )\n", 577 | "301/349: Converting op name: mul_30 ( type: Mul )\n", 578 | "302/349: Converting op name: BiasAdd_15 ( type: BiasAdd )\n", 579 | "303/349: Converting op name: mul_31 ( type: Mul )\n", 580 | "304/349: Converting op name: 36-leaky ( type: Maximum )\n", 581 | "305/349: Converting op name: Pad_16 ( type: Pad )\n", 582 | "306/349: Converting op name: 37-convolutional ( type: Conv2D )\n" 583 | ] 584 | }, 585 | { 586 | "name": "stdout", 587 | "output_type": "stream", 588 | "text": [ 589 | "307/349: Converting op name: sub_16 ( type: Sub )\n", 590 | "308/349: Converting op name: truediv_16 ( type: RealDiv )\n", 591 | "309/349: Converting op name: mul_32 ( type: Mul )\n", 592 | "310/349: Converting op name: BiasAdd_16 ( type: BiasAdd )\n", 593 | "311/349: Converting op name: mul_33 ( type: Mul )\n", 594 | "312/349: Converting op name: 38-leaky ( type: Maximum )\n", 595 | "313/349: Converting op name: Pad_17 ( type: Pad )\n", 596 | "314/349: Converting op name: 39-convolutional ( type: Conv2D )\n", 597 | "315/349: Converting op name: sub_17 ( type: Sub )\n", 598 | "316/349: Converting op name: truediv_17 ( type: RealDiv )\n", 599 | "317/349: Converting op name: mul_34 ( type: Mul )\n", 600 | "318/349: Converting op name: BiasAdd_17 ( type: BiasAdd )\n", 601 | "319/349: Converting op name: mul_35 ( type: Mul )\n", 602 | "320/349: Converting op name: 40-leaky ( type: Maximum )\n", 603 | 
"321/349: Converting op name: Pad_18 ( type: Pad )\n", 604 | "322/349: Converting op name: 41-convolutional ( type: Conv2D )\n", 605 | "323/349: Converting op name: sub_18 ( type: Sub )\n", 606 | "324/349: Converting op name: truediv_18 ( type: RealDiv )\n", 607 | "325/349: Converting op name: mul_36 ( type: Mul )\n", 608 | "326/349: Converting op name: BiasAdd_18 ( type: BiasAdd )\n", 609 | "327/349: Converting op name: mul_37 ( type: Mul )\n", 610 | "328/349: Converting op name: 42-leaky ( type: Maximum )\n", 611 | "329/349: Converting op name: Pad_19 ( type: Pad )\n", 612 | "330/349: Converting op name: 43-convolutional ( type: Conv2D )\n", 613 | "331/349: Converting op name: sub_19 ( type: Sub )\n", 614 | "332/349: Converting op name: truediv_19 ( type: RealDiv )\n", 615 | "333/349: Converting op name: mul_38 ( type: Mul )\n", 616 | "334/349: Converting op name: BiasAdd_19 ( type: BiasAdd )\n", 617 | "335/349: Converting op name: mul_39 ( type: Mul )\n", 618 | "336/349: Converting op name: 44-leaky ( type: Maximum )\n", 619 | "337/349: Converting op name: concat_1 ( type: ConcatV2 )\n", 620 | "338/349: Converting op name: Pad_21 ( type: Pad )\n", 621 | "339/349: Converting op name: 50-convolutional ( type: Conv2D )\n", 622 | "340/349: Converting op name: sub_21 ( type: Sub )\n", 623 | "341/349: Converting op name: truediv_21 ( type: RealDiv )\n", 624 | "342/349: Converting op name: mul_42 ( type: Mul )\n", 625 | "343/349: Converting op name: BiasAdd_21 ( type: BiasAdd )\n", 626 | "344/349: Converting op name: mul_43 ( type: Mul )\n", 627 | "345/349: Converting op name: 51-leaky ( type: Maximum )\n", 628 | "346/349: Converting op name: Pad_22 ( type: Pad )\n", 629 | "347/349: Converting op name: 52-convolutional ( type: Conv2D )\n", 630 | "348/349: Converting op name: BiasAdd_22 ( type: BiasAdd )\n", 631 | "349/349: Converting op name: output ( type: Identity )\n", 632 | "\n", 633 | " Core ML model generated. 
Saved at location: yolo.mlmodel \n", 634 | "\n", 635 | "Core ML input(s): \n", 636 | " [name: \"input__0\"\n", 637 | "type {\n", 638 | " imageType {\n", 639 | " width: 608\n", 640 | " height: 608\n", 641 | " colorSpace: BGR\n", 642 | " }\n", 643 | "}\n", 644 | "]\n", 645 | "Core ML output(s): \n", 646 | " [name: \"output__0\"\n", 647 | "type {\n", 648 | " multiArrayType {\n", 649 | " shape: 425\n", 650 | " shape: 19\n", 651 | " shape: 19\n", 652 | " dataType: DOUBLE\n", 653 | " }\n", 654 | "}\n", 655 | "]\n" 656 | ] 657 | } 658 | ], 659 | "source": [ 660 | "coreml_model = tf_converter.convert(tf_model_path = 'yolo.pb',\n", 661 | " mlmodel_path = 'yolo.mlmodel',\n", 662 | " output_feature_names = ['output:0'], # the output node name we got from the previous step\n", 663 | " image_input_names= ['input:0'], # Core ML accepts an image as the input; we only need to specify which node is the image input node \n", 664 | " input_name_shape_dict = {'input:0' : [1, 608, 608, 3]}, # the input node name we got from the previous step; check the cfg file for the exact input shape size\n", 665 | " is_bgr = True, # the channel order is BGR instead of RGB\n", 666 | " image_scale = 1 / 255.0)\t # scale the input pixel values into the range 0 to 1" 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": null, 672 | "metadata": { 673 | "collapsed": true 674 | }, 675 | "outputs": [], 676 | "source": [] 677 | } 678 | ], 679 | "metadata": { 680 | "kernelspec": { 681 | "display_name": "Python 2", 682 | "language": "python", 683 | "name": "python2" 684 | }, 685 | "language_info": { 686 | "codemirror_mode": { 687 | "name": "ipython", 688 | "version": 2 689 | }, 690 | "file_extension": ".py", 691 | "mimetype": "text/x-python", 692 | "name": "python", 693 | "nbconvert_exporter": "python", 694 | "pygments_lexer": "ipython2", 695 | "version": "2.7.14" 696 | } 697 | }, 698 | "nbformat": 4, 699 | "nbformat_minor": 2 700 | } 701 | -------------------------------------------------------------------------------- /Converter/README.markdown: -------------------------------------------------------------------------------- 1 | If you want to convert the pre-trained models yourself, here are the instructions: 2 | 3 | 1. Download the pre-trained darknet weights: 4 | 5 | `% curl https://pjreddie.com/media/files/yolo.weights > yolo.weights` 6 | 7 | 2. Also download the configuration file from [darknet's repo](https://github.com/pjreddie/darknet/blob/master/cfg/yolo.cfg). 8 | 9 | 3. Build and install [darkflow](https://github.com/thtrieu/darkflow), then use the following command to convert the weights to a TensorFlow pb file: 10 | 11 | `% flow --model yolo.cfg --load yolo.weights --savepb` 12 | 13 | 4. Install [tf-coreml](https://github.com/tf-coreml/tf-coreml). 14 | 15 | 5. Launch Jupyter Notebook, open _Convert_pb_coreml.ipynb_, and run the script. After it finishes you will get yolo.mlmodel; copy the file into the Xcode project.
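For reference, the conversion itself boils down to a single `tfcoreml` call. The sketch below simply restates the call made in _Convert_pb_coreml.ipynb_, using the `input:0`/`output:0` node names found by inspecting the graph and the 608x608x3 input shape taken from the cfg file:

```python
import tfcoreml as tf_converter

# Convert the frozen TensorFlow graph exported by darkflow into a Core ML model.
coreml_model = tf_converter.convert(
    tf_model_path='yolo.pb',
    mlmodel_path='yolo.mlmodel',
    output_feature_names=['output:0'],                     # output node of the graph
    image_input_names=['input:0'],                         # treat this node as an image input
    input_name_shape_dict={'input:0': [1, 608, 608, 3]},   # input shape from yolo.cfg
    is_bgr=True,                                           # channel order is BGR, not RGB
    image_scale=1 / 255.0)                                 # scale input pixels into the 0..1 range
```

The resulting yolo.mlmodel takes a 608x608 BGR image as input and produces a 425x19x19 multi-array as output, as shown in the notebook's conversion log above.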
16 | 17 | -------------------------------------------------------------------------------- /Converter/yolo.meta: -------------------------------------------------------------------------------- 1 | {"net": {"type": "[net]", "batch": 1, "subdivisions": 1, "width": 608, "height": 608, "channels": 3, "momentum": 0.9, "decay": 0.0005, "angle": 0, "saturation": 1.5, "exposure": 1.5, "hue": 0.1, "learning_rate": 0.001, "burn_in": 1000, "max_batches": 500200, "policy": "steps", "steps": "400000,450000", "scales": ".1,.1"}, "type": "[region]", "anchors": [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828], "bias_match": 1, "classes": 80, "coords": 4, "num": 5, "softmax": 1, "jitter": 0.3, "rescore": 1, "object_scale": 5, "noobject_scale": 1, "class_scale": 1, "coord_scale": 1, "absolute": 1, "thresh": 0.6, "random": 1, "model": "../../YOLO-CoreML-MPSNNGraph/Convert/yolo.cfg", "inp_size": [608, 608, 3], "out_size": [19, 19, 425], "name": "yolo", "labels": ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"], "colors": [[254.0, 254.0, 254], [248.92, 228.6, 127], [243.84, 203.20000000000002, 0], [238.76, 177.79999999999998, -127], [233.68, 152.4, -254], [228.6, 127.0, 254], [223.52, 101.60000000000001, 127], [218.44, 76.20000000000002, 0], [213.35999999999999, 50.79999999999999, -127], [208.28000000000003, 25.399999999999995, -254], [203.20000000000002, 0.0, 254], [198.12, -25.400000000000023, 127], [193.04, -50.79999999999999, 0], [187.96, -76.20000000000002, -127], [182.88, -101.59999999999998, -254], [177.79999999999998, -127.0, 254], [172.71999999999997, -152.40000000000003, 127], [167.64, -177.79999999999998, 0], [162.56, -203.20000000000002, -127], [157.48, -228.59999999999997, -254], [152.4, -254.0, 254], [147.32000000000002, -279.40000000000003, 127], [142.24, -304.80000000000007, 0], [137.16, -330.19999999999993, -127], [132.08, -355.59999999999997, -254], [127.0, 254.0, 254], [121.92, 228.6, 127], [116.83999999999999, 203.20000000000002, 0], [111.75999999999999, 177.79999999999998, -127], [106.68, 152.4, -254], [101.60000000000001, 127.0, 254], [96.52, 101.60000000000001, 127], [91.44, 76.20000000000002, 0], [86.35999999999999, 50.79999999999999, -127], [81.27999999999999, 25.399999999999995, -254], [76.20000000000002, 0.0, 254], [71.12, -25.400000000000023, 127], [66.04, -50.79999999999999, 0], [60.96, -76.20000000000002, -127], [55.879999999999995, -101.59999999999998, -254], [50.79999999999999, -127.0, 254], [45.72000000000001, -152.40000000000003, 127], [40.64000000000001, -177.79999999999998, 0], [35.56, -203.20000000000002, -127], [30.48, -228.59999999999997, -254], [25.399999999999995, -254.0, 254], [20.31999999999999, -279.40000000000003, 127], 
[15.240000000000013, -304.80000000000007, 0], [10.160000000000009, -330.19999999999993, -127], [5.0800000000000045, -355.59999999999997, -254], [0.0, 254.0, 254], [-5.0800000000000045, 228.6, 127], [-10.160000000000009, 203.20000000000002, 0], [-15.240000000000013, 177.79999999999998, -127], [-20.320000000000018, 152.4, -254], [-25.400000000000023, 127.0, 254], [-30.480000000000025, 101.60000000000001, 127], [-35.559999999999974, 76.20000000000002, 0], [-40.63999999999998, 50.79999999999999, -127], [-45.719999999999985, 25.399999999999995, -254], [-50.79999999999999, 0.0, 254], [-55.879999999999995, -25.400000000000023, 127], [-60.96, -50.79999999999999, 0], [-66.04, -76.20000000000002, -127], [-71.12, -101.59999999999998, -254], [-76.20000000000002, -127.0, 254], [-81.28000000000002, -152.40000000000003, 127], [-86.36000000000001, -177.79999999999998, 0], [-91.44000000000003, -203.20000000000002, -127], [-96.51999999999997, -228.59999999999997, -254], [-101.59999999999998, -254.0, 254], [-106.67999999999998, -279.40000000000003, 127], [-111.75999999999999, -304.80000000000007, 0], [-116.83999999999999, -330.19999999999993, -127], [-121.92, -355.59999999999997, -254], [-127.0, 254.0, 254], [-132.08, 228.6, 127], [-137.16, 203.20000000000002, 0], [-142.24, 177.79999999999998, -127], [-147.32000000000002, 152.4, -254]]} -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 M.I. Hollemans 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to 5 | deal in the Software without restriction, including without limitation the 6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | # YOLO with Core ML 2 | 3 | This repo was forked and modified from [hollance/YOLO-CoreML-MPSNNGraph](https://github.com/hollance/YOLO-CoreML-MPSNNGraph). Some changes I made: 4 | 5 | 1. Keep only the Core ML part, since that is the only part I am interested in. 6 | 2. Use the YOLO2 pre-trained model instead of TinyYOLO. The YOLO2 model covers more classes and is more accurate than Tiny-YOLO. It is slower, but it can recognize more objects. 7 | 3. Drop the yad2k converter. I use darkflow to convert the YOLO pre-trained model from darknet format to TensorFlow, and tf-coreml to convert from TensorFlow to Core ML. 8 | 9 | 10 | ## About YOLO object detection 11 | 12 | YOLO is an object detection network.
It can detect multiple objects in an image and puts bounding boxes around these objects. [Read hollance's blog post about YOLO](http://machinethink.net/blog/object-detection-with-yolo/) to learn more about how it works. 13 | 14 | ![YOLO in action](YOLO.jpg) 15 | 16 | In this repo you'll find: 17 | 18 | - **YOLO-CoreML:** A demo app that runs the YOLO neural network on Core ML. 19 | - **Converter:** The scripts needed to convert the original DarkNet YOLO model to Core ML. 20 | 21 | To run the app: 22 | 23 | 1. execute download.sh to download the pre-trained model 24 | `% sh download.sh` 25 | 2. open the **xcodeproj** file in Xcode 9 and run it on a device with iOS 11 or better installed. 26 | 27 | The reported "elapsed" time is how long it takes the YOLO neural net to process a single image. The FPS is the actual throughput achieved by the app. 28 | 29 | > **NOTE:** Running these kinds of neural networks eats up a lot of battery power. The app can put a limit on the number of times per second it runs the neural net. You can change this in `setUpCamera()` by changing the line `videoCapture.fps = 50` to a smaller number. 30 | 31 | ## Converting the models 32 | 33 | > **NOTE:** You don't need to convert the models yourself. Everything you need to run the demo apps is included in the Xcode projects already. 34 | 35 | If you're interested in how the conversion was done, check the [instructions](Converter/). 36 | 37 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 48; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 7BA1C6D01EF27DA000BB25EF /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BC25FB51EF27C0D002ECBBA /* VideoCapture.swift */; }; 11 | 7BA1C6D61EF2861600BB25EF /* BoundingBox.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BA1C6D51EF2861400BB25EF /* BoundingBox.swift */; }; 12 | 7BA1C6DC1EF2B57200BB25EF /* dog416.png in Resources */ = {isa = PBXBuildFile; fileRef = 7BA1C6DB1EF2B56D00BB25EF /* dog416.png */; }; 13 | 7BC25FA21EF1B7D1002ECBBA /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BC25FA11EF1B7D1002ECBBA /* AppDelegate.swift */; }; 14 | 7BC25FA91EF1B7D1002ECBBA /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 7BC25FA81EF1B7D1002ECBBA /* Assets.xcassets */; }; 15 | 8020906C202C8BF30017321C /* yolo.mlmodel in Sources */ = {isa = PBXBuildFile; fileRef = 8020906B202C8BF30017321C /* yolo.mlmodel */; }; 16 | 8020907A202C8C8D0017321C /* YOLO.swift in Sources */ = {isa = PBXBuildFile; fileRef = 80209075202C8C8D0017321C /* YOLO.swift */; }; 17 | 8020907D202C8C8D0017321C /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 80209079202C8C8D0017321C /* ViewController.swift */; }; 18 | 805125AB202EBA8000CF1DD0 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 805125A9202EBA8000CF1DD0 /* Main.storyboard */; }; 19 | 805125AD202EBA8900CF1DD0 /* Helpers.swift in Sources */ = {isa = PBXBuildFile; fileRef = 805125AC202EBA8900CF1DD0 /* Helpers.swift */; }; 20 | /* End PBXBuildFile section */ 21 | 22 | /* Begin PBXFileReference section */ 23 | 7BA1C6D51EF2861400BB25EF /* BoundingBox.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BoundingBox.swift; sourceTree = ""; }; 24 | 7BA1C6DB1EF2B56D00BB25EF /* 
dog416.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = dog416.png; path = ../../Common/dog416.png; sourceTree = ""; }; 25 | 7BC25F9E1EF1B7D1002ECBBA /* YOLO-CoreML.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "YOLO-CoreML.app"; sourceTree = BUILT_PRODUCTS_DIR; }; 26 | 7BC25FA11EF1B7D1002ECBBA /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = AppDelegate.swift; path = ../../Common/AppDelegate.swift; sourceTree = ""; }; 27 | 7BC25FA81EF1B7D1002ECBBA /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 28 | 7BC25FAD1EF1B7D1002ECBBA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 29 | 7BC25FB51EF27C0D002ECBBA /* VideoCapture.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; 30 | 8020906B202C8BF30017321C /* yolo.mlmodel */ = {isa = PBXFileReference; lastKnownFileType = file.mlmodel; path = yolo.mlmodel; sourceTree = ""; }; 31 | 80209075202C8C8D0017321C /* YOLO.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = YOLO.swift; sourceTree = ""; }; 32 | 80209079202C8C8D0017321C /* ViewController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = ""; }; 33 | 805125AA202EBA8000CF1DD0 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; 34 | 805125AC202EBA8900CF1DD0 /* Helpers.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Helpers.swift; sourceTree = ""; }; 35 | /* End PBXFileReference section */ 36 | 37 | /* Begin PBXFrameworksBuildPhase section */ 38 | 7BC25F9B1EF1B7D1002ECBBA /* Frameworks */ = { 39 | isa = PBXFrameworksBuildPhase; 40 | buildActionMask = 2147483647; 41 | files = ( 42 | ); 43 | runOnlyForDeploymentPostprocessing = 0; 44 | }; 45 | /* End PBXFrameworksBuildPhase section */ 46 | 47 | /* Begin PBXGroup section */ 48 | 7BC25F951EF1B7D1002ECBBA = { 49 | isa = PBXGroup; 50 | children = ( 51 | 7BC25FA01EF1B7D1002ECBBA /* YOLO-CoreML */, 52 | 7BC25F9F1EF1B7D1002ECBBA /* Products */, 53 | ); 54 | sourceTree = ""; 55 | }; 56 | 7BC25F9F1EF1B7D1002ECBBA /* Products */ = { 57 | isa = PBXGroup; 58 | children = ( 59 | 7BC25F9E1EF1B7D1002ECBBA /* YOLO-CoreML.app */, 60 | ); 61 | name = Products; 62 | sourceTree = ""; 63 | }; 64 | 7BC25FA01EF1B7D1002ECBBA /* YOLO-CoreML */ = { 65 | isa = PBXGroup; 66 | children = ( 67 | 805125AE202EBB4300CF1DD0 /* Helpers */, 68 | 8020906B202C8BF30017321C /* yolo.mlmodel */, 69 | 7BC25FA11EF1B7D1002ECBBA /* AppDelegate.swift */, 70 | 7BC25FA81EF1B7D1002ECBBA /* Assets.xcassets */, 71 | 80209079202C8C8D0017321C /* ViewController.swift */, 72 | 805125A9202EBA8000CF1DD0 /* Main.storyboard */, 73 | 80209075202C8C8D0017321C /* YOLO.swift */, 74 | 7BA1C6DB1EF2B56D00BB25EF /* dog416.png */, 75 | 7BC25FAD1EF1B7D1002ECBBA /* Info.plist */, 76 | ); 77 | path = "YOLO-CoreML"; 78 | sourceTree = ""; 79 | }; 80 | 805125AE202EBB4300CF1DD0 /* Helpers */ = { 81 | isa = PBXGroup; 82 | children = ( 83 | 805125AC202EBA8900CF1DD0 /* Helpers.swift */, 84 | 7BA1C6D51EF2861400BB25EF /* BoundingBox.swift */, 85 | 7BC25FB51EF27C0D002ECBBA /* VideoCapture.swift */, 86 | ); 87 | path = 
Helpers; 88 | sourceTree = ""; 89 | }; 90 | /* End PBXGroup section */ 91 | 92 | /* Begin PBXNativeTarget section */ 93 | 7BC25F9D1EF1B7D1002ECBBA /* YOLO-CoreML */ = { 94 | isa = PBXNativeTarget; 95 | buildConfigurationList = 7BC25FB01EF1B7D1002ECBBA /* Build configuration list for PBXNativeTarget "YOLO-CoreML" */; 96 | buildPhases = ( 97 | 7BC25F9A1EF1B7D1002ECBBA /* Sources */, 98 | 7BC25F9B1EF1B7D1002ECBBA /* Frameworks */, 99 | 7BC25F9C1EF1B7D1002ECBBA /* Resources */, 100 | ); 101 | buildRules = ( 102 | ); 103 | dependencies = ( 104 | ); 105 | name = "YOLO-CoreML"; 106 | productName = "TinyYOLO-CoreML"; 107 | productReference = 7BC25F9E1EF1B7D1002ECBBA /* YOLO-CoreML.app */; 108 | productType = "com.apple.product-type.application"; 109 | }; 110 | /* End PBXNativeTarget section */ 111 | 112 | /* Begin PBXProject section */ 113 | 7BC25F961EF1B7D1002ECBBA /* Project object */ = { 114 | isa = PBXProject; 115 | attributes = { 116 | LastSwiftUpdateCheck = 0900; 117 | LastUpgradeCheck = 0900; 118 | ORGANIZATIONNAME = MachineThink; 119 | TargetAttributes = { 120 | 7BC25F9D1EF1B7D1002ECBBA = { 121 | CreatedOnToolsVersion = 9.0; 122 | }; 123 | }; 124 | }; 125 | buildConfigurationList = 7BC25F991EF1B7D1002ECBBA /* Build configuration list for PBXProject "YOLO-CoreML" */; 126 | compatibilityVersion = "Xcode 8.0"; 127 | developmentRegion = en; 128 | hasScannedForEncodings = 0; 129 | knownRegions = ( 130 | en, 131 | Base, 132 | ); 133 | mainGroup = 7BC25F951EF1B7D1002ECBBA; 134 | productRefGroup = 7BC25F9F1EF1B7D1002ECBBA /* Products */; 135 | projectDirPath = ""; 136 | projectRoot = ""; 137 | targets = ( 138 | 7BC25F9D1EF1B7D1002ECBBA /* YOLO-CoreML */, 139 | ); 140 | }; 141 | /* End PBXProject section */ 142 | 143 | /* Begin PBXResourcesBuildPhase section */ 144 | 7BC25F9C1EF1B7D1002ECBBA /* Resources */ = { 145 | isa = PBXResourcesBuildPhase; 146 | buildActionMask = 2147483647; 147 | files = ( 148 | 805125AB202EBA8000CF1DD0 /* Main.storyboard in Resources */, 149 | 7BA1C6DC1EF2B57200BB25EF /* dog416.png in Resources */, 150 | 7BC25FA91EF1B7D1002ECBBA /* Assets.xcassets in Resources */, 151 | ); 152 | runOnlyForDeploymentPostprocessing = 0; 153 | }; 154 | /* End PBXResourcesBuildPhase section */ 155 | 156 | /* Begin PBXSourcesBuildPhase section */ 157 | 7BC25F9A1EF1B7D1002ECBBA /* Sources */ = { 158 | isa = PBXSourcesBuildPhase; 159 | buildActionMask = 2147483647; 160 | files = ( 161 | 7BA1C6D01EF27DA000BB25EF /* VideoCapture.swift in Sources */, 162 | 805125AD202EBA8900CF1DD0 /* Helpers.swift in Sources */, 163 | 8020907D202C8C8D0017321C /* ViewController.swift in Sources */, 164 | 8020906C202C8BF30017321C /* yolo.mlmodel in Sources */, 165 | 7BA1C6D61EF2861600BB25EF /* BoundingBox.swift in Sources */, 166 | 8020907A202C8C8D0017321C /* YOLO.swift in Sources */, 167 | 7BC25FA21EF1B7D1002ECBBA /* AppDelegate.swift in Sources */, 168 | ); 169 | runOnlyForDeploymentPostprocessing = 0; 170 | }; 171 | /* End PBXSourcesBuildPhase section */ 172 | 173 | /* Begin PBXVariantGroup section */ 174 | 805125A9202EBA8000CF1DD0 /* Main.storyboard */ = { 175 | isa = PBXVariantGroup; 176 | children = ( 177 | 805125AA202EBA8000CF1DD0 /* Base */, 178 | ); 179 | name = Main.storyboard; 180 | sourceTree = ""; 181 | }; 182 | /* End PBXVariantGroup section */ 183 | 184 | /* Begin XCBuildConfiguration section */ 185 | 7BC25FAE1EF1B7D1002ECBBA /* Debug */ = { 186 | isa = XCBuildConfiguration; 187 | buildSettings = { 188 | ALWAYS_SEARCH_USER_PATHS = NO; 189 | CLANG_ANALYZER_NONNULL = YES; 190 | 
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 191 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 192 | CLANG_CXX_LIBRARY = "libc++"; 193 | CLANG_ENABLE_MODULES = YES; 194 | CLANG_ENABLE_OBJC_ARC = YES; 195 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 196 | CLANG_WARN_BOOL_CONVERSION = YES; 197 | CLANG_WARN_COMMA = YES; 198 | CLANG_WARN_CONSTANT_CONVERSION = YES; 199 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 200 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 201 | CLANG_WARN_EMPTY_BODY = YES; 202 | CLANG_WARN_ENUM_CONVERSION = YES; 203 | CLANG_WARN_INFINITE_RECURSION = YES; 204 | CLANG_WARN_INT_CONVERSION = YES; 205 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 206 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 207 | CLANG_WARN_STRICT_PROTOTYPES = YES; 208 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 209 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 210 | CLANG_WARN_UNREACHABLE_CODE = YES; 211 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 212 | CODE_SIGN_IDENTITY = "iPhone Developer"; 213 | COPY_PHASE_STRIP = NO; 214 | DEBUG_INFORMATION_FORMAT = dwarf; 215 | ENABLE_STRICT_OBJC_MSGSEND = YES; 216 | ENABLE_TESTABILITY = YES; 217 | GCC_C_LANGUAGE_STANDARD = gnu11; 218 | GCC_DYNAMIC_NO_PIC = NO; 219 | GCC_NO_COMMON_BLOCKS = YES; 220 | GCC_OPTIMIZATION_LEVEL = 0; 221 | GCC_PREPROCESSOR_DEFINITIONS = ( 222 | "DEBUG=1", 223 | "$(inherited)", 224 | ); 225 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 226 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 227 | GCC_WARN_UNDECLARED_SELECTOR = YES; 228 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 229 | GCC_WARN_UNUSED_FUNCTION = YES; 230 | GCC_WARN_UNUSED_VARIABLE = YES; 231 | IPHONEOS_DEPLOYMENT_TARGET = 11.0; 232 | MTL_ENABLE_DEBUG_INFO = YES; 233 | ONLY_ACTIVE_ARCH = YES; 234 | SDKROOT = iphoneos; 235 | SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; 236 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 237 | }; 238 | name = Debug; 239 | }; 240 | 7BC25FAF1EF1B7D1002ECBBA /* Release */ = { 241 | isa = XCBuildConfiguration; 242 | buildSettings = { 243 | ALWAYS_SEARCH_USER_PATHS = NO; 244 | CLANG_ANALYZER_NONNULL = YES; 245 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 246 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 247 | CLANG_CXX_LIBRARY = "libc++"; 248 | CLANG_ENABLE_MODULES = YES; 249 | CLANG_ENABLE_OBJC_ARC = YES; 250 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 251 | CLANG_WARN_BOOL_CONVERSION = YES; 252 | CLANG_WARN_COMMA = YES; 253 | CLANG_WARN_CONSTANT_CONVERSION = YES; 254 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 255 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 256 | CLANG_WARN_EMPTY_BODY = YES; 257 | CLANG_WARN_ENUM_CONVERSION = YES; 258 | CLANG_WARN_INFINITE_RECURSION = YES; 259 | CLANG_WARN_INT_CONVERSION = YES; 260 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 261 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 262 | CLANG_WARN_STRICT_PROTOTYPES = YES; 263 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 264 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 265 | CLANG_WARN_UNREACHABLE_CODE = YES; 266 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 267 | CODE_SIGN_IDENTITY = "iPhone Developer"; 268 | COPY_PHASE_STRIP = NO; 269 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 270 | ENABLE_NS_ASSERTIONS = NO; 271 | ENABLE_STRICT_OBJC_MSGSEND = YES; 272 | GCC_C_LANGUAGE_STANDARD = gnu11; 273 | GCC_NO_COMMON_BLOCKS = YES; 274 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 275 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 276 | GCC_WARN_UNDECLARED_SELECTOR = YES; 277 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 278 | GCC_WARN_UNUSED_FUNCTION = YES; 279 | 
GCC_WARN_UNUSED_VARIABLE = YES; 280 | IPHONEOS_DEPLOYMENT_TARGET = 11.0; 281 | MTL_ENABLE_DEBUG_INFO = NO; 282 | SDKROOT = iphoneos; 283 | SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule"; 284 | VALIDATE_PRODUCT = YES; 285 | }; 286 | name = Release; 287 | }; 288 | 7BC25FB11EF1B7D1002ECBBA /* Debug */ = { 289 | isa = XCBuildConfiguration; 290 | buildSettings = { 291 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 292 | DEVELOPMENT_TEAM = B89DDD6AUG; 293 | GCC_OPTIMIZATION_LEVEL = s; 294 | INFOPLIST_FILE = "YOLO-CoreML/Info.plist"; 295 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; 296 | PRODUCT_BUNDLE_IDENTIFIER = "cc.syshen.YOLO-CoreML"; 297 | PRODUCT_NAME = "$(TARGET_NAME)"; 298 | SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule"; 299 | SWIFT_VERSION = 4.0; 300 | TARGETED_DEVICE_FAMILY = "1,2"; 301 | }; 302 | name = Debug; 303 | }; 304 | 7BC25FB21EF1B7D1002ECBBA /* Release */ = { 305 | isa = XCBuildConfiguration; 306 | buildSettings = { 307 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 308 | DEVELOPMENT_TEAM = B89DDD6AUG; 309 | INFOPLIST_FILE = "YOLO-CoreML/Info.plist"; 310 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; 311 | PRODUCT_BUNDLE_IDENTIFIER = "cc.syshen.YOLO-CoreML"; 312 | PRODUCT_NAME = "$(TARGET_NAME)"; 313 | SWIFT_VERSION = 4.0; 314 | TARGETED_DEVICE_FAMILY = "1,2"; 315 | }; 316 | name = Release; 317 | }; 318 | /* End XCBuildConfiguration section */ 319 | 320 | /* Begin XCConfigurationList section */ 321 | 7BC25F991EF1B7D1002ECBBA /* Build configuration list for PBXProject "YOLO-CoreML" */ = { 322 | isa = XCConfigurationList; 323 | buildConfigurations = ( 324 | 7BC25FAE1EF1B7D1002ECBBA /* Debug */, 325 | 7BC25FAF1EF1B7D1002ECBBA /* Release */, 326 | ); 327 | defaultConfigurationIsVisible = 0; 328 | defaultConfigurationName = Release; 329 | }; 330 | 7BC25FB01EF1B7D1002ECBBA /* Build configuration list for PBXNativeTarget "YOLO-CoreML" */ = { 331 | isa = XCConfigurationList; 332 | buildConfigurations = ( 333 | 7BC25FB11EF1B7D1002ECBBA /* Debug */, 334 | 7BC25FB21EF1B7D1002ECBBA /* Release */, 335 | ); 336 | defaultConfigurationIsVisible = 0; 337 | defaultConfigurationName = Release; 338 | }; 339 | /* End XCConfigurationList section */ 340 | }; 341 | rootObject = 7BC25F961EF1B7D1002ECBBA /* Project object */; 342 | } 343 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "iphone", 5 | "size" : "20x20", 6 | "scale" : "2x" 7 | }, 8 | { 9 | "idiom" : "iphone", 10 | "size" : "20x20", 11 | "scale" : "3x" 12 | }, 13 | { 14 | "idiom" : "iphone", 15 | "size" : "29x29", 16 | "scale" : "2x" 17 | }, 18 | { 19 | "idiom" : "iphone", 20 | "size" : "29x29", 21 | "scale" : "3x" 22 | }, 23 | { 24 | "idiom" : "iphone", 25 | "size" : "40x40", 26 | "scale" : "2x" 27 | }, 28 | { 29 | "idiom" : "iphone", 30 | "size" : "40x40", 31 | "scale" : "3x" 32 | }, 33 | { 34 | "idiom" : "iphone", 35 | "size" : "60x60", 36 | "scale" : "2x" 37 | }, 38 | { 39 | "idiom" : "iphone", 40 | "size" : "60x60", 41 | "scale" : "3x" 42 | }, 43 | { 44 | "idiom" : "ipad", 45 | "size" : "20x20", 46 
| "scale" : "1x" 47 | }, 48 | { 49 | "idiom" : "ipad", 50 | "size" : "20x20", 51 | "scale" : "2x" 52 | }, 53 | { 54 | "idiom" : "ipad", 55 | "size" : "29x29", 56 | "scale" : "1x" 57 | }, 58 | { 59 | "idiom" : "ipad", 60 | "size" : "29x29", 61 | "scale" : "2x" 62 | }, 63 | { 64 | "idiom" : "ipad", 65 | "size" : "40x40", 66 | "scale" : "1x" 67 | }, 68 | { 69 | "idiom" : "ipad", 70 | "size" : "40x40", 71 | "scale" : "2x" 72 | }, 73 | { 74 | "idiom" : "ipad", 75 | "size" : "76x76", 76 | "scale" : "1x" 77 | }, 78 | { 79 | "idiom" : "ipad", 80 | "size" : "76x76", 81 | "scale" : "2x" 82 | }, 83 | { 84 | "idiom" : "ipad", 85 | "size" : "83.5x83.5", 86 | "scale" : "2x" 87 | } 88 | ], 89 | "info" : { 90 | "version" : 1, 91 | "author" : "xcode" 92 | } 93 | } -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Base.lproj/Main.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | Menlo-Regular 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Helpers.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | import UIKit 3 | import CoreML 4 | import Accelerate 5 | 6 | let anchors: [Float] = [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52] 7 | 8 | /* 9 | The tiny-yolo-voc network from YOLOv2. https://pjreddie.com/darknet/yolo/ 10 | 11 | This implementation is cobbled together from the following sources: 12 | 13 | - https://github.com/pjreddie/darknet 14 | - https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowYoloDetector.java 15 | - https://github.com/allanzelener/YAD2K 16 | */ 17 | class YOLO { 18 | public static let inputWidth = 416 19 | public static let inputHeight = 416 20 | 21 | public struct Prediction { 22 | let classIndex: Int 23 | let score: Float 24 | let rect: CGRect 25 | } 26 | 27 | let model = TinyYOLO() 28 | 29 | public init() { } 30 | 31 | public func predict(image: CVPixelBuffer) throws -> [Prediction] { 32 | if let output = try? model.prediction(image: image) { 33 | return computeBoundingBoxes(features: output.grid) 34 | } else { 35 | return [] 36 | } 37 | } 38 | 39 | public func computeBoundingBoxes(features: MLMultiArray) -> [Prediction] { 40 | assert(features.count == 125*13*13) 41 | 42 | var predictions = [Prediction]() 43 | 44 | let blockSize: Float = 32 45 | let gridHeight = 13 46 | let gridWidth = 13 47 | let boxesPerCell = 5 48 | let numClasses = 20 49 | 50 | // The 416x416 image is divided into a 13x13 grid. Each of these grid cells 51 | // will predict 5 bounding boxes (boxesPerCell). A bounding box consists of 52 | // five data items: x, y, width, height, and a confidence score. Each grid 53 | // cell also predicts which class each bounding box belongs to. 54 | // 55 | // The "features" array therefore contains (numClasses + 5)*boxesPerCell 56 | // values for each grid cell, i.e. 125 channels. The total features array 57 | // contains 13x13x125 elements (actually x128 instead of x125 because in 58 | // Metal the number of channels must be a multiple of 4). 59 | 60 | for cy in 0.. 
0.3 { 112 | let rect = CGRect(x: CGFloat(x - w/2), y: CGFloat(y - h/2), 113 | width: CGFloat(w), height: CGFloat(h)) 114 | 115 | let prediction = Prediction(classIndex: detectedClass, 116 | score: confidenceInClass, 117 | rect: rect) 118 | predictions.append(prediction) 119 | } 120 | } 121 | } 122 | } 123 | 124 | // We already filtered out any bounding boxes that have very low scores, 125 | // but there still may be boxes that overlap too much with others. We'll 126 | // use "non-maximum suppression" to prune those duplicate bounding boxes. 127 | return nonMaxSuppression(boxes: predictions, limit: 10, threshold: 0.5) 128 | } 129 | } 130 | 131 | /** 132 | Removes bounding boxes that overlap too much with other boxes that have 133 | a higher score. 134 | 135 | Based on code from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/non_max_suppression_op.cc 136 | 137 | - Parameters: 138 | - boxes: an array of bounding boxes and their scores 139 | - limit: the maximum number of boxes that will be selected 140 | - threshold: used to decide whether boxes overlap too much 141 | */ 142 | func nonMaxSuppression(boxes: [YOLO.Prediction], limit: Int, threshold: Float) -> [YOLO.Prediction] { 143 | 144 | // Do an argsort on the confidence scores, from high to low. 145 | let sortedIndices = boxes.indices.sorted { boxes[$0].score > boxes[$1].score } 146 | 147 | var selected: [YOLO.Prediction] = [] 148 | var active = [Bool](repeating: true, count: boxes.count) 149 | var numActive = active.count 150 | 151 | // The algorithm is simple: Start with the box that has the highest score. 152 | // Remove any remaining boxes that overlap it more than the given threshold 153 | // amount. If there are any boxes left (i.e. these did not overlap with any 154 | // previous boxes), then repeat this procedure, until no more boxes remain 155 | // or the limit has been reached. 156 | outer: for i in 0..= limit { break } 161 | 162 | for j in i+1.. threshold { 166 | active[j] = false 167 | numActive -= 1 168 | if numActive <= 0 { break outer } 169 | } 170 | } 171 | } 172 | } 173 | } 174 | return selected 175 | } 176 | 177 | /** 178 | Computes intersection-over-union overlap between two bounding boxes. 179 | */ 180 | public func IOU(a: CGRect, b: CGRect) -> Float { 181 | let areaA = a.width * a.height 182 | if areaA <= 0 { return 0 } 183 | 184 | let areaB = b.width * b.height 185 | if areaB <= 0 { return 0 } 186 | 187 | let intersectionMinX = max(a.minX, b.minX) 188 | let intersectionMinY = max(a.minY, b.minY) 189 | let intersectionMaxX = min(a.maxX, b.maxX) 190 | let intersectionMaxY = min(a.maxY, b.maxY) 191 | let intersectionArea = max(intersectionMaxY - intersectionMinY, 0) * 192 | max(intersectionMaxX - intersectionMinX, 0) 193 | return Float(intersectionArea / (areaA + areaB - intersectionArea)) 194 | } 195 | 196 | extension Array where Element: Comparable { 197 | /** 198 | Returns the index and value of the largest element in the array. 199 | */ 200 | public func argmax() -> (Int, Element) { 201 | precondition(self.count > 0) 202 | var maxIndex = 0 203 | var maxValue = self[0] 204 | for i in 1.. maxValue { 206 | maxValue = self[i] 207 | maxIndex = i 208 | } 209 | } 210 | return (maxIndex, maxValue) 211 | } 212 | } 213 | 214 | /** 215 | Logistic sigmoid. 216 | */ 217 | public func sigmoid(_ x: Float) -> Float { 218 | return 1 / (1 + exp(-x)) 219 | } 220 | 221 | /** 222 | Computes the "softmax" function over an array. 
223 | 224 | Based on code from https://github.com/nikolaypavlov/MLPNeuralNet/ 225 | 226 | This is what softmax looks like in "pseudocode" (actually using Python 227 | and numpy): 228 | 229 | x -= np.max(x) 230 | exp_scores = np.exp(x) 231 | softmax = exp_scores / np.sum(exp_scores) 232 | 233 | First we shift the values of x so that the highest value in the array is 0. 234 | This ensures numerical stability with the exponents, so they don't blow up. 235 | */ 236 | public func softmax(_ x: [Float]) -> [Float] { 237 | var x = x 238 | let len = vDSP_Length(x.count) 239 | 240 | // Find the maximum value in the input array. 241 | var max: Float = 0 242 | vDSP_maxv(x, 1, &max, len) 243 | 244 | // Subtract the maximum from all the elements in the array. 245 | // Now the highest value in the array is 0. 246 | max = -max 247 | vDSP_vsadd(x, 1, &max, &x, 1, len) 248 | 249 | // Exponentiate all the elements in the array. 250 | var count = Int32(x.count) 251 | vvexpf(&x, x, &count) 252 | 253 | // Compute the sum of all exponentiated values. 254 | var sum: Float = 0 255 | vDSP_sve(x, 1, &sum, len) 256 | 257 | // Divide each element by the sum. This normalizes the array contents 258 | // so that they all add up to 1. 259 | vDSP_vsdiv(x, 1, &sum, &x, 1, len) 260 | 261 | return x 262 | } 263 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Helpers/BoundingBox.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | import UIKit 3 | 4 | class BoundingBox { 5 | let shapeLayer: CAShapeLayer 6 | let textLayer: CATextLayer 7 | 8 | init() { 9 | shapeLayer = CAShapeLayer() 10 | shapeLayer.fillColor = UIColor.clear.cgColor 11 | shapeLayer.lineWidth = 4 12 | shapeLayer.isHidden = true 13 | 14 | textLayer = CATextLayer() 15 | textLayer.foregroundColor = UIColor.black.cgColor 16 | textLayer.isHidden = true 17 | textLayer.contentsScale = UIScreen.main.scale 18 | textLayer.fontSize = 14 19 | textLayer.font = UIFont(name: "Avenir", size: textLayer.fontSize) 20 | textLayer.alignmentMode = kCAAlignmentCenter 21 | } 22 | 23 | func addToLayer(_ parent: CALayer) { 24 | parent.addSublayer(shapeLayer) 25 | parent.addSublayer(textLayer) 26 | } 27 | 28 | func show(frame: CGRect, label: String, color: UIColor) { 29 | CATransaction.setDisableActions(true) 30 | 31 | let path = UIBezierPath(rect: frame) 32 | shapeLayer.path = path.cgPath 33 | shapeLayer.strokeColor = color.cgColor 34 | shapeLayer.isHidden = false 35 | 36 | textLayer.string = label 37 | textLayer.backgroundColor = color.cgColor 38 | textLayer.isHidden = false 39 | 40 | let attributes = [ 41 | NSAttributedStringKey.font: textLayer.font as Any 42 | ] 43 | 44 | let textRect = label.boundingRect(with: CGSize(width: 400, height: 100), 45 | options: .truncatesLastVisibleLine, 46 | attributes: attributes, context: nil) 47 | let textSize = CGSize(width: textRect.width + 12, height: textRect.height) 48 | let textOrigin = CGPoint(x: frame.origin.x - 2, y: frame.origin.y - textSize.height) 49 | textLayer.frame = CGRect(origin: textOrigin, size: textSize) 50 | } 51 | 52 | func hide() { 53 | shapeLayer.isHidden = true 54 | textLayer.isHidden = true 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Helpers/Helpers.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | import UIKit 3 | import CoreML 4 | import Accelerate 5 | 6 | // The 
labels for the 80 classes.
7 | let labels = [
8 |   "person",
9 |   "bicycle",
10 |   "car",
11 |   "motorbike",
12 |   "aeroplane",
13 |   "bus",
14 |   "train",
15 |   "truck",
16 |   "boat",
17 |   "traffic light",
18 |   "fire hydrant",
19 |   "stop sign",
20 |   "parking meter",
21 |   "bench",
22 |   "bird",
23 |   "cat",
24 |   "dog",
25 |   "horse",
26 |   "sheep",
27 |   "cow",
28 |   "elephant",
29 |   "bear",
30 |   "zebra",
31 |   "giraffe",
32 |   "backpack",
33 |   "umbrella",
34 |   "handbag",
35 |   "tie",
36 |   "suitcase",
37 |   "frisbee",
38 |   "skis",
39 |   "snowboard",
40 |   "sports ball",
41 |   "kite",
42 |   "baseball bat",
43 |   "baseball glove",
44 |   "skateboard",
45 |   "surfboard",
46 |   "tennis racket",
47 |   "bottle",
48 |   "wine glass",
49 |   "cup",
50 |   "fork",
51 |   "knife",
52 |   "spoon",
53 |   "bowl",
54 |   "banana",
55 |   "apple",
56 |   "sandwich",
57 |   "orange",
58 |   "broccoli",
59 |   "carrot",
60 |   "hot dog",
61 |   "pizza",
62 |   "donut",
63 |   "cake",
64 |   "chair",
65 |   "sofa",
66 |   "pottedplant",
67 |   "bed",
68 |   "diningtable",
69 |   "toilet",
70 |   "tvmonitor",
71 |   "laptop",
72 |   "mouse",
73 |   "remote",
74 |   "keyboard",
75 |   "cell phone",
76 |   "microwave",
77 |   "oven",
78 |   "toaster",
79 |   "sink",
80 |   "refrigerator",
81 |   "book",
82 |   "clock",
83 |   "vase",
84 |   "scissors",
85 |   "teddy bear",
86 |   "hair drier",
87 |   "toothbrush"
88 | ]
89 | 
90 | // anchor boxes
91 | let anchors: [Float] = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]
92 | 
93 | /**
94 |   Removes bounding boxes that overlap too much with other boxes that have
95 |   a higher score.
96 | 
97 |   Based on code from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/non_max_suppression_op.cc
98 | 
99 |   - Parameters:
100 |     - boxes: an array of bounding boxes and their scores
101 |     - limit: the maximum number of boxes that will be selected
102 |     - threshold: used to decide whether boxes overlap too much
103 | */
104 | func nonMaxSuppression(boxes: [YOLO.Prediction], limit: Int, threshold: Float) -> [YOLO.Prediction] {
105 | 
106 |   // Do an argsort on the confidence scores, from high to low.
107 |   let sortedIndices = boxes.indices.sorted { boxes[$0].score > boxes[$1].score }
108 | 
109 |   var selected: [YOLO.Prediction] = []
110 |   var active = [Bool](repeating: true, count: boxes.count)
111 |   var numActive = active.count
112 | 
113 |   // The algorithm is simple: Start with the box that has the highest score.
114 |   // Remove any remaining boxes that overlap it more than the given threshold
115 |   // amount. If there are any boxes left (i.e. these did not overlap with any
116 |   // previous boxes), then repeat this procedure, until no more boxes remain
117 |   // or the limit has been reached.
118 |   outer: for i in 0..<boxes.count {
119 |     if active[i] {
120 |       let boxA = boxes[sortedIndices[i]]
121 |       selected.append(boxA)
122 |       if selected.count >= limit { break }
123 | 
124 |       for j in i+1..<boxes.count {
125 |         if active[j] {
126 |           let boxB = boxes[sortedIndices[j]]
127 |           if IOU(a: boxA.rect, b: boxB.rect) > threshold {
128 |             active[j] = false
129 |             numActive -= 1
130 |             if numActive <= 0 { break outer }
131 |           }
132 |         }
133 |       }
134 |     }
135 |   }
136 |   return selected
137 | }
138 | 
139 | /**
140 |   Computes intersection-over-union overlap between two bounding boxes.
141 | */
142 | public func IOU(a: CGRect, b: CGRect) -> Float {
143 |   let areaA = a.width * a.height
144 |   if areaA <= 0 { return 0 }
145 | 
146 |   let areaB = b.width * b.height
147 |   if areaB <= 0 { return 0 }
148 | 
149 |   let intersectionMinX = max(a.minX, b.minX)
150 |   let intersectionMinY = max(a.minY, b.minY)
151 |   let intersectionMaxX = min(a.maxX, b.maxX)
152 |   let intersectionMaxY = min(a.maxY, b.maxY)
153 |   let intersectionArea = max(intersectionMaxY - intersectionMinY, 0) *
154 |                          max(intersectionMaxX - intersectionMinX, 0)
155 |   return Float(intersectionArea / (areaA + areaB - intersectionArea))
156 | }
157 | 
158 | extension Array where Element: Comparable {
159 |   /**
160 |     Returns the index and value of the largest element in the array.
161 |   */
162 |   public func argmax() -> (Int, Element) {
163 |     precondition(self.count > 0)
164 |     var maxIndex = 0
165 |     var maxValue = self[0]
166 |     for i in 1..<self.count {
167 |       if self[i] > maxValue {
168 |         maxValue = self[i]
169 |         maxIndex = i
170 |       }
171 |     }
172 |     return (maxIndex, maxValue)
173 |   }
174 | }
175 | 
176 | /**
177 |   Logistic sigmoid.
178 | */
179 | public func sigmoid(_ x: Float) -> Float {
180 |   return 1 / (1 + exp(-x))
181 | }
182 | 
183 | /**
184 |   Computes the "softmax" function over an array.
185 | 
186 |   Based on code from https://github.com/nikolaypavlov/MLPNeuralNet/
187 | 
188 |   This is what softmax looks like in "pseudocode" (actually using Python
189 |   and numpy):
190 | 
191 |     x -= np.max(x)
192 |     exp_scores = np.exp(x)
193 |     softmax = exp_scores / np.sum(exp_scores)
194 | 
195 |   First we shift the values of x so that the highest value in the array is 0.
196 |   This ensures numerical stability with the exponents, so they don't blow up.
197 | */
198 | public func softmax(_ x: [Float]) -> [Float] {
199 |   var x = x
200 |   let len = vDSP_Length(x.count)
201 | 
202 |   // Find the maximum value in the input array.
203 |   var max: Float = 0
204 |   vDSP_maxv(x, 1, &max, len)
205 | 
206 |   // Subtract the maximum from all the elements in the array.
207 |   // Now the highest value in the array is 0.
208 |   max = -max
209 |   vDSP_vsadd(x, 1, &max, &x, 1, len)
210 | 
211 |   // Exponentiate all the elements in the array.
212 |   var count = Int32(x.count)
213 |   vvexpf(&x, x, &count)
214 | 
215 |   // Compute the sum of all exponentiated values.
216 |   var sum: Float = 0
217 |   vDSP_sve(x, 1, &sum, len)
218 | 
219 |   // Divide each element by the sum. This normalizes the array contents
220 |   // so that they all add up to 1.
221 |   vDSP_vsdiv(x, 1, &sum, &x, 1, len)
222 | 
223 |   return x
224 | }
225 | 
--------------------------------------------------------------------------------
/YOLO-CoreML/YOLO-CoreML/Helpers/VideoCapture.swift:
--------------------------------------------------------------------------------
1 | import UIKit
2 | import AVFoundation
3 | import CoreVideo
4 | 
5 | public protocol VideoCaptureDelegate: class {
6 |   func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame: CVPixelBuffer?, timestamp: CMTime)
7 | }
8 | 
9 | public class VideoCapture: NSObject {
10 |   public var previewLayer: AVCaptureVideoPreviewLayer?
11 |   public weak var delegate: VideoCaptureDelegate?
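  // `fps` is the rate at which frames are handed to the delegate, not the camera's
  // capture rate: captureOutput(_:didOutput:from:) below only forwards a frame when
  // at least 1/fps seconds have passed since the last one it delivered.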
12 | public var fps = 15 13 | 14 | let captureSession = AVCaptureSession() 15 | let videoOutput = AVCaptureVideoDataOutput() 16 | let queue = DispatchQueue(label: "net.machinethink.camera-queue") 17 | 18 | var lastTimestamp = CMTime() 19 | 20 | public func setUp(sessionPreset: AVCaptureSession.Preset = .medium, 21 | completion: @escaping (Bool) -> Void) { 22 | queue.async { 23 | let success = self.setUpCamera(sessionPreset: sessionPreset) 24 | DispatchQueue.main.async { 25 | completion(success) 26 | } 27 | } 28 | } 29 | 30 | func setUpCamera(sessionPreset: AVCaptureSession.Preset) -> Bool { 31 | captureSession.beginConfiguration() 32 | captureSession.sessionPreset = sessionPreset 33 | 34 | guard let captureDevice = AVCaptureDevice.default(for: AVMediaType.video) else { 35 | print("Error: no video devices available") 36 | return false 37 | } 38 | 39 | guard let videoInput = try? AVCaptureDeviceInput(device: captureDevice) else { 40 | print("Error: could not create AVCaptureDeviceInput") 41 | return false 42 | } 43 | 44 | if captureSession.canAddInput(videoInput) { 45 | captureSession.addInput(videoInput) 46 | } 47 | 48 | let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession) 49 | previewLayer.videoGravity = AVLayerVideoGravity.resizeAspect 50 | previewLayer.connection?.videoOrientation = .portrait 51 | self.previewLayer = previewLayer 52 | 53 | let settings: [String : Any] = [ 54 | kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA), 55 | ] 56 | 57 | videoOutput.videoSettings = settings 58 | videoOutput.alwaysDiscardsLateVideoFrames = true 59 | videoOutput.setSampleBufferDelegate(self, queue: queue) 60 | if captureSession.canAddOutput(videoOutput) { 61 | captureSession.addOutput(videoOutput) 62 | } 63 | 64 | // We want the buffers to be in portrait orientation otherwise they are 65 | // rotated by 90 degrees. Need to set this _after_ addOutput()! 66 | videoOutput.connection(with: AVMediaType.video)?.videoOrientation = .portrait 67 | 68 | captureSession.commitConfiguration() 69 | return true 70 | } 71 | 72 | public func start() { 73 | if !captureSession.isRunning { 74 | captureSession.startRunning() 75 | } 76 | } 77 | 78 | public func stop() { 79 | if captureSession.isRunning { 80 | captureSession.stopRunning() 81 | } 82 | } 83 | } 84 | 85 | extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { 86 | public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { 87 | // Because lowering the capture device's FPS looks ugly in the preview, 88 | // we capture at full speed but only call the delegate at its desired 89 | // framerate. 
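    // With the default fps of 15, for example, a frame is forwarded at most once
    // every 1/15 s (about 67 ms); frames that arrive sooner are simply skipped.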
90 | let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer) 91 | let deltaTime = timestamp - lastTimestamp 92 | if deltaTime >= CMTimeMake(1, Int32(fps)) { 93 | lastTimestamp = timestamp 94 | let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) 95 | delegate?.videoCapture(self, didCaptureVideoFrame: imageBuffer, timestamp: timestamp) 96 | } 97 | } 98 | 99 | public func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { 100 | //print("dropped frame") 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | APPL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | LSRequiresIPhoneOS 22 | 23 | NSCameraUsageDescription 24 | Let's do some deep learning! 25 | UILaunchStoryboardName 26 | Main 27 | UIMainStoryboardFile 28 | Main 29 | UIRequiredDeviceCapabilities 30 | 31 | armv7 32 | 33 | UIRequiresFullScreen 34 | 35 | UIStatusBarStyle 36 | UIStatusBarStyleLightContent 37 | UISupportedInterfaceOrientations 38 | 39 | UIInterfaceOrientationPortrait 40 | 41 | UISupportedInterfaceOrientations~ipad 42 | 43 | UIInterfaceOrientationPortrait 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Main.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Menlo-Regular 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/ViewController.swift: -------------------------------------------------------------------------------- 1 | import UIKit 2 | import Vision 3 | import AVFoundation 4 | import CoreMedia 5 | import VideoToolbox 6 | 7 | class ViewController: UIViewController { 8 | @IBOutlet weak var videoPreview: UIView! 9 | @IBOutlet weak var timeLabel: UILabel! 10 | 11 | let yolo = YOLO() 12 | 13 | var videoCapture: VideoCapture! 14 | var request: VNCoreMLRequest! 15 | var startTimes: [CFTimeInterval] = [] 16 | 17 | var boundingBoxes = [BoundingBox]() 18 | var colors: [UIColor] = [] 19 | 20 | var framesDone = 0 21 | var frameCapturingStartTime = CACurrentMediaTime() 22 | let semaphore = DispatchSemaphore(value: 2) 23 | 24 | override func viewDidLoad() { 25 | super.viewDidLoad() 26 | 27 | timeLabel.text = "" 28 | 29 | setUpBoundingBoxes() 30 | setUpVision() 31 | setUpCamera() 32 | 33 | frameCapturingStartTime = CACurrentMediaTime() 34 | } 35 | 36 | override func didReceiveMemoryWarning() { 37 | super.didReceiveMemoryWarning() 38 | print(#function) 39 | } 40 | 41 | // MARK: - Initialization 42 | 43 | func setUpBoundingBoxes() { 44 | for _ in 0.. Double { 147 | // Measure how many frames were actually delivered per second. 
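    // The counters below are reset roughly once per second, so the returned value
    // is effectively a one-second moving average of the delivered frame rate.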
148 | framesDone += 1 149 | let frameCapturingElapsed = CACurrentMediaTime() - frameCapturingStartTime 150 | let currentFPSDelivered = Double(framesDone) / frameCapturingElapsed 151 | if frameCapturingElapsed > 1 { 152 | framesDone = 0 153 | frameCapturingStartTime = CACurrentMediaTime() 154 | } 155 | return currentFPSDelivered 156 | } 157 | 158 | func show(predictions: [YOLO.Prediction]) { 159 | for i in 0.. [Prediction] { 26 | if let output = try? model.prediction(input__0: image) { 27 | return computeBoundingBoxes(features: output.output__0) 28 | } else { 29 | return [] 30 | } 31 | } 32 | 33 | public func computeBoundingBoxes(features: MLMultiArray) -> [Prediction] { 34 | // assert(features.count == 125*13*13) 35 | assert(features.count == 425*19*19) 36 | 37 | var predictions = [Prediction]() 38 | 39 | let blockSize: Float = 32 40 | let gridHeight = 19 41 | let gridWidth = 19 42 | let boxesPerCell = 5;//Int(anchors.count/5) 43 | let numClasses = 80 44 | 45 | // The 608x608 image is divided into a 19x19 grid. Each of these grid cells 46 | // will predict 5 bounding boxes (boxesPerCell). A bounding box consists of 47 | // five data items: x, y, width, height, and a confidence score. Each grid 48 | // cell also predicts which class each bounding box belongs to. 49 | // 50 | // The "features" array therefore contains (numClasses + 5)*boxesPerCell 51 | // values for each grid cell, i.e. 425 channels. The total features array 52 | // contains 425x19x19 elements. 53 | 54 | // NOTE: It turns out that accessing the elements in the multi-array as 55 | // `features[[channel, cy, cx] as [NSNumber]].floatValue` is kinda slow. 56 | // It's much faster to use direct memory access to the features. 57 | let featurePointer = UnsafeMutablePointer(OpaquePointer(features.dataPointer)) 58 | let channelStride = features.strides[0].intValue 59 | let yStride = features.strides[1].intValue 60 | let xStride = features.strides[2].intValue 61 | 62 | func offset(_ channel: Int, _ x: Int, _ y: Int) -> Int { 63 | return channel*channelStride + y*yStride + x*xStride 64 | } 65 | 66 | for cy in 0.. confidenceThreshold { 133 | let rect = CGRect(x: CGFloat(x - w/2), y: CGFloat(y - h/2), 134 | width: CGFloat(w), height: CGFloat(h)) 135 | 136 | let prediction = Prediction(classIndex: detectedClass, 137 | score: confidenceInClass, 138 | rect: rect) 139 | predictions.append(prediction) 140 | } 141 | } 142 | } 143 | } 144 | 145 | // We already filtered out any bounding boxes that have very low scores, 146 | // but there still may be boxes that overlap too much with others. We'll 147 | // use "non-maximum suppression" to prune those duplicate bounding boxes. 148 | return nonMaxSuppression(boxes: predictions, limit: YOLO.maxBoundingBoxes, threshold: iouThreshold) 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /YOLO.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syshen/YOLO-CoreML/2c973fb2fa74ebdab7f3462d9c0f90f0ddcb3390/YOLO.jpg -------------------------------------------------------------------------------- /download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | curl -L https://goo.gl/TgrwZx > YOLO-CoreML/YOLO-CoreML/yolo.mlmodel 4 | 5 | curl -L https://goo.gl/bGfVRk > Converter/yolo.pb 6 | 7 | --------------------------------------------------------------------------------
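For readers piecing the files together: YOLO turns a CVPixelBuffer into Prediction values, labels maps each classIndex to a readable name, and BoundingBox draws a labelled rectangle on a CALayer. The sketch below shows that flow in isolation. It is illustrative only and not part of the repository: setUpOverlay and runDetection are hypothetical helpers, the pixel buffer is assumed to already match the network's input resolution, and the returned rects are in the model's input-image coordinates, so a real app still has to scale them into the preview layer's coordinate space before drawing.

import UIKit
import CoreVideo

// Illustrative helpers only (not in the repo). Assumes the YOLO, BoundingBox and
// labels definitions from the files above are in scope.
let yolo = YOLO()
let box = BoundingBox()

// Call once, e.g. from viewDidLoad, to attach the box's layers to an overlay layer.
func setUpOverlay(_ overlay: CALayer) {
  box.addToLayer(overlay)
}

// Call per frame with a pixel buffer at the network's input resolution.
func runDetection(on pixelBuffer: CVPixelBuffer) {
  // Run the Core ML model and decode its grid output into bounding boxes.
  guard let predictions = try? yolo.predict(image: pixelBuffer),
        let best = predictions.first else {
    box.hide()
    return
  }

  // Show the highest-scoring box with its class name and confidence.
  // Note: best.rect is in input-image coordinates, not screen points.
  let caption = String(format: "%@ %.1f%%", labels[best.classIndex], best.score * 100)
  box.show(frame: best.rect, label: caption, color: .red)
}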