├── .gitignore ├── Common ├── AppDelegate.swift └── dog416.png ├── Converter ├── Convert_pb_coreml.ipynb ├── README.markdown └── yolo.meta ├── LICENSE.txt ├── README.markdown ├── YOLO-CoreML ├── YOLO-CoreML.xcodeproj │ ├── project.pbxproj │ └── project.xcworkspace │ │ └── contents.xcworkspacedata └── YOLO-CoreML │ ├── Assets.xcassets │ └── AppIcon.appiconset │ │ └── Contents.json │ ├── Base.lproj │ └── Main.storyboard │ ├── Helpers.swift │ ├── Helpers │ ├── BoundingBox.swift │ ├── Helpers.swift │ └── VideoCapture.swift │ ├── Info.plist │ ├── Main.storyboard │ ├── ViewController.swift │ └── YOLO.swift ├── YOLO.jpg └── download.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Xcode 2 | build/ 3 | DerivedData/ 4 | 5 | *.pbxuser 6 | !default.pbxuser 7 | *.mode1v3 8 | !default.mode1v3 9 | *.mode2v3 10 | !default.mode2v3 11 | *.perspectivev3 12 | !default.perspectivev3 13 | 14 | *.xcuserstate 15 | xcuserdata/ 16 | 17 | ## Other 18 | *.moved-aside 19 | *.xccheckout 20 | *.xcscmblueprint 21 | 22 | profile 23 | *.hmap 24 | *.ipa 25 | 26 | # CocoaPods 27 | Pods/ 28 | !Podfile.lock 29 | 30 | # Temporary files 31 | .DS_Store 32 | .Trashes 33 | .Spotlight-V100 34 | *.swp 35 | *.lock 36 | 37 | # Python 38 | __pycache__/ 39 | *.py[cod] 40 | *$py.class 41 | 42 | # Jupyter Notebook 43 | .ipynb_checkpoints 44 | 45 | *.mlmodel 46 | *.pb 47 | -------------------------------------------------------------------------------- /Common/AppDelegate.swift: -------------------------------------------------------------------------------- 1 | import UIKit 2 | 3 | @UIApplicationMain 4 | class AppDelegate: UIResponder, UIApplicationDelegate { 5 | 6 | var window: UIWindow? 7 | 8 | func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool { 9 | // Override point for customization after application launch. 
10 | return true 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /Common/dog416.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syshen/YOLO-CoreML/2c973fb2fa74ebdab7f3462d9c0f90f0ddcb3390/Common/dog416.png -------------------------------------------------------------------------------- /Converter/Convert_pb_coreml.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tfcoreml as tf_converter" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import tensorflow as tf" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Output the graph\n", 30 | "\n", 31 | "In this step we just want to know the exact name of input and output nodes in the tensorflow graph" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": { 38 | "scrolled": true 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "input\n", 46 | "Pad/paddings\n", 47 | "Pad\n", 48 | "0-convolutional/filter\n", 49 | "0-convolutional\n", 50 | "sub/y\n", 51 | "sub\n", 52 | "truediv/y\n", 53 | "truediv\n", 54 | "mul/y\n", 55 | "mul\n", 56 | "BiasAdd/bias\n", 57 | "BiasAdd\n", 58 | "mul_1/x\n", 59 | "mul_1\n", 60 | "1-leaky\n", 61 | "2-maxpool\n", 62 | "Pad_1/paddings\n", 63 | "Pad_1\n", 64 | "3-convolutional/filter\n", 65 | "3-convolutional\n", 66 | "sub_1/y\n", 67 | "sub_1\n", 68 | "truediv_1/y\n", 69 | "truediv_1\n", 70 | "mul_2/y\n", 71 | "mul_2\n", 72 | "BiasAdd_1/bias\n", 73 | "BiasAdd_1\n", 74 | "mul_3/x\n", 75 | "mul_3\n", 76 | "4-leaky\n", 77 | "5-maxpool\n", 78 | "Pad_2/paddings\n", 79 | "Pad_2\n", 80 | "6-convolutional/filter\n", 81 | "6-convolutional\n", 82 | "sub_2/y\n", 83 | "sub_2\n", 84 | "truediv_2/y\n", 85 | "truediv_2\n", 86 | "mul_4/y\n", 87 | "mul_4\n", 88 | "BiasAdd_2/bias\n", 89 | "BiasAdd_2\n", 90 | "mul_5/x\n", 91 | "mul_5\n", 92 | "7-leaky\n", 93 | "Pad_3/paddings\n", 94 | "Pad_3\n", 95 | "8-convolutional/filter\n", 96 | "8-convolutional\n", 97 | "sub_3/y\n", 98 | "sub_3\n", 99 | "truediv_3/y\n", 100 | "truediv_3\n", 101 | "mul_6/y\n", 102 | "mul_6\n", 103 | "BiasAdd_3/bias\n", 104 | "BiasAdd_3\n", 105 | "mul_7/x\n", 106 | "mul_7\n", 107 | "9-leaky\n", 108 | "Pad_4/paddings\n", 109 | "Pad_4\n", 110 | "10-convolutional/filter\n", 111 | "10-convolutional\n", 112 | "sub_4/y\n", 113 | "sub_4\n", 114 | "truediv_4/y\n", 115 | "truediv_4\n", 116 | "mul_8/y\n", 117 | "mul_8\n", 118 | "BiasAdd_4/bias\n", 119 | "BiasAdd_4\n", 120 | "mul_9/x\n", 121 | "mul_9\n", 122 | "11-leaky\n", 123 | "12-maxpool\n", 124 | "Pad_5/paddings\n", 125 | "Pad_5\n", 126 | "13-convolutional/filter\n", 127 | "13-convolutional\n", 128 | "sub_5/y\n", 129 | "sub_5\n", 130 | "truediv_5/y\n", 131 | "truediv_5\n", 132 | "mul_10/y\n", 133 | "mul_10\n", 134 | "BiasAdd_5/bias\n", 135 | "BiasAdd_5\n", 136 | "mul_11/x\n", 137 | "mul_11\n", 138 | "14-leaky\n", 139 | "Pad_6/paddings\n", 140 | "Pad_6\n", 141 | "15-convolutional/filter\n", 142 | "15-convolutional\n", 143 | "sub_6/y\n", 144 | "sub_6\n", 145 | "truediv_6/y\n", 146 | "truediv_6\n", 147 | "mul_12/y\n", 148 | "mul_12\n", 149 | 
"BiasAdd_6/bias\n", 150 | "BiasAdd_6\n", 151 | "mul_13/x\n", 152 | "mul_13\n", 153 | "16-leaky\n", 154 | "Pad_7/paddings\n", 155 | "Pad_7\n", 156 | "17-convolutional/filter\n", 157 | "17-convolutional\n", 158 | "sub_7/y\n", 159 | "sub_7\n", 160 | "truediv_7/y\n", 161 | "truediv_7\n", 162 | "mul_14/y\n", 163 | "mul_14\n", 164 | "BiasAdd_7/bias\n", 165 | "BiasAdd_7\n", 166 | "mul_15/x\n", 167 | "mul_15\n", 168 | "18-leaky\n", 169 | "19-maxpool\n", 170 | "Pad_8/paddings\n", 171 | "Pad_8\n", 172 | "20-convolutional/filter\n", 173 | "20-convolutional\n", 174 | "sub_8/y\n", 175 | "sub_8\n", 176 | "truediv_8/y\n", 177 | "truediv_8\n", 178 | "mul_16/y\n", 179 | "mul_16\n", 180 | "BiasAdd_8/bias\n", 181 | "BiasAdd_8\n", 182 | "mul_17/x\n", 183 | "mul_17\n", 184 | "21-leaky\n", 185 | "Pad_9/paddings\n", 186 | "Pad_9\n", 187 | "22-convolutional/filter\n", 188 | "22-convolutional\n", 189 | "sub_9/y\n", 190 | "sub_9\n", 191 | "truediv_9/y\n", 192 | "truediv_9\n", 193 | "mul_18/y\n", 194 | "mul_18\n", 195 | "BiasAdd_9/bias\n", 196 | "BiasAdd_9\n", 197 | "mul_19/x\n", 198 | "mul_19\n", 199 | "23-leaky\n", 200 | "Pad_10/paddings\n", 201 | "Pad_10\n", 202 | "24-convolutional/filter\n", 203 | "24-convolutional\n", 204 | "sub_10/y\n", 205 | "sub_10\n", 206 | "truediv_10/y\n", 207 | "truediv_10\n", 208 | "mul_20/y\n", 209 | "mul_20\n", 210 | "BiasAdd_10/bias\n", 211 | "BiasAdd_10\n", 212 | "mul_21/x\n", 213 | "mul_21\n", 214 | "25-leaky\n", 215 | "Pad_11/paddings\n", 216 | "Pad_11\n", 217 | "26-convolutional/filter\n", 218 | "26-convolutional\n", 219 | "sub_11/y\n", 220 | "sub_11\n", 221 | "truediv_11/y\n", 222 | "truediv_11\n", 223 | "mul_22/y\n", 224 | "mul_22\n", 225 | "BiasAdd_11/bias\n", 226 | "BiasAdd_11\n", 227 | "mul_23/x\n", 228 | "mul_23\n", 229 | "27-leaky\n", 230 | "Pad_12/paddings\n", 231 | "Pad_12\n", 232 | "28-convolutional/filter\n", 233 | "28-convolutional\n", 234 | "sub_12/y\n", 235 | "sub_12\n", 236 | "truediv_12/y\n", 237 | "truediv_12\n", 238 | "mul_24/y\n", 239 | "mul_24\n", 240 | "BiasAdd_12/bias\n", 241 | "BiasAdd_12\n", 242 | "mul_25/x\n", 243 | "mul_25\n", 244 | "29-leaky\n", 245 | "30-maxpool\n", 246 | "Pad_13/paddings\n", 247 | "Pad_13\n", 248 | "31-convolutional/filter\n", 249 | "31-convolutional\n", 250 | "sub_13/y\n", 251 | "sub_13\n", 252 | "truediv_13/y\n", 253 | "truediv_13\n", 254 | "mul_26/y\n", 255 | "mul_26\n", 256 | "BiasAdd_13/bias\n", 257 | "BiasAdd_13\n", 258 | "mul_27/x\n", 259 | "mul_27\n", 260 | "32-leaky\n", 261 | "Pad_14/paddings\n", 262 | "Pad_14\n", 263 | "33-convolutional/filter\n", 264 | "33-convolutional\n", 265 | "sub_14/y\n", 266 | "sub_14\n", 267 | "truediv_14/y\n", 268 | "truediv_14\n", 269 | "mul_28/y\n", 270 | "mul_28\n", 271 | "BiasAdd_14/bias\n", 272 | "BiasAdd_14\n", 273 | "mul_29/x\n", 274 | "mul_29\n", 275 | "34-leaky\n", 276 | "Pad_15/paddings\n", 277 | "Pad_15\n", 278 | "35-convolutional/filter\n", 279 | "35-convolutional\n", 280 | "sub_15/y\n", 281 | "sub_15\n", 282 | "truediv_15/y\n", 283 | "truediv_15\n", 284 | "mul_30/y\n", 285 | "mul_30\n", 286 | "BiasAdd_15/bias\n", 287 | "BiasAdd_15\n", 288 | "mul_31/x\n", 289 | "mul_31\n", 290 | "36-leaky\n", 291 | "Pad_16/paddings\n", 292 | "Pad_16\n", 293 | "37-convolutional/filter\n", 294 | "37-convolutional\n", 295 | "sub_16/y\n", 296 | "sub_16\n", 297 | "truediv_16/y\n", 298 | "truediv_16\n", 299 | "mul_32/y\n", 300 | "mul_32\n", 301 | "BiasAdd_16/bias\n", 302 | "BiasAdd_16\n", 303 | "mul_33/x\n", 304 | "mul_33\n", 305 | "38-leaky\n", 306 | "Pad_17/paddings\n", 307 | "Pad_17\n", 308 | 
"39-convolutional/filter\n", 309 | "39-convolutional\n", 310 | "sub_17/y\n", 311 | "sub_17\n", 312 | "truediv_17/y\n", 313 | "truediv_17\n", 314 | "mul_34/y\n", 315 | "mul_34\n", 316 | "BiasAdd_17/bias\n", 317 | "BiasAdd_17\n", 318 | "mul_35/x\n", 319 | "mul_35\n", 320 | "40-leaky\n", 321 | "Pad_18/paddings\n", 322 | "Pad_18\n", 323 | "41-convolutional/filter\n", 324 | "41-convolutional\n", 325 | "sub_18/y\n", 326 | "sub_18\n", 327 | "truediv_18/y\n", 328 | "truediv_18\n", 329 | "mul_36/y\n", 330 | "mul_36\n", 331 | "BiasAdd_18/bias\n", 332 | "BiasAdd_18\n", 333 | "mul_37/x\n", 334 | "mul_37\n", 335 | "42-leaky\n", 336 | "Pad_19/paddings\n", 337 | "Pad_19\n", 338 | "43-convolutional/filter\n", 339 | "43-convolutional\n", 340 | "sub_19/y\n", 341 | "sub_19\n", 342 | "truediv_19/y\n", 343 | "truediv_19\n", 344 | "mul_38/y\n", 345 | "mul_38\n", 346 | "BiasAdd_19/bias\n", 347 | "BiasAdd_19\n", 348 | "mul_39/x\n", 349 | "mul_39\n", 350 | "44-leaky\n", 351 | "concat/concat_dim\n", 352 | "concat\n", 353 | "Pad_20/paddings\n", 354 | "Pad_20\n", 355 | "46-convolutional/filter\n", 356 | "46-convolutional\n", 357 | "sub_20/y\n", 358 | "sub_20\n", 359 | "truediv_20/y\n", 360 | "truediv_20\n", 361 | "mul_40/y\n", 362 | "mul_40\n", 363 | "BiasAdd_20/bias\n", 364 | "BiasAdd_20\n", 365 | "mul_41/x\n", 366 | "mul_41\n", 367 | "47-leaky\n", 368 | "ExtractImagePatches\n", 369 | "concat_1/axis\n", 370 | "concat_1\n", 371 | "Pad_21/paddings\n", 372 | "Pad_21\n", 373 | "50-convolutional/filter\n", 374 | "50-convolutional\n", 375 | "sub_21/y\n", 376 | "sub_21\n", 377 | "truediv_21/y\n", 378 | "truediv_21\n", 379 | "mul_42/y\n", 380 | "mul_42\n", 381 | "BiasAdd_21/bias\n", 382 | "BiasAdd_21\n", 383 | "mul_43/x\n", 384 | "mul_43\n", 385 | "51-leaky\n", 386 | "Pad_22/paddings\n", 387 | "Pad_22\n", 388 | "52-convolutional/filter\n", 389 | "52-convolutional\n", 390 | "BiasAdd_22/bias\n", 391 | "BiasAdd_22\n", 392 | "output\n", 393 | "init\n" 394 | ] 395 | } 396 | ], 397 | "source": [ 398 | "def load_graph(frozen_graph_filename):\n", 399 | " # We load the protobuf file from the disk and parse it to retrieve the \n", 400 | " # unserialized graph_def\n", 401 | " with tf.gfile.GFile(frozen_graph_filename, \"rb\") as f:\n", 402 | " graph_def = tf.GraphDef()\n", 403 | " graph_def.ParseFromString(f.read())\n", 404 | "\n", 405 | " # Then, we import the graph_def into a new Graph and return it \n", 406 | " with tf.Graph().as_default() as graph:\n", 407 | " tf.import_graph_def(graph_def, name=\"\")\n", 408 | " return graph\n", 409 | "\n", 410 | "graph = load_graph('yolo.pb')\n", 411 | "for op in graph.get_operations(): \n", 412 | " print (op.name)" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "## Convert to mlmodel format\n", 420 | "\n", 421 | "In the previose step, we know the output and input node names. And we can also get the input shape size from the cfg file. We specify these in the convert procedure and save the mlmodel file as `yolo.mlmodel` ." 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 14, 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "name": "stdout", 431 | "output_type": "stream", 432 | "text": [ 433 | "(80, ' classes')\n", 434 | "Shapes not found for 189 tensors. Executing graph to determine shapes. 
\n", 435 | "Automatic shape interpretation succeeded for input blob input:0\n", 436 | "161/349: Converting op name: input ( type: Placeholder )\n", 437 | "Skipping name of placeholder\n", 438 | "162/349: Converting op name: Pad ( type: Pad )\n", 439 | "163/349: Converting op name: 0-convolutional ( type: Conv2D )\n", 440 | "164/349: Converting op name: sub ( type: Sub )\n", 441 | "165/349: Converting op name: truediv ( type: RealDiv )\n", 442 | "166/349: Converting op name: mul ( type: Mul )\n", 443 | "167/349: Converting op name: BiasAdd ( type: BiasAdd )\n", 444 | "168/349: Converting op name: mul_1 ( type: Mul )\n", 445 | "169/349: Converting op name: 1-leaky ( type: Maximum )\n", 446 | "170/349: Converting op name: 2-maxpool ( type: MaxPool )\n", 447 | "171/349: Converting op name: Pad_1 ( type: Pad )\n", 448 | "172/349: Converting op name: 3-convolutional ( type: Conv2D )\n", 449 | "173/349: Converting op name: sub_1 ( type: Sub )\n", 450 | "174/349: Converting op name: truediv_1 ( type: RealDiv )\n", 451 | "175/349: Converting op name: mul_2 ( type: Mul )\n", 452 | "176/349: Converting op name: BiasAdd_1 ( type: BiasAdd )\n", 453 | "177/349: Converting op name: mul_3 ( type: Mul )\n", 454 | "178/349: Converting op name: 4-leaky ( type: Maximum )\n", 455 | "179/349: Converting op name: 5-maxpool ( type: MaxPool )\n", 456 | "180/349: Converting op name: Pad_2 ( type: Pad )\n", 457 | "181/349: Converting op name: 6-convolutional ( type: Conv2D )\n", 458 | "182/349: Converting op name: sub_2 ( type: Sub )\n", 459 | "183/349: Converting op name: truediv_2 ( type: RealDiv )\n", 460 | "184/349: Converting op name: mul_4 ( type: Mul )\n", 461 | "185/349: Converting op name: BiasAdd_2 ( type: BiasAdd )\n", 462 | "186/349: Converting op name: mul_5 ( type: Mul )\n", 463 | "187/349: Converting op name: 7-leaky ( type: Maximum )\n", 464 | "188/349: Converting op name: Pad_3 ( type: Pad )\n", 465 | "189/349: Converting op name: 8-convolutional ( type: Conv2D )\n", 466 | "190/349: Converting op name: sub_3 ( type: Sub )\n", 467 | "191/349: Converting op name: truediv_3 ( type: RealDiv )\n", 468 | "192/349: Converting op name: mul_6 ( type: Mul )\n", 469 | "193/349: Converting op name: BiasAdd_3 ( type: BiasAdd )\n", 470 | "194/349: Converting op name: mul_7 ( type: Mul )\n", 471 | "195/349: Converting op name: 9-leaky ( type: Maximum )\n", 472 | "196/349: Converting op name: Pad_4 ( type: Pad )\n", 473 | "197/349: Converting op name: 10-convolutional ( type: Conv2D )\n", 474 | "198/349: Converting op name: sub_4 ( type: Sub )\n", 475 | "199/349: Converting op name: truediv_4 ( type: RealDiv )\n", 476 | "200/349: Converting op name: mul_8 ( type: Mul )\n", 477 | "201/349: Converting op name: BiasAdd_4 ( type: BiasAdd )\n", 478 | "202/349: Converting op name: mul_9 ( type: Mul )\n", 479 | "203/349: Converting op name: 11-leaky ( type: Maximum )\n", 480 | "204/349: Converting op name: 12-maxpool ( type: MaxPool )\n", 481 | "205/349: Converting op name: Pad_5 ( type: Pad )\n", 482 | "206/349: Converting op name: 13-convolutional ( type: Conv2D )\n", 483 | "207/349: Converting op name: sub_5 ( type: Sub )\n", 484 | "208/349: Converting op name: truediv_5 ( type: RealDiv )\n", 485 | "209/349: Converting op name: mul_10 ( type: Mul )\n", 486 | "210/349: Converting op name: BiasAdd_5 ( type: BiasAdd )\n", 487 | "211/349: Converting op name: mul_11 ( type: Mul )\n", 488 | "212/349: Converting op name: 14-leaky ( type: Maximum )\n", 489 | "213/349: Converting op name: Pad_6 ( type: Pad )\n", 490 | "214/349: 
Converting op name: 15-convolutional ( type: Conv2D )\n", 491 | "215/349: Converting op name: sub_6 ( type: Sub )\n", 492 | "216/349: Converting op name: truediv_6 ( type: RealDiv )\n", 493 | "217/349: Converting op name: mul_12 ( type: Mul )\n", 494 | "218/349: Converting op name: BiasAdd_6 ( type: BiasAdd )\n", 495 | "219/349: Converting op name: mul_13 ( type: Mul )\n", 496 | "220/349: Converting op name: 16-leaky ( type: Maximum )\n", 497 | "221/349: Converting op name: Pad_7 ( type: Pad )\n", 498 | "222/349: Converting op name: 17-convolutional ( type: Conv2D )\n", 499 | "223/349: Converting op name: sub_7 ( type: Sub )\n", 500 | "224/349: Converting op name: truediv_7 ( type: RealDiv )\n", 501 | "225/349: Converting op name: mul_14 ( type: Mul )\n", 502 | "226/349: Converting op name: BiasAdd_7 ( type: BiasAdd )\n", 503 | "227/349: Converting op name: mul_15 ( type: Mul )\n", 504 | "228/349: Converting op name: 18-leaky ( type: Maximum )\n", 505 | "229/349: Converting op name: 19-maxpool ( type: MaxPool )\n", 506 | "230/349: Converting op name: Pad_8 ( type: Pad )\n", 507 | "231/349: Converting op name: 20-convolutional ( type: Conv2D )\n", 508 | "232/349: Converting op name: sub_8 ( type: Sub )\n", 509 | "233/349: Converting op name: truediv_8 ( type: RealDiv )\n", 510 | "234/349: Converting op name: mul_16 ( type: Mul )\n", 511 | "235/349: Converting op name: BiasAdd_8 ( type: BiasAdd )\n", 512 | "236/349: Converting op name: mul_17 ( type: Mul )\n", 513 | "237/349: Converting op name: 21-leaky ( type: Maximum )\n", 514 | "238/349: Converting op name: Pad_9 ( type: Pad )\n", 515 | "239/349: Converting op name: 22-convolutional ( type: Conv2D )\n", 516 | "240/349: Converting op name: sub_9 ( type: Sub )\n", 517 | "241/349: Converting op name: truediv_9 ( type: RealDiv )\n", 518 | "242/349: Converting op name: mul_18 ( type: Mul )\n", 519 | "243/349: Converting op name: BiasAdd_9 ( type: BiasAdd )\n", 520 | "244/349: Converting op name: mul_19 ( type: Mul )\n", 521 | "245/349: Converting op name: 23-leaky ( type: Maximum )\n", 522 | "246/349: Converting op name: Pad_10 ( type: Pad )\n", 523 | "247/349: Converting op name: 24-convolutional ( type: Conv2D )\n", 524 | "248/349: Converting op name: sub_10 ( type: Sub )\n", 525 | "249/349: Converting op name: truediv_10 ( type: RealDiv )\n", 526 | "250/349: Converting op name: mul_20 ( type: Mul )\n", 527 | "251/349: Converting op name: BiasAdd_10 ( type: BiasAdd )\n", 528 | "252/349: Converting op name: mul_21 ( type: Mul )\n", 529 | "253/349: Converting op name: 25-leaky ( type: Maximum )\n", 530 | "254/349: Converting op name: Pad_11 ( type: Pad )\n", 531 | "255/349: Converting op name: 26-convolutional ( type: Conv2D )\n", 532 | "256/349: Converting op name: sub_11 ( type: Sub )\n", 533 | "257/349: Converting op name: truediv_11 ( type: RealDiv )\n", 534 | "258/349: Converting op name: mul_22 ( type: Mul )\n", 535 | "259/349: Converting op name: BiasAdd_11 ( type: BiasAdd )\n", 536 | "260/349: Converting op name: mul_23 ( type: Mul )\n", 537 | "261/349: Converting op name: 27-leaky ( type: Maximum )\n", 538 | "262/349: Converting op name: Pad_12 ( type: Pad )\n", 539 | "263/349: Converting op name: 28-convolutional ( type: Conv2D )\n", 540 | "264/349: Converting op name: sub_12 ( type: Sub )\n", 541 | "265/349: Converting op name: truediv_12 ( type: RealDiv )\n", 542 | "266/349: Converting op name: mul_24 ( type: Mul )\n", 543 | "267/349: Converting op name: BiasAdd_12 ( type: BiasAdd )\n", 544 | "268/349: Converting op name: mul_25 ( 
type: Mul )\n", 545 | "269/349: Converting op name: 29-leaky ( type: Maximum )\n", 546 | "270/349: Converting op name: concat ( type: Identity )\n", 547 | "271/349: Converting op name: Pad_20 ( type: Pad )\n", 548 | "272/349: Converting op name: 46-convolutional ( type: Conv2D )\n", 549 | "273/349: Converting op name: sub_20 ( type: Sub )\n", 550 | "274/349: Converting op name: truediv_20 ( type: RealDiv )\n", 551 | "275/349: Converting op name: mul_40 ( type: Mul )\n", 552 | "276/349: Converting op name: BiasAdd_20 ( type: BiasAdd )\n", 553 | "277/349: Converting op name: mul_41 ( type: Mul )\n", 554 | "278/349: Converting op name: 47-leaky ( type: Maximum )\n", 555 | "279/349: Converting op name: ExtractImagePatches ( type: ExtractImagePatches )\n", 556 | "280/349: Converting op name: 30-maxpool ( type: MaxPool )\n", 557 | "281/349: Converting op name: Pad_13 ( type: Pad )\n", 558 | "282/349: Converting op name: 31-convolutional ( type: Conv2D )\n", 559 | "283/349: Converting op name: sub_13 ( type: Sub )\n", 560 | "284/349: Converting op name: truediv_13 ( type: RealDiv )\n", 561 | "285/349: Converting op name: mul_26 ( type: Mul )\n", 562 | "286/349: Converting op name: BiasAdd_13 ( type: BiasAdd )\n", 563 | "287/349: Converting op name: mul_27 ( type: Mul )\n", 564 | "288/349: Converting op name: 32-leaky ( type: Maximum )\n", 565 | "289/349: Converting op name: Pad_14 ( type: Pad )\n", 566 | "290/349: Converting op name: 33-convolutional ( type: Conv2D )\n", 567 | "291/349: Converting op name: sub_14 ( type: Sub )\n", 568 | "292/349: Converting op name: truediv_14 ( type: RealDiv )\n", 569 | "293/349: Converting op name: mul_28 ( type: Mul )\n", 570 | "294/349: Converting op name: BiasAdd_14 ( type: BiasAdd )\n", 571 | "295/349: Converting op name: mul_29 ( type: Mul )\n", 572 | "296/349: Converting op name: 34-leaky ( type: Maximum )\n", 573 | "297/349: Converting op name: Pad_15 ( type: Pad )\n", 574 | "298/349: Converting op name: 35-convolutional ( type: Conv2D )\n", 575 | "299/349: Converting op name: sub_15 ( type: Sub )\n", 576 | "300/349: Converting op name: truediv_15 ( type: RealDiv )\n", 577 | "301/349: Converting op name: mul_30 ( type: Mul )\n", 578 | "302/349: Converting op name: BiasAdd_15 ( type: BiasAdd )\n", 579 | "303/349: Converting op name: mul_31 ( type: Mul )\n", 580 | "304/349: Converting op name: 36-leaky ( type: Maximum )\n", 581 | "305/349: Converting op name: Pad_16 ( type: Pad )\n", 582 | "306/349: Converting op name: 37-convolutional ( type: Conv2D )\n" 583 | ] 584 | }, 585 | { 586 | "name": "stdout", 587 | "output_type": "stream", 588 | "text": [ 589 | "307/349: Converting op name: sub_16 ( type: Sub )\n", 590 | "308/349: Converting op name: truediv_16 ( type: RealDiv )\n", 591 | "309/349: Converting op name: mul_32 ( type: Mul )\n", 592 | "310/349: Converting op name: BiasAdd_16 ( type: BiasAdd )\n", 593 | "311/349: Converting op name: mul_33 ( type: Mul )\n", 594 | "312/349: Converting op name: 38-leaky ( type: Maximum )\n", 595 | "313/349: Converting op name: Pad_17 ( type: Pad )\n", 596 | "314/349: Converting op name: 39-convolutional ( type: Conv2D )\n", 597 | "315/349: Converting op name: sub_17 ( type: Sub )\n", 598 | "316/349: Converting op name: truediv_17 ( type: RealDiv )\n", 599 | "317/349: Converting op name: mul_34 ( type: Mul )\n", 600 | "318/349: Converting op name: BiasAdd_17 ( type: BiasAdd )\n", 601 | "319/349: Converting op name: mul_35 ( type: Mul )\n", 602 | "320/349: Converting op name: 40-leaky ( type: Maximum )\n", 603 | 
"321/349: Converting op name: Pad_18 ( type: Pad )\n", 604 | "322/349: Converting op name: 41-convolutional ( type: Conv2D )\n", 605 | "323/349: Converting op name: sub_18 ( type: Sub )\n", 606 | "324/349: Converting op name: truediv_18 ( type: RealDiv )\n", 607 | "325/349: Converting op name: mul_36 ( type: Mul )\n", 608 | "326/349: Converting op name: BiasAdd_18 ( type: BiasAdd )\n", 609 | "327/349: Converting op name: mul_37 ( type: Mul )\n", 610 | "328/349: Converting op name: 42-leaky ( type: Maximum )\n", 611 | "329/349: Converting op name: Pad_19 ( type: Pad )\n", 612 | "330/349: Converting op name: 43-convolutional ( type: Conv2D )\n", 613 | "331/349: Converting op name: sub_19 ( type: Sub )\n", 614 | "332/349: Converting op name: truediv_19 ( type: RealDiv )\n", 615 | "333/349: Converting op name: mul_38 ( type: Mul )\n", 616 | "334/349: Converting op name: BiasAdd_19 ( type: BiasAdd )\n", 617 | "335/349: Converting op name: mul_39 ( type: Mul )\n", 618 | "336/349: Converting op name: 44-leaky ( type: Maximum )\n", 619 | "337/349: Converting op name: concat_1 ( type: ConcatV2 )\n", 620 | "338/349: Converting op name: Pad_21 ( type: Pad )\n", 621 | "339/349: Converting op name: 50-convolutional ( type: Conv2D )\n", 622 | "340/349: Converting op name: sub_21 ( type: Sub )\n", 623 | "341/349: Converting op name: truediv_21 ( type: RealDiv )\n", 624 | "342/349: Converting op name: mul_42 ( type: Mul )\n", 625 | "343/349: Converting op name: BiasAdd_21 ( type: BiasAdd )\n", 626 | "344/349: Converting op name: mul_43 ( type: Mul )\n", 627 | "345/349: Converting op name: 51-leaky ( type: Maximum )\n", 628 | "346/349: Converting op name: Pad_22 ( type: Pad )\n", 629 | "347/349: Converting op name: 52-convolutional ( type: Conv2D )\n", 630 | "348/349: Converting op name: BiasAdd_22 ( type: BiasAdd )\n", 631 | "349/349: Converting op name: output ( type: Identity )\n", 632 | "\n", 633 | " Core ML model generated. 
Saved at location: yolo.mlmodel \n", 634 | "\n", 635 | "Core ML input(s): \n", 636 | " [name: \"input__0\"\n", 637 | "type {\n", 638 | " imageType {\n", 639 | " width: 608\n", 640 | " height: 608\n", 641 | " colorSpace: BGR\n", 642 | " }\n", 643 | "}\n", 644 | "]\n", 645 | "Core ML output(s): \n", 646 | " [name: \"output__0\"\n", 647 | "type {\n", 648 | " multiArrayType {\n", 649 | " shape: 425\n", 650 | " shape: 19\n", 651 | " shape: 19\n", 652 | " dataType: DOUBLE\n", 653 | " }\n", 654 | "}\n", 655 | "]\n" 656 | ] 657 | } 658 | ], 659 | "source": [ 660 | "coreml_model = tf_converter.convert(tf_model_path = 'yolo.pb',\n", 661 | " mlmodel_path = 'yolo.mlmodel',\n", 662 | " output_feature_names = ['output:0'], # the output node name we got from the previous step\n", 663 | " image_input_names= ['input:0'], # Core ML accepts an image as the input; we only need to specify which node is the image input node \n", 664 | " input_name_shape_dict = {'input:0' : [1, 608, 608, 3]}, # the input node name we got from the previous step; check the cfg file for the exact input shape size\n", 665 | " is_bgr = True, # the channel order is BGR instead of RGB\n", 666 | " image_scale = 1 / 255.0)\t # scale the input pixel values into the range 0 to 1" 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": null, 672 | "metadata": { 673 | "collapsed": true 674 | }, 675 | "outputs": [], 676 | "source": [] 677 | } 678 | ], 679 | "metadata": { 680 | "kernelspec": { 681 | "display_name": "Python 2", 682 | "language": "python", 683 | "name": "python2" 684 | }, 685 | "language_info": { 686 | "codemirror_mode": { 687 | "name": "ipython", 688 | "version": 2 689 | }, 690 | "file_extension": ".py", 691 | "mimetype": "text/x-python", 692 | "name": "python", 693 | "nbconvert_exporter": "python", 694 | "pygments_lexer": "ipython2", 695 | "version": "2.7.14" 696 | } 697 | }, 698 | "nbformat": 4, 699 | "nbformat_minor": 2 700 | } 701 | -------------------------------------------------------------------------------- /Converter/README.markdown: -------------------------------------------------------------------------------- 1 | If you want to convert the pre-trained models yourself, here are the instructions: 2 | 3 | 1. Download the pre-trained darknet weights: 4 | 5 | `% curl https://pjreddie.com/media/files/yolo.weights > yolo.weights` 6 | 7 | 2. Also download the configuration file from [darknet's repo](https://github.com/pjreddie/darknet/blob/master/cfg/yolo.cfg). 8 | 9 | 3. Build and install [darkflow](https://github.com/thtrieu/darkflow), then use the following command to convert the weights to a TensorFlow pb file: 10 | 11 | `% flow --model yolo.cfg --load yolo.weights --savepb` 12 | 13 | 4. Install [tf-coreml](https://github.com/tf-coreml/tf-coreml). 14 | 15 | 5. Launch Jupyter Notebook, open _Convert_pb_coreml.ipynb_, and run the script. After it finishes you will get yolo.mlmodel; copy the file into the Xcode project.
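For reference, the conversion itself boils down to a single `tfcoreml` call. The sketch below simply restates the call made in _Convert_pb_coreml.ipynb_, using the `input:0`/`output:0` node names found by inspecting the graph and the 608x608x3 input shape taken from the cfg file:

```python
import tfcoreml as tf_converter

# Convert the frozen TensorFlow graph exported by darkflow into a Core ML model.
coreml_model = tf_converter.convert(
    tf_model_path='yolo.pb',
    mlmodel_path='yolo.mlmodel',
    output_feature_names=['output:0'],                     # output node of the graph
    image_input_names=['input:0'],                         # treat this node as an image input
    input_name_shape_dict={'input:0': [1, 608, 608, 3]},   # input shape from yolo.cfg
    is_bgr=True,                                           # channel order is BGR, not RGB
    image_scale=1 / 255.0)                                 # scale input pixels into the 0..1 range
```

The resulting yolo.mlmodel takes a 608x608 BGR image as input and produces a 425x19x19 multi-array as output, as shown in the notebook's conversion log above.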
16 | 17 | -------------------------------------------------------------------------------- /Converter/yolo.meta: -------------------------------------------------------------------------------- 1 | {"net": {"type": "[net]", "batch": 1, "subdivisions": 1, "width": 608, "height": 608, "channels": 3, "momentum": 0.9, "decay": 0.0005, "angle": 0, "saturation": 1.5, "exposure": 1.5, "hue": 0.1, "learning_rate": 0.001, "burn_in": 1000, "max_batches": 500200, "policy": "steps", "steps": "400000,450000", "scales": ".1,.1"}, "type": "[region]", "anchors": [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828], "bias_match": 1, "classes": 80, "coords": 4, "num": 5, "softmax": 1, "jitter": 0.3, "rescore": 1, "object_scale": 5, "noobject_scale": 1, "class_scale": 1, "coord_scale": 1, "absolute": 1, "thresh": 0.6, "random": 1, "model": "../../YOLO-CoreML-MPSNNGraph/Convert/yolo.cfg", "inp_size": [608, 608, 3], "out_size": [19, 19, 425], "name": "yolo", "labels": ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"], "colors": [[254.0, 254.0, 254], [248.92, 228.6, 127], [243.84, 203.20000000000002, 0], [238.76, 177.79999999999998, -127], [233.68, 152.4, -254], [228.6, 127.0, 254], [223.52, 101.60000000000001, 127], [218.44, 76.20000000000002, 0], [213.35999999999999, 50.79999999999999, -127], [208.28000000000003, 25.399999999999995, -254], [203.20000000000002, 0.0, 254], [198.12, -25.400000000000023, 127], [193.04, -50.79999999999999, 0], [187.96, -76.20000000000002, -127], [182.88, -101.59999999999998, -254], [177.79999999999998, -127.0, 254], [172.71999999999997, -152.40000000000003, 127], [167.64, -177.79999999999998, 0], [162.56, -203.20000000000002, -127], [157.48, -228.59999999999997, -254], [152.4, -254.0, 254], [147.32000000000002, -279.40000000000003, 127], [142.24, -304.80000000000007, 0], [137.16, -330.19999999999993, -127], [132.08, -355.59999999999997, -254], [127.0, 254.0, 254], [121.92, 228.6, 127], [116.83999999999999, 203.20000000000002, 0], [111.75999999999999, 177.79999999999998, -127], [106.68, 152.4, -254], [101.60000000000001, 127.0, 254], [96.52, 101.60000000000001, 127], [91.44, 76.20000000000002, 0], [86.35999999999999, 50.79999999999999, -127], [81.27999999999999, 25.399999999999995, -254], [76.20000000000002, 0.0, 254], [71.12, -25.400000000000023, 127], [66.04, -50.79999999999999, 0], [60.96, -76.20000000000002, -127], [55.879999999999995, -101.59999999999998, -254], [50.79999999999999, -127.0, 254], [45.72000000000001, -152.40000000000003, 127], [40.64000000000001, -177.79999999999998, 0], [35.56, -203.20000000000002, -127], [30.48, -228.59999999999997, -254], [25.399999999999995, -254.0, 254], [20.31999999999999, -279.40000000000003, 127], 
[15.240000000000013, -304.80000000000007, 0], [10.160000000000009, -330.19999999999993, -127], [5.0800000000000045, -355.59999999999997, -254], [0.0, 254.0, 254], [-5.0800000000000045, 228.6, 127], [-10.160000000000009, 203.20000000000002, 0], [-15.240000000000013, 177.79999999999998, -127], [-20.320000000000018, 152.4, -254], [-25.400000000000023, 127.0, 254], [-30.480000000000025, 101.60000000000001, 127], [-35.559999999999974, 76.20000000000002, 0], [-40.63999999999998, 50.79999999999999, -127], [-45.719999999999985, 25.399999999999995, -254], [-50.79999999999999, 0.0, 254], [-55.879999999999995, -25.400000000000023, 127], [-60.96, -50.79999999999999, 0], [-66.04, -76.20000000000002, -127], [-71.12, -101.59999999999998, -254], [-76.20000000000002, -127.0, 254], [-81.28000000000002, -152.40000000000003, 127], [-86.36000000000001, -177.79999999999998, 0], [-91.44000000000003, -203.20000000000002, -127], [-96.51999999999997, -228.59999999999997, -254], [-101.59999999999998, -254.0, 254], [-106.67999999999998, -279.40000000000003, 127], [-111.75999999999999, -304.80000000000007, 0], [-116.83999999999999, -330.19999999999993, -127], [-121.92, -355.59999999999997, -254], [-127.0, 254.0, 254], [-132.08, 228.6, 127], [-137.16, 203.20000000000002, 0], [-142.24, 177.79999999999998, -127], [-147.32000000000002, 152.4, -254]]} -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 M.I. Hollemans 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to 5 | deal in the Software without restriction, including without limitation the 6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | # YOLO with Core ML 2 | 3 | This repo was forked and modified from [hollance/YOLO-CoreML-MPSNNGraph](https://github.com/hollance/YOLO-CoreML-MPSNNGraph). Some changes I made: 4 | 5 | 1. Keep only the Core ML part, since that is the only part I am interested in. 6 | 2. Use the YOLO2 pre-trained model instead of TinyYOLO. The YOLO2 model covers more classes and is more accurate than Tiny-YOLO. It is slower, but it can recognize more objects. 7 | 3. Drop the yad2k converter. I use darkflow to convert the YOLO pre-trained model from darknet format to TensorFlow, and tf-coreml to convert from TensorFlow to Core ML. 8 | 9 | 10 | ## About YOLO object detection 11 | 12 | YOLO is an object detection network.
It can detect multiple objects in an image and puts bounding boxes around these objects. [Read hollance's blog post about YOLO](http://machinethink.net/blog/object-detection-with-yolo/) to learn more about how it works. 13 | 14 | ![YOLO in action](YOLO.jpg) 15 | 16 | In this repo you'll find: 17 | 18 | - **YOLO-CoreML:** A demo app that runs the YOLO neural network on Core ML. 19 | - **Converter:** The scripts needed to convert the original DarkNet YOLO model to Core ML. 20 | 21 | To run the app: 22 | 23 | 1. execute download.sh to download the pre-trained model 24 | `% sh download.sh` 25 | 2. open the **xcodeproj** file in Xcode 9 and run it on a device with iOS 11 or better installed. 26 | 27 | The reported "elapsed" time is how long it takes the YOLO neural net to process a single image. The FPS is the actual throughput achieved by the app. 28 | 29 | > **NOTE:** Running these kinds of neural networks eats up a lot of battery power. The app can put a limit on the number of times per second it runs the neural net. You can change this in `setUpCamera()` by changing the line `videoCapture.fps = 50` to a smaller number. 30 | 31 | ## Converting the models 32 | 33 | > **NOTE:** You don't need to convert the models yourself. Everything you need to run the demo apps is included in the Xcode projects already. 34 | 35 | If you're interested in how the conversion was done, check the [instructions](Converter/). 36 | 37 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 48; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 7BA1C6D01EF27DA000BB25EF /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BC25FB51EF27C0D002ECBBA /* VideoCapture.swift */; }; 11 | 7BA1C6D61EF2861600BB25EF /* BoundingBox.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BA1C6D51EF2861400BB25EF /* BoundingBox.swift */; }; 12 | 7BA1C6DC1EF2B57200BB25EF /* dog416.png in Resources */ = {isa = PBXBuildFile; fileRef = 7BA1C6DB1EF2B56D00BB25EF /* dog416.png */; }; 13 | 7BC25FA21EF1B7D1002ECBBA /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BC25FA11EF1B7D1002ECBBA /* AppDelegate.swift */; }; 14 | 7BC25FA91EF1B7D1002ECBBA /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 7BC25FA81EF1B7D1002ECBBA /* Assets.xcassets */; }; 15 | 8020906C202C8BF30017321C /* yolo.mlmodel in Sources */ = {isa = PBXBuildFile; fileRef = 8020906B202C8BF30017321C /* yolo.mlmodel */; }; 16 | 8020907A202C8C8D0017321C /* YOLO.swift in Sources */ = {isa = PBXBuildFile; fileRef = 80209075202C8C8D0017321C /* YOLO.swift */; }; 17 | 8020907D202C8C8D0017321C /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 80209079202C8C8D0017321C /* ViewController.swift */; }; 18 | 805125AB202EBA8000CF1DD0 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 805125A9202EBA8000CF1DD0 /* Main.storyboard */; }; 19 | 805125AD202EBA8900CF1DD0 /* Helpers.swift in Sources */ = {isa = PBXBuildFile; fileRef = 805125AC202EBA8900CF1DD0 /* Helpers.swift */; }; 20 | /* End PBXBuildFile section */ 21 | 22 | /* Begin PBXFileReference section */ 23 | 7BA1C6D51EF2861400BB25EF /* BoundingBox.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BoundingBox.swift; sourceTree = ""; }; 24 | 7BA1C6DB1EF2B56D00BB25EF /* 
dog416.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = dog416.png; path = ../../Common/dog416.png; sourceTree = ""; }; 25 | 7BC25F9E1EF1B7D1002ECBBA /* YOLO-CoreML.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "YOLO-CoreML.app"; sourceTree = BUILT_PRODUCTS_DIR; }; 26 | 7BC25FA11EF1B7D1002ECBBA /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = AppDelegate.swift; path = ../../Common/AppDelegate.swift; sourceTree = ""; }; 27 | 7BC25FA81EF1B7D1002ECBBA /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 28 | 7BC25FAD1EF1B7D1002ECBBA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 29 | 7BC25FB51EF27C0D002ECBBA /* VideoCapture.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; 30 | 8020906B202C8BF30017321C /* yolo.mlmodel */ = {isa = PBXFileReference; lastKnownFileType = file.mlmodel; path = yolo.mlmodel; sourceTree = ""; }; 31 | 80209075202C8C8D0017321C /* YOLO.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = YOLO.swift; sourceTree = ""; }; 32 | 80209079202C8C8D0017321C /* ViewController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = ""; }; 33 | 805125AA202EBA8000CF1DD0 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; 34 | 805125AC202EBA8900CF1DD0 /* Helpers.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Helpers.swift; sourceTree = ""; }; 35 | /* End PBXFileReference section */ 36 | 37 | /* Begin PBXFrameworksBuildPhase section */ 38 | 7BC25F9B1EF1B7D1002ECBBA /* Frameworks */ = { 39 | isa = PBXFrameworksBuildPhase; 40 | buildActionMask = 2147483647; 41 | files = ( 42 | ); 43 | runOnlyForDeploymentPostprocessing = 0; 44 | }; 45 | /* End PBXFrameworksBuildPhase section */ 46 | 47 | /* Begin PBXGroup section */ 48 | 7BC25F951EF1B7D1002ECBBA = { 49 | isa = PBXGroup; 50 | children = ( 51 | 7BC25FA01EF1B7D1002ECBBA /* YOLO-CoreML */, 52 | 7BC25F9F1EF1B7D1002ECBBA /* Products */, 53 | ); 54 | sourceTree = ""; 55 | }; 56 | 7BC25F9F1EF1B7D1002ECBBA /* Products */ = { 57 | isa = PBXGroup; 58 | children = ( 59 | 7BC25F9E1EF1B7D1002ECBBA /* YOLO-CoreML.app */, 60 | ); 61 | name = Products; 62 | sourceTree = ""; 63 | }; 64 | 7BC25FA01EF1B7D1002ECBBA /* YOLO-CoreML */ = { 65 | isa = PBXGroup; 66 | children = ( 67 | 805125AE202EBB4300CF1DD0 /* Helpers */, 68 | 8020906B202C8BF30017321C /* yolo.mlmodel */, 69 | 7BC25FA11EF1B7D1002ECBBA /* AppDelegate.swift */, 70 | 7BC25FA81EF1B7D1002ECBBA /* Assets.xcassets */, 71 | 80209079202C8C8D0017321C /* ViewController.swift */, 72 | 805125A9202EBA8000CF1DD0 /* Main.storyboard */, 73 | 80209075202C8C8D0017321C /* YOLO.swift */, 74 | 7BA1C6DB1EF2B56D00BB25EF /* dog416.png */, 75 | 7BC25FAD1EF1B7D1002ECBBA /* Info.plist */, 76 | ); 77 | path = "YOLO-CoreML"; 78 | sourceTree = ""; 79 | }; 80 | 805125AE202EBB4300CF1DD0 /* Helpers */ = { 81 | isa = PBXGroup; 82 | children = ( 83 | 805125AC202EBA8900CF1DD0 /* Helpers.swift */, 84 | 7BA1C6D51EF2861400BB25EF /* BoundingBox.swift */, 85 | 7BC25FB51EF27C0D002ECBBA /* VideoCapture.swift */, 86 | ); 87 | path = 
Helpers; 88 | sourceTree = ""; 89 | }; 90 | /* End PBXGroup section */ 91 | 92 | /* Begin PBXNativeTarget section */ 93 | 7BC25F9D1EF1B7D1002ECBBA /* YOLO-CoreML */ = { 94 | isa = PBXNativeTarget; 95 | buildConfigurationList = 7BC25FB01EF1B7D1002ECBBA /* Build configuration list for PBXNativeTarget "YOLO-CoreML" */; 96 | buildPhases = ( 97 | 7BC25F9A1EF1B7D1002ECBBA /* Sources */, 98 | 7BC25F9B1EF1B7D1002ECBBA /* Frameworks */, 99 | 7BC25F9C1EF1B7D1002ECBBA /* Resources */, 100 | ); 101 | buildRules = ( 102 | ); 103 | dependencies = ( 104 | ); 105 | name = "YOLO-CoreML"; 106 | productName = "TinyYOLO-CoreML"; 107 | productReference = 7BC25F9E1EF1B7D1002ECBBA /* YOLO-CoreML.app */; 108 | productType = "com.apple.product-type.application"; 109 | }; 110 | /* End PBXNativeTarget section */ 111 | 112 | /* Begin PBXProject section */ 113 | 7BC25F961EF1B7D1002ECBBA /* Project object */ = { 114 | isa = PBXProject; 115 | attributes = { 116 | LastSwiftUpdateCheck = 0900; 117 | LastUpgradeCheck = 0900; 118 | ORGANIZATIONNAME = MachineThink; 119 | TargetAttributes = { 120 | 7BC25F9D1EF1B7D1002ECBBA = { 121 | CreatedOnToolsVersion = 9.0; 122 | }; 123 | }; 124 | }; 125 | buildConfigurationList = 7BC25F991EF1B7D1002ECBBA /* Build configuration list for PBXProject "YOLO-CoreML" */; 126 | compatibilityVersion = "Xcode 8.0"; 127 | developmentRegion = en; 128 | hasScannedForEncodings = 0; 129 | knownRegions = ( 130 | en, 131 | Base, 132 | ); 133 | mainGroup = 7BC25F951EF1B7D1002ECBBA; 134 | productRefGroup = 7BC25F9F1EF1B7D1002ECBBA /* Products */; 135 | projectDirPath = ""; 136 | projectRoot = ""; 137 | targets = ( 138 | 7BC25F9D1EF1B7D1002ECBBA /* YOLO-CoreML */, 139 | ); 140 | }; 141 | /* End PBXProject section */ 142 | 143 | /* Begin PBXResourcesBuildPhase section */ 144 | 7BC25F9C1EF1B7D1002ECBBA /* Resources */ = { 145 | isa = PBXResourcesBuildPhase; 146 | buildActionMask = 2147483647; 147 | files = ( 148 | 805125AB202EBA8000CF1DD0 /* Main.storyboard in Resources */, 149 | 7BA1C6DC1EF2B57200BB25EF /* dog416.png in Resources */, 150 | 7BC25FA91EF1B7D1002ECBBA /* Assets.xcassets in Resources */, 151 | ); 152 | runOnlyForDeploymentPostprocessing = 0; 153 | }; 154 | /* End PBXResourcesBuildPhase section */ 155 | 156 | /* Begin PBXSourcesBuildPhase section */ 157 | 7BC25F9A1EF1B7D1002ECBBA /* Sources */ = { 158 | isa = PBXSourcesBuildPhase; 159 | buildActionMask = 2147483647; 160 | files = ( 161 | 7BA1C6D01EF27DA000BB25EF /* VideoCapture.swift in Sources */, 162 | 805125AD202EBA8900CF1DD0 /* Helpers.swift in Sources */, 163 | 8020907D202C8C8D0017321C /* ViewController.swift in Sources */, 164 | 8020906C202C8BF30017321C /* yolo.mlmodel in Sources */, 165 | 7BA1C6D61EF2861600BB25EF /* BoundingBox.swift in Sources */, 166 | 8020907A202C8C8D0017321C /* YOLO.swift in Sources */, 167 | 7BC25FA21EF1B7D1002ECBBA /* AppDelegate.swift in Sources */, 168 | ); 169 | runOnlyForDeploymentPostprocessing = 0; 170 | }; 171 | /* End PBXSourcesBuildPhase section */ 172 | 173 | /* Begin PBXVariantGroup section */ 174 | 805125A9202EBA8000CF1DD0 /* Main.storyboard */ = { 175 | isa = PBXVariantGroup; 176 | children = ( 177 | 805125AA202EBA8000CF1DD0 /* Base */, 178 | ); 179 | name = Main.storyboard; 180 | sourceTree = ""; 181 | }; 182 | /* End PBXVariantGroup section */ 183 | 184 | /* Begin XCBuildConfiguration section */ 185 | 7BC25FAE1EF1B7D1002ECBBA /* Debug */ = { 186 | isa = XCBuildConfiguration; 187 | buildSettings = { 188 | ALWAYS_SEARCH_USER_PATHS = NO; 189 | CLANG_ANALYZER_NONNULL = YES; 190 | 
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 191 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 192 | CLANG_CXX_LIBRARY = "libc++"; 193 | CLANG_ENABLE_MODULES = YES; 194 | CLANG_ENABLE_OBJC_ARC = YES; 195 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 196 | CLANG_WARN_BOOL_CONVERSION = YES; 197 | CLANG_WARN_COMMA = YES; 198 | CLANG_WARN_CONSTANT_CONVERSION = YES; 199 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 200 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 201 | CLANG_WARN_EMPTY_BODY = YES; 202 | CLANG_WARN_ENUM_CONVERSION = YES; 203 | CLANG_WARN_INFINITE_RECURSION = YES; 204 | CLANG_WARN_INT_CONVERSION = YES; 205 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 206 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 207 | CLANG_WARN_STRICT_PROTOTYPES = YES; 208 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 209 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 210 | CLANG_WARN_UNREACHABLE_CODE = YES; 211 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 212 | CODE_SIGN_IDENTITY = "iPhone Developer"; 213 | COPY_PHASE_STRIP = NO; 214 | DEBUG_INFORMATION_FORMAT = dwarf; 215 | ENABLE_STRICT_OBJC_MSGSEND = YES; 216 | ENABLE_TESTABILITY = YES; 217 | GCC_C_LANGUAGE_STANDARD = gnu11; 218 | GCC_DYNAMIC_NO_PIC = NO; 219 | GCC_NO_COMMON_BLOCKS = YES; 220 | GCC_OPTIMIZATION_LEVEL = 0; 221 | GCC_PREPROCESSOR_DEFINITIONS = ( 222 | "DEBUG=1", 223 | "$(inherited)", 224 | ); 225 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 226 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 227 | GCC_WARN_UNDECLARED_SELECTOR = YES; 228 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 229 | GCC_WARN_UNUSED_FUNCTION = YES; 230 | GCC_WARN_UNUSED_VARIABLE = YES; 231 | IPHONEOS_DEPLOYMENT_TARGET = 11.0; 232 | MTL_ENABLE_DEBUG_INFO = YES; 233 | ONLY_ACTIVE_ARCH = YES; 234 | SDKROOT = iphoneos; 235 | SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; 236 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 237 | }; 238 | name = Debug; 239 | }; 240 | 7BC25FAF1EF1B7D1002ECBBA /* Release */ = { 241 | isa = XCBuildConfiguration; 242 | buildSettings = { 243 | ALWAYS_SEARCH_USER_PATHS = NO; 244 | CLANG_ANALYZER_NONNULL = YES; 245 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 246 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 247 | CLANG_CXX_LIBRARY = "libc++"; 248 | CLANG_ENABLE_MODULES = YES; 249 | CLANG_ENABLE_OBJC_ARC = YES; 250 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 251 | CLANG_WARN_BOOL_CONVERSION = YES; 252 | CLANG_WARN_COMMA = YES; 253 | CLANG_WARN_CONSTANT_CONVERSION = YES; 254 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 255 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 256 | CLANG_WARN_EMPTY_BODY = YES; 257 | CLANG_WARN_ENUM_CONVERSION = YES; 258 | CLANG_WARN_INFINITE_RECURSION = YES; 259 | CLANG_WARN_INT_CONVERSION = YES; 260 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 261 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 262 | CLANG_WARN_STRICT_PROTOTYPES = YES; 263 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 264 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 265 | CLANG_WARN_UNREACHABLE_CODE = YES; 266 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 267 | CODE_SIGN_IDENTITY = "iPhone Developer"; 268 | COPY_PHASE_STRIP = NO; 269 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 270 | ENABLE_NS_ASSERTIONS = NO; 271 | ENABLE_STRICT_OBJC_MSGSEND = YES; 272 | GCC_C_LANGUAGE_STANDARD = gnu11; 273 | GCC_NO_COMMON_BLOCKS = YES; 274 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 275 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 276 | GCC_WARN_UNDECLARED_SELECTOR = YES; 277 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 278 | GCC_WARN_UNUSED_FUNCTION = YES; 279 | 
GCC_WARN_UNUSED_VARIABLE = YES; 280 | IPHONEOS_DEPLOYMENT_TARGET = 11.0; 281 | MTL_ENABLE_DEBUG_INFO = NO; 282 | SDKROOT = iphoneos; 283 | SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule"; 284 | VALIDATE_PRODUCT = YES; 285 | }; 286 | name = Release; 287 | }; 288 | 7BC25FB11EF1B7D1002ECBBA /* Debug */ = { 289 | isa = XCBuildConfiguration; 290 | buildSettings = { 291 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 292 | DEVELOPMENT_TEAM = B89DDD6AUG; 293 | GCC_OPTIMIZATION_LEVEL = s; 294 | INFOPLIST_FILE = "YOLO-CoreML/Info.plist"; 295 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; 296 | PRODUCT_BUNDLE_IDENTIFIER = "cc.syshen.YOLO-CoreML"; 297 | PRODUCT_NAME = "$(TARGET_NAME)"; 298 | SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule"; 299 | SWIFT_VERSION = 4.0; 300 | TARGETED_DEVICE_FAMILY = "1,2"; 301 | }; 302 | name = Debug; 303 | }; 304 | 7BC25FB21EF1B7D1002ECBBA /* Release */ = { 305 | isa = XCBuildConfiguration; 306 | buildSettings = { 307 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 308 | DEVELOPMENT_TEAM = B89DDD6AUG; 309 | INFOPLIST_FILE = "YOLO-CoreML/Info.plist"; 310 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; 311 | PRODUCT_BUNDLE_IDENTIFIER = "cc.syshen.YOLO-CoreML"; 312 | PRODUCT_NAME = "$(TARGET_NAME)"; 313 | SWIFT_VERSION = 4.0; 314 | TARGETED_DEVICE_FAMILY = "1,2"; 315 | }; 316 | name = Release; 317 | }; 318 | /* End XCBuildConfiguration section */ 319 | 320 | /* Begin XCConfigurationList section */ 321 | 7BC25F991EF1B7D1002ECBBA /* Build configuration list for PBXProject "YOLO-CoreML" */ = { 322 | isa = XCConfigurationList; 323 | buildConfigurations = ( 324 | 7BC25FAE1EF1B7D1002ECBBA /* Debug */, 325 | 7BC25FAF1EF1B7D1002ECBBA /* Release */, 326 | ); 327 | defaultConfigurationIsVisible = 0; 328 | defaultConfigurationName = Release; 329 | }; 330 | 7BC25FB01EF1B7D1002ECBBA /* Build configuration list for PBXNativeTarget "YOLO-CoreML" */ = { 331 | isa = XCConfigurationList; 332 | buildConfigurations = ( 333 | 7BC25FB11EF1B7D1002ECBBA /* Debug */, 334 | 7BC25FB21EF1B7D1002ECBBA /* Release */, 335 | ); 336 | defaultConfigurationIsVisible = 0; 337 | defaultConfigurationName = Release; 338 | }; 339 | /* End XCConfigurationList section */ 340 | }; 341 | rootObject = 7BC25F961EF1B7D1002ECBBA /* Project object */; 342 | } 343 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "iphone", 5 | "size" : "20x20", 6 | "scale" : "2x" 7 | }, 8 | { 9 | "idiom" : "iphone", 10 | "size" : "20x20", 11 | "scale" : "3x" 12 | }, 13 | { 14 | "idiom" : "iphone", 15 | "size" : "29x29", 16 | "scale" : "2x" 17 | }, 18 | { 19 | "idiom" : "iphone", 20 | "size" : "29x29", 21 | "scale" : "3x" 22 | }, 23 | { 24 | "idiom" : "iphone", 25 | "size" : "40x40", 26 | "scale" : "2x" 27 | }, 28 | { 29 | "idiom" : "iphone", 30 | "size" : "40x40", 31 | "scale" : "3x" 32 | }, 33 | { 34 | "idiom" : "iphone", 35 | "size" : "60x60", 36 | "scale" : "2x" 37 | }, 38 | { 39 | "idiom" : "iphone", 40 | "size" : "60x60", 41 | "scale" : "3x" 42 | }, 43 | { 44 | "idiom" : "ipad", 45 | "size" : "20x20", 46 
| "scale" : "1x" 47 | }, 48 | { 49 | "idiom" : "ipad", 50 | "size" : "20x20", 51 | "scale" : "2x" 52 | }, 53 | { 54 | "idiom" : "ipad", 55 | "size" : "29x29", 56 | "scale" : "1x" 57 | }, 58 | { 59 | "idiom" : "ipad", 60 | "size" : "29x29", 61 | "scale" : "2x" 62 | }, 63 | { 64 | "idiom" : "ipad", 65 | "size" : "40x40", 66 | "scale" : "1x" 67 | }, 68 | { 69 | "idiom" : "ipad", 70 | "size" : "40x40", 71 | "scale" : "2x" 72 | }, 73 | { 74 | "idiom" : "ipad", 75 | "size" : "76x76", 76 | "scale" : "1x" 77 | }, 78 | { 79 | "idiom" : "ipad", 80 | "size" : "76x76", 81 | "scale" : "2x" 82 | }, 83 | { 84 | "idiom" : "ipad", 85 | "size" : "83.5x83.5", 86 | "scale" : "2x" 87 | } 88 | ], 89 | "info" : { 90 | "version" : 1, 91 | "author" : "xcode" 92 | } 93 | } -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Base.lproj/Main.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | Menlo-Regular 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Helpers.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | import UIKit 3 | import CoreML 4 | import Accelerate 5 | 6 | let anchors: [Float] = [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52] 7 | 8 | /* 9 | The tiny-yolo-voc network from YOLOv2. https://pjreddie.com/darknet/yolo/ 10 | 11 | This implementation is cobbled together from the following sources: 12 | 13 | - https://github.com/pjreddie/darknet 14 | - https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowYoloDetector.java 15 | - https://github.com/allanzelener/YAD2K 16 | */ 17 | class YOLO { 18 | public static let inputWidth = 416 19 | public static let inputHeight = 416 20 | 21 | public struct Prediction { 22 | let classIndex: Int 23 | let score: Float 24 | let rect: CGRect 25 | } 26 | 27 | let model = TinyYOLO() 28 | 29 | public init() { } 30 | 31 | public func predict(image: CVPixelBuffer) throws -> [Prediction] { 32 | if let output = try? model.prediction(image: image) { 33 | return computeBoundingBoxes(features: output.grid) 34 | } else { 35 | return [] 36 | } 37 | } 38 | 39 | public func computeBoundingBoxes(features: MLMultiArray) -> [Prediction] { 40 | assert(features.count == 125*13*13) 41 | 42 | var predictions = [Prediction]() 43 | 44 | let blockSize: Float = 32 45 | let gridHeight = 13 46 | let gridWidth = 13 47 | let boxesPerCell = 5 48 | let numClasses = 20 49 | 50 | // The 416x416 image is divided into a 13x13 grid. Each of these grid cells 51 | // will predict 5 bounding boxes (boxesPerCell). A bounding box consists of 52 | // five data items: x, y, width, height, and a confidence score. Each grid 53 | // cell also predicts which class each bounding box belongs to. 54 | // 55 | // The "features" array therefore contains (numClasses + 5)*boxesPerCell 56 | // values for each grid cell, i.e. 125 channels. The total features array 57 | // contains 13x13x125 elements (actually x128 instead of x125 because in 58 | // Metal the number of channels must be a multiple of 4). 59 | 60 | for cy in 0.. 
0.3 { 112 | let rect = CGRect(x: CGFloat(x - w/2), y: CGFloat(y - h/2), 113 | width: CGFloat(w), height: CGFloat(h)) 114 | 115 | let prediction = Prediction(classIndex: detectedClass, 116 | score: confidenceInClass, 117 | rect: rect) 118 | predictions.append(prediction) 119 | } 120 | } 121 | } 122 | } 123 | 124 | // We already filtered out any bounding boxes that have very low scores, 125 | // but there still may be boxes that overlap too much with others. We'll 126 | // use "non-maximum suppression" to prune those duplicate bounding boxes. 127 | return nonMaxSuppression(boxes: predictions, limit: 10, threshold: 0.5) 128 | } 129 | } 130 | 131 | /** 132 | Removes bounding boxes that overlap too much with other boxes that have 133 | a higher score. 134 | 135 | Based on code from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/non_max_suppression_op.cc 136 | 137 | - Parameters: 138 | - boxes: an array of bounding boxes and their scores 139 | - limit: the maximum number of boxes that will be selected 140 | - threshold: used to decide whether boxes overlap too much 141 | */ 142 | func nonMaxSuppression(boxes: [YOLO.Prediction], limit: Int, threshold: Float) -> [YOLO.Prediction] { 143 | 144 | // Do an argsort on the confidence scores, from high to low. 145 | let sortedIndices = boxes.indices.sorted { boxes[$0].score > boxes[$1].score } 146 | 147 | var selected: [YOLO.Prediction] = [] 148 | var active = [Bool](repeating: true, count: boxes.count) 149 | var numActive = active.count 150 | 151 | // The algorithm is simple: Start with the box that has the highest score. 152 | // Remove any remaining boxes that overlap it more than the given threshold 153 | // amount. If there are any boxes left (i.e. these did not overlap with any 154 | // previous boxes), then repeat this procedure, until no more boxes remain 155 | // or the limit has been reached. 156 | outer: for i in 0..= limit { break } 161 | 162 | for j in i+1.. threshold { 166 | active[j] = false 167 | numActive -= 1 168 | if numActive <= 0 { break outer } 169 | } 170 | } 171 | } 172 | } 173 | } 174 | return selected 175 | } 176 | 177 | /** 178 | Computes intersection-over-union overlap between two bounding boxes. 179 | */ 180 | public func IOU(a: CGRect, b: CGRect) -> Float { 181 | let areaA = a.width * a.height 182 | if areaA <= 0 { return 0 } 183 | 184 | let areaB = b.width * b.height 185 | if areaB <= 0 { return 0 } 186 | 187 | let intersectionMinX = max(a.minX, b.minX) 188 | let intersectionMinY = max(a.minY, b.minY) 189 | let intersectionMaxX = min(a.maxX, b.maxX) 190 | let intersectionMaxY = min(a.maxY, b.maxY) 191 | let intersectionArea = max(intersectionMaxY - intersectionMinY, 0) * 192 | max(intersectionMaxX - intersectionMinX, 0) 193 | return Float(intersectionArea / (areaA + areaB - intersectionArea)) 194 | } 195 | 196 | extension Array where Element: Comparable { 197 | /** 198 | Returns the index and value of the largest element in the array. 199 | */ 200 | public func argmax() -> (Int, Element) { 201 | precondition(self.count > 0) 202 | var maxIndex = 0 203 | var maxValue = self[0] 204 | for i in 1.. maxValue { 206 | maxValue = self[i] 207 | maxIndex = i 208 | } 209 | } 210 | return (maxIndex, maxValue) 211 | } 212 | } 213 | 214 | /** 215 | Logistic sigmoid. 216 | */ 217 | public func sigmoid(_ x: Float) -> Float { 218 | return 1 / (1 + exp(-x)) 219 | } 220 | 221 | /** 222 | Computes the "softmax" function over an array. 
223 | 224 | Based on code from https://github.com/nikolaypavlov/MLPNeuralNet/ 225 | 226 | This is what softmax looks like in "pseudocode" (actually using Python 227 | and numpy): 228 | 229 | x -= np.max(x) 230 | exp_scores = np.exp(x) 231 | softmax = exp_scores / np.sum(exp_scores) 232 | 233 | First we shift the values of x so that the highest value in the array is 0. 234 | This ensures numerical stability with the exponents, so they don't blow up. 235 | */ 236 | public func softmax(_ x: [Float]) -> [Float] { 237 | var x = x 238 | let len = vDSP_Length(x.count) 239 | 240 | // Find the maximum value in the input array. 241 | var max: Float = 0 242 | vDSP_maxv(x, 1, &max, len) 243 | 244 | // Subtract the maximum from all the elements in the array. 245 | // Now the highest value in the array is 0. 246 | max = -max 247 | vDSP_vsadd(x, 1, &max, &x, 1, len) 248 | 249 | // Exponentiate all the elements in the array. 250 | var count = Int32(x.count) 251 | vvexpf(&x, x, &count) 252 | 253 | // Compute the sum of all exponentiated values. 254 | var sum: Float = 0 255 | vDSP_sve(x, 1, &sum, len) 256 | 257 | // Divide each element by the sum. This normalizes the array contents 258 | // so that they all add up to 1. 259 | vDSP_vsdiv(x, 1, &sum, &x, 1, len) 260 | 261 | return x 262 | } 263 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Helpers/BoundingBox.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | import UIKit 3 | 4 | class BoundingBox { 5 | let shapeLayer: CAShapeLayer 6 | let textLayer: CATextLayer 7 | 8 | init() { 9 | shapeLayer = CAShapeLayer() 10 | shapeLayer.fillColor = UIColor.clear.cgColor 11 | shapeLayer.lineWidth = 4 12 | shapeLayer.isHidden = true 13 | 14 | textLayer = CATextLayer() 15 | textLayer.foregroundColor = UIColor.black.cgColor 16 | textLayer.isHidden = true 17 | textLayer.contentsScale = UIScreen.main.scale 18 | textLayer.fontSize = 14 19 | textLayer.font = UIFont(name: "Avenir", size: textLayer.fontSize) 20 | textLayer.alignmentMode = kCAAlignmentCenter 21 | } 22 | 23 | func addToLayer(_ parent: CALayer) { 24 | parent.addSublayer(shapeLayer) 25 | parent.addSublayer(textLayer) 26 | } 27 | 28 | func show(frame: CGRect, label: String, color: UIColor) { 29 | CATransaction.setDisableActions(true) 30 | 31 | let path = UIBezierPath(rect: frame) 32 | shapeLayer.path = path.cgPath 33 | shapeLayer.strokeColor = color.cgColor 34 | shapeLayer.isHidden = false 35 | 36 | textLayer.string = label 37 | textLayer.backgroundColor = color.cgColor 38 | textLayer.isHidden = false 39 | 40 | let attributes = [ 41 | NSAttributedStringKey.font: textLayer.font as Any 42 | ] 43 | 44 | let textRect = label.boundingRect(with: CGSize(width: 400, height: 100), 45 | options: .truncatesLastVisibleLine, 46 | attributes: attributes, context: nil) 47 | let textSize = CGSize(width: textRect.width + 12, height: textRect.height) 48 | let textOrigin = CGPoint(x: frame.origin.x - 2, y: frame.origin.y - textSize.height) 49 | textLayer.frame = CGRect(origin: textOrigin, size: textSize) 50 | } 51 | 52 | func hide() { 53 | shapeLayer.isHidden = true 54 | textLayer.isHidden = true 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Helpers/Helpers.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | import UIKit 3 | import CoreML 4 | import Accelerate 5 | 6 | // The 
labels for the 80 classes.
7 | let labels = [
8 |   "person",
9 |   "bicycle",
10 |   "car",
11 |   "motorbike",
12 |   "aeroplane",
13 |   "bus",
14 |   "train",
15 |   "truck",
16 |   "boat",
17 |   "traffic light",
18 |   "fire hydrant",
19 |   "stop sign",
20 |   "parking meter",
21 |   "bench",
22 |   "bird",
23 |   "cat",
24 |   "dog",
25 |   "horse",
26 |   "sheep",
27 |   "cow",
28 |   "elephant",
29 |   "bear",
30 |   "zebra",
31 |   "giraffe",
32 |   "backpack",
33 |   "umbrella",
34 |   "handbag",
35 |   "tie",
36 |   "suitcase",
37 |   "frisbee",
38 |   "skis",
39 |   "snowboard",
40 |   "sports ball",
41 |   "kite",
42 |   "baseball bat",
43 |   "baseball glove",
44 |   "skateboard",
45 |   "surfboard",
46 |   "tennis racket",
47 |   "bottle",
48 |   "wine glass",
49 |   "cup",
50 |   "fork",
51 |   "knife",
52 |   "spoon",
53 |   "bowl",
54 |   "banana",
55 |   "apple",
56 |   "sandwich",
57 |   "orange",
58 |   "broccoli",
59 |   "carrot",
60 |   "hot dog",
61 |   "pizza",
62 |   "donut",
63 |   "cake",
64 |   "chair",
65 |   "sofa",
66 |   "pottedplant",
67 |   "bed",
68 |   "diningtable",
69 |   "toilet",
70 |   "tvmonitor",
71 |   "laptop",
72 |   "mouse",
73 |   "remote",
74 |   "keyboard",
75 |   "cell phone",
76 |   "microwave",
77 |   "oven",
78 |   "toaster",
79 |   "sink",
80 |   "refrigerator",
81 |   "book",
82 |   "clock",
83 |   "vase",
84 |   "scissors",
85 |   "teddy bear",
86 |   "hair drier",
87 |   "toothbrush"
88 | ]
89 | 
90 | // anchor boxes
91 | let anchors: [Float] = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]
92 | 
93 | /**
94 |   Removes bounding boxes that overlap too much with other boxes that have
95 |   a higher score.
96 | 
97 |   Based on code from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/non_max_suppression_op.cc
98 | 
99 |   - Parameters:
100 |     - boxes: an array of bounding boxes and their scores
101 |     - limit: the maximum number of boxes that will be selected
102 |     - threshold: used to decide whether boxes overlap too much
103 | */
104 | func nonMaxSuppression(boxes: [YOLO.Prediction], limit: Int, threshold: Float) -> [YOLO.Prediction] {
105 | 
106 |   // Do an argsort on the confidence scores, from high to low.
107 |   let sortedIndices = boxes.indices.sorted { boxes[$0].score > boxes[$1].score }
108 | 
109 |   var selected: [YOLO.Prediction] = []
110 |   var active = [Bool](repeating: true, count: boxes.count)
111 |   var numActive = active.count
112 | 
113 |   // The algorithm is simple: Start with the box that has the highest score.
114 |   // Remove any remaining boxes that overlap it more than the given threshold
115 |   // amount. If there are any boxes left (i.e. these did not overlap with any
116 |   // previous boxes), then repeat this procedure, until no more boxes remain
117 |   // or the limit has been reached.
118 |   outer: for i in 0..<boxes.count {
119 |     if active[i] {
120 |       let boxA = boxes[sortedIndices[i]]
121 |       selected.append(boxA)
122 |       if selected.count >= limit { break }
123 | 
124 |       for j in i+1..<boxes.count {
125 |         if active[j] {
126 |           let boxB = boxes[sortedIndices[j]]
127 |           if IOU(a: boxA.rect, b: boxB.rect) > threshold {
128 |             active[j] = false
129 |             numActive -= 1
130 |             if numActive <= 0 { break outer }
131 |           }
132 |         }
133 |       }
134 |     }
135 |   }
136 |   return selected
137 | }
138 | 
139 | /**
140 |   Computes intersection-over-union overlap between two bounding boxes.
141 | */
142 | public func IOU(a: CGRect, b: CGRect) -> Float {
143 |   let areaA = a.width * a.height
144 |   if areaA <= 0 { return 0 }
145 | 
146 |   let areaB = b.width * b.height
147 |   if areaB <= 0 { return 0 }
148 | 
149 |   let intersectionMinX = max(a.minX, b.minX)
150 |   let intersectionMinY = max(a.minY, b.minY)
151 |   let intersectionMaxX = min(a.maxX, b.maxX)
152 |   let intersectionMaxY = min(a.maxY, b.maxY)
153 |   let intersectionArea = max(intersectionMaxY - intersectionMinY, 0) *
154 |                          max(intersectionMaxX - intersectionMinX, 0)
155 |   return Float(intersectionArea / (areaA + areaB - intersectionArea))
156 | }
157 | 
158 | extension Array where Element: Comparable {
159 |   /**
160 |     Returns the index and value of the largest element in the array.
161 |   */
162 |   public func argmax() -> (Int, Element) {
163 |     precondition(self.count > 0)
164 |     var maxIndex = 0
165 |     var maxValue = self[0]
166 |     for i in 1..<self.count {
167 |       if self[i] > maxValue {
168 |         maxValue = self[i]
169 |         maxIndex = i
170 |       }
171 |     }
172 |     return (maxIndex, maxValue)
173 |   }
174 | }
175 | 
176 | /**
177 |   Logistic sigmoid.
178 | */
179 | public func sigmoid(_ x: Float) -> Float {
180 |   return 1 / (1 + exp(-x))
181 | }
182 | 
183 | /**
184 |   Computes the "softmax" function over an array.
185 | 
186 |   Based on code from https://github.com/nikolaypavlov/MLPNeuralNet/
187 | 
188 |   This is what softmax looks like in "pseudocode" (actually using Python
189 |   and numpy):
190 | 
191 |     x -= np.max(x)
192 |     exp_scores = np.exp(x)
193 |     softmax = exp_scores / np.sum(exp_scores)
194 | 
195 |   First we shift the values of x so that the highest value in the array is 0.
196 |   This ensures numerical stability with the exponents, so they don't blow up.
197 | */
198 | public func softmax(_ x: [Float]) -> [Float] {
199 |   var x = x
200 |   let len = vDSP_Length(x.count)
201 | 
202 |   // Find the maximum value in the input array.
203 |   var max: Float = 0
204 |   vDSP_maxv(x, 1, &max, len)
205 | 
206 |   // Subtract the maximum from all the elements in the array.
207 |   // Now the highest value in the array is 0.
208 |   max = -max
209 |   vDSP_vsadd(x, 1, &max, &x, 1, len)
210 | 
211 |   // Exponentiate all the elements in the array.
212 |   var count = Int32(x.count)
213 |   vvexpf(&x, x, &count)
214 | 
215 |   // Compute the sum of all exponentiated values.
216 |   var sum: Float = 0
217 |   vDSP_sve(x, 1, &sum, len)
218 | 
219 |   // Divide each element by the sum. This normalizes the array contents
220 |   // so that they all add up to 1.
221 |   vDSP_vsdiv(x, 1, &sum, &x, 1, len)
222 | 
223 |   return x
224 | }
225 | 
--------------------------------------------------------------------------------
/YOLO-CoreML/YOLO-CoreML/Helpers/VideoCapture.swift:
--------------------------------------------------------------------------------
1 | import UIKit
2 | import AVFoundation
3 | import CoreVideo
4 | 
5 | public protocol VideoCaptureDelegate: class {
6 |   func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame: CVPixelBuffer?, timestamp: CMTime)
7 | }
8 | 
9 | public class VideoCapture: NSObject {
10 |   public var previewLayer: AVCaptureVideoPreviewLayer?
11 |   public weak var delegate: VideoCaptureDelegate?
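  // `fps` is the rate at which frames are handed to the delegate, not the camera's
  // capture rate: captureOutput(_:didOutput:from:) below only forwards a frame when
  // at least 1/fps seconds have passed since the last one it delivered.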
12 | public var fps = 15 13 | 14 | let captureSession = AVCaptureSession() 15 | let videoOutput = AVCaptureVideoDataOutput() 16 | let queue = DispatchQueue(label: "net.machinethink.camera-queue") 17 | 18 | var lastTimestamp = CMTime() 19 | 20 | public func setUp(sessionPreset: AVCaptureSession.Preset = .medium, 21 | completion: @escaping (Bool) -> Void) { 22 | queue.async { 23 | let success = self.setUpCamera(sessionPreset: sessionPreset) 24 | DispatchQueue.main.async { 25 | completion(success) 26 | } 27 | } 28 | } 29 | 30 | func setUpCamera(sessionPreset: AVCaptureSession.Preset) -> Bool { 31 | captureSession.beginConfiguration() 32 | captureSession.sessionPreset = sessionPreset 33 | 34 | guard let captureDevice = AVCaptureDevice.default(for: AVMediaType.video) else { 35 | print("Error: no video devices available") 36 | return false 37 | } 38 | 39 | guard let videoInput = try? AVCaptureDeviceInput(device: captureDevice) else { 40 | print("Error: could not create AVCaptureDeviceInput") 41 | return false 42 | } 43 | 44 | if captureSession.canAddInput(videoInput) { 45 | captureSession.addInput(videoInput) 46 | } 47 | 48 | let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession) 49 | previewLayer.videoGravity = AVLayerVideoGravity.resizeAspect 50 | previewLayer.connection?.videoOrientation = .portrait 51 | self.previewLayer = previewLayer 52 | 53 | let settings: [String : Any] = [ 54 | kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA), 55 | ] 56 | 57 | videoOutput.videoSettings = settings 58 | videoOutput.alwaysDiscardsLateVideoFrames = true 59 | videoOutput.setSampleBufferDelegate(self, queue: queue) 60 | if captureSession.canAddOutput(videoOutput) { 61 | captureSession.addOutput(videoOutput) 62 | } 63 | 64 | // We want the buffers to be in portrait orientation otherwise they are 65 | // rotated by 90 degrees. Need to set this _after_ addOutput()! 66 | videoOutput.connection(with: AVMediaType.video)?.videoOrientation = .portrait 67 | 68 | captureSession.commitConfiguration() 69 | return true 70 | } 71 | 72 | public func start() { 73 | if !captureSession.isRunning { 74 | captureSession.startRunning() 75 | } 76 | } 77 | 78 | public func stop() { 79 | if captureSession.isRunning { 80 | captureSession.stopRunning() 81 | } 82 | } 83 | } 84 | 85 | extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { 86 | public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { 87 | // Because lowering the capture device's FPS looks ugly in the preview, 88 | // we capture at full speed but only call the delegate at its desired 89 | // framerate. 
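    // With the default fps of 15, for example, a frame is forwarded at most once
    // every 1/15 s (about 67 ms); frames that arrive sooner are simply skipped.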
90 | let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer) 91 | let deltaTime = timestamp - lastTimestamp 92 | if deltaTime >= CMTimeMake(1, Int32(fps)) { 93 | lastTimestamp = timestamp 94 | let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) 95 | delegate?.videoCapture(self, didCaptureVideoFrame: imageBuffer, timestamp: timestamp) 96 | } 97 | } 98 | 99 | public func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { 100 | //print("dropped frame") 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | APPL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | LSRequiresIPhoneOS 22 | 23 | NSCameraUsageDescription 24 | Let's do some deep learning! 25 | UILaunchStoryboardName 26 | Main 27 | UIMainStoryboardFile 28 | Main 29 | UIRequiredDeviceCapabilities 30 | 31 | armv7 32 | 33 | UIRequiresFullScreen 34 | 35 | UIStatusBarStyle 36 | UIStatusBarStyleLightContent 37 | UISupportedInterfaceOrientations 38 | 39 | UIInterfaceOrientationPortrait 40 | 41 | UISupportedInterfaceOrientations~ipad 42 | 43 | UIInterfaceOrientationPortrait 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/Main.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Menlo-Regular 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /YOLO-CoreML/YOLO-CoreML/ViewController.swift: -------------------------------------------------------------------------------- 1 | import UIKit 2 | import Vision 3 | import AVFoundation 4 | import CoreMedia 5 | import VideoToolbox 6 | 7 | class ViewController: UIViewController { 8 | @IBOutlet weak var videoPreview: UIView! 9 | @IBOutlet weak var timeLabel: UILabel! 10 | 11 | let yolo = YOLO() 12 | 13 | var videoCapture: VideoCapture! 14 | var request: VNCoreMLRequest! 15 | var startTimes: [CFTimeInterval] = [] 16 | 17 | var boundingBoxes = [BoundingBox]() 18 | var colors: [UIColor] = [] 19 | 20 | var framesDone = 0 21 | var frameCapturingStartTime = CACurrentMediaTime() 22 | let semaphore = DispatchSemaphore(value: 2) 23 | 24 | override func viewDidLoad() { 25 | super.viewDidLoad() 26 | 27 | timeLabel.text = "" 28 | 29 | setUpBoundingBoxes() 30 | setUpVision() 31 | setUpCamera() 32 | 33 | frameCapturingStartTime = CACurrentMediaTime() 34 | } 35 | 36 | override func didReceiveMemoryWarning() { 37 | super.didReceiveMemoryWarning() 38 | print(#function) 39 | } 40 | 41 | // MARK: - Initialization 42 | 43 | func setUpBoundingBoxes() { 44 | for _ in 0.. Double { 147 | // Measure how many frames were actually delivered per second. 
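    // The counters below are reset roughly once per second, so the returned value
    // is effectively a one-second moving average of the delivered frame rate.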
148 | framesDone += 1 149 | let frameCapturingElapsed = CACurrentMediaTime() - frameCapturingStartTime 150 | let currentFPSDelivered = Double(framesDone) / frameCapturingElapsed 151 | if frameCapturingElapsed > 1 { 152 | framesDone = 0 153 | frameCapturingStartTime = CACurrentMediaTime() 154 | } 155 | return currentFPSDelivered 156 | } 157 | 158 | func show(predictions: [YOLO.Prediction]) { 159 | for i in 0.. [Prediction] { 26 | if let output = try? model.prediction(input__0: image) { 27 | return computeBoundingBoxes(features: output.output__0) 28 | } else { 29 | return [] 30 | } 31 | } 32 | 33 | public func computeBoundingBoxes(features: MLMultiArray) -> [Prediction] { 34 | // assert(features.count == 125*13*13) 35 | assert(features.count == 425*19*19) 36 | 37 | var predictions = [Prediction]() 38 | 39 | let blockSize: Float = 32 40 | let gridHeight = 19 41 | let gridWidth = 19 42 | let boxesPerCell = 5;//Int(anchors.count/5) 43 | let numClasses = 80 44 | 45 | // The 608x608 image is divided into a 19x19 grid. Each of these grid cells 46 | // will predict 5 bounding boxes (boxesPerCell). A bounding box consists of 47 | // five data items: x, y, width, height, and a confidence score. Each grid 48 | // cell also predicts which class each bounding box belongs to. 49 | // 50 | // The "features" array therefore contains (numClasses + 5)*boxesPerCell 51 | // values for each grid cell, i.e. 425 channels. The total features array 52 | // contains 425x19x19 elements. 53 | 54 | // NOTE: It turns out that accessing the elements in the multi-array as 55 | // `features[[channel, cy, cx] as [NSNumber]].floatValue` is kinda slow. 56 | // It's much faster to use direct memory access to the features. 57 | let featurePointer = UnsafeMutablePointer(OpaquePointer(features.dataPointer)) 58 | let channelStride = features.strides[0].intValue 59 | let yStride = features.strides[1].intValue 60 | let xStride = features.strides[2].intValue 61 | 62 | func offset(_ channel: Int, _ x: Int, _ y: Int) -> Int { 63 | return channel*channelStride + y*yStride + x*xStride 64 | } 65 | 66 | for cy in 0.. confidenceThreshold { 133 | let rect = CGRect(x: CGFloat(x - w/2), y: CGFloat(y - h/2), 134 | width: CGFloat(w), height: CGFloat(h)) 135 | 136 | let prediction = Prediction(classIndex: detectedClass, 137 | score: confidenceInClass, 138 | rect: rect) 139 | predictions.append(prediction) 140 | } 141 | } 142 | } 143 | } 144 | 145 | // We already filtered out any bounding boxes that have very low scores, 146 | // but there still may be boxes that overlap too much with others. We'll 147 | // use "non-maximum suppression" to prune those duplicate bounding boxes. 148 | return nonMaxSuppression(boxes: predictions, limit: YOLO.maxBoundingBoxes, threshold: iouThreshold) 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /YOLO.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syshen/YOLO-CoreML/2c973fb2fa74ebdab7f3462d9c0f90f0ddcb3390/YOLO.jpg -------------------------------------------------------------------------------- /download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | curl -L https://goo.gl/TgrwZx > YOLO-CoreML/YOLO-CoreML/yolo.mlmodel 4 | 5 | curl -L https://goo.gl/bGfVRk > Converter/yolo.pb 6 | 7 | --------------------------------------------------------------------------------
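For readers piecing the files together: YOLO turns a CVPixelBuffer into Prediction values, labels maps each classIndex to a readable name, and BoundingBox draws a labelled rectangle on a CALayer. The sketch below shows that flow in isolation. It is illustrative only and not part of the repository: setUpOverlay and runDetection are hypothetical helpers, the pixel buffer is assumed to already match the network's input resolution, and the returned rects are in the model's input-image coordinates, so a real app still has to scale them into the preview layer's coordinate space before drawing.

import UIKit
import CoreVideo

// Illustrative helpers only (not in the repo). Assumes the YOLO, BoundingBox and
// labels definitions from the files above are in scope.
let yolo = YOLO()
let box = BoundingBox()

// Call once, e.g. from viewDidLoad, to attach the box's layers to an overlay layer.
func setUpOverlay(_ overlay: CALayer) {
  box.addToLayer(overlay)
}

// Call per frame with a pixel buffer at the network's input resolution.
func runDetection(on pixelBuffer: CVPixelBuffer) {
  // Run the Core ML model and decode its grid output into bounding boxes.
  guard let predictions = try? yolo.predict(image: pixelBuffer),
        let best = predictions.first else {
    box.hide()
    return
  }

  // Show the highest-scoring box with its class name and confidence.
  // Note: best.rect is in input-image coordinates, not screen points.
  let caption = String(format: "%@ %.1f%%", labels[best.classIndex], best.score * 100)
  box.show(frame: best.rect, label: caption, color: .red)
}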