├── .gitignore
├── Assets
│   └── demo.gif
├── ConversionScripts
│   ├── models.py
│   ├── test.png
│   ├── test.py
│   ├── test_converted.jpg
│   ├── test_out.jpg
│   └── transfer.py
├── FastDepth-CoreML.xcodeproj
│   ├── project.pbxproj
│   ├── project.xcworkspace
│   │   ├── contents.xcworkspacedata
│   │   ├── xcshareddata
│   │   │   └── IDEWorkspaceChecks.plist
│   │   └── xcuserdata
│   │       └── fincher.xcuserdatad
│   │           └── UserInterfaceState.xcuserstate
│   └── xcuserdata
│       └── fincher.xcuserdatad
│           ├── xcdebugger
│           │   └── Breakpoints_v2.xcbkptlist
│           └── xcschemes
│               └── xcschememanagement.plist
├── FastDepth-CoreML
│   ├── AppDelegate.swift
│   ├── Assets.xcassets
│   │   ├── AppIcon.appiconset
│   │   │   └── Contents.json
│   │   └── Contents.json
│   ├── Base.lproj
│   │   ├── LaunchScreen.storyboard
│   │   └── Main.storyboard
│   ├── CoreMLHelpers
│   │   ├── Array+Extensions.swift
│   │   ├── CGImage+CVPixelBuffer.swift
│   │   ├── CGImage+RawBytes.swift
│   │   ├── CGImagePropertyOrientation.swift
│   │   ├── CVPixelBuffer+Helpers.swift
│   │   ├── MLMultiArray+Helpers.swift
│   │   ├── MLMultiArray+Image.swift
│   │   ├── Math.swift
│   │   ├── NonMaxSuppression.swift
│   │   ├── Predictions.swift
│   │   ├── UIImage+CVPixelBuffer.swift
│   │   ├── UIImage+Extensions.swift
│   │   └── UIImage+RawBytes.swift
│   ├── FastDepth-CoreML.entitlements
│   ├── Info.plist
│   ├── LiveImageViewController.swift
│   ├── Measure.swift
│   ├── VideoCapture.swift
│   └── mlmodel
│       └── FastDepth.mlmodel
├── Models
│   ├── FastDepth.mlmodel
│   ├── fastdepth.onnx
│   └── fastestdepth.onnx
└── README.md
/.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | .DS_Store 3 | -------------------------------------------------------------------------------- /Assets/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JustinFincher/FastDepth-CoreML/65d643dfd31000d4f505857071e59ab211a5f0cf/Assets/demo.gif -------------------------------------------------------------------------------- /ConversionScripts/models.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torchvision.models 5 | import collections 6 | import math 7 | import torch.nn.functional as F 8 | import imagenet.mobilenet 9 | 10 | class Identity(nn.Module): 11 | # a dummy identity module 12 | def __init__(self): 13 | super(Identity, self).__init__() 14 | 15 | def forward(self, x): 16 | return x 17 | 18 | class Unpool(nn.Module): 19 | # Unpool: 2*2 unpooling with zero padding 20 | def __init__(self, stride=2): 21 | super(Unpool, self).__init__() 22 | 23 | self.stride = stride 24 | 25 | # create kernel [1, 0; 0, 0] 26 | self.mask = torch.zeros(1, 1, stride, stride) 27 | self.mask[:,:,0,0] = 1 28 | 29 | def forward(self, x): 30 | assert x.dim() == 4 31 | num_channels = x.size(1) 32 | return F.conv_transpose2d(x, 33 | self.mask.detach().type_as(x).expand(num_channels, 1, -1, -1), 34 | stride=self.stride, groups=num_channels) 35 | 36 | def weights_init(m): 37 | # Initialize kernel weights with Gaussian distributions 38 | if isinstance(m, nn.Conv2d): 39 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 40 | m.weight.data.normal_(0, math.sqrt(2. / n)) 41 | if m.bias is not None: 42 | m.bias.data.zero_() 43 | elif isinstance(m, nn.ConvTranspose2d): 44 | n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels 45 | m.weight.data.normal_(0, math.sqrt(2.
/ n)) 46 | if m.bias is not None: 47 | m.bias.data.zero_() 48 | elif isinstance(m, nn.BatchNorm2d): 49 | m.weight.data.fill_(1) 50 | m.bias.data.zero_() 51 | 52 | def conv(in_channels, out_channels, kernel_size): 53 | padding = (kernel_size-1) // 2 54 | assert 2*padding == kernel_size-1, "parameters incorrect. kernel={}, padding={}".format(kernel_size, padding) 55 | return nn.Sequential( 56 | nn.Conv2d(in_channels,out_channels,kernel_size,stride=1,padding=padding,bias=False), 57 | nn.BatchNorm2d(out_channels), 58 | nn.ReLU(inplace=True), 59 | ) 60 | 61 | def depthwise(in_channels, kernel_size): 62 | padding = (kernel_size-1) // 2 63 | assert 2*padding == kernel_size-1, "parameters incorrect. kernel={}, padding={}".format(kernel_size, padding) 64 | return nn.Sequential( 65 | nn.Conv2d(in_channels,in_channels,kernel_size,stride=1,padding=padding,bias=False,groups=in_channels), 66 | nn.BatchNorm2d(in_channels), 67 | nn.ReLU(inplace=True), 68 | ) 69 | 70 | def pointwise(in_channels, out_channels): 71 | return nn.Sequential( 72 | nn.Conv2d(in_channels,out_channels,1,1,0,bias=False), 73 | nn.BatchNorm2d(out_channels), 74 | nn.ReLU(inplace=True), 75 | ) 76 | 77 | def convt(in_channels, out_channels, kernel_size): 78 | stride = 2 79 | padding = (kernel_size - 1) // 2 80 | output_padding = kernel_size % 2 81 | assert -2 - 2*padding + kernel_size + output_padding == 0, "deconv parameters incorrect" 82 | return nn.Sequential( 83 | nn.ConvTranspose2d(in_channels,out_channels,kernel_size, 84 | stride,padding,output_padding,bias=False), 85 | nn.BatchNorm2d(out_channels), 86 | nn.ReLU(inplace=True), 87 | ) 88 | 89 | def convt_dw(channels, kernel_size): 90 | stride = 2 91 | padding = (kernel_size - 1) // 2 92 | output_padding = kernel_size % 2 93 | assert -2 - 2*padding + kernel_size + output_padding == 0, "deconv parameters incorrect" 94 | return nn.Sequential( 95 | nn.ConvTranspose2d(channels,channels,kernel_size, 96 | stride,padding,output_padding,bias=False,groups=channels), 97 | nn.BatchNorm2d(channels), 98 | nn.ReLU(inplace=True), 99 | ) 100 | 101 | def upconv(in_channels, out_channels): 102 | return nn.Sequential( 103 | Unpool(2), 104 | nn.Conv2d(in_channels,out_channels,kernel_size=5,stride=1,padding=2,bias=False), 105 | nn.BatchNorm2d(out_channels), 106 | nn.ReLU(), 107 | ) 108 | 109 | class upproj(nn.Module): 110 | # UpProj module has two branches, with a Unpool at the start and a ReLu at the end 111 | # upper branch: 5*5 conv -> batchnorm -> ReLU -> 3*3 conv -> batchnorm 112 | # bottom branch: 5*5 conv -> batchnorm 113 | 114 | def __init__(self, in_channels, out_channels): 115 | super(upproj, self).__init__() 116 | self.unpool = Unpool(2) 117 | self.branch1 = nn.Sequential( 118 | nn.Conv2d(in_channels,out_channels,kernel_size=5,stride=1,padding=2,bias=False), 119 | nn.BatchNorm2d(out_channels), 120 | nn.ReLU(inplace=True), 121 | nn.Conv2d(out_channels,out_channels,kernel_size=3,stride=1,padding=1,bias=False), 122 | nn.BatchNorm2d(out_channels), 123 | ) 124 | self.branch2 = nn.Sequential( 125 | nn.Conv2d(in_channels,out_channels,kernel_size=5,stride=1,padding=2,bias=False), 126 | nn.BatchNorm2d(out_channels), 127 | ) 128 | 129 | def forward(self, x): 130 | x = self.unpool(x) 131 | x1 = self.branch1(x) 132 | x2 = self.branch2(x) 133 | return F.relu(x1 + x2) 134 | 135 | class Decoder(nn.Module): 136 | names = ['deconv{}{}'.format(i,dw) for i in range(3,10,2) for dw in ['', 'dw']] 137 | names.append("upconv") 138 | names.append("upproj") 139 | for i in range(3,10,2): 140 | for dw in ['', 'dw']: 141 
| names.append("nnconv{}{}".format(i, dw)) 142 | names.append("blconv{}{}".format(i, dw)) 143 | names.append("shuffle{}{}".format(i, dw)) 144 | 145 | class DeConv(nn.Module): 146 | 147 | def __init__(self, kernel_size, dw): 148 | super(DeConv, self).__init__() 149 | if dw: 150 | self.convt1 = nn.Sequential( 151 | convt_dw(1024, kernel_size), 152 | pointwise(1024, 512)) 153 | self.convt2 = nn.Sequential( 154 | convt_dw(512, kernel_size), 155 | pointwise(512, 256)) 156 | self.convt3 = nn.Sequential( 157 | convt_dw(256, kernel_size), 158 | pointwise(256, 128)) 159 | self.convt4 = nn.Sequential( 160 | convt_dw(128, kernel_size), 161 | pointwise(128, 64)) 162 | self.convt5 = nn.Sequential( 163 | convt_dw(64, kernel_size), 164 | pointwise(64, 32)) 165 | else: 166 | self.convt1 = convt(1024, 512, kernel_size) 167 | self.convt2 = convt(512, 256, kernel_size) 168 | self.convt3 = convt(256, 128, kernel_size) 169 | self.convt4 = convt(128, 64, kernel_size) 170 | self.convt5 = convt(64, 32, kernel_size) 171 | self.convf = pointwise(32, 1) 172 | 173 | def forward(self, x): 174 | x = self.convt1(x) 175 | x = self.convt2(x) 176 | x = self.convt3(x) 177 | x = self.convt4(x) 178 | x = self.convt5(x) 179 | x = self.convf(x) 180 | return x 181 | 182 | 183 | class UpConv(nn.Module): 184 | 185 | def __init__(self): 186 | super(UpConv, self).__init__() 187 | self.upconv1 = upconv(1024, 512) 188 | self.upconv2 = upconv(512, 256) 189 | self.upconv3 = upconv(256, 128) 190 | self.upconv4 = upconv(128, 64) 191 | self.upconv5 = upconv(64, 32) 192 | self.convf = pointwise(32, 1) 193 | 194 | def forward(self, x): 195 | x = self.upconv1(x) 196 | x = self.upconv2(x) 197 | x = self.upconv3(x) 198 | x = self.upconv4(x) 199 | x = self.upconv5(x) 200 | x = self.convf(x) 201 | return x 202 | 203 | class UpProj(nn.Module): 204 | # UpProj decoder consists of 4 upproj modules with decreasing number of channels and increasing feature map size 205 | 206 | def __init__(self): 207 | super(UpProj, self).__init__() 208 | self.upproj1 = upproj(1024, 512) 209 | self.upproj2 = upproj(512, 256) 210 | self.upproj3 = upproj(256, 128) 211 | self.upproj4 = upproj(128, 64) 212 | self.upproj5 = upproj(64, 32) 213 | self.convf = pointwise(32, 1) 214 | 215 | def forward(self, x): 216 | x = self.upproj1(x) 217 | x = self.upproj2(x) 218 | x = self.upproj3(x) 219 | x = self.upproj4(x) 220 | x = self.upproj5(x) 221 | x = self.convf(x) 222 | return x 223 | 224 | class NNConv(nn.Module): 225 | 226 | def __init__(self, kernel_size, dw): 227 | super(NNConv, self).__init__() 228 | if dw: 229 | self.conv1 = nn.Sequential( 230 | depthwise(1024, kernel_size), 231 | pointwise(1024, 512)) 232 | self.conv2 = nn.Sequential( 233 | depthwise(512, kernel_size), 234 | pointwise(512, 256)) 235 | self.conv3 = nn.Sequential( 236 | depthwise(256, kernel_size), 237 | pointwise(256, 128)) 238 | self.conv4 = nn.Sequential( 239 | depthwise(128, kernel_size), 240 | pointwise(128, 64)) 241 | self.conv5 = nn.Sequential( 242 | depthwise(64, kernel_size), 243 | pointwise(64, 32)) 244 | self.conv6 = pointwise(32, 1) 245 | else: 246 | self.conv1 = conv(1024, 512, kernel_size) 247 | self.conv2 = conv(512, 256, kernel_size) 248 | self.conv3 = conv(256, 128, kernel_size) 249 | self.conv4 = conv(128, 64, kernel_size) 250 | self.conv5 = conv(64, 32, kernel_size) 251 | self.conv6 = pointwise(32, 1) 252 | 253 | def forward(self, x): 254 | x = self.conv1(x) 255 | x = F.interpolate(x, scale_factor=2, mode='nearest') 256 | 257 | x = self.conv2(x) 258 | x = F.interpolate(x, scale_factor=2, 
mode='nearest') 259 | 260 | x = self.conv3(x) 261 | x = F.interpolate(x, scale_factor=2, mode='nearest') 262 | 263 | x = self.conv4(x) 264 | x = F.interpolate(x, scale_factor=2, mode='nearest') 265 | 266 | x = self.conv5(x) 267 | x = F.interpolate(x, scale_factor=2, mode='nearest') 268 | 269 | x = self.conv6(x) 270 | return x 271 | 272 | class BLConv(NNConv): 273 | 274 | def __init__(self, kernel_size, dw): 275 | super(BLConv, self).__init__(kernel_size, dw) 276 | 277 | def forward(self, x): 278 | x = self.conv1(x) 279 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) 280 | 281 | x = self.conv2(x) 282 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) 283 | 284 | x = self.conv3(x) 285 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) 286 | 287 | x = self.conv4(x) 288 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) 289 | 290 | x = self.conv5(x) 291 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) 292 | 293 | x = self.conv6(x) 294 | return x 295 | 296 | class ShuffleConv(nn.Module): 297 | 298 | def __init__(self, kernel_size, dw): 299 | super(ShuffleConv, self).__init__() 300 | if dw: 301 | self.conv1 = nn.Sequential( 302 | depthwise(256, kernel_size), 303 | pointwise(256, 256)) 304 | self.conv2 = nn.Sequential( 305 | depthwise(64, kernel_size), 306 | pointwise(64, 64)) 307 | self.conv3 = nn.Sequential( 308 | depthwise(16, kernel_size), 309 | pointwise(16, 16)) 310 | self.conv4 = nn.Sequential( 311 | depthwise(4, kernel_size), 312 | pointwise(4, 4)) 313 | else: 314 | self.conv1 = conv(256, 256, kernel_size) 315 | self.conv2 = conv(64, 64, kernel_size) 316 | self.conv3 = conv(16, 16, kernel_size) 317 | self.conv4 = conv(4, 4, kernel_size) 318 | 319 | def forward(self, x): 320 | x = F.pixel_shuffle(x, 2) 321 | x = self.conv1(x) 322 | 323 | x = F.pixel_shuffle(x, 2) 324 | x = self.conv2(x) 325 | 326 | x = F.pixel_shuffle(x, 2) 327 | x = self.conv3(x) 328 | 329 | x = F.pixel_shuffle(x, 2) 330 | x = self.conv4(x) 331 | 332 | x = F.pixel_shuffle(x, 2) 333 | return x 334 | 335 | def choose_decoder(decoder): 336 | depthwise = ('dw' in decoder) 337 | if decoder[:6] == 'deconv': 338 | assert len(decoder)==7 or (len(decoder)==9 and 'dw' in decoder) 339 | kernel_size = int(decoder[6]) 340 | model = DeConv(kernel_size, depthwise) 341 | elif decoder == "upproj": 342 | model = UpProj() 343 | elif decoder == "upconv": 344 | model = UpConv() 345 | elif decoder[:7] == 'shuffle': 346 | assert len(decoder)==8 or (len(decoder)==10 and 'dw' in decoder) 347 | kernel_size = int(decoder[7]) 348 | model = ShuffleConv(kernel_size, depthwise) 349 | elif decoder[:6] == 'nnconv': 350 | assert len(decoder)==7 or (len(decoder)==9 and 'dw' in decoder) 351 | kernel_size = int(decoder[6]) 352 | model = NNConv(kernel_size, depthwise) 353 | elif decoder[:6] == 'blconv': 354 | assert len(decoder)==7 or (len(decoder)==9 and 'dw' in decoder) 355 | kernel_size = int(decoder[6]) 356 | model = BLConv(kernel_size, depthwise) 357 | else: 358 | assert False, "invalid option for decoder: {}".format(decoder) 359 | model.apply(weights_init) 360 | return model 361 | 362 | 363 | class ResNet(nn.Module): 364 | def __init__(self, layers, decoder, output_size, in_channels=3, pretrained=True): 365 | 366 | if layers not in [18, 34, 50, 101, 152]: 367 | raise RuntimeError('Only 18, 34, 50, 101, and 152 layer model are defined for ResNet. 
Got {}'.format(layers)) 368 | 369 | super(ResNet, self).__init__() 370 | self.output_size = output_size 371 | pretrained_model = torchvision.models.__dict__['resnet{}'.format(layers)](pretrained=pretrained) 372 | if not pretrained: 373 | pretrained_model.apply(weights_init) 374 | 375 | if in_channels == 3: 376 | self.conv1 = pretrained_model._modules['conv1'] 377 | self.bn1 = pretrained_model._modules['bn1'] 378 | else: 379 | self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False) 380 | self.bn1 = nn.BatchNorm2d(64) 381 | weights_init(self.conv1) 382 | weights_init(self.bn1) 383 | 384 | self.relu = pretrained_model._modules['relu'] 385 | self.maxpool = pretrained_model._modules['maxpool'] 386 | self.layer1 = pretrained_model._modules['layer1'] 387 | self.layer2 = pretrained_model._modules['layer2'] 388 | self.layer3 = pretrained_model._modules['layer3'] 389 | self.layer4 = pretrained_model._modules['layer4'] 390 | 391 | # clear memory 392 | del pretrained_model 393 | 394 | # define number of intermediate channels 395 | if layers <= 34: 396 | num_channels = 512 397 | elif layers >= 50: 398 | num_channels = 2048 399 | self.conv2 = nn.Conv2d(num_channels, 1024, 1) 400 | weights_init(self.conv2) 401 | self.decoder = choose_decoder(decoder) 402 | 403 | def forward(self, x): 404 | # resnet 405 | x = self.conv1(x) 406 | x = self.bn1(x) 407 | x = self.relu(x) 408 | x = self.maxpool(x) 409 | x = self.layer1(x) 410 | x = self.layer2(x) 411 | x = self.layer3(x) 412 | x = self.layer4(x) 413 | x = self.conv2(x) 414 | 415 | # decoder 416 | x = self.decoder(x) 417 | 418 | return x 419 | 420 | class MobileNet(nn.Module): 421 | def __init__(self, decoder, output_size, in_channels=3, pretrained=True): 422 | 423 | super(MobileNet, self).__init__() 424 | self.output_size = output_size 425 | mobilenet = imagenet.mobilenet.MobileNet() 426 | if pretrained: 427 | pretrained_path = os.path.join('imagenet', 'results', 'imagenet.arch=mobilenet.lr=0.1.bs=256', 'model_best.pth.tar') 428 | checkpoint = torch.load(pretrained_path) 429 | state_dict = checkpoint['state_dict'] 430 | 431 | from collections import OrderedDict 432 | new_state_dict = OrderedDict() 433 | for k, v in state_dict.items(): 434 | name = k[7:] # remove `module.` 435 | new_state_dict[name] = v 436 | mobilenet.load_state_dict(new_state_dict) 437 | else: 438 | mobilenet.apply(weights_init) 439 | 440 | if in_channels == 3: 441 | self.mobilenet = nn.Sequential(*(mobilenet.model[i] for i in range(14))) 442 | else: 443 | def conv_bn(inp, oup, stride): 444 | return nn.Sequential( 445 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 446 | nn.BatchNorm2d(oup), 447 | nn.ReLU6(inplace=True) 448 | ) 449 | 450 | self.mobilenet = nn.Sequential( 451 | conv_bn(in_channels, 32, 2), 452 | *(mobilenet.model[i] for i in range(1,14)) 453 | ) 454 | 455 | self.decoder = choose_decoder(decoder) 456 | 457 | def forward(self, x): 458 | x = self.mobilenet(x) 459 | x = self.decoder(x) 460 | return x 461 | 462 | class ResNetSkipAdd(nn.Module): 463 | def __init__(self, layers, output_size, in_channels=3, pretrained=True): 464 | 465 | if layers not in [18, 34, 50, 101, 152]: 466 | raise RuntimeError('Only 18, 34, 50, 101, and 152 layer model are defined for ResNet. 
Got {}'.format(layers)) 467 | 468 | super(ResNetSkipAdd, self).__init__() 469 | self.output_size = output_size 470 | pretrained_model = torchvision.models.__dict__['resnet{}'.format(layers)](pretrained=pretrained) 471 | if not pretrained: 472 | pretrained_model.apply(weights_init) 473 | 474 | if in_channels == 3: 475 | self.conv1 = pretrained_model._modules['conv1'] 476 | self.bn1 = pretrained_model._modules['bn1'] 477 | else: 478 | self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False) 479 | self.bn1 = nn.BatchNorm2d(64) 480 | weights_init(self.conv1) 481 | weights_init(self.bn1) 482 | 483 | self.relu = pretrained_model._modules['relu'] 484 | self.maxpool = pretrained_model._modules['maxpool'] 485 | self.layer1 = pretrained_model._modules['layer1'] 486 | self.layer2 = pretrained_model._modules['layer2'] 487 | self.layer3 = pretrained_model._modules['layer3'] 488 | self.layer4 = pretrained_model._modules['layer4'] 489 | 490 | # clear memory 491 | del pretrained_model 492 | 493 | # define number of intermediate channels 494 | if layers <= 34: 495 | num_channels = 512 496 | elif layers >= 50: 497 | num_channels = 2048 498 | self.conv2 = nn.Conv2d(num_channels, 1024, 1) 499 | weights_init(self.conv2) 500 | 501 | kernel_size = 5 502 | self.decode_conv1 = conv(1024, 512, kernel_size) 503 | self.decode_conv2 = conv(512, 256, kernel_size) 504 | self.decode_conv3 = conv(256, 128, kernel_size) 505 | self.decode_conv4 = conv(128, 64, kernel_size) 506 | self.decode_conv5 = conv(64, 32, kernel_size) 507 | self.decode_conv6 = pointwise(32, 1) 508 | weights_init(self.decode_conv1) 509 | weights_init(self.decode_conv2) 510 | weights_init(self.decode_conv3) 511 | weights_init(self.decode_conv4) 512 | weights_init(self.decode_conv5) 513 | weights_init(self.decode_conv6) 514 | 515 | def forward(self, x): 516 | # resnet 517 | x = self.conv1(x) 518 | x = self.bn1(x) 519 | x1 = self.relu(x) 520 | # print("x1", x1.size()) 521 | x2 = self.maxpool(x1) 522 | # print("x2", x2.size()) 523 | x3 = self.layer1(x2) 524 | # print("x3", x3.size()) 525 | x4 = self.layer2(x3) 526 | # print("x4", x4.size()) 527 | x5 = self.layer3(x4) 528 | # print("x5", x5.size()) 529 | x6 = self.layer4(x5) 530 | # print("x6", x6.size()) 531 | x7 = self.conv2(x6) 532 | 533 | # decoder 534 | y10 = self.decode_conv1(x7) 535 | # print("y10", y10.size()) 536 | y9 = F.interpolate(y10 + x6, scale_factor=2, mode='nearest') 537 | # print("y9", y9.size()) 538 | y8 = self.decode_conv2(y9) 539 | # print("y8", y8.size()) 540 | y7 = F.interpolate(y8 + x5, scale_factor=2, mode='nearest') 541 | # print("y7", y7.size()) 542 | y6 = self.decode_conv3(y7) 543 | # print("y6", y6.size()) 544 | y5 = F.interpolate(y6 + x4, scale_factor=2, mode='nearest') 545 | # print("y5", y5.size()) 546 | y4 = self.decode_conv4(y5) 547 | # print("y4", y4.size()) 548 | y3 = F.interpolate(y4 + x3, scale_factor=2, mode='nearest') 549 | # print("y3", y3.size()) 550 | y2 = self.decode_conv5(y3 + x1) 551 | # print("y2", y2.size()) 552 | y1 = F.interpolate(y2, scale_factor=2, mode='nearest') 553 | # print("y1", y1.size()) 554 | y = self.decode_conv6(y1) 555 | 556 | return y 557 | 558 | class ResNetSkipConcat(nn.Module): 559 | def __init__(self, layers, output_size, in_channels=3, pretrained=True): 560 | 561 | if layers not in [18, 34, 50, 101, 152]: 562 | raise RuntimeError('Only 18, 34, 50, 101, and 152 layer model are defined for ResNet. 
Got {}'.format(layers)) 563 | 564 | super(ResNetSkipConcat, self).__init__() 565 | self.output_size = output_size 566 | pretrained_model = torchvision.models.__dict__['resnet{}'.format(layers)](pretrained=pretrained) 567 | if not pretrained: 568 | pretrained_model.apply(weights_init) 569 | 570 | if in_channels == 3: 571 | self.conv1 = pretrained_model._modules['conv1'] 572 | self.bn1 = pretrained_model._modules['bn1'] 573 | else: 574 | self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False) 575 | self.bn1 = nn.BatchNorm2d(64) 576 | weights_init(self.conv1) 577 | weights_init(self.bn1) 578 | 579 | self.relu = pretrained_model._modules['relu'] 580 | self.maxpool = pretrained_model._modules['maxpool'] 581 | self.layer1 = pretrained_model._modules['layer1'] 582 | self.layer2 = pretrained_model._modules['layer2'] 583 | self.layer3 = pretrained_model._modules['layer3'] 584 | self.layer4 = pretrained_model._modules['layer4'] 585 | 586 | # clear memory 587 | del pretrained_model 588 | 589 | # define number of intermediate channels 590 | if layers <= 34: 591 | num_channels = 512 592 | elif layers >= 50: 593 | num_channels = 2048 594 | self.conv2 = nn.Conv2d(num_channels, 1024, 1) 595 | weights_init(self.conv2) 596 | 597 | kernel_size = 5 598 | self.decode_conv1 = conv(1024, 512, kernel_size) 599 | self.decode_conv2 = conv(768, 256, kernel_size) 600 | self.decode_conv3 = conv(384, 128, kernel_size) 601 | self.decode_conv4 = conv(192, 64, kernel_size) 602 | self.decode_conv5 = conv(128, 32, kernel_size) 603 | self.decode_conv6 = pointwise(32, 1) 604 | weights_init(self.decode_conv1) 605 | weights_init(self.decode_conv2) 606 | weights_init(self.decode_conv3) 607 | weights_init(self.decode_conv4) 608 | weights_init(self.decode_conv5) 609 | weights_init(self.decode_conv6) 610 | 611 | def forward(self, x): 612 | # resnet 613 | x = self.conv1(x) 614 | x = self.bn1(x) 615 | x1 = self.relu(x) 616 | # print("x1", x1.size()) 617 | x2 = self.maxpool(x1) 618 | # print("x2", x2.size()) 619 | x3 = self.layer1(x2) 620 | # print("x3", x3.size()) 621 | x4 = self.layer2(x3) 622 | # print("x4", x4.size()) 623 | x5 = self.layer3(x4) 624 | # print("x5", x5.size()) 625 | x6 = self.layer4(x5) 626 | # print("x6", x6.size()) 627 | x7 = self.conv2(x6) 628 | 629 | # decoder 630 | y10 = self.decode_conv1(x7) 631 | # print("y10", y10.size()) 632 | y9 = F.interpolate(y10, scale_factor=2, mode='nearest') 633 | # print("y9", y9.size()) 634 | y8 = self.decode_conv2(torch.cat((y9, x5), 1)) 635 | # print("y8", y8.size()) 636 | y7 = F.interpolate(y8, scale_factor=2, mode='nearest') 637 | # print("y7", y7.size()) 638 | y6 = self.decode_conv3(torch.cat((y7, x4), 1)) 639 | # print("y6", y6.size()) 640 | y5 = F.interpolate(y6, scale_factor=2, mode='nearest') 641 | # print("y5", y5.size()) 642 | y4 = self.decode_conv4(torch.cat((y5, x3), 1)) 643 | # print("y4", y4.size()) 644 | y3 = F.interpolate(y4, scale_factor=2, mode='nearest') 645 | # print("y3", y3.size()) 646 | y2 = self.decode_conv5(torch.cat((y3, x1), 1)) 647 | # print("y2", y2.size()) 648 | y1 = F.interpolate(y2, scale_factor=2, mode='nearest') 649 | # print("y1", y1.size()) 650 | y = self.decode_conv6(y1) 651 | 652 | return y 653 | 654 | class MobileNetSkipAdd(nn.Module): 655 | def __init__(self, output_size, pretrained=True): 656 | 657 | super(MobileNetSkipAdd, self).__init__() 658 | self.output_size = output_size 659 | mobilenet = imagenet.mobilenet.MobileNet() 660 | if pretrained: 661 | pretrained_path = os.path.join('imagenet', 'results', 
'imagenet.arch=mobilenet.lr=0.1.bs=256', 'model_best.pth.tar') 662 | checkpoint = torch.load(pretrained_path,map_location=torch.device('cpu')) 663 | state_dict = checkpoint['state_dict'] 664 | 665 | from collections import OrderedDict 666 | new_state_dict = OrderedDict() 667 | for k, v in state_dict.items(): 668 | name = k[7:] # remove `module.` 669 | new_state_dict[name] = v 670 | mobilenet.load_state_dict(new_state_dict) 671 | else: 672 | mobilenet.apply(weights_init) 673 | 674 | for i in range(14): 675 | setattr( self, 'conv{}'.format(i), mobilenet.model[i]) 676 | 677 | kernel_size = 5 678 | # self.decode_conv1 = conv(1024, 512, kernel_size) 679 | # self.decode_conv2 = conv(512, 256, kernel_size) 680 | # self.decode_conv3 = conv(256, 128, kernel_size) 681 | # self.decode_conv4 = conv(128, 64, kernel_size) 682 | # self.decode_conv5 = conv(64, 32, kernel_size) 683 | self.decode_conv1 = nn.Sequential( 684 | depthwise(1024, kernel_size), 685 | pointwise(1024, 512)) 686 | self.decode_conv2 = nn.Sequential( 687 | depthwise(512, kernel_size), 688 | pointwise(512, 256)) 689 | self.decode_conv3 = nn.Sequential( 690 | depthwise(256, kernel_size), 691 | pointwise(256, 128)) 692 | self.decode_conv4 = nn.Sequential( 693 | depthwise(128, kernel_size), 694 | pointwise(128, 64)) 695 | self.decode_conv5 = nn.Sequential( 696 | depthwise(64, kernel_size), 697 | pointwise(64, 32)) 698 | self.decode_conv6 = pointwise(32, 1) 699 | weights_init(self.decode_conv1) 700 | weights_init(self.decode_conv2) 701 | weights_init(self.decode_conv3) 702 | weights_init(self.decode_conv4) 703 | weights_init(self.decode_conv5) 704 | weights_init(self.decode_conv6) 705 | 706 | def forward(self, x): 707 | # skip connections: dec4: enc1 708 | # dec 3: enc2 or enc3 709 | # dec 2: enc4 or enc5 710 | for i in range(14): 711 | layer = getattr(self, 'conv{}'.format(i)) 712 | x = layer(x) 713 | # print("{}: {}".format(i, x.size())) 714 | if i==1: 715 | x1 = x 716 | elif i==3: 717 | x2 = x 718 | elif i==5: 719 | x3 = x 720 | for i in range(1,6): 721 | layer = getattr(self, 'decode_conv{}'.format(i)) 722 | x = layer(x) 723 | x = F.interpolate(x, scale_factor=2, mode='nearest') 724 | if i==4: 725 | x = x + x1 726 | elif i==3: 727 | x = x + x2 728 | elif i==2: 729 | x = x + x3 730 | # print("{}: {}".format(i, x.size())) 731 | x = self.decode_conv6(x) 732 | return x 733 | 734 | class MobileNetSkipConcat(nn.Module): 735 | def __init__(self, output_size, pretrained=True): 736 | 737 | super(MobileNetSkipConcat, self).__init__() 738 | self.output_size = output_size 739 | mobilenet = imagenet.mobilenet.MobileNet() 740 | if pretrained: 741 | pretrained_path = os.path.join('imagenet', 'results', 'imagenet.arch=mobilenet.lr=0.1.bs=256', 'model_best.pth.tar') 742 | # pretrained_path = os.path.join('mobilenet-nnconv5dw-skipadd-pruned.pth.tar') 743 | checkpoint = torch.load(pretrained_path) 744 | state_dict = checkpoint['state_dict'] 745 | 746 | from collections import OrderedDict 747 | new_state_dict = OrderedDict() 748 | for k, v in state_dict.items(): 749 | name = k[7:] # remove `module.` 750 | new_state_dict[name] = v 751 | mobilenet.load_state_dict(new_state_dict) 752 | else: 753 | mobilenet.apply(weights_init) 754 | 755 | for i in range(14): 756 | setattr( self, 'conv{}'.format(i), mobilenet.model[i]) 757 | 758 | kernel_size = 5 759 | # self.decode_conv1 = conv(1024, 512, kernel_size) 760 | # self.decode_conv2 = conv(512, 256, kernel_size) 761 | # self.decode_conv3 = conv(256, 128, kernel_size) 762 | # self.decode_conv4 = conv(128, 64, 
kernel_size) 763 | # self.decode_conv5 = conv(64, 32, kernel_size) 764 | self.decode_conv1 = nn.Sequential( 765 | depthwise(1024, kernel_size), 766 | pointwise(1024, 512)) 767 | self.decode_conv2 = nn.Sequential( 768 | depthwise(512, kernel_size), 769 | pointwise(512, 256)) 770 | self.decode_conv3 = nn.Sequential( 771 | depthwise(512, kernel_size), 772 | pointwise(512, 128)) 773 | self.decode_conv4 = nn.Sequential( 774 | depthwise(256, kernel_size), 775 | pointwise(256, 64)) 776 | self.decode_conv5 = nn.Sequential( 777 | depthwise(128, kernel_size), 778 | pointwise(128, 32)) 779 | self.decode_conv6 = pointwise(32, 1) 780 | weights_init(self.decode_conv1) 781 | weights_init(self.decode_conv2) 782 | weights_init(self.decode_conv3) 783 | weights_init(self.decode_conv4) 784 | weights_init(self.decode_conv5) 785 | weights_init(self.decode_conv6) 786 | 787 | def forward(self, x): 788 | # skip connections: dec4: enc1 789 | # dec 3: enc2 or enc3 790 | # dec 2: enc4 or enc5 791 | for i in range(14): 792 | layer = getattr(self, 'conv{}'.format(i)) 793 | x = layer(x) 794 | # print("{}: {}".format(i, x.size())) 795 | if i==1: 796 | x1 = x 797 | elif i==3: 798 | x2 = x 799 | elif i==5: 800 | x3 = x 801 | for i in range(1,6): 802 | layer = getattr(self, 'decode_conv{}'.format(i)) 803 | # print("{}a: {}".format(i, x.size())) 804 | x = layer(x) 805 | # print("{}b: {}".format(i, x.size())) 806 | x = F.interpolate(x, scale_factor=2, mode='nearest') 807 | if i==4: 808 | x = torch.cat((x, x1), 1) 809 | elif i==3: 810 | x = torch.cat((x, x2), 1) 811 | elif i==2: 812 | x = torch.cat((x, x3), 1) 813 | # print("{}c: {}".format(i, x.size())) 814 | x = self.decode_conv6(x) 815 | return x 816 | -------------------------------------------------------------------------------- /ConversionScripts/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JustinFincher/FastDepth-CoreML/65d643dfd31000d4f505857071e59ab211a5f0cf/ConversionScripts/test.png -------------------------------------------------------------------------------- /ConversionScripts/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import onnxruntime 4 | import numpy as np 5 | import PIL 6 | import PIL.Image 7 | from skimage.transform import resize 8 | from skimage import io 9 | 10 | onnx_fast_model_path = os.path.join('fastestdepth.onnx') 11 | # onnx_faster_model = onnx.load(onnx_fast_model_path) 12 | 13 | 14 | rgba_image = PIL.Image.open("./test.png") 15 | rgb_image = rgba_image.convert('RGB') 16 | rgb_image.save('./test_converted.jpg') 17 | 18 | img = io.imread("./test.png") 19 | img = np.rollaxis(img, 2, 0) 20 | img224 = resize(img / 255, (3, 224, 224), anti_aliasing=True) 21 | ximg = img224[np.newaxis, :, :, :] 22 | ximg = ximg.astype(np.float32) 23 | sess = onnxruntime.InferenceSession(onnx_fast_model_path) 24 | 25 | input_name = sess.get_inputs()[0].name 26 | label_name = sess.get_outputs()[0].name 27 | result = sess.run(None, {input_name: ximg}) 28 | prob = result[0] -------------------------------------------------------------------------------- /ConversionScripts/test_converted.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JustinFincher/FastDepth-CoreML/65d643dfd31000d4f505857071e59ab211a5f0cf/ConversionScripts/test_converted.jpg -------------------------------------------------------------------------------- /ConversionScripts/test_out.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JustinFincher/FastDepth-CoreML/65d643dfd31000d4f505857071e59ab211a5f0cf/ConversionScripts/test_out.jpg -------------------------------------------------------------------------------- /ConversionScripts/transfer.py: -------------------------------------------------------------------------------- 1 | import coremltools 2 | import scipy 3 | import torch 4 | import torch.onnx 5 | import os 6 | import torchvision 7 | import logging 8 | 9 | from coremltools.models.neural_network import flexible_shape_utils 10 | 11 | from models import MobileNetSkipAdd 12 | import onnx 13 | from onnx import version_converter 14 | import onnx_coreml 15 | import onnx.utils 16 | import torch.jit 17 | import PIL 18 | 19 | INT_MAX = 2 ** 30 20 | 21 | onnx_model_path = os.path.join('fastdepth.onnx') 22 | onnx_fast_model_path = os.path.join('fastestdepth.onnx') 23 | coreml_model_path = os.path.join('FastDepth.mlmodel') 24 | 25 | 26 | def _convert_upsample(builder, node, graph, err): 27 | if 'scales' in node.attrs: 28 | scales = node.attrs['scales'] 29 | elif len(node.input_tensors): 30 | scales = node.input_tensors[node.inputs[1]] 31 | else: 32 | # HACK: Manual scales 33 | # PROVIDE MANUAL SCALE HERE 34 | scales = [1, 1, 0.5, 0.5] 35 | 36 | scale_h = scales[2] 37 | scale_w = scales[3] 38 | input_shape = graph.shape_dict[node.inputs[0]] 39 | target_height = int(input_shape[-2] * scale_h) 40 | target_width = int(input_shape[-1] * scale_w) 41 | 42 | builder.add_resize_bilinear( 43 | name=node.name, 44 | input_name=node.inputs[0], 45 | output_name=node.outputs[0], 46 | target_height=target_height, 47 | target_width=target_width, 48 | mode='UPSAMPLE_MODE' 49 | ) 50 | 51 | 52 | def _convert_slice_v9(builder, node, graph, err): 53 | ''' 54 | convert to CoreML Slice Static Layer: 55 | https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L5082 56 | ''' 57 | logging.warn(graph.shape_dict) 58 | 59 | data_shape = graph.shape_dict[node.inputs[0]] 60 | len_of_data = len(data_shape) 61 | begin_masks = [True] * len_of_data 62 | end_masks = [True] * len_of_data 63 | 64 | default_axes = list(range(len_of_data)) 65 | default_steps = [1] * len_of_data 66 | 67 | ip_starts = node.attrs.get('starts') 68 | ip_ends = node.attrs.get('ends') 69 | axes = node.attrs.get('axes', default_axes) 70 | steps = node.attrs.get('steps', default_steps) 71 | 72 | starts = [0] * len_of_data 73 | ends = [0] * len_of_data 74 | 75 | for i in range(len(axes)): 76 | current_axes = axes[i] 77 | starts[current_axes] = ip_starts[i] 78 | ends[current_axes] = ip_ends[i] 79 | if ends[current_axes] != INT_MAX or ends[current_axes] < data_shape[current_axes]: 80 | end_masks[current_axes] = False 81 | 82 | if starts[current_axes] != 0: 83 | begin_masks[current_axes] = False 84 | 85 | builder.add_slice_static( 86 | name=node.name, 87 | input_name=node.inputs[0], 88 | output_name=node.outputs[0], 89 | begin_ids=starts, 90 | end_ids=ends, 91 | strides=steps, 92 | begin_masks=begin_masks, 93 | end_masks=end_masks 94 | ) 95 | 96 | 97 | # model = MobileNetSkipAdd((224, 224)) 98 | # model.eval() 99 | 100 | checkpoint = torch.load(os.path.join('mobilenet-nnconv5dw-skipadd-pruned.pth.tar'),map_location=torch.device('cpu')) 101 | start_epoch = checkpoint['epoch'] 102 | best_result = checkpoint['best_result'] 103 | model = checkpoint['model'] 104 | model.eval() 105 | print("=> loaded best model (epoch 
{})".format(checkpoint['epoch'])) 106 | 107 | batch_size = 1 108 | dummy_input = torch.randn((1, 3, 224, 224)) 109 | 110 | traced_script_module = torch.jit.trace(model, dummy_input) 111 | traced_script_module.save("fastdepth.pt") 112 | 113 | torch_out = torch.onnx.export( 114 | model=model, 115 | args=dummy_input, 116 | f=onnx_model_path, 117 | opset_version=9, 118 | verbose=True, 119 | do_constant_folding=False, 120 | input_names=['data'], 121 | output_names=['decode_conv6/2'], 122 | export_params=True, 123 | training=False, 124 | operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK 125 | ) 126 | 127 | onnx_model = onnx.load(onnx_model_path) 128 | # onnx_model = version_converter.convert_version(onnx_model, target_version = 11) 129 | # polished_model = onnx.utils.polish_model(onnx_model) 130 | # print('The model is:\n{}'.format(onnx_model)) 131 | # onnx.checker.check_model(polished_model) 132 | # print('The model is checked!') 133 | # logging.warn(onnx_model.graph.shape_dict) 134 | os.system("python3 -m onnxsim fastdepth.onnx fastestdepth.onnx") 135 | onnx_faster_model = onnx.load(onnx_fast_model_path) 136 | cml_model = onnx_coreml.convert( 137 | onnx_faster_model, 138 | preprocessing_args={'is_bgr': True, 'image_scale': 1.0/255.0}, 139 | image_input_names=['data'], 140 | # image_output_names=['decode_conv6/2'], 141 | custom_conversion_functions={ 142 | # "Slice": _convert_slice_v9, 143 | 'Upsample': _convert_upsample 144 | }, 145 | # disable_coreml_rank5_mapping=True 146 | target_ios='13' 147 | ) 148 | cml_model.save(coreml_model_path) 149 | 150 | # spec = coremltools.utils.load_spec(coreml_model_path) 151 | # img_size_ranges = flexible_shape_utils.NeuralNetworkImageSizeRange() 152 | # img_size_ranges.add_height_range((64, -1)) 153 | # img_size_ranges.add_width_range((64, -1)) 154 | # flexible_shape_utils.update_image_size_range(spec, feature_name='data', size_range=img_size_ranges) 155 | # coremltools.models.utils.save_spec(spec, coreml_model_path) 156 | 157 | exported_model = coremltools.models.MLModel(coreml_model_path) 158 | exported_model.author = 'Fincher' 159 | exported_model.short_description = 'Fast Depth' 160 | exported_model.save(coreml_model_path) 161 | 162 | rgba_image = PIL.Image.open("./test.png") 163 | rgb_image = rgba_image.convert('RGB') 164 | rgb_image.save('./test_converted.jpg') 165 | predictions = exported_model.predict({'data': rgb_image}) 166 | print(predictions['decode_conv6/2'][0][0]) 167 | scipy.misc.toimage(predictions['decode_conv6/2'][0][0]).save('test_out.jpg') 168 | 169 | 170 | spec = coremltools.utils.load_spec(coreml_model_path) 171 | spec.description.input[0].type.imageType.colorSpace = coremltools.proto.FeatureTypes_pb2.ImageFeatureType.RGB 172 | coremltools.utils.save_spec(spec, coreml_model_path) 173 | -------------------------------------------------------------------------------- /FastDepth-CoreML.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 
2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 50; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 0D9A36572367EFD6000FDDB4 /* Array+Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A364A2367EFD5000FDDB4 /* Array+Extensions.swift */; }; 11 | 0D9A36582367EFD6000FDDB4 /* Math.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A364B2367EFD5000FDDB4 /* Math.swift */; }; 12 | 0D9A36592367EFD6000FDDB4 /* UIImage+CVPixelBuffer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A364C2367EFD5000FDDB4 /* UIImage+CVPixelBuffer.swift */; }; 13 | 0D9A365A2367EFD6000FDDB4 /* Predictions.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A364D2367EFD5000FDDB4 /* Predictions.swift */; }; 14 | 0D9A365B2367EFD6000FDDB4 /* UIImage+Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A364E2367EFD5000FDDB4 /* UIImage+Extensions.swift */; }; 15 | 0D9A365C2367EFD6000FDDB4 /* CGImagePropertyOrientation.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A364F2367EFD5000FDDB4 /* CGImagePropertyOrientation.swift */; }; 16 | 0D9A365D2367EFD6000FDDB4 /* UIImage+RawBytes.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A36502367EFD5000FDDB4 /* UIImage+RawBytes.swift */; }; 17 | 0D9A365E2367EFD6000FDDB4 /* MLMultiArray+Helpers.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A36512367EFD5000FDDB4 /* MLMultiArray+Helpers.swift */; }; 18 | 0D9A365F2367EFD6000FDDB4 /* CVPixelBuffer+Helpers.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A36522367EFD5000FDDB4 /* CVPixelBuffer+Helpers.swift */; }; 19 | 0D9A36602367EFD6000FDDB4 /* CGImage+CVPixelBuffer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A36532367EFD5000FDDB4 /* CGImage+CVPixelBuffer.swift */; }; 20 | 0D9A36612367EFD6000FDDB4 /* NonMaxSuppression.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A36542367EFD5000FDDB4 /* NonMaxSuppression.swift */; }; 21 | 0D9A36622367EFD6000FDDB4 /* CGImage+RawBytes.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A36552367EFD5000FDDB4 /* CGImage+RawBytes.swift */; }; 22 | 0D9A36632367EFD6000FDDB4 /* MLMultiArray+Image.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A36562367EFD5000FDDB4 /* MLMultiArray+Image.swift */; }; 23 | 0D9A366523683070000FDDB4 /* FastDepth.mlmodel in Sources */ = {isa = PBXBuildFile; fileRef = 0D9A366423683070000FDDB4 /* FastDepth.mlmodel */; }; 24 | 71BBE01722E2D2EB00E74F11 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 71BBE01622E2D2EB00E74F11 /* AppDelegate.swift */; }; 25 | 71BBE01C22E2D2EB00E74F11 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 71BBE01A22E2D2EB00E74F11 /* Main.storyboard */; }; 26 | 71BBE01E22E2D2EC00E74F11 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 71BBE01D22E2D2EC00E74F11 /* Assets.xcassets */; }; 27 | 71BBE02122E2D2EC00E74F11 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 71BBE01F22E2D2EC00E74F11 /* LaunchScreen.storyboard */; }; 28 | 71BBE03222E2E9F200E74F11 /* LiveImageViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 71BBE03122E2E9F200E74F11 /* LiveImageViewController.swift */; }; 29 | 71BBE03422E2EB8700E74F11 /* Measure.swift in Sources */ = {isa = PBXBuildFile; fileRef = 71BBE03322E2EB8700E74F11 /* Measure.swift */; }; 30 | 71BBE03622E2EC1400E74F11 /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = 71BBE03522E2EC1400E74F11 /* VideoCapture.swift */; }; 31 | /* End PBXBuildFile section */ 32 | 33 | /* Begin 
PBXFileReference section */ 34 | 0D9A364A2367EFD5000FDDB4 /* Array+Extensions.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "Array+Extensions.swift"; sourceTree = ""; }; 35 | 0D9A364B2367EFD5000FDDB4 /* Math.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Math.swift; sourceTree = ""; }; 36 | 0D9A364C2367EFD5000FDDB4 /* UIImage+CVPixelBuffer.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "UIImage+CVPixelBuffer.swift"; sourceTree = ""; }; 37 | 0D9A364D2367EFD5000FDDB4 /* Predictions.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Predictions.swift; sourceTree = ""; }; 38 | 0D9A364E2367EFD5000FDDB4 /* UIImage+Extensions.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "UIImage+Extensions.swift"; sourceTree = ""; }; 39 | 0D9A364F2367EFD5000FDDB4 /* CGImagePropertyOrientation.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CGImagePropertyOrientation.swift; sourceTree = ""; }; 40 | 0D9A36502367EFD5000FDDB4 /* UIImage+RawBytes.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "UIImage+RawBytes.swift"; sourceTree = ""; }; 41 | 0D9A36512367EFD5000FDDB4 /* MLMultiArray+Helpers.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "MLMultiArray+Helpers.swift"; sourceTree = ""; }; 42 | 0D9A36522367EFD5000FDDB4 /* CVPixelBuffer+Helpers.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "CVPixelBuffer+Helpers.swift"; sourceTree = ""; }; 43 | 0D9A36532367EFD5000FDDB4 /* CGImage+CVPixelBuffer.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "CGImage+CVPixelBuffer.swift"; sourceTree = ""; }; 44 | 0D9A36542367EFD5000FDDB4 /* NonMaxSuppression.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = NonMaxSuppression.swift; sourceTree = ""; }; 45 | 0D9A36552367EFD5000FDDB4 /* CGImage+RawBytes.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "CGImage+RawBytes.swift"; sourceTree = ""; }; 46 | 0D9A36562367EFD5000FDDB4 /* MLMultiArray+Image.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "MLMultiArray+Image.swift"; sourceTree = ""; }; 47 | 0D9A366423683070000FDDB4 /* FastDepth.mlmodel */ = {isa = PBXFileReference; lastKnownFileType = file.mlmodel; path = FastDepth.mlmodel; sourceTree = ""; }; 48 | 0D9B23F523A764B000824853 /* FastDepth-CoreML.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = "FastDepth-CoreML.entitlements"; sourceTree = ""; }; 49 | 71BBE01322E2D2EB00E74F11 /* FastDepth-CoreML.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "FastDepth-CoreML.app"; sourceTree = BUILT_PRODUCTS_DIR; }; 50 | 71BBE01622E2D2EB00E74F11 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 51 | 71BBE01B22E2D2EB00E74F11 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; 52 | 71BBE01D22E2D2EC00E74F11 /* Assets.xcassets */ = 
{isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 53 | 71BBE02022E2D2EC00E74F11 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; 54 | 71BBE02222E2D2EC00E74F11 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 55 | 71BBE03122E2E9F200E74F11 /* LiveImageViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LiveImageViewController.swift; sourceTree = ""; }; 56 | 71BBE03322E2EB8700E74F11 /* Measure.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Measure.swift; sourceTree = ""; }; 57 | 71BBE03522E2EC1400E74F11 /* VideoCapture.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; 58 | /* End PBXFileReference section */ 59 | 60 | /* Begin PBXFrameworksBuildPhase section */ 61 | 71BBE01022E2D2EB00E74F11 /* Frameworks */ = { 62 | isa = PBXFrameworksBuildPhase; 63 | buildActionMask = 2147483647; 64 | files = ( 65 | ); 66 | runOnlyForDeploymentPostprocessing = 0; 67 | }; 68 | /* End PBXFrameworksBuildPhase section */ 69 | 70 | /* Begin PBXGroup section */ 71 | 0D7B4AC623648DB1008CE519 /* mlmodel */ = { 72 | isa = PBXGroup; 73 | children = ( 74 | 0D9A366423683070000FDDB4 /* FastDepth.mlmodel */, 75 | ); 76 | path = mlmodel; 77 | sourceTree = ""; 78 | }; 79 | 0D9A36492367EFD5000FDDB4 /* CoreMLHelpers */ = { 80 | isa = PBXGroup; 81 | children = ( 82 | 0D9A364A2367EFD5000FDDB4 /* Array+Extensions.swift */, 83 | 0D9A364B2367EFD5000FDDB4 /* Math.swift */, 84 | 0D9A364C2367EFD5000FDDB4 /* UIImage+CVPixelBuffer.swift */, 85 | 0D9A364D2367EFD5000FDDB4 /* Predictions.swift */, 86 | 0D9A364E2367EFD5000FDDB4 /* UIImage+Extensions.swift */, 87 | 0D9A364F2367EFD5000FDDB4 /* CGImagePropertyOrientation.swift */, 88 | 0D9A36502367EFD5000FDDB4 /* UIImage+RawBytes.swift */, 89 | 0D9A36512367EFD5000FDDB4 /* MLMultiArray+Helpers.swift */, 90 | 0D9A36522367EFD5000FDDB4 /* CVPixelBuffer+Helpers.swift */, 91 | 0D9A36532367EFD5000FDDB4 /* CGImage+CVPixelBuffer.swift */, 92 | 0D9A36542367EFD5000FDDB4 /* NonMaxSuppression.swift */, 93 | 0D9A36552367EFD5000FDDB4 /* CGImage+RawBytes.swift */, 94 | 0D9A36562367EFD5000FDDB4 /* MLMultiArray+Image.swift */, 95 | ); 96 | path = CoreMLHelpers; 97 | sourceTree = ""; 98 | }; 99 | 71BBE00A22E2D2EB00E74F11 = { 100 | isa = PBXGroup; 101 | children = ( 102 | 71BBE01522E2D2EB00E74F11 /* FastDepth-CoreML */, 103 | 71BBE01422E2D2EB00E74F11 /* Products */, 104 | ); 105 | sourceTree = ""; 106 | }; 107 | 71BBE01422E2D2EB00E74F11 /* Products */ = { 108 | isa = PBXGroup; 109 | children = ( 110 | 71BBE01322E2D2EB00E74F11 /* FastDepth-CoreML.app */, 111 | ); 112 | name = Products; 113 | sourceTree = ""; 114 | }; 115 | 71BBE01522E2D2EB00E74F11 /* FastDepth-CoreML */ = { 116 | isa = PBXGroup; 117 | children = ( 118 | 0D9B23F523A764B000824853 /* FastDepth-CoreML.entitlements */, 119 | 0D9A36492367EFD5000FDDB4 /* CoreMLHelpers */, 120 | 71BBE01A22E2D2EB00E74F11 /* Main.storyboard */, 121 | 71BBE01622E2D2EB00E74F11 /* AppDelegate.swift */, 122 | 71BBE03122E2E9F200E74F11 /* LiveImageViewController.swift */, 123 | 71BBE03522E2EC1400E74F11 /* VideoCapture.swift */, 124 | 71BBE03322E2EB8700E74F11 /* Measure.swift */, 125 | 0D7B4AC623648DB1008CE519 /* mlmodel */, 126 | 71BBE01D22E2D2EC00E74F11 /* Assets.xcassets */, 127 | 
71BBE01F22E2D2EC00E74F11 /* LaunchScreen.storyboard */, 128 | 71BBE02222E2D2EC00E74F11 /* Info.plist */, 129 | ); 130 | path = "FastDepth-CoreML"; 131 | sourceTree = ""; 132 | }; 133 | /* End PBXGroup section */ 134 | 135 | /* Begin PBXNativeTarget section */ 136 | 71BBE01222E2D2EB00E74F11 /* FastDepth-CoreML */ = { 137 | isa = PBXNativeTarget; 138 | buildConfigurationList = 71BBE02522E2D2EC00E74F11 /* Build configuration list for PBXNativeTarget "FastDepth-CoreML" */; 139 | buildPhases = ( 140 | 71BBE00F22E2D2EB00E74F11 /* Sources */, 141 | 71BBE01022E2D2EB00E74F11 /* Frameworks */, 142 | 71BBE01122E2D2EB00E74F11 /* Resources */, 143 | ); 144 | buildRules = ( 145 | ); 146 | dependencies = ( 147 | ); 148 | name = "FastDepth-CoreML"; 149 | productName = "DepthPrediction-CoreML"; 150 | productReference = 71BBE01322E2D2EB00E74F11 /* FastDepth-CoreML.app */; 151 | productType = "com.apple.product-type.application"; 152 | }; 153 | /* End PBXNativeTarget section */ 154 | 155 | /* Begin PBXProject section */ 156 | 71BBE00B22E2D2EB00E74F11 /* Project object */ = { 157 | isa = PBXProject; 158 | attributes = { 159 | LastSwiftUpdateCheck = 1020; 160 | LastUpgradeCheck = 1020; 161 | ORGANIZATIONNAME = "Doyoung Gwak"; 162 | TargetAttributes = { 163 | 71BBE01222E2D2EB00E74F11 = { 164 | CreatedOnToolsVersion = 10.2.1; 165 | }; 166 | }; 167 | }; 168 | buildConfigurationList = 71BBE00E22E2D2EB00E74F11 /* Build configuration list for PBXProject "FastDepth-CoreML" */; 169 | compatibilityVersion = "Xcode 9.3"; 170 | developmentRegion = en; 171 | hasScannedForEncodings = 0; 172 | knownRegions = ( 173 | en, 174 | Base, 175 | ); 176 | mainGroup = 71BBE00A22E2D2EB00E74F11; 177 | productRefGroup = 71BBE01422E2D2EB00E74F11 /* Products */; 178 | projectDirPath = ""; 179 | projectRoot = ""; 180 | targets = ( 181 | 71BBE01222E2D2EB00E74F11 /* FastDepth-CoreML */, 182 | ); 183 | }; 184 | /* End PBXProject section */ 185 | 186 | /* Begin PBXResourcesBuildPhase section */ 187 | 71BBE01122E2D2EB00E74F11 /* Resources */ = { 188 | isa = PBXResourcesBuildPhase; 189 | buildActionMask = 2147483647; 190 | files = ( 191 | 71BBE02122E2D2EC00E74F11 /* LaunchScreen.storyboard in Resources */, 192 | 71BBE01E22E2D2EC00E74F11 /* Assets.xcassets in Resources */, 193 | 71BBE01C22E2D2EB00E74F11 /* Main.storyboard in Resources */, 194 | ); 195 | runOnlyForDeploymentPostprocessing = 0; 196 | }; 197 | /* End PBXResourcesBuildPhase section */ 198 | 199 | /* Begin PBXSourcesBuildPhase section */ 200 | 71BBE00F22E2D2EB00E74F11 /* Sources */ = { 201 | isa = PBXSourcesBuildPhase; 202 | buildActionMask = 2147483647; 203 | files = ( 204 | 0D9A36612367EFD6000FDDB4 /* NonMaxSuppression.swift in Sources */, 205 | 0D9A36592367EFD6000FDDB4 /* UIImage+CVPixelBuffer.swift in Sources */, 206 | 0D9A365C2367EFD6000FDDB4 /* CGImagePropertyOrientation.swift in Sources */, 207 | 0D9A365A2367EFD6000FDDB4 /* Predictions.swift in Sources */, 208 | 0D9A366523683070000FDDB4 /* FastDepth.mlmodel in Sources */, 209 | 71BBE03422E2EB8700E74F11 /* Measure.swift in Sources */, 210 | 0D9A36572367EFD6000FDDB4 /* Array+Extensions.swift in Sources */, 211 | 71BBE03222E2E9F200E74F11 /* LiveImageViewController.swift in Sources */, 212 | 71BBE03622E2EC1400E74F11 /* VideoCapture.swift in Sources */, 213 | 0D9A36622367EFD6000FDDB4 /* CGImage+RawBytes.swift in Sources */, 214 | 0D9A36632367EFD6000FDDB4 /* MLMultiArray+Image.swift in Sources */, 215 | 0D9A365D2367EFD6000FDDB4 /* UIImage+RawBytes.swift in Sources */, 216 | 0D9A365B2367EFD6000FDDB4 /* UIImage+Extensions.swift in 
Sources */, 217 | 0D9A36602367EFD6000FDDB4 /* CGImage+CVPixelBuffer.swift in Sources */, 218 | 71BBE01722E2D2EB00E74F11 /* AppDelegate.swift in Sources */, 219 | 0D9A365E2367EFD6000FDDB4 /* MLMultiArray+Helpers.swift in Sources */, 220 | 0D9A365F2367EFD6000FDDB4 /* CVPixelBuffer+Helpers.swift in Sources */, 221 | 0D9A36582367EFD6000FDDB4 /* Math.swift in Sources */, 222 | ); 223 | runOnlyForDeploymentPostprocessing = 0; 224 | }; 225 | /* End PBXSourcesBuildPhase section */ 226 | 227 | /* Begin PBXVariantGroup section */ 228 | 71BBE01A22E2D2EB00E74F11 /* Main.storyboard */ = { 229 | isa = PBXVariantGroup; 230 | children = ( 231 | 71BBE01B22E2D2EB00E74F11 /* Base */, 232 | ); 233 | name = Main.storyboard; 234 | sourceTree = ""; 235 | }; 236 | 71BBE01F22E2D2EC00E74F11 /* LaunchScreen.storyboard */ = { 237 | isa = PBXVariantGroup; 238 | children = ( 239 | 71BBE02022E2D2EC00E74F11 /* Base */, 240 | ); 241 | name = LaunchScreen.storyboard; 242 | sourceTree = ""; 243 | }; 244 | /* End PBXVariantGroup section */ 245 | 246 | /* Begin XCBuildConfiguration section */ 247 | 71BBE02322E2D2EC00E74F11 /* Debug */ = { 248 | isa = XCBuildConfiguration; 249 | buildSettings = { 250 | ALWAYS_SEARCH_USER_PATHS = NO; 251 | CLANG_ANALYZER_NONNULL = YES; 252 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 253 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 254 | CLANG_CXX_LIBRARY = "libc++"; 255 | CLANG_ENABLE_MODULES = YES; 256 | CLANG_ENABLE_OBJC_ARC = YES; 257 | CLANG_ENABLE_OBJC_WEAK = YES; 258 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 259 | CLANG_WARN_BOOL_CONVERSION = YES; 260 | CLANG_WARN_COMMA = YES; 261 | CLANG_WARN_CONSTANT_CONVERSION = YES; 262 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; 263 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 264 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 265 | CLANG_WARN_EMPTY_BODY = YES; 266 | CLANG_WARN_ENUM_CONVERSION = YES; 267 | CLANG_WARN_INFINITE_RECURSION = YES; 268 | CLANG_WARN_INT_CONVERSION = YES; 269 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 270 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; 271 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 272 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 273 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 274 | CLANG_WARN_STRICT_PROTOTYPES = YES; 275 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 276 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 277 | CLANG_WARN_UNREACHABLE_CODE = YES; 278 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 279 | CODE_SIGN_IDENTITY = "iPhone Developer"; 280 | COPY_PHASE_STRIP = NO; 281 | DEBUG_INFORMATION_FORMAT = dwarf; 282 | ENABLE_STRICT_OBJC_MSGSEND = YES; 283 | ENABLE_TESTABILITY = YES; 284 | GCC_C_LANGUAGE_STANDARD = gnu11; 285 | GCC_DYNAMIC_NO_PIC = NO; 286 | GCC_NO_COMMON_BLOCKS = YES; 287 | GCC_OPTIMIZATION_LEVEL = 0; 288 | GCC_PREPROCESSOR_DEFINITIONS = ( 289 | "DEBUG=1", 290 | "$(inherited)", 291 | ); 292 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 293 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 294 | GCC_WARN_UNDECLARED_SELECTOR = YES; 295 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 296 | GCC_WARN_UNUSED_FUNCTION = YES; 297 | GCC_WARN_UNUSED_VARIABLE = YES; 298 | IPHONEOS_DEPLOYMENT_TARGET = 13.0; 299 | MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; 300 | MTL_FAST_MATH = YES; 301 | ONLY_ACTIVE_ARCH = YES; 302 | SDKROOT = iphoneos; 303 | SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; 304 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 305 | }; 306 | name = Debug; 307 | }; 308 | 71BBE02422E2D2EC00E74F11 /* Release */ = { 309 | isa = XCBuildConfiguration; 310 | buildSettings = { 311 | 
ALWAYS_SEARCH_USER_PATHS = NO; 312 | CLANG_ANALYZER_NONNULL = YES; 313 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 314 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 315 | CLANG_CXX_LIBRARY = "libc++"; 316 | CLANG_ENABLE_MODULES = YES; 317 | CLANG_ENABLE_OBJC_ARC = YES; 318 | CLANG_ENABLE_OBJC_WEAK = YES; 319 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 320 | CLANG_WARN_BOOL_CONVERSION = YES; 321 | CLANG_WARN_COMMA = YES; 322 | CLANG_WARN_CONSTANT_CONVERSION = YES; 323 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; 324 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 325 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 326 | CLANG_WARN_EMPTY_BODY = YES; 327 | CLANG_WARN_ENUM_CONVERSION = YES; 328 | CLANG_WARN_INFINITE_RECURSION = YES; 329 | CLANG_WARN_INT_CONVERSION = YES; 330 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 331 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; 332 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 333 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 334 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 335 | CLANG_WARN_STRICT_PROTOTYPES = YES; 336 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 337 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 338 | CLANG_WARN_UNREACHABLE_CODE = YES; 339 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 340 | CODE_SIGN_IDENTITY = "iPhone Developer"; 341 | COPY_PHASE_STRIP = NO; 342 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 343 | ENABLE_NS_ASSERTIONS = NO; 344 | ENABLE_STRICT_OBJC_MSGSEND = YES; 345 | GCC_C_LANGUAGE_STANDARD = gnu11; 346 | GCC_NO_COMMON_BLOCKS = YES; 347 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 348 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 349 | GCC_WARN_UNDECLARED_SELECTOR = YES; 350 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 351 | GCC_WARN_UNUSED_FUNCTION = YES; 352 | GCC_WARN_UNUSED_VARIABLE = YES; 353 | IPHONEOS_DEPLOYMENT_TARGET = 13.0; 354 | MTL_ENABLE_DEBUG_INFO = NO; 355 | MTL_FAST_MATH = YES; 356 | SDKROOT = iphoneos; 357 | SWIFT_COMPILATION_MODE = wholemodule; 358 | SWIFT_OPTIMIZATION_LEVEL = "-O"; 359 | VALIDATE_PRODUCT = YES; 360 | }; 361 | name = Release; 362 | }; 363 | 71BBE02622E2D2EC00E74F11 /* Debug */ = { 364 | isa = XCBuildConfiguration; 365 | buildSettings = { 366 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 367 | CODE_SIGN_ENTITLEMENTS = "FastDepth-CoreML/FastDepth-CoreML.entitlements"; 368 | CODE_SIGN_STYLE = Automatic; 369 | CURRENT_PROJECT_VERSION = 0; 370 | DERIVE_MACCATALYST_PRODUCT_BUNDLE_IDENTIFIER = YES; 371 | DEVELOPMENT_TEAM = 9QJ8M25J77; 372 | INFOPLIST_FILE = "FastDepth-CoreML/Info.plist"; 373 | IPHONEOS_DEPLOYMENT_TARGET = 13.0; 374 | LD_RUNPATH_SEARCH_PATHS = ( 375 | "$(inherited)", 376 | "@executable_path/Frameworks", 377 | ); 378 | MARKETING_VERSION = 0; 379 | PRODUCT_BUNDLE_IDENTIFIER = com.coreml.fastdepth; 380 | PRODUCT_NAME = "$(TARGET_NAME)"; 381 | SUPPORTS_MACCATALYST = YES; 382 | SWIFT_VERSION = 5.0; 383 | TARGETED_DEVICE_FAMILY = "1,2"; 384 | }; 385 | name = Debug; 386 | }; 387 | 71BBE02722E2D2EC00E74F11 /* Release */ = { 388 | isa = XCBuildConfiguration; 389 | buildSettings = { 390 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 391 | CODE_SIGN_ENTITLEMENTS = "FastDepth-CoreML/FastDepth-CoreML.entitlements"; 392 | CODE_SIGN_STYLE = Automatic; 393 | CURRENT_PROJECT_VERSION = 0; 394 | DERIVE_MACCATALYST_PRODUCT_BUNDLE_IDENTIFIER = YES; 395 | DEVELOPMENT_TEAM = 9QJ8M25J77; 396 | INFOPLIST_FILE = "FastDepth-CoreML/Info.plist"; 397 | IPHONEOS_DEPLOYMENT_TARGET = 13.0; 398 | LD_RUNPATH_SEARCH_PATHS = ( 399 | "$(inherited)", 400 | "@executable_path/Frameworks", 401 | ); 402 | 
MARKETING_VERSION = 0; 403 | PRODUCT_BUNDLE_IDENTIFIER = com.coreml.fastdepth; 404 | PRODUCT_NAME = "$(TARGET_NAME)"; 405 | SUPPORTS_MACCATALYST = YES; 406 | SWIFT_VERSION = 5.0; 407 | TARGETED_DEVICE_FAMILY = "1,2"; 408 | }; 409 | name = Release; 410 | }; 411 | /* End XCBuildConfiguration section */ 412 | 413 | /* Begin XCConfigurationList section */ 414 | 71BBE00E22E2D2EB00E74F11 /* Build configuration list for PBXProject "FastDepth-CoreML" */ = { 415 | isa = XCConfigurationList; 416 | buildConfigurations = ( 417 | 71BBE02322E2D2EC00E74F11 /* Debug */, 418 | 71BBE02422E2D2EC00E74F11 /* Release */, 419 | ); 420 | defaultConfigurationIsVisible = 0; 421 | defaultConfigurationName = Release; 422 | }; 423 | 71BBE02522E2D2EC00E74F11 /* Build configuration list for PBXNativeTarget "FastDepth-CoreML" */ = { 424 | isa = XCConfigurationList; 425 | buildConfigurations = ( 426 | 71BBE02622E2D2EC00E74F11 /* Debug */, 427 | 71BBE02722E2D2EC00E74F11 /* Release */, 428 | ); 429 | defaultConfigurationIsVisible = 0; 430 | defaultConfigurationName = Release; 431 | }; 432 | /* End XCConfigurationList section */ 433 | }; 434 | rootObject = 71BBE00B22E2D2EB00E74F11 /* Project object */; 435 | } 436 | -------------------------------------------------------------------------------- /FastDepth-CoreML.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /FastDepth-CoreML.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | IDEDidComputeMac32BitWarning 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /FastDepth-CoreML.xcodeproj/project.xcworkspace/xcuserdata/fincher.xcuserdatad/UserInterfaceState.xcuserstate: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JustinFincher/FastDepth-CoreML/65d643dfd31000d4f505857071e59ab211a5f0cf/FastDepth-CoreML.xcodeproj/project.xcworkspace/xcuserdata/fincher.xcuserdatad/UserInterfaceState.xcuserstate -------------------------------------------------------------------------------- /FastDepth-CoreML.xcodeproj/xcuserdata/fincher.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 9 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /FastDepth-CoreML.xcodeproj/xcuserdata/fincher.xcuserdatad/xcschemes/xcschememanagement.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SchemeUserState 6 | 7 | DepthPrediction-CoreML.xcscheme_^#shared#^_ 8 | 9 | orderHint 10 | 0 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /FastDepth-CoreML/AppDelegate.swift: -------------------------------------------------------------------------------- 1 | // 2 | // AppDelegate.swift 3 | // DepthPrediction-CoreML 4 | // 5 | // Created by Doyoung Gwak on 20/07/2019. 6 | // Copyright © 2019 Doyoung Gwak. All rights reserved. 7 | // 8 | 9 | import UIKit 10 | 11 | @UIApplicationMain 12 | class AppDelegate: UIResponder, UIApplicationDelegate { 13 | 14 | var window: UIWindow? 
15 | 16 | 17 | func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool { 18 | // Override point for customization after application launch. 19 | return true 20 | } 21 | 22 | func applicationWillResignActive(_ application: UIApplication) { 23 | // Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state. 24 | // Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game. 25 | } 26 | 27 | func applicationDidEnterBackground(_ application: UIApplication) { 28 | // Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later. 29 | // If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits. 30 | } 31 | 32 | func applicationWillEnterForeground(_ application: UIApplication) { 33 | // Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background. 34 | } 35 | 36 | func applicationDidBecomeActive(_ application: UIApplication) { 37 | // Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface. 38 | } 39 | 40 | func applicationWillTerminate(_ application: UIApplication) { 41 | // Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:. 
42 | } 43 | 44 | 45 | } 46 | 47 | -------------------------------------------------------------------------------- /FastDepth-CoreML/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "iphone", 5 | "size" : "20x20", 6 | "scale" : "2x" 7 | }, 8 | { 9 | "idiom" : "iphone", 10 | "size" : "20x20", 11 | "scale" : "3x" 12 | }, 13 | { 14 | "idiom" : "iphone", 15 | "size" : "29x29", 16 | "scale" : "2x" 17 | }, 18 | { 19 | "idiom" : "iphone", 20 | "size" : "29x29", 21 | "scale" : "3x" 22 | }, 23 | { 24 | "idiom" : "iphone", 25 | "size" : "40x40", 26 | "scale" : "2x" 27 | }, 28 | { 29 | "idiom" : "iphone", 30 | "size" : "40x40", 31 | "scale" : "3x" 32 | }, 33 | { 34 | "idiom" : "iphone", 35 | "size" : "60x60", 36 | "scale" : "2x" 37 | }, 38 | { 39 | "idiom" : "iphone", 40 | "size" : "60x60", 41 | "scale" : "3x" 42 | }, 43 | { 44 | "idiom" : "ipad", 45 | "size" : "20x20", 46 | "scale" : "1x" 47 | }, 48 | { 49 | "idiom" : "ipad", 50 | "size" : "20x20", 51 | "scale" : "2x" 52 | }, 53 | { 54 | "idiom" : "ipad", 55 | "size" : "29x29", 56 | "scale" : "1x" 57 | }, 58 | { 59 | "idiom" : "ipad", 60 | "size" : "29x29", 61 | "scale" : "2x" 62 | }, 63 | { 64 | "idiom" : "ipad", 65 | "size" : "40x40", 66 | "scale" : "1x" 67 | }, 68 | { 69 | "idiom" : "ipad", 70 | "size" : "40x40", 71 | "scale" : "2x" 72 | }, 73 | { 74 | "idiom" : "ipad", 75 | "size" : "76x76", 76 | "scale" : "1x" 77 | }, 78 | { 79 | "idiom" : "ipad", 80 | "size" : "76x76", 81 | "scale" : "2x" 82 | }, 83 | { 84 | "idiom" : "ipad", 85 | "size" : "83.5x83.5", 86 | "scale" : "2x" 87 | }, 88 | { 89 | "idiom" : "ios-marketing", 90 | "size" : "1024x1024", 91 | "scale" : "1x" 92 | } 93 | ], 94 | "info" : { 95 | "version" : 1, 96 | "author" : "xcode" 97 | } 98 | } -------------------------------------------------------------------------------- /FastDepth-CoreML/Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "version" : 1, 4 | "author" : "xcode" 5 | } 6 | } -------------------------------------------------------------------------------- /FastDepth-CoreML/Base.lproj/LaunchScreen.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /FastDepth-CoreML/Base.lproj/Main.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 33 | 39 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /FastDepth-CoreML/CoreMLHelpers/Array+Extensions.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. 
Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | import Swift 24 | 25 | extension Array where Element: Comparable { 26 | /** 27 | Returns the index and value of the largest element in the array. 28 | 29 | - Note: This method is slow. For faster results, use the standalone 30 | version of argmax() instead. 31 | */ 32 | public func argmax() -> (Int, Element) { 33 | precondition(self.count > 0) 34 | var maxIndex = 0 35 | var maxValue = self[0] 36 | for i in 1.. maxValue { 37 | maxValue = self[i] 38 | maxIndex = i 39 | } 40 | return (maxIndex, maxValue) 41 | } 42 | 43 | /** 44 | Returns the indices of the array's elements in sorted order. 45 | */ 46 | public func argsort(by areInIncreasingOrder: (Element, Element) -> Bool) -> [Array.Index] { 47 | return self.indices.sorted { areInIncreasingOrder(self[$0], self[$1]) } 48 | } 49 | 50 | /** 51 | Returns a new array containing the elements at the specified indices. 52 | */ 53 | public func gather(indices: [Array.Index]) -> [Element] { 54 | return indices.map { self[$0] } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /FastDepth-CoreML/CoreMLHelpers/CGImage+CVPixelBuffer.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 
21 | */ 22 | 23 | import CoreGraphics 24 | import CoreImage 25 | import VideoToolbox 26 | 27 | extension CGImage { 28 | /** 29 | Resizes the image to width x height and converts it to an RGB CVPixelBuffer. 30 | */ 31 | public func pixelBuffer(width: Int, height: Int, 32 | orientation: CGImagePropertyOrientation) -> CVPixelBuffer? { 33 | return pixelBuffer(width: width, height: height, 34 | pixelFormatType: kCVPixelFormatType_32ARGB, 35 | colorSpace: CGColorSpaceCreateDeviceRGB(), 36 | alphaInfo: .noneSkipFirst, 37 | orientation: orientation) 38 | } 39 | 40 | /** 41 | Resizes the image to width x height and converts it to a grayscale CVPixelBuffer. 42 | */ 43 | public func pixelBufferGray(width: Int, height: Int, 44 | orientation: CGImagePropertyOrientation) -> CVPixelBuffer? { 45 | return pixelBuffer(width: width, height: height, 46 | pixelFormatType: kCVPixelFormatType_OneComponent8, 47 | colorSpace: CGColorSpaceCreateDeviceGray(), 48 | alphaInfo: .none, 49 | orientation: orientation) 50 | } 51 | 52 | func pixelBuffer(width: Int, height: Int, pixelFormatType: OSType, 53 | colorSpace: CGColorSpace, alphaInfo: CGImageAlphaInfo, 54 | orientation: CGImagePropertyOrientation) -> CVPixelBuffer? { 55 | 56 | // TODO: If the orientation is not .up, then rotate the CGImage. 57 | // See also: https://stackoverflow.com/a/40438893/ 58 | assert(orientation == .up) 59 | 60 | var maybePixelBuffer: CVPixelBuffer? 61 | let attrs = [kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue, 62 | kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue] 63 | let status = CVPixelBufferCreate(kCFAllocatorDefault, 64 | width, 65 | height, 66 | pixelFormatType, 67 | attrs as CFDictionary, 68 | &maybePixelBuffer) 69 | 70 | guard status == kCVReturnSuccess, let pixelBuffer = maybePixelBuffer else { 71 | return nil 72 | } 73 | 74 | let flags = CVPixelBufferLockFlags(rawValue: 0) 75 | guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(pixelBuffer, flags) else { 76 | return nil 77 | } 78 | defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, flags) } 79 | 80 | guard let context = CGContext(data: CVPixelBufferGetBaseAddress(pixelBuffer), 81 | width: width, 82 | height: height, 83 | bitsPerComponent: 8, 84 | bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer), 85 | space: colorSpace, 86 | bitmapInfo: alphaInfo.rawValue) 87 | else { 88 | return nil 89 | } 90 | 91 | context.draw(self, in: CGRect(x: 0, y: 0, width: width, height: height)) 92 | return pixelBuffer 93 | } 94 | } 95 | 96 | extension CGImage { 97 | /** 98 | Creates a new CGImage from a CVPixelBuffer. 99 | 100 | - Note: Not all CVPixelBuffer pixel formats support conversion into a 101 | CGImage-compatible pixel format. 102 | */ 103 | public static func create(pixelBuffer: CVPixelBuffer) -> CGImage? { 104 | var cgImage: CGImage? 105 | VTCreateCGImageFromCVPixelBuffer(pixelBuffer, options: nil, imageOut: &cgImage) 106 | return cgImage 107 | } 108 | 109 | public static func createGrayScale(pixelBuffer: CVPixelBuffer) -> CGImage? { 110 | // This method creates a bitmap CGContext using the pixel buffer's memory. 111 | // It currently only handles kCVPixelFormatType_32ARGB images. To support 112 | // other pixel formats too, you'll have to change the bitmapInfo and maybe 113 | // the color space for the CGContext. 
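// (Illustrative comment, not part of the original helper.) The CGContext below is
// created with CGColorSpaceCreateDeviceGray() and CGImageAlphaInfo.none, so it is
// suited to one-component buffers such as kCVPixelFormatType_OneComponent8.
// A minimal hypothetical usage sketch, assuming `depthBuffer` is such a buffer:
//
//     let grayImage = CGImage.createGrayScale(pixelBuffer: depthBuffer)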
114 | 115 | guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly) else { 116 | return nil 117 | } 118 | defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) } 119 | 120 | if let context = CGContext(data: CVPixelBufferGetBaseAddress(pixelBuffer), 121 | width: CVPixelBufferGetWidth(pixelBuffer), 122 | height: CVPixelBufferGetHeight(pixelBuffer), 123 | bitsPerComponent: 8, 124 | bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer), 125 | space: CGColorSpaceCreateDeviceGray(), 126 | bitmapInfo: CGImageAlphaInfo.none.rawValue), 127 | let cgImage = context.makeImage() { 128 | return cgImage 129 | } else { 130 | return nil 131 | } 132 | } 133 | 134 | /* 135 | // Alternative implementation: 136 | public static func create(pixelBuffer: CVPixelBuffer) -> CGImage? { 137 | // This method creates a bitmap CGContext using the pixel buffer's memory. 138 | // It currently only handles kCVPixelFormatType_32ARGB images. To support 139 | // other pixel formats too, you'll have to change the bitmapInfo and maybe 140 | // the color space for the CGContext. 141 | 142 | guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly) else { 143 | return nil 144 | } 145 | defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) } 146 | 147 | if let context = CGContext(data: CVPixelBufferGetBaseAddress(pixelBuffer), 148 | width: CVPixelBufferGetWidth(pixelBuffer), 149 | height: CVPixelBufferGetHeight(pixelBuffer), 150 | bitsPerComponent: 8, 151 | bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer), 152 | space: CGColorSpaceCreateDeviceRGB(), 153 | bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue), 154 | let cgImage = context.makeImage() { 155 | return cgImage 156 | } else { 157 | return nil 158 | } 159 | } 160 | */ 161 | 162 | /** 163 | Creates a new CGImage from a CVPixelBuffer, using Core Image. 164 | */ 165 | public static func create(pixelBuffer: CVPixelBuffer, context: CIContext) -> CGImage? { 166 | let ciImage = CIImage(cvPixelBuffer: pixelBuffer) 167 | let rect = CGRect(x: 0, y: 0, width: CVPixelBufferGetWidth(pixelBuffer), 168 | height: CVPixelBufferGetHeight(pixelBuffer)) 169 | return context.createCGImage(ciImage, from: rect) 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /FastDepth-CoreML/CoreMLHelpers/CGImage+RawBytes.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | import CoreGraphics 24 | 25 | extension CGImage { 26 | /** 27 | Converts the image into an array of RGBA bytes. 28 | */ 29 | @nonobjc public func toByteArrayRGBA() -> [UInt8] { 30 | var bytes = [UInt8](repeating: 0, count: width * height * 4) 31 | bytes.withUnsafeMutableBytes { ptr in 32 | if let colorSpace = colorSpace, 33 | let context = CGContext( 34 | data: ptr.baseAddress, 35 | width: width, 36 | height: height, 37 | bitsPerComponent: bitsPerComponent, 38 | bytesPerRow: bytesPerRow, 39 | space: colorSpace, 40 | bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue) { 41 | let rect = CGRect(x: 0, y: 0, width: width, height: height) 42 | context.draw(self, in: rect) 43 | } 44 | } 45 | return bytes 46 | } 47 | 48 | /** 49 | Creates a new CGImage from an array of RGBA bytes. 50 | */ 51 | @nonobjc public class func fromByteArrayRGBA(_ bytes: [UInt8], 52 | width: Int, 53 | height: Int) -> CGImage? { 54 | return fromByteArray(bytes, width: width, height: height, 55 | bytesPerRow: width * 4, 56 | colorSpace: CGColorSpaceCreateDeviceRGB(), 57 | alphaInfo: .premultipliedLast) 58 | } 59 | 60 | /** 61 | Creates a new CGImage from an array of grayscale bytes. 62 | */ 63 | @nonobjc public class func fromByteArrayGray(_ bytes: [UInt8], 64 | width: Int, 65 | height: Int) -> CGImage? { 66 | return fromByteArray(bytes, width: width, height: height, 67 | bytesPerRow: width, 68 | colorSpace: CGColorSpaceCreateDeviceGray(), 69 | alphaInfo: .none) 70 | } 71 | 72 | @nonobjc class func fromByteArray(_ bytes: [UInt8], 73 | width: Int, 74 | height: Int, 75 | bytesPerRow: Int, 76 | colorSpace: CGColorSpace, 77 | alphaInfo: CGImageAlphaInfo) -> CGImage? { 78 | return bytes.withUnsafeBytes { ptr in 79 | let context = CGContext(data: UnsafeMutableRawPointer(mutating: ptr.baseAddress!), 80 | width: width, 81 | height: height, 82 | bitsPerComponent: 8, 83 | bytesPerRow: bytesPerRow, 84 | space: colorSpace, 85 | bitmapInfo: alphaInfo.rawValue) 86 | return context?.makeImage() 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /FastDepth-CoreML/CoreMLHelpers/CGImagePropertyOrientation.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | #if canImport(UIKit) 24 | 25 | import UIKit 26 | 27 | public extension CGImagePropertyOrientation { 28 | init(_ orientation: UIImage.Orientation) { 29 | switch orientation { 30 | case .up: self = .up 31 | case .upMirrored: self = .upMirrored 32 | case .down: self = .down 33 | case .downMirrored: self = .downMirrored 34 | case .left: self = .left 35 | case .leftMirrored: self = .leftMirrored 36 | case .right: self = .right 37 | case .rightMirrored: self = .rightMirrored 38 | @unknown default: self = .up 39 | } 40 | } 41 | } 42 | 43 | #if !os(tvOS) 44 | 45 | public extension CGImagePropertyOrientation { 46 | init(_ orientation: UIDeviceOrientation) { 47 | switch orientation { 48 | case .portraitUpsideDown: self = .left 49 | case .landscapeLeft: self = .up 50 | case .landscapeRight: self = .down 51 | default: self = .right 52 | } 53 | } 54 | } 55 | 56 | #endif 57 | 58 | extension UIImage.Orientation { 59 | init(_ cgOrientation: UIImage.Orientation) { 60 | switch cgOrientation { 61 | case .up: self = .up 62 | case .upMirrored: self = .upMirrored 63 | case .down: self = .down 64 | case .downMirrored: self = .downMirrored 65 | case .left: self = .left 66 | case .leftMirrored: self = .leftMirrored 67 | case .right: self = .right 68 | case .rightMirrored: self = .rightMirrored 69 | @unknown default: self = .up 70 | } 71 | } 72 | } 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /FastDepth-CoreML/CoreMLHelpers/CVPixelBuffer+Helpers.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | import Foundation 24 | import Accelerate 25 | import CoreImage 26 | 27 | /** 28 | Creates a RGB pixel buffer of the specified width and height. 29 | */ 30 | public func createPixelBuffer(width: Int, height: Int) -> CVPixelBuffer? { 31 | var pixelBuffer: CVPixelBuffer? 
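// (Illustrative comment, not part of the original helper.) Although the doc comment
// above says "RGB", the call below requests a kCVPixelFormatType_32BGRA buffer, the
// 4-byte-per-pixel layout that the vImage resize/rotate helpers in this file assume.
// A minimal hypothetical usage sketch, with 224x224 assumed as the model input size:
//
//     let scratch = createPixelBuffer(width: 224, height: 224)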
32 | let status = CVPixelBufferCreate(nil, width, height, 33 | kCVPixelFormatType_32BGRA, nil, 34 | &pixelBuffer) 35 | if status != kCVReturnSuccess { 36 | print("Error: could not create pixel buffer", status) 37 | return nil 38 | } 39 | return pixelBuffer 40 | } 41 | 42 | /** 43 | First crops the pixel buffer, then resizes it. 44 | 45 | - Note: The new CVPixelBuffer is not backed by an IOSurface and therefore 46 | cannot be turned into a Metal texture. 47 | */ 48 | public func resizePixelBuffer(_ srcPixelBuffer: CVPixelBuffer, 49 | cropX: Int, 50 | cropY: Int, 51 | cropWidth: Int, 52 | cropHeight: Int, 53 | scaleWidth: Int, 54 | scaleHeight: Int) -> CVPixelBuffer? { 55 | let flags = CVPixelBufferLockFlags(rawValue: 0) 56 | guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(srcPixelBuffer, flags) else { 57 | return nil 58 | } 59 | defer { CVPixelBufferUnlockBaseAddress(srcPixelBuffer, flags) } 60 | 61 | guard let srcData = CVPixelBufferGetBaseAddress(srcPixelBuffer) else { 62 | print("Error: could not get pixel buffer base address") 63 | return nil 64 | } 65 | let srcBytesPerRow = CVPixelBufferGetBytesPerRow(srcPixelBuffer) 66 | let offset = cropY*srcBytesPerRow + cropX*4 67 | var srcBuffer = vImage_Buffer(data: srcData.advanced(by: offset), 68 | height: vImagePixelCount(cropHeight), 69 | width: vImagePixelCount(cropWidth), 70 | rowBytes: srcBytesPerRow) 71 | 72 | let destBytesPerRow = scaleWidth*4 73 | guard let destData = malloc(scaleHeight*destBytesPerRow) else { 74 | print("Error: out of memory") 75 | return nil 76 | } 77 | var destBuffer = vImage_Buffer(data: destData, 78 | height: vImagePixelCount(scaleHeight), 79 | width: vImagePixelCount(scaleWidth), 80 | rowBytes: destBytesPerRow) 81 | 82 | let error = vImageScale_ARGB8888(&srcBuffer, &destBuffer, nil, vImage_Flags(0)) 83 | if error != kvImageNoError { 84 | print("Error:", error) 85 | free(destData) 86 | return nil 87 | } 88 | 89 | let releaseCallback: CVPixelBufferReleaseBytesCallback = { _, ptr in 90 | if let ptr = ptr { 91 | free(UnsafeMutableRawPointer(mutating: ptr)) 92 | } 93 | } 94 | 95 | let pixelFormat = CVPixelBufferGetPixelFormatType(srcPixelBuffer) 96 | var dstPixelBuffer: CVPixelBuffer? 97 | let status = CVPixelBufferCreateWithBytes(nil, scaleWidth, scaleHeight, 98 | pixelFormat, destData, 99 | destBytesPerRow, releaseCallback, 100 | nil, nil, &dstPixelBuffer) 101 | if status != kCVReturnSuccess { 102 | print("Error: could not create new pixel buffer") 103 | free(destData) 104 | return nil 105 | } 106 | return dstPixelBuffer 107 | } 108 | 109 | /** 110 | Resizes a CVPixelBuffer to a new width and height. 111 | 112 | - Note: The new CVPixelBuffer is not backed by an IOSurface and therefore 113 | cannot be turned into a Metal texture. 114 | */ 115 | public func resizePixelBuffer(_ pixelBuffer: CVPixelBuffer, 116 | width: Int, height: Int) -> CVPixelBuffer? { 117 | return resizePixelBuffer(pixelBuffer, cropX: 0, cropY: 0, 118 | cropWidth: CVPixelBufferGetWidth(pixelBuffer), 119 | cropHeight: CVPixelBufferGetHeight(pixelBuffer), 120 | scaleWidth: width, scaleHeight: height) 121 | } 122 | 123 | /** 124 | Resizes a CVPixelBuffer to a new width and height. 
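 For example (a hypothetical sketch; `camera`, `modelInput`, and `ciContext` are
 assumed to exist at the call site, with `modelInput` created once and reused):

     resizePixelBuffer(camera, width: 224, height: 224,
                       output: modelInput, context: ciContext)

 Because this variant renders through Core Image into a caller-supplied buffer, it
 avoids the malloc-backed copy made by the vImage-based variant above, and the
 result stays IOSurface-backed if `modelInput` was created that way.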
125 | */ 126 | public func resizePixelBuffer(_ pixelBuffer: CVPixelBuffer, 127 | width: Int, height: Int, 128 | output: CVPixelBuffer, context: CIContext) { 129 | let ciImage = CIImage(cvPixelBuffer: pixelBuffer) 130 | let sx = CGFloat(width) / CGFloat(CVPixelBufferGetWidth(pixelBuffer)) 131 | let sy = CGFloat(height) / CGFloat(CVPixelBufferGetHeight(pixelBuffer)) 132 | let scaleTransform = CGAffineTransform(scaleX: sx, y: sy) 133 | let scaledImage = ciImage.transformed(by: scaleTransform) 134 | context.render(scaledImage, to: output) 135 | } 136 | 137 | /** 138 | Rotates CVPixelBuffer by the provided factor of 90 counterclock-wise. 139 | 140 | - Note: The new CVPixelBuffer is not backed by an IOSurface and therefore 141 | cannot be turned into a Metal texture. 142 | */ 143 | public func rotate90PixelBuffer(_ srcPixelBuffer: CVPixelBuffer, factor: UInt8) -> CVPixelBuffer? { 144 | let flags = CVPixelBufferLockFlags(rawValue: 0) 145 | guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(srcPixelBuffer, flags) else { 146 | return nil 147 | } 148 | defer { CVPixelBufferUnlockBaseAddress(srcPixelBuffer, flags) } 149 | 150 | guard let srcData = CVPixelBufferGetBaseAddress(srcPixelBuffer) else { 151 | print("Error: could not get pixel buffer base address") 152 | return nil 153 | } 154 | let sourceWidth = CVPixelBufferGetWidth(srcPixelBuffer) 155 | let sourceHeight = CVPixelBufferGetHeight(srcPixelBuffer) 156 | var destWidth = sourceHeight 157 | var destHeight = sourceWidth 158 | var color = UInt8(0) 159 | 160 | if factor % 2 == 0 { 161 | destWidth = sourceWidth 162 | destHeight = sourceHeight 163 | } 164 | 165 | let srcBytesPerRow = CVPixelBufferGetBytesPerRow(srcPixelBuffer) 166 | var srcBuffer = vImage_Buffer(data: srcData, 167 | height: vImagePixelCount(sourceHeight), 168 | width: vImagePixelCount(sourceWidth), 169 | rowBytes: srcBytesPerRow) 170 | 171 | let destBytesPerRow = destWidth*4 172 | guard let destData = malloc(destHeight*destBytesPerRow) else { 173 | print("Error: out of memory") 174 | return nil 175 | } 176 | var destBuffer = vImage_Buffer(data: destData, 177 | height: vImagePixelCount(destHeight), 178 | width: vImagePixelCount(destWidth), 179 | rowBytes: destBytesPerRow) 180 | 181 | let error = vImageRotate90_ARGB8888(&srcBuffer, &destBuffer, factor, &color, vImage_Flags(0)) 182 | if error != kvImageNoError { 183 | print("Error:", error) 184 | free(destData) 185 | return nil 186 | } 187 | 188 | let releaseCallback: CVPixelBufferReleaseBytesCallback = { _, ptr in 189 | if let ptr = ptr { 190 | free(UnsafeMutableRawPointer(mutating: ptr)) 191 | } 192 | } 193 | 194 | let pixelFormat = CVPixelBufferGetPixelFormatType(srcPixelBuffer) 195 | var dstPixelBuffer: CVPixelBuffer? 196 | let status = CVPixelBufferCreateWithBytes(nil, destWidth, destHeight, 197 | pixelFormat, destData, 198 | destBytesPerRow, releaseCallback, 199 | nil, nil, &dstPixelBuffer) 200 | if status != kCVReturnSuccess { 201 | print("Error: could not create new pixel buffer") 202 | free(destData) 203 | return nil 204 | } 205 | return dstPixelBuffer 206 | } 207 | 208 | public extension CVPixelBuffer { 209 | /** 210 | Copies a CVPixelBuffer to a new CVPixelBuffer that is compatible with Metal. 211 | 212 | - Tip: If CVMetalTextureCacheCreateTextureFromImage is failing, then call 213 | this method first! 214 | */ 215 | func copyToMetalCompatible() -> CVPixelBuffer? 
{ 216 | // Other possible options: 217 | // String(kCVPixelBufferOpenGLCompatibilityKey): true, 218 | // String(kCVPixelBufferIOSurfacePropertiesKey): [ 219 | // "IOSurfaceOpenGLESFBOCompatibility": true, 220 | // "IOSurfaceOpenGLESTextureCompatibility": true, 221 | // "IOSurfaceCoreAnimationCompatibility": true 222 | // ] 223 | let attributes: [String: Any] = [ 224 | String(kCVPixelBufferMetalCompatibilityKey): true, 225 | ] 226 | return deepCopy(withAttributes: attributes) 227 | } 228 | 229 | /** 230 | Copies a CVPixelBuffer to a new CVPixelBuffer. 231 | 232 | This lets you specify new attributes, such as whether the new CVPixelBuffer 233 | must be IOSurface-backed. 234 | 235 | See: https://developer.apple.com/library/archive/qa/qa1781/_index.html 236 | */ 237 | func deepCopy(withAttributes attributes: [String: Any] = [:]) -> CVPixelBuffer? { 238 | let srcPixelBuffer = self 239 | let srcFlags: CVPixelBufferLockFlags = .readOnly 240 | guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(srcPixelBuffer, srcFlags) else { 241 | return nil 242 | } 243 | defer { CVPixelBufferUnlockBaseAddress(srcPixelBuffer, srcFlags) } 244 | 245 | var combinedAttributes: [String: Any] = [:] 246 | 247 | // Copy attachment attributes. 248 | if let attachments = CVBufferGetAttachments(srcPixelBuffer, .shouldPropagate) as? [String: Any] { 249 | for (key, value) in attachments { 250 | combinedAttributes[key] = value 251 | } 252 | } 253 | 254 | // Add user attributes. 255 | combinedAttributes = combinedAttributes.merging(attributes) { $1 } 256 | 257 | var maybePixelBuffer: CVPixelBuffer? 258 | let status = CVPixelBufferCreate(kCFAllocatorDefault, 259 | CVPixelBufferGetWidth(srcPixelBuffer), 260 | CVPixelBufferGetHeight(srcPixelBuffer), 261 | CVPixelBufferGetPixelFormatType(srcPixelBuffer), 262 | combinedAttributes as CFDictionary, 263 | &maybePixelBuffer) 264 | 265 | guard status == kCVReturnSuccess, let dstPixelBuffer = maybePixelBuffer else { 266 | return nil 267 | } 268 | 269 | let dstFlags = CVPixelBufferLockFlags(rawValue: 0) 270 | guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(dstPixelBuffer, dstFlags) else { 271 | return nil 272 | } 273 | defer { CVPixelBufferUnlockBaseAddress(dstPixelBuffer, dstFlags) } 274 | 275 | for plane in 0...max(0, CVPixelBufferGetPlaneCount(srcPixelBuffer) - 1) { 276 | if let srcAddr = CVPixelBufferGetBaseAddressOfPlane(srcPixelBuffer, plane), 277 | let dstAddr = CVPixelBufferGetBaseAddressOfPlane(dstPixelBuffer, plane) { 278 | let srcBytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(srcPixelBuffer, plane) 279 | let dstBytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(dstPixelBuffer, plane) 280 | 281 | for h in 0.. MLMultiArray { 34 | let newCount = dimensions.reduce(1, *) 35 | precondition(newCount == count, "Cannot reshape \(shape) to \(dimensions)") 36 | 37 | var newStrides = [Int](repeating: 0, count: dimensions.count) 38 | newStrides[dimensions.count - 1] = 1 39 | for i in stride(from: dimensions.count - 1, to: 0, by: -1) { 40 | newStrides[i - 1] = newStrides[i] * dimensions[i] 41 | } 42 | 43 | let newShape_ = dimensions.map { NSNumber(value: $0) } 44 | let newStrides_ = newStrides.map { NSNumber(value: $0) } 45 | 46 | return try MLMultiArray(dataPointer: self.dataPointer, 47 | shape: newShape_, 48 | dataType: self.dataType, 49 | strides: newStrides_) 50 | } 51 | 52 | /** 53 | Returns a transposed version of this MLMultiArray. 54 | 55 | - Note: This copies the data. 56 | 57 | - TODO: Support .float32 and .int32 types too. 
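 For example (a hypothetical sketch, assuming `array` is a .double MLMultiArray
 of shape (C, H, W)):

     // Reorders the axes to (H, W, C); the elements are copied one by one.
     let hwc = try array.transposed(to: [1, 2, 0])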
58 | */ 59 | @nonobjc public func transposed(to order: [Int]) throws -> MLMultiArray { 60 | let ndim = order.count 61 | 62 | precondition(dataType == .double) 63 | precondition(ndim == strides.count) 64 | 65 | let newShape = shape.indices.map { shape[order[$0]] } 66 | let newArray = try MLMultiArray(shape: newShape, dataType: self.dataType) 67 | 68 | let srcPtr = UnsafeMutablePointer(OpaquePointer(dataPointer)) 69 | let dstPtr = UnsafeMutablePointer(OpaquePointer(newArray.dataPointer)) 70 | 71 | let srcShape = shape.map { $0.intValue } 72 | let dstStride = newArray.strides.map { $0.intValue } 73 | var idx = [Int](repeating: 0, count: ndim) 74 | 75 | for j in 0.. 0 && idx[i] >= srcShape[i] { 89 | idx[i] = 0 90 | idx[i - 1] += 1 91 | i -= 1 92 | } 93 | } 94 | return newArray 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /FastDepth-CoreML/CoreMLHelpers/MLMultiArray+Image.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | import Accelerate 24 | import CoreML 25 | 26 | public protocol MultiArrayType: Comparable { 27 | static var multiArrayDataType: MLMultiArrayDataType { get } 28 | static func +(lhs: Self, rhs: Self) -> Self 29 | static func -(lhs: Self, rhs: Self) -> Self 30 | static func *(lhs: Self, rhs: Self) -> Self 31 | static func /(lhs: Self, rhs: Self) -> Self 32 | init(_: Int) 33 | var toUInt8: UInt8 { get } 34 | } 35 | 36 | extension Double: MultiArrayType { 37 | public static var multiArrayDataType: MLMultiArrayDataType { return .double } 38 | public var toUInt8: UInt8 { return UInt8(self) } 39 | } 40 | 41 | extension Float: MultiArrayType { 42 | public static var multiArrayDataType: MLMultiArrayDataType { return .float32 } 43 | public var toUInt8: UInt8 { return UInt8(self) } 44 | } 45 | 46 | extension Int32: MultiArrayType { 47 | public static var multiArrayDataType: MLMultiArrayDataType { return .int32 } 48 | public var toUInt8: UInt8 { return UInt8(self) } 49 | } 50 | 51 | extension MLMultiArray { 52 | /** 53 | Converts the multi-array to a CGImage. 54 | 55 | The multi-array must have at least 2 dimensions for a grayscale image, or 56 | at least 3 dimensions for a color image. 57 | 58 | The default expected shape is (height, width) or (channels, height, width). 59 | However, you can change this using the `axes` parameter. 
For example, if 60 | the array shape is (1, height, width, channels), use `axes: (3, 1, 2)`. 61 | 62 | If `channel` is not nil, only converts that channel to a grayscale image. 63 | This lets you visualize individual channels from a multi-array with more 64 | than 4 channels. 65 | 66 | Otherwise, converts all channels. In this case, the number of channels in 67 | the multi-array must be 1 for grayscale, 3 for RGB, or 4 for RGBA. 68 | 69 | Use the `min` and `max` parameters to put the values from the array into 70 | the range [0, 255], if not already: 71 | 72 | - `min`: should be the smallest value in the data; this will be mapped to 0. 73 | - `max`: should be the largest value in the data; will be mapped to 255. 74 | 75 | For example, if the range of the data in the multi-array is [-1, 1], use 76 | `min: -1, max: 1`. If the range is already [0, 255], then use the defaults. 77 | */ 78 | public func cgImage(min: Double = 0, 79 | max: Double = 255, 80 | channel: Int? = nil, 81 | axes: (Int, Int, Int)? = nil) -> CGImage? { 82 | switch self.dataType { 83 | case .double: 84 | return _image(min: min, max: max, channel: channel, axes: axes) 85 | case .float32: 86 | return _image(min: Float(min), max: Float(max), channel: channel, axes: axes) 87 | case .int32: 88 | return _image(min: Int32(min), max: Int32(max), channel: channel, axes: axes) 89 | @unknown default: 90 | fatalError("Unsupported data type \(dataType.rawValue)") 91 | } 92 | } 93 | 94 | /** 95 | Helper function that allows us to use generics. The type of `min` and `max` 96 | is also the dataType of the MLMultiArray. 97 | */ 98 | private func _image(min: T, 99 | max: T, 100 | channel: Int?, 101 | axes: (Int, Int, Int)?) -> CGImage? { 102 | if let (b, w, h, c) = toRawBytes(min: min, max: max, channel: channel, axes: axes) { 103 | if c == 1 { 104 | return CGImage.fromByteArrayGray(b, width: w, height: h) 105 | } else { 106 | return CGImage.fromByteArrayRGBA(b, width: w, height: h) 107 | } 108 | } 109 | return nil 110 | } 111 | 112 | /** 113 | Converts the multi-array into an array of RGBA or grayscale pixels. 114 | 115 | - Note: This is not particularly fast, but it is flexible. You can change 116 | the loops to convert the multi-array whichever way you please. 117 | 118 | - Note: The type of `min` and `max` must match the dataType of the 119 | MLMultiArray object. 120 | 121 | - Returns: tuple containing the RGBA bytes, the dimensions of the image, 122 | and the number of channels in the image (1, 3, or 4). 123 | */ 124 | public func toRawBytes(min: T, 125 | max: T, 126 | channel: Int? = nil, 127 | axes: (Int, Int, Int)? = nil) 128 | -> (bytes: [UInt8], width: Int, height: Int, channels: Int)? { 129 | // MLMultiArray with unsupported shape? 130 | if shape.count < 2 { 131 | print("Cannot convert MLMultiArray of shape \(shape) to image") 132 | return nil 133 | } 134 | 135 | // Figure out which dimensions to use for the channels, height, and width. 
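// (Illustrative comment, not part of the original helper.) This is usually reached
// through cgImage(...) or image(...) rather than called directly. A hypothetical
// sketch for a depth map, assuming the model output is a Double multi-array of
// shape (1, height, width) whose values do not exceed some `maxDepth`:
//
//     let depthImage = output.image(min: 0, max: maxDepth, axes: (0, 1, 2))
//
// With a single channel the result is grayscale; `min` maps to 0 (black) and
// `max` maps to 255 (white), as described in the doc comment above.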
136 | let channelAxis: Int 137 | let heightAxis: Int 138 | let widthAxis: Int 139 | if let axes = axes { 140 | channelAxis = axes.0 141 | heightAxis = axes.1 142 | widthAxis = axes.2 143 | guard channelAxis >= 0 && channelAxis < shape.count && 144 | heightAxis >= 0 && heightAxis < shape.count && 145 | widthAxis >= 0 && widthAxis < shape.count else { 146 | print("Invalid axes \(axes) for shape \(shape)") 147 | return nil 148 | } 149 | } else if shape.count == 2 { 150 | // Expected shape for grayscale is (height, width) 151 | heightAxis = 0 152 | widthAxis = 1 153 | channelAxis = -1 // Never be used 154 | } else { 155 | // Expected shape for color is (channels, height, width) 156 | channelAxis = 0 157 | heightAxis = 1 158 | widthAxis = 2 159 | } 160 | 161 | let height = self.shape[heightAxis].intValue 162 | let width = self.shape[widthAxis].intValue 163 | let yStride = self.strides[heightAxis].intValue 164 | let xStride = self.strides[widthAxis].intValue 165 | 166 | let channels: Int 167 | let cStride: Int 168 | let bytesPerPixel: Int 169 | let channelOffset: Int 170 | 171 | // MLMultiArray with just two dimensions is always grayscale. (We ignore 172 | // the value of channelAxis here.) 173 | if shape.count == 2 { 174 | channels = 1 175 | cStride = 0 176 | bytesPerPixel = 1 177 | channelOffset = 0 178 | 179 | // MLMultiArray with more than two dimensions can be color or grayscale. 180 | } else { 181 | let channelDim = self.shape[channelAxis].intValue 182 | if let channel = channel { 183 | if channel < 0 || channel >= channelDim { 184 | print("Channel must be -1, or between 0 and \(channelDim - 1)") 185 | return nil 186 | } 187 | channels = 1 188 | bytesPerPixel = 1 189 | channelOffset = channel 190 | } else if channelDim == 1 { 191 | channels = 1 192 | bytesPerPixel = 1 193 | channelOffset = 0 194 | } else { 195 | if channelDim != 3 && channelDim != 4 { 196 | print("Expected channel dimension to have 1, 3, or 4 channels, got \(channelDim)") 197 | return nil 198 | } 199 | channels = channelDim 200 | bytesPerPixel = 4 201 | channelOffset = 0 202 | } 203 | cStride = self.strides[channelAxis].intValue 204 | } 205 | 206 | // Allocate storage for the RGBA or grayscale pixels. Set everything to 207 | // 255 so that alpha channel is filled in if only 3 channels. 208 | let count = height * width * bytesPerPixel 209 | var pixels = [UInt8](repeating: 255, count: count) 210 | 211 | // Grab the pointer to MLMultiArray's memory. 212 | var ptr = UnsafeMutablePointer(OpaquePointer(self.dataPointer)) 213 | ptr = ptr.advanced(by: channelOffset * cStride) 214 | 215 | // Loop through all the pixels and all the channels and copy them over. 216 | for c in 0.. CGImage? 
{ 246 | assert(features.dataType == .float32) 247 | assert(features.shape.count == 3) 248 | 249 | let ptr = UnsafeMutablePointer(OpaquePointer(features.dataPointer)) 250 | 251 | let height = features.shape[1].intValue 252 | let width = features.shape[2].intValue 253 | let channelStride = features.strides[0].intValue 254 | let rowStride = features.strides[1].intValue 255 | let srcRowBytes = rowStride * MemoryLayout.stride 256 | 257 | var blueBuffer = vImage_Buffer(data: ptr, 258 | height: vImagePixelCount(height), 259 | width: vImagePixelCount(width), 260 | rowBytes: srcRowBytes) 261 | var greenBuffer = vImage_Buffer(data: ptr.advanced(by: channelStride), 262 | height: vImagePixelCount(height), 263 | width: vImagePixelCount(width), 264 | rowBytes: srcRowBytes) 265 | var redBuffer = vImage_Buffer(data: ptr.advanced(by: channelStride * 2), 266 | height: vImagePixelCount(height), 267 | width: vImagePixelCount(width), 268 | rowBytes: srcRowBytes) 269 | 270 | let destRowBytes = width * 4 271 | var pixels = [UInt8](repeating: 0, count: height * destRowBytes) 272 | var destBuffer = vImage_Buffer(data: &pixels, 273 | height: vImagePixelCount(height), 274 | width: vImagePixelCount(width), 275 | rowBytes: destRowBytes) 276 | 277 | let error = vImageConvert_PlanarFToBGRX8888(&blueBuffer, 278 | &greenBuffer, 279 | &redBuffer, 280 | Pixel_8(255), 281 | &destBuffer, 282 | [max, max, max], 283 | [min, min, min], 284 | vImage_Flags(0)) 285 | if error == kvImageNoError { 286 | return CGImage.fromByteArrayRGBA(pixels, width: width, height: height) 287 | } else { 288 | return nil 289 | } 290 | } 291 | 292 | #if canImport(UIKit) 293 | 294 | import UIKit 295 | 296 | extension MLMultiArray { 297 | public func image(min: Double = 0, 298 | max: Double = 255, 299 | channel: Int? = nil, 300 | axes: (Int, Int, Int)? = nil) -> UIImage? { 301 | let cgImg = cgImage(min: min, max: max, channel: channel, axes: axes) 302 | return cgImg.map { UIImage(cgImage: $0) } 303 | } 304 | } 305 | 306 | public func createUIImage(fromFloatArray features: MLMultiArray, 307 | min: Float = 0, 308 | max: Float = 255) -> UIImage? { 309 | let cgImg = createCGImage(fromFloatArray: features, min: min, max: max) 310 | return cgImg.map { UIImage(cgImage: $0) } 311 | } 312 | 313 | #endif 314 | -------------------------------------------------------------------------------- /FastDepth-CoreML/CoreMLHelpers/Math.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | import Foundation 24 | import Accelerate 25 | 26 | /** 27 | Returns the index and value of the largest element in the array. 28 | 29 | - Parameters: 30 | - count: If provided, only look at the first `count` elements of the array, 31 | otherwise look at the entire array. 32 | */ 33 | public func argmax(_ array: [Float], count: Int? = nil) -> (Int, Float) { 34 | var maxValue: Float = 0 35 | var maxIndex: vDSP_Length = 0 36 | vDSP_maxvi(array, 1, &maxValue, &maxIndex, vDSP_Length(count ?? array.count)) 37 | return (Int(maxIndex), maxValue) 38 | } 39 | 40 | /** 41 | Returns the index and value of the largest element in the array. 42 | 43 | - Parameters: 44 | - ptr: Pointer to the first element in memory. 45 | - count: How many elements to look at. 46 | - stride: The distance between two elements in memory. 47 | */ 48 | public func argmax(_ ptr: UnsafePointer, count: Int, stride: Int = 1) -> (Int, Float) { 49 | var maxValue: Float = 0 50 | var maxIndex: vDSP_Length = 0 51 | vDSP_maxvi(ptr, vDSP_Stride(stride), &maxValue, &maxIndex, vDSP_Length(count)) 52 | return (Int(maxIndex), maxValue) 53 | } 54 | 55 | /** 56 | Returns the index and value of the largest element in the array. 57 | 58 | - Parameters: 59 | - count: If provided, only look at the first `count` elements of the array, 60 | otherwise look at the entire array. 61 | */ 62 | public func argmax(_ array: [Double], count: Int? = nil) -> (Int, Double) { 63 | var maxValue: Double = 0 64 | var maxIndex: vDSP_Length = 0 65 | vDSP_maxviD(array, 1, &maxValue, &maxIndex, vDSP_Length(count ?? array.count)) 66 | return (Int(maxIndex), maxValue) 67 | } 68 | 69 | /** 70 | Returns the index and value of the largest element in the array. 71 | 72 | - Parameters: 73 | - ptr: Pointer to the first element in memory. 74 | - count: How many elements to look at. 75 | - stride: The distance between two elements in memory. 76 | */ 77 | public func argmax(_ ptr: UnsafePointer, count: Int, stride: Int = 1) -> (Int, Double) { 78 | var maxValue: Double = 0 79 | var maxIndex: vDSP_Length = 0 80 | vDSP_maxviD(ptr, vDSP_Stride(stride), &maxValue, &maxIndex, vDSP_Length(count)) 81 | return (Int(maxIndex), maxValue) 82 | } 83 | 84 | /** Ensures that `x` is in the range `[min, max]`. */ 85 | public func clamp(_ x: T, min: T, max: T) -> T { 86 | if x < min { return min } 87 | if x > max { return max } 88 | return x 89 | } 90 | 91 | /** Logistic sigmoid. */ 92 | public func sigmoid(_ x: Float) -> Float { 93 | return 1 / (1 + exp(-x)) 94 | } 95 | 96 | /** Logistic sigmoid. 
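 For example, sigmoid(0) == 0.5; large positive inputs approach 1 and large
 negative inputs approach 0.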
*/ 97 | public func sigmoid(_ x: Double) -> Double { 98 | return 1 / (1 + exp(-x)) 99 | } 100 | 101 | /* In-place logistic sigmoid: x = 1 / (1 + exp(-x)) */ 102 | public func sigmoid(_ x: UnsafeMutablePointer, count: Int) { 103 | vDSP_vneg(x, 1, x, 1, vDSP_Length(count)) 104 | var cnt = Int32(count) 105 | vvexpf(x, x, &cnt) 106 | var y: Float = 1 107 | vDSP_vsadd(x, 1, &y, x, 1, vDSP_Length(count)) 108 | vvrecf(x, x, &cnt) 109 | } 110 | 111 | /* In-place logistic sigmoid: x = 1 / (1 + exp(-x)) */ 112 | public func sigmoid(_ x: UnsafeMutablePointer, count: Int) { 113 | vDSP_vnegD(x, 1, x, 1, vDSP_Length(count)) 114 | var cnt = Int32(count) 115 | vvexp(x, x, &cnt) 116 | var y: Double = 1 117 | vDSP_vsaddD(x, 1, &y, x, 1, vDSP_Length(count)) 118 | vvrec(x, x, &cnt) 119 | } 120 | 121 | /** 122 | Computes the "softmax" function over an array. 123 | 124 | Based on code from https://github.com/nikolaypavlov/MLPNeuralNet/ 125 | 126 | This is what softmax looks like in "pseudocode" (actually using Python 127 | and numpy): 128 | 129 | x -= np.max(x) 130 | exp_scores = np.exp(x) 131 | softmax = exp_scores / np.sum(exp_scores) 132 | 133 | First we shift the values of x so that the highest value in the array is 0. 134 | This ensures numerical stability with the exponents, so they don't blow up. 135 | */ 136 | public func softmax(_ x: [Float]) -> [Float] { 137 | var x = x 138 | let len = vDSP_Length(x.count) 139 | 140 | // Find the maximum value in the input array. 141 | var max: Float = 0 142 | vDSP_maxv(x, 1, &max, len) 143 | 144 | // Subtract the maximum from all the elements in the array. 145 | // Now the highest value in the array is 0. 146 | max = -max 147 | vDSP_vsadd(x, 1, &max, &x, 1, len) 148 | 149 | // Exponentiate all the elements in the array. 150 | var count = Int32(x.count) 151 | vvexpf(&x, x, &count) 152 | 153 | // Compute the sum of all exponentiated values. 154 | var sum: Float = 0 155 | vDSP_sve(x, 1, &sum, len) 156 | 157 | // Divide each element by the sum. This normalizes the array contents 158 | // so that they all add up to 1. 159 | vDSP_vsdiv(x, 1, &sum, &x, 1, len) 160 | 161 | return x 162 | } 163 | -------------------------------------------------------------------------------- /FastDepth-CoreML/CoreMLHelpers/NonMaxSuppression.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 
21 | */ 22 | 23 | import Foundation 24 | import Accelerate 25 | 26 | public struct BoundingBox { 27 | /** Index of the predicted class. */ 28 | public let classIndex: Int 29 | 30 | /** Confidence score. */ 31 | public let score: Float 32 | 33 | /** Normalized coordinates between 0 and 1. */ 34 | public let rect: CGRect 35 | 36 | public init(classIndex: Int, score: Float, rect: CGRect) { 37 | self.classIndex = classIndex 38 | self.score = score 39 | self.rect = rect 40 | } 41 | } 42 | 43 | /** 44 | Computes intersection-over-union overlap between two bounding boxes. 45 | */ 46 | public func IOU(_ a: CGRect, _ b: CGRect) -> Float { 47 | let areaA = a.width * a.height 48 | if areaA <= 0 { return 0 } 49 | 50 | let areaB = b.width * b.height 51 | if areaB <= 0 { return 0 } 52 | 53 | let intersectionMinX = max(a.minX, b.minX) 54 | let intersectionMinY = max(a.minY, b.minY) 55 | let intersectionMaxX = min(a.maxX, b.maxX) 56 | let intersectionMaxY = min(a.maxY, b.maxY) 57 | let intersectionArea = max(intersectionMaxY - intersectionMinY, 0) * 58 | max(intersectionMaxX - intersectionMinX, 0) 59 | return Float(intersectionArea / (areaA + areaB - intersectionArea)) 60 | } 61 | 62 | /** 63 | Removes bounding boxes that overlap too much with other boxes that have 64 | a higher score. 65 | */ 66 | public func nonMaxSuppression(boundingBoxes: [BoundingBox], 67 | iouThreshold: Float, 68 | maxBoxes: Int) -> [Int] { 69 | return nonMaxSuppression(boundingBoxes: boundingBoxes, 70 | indices: Array(boundingBoxes.indices), 71 | iouThreshold: iouThreshold, 72 | maxBoxes: maxBoxes) 73 | } 74 | 75 | /** 76 | Removes bounding boxes that overlap too much with other boxes that have 77 | a higher score. 78 | 79 | Based on code from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/non_max_suppression_op.cc 80 | 81 | - Note: This version of NMS ignores the class of the bounding boxes. Since it 82 | selects the bounding boxes in a greedy fashion, if a certain class has many 83 | boxes that are selected, then it is possible none of the boxes of the other 84 | classes get selected. 85 | 86 | - Parameters: 87 | - boundingBoxes: an array of bounding boxes and their scores 88 | - indices: which predictions to look at 89 | - iouThreshold: used to decide whether boxes overlap too much 90 | - maxBoxes: the maximum number of boxes that will be selected 91 | 92 | - Returns: the array indices of the selected bounding boxes 93 | */ 94 | public func nonMaxSuppression(boundingBoxes: [BoundingBox], 95 | indices: [Int], 96 | iouThreshold: Float, 97 | maxBoxes: Int) -> [Int] { 98 | 99 | // Sort the boxes based on their confidence scores, from high to low. 100 | let sortedIndices = indices.sorted { boundingBoxes[$0].score > boundingBoxes[$1].score } 101 | 102 | var selected: [Int] = [] 103 | 104 | // Loop through the bounding boxes, from highest score to lowest score, 105 | // and determine whether or not to keep each box. 106 | for i in 0..= maxBoxes { break } 108 | 109 | var shouldSelect = true 110 | let boxA = boundingBoxes[sortedIndices[i]] 111 | 112 | // Does the current box overlap one of the selected boxes more than the 113 | // given threshold amount? Then it's too similar, so don't keep it. 114 | for j in 0.. iouThreshold { 117 | shouldSelect = false 118 | break 119 | } 120 | } 121 | 122 | // This bounding box did not overlap too much with any previously selected 123 | // bounding box, so we'll keep it. 
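// (Illustrative comment, not part of the original helper.) With an assumed
// iouThreshold of 0.5, a box that overlaps an already-selected box at IOU 0.7 is
// rejected above, while one at IOU 0.3 survives and is appended below.
// A hypothetical call site, where `boxes` is an array of decoded BoundingBox values:
//
//     let keep = nonMaxSuppression(boundingBoxes: boxes,
//                                  iouThreshold: 0.5,
//                                  maxBoxes: 10)
//     let survivors = keep.map { boxes[$0] }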
124 | if shouldSelect { 125 | selected.append(sortedIndices[i]) 126 | } 127 | } 128 | 129 | return selected 130 | } 131 | 132 | /** 133 | Multi-class version of non maximum suppression. 134 | 135 | Where `nonMaxSuppression()` does not look at the class of the predictions at 136 | all, the multi-class version first selects the best bounding boxes for each 137 | class, and then keeps the best ones of those. 138 | 139 | With this method you can usually expect to see at least one bounding box for 140 | each class (unless all the scores for a given class are really low). 141 | 142 | Based on code from: https://github.com/tensorflow/models/blob/master/object_detection/core/post_processing.py 143 | 144 | - Parameters: 145 | - numClasses: the number of classes 146 | - boundingBoxes: an array of bounding boxes and their scores 147 | - scoreThreshold: used to only keep bounding boxes with a high enough score 148 | - iouThreshold: used to decide whether boxes overlap too much 149 | - maxPerClass: the maximum number of boxes that will be selected per class 150 | - maxTotal: maximum number of boxes that will be selected over all classes 151 | 152 | - Returns: the array indices of the selected bounding boxes 153 | */ 154 | public func nonMaxSuppressionMultiClass(numClasses: Int, 155 | boundingBoxes: [BoundingBox], 156 | scoreThreshold: Float, 157 | iouThreshold: Float, 158 | maxPerClass: Int, 159 | maxTotal: Int) -> [Int] { 160 | var selectedBoxes: [Int] = [] 161 | 162 | // Look at all the classes one-by-one. 163 | for c in 0..<numClasses { 164 | 165 | // Look at only the bounding boxes for this class. 166 | var filteredBoxes = [Int]() 167 | for p in 0..<boundingBoxes.count { 168 | let prediction = boundingBoxes[p] 169 | if prediction.classIndex == c { 170 | 171 | // Only keep the box if its score is over the threshold. 172 | if prediction.score > scoreThreshold { 173 | filteredBoxes.append(p) 174 | } 175 | } 176 | } 177 | 178 | // Only keep the best bounding boxes for this class. 179 | let nmsBoxes = nonMaxSuppression(boundingBoxes: boundingBoxes, 180 | indices: filteredBoxes, 181 | iouThreshold: iouThreshold, 182 | maxBoxes: maxPerClass) 183 | 184 | // Add the indices of the surviving boxes to the big list. 185 | selectedBoxes.append(contentsOf: nmsBoxes) 186 | } 187 | 188 | // Sort all the surviving boxes by score and only keep the best ones. 189 | let sortedBoxes = selectedBoxes.sorted { boundingBoxes[$0].score > boundingBoxes[$1].score } 190 | return Array(sortedBoxes.prefix(maxTotal)) 191 | } 192 | -------------------------------------------------------------------------------- /FastDepth-CoreML/CoreMLHelpers/Predictions.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | import Vision 24 | 25 | /** 26 | Returns the top `k` predictions from Core ML classification results as an 27 | array of `(String, Double)` pairs. 28 | */ 29 | public func top(_ k: Int, _ prob: [String: Double]) -> [(String, Double)] { 30 | return Array(prob.map { x in (x.key, x.value) } 31 | .sorted(by: { a, b -> Bool in a.1 > b.1 }) 32 | .prefix(through: min(k, prob.count) - 1)) 33 | } 34 | 35 | /** 36 | Returns the top `k` predictions from Vision classification results as an 37 | array of `(String, Double)` pairs. 38 | */ 39 | public func top(_ k: Int, _ observations: [VNClassificationObservation]) -> [(String, Double)] { 40 | // The Vision observations are sorted by confidence already. 41 | return observations.prefix(through: min(k, observations.count) - 1) 42 | .map { ($0.identifier, Double($0.confidence)) } 43 | } 44 | -------------------------------------------------------------------------------- /FastDepth-CoreML/CoreMLHelpers/UIImage+CVPixelBuffer.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | #if canImport(UIKit) 24 | 25 | import UIKit 26 | import VideoToolbox 27 | 28 | extension UIImage { 29 | /** 30 | Resizes the image to width x height and converts it to an RGB CVPixelBuffer. 31 | */ 32 | public func pixelBuffer(width: Int, height: Int) -> CVPixelBuffer? { 33 | return pixelBuffer(width: width, height: height, 34 | pixelFormatType: kCVPixelFormatType_32ARGB, 35 | colorSpace: CGColorSpaceCreateDeviceRGB(), 36 | alphaInfo: .noneSkipFirst) 37 | } 38 | 39 | /** 40 | Resizes the image to width x height and converts it to a grayscale CVPixelBuffer. 41 | */ 42 | public func pixelBufferGray(width: Int, height: Int) -> CVPixelBuffer? { 43 | return pixelBuffer(width: width, height: height, 44 | pixelFormatType: kCVPixelFormatType_OneComponent8, 45 | colorSpace: CGColorSpaceCreateDeviceGray(), 46 | alphaInfo: .none) 47 | } 48 | 49 | func pixelBuffer(width: Int, height: Int, pixelFormatType: OSType, 50 | colorSpace: CGColorSpace, alphaInfo: CGImageAlphaInfo) -> CVPixelBuffer? 
{ 51 | var maybePixelBuffer: CVPixelBuffer? 52 | let attrs = [kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue, 53 | kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue] 54 | let status = CVPixelBufferCreate(kCFAllocatorDefault, 55 | width, 56 | height, 57 | pixelFormatType, 58 | attrs as CFDictionary, 59 | &maybePixelBuffer) 60 | 61 | guard status == kCVReturnSuccess, let pixelBuffer = maybePixelBuffer else { 62 | return nil 63 | } 64 | 65 | let flags = CVPixelBufferLockFlags(rawValue: 0) 66 | guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(pixelBuffer, flags) else { 67 | return nil 68 | } 69 | defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, flags) } 70 | 71 | guard let context = CGContext(data: CVPixelBufferGetBaseAddress(pixelBuffer), 72 | width: width, 73 | height: height, 74 | bitsPerComponent: 8, 75 | bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer), 76 | space: colorSpace, 77 | bitmapInfo: alphaInfo.rawValue) 78 | else { 79 | return nil 80 | } 81 | 82 | UIGraphicsPushContext(context) 83 | context.translateBy(x: 0, y: CGFloat(height)) 84 | context.scaleBy(x: 1, y: -1) 85 | self.draw(in: CGRect(x: 0, y: 0, width: width, height: height)) 86 | UIGraphicsPopContext() 87 | 88 | return pixelBuffer 89 | } 90 | } 91 | 92 | extension UIImage { 93 | /** 94 | Creates a new UIImage from a CVPixelBuffer. 95 | 96 | - Note: Not all CVPixelBuffer pixel formats support conversion into a 97 | CGImage-compatible pixel format. 98 | */ 99 | public convenience init?(pixelBuffer: CVPixelBuffer) { 100 | if let cgImage = CGImage.create(pixelBuffer: pixelBuffer) { 101 | self.init(cgImage: cgImage) 102 | } else { 103 | return nil 104 | } 105 | } 106 | 107 | /* 108 | // Alternative implementation: 109 | public convenience init?(pixelBuffer: CVPixelBuffer) { 110 | // This converts the image to a CIImage first and then to a UIImage. 111 | // Does not appear to work on the simulator but is OK on the device. 112 | self.init(ciImage: CIImage(cvPixelBuffer: pixelBuffer)) 113 | } 114 | */ 115 | 116 | /** 117 | Creates a new UIImage from a CVPixelBuffer, using a Core Image context. 118 | */ 119 | public convenience init?(pixelBuffer: CVPixelBuffer, context: CIContext) { 120 | if let cgImage = CGImage.create(pixelBuffer: pixelBuffer, context: context) { 121 | self.init(cgImage: cgImage) 122 | } else { 123 | return nil 124 | } 125 | } 126 | } 127 | 128 | #endif 129 | -------------------------------------------------------------------------------- /FastDepth-CoreML/CoreMLHelpers/UIImage+Extensions.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | #if canImport(UIKit) 24 | 25 | import UIKit 26 | 27 | extension UIImage { 28 | /** 29 | Resizes the image. 30 | 31 | - Parameters: 32 | - scale: If this is 1, `newSize` is the size in pixels. 33 | */ 34 | @nonobjc public func resized(to newSize: CGSize, scale: CGFloat = 1) -> UIImage { 35 | let format = UIGraphicsImageRendererFormat.default() 36 | format.scale = scale 37 | let renderer = UIGraphicsImageRenderer(size: newSize, format: format) 38 | let image = renderer.image { _ in 39 | draw(in: CGRect(origin: .zero, size: newSize)) 40 | } 41 | return image 42 | } 43 | } 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /FastDepth-CoreML/CoreMLHelpers/UIImage+RawBytes.swift: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017-2019 M.I. Hollemans 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including without limitation the 7 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | sell copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | IN THE SOFTWARE. 21 | */ 22 | 23 | #if canImport(UIKit) 24 | 25 | import UIKit 26 | 27 | extension UIImage { 28 | /** 29 | Converts the image into an array of RGBA bytes. 30 | */ 31 | @nonobjc public func toByteArrayRGBA() -> [UInt8]? { 32 | return cgImage?.toByteArrayRGBA() 33 | } 34 | 35 | /** 36 | Creates a new UIImage from an array of RGBA bytes. 37 | */ 38 | @nonobjc public class func fromByteArrayRGBA(_ bytes: [UInt8], 39 | width: Int, 40 | height: Int, 41 | scale: CGFloat = 0, 42 | orientation: UIImage.Orientation = .up) -> UIImage? { 43 | if let cgImage = CGImage.fromByteArrayRGBA(bytes, width: width, height: height) { 44 | return UIImage(cgImage: cgImage, scale: scale, orientation: orientation) 45 | } else { 46 | return nil 47 | } 48 | } 49 | 50 | /** 51 | Creates a new UIImage from an array of grayscale bytes. 52 | */ 53 | @nonobjc public class func fromByteArrayGray(_ bytes: [UInt8], 54 | width: Int, 55 | height: Int, 56 | scale: CGFloat = 0, 57 | orientation: UIImage.Orientation = .up) -> UIImage? 
{ 58 | if let cgImage = CGImage.fromByteArrayGray(bytes, width: width, height: height) { 59 | return UIImage(cgImage: cgImage, scale: scale, orientation: orientation) 60 | } else { 61 | return nil 62 | } 63 | } 64 | } 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /FastDepth-CoreML/FastDepth-CoreML.entitlements: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | com.apple.security.app-sandbox 6 | 7 | com.apple.security.network.client 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /FastDepth-CoreML/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | APPL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | LSRequiresIPhoneOS 22 | 23 | NSCameraUsageDescription 24 | for inference 25 | NSPhotoLibraryUsageDescription 26 | for inference 27 | UILaunchStoryboardName 28 | LaunchScreen 29 | UIMainStoryboardFile 30 | Main 31 | UIRequiredDeviceCapabilities 32 | 33 | armv7 34 | 35 | UISupportedInterfaceOrientations~ipad 36 | 37 | UIInterfaceOrientationPortrait 38 | UIInterfaceOrientationPortraitUpsideDown 39 | UIInterfaceOrientationLandscapeLeft 40 | UIInterfaceOrientationLandscapeRight 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /FastDepth-CoreML/LiveImageViewController.swift: -------------------------------------------------------------------------------- 1 | // 2 | // LiveImageViewController.swift 3 | // DepthPrediction-CoreML 4 | // 5 | // Created by Doyoung Gwak on 20/07/2019. 6 | // Copyright © 2019 Doyoung Gwak. All rights reserved. 7 | // 8 | 9 | import UIKit 10 | import Vision 11 | import AVFoundation 12 | 13 | class LiveImageViewController: UIViewController { 14 | 15 | // MARK: - UI Properties 16 | @IBOutlet weak var videoPreview: UIView! 17 | @IBOutlet weak var drawingView: UIImageView! 18 | 19 | @IBOutlet weak var inferenceLabel: UILabel! 20 | @IBOutlet weak var etimeLabel: UILabel! 21 | @IBOutlet weak var fpsLabel: UILabel! 22 | 23 | @IBOutlet weak var depthSlider: UISlider! 24 | // MARK: - AV Properties 25 | var videoCapture: VideoCapture! 26 | 27 | // MARK - Core ML model 28 | // FCRN(iOS11+), FCRNFP16(iOS11+) 29 | let estimationModel = FastDepth() 30 | 31 | // MARK: - Vision Properties 32 | var request: VNCoreMLRequest? 33 | var visionModel: VNCoreMLModel? 34 | 35 | var depthMax : Float = 4; 36 | 37 | // MARK: - Performance Measurement Property 38 | private let 👨‍🔧 = 📏() 39 | 40 | // MARK: - View Controller Life Cycle 41 | override func viewDidLoad() { 42 | super.viewDidLoad() 43 | 44 | // setup ml model 45 | setUpModel() 46 | 47 | // setup camera 48 | setUpCamera() 49 | 50 | // setup delegate for performance measurement 51 | 👨‍🔧.delegate = self 52 | 53 | depthSlider.setValue(depthMax, animated: true) 54 | } 55 | 56 | @IBAction func depthMaxValueChanged(_ sender: UISlider) { 57 | depthMax = sender.value 58 | } 59 | 60 | override func didReceiveMemoryWarning() { 61 | super.didReceiveMemoryWarning() 62 | // Dispose of any resources that can be recreated. 
63 | } 64 | 65 | override func viewWillAppear(_ animated: Bool) { 66 | super.viewWillAppear(animated) 67 | self.videoCapture.start() 68 | } 69 | 70 | override func viewWillDisappear(_ animated: Bool) { 71 | super.viewWillDisappear(animated) 72 | self.videoCapture.stop() 73 | } 74 | 75 | // MARK: - Setup Core ML 76 | func setUpModel() { 77 | if let visionModel = try? VNCoreMLModel(for: estimationModel.model) { 78 | self.visionModel = visionModel 79 | request = VNCoreMLRequest(model: visionModel, completionHandler: visionRequestDidComplete) 80 | request?.imageCropAndScaleOption = .centerCrop 81 | } else { 82 | fatalError() 83 | } 84 | } 85 | 86 | // MARK: - Setup camera 87 | func setUpCamera() { 88 | videoCapture = VideoCapture() 89 | videoCapture.delegate = self 90 | videoCapture.fps = 50 91 | videoCapture.setUp(sessionPreset: .cif352x288) { success in 92 | 93 | if success { 94 | if let previewLayer = self.videoCapture.previewLayer { 95 | self.videoPreview.layer.addSublayer(previewLayer) 96 | self.resizePreviewLayer() 97 | } 98 | self.videoCapture.start() 99 | } 100 | } 101 | } 102 | 103 | override func viewDidLayoutSubviews() { 104 | super.viewDidLayoutSubviews() 105 | resizePreviewLayer() 106 | } 107 | 108 | func resizePreviewLayer() { 109 | let bounds = videoPreview.bounds 110 | videoCapture.previewLayer?.videoGravity = AVLayerVideoGravity.resizeAspectFill 111 | videoCapture.previewLayer?.bounds = bounds 112 | videoCapture.previewLayer?.position = CGPoint(x:bounds.midX, y:bounds.midY) 113 | } 114 | } 115 | 116 | // MARK: - VideoCaptureDelegate 117 | extension LiveImageViewController: VideoCaptureDelegate { 118 | func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame pixelBuffer: CVPixelBuffer?/*, timestamp: CMTime*/) { 119 | 120 | // the captured image from camera is contained on pixelBuffer 121 | if let pixelBuffer = pixelBuffer { 122 | // start of measure 123 | self.👨‍🔧.🎬👏() 124 | predict(with: pixelBuffer) 125 | } 126 | } 127 | } 128 | 129 | // MARK: - Inference 130 | extension LiveImageViewController { 131 | // prediction 132 | func predict(with pixelBuffer: CVPixelBuffer) { 133 | guard let request = request else { fatalError() } 134 | 135 | // vision framework configures the input size of image following our model's input configuration automatically 136 | let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:]) 137 | try? handler.perform([request]) 138 | } 139 | 140 | // post-processing 141 | func visionRequestDidComplete(request: VNRequest, error: Error?) { 142 | 143 | self.👨‍🔧.🏷(with: "endInference") 144 | 145 | 146 | if let observations = request.results as? [VNCoreMLFeatureValueObservation], 147 | let array = observations.first?.featureValue.multiArrayValue, 148 | let map = try? 
array.reshaped(to: [1,224,224]), 149 | let image = map.image(min: Double(depthMax), max: 0, channel: nil, axes: nil) 150 | { 151 | DispatchQueue.main.async { [weak self] in 152 | self?.drawingView.image = image 153 | // end of measure 154 | self?.👨‍🔧.🎬🤚() 155 | } 156 | } else { 157 | // end of measure 158 | self.👨‍🔧.🎬🤚() 159 | } 160 | } 161 | } 162 | 163 | // MARK: - 📏(Performance Measurement) Delegate 164 | extension LiveImageViewController: 📏Delegate { 165 | func updateMeasure(inferenceTime: Double, executionTime: Double, fps: Int) { 166 | //print(executionTime, fps) 167 | self.inferenceLabel.text = "inference: \(Int(inferenceTime*1000.0)) ms" 168 | self.etimeLabel.text = "execution: \(Int(executionTime*1000.0)) ms" 169 | self.fpsLabel.text = "fps: \(fps)" 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /FastDepth-CoreML/Measure.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Measure.swift 3 | // TurtleApp-CoreML 4 | // 5 | // Created by GwakDoyoung on 03/07/2018. 6 | // Copyright © 2018 GwakDoyoung. All rights reserved. 7 | // 8 | 9 | import UIKit 10 | 11 | protocol 📏Delegate { 12 | func updateMeasure(inferenceTime: Double, executionTime: Double, fps: Int) 13 | } 14 | // Performance Measurement 15 | class 📏 { 16 | 17 | var delegate: 📏Delegate? 18 | 19 | var index: Int = -1 20 | var measurements: [Dictionary<String, Double>] 21 | 22 | init() { 23 | let measurement = [ 24 | "start": CACurrentMediaTime(), 25 | "end": CACurrentMediaTime() 26 | ] 27 | measurements = Array<Dictionary<String, Double>>(repeating: measurement, count: 30) 28 | } 29 | 30 | // start 31 | func 🎬👏() { 32 | index += 1 33 | index %= 30 34 | measurements[index] = [:] 35 | 36 | 🏷(for: index, with: "start") 37 | } 38 | 39 | // stop 40 | func 🎬🤚() { 41 | 🏷(for: index, with: "end") 42 | 43 | let beforeMeasurement = getBeforeMeasurment(for: index) 44 | let currentMeasurement = measurements[index] 45 | if let startTime = currentMeasurement["start"], 46 | let endInferenceTime = currentMeasurement["endInference"], 47 | let endTime = currentMeasurement["end"], 48 | let beforeStartTime = beforeMeasurement["start"] { 49 | delegate?.updateMeasure(inferenceTime: endInferenceTime - startTime, 50 | executionTime: endTime - startTime, 51 | fps: Int(1/(startTime - beforeStartTime))) 52 | } 53 | 54 | } 55 | 56 | // labeling with 57 | func 🏷(with msg: String? = "") { 58 | 🏷(for: index, with: msg) 59 | } 60 | 61 | private func 🏷(for index: Int, with msg: String? = "") { 62 | if let message = msg { 63 | measurements[index][message] = CACurrentMediaTime() 64 | } 65 | } 66 | 67 | private func getBeforeMeasurment(for index: Int) -> Dictionary<String, Double> { 68 | return measurements[(index + 30 - 1) % 30] 69 | } 70 | 71 | // log 72 | func 🖨() { 73 | 74 | } 75 | } 76 | 77 | class MeasureLogView: UIView { 78 | let etimeLabel = UILabel(frame: .zero) 79 | let fpsLabel = UILabel(frame: .zero) 80 | 81 | 82 | required init?(coder aDecoder: NSCoder) { 83 | fatalError("init(coder:) has not been implemented") 84 | } 85 | 86 | } 87 | -------------------------------------------------------------------------------- /FastDepth-CoreML/VideoCapture.swift: -------------------------------------------------------------------------------- 1 | // 2 | // VideoCapture.swift 3 | // Awesome ML 4 | // 5 | // Created by Eugene Bokhan on 3/13/18. 6 | // Copyright © 2018 Eugene Bokhan. All rights reserved.
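// [Editor's note] The depth rendering above relies on CoreMLHelpers'
// MLMultiArray+Image extension. A hand-rolled equivalent is shown here only as a
// hedged sketch (assumes a 1x224x224 row-major output and the same depthMax
// normalization used in LiveImageViewController; needs `import CoreML` and
// `import UIKit`):
func depthImage(from map: MLMultiArray, depthMax: Double) -> UIImage? {
    let width = 224, height = 224                      // assumed FastDepth output size
    var bytes = [UInt8](repeating: 0, count: width * height)
    for i in 0..<width * height {
        // Linear index into the 1x224x224 array; clamp to 0...depthMax metres.
        let depth = min(max(map[i].doubleValue, 0), depthMax)
        // Nearer pixels come out brighter, mirroring image(min: depthMax, max: 0).
        bytes[i] = UInt8(255.0 * (1.0 - depth / depthMax))
    }
    return UIImage.fromByteArrayGray(bytes, width: width, height: height)
}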
7 | // 8 | 9 | import UIKit 10 | import AVFoundation 11 | import CoreVideo 12 | 13 | public protocol VideoCaptureDelegate: class { 14 | func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame: CVPixelBuffer?/*, timestamp: CMTime*/) 15 | } 16 | 17 | public class VideoCapture: NSObject { 18 | public var previewLayer: AVCaptureVideoPreviewLayer? 19 | public weak var delegate: VideoCaptureDelegate? 20 | public var fps = 15 21 | 22 | let captureSession = AVCaptureSession() 23 | let videoOutput = AVCaptureVideoDataOutput() 24 | let queue = DispatchQueue(label: "com.coreml.camera-queue") 25 | 26 | // var lastTimestamp = CMTime() 27 | 28 | public func setUp(sessionPreset: AVCaptureSession.Preset = .vga640x480, 29 | completion: @escaping (Bool) -> Void) { 30 | self.setUpCamera(sessionPreset: sessionPreset, completion: { success in 31 | completion(success) 32 | }) 33 | } 34 | 35 | func setUpCamera(sessionPreset: AVCaptureSession.Preset, completion: @escaping (_ success: Bool) -> Void) { 36 | 37 | captureSession.beginConfiguration() 38 | captureSession.sessionPreset = sessionPreset 39 | 40 | guard let captureDevice = AVCaptureDevice.default(for: AVMediaType.video) else { 41 | print("Error: no video devices available") 42 | return 43 | } 44 | 45 | guard let videoInput = try? AVCaptureDeviceInput(device: captureDevice) else { 46 | print("Error: could not create AVCaptureDeviceInput") 47 | return 48 | } 49 | 50 | if captureSession.canAddInput(videoInput) { 51 | captureSession.addInput(videoInput) 52 | } 53 | 54 | let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession) 55 | previewLayer.videoGravity = AVLayerVideoGravity.resizeAspect 56 | previewLayer.connection?.videoOrientation = .portrait 57 | self.previewLayer = previewLayer 58 | 59 | let settings: [String : Any] = [ 60 | kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA), 61 | ] 62 | 63 | videoOutput.videoSettings = settings 64 | videoOutput.alwaysDiscardsLateVideoFrames = true 65 | videoOutput.setSampleBufferDelegate(self, queue: queue) 66 | if captureSession.canAddOutput(videoOutput) { 67 | captureSession.addOutput(videoOutput) 68 | } 69 | 70 | // We want the buffers to be in portrait orientation otherwise they are 71 | // rotated by 90 degrees. Need to set this _after_ addOutput()! 72 | videoOutput.connection(with: AVMediaType.video)?.videoOrientation = .portrait 73 | 74 | captureSession.commitConfiguration() 75 | 76 | let success = true 77 | completion(success) 78 | } 79 | 80 | public func start() { 81 | if !captureSession.isRunning { 82 | captureSession.startRunning() 83 | } 84 | } 85 | 86 | public func stop() { 87 | if captureSession.isRunning { 88 | captureSession.stopRunning() 89 | } 90 | } 91 | } 92 | 93 | extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { 94 | public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { 95 | // Because lowering the capture device's FPS looks ugly in the preview, 96 | // we capture at full speed but only call the delegate at its desired 97 | // framerate. 
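// [Editor's note] The block below is commented out in the original source. If
// throttling is wanted, it could be restored roughly as follows (a sketch only;
// it uses the labeled CMTimeMake(value:timescale:) spelling from newer SDKs and
// assumes the `lastTimestamp` property near the top of the class is re-enabled):
//
//   let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
//   if timestamp - lastTimestamp >= CMTimeMake(value: 1, timescale: Int32(fps)) {
//       lastTimestamp = timestamp
//       delegate?.videoCapture(self, didCaptureVideoFrame: CMSampleBufferGetImageBuffer(sampleBuffer))
//   }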
98 | // let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer) 99 | // let deltaTime = timestamp - lastTimestamp 100 | // if deltaTime >= CMTimeMake(1, Int32(fps)) { 101 | // lastTimestamp = timestamp 102 | let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) 103 | delegate?.videoCapture(self, didCaptureVideoFrame: imageBuffer/*, timestamp: timestamp*/) 104 | // } 105 | } 106 | 107 | public func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { 108 | //print("dropped frame") 109 | } 110 | } 111 | 112 | -------------------------------------------------------------------------------- /FastDepth-CoreML/mlmodel/FastDepth.mlmodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JustinFincher/FastDepth-CoreML/65d643dfd31000d4f505857071e59ab211a5f0cf/FastDepth-CoreML/mlmodel/FastDepth.mlmodel -------------------------------------------------------------------------------- /Models/FastDepth.mlmodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JustinFincher/FastDepth-CoreML/65d643dfd31000d4f505857071e59ab211a5f0cf/Models/FastDepth.mlmodel -------------------------------------------------------------------------------- /Models/fastdepth.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JustinFincher/FastDepth-CoreML/65d643dfd31000d4f505857071e59ab211a5f0cf/Models/fastdepth.onnx -------------------------------------------------------------------------------- /Models/fastestdepth.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JustinFincher/FastDepth-CoreML/65d643dfd31000d4f505857071e59ab211a5f0cf/Models/fastestdepth.onnx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FastDepth-CoreML 2 | This is a CoreML version of [FastDepth: Fast Monocular Depth Estimation on Embedded Systems](https://github.com/dwofk/fast-depth). I do not guarantee the correctness of my conversion. 3 | The iOS project is based on tucan9389's [DepthPrediction](https://github.com/tucan9389/DepthPrediction-CoreML), but with [CoreMLHelpers](https://github.com/hollance/CoreMLHelpers/) for convenience. 4 | 5 | # Example 6 | Recorded on an iPhone X. 7 | ![](Assets/demo.gif) --------------------------------------------------------------------------------
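Editor's addition: a usage sketch condensed from `LiveImageViewController.swift`, not extra API. `pixelBuffer` is assumed to be a camera frame, the 224x224 shape matches the model used above, and `reshaped(to:)` / `image(min:max:channel:axes:)` come from the bundled CoreMLHelpers:

```swift
import Vision

let model = try VNCoreMLModel(for: FastDepth().model)
let request = VNCoreMLRequest(model: model) { request, _ in
    guard let observation = request.results?.first as? VNCoreMLFeatureValueObservation,
          let depthMap = observation.featureValue.multiArrayValue,
          let reshaped = try? depthMap.reshaped(to: [1, 224, 224]),
          let image = reshaped.image(min: 4.0, max: 0, channel: nil, axes: nil)
    else { return }
    // `image` is a UIImage visualization of the predicted depth map.
}
request.imageCropAndScaleOption = .centerCrop
try VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:]).perform([request])
```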