├── !App
│   ├── !figs
│   │   ├── airdrop.png
│   │   ├── app.png
│   │   ├── interface.png
│   │   └── signing.png
│   ├── ConvertBinaries.py
│   ├── ImageBundleApp
│   │   ├── Configuration
│   │   │   └── SampleCode.xcconfig
│   │   ├── ImageBundleRecorder.xcodeproj
│   │   │   ├── .xcodesamplecode.plist
│   │   │   └── project.pbxproj
│   │   ├── ImageBundleRecorder
│   │   │   ├── Assets.xcassets
│   │   │   │   ├── AccentColor.colorset
│   │   │   │   │   └── Contents.json
│   │   │   │   └── Contents.json
│   │   │   ├── CameraController.swift
│   │   │   ├── CameraManager.swift
│   │   │   ├── ImageBundleRecorder.entitlements
│   │   │   ├── ImageBundleRecorder.swift
│   │   │   ├── Info.plist
│   │   │   ├── MetalTextureView.swift
│   │   │   ├── MetalTextureViewDepth.swift
│   │   │   ├── MetalViewSample.swift
│   │   │   └── shaders.metal
│   │   ├── LICENSE
│   │   │   └── LICENSE.txt
│   │   └── Launch Screen.storyboard
│   └── README.md
├── !figs
│   ├── experiments-thumb.png
│   ├── extra-thumb.png
│   ├── scenes-thumb.png
│   └── synth-thumb.png
├── LICENSE
├── README.md
├── checkpoints
│   └── __init__.py
├── config
│   ├── config_depth.json
│   └── config_rgb.json
├── data
│   └── __init__.py
├── requirements.txt
├── train.py
├── tutorial.ipynb
└── utils
    └── utils.py
/!App/!figs/airdrop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princeton-computational-imaging/SoaP/857dda0f7578126ed9feb8410eedeef053679f9e/!App/!figs/airdrop.png
--------------------------------------------------------------------------------
/!App/!figs/app.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princeton-computational-imaging/SoaP/857dda0f7578126ed9feb8410eedeef053679f9e/!App/!figs/app.png
--------------------------------------------------------------------------------
/!App/!figs/interface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princeton-computational-imaging/SoaP/857dda0f7578126ed9feb8410eedeef053679f9e/!App/!figs/interface.png
--------------------------------------------------------------------------------
/!App/!figs/signing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princeton-computational-imaging/SoaP/857dda0f7578126ed9feb8410eedeef053679f9e/!App/!figs/signing.png
--------------------------------------------------------------------------------
/!App/ConvertBinaries.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import numpy as np
3 | import struct
4 | from matplotlib import gridspec
5 | import matplotlib.pyplot as plt
6 | from glob import glob
7 | import os
8 | from os.path import join
9 | from os.path import split
10 | from natsort import natsorted
11 | from skimage.transform import resize
12 | import re
13 | from tqdm import tqdm
14 | 
15 | """ Code to process the depth/image/pose binaries from the iOS DepthBundleRecorder app into more usable .npz files.
16 | Tested for iPhone 12, 13, and 14 Pro.
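Third-party dependencies (see imports above): numpy, matplotlib, natsort, scikit-image, tqdm.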
17 | Usage: python ConvertBinaries.py -d folder_with_bundles 18 | Output: a folder processed_folder_with_bundles containing the processed depth bundles 19 | """ 20 | 21 | def cut(x): # return value after the ":" 22 | return x.split(":")[1] 23 | 24 | def process_raw(npz_file, raw_name): 25 | global is_iphone12 26 | with open(raw_name, mode='rb') as file: 27 | raw = file.read() 28 | 29 | raw_split = raw.split(b"") 30 | num_raw_frames = 0 31 | 32 | for i, raw_frame in tqdm(enumerate(raw_split[1:])): 33 | if len(raw_frame) < 100: # skip weird outliers 34 | continue 35 | 36 | raw_header, raw_image = raw_frame.split(b"") 37 | raw_header = re.sub("\[|\]|\(|\)|\s|\'", "", str(raw_header)) # Strip all delims but <> and commas 38 | raw_header = re.sub(r"\s+", "", raw_header) # Strip spaces 39 | rw = raw_header.split(",") 40 | 41 | frame_count = int(cut(rw[1])) # skip description 42 | timestamp = float(cut(rw[2])) 43 | height = int(cut(rw[3])) 44 | width = int(cut(rw[4])) 45 | ISO = int(cut(rw[6])) 46 | exposure_time = float(cut(rw[7])) 47 | aperture = float(cut(rw[8])) 48 | brightness = float(cut(rw[9])) 49 | shutter_speed = float(cut(rw[10])) 50 | black_level = int(cut(rw[13])) 51 | white_level = int(cut(rw[14])) 52 | 53 | 54 | raw_image = struct.unpack('H'* ((len(raw_image)//2)), raw_image) 55 | raw_image = np.reshape(raw_image, (height, width)) 56 | raw_image = np.flip(raw_image.swapaxes(0,1), 1) # make vertical 57 | 58 | if is_iphone12: 59 | # Make everything same bayer format: 60 | # G B 61 | # R G 62 | A1 = raw_image[::2,1::2].copy() 63 | A2 = raw_image[1::2,::2].copy() 64 | raw_image[::2,1::2] = A2 65 | raw_image[1::2,::2] = A1 66 | 67 | raw_image = raw_image.astype(np.uint16) 68 | raw_frame = {'frame_count' : frame_count, 69 | 'timestamp' : timestamp, 70 | 'height' : height, 71 | 'width' : width, 72 | 'ISO' : ISO, 73 | 'exposure_time' : exposure_time, 74 | 'aperture' : aperture, 75 | 'brightness' : brightness, 76 | 'shutter_speed' : shutter_speed, 77 | 'black_level' : black_level, 78 | 'white_level' : white_level, 79 | 'raw' : raw_image 80 | } 81 | 82 | num_raw_frames += 1 83 | npz_file[f'raw_{i}'] = raw_frame 84 | npz_file['num_raw_frames'] = num_raw_frames 85 | 86 | def process_rgb(npz_file, rgb_name): 87 | global is_iphone12 88 | with open(rgb_name, mode='rb') as file: 89 | rgb = file.read() 90 | 91 | rgb_split = rgb.split(b"") 92 | num_rgb_frames = 0 93 | 94 | for i, rgb_frame in tqdm(enumerate(rgb_split[1:])): 95 | if len(rgb_frame) < 100: # skip weird outliers 96 | continue 97 | 98 | rgb_header, rgb_image = rgb_frame.split(b"") 99 | rgb_header = re.sub("\[|\]|\(|\)|\s|\'", "", str(rgb_header)) # Strip all delims but <> and commas 100 | rgb_header = re.sub(r"\s+", "", rgb_header) # Strip spaces 101 | r = rgb_header.split(",") 102 | 103 | frame_count = int(cut(r[1])) # skip description 104 | timestamp = float(cut(r[2])) 105 | height = int(cut(r[3])) 106 | width = int(cut(r[4])) 107 | intrinsics = np.array([[cut(r[6]), r[7], r[8]], 108 | [r[9], r[10], r[11]], 109 | [r[12], r[13], r[14]]], dtype=np.float32) 110 | 111 | rgb_image = struct.unpack('B'* ((len(rgb_image))), rgb_image) 112 | 113 | try: 114 | rgb_image = np.reshape(rgb_image, (1440, 1920, 4)) # 12 extra bytes at the end of each row, mystery 115 | rgb_image = rgb_image[:,:,[2,1,0,3]] # cut extra bytes, go from BGRA to RGBA 116 | rgb_image = np.flip(rgb_image.swapaxes(0,1), 1) # make vertical 117 | except: 118 | raise Exception("RGB format not understood.") 119 | 120 | rgb_image = rgb_image.astype(np.uint8) 121 | rgb_frame = 
{'frame_count' : frame_count, 122 | 'timestamp' : timestamp, 123 | 'height' : height, 124 | 'width' : width, 125 | 'intrinsics' : intrinsics, 126 | 'rgb' : rgb_image 127 | } 128 | 129 | num_rgb_frames += 1 130 | npz_file[f'rgb_{i}'] = rgb_frame 131 | npz_file['num_rgb_frames'] = num_rgb_frames 132 | 133 | def process_depth(npz_file, depth_name): 134 | with open(depth_name, mode='rb') as file: 135 | depth = file.read() 136 | 137 | depth_split = depth.split(b"") 138 | num_depth_frames = 0 139 | 140 | for i, depth_frame in tqdm(enumerate(depth_split[1:])): 141 | if len(depth_frame) < 100: # skip weird outliers 142 | continue 143 | 144 | depth_header, depth_image = depth_frame.split(b"") 145 | depth_header = re.sub("\[|\]|\(|\)|\s|\'", "", str(depth_header)) # Strip all delims but <> and commas 146 | depth_header = re.sub(r"\s+", "", depth_header) # Strip spaces 147 | d = depth_header.split(",") 148 | 149 | frame_count = int(cut(d[1])) # skip description 150 | timestamp = float(cut(d[2])) 151 | height = int(cut(d[3])) 152 | width = int(cut(d[4])) 153 | intrinsic_width = int(float(cut(d[6]))) 154 | intrinsic_height = int(float(cut(d[7]))) # it has a decimal for some reason 155 | intrinsics = np.array([[cut(d[8]), d[9], d[10]], 156 | [d[11], d[12], d[13]], 157 | [d[14], d[15], d[16]]], dtype=np.float32) 158 | lens_distortion = np.array([cut(d[17]), *d[18:59]], dtype=np.float32) 159 | lens_undistortion = np.array([cut(d[59]), *d[60:101]], dtype=np.float32) # 42 numbers, heh 160 | depth_accuracy = int(cut(d[101])) 161 | 162 | depth_image = struct.unpack('e'* ((len(depth_image)) // 2), depth_image) 163 | depth_image = np.reshape(depth_image, (height, width)) 164 | depth_image = np.flip(depth_image.swapaxes(0,1), 1) # make vertical 165 | depth_image = depth_image.astype(np.float16) 166 | 167 | depth_frame = {'frame_count' : frame_count, 168 | 'timestamp' : timestamp, 169 | 'height' : height, 170 | 'width' : width, 171 | 'intrinsic_height' : intrinsic_height, 172 | 'intrinsic_width' : intrinsic_width, 173 | 'intrinsics' : intrinsics, 174 | 'lens_distortion' : lens_distortion, 175 | 'lens_undistortion' : lens_undistortion, 176 | 'depth_accuracy' : depth_accuracy, 177 | 'depth' : depth_image 178 | } 179 | 180 | num_depth_frames += 1 181 | npz_file[f'depth_{i}'] = depth_frame 182 | npz_file['num_depth_frames'] = num_depth_frames 183 | 184 | def process_motion(npz_file, motion_name): 185 | with open(motion_name, mode='rb') as file: 186 | motion = str(file.read()) 187 | 188 | motion_split = motion.split("") 189 | num_motion_frames = 0 190 | 191 | frame_count = [] 192 | timestamp = [] 193 | quaternion = [] 194 | roll_pitch_yaw = [] 195 | rotation_rate = [] 196 | acceleration = [] 197 | gravity = [] 198 | 199 | for i, motion_frame in tqdm(enumerate(motion_split)): 200 | if len(motion_frame) < 100: # skip weird outliers 201 | continue 202 | 203 | motion_frame = motion_frame.strip().replace("", "") 204 | motion_frame = re.sub("\[|\]|\(|\)|\s|\'", "", motion_frame) # Strip all delims but <> and commas 205 | motion_frame = re.sub(r"\s+", "", motion_frame) # Strip spaces 206 | m = motion_frame.split(",") 207 | 208 | frame_count.append(int(cut(m[0]))) 209 | timestamp.append(float(cut(m[1]))) 210 | # quaternion x,y,z,w 211 | quaternion.append(np.array([cut(m[2]), cut(m[3]), cut(m[4]), cut(m[5])], dtype=np.float32)) 212 | rotation_rate.append(np.array([cut(m[6]), cut(m[7]), cut(m[8])], dtype=np.float32)) 213 | roll_pitch_yaw.append(np.array([cut(m[9]), cut(m[10]), cut(m[11])], dtype=np.float32)) 214 | 
acceleration.append(np.array([cut(m[12]), cut(m[13]), cut(m[14])], dtype=np.float32)) 215 | gravity.append(np.array([cut(m[15]), cut(m[16]), cut(m[17])], dtype=np.float32)) 216 | 217 | num_motion_frames += 1 218 | 219 | motion_frame = {'frame_count' : np.array(frame_count), 220 | 'timestamp' : np.array(timestamp), 221 | 'quaternion' : np.array(quaternion), 222 | 'rotation_rate' : np.array(rotation_rate), 223 | 'roll_pitch_yaw' : np.array(roll_pitch_yaw), 224 | 'acceleration' : np.array(acceleration), 225 | 'gravity' : np.array(gravity), 226 | 'num_motion_frames': np.array(num_motion_frames)} 227 | 228 | 229 | npz_file["motion"] = motion_frame 230 | 231 | def match_timestamps(npz_file): 232 | raw_timestamps = np.array([npz_file[f'raw_{i}']['timestamp'] for i in range(npz_file['num_raw_frames'])]) 233 | raw_timestamps = np.around(raw_timestamps, 3) 234 | rgb_timestamps = np.array([npz_file[f'rgb_{i}']['timestamp'] for i in range(npz_file['num_rgb_frames'])]) 235 | rgb_timestamps = np.around(rgb_timestamps, 3) 236 | depth_timestamps = np.array([npz_file[f'depth_{i}']['timestamp'] for i in range(npz_file['num_depth_frames'])]) 237 | depth_timestamps = np.around(depth_timestamps, 3) 238 | assert (rgb_timestamps == depth_timestamps).all() 239 | 240 | matches = np.array([np.where(rgb_timestamps == raw_timestamps[i])[0][0] for i in range(npz_file['num_raw_frames'])]) 241 | assert len(matches) == npz_file['num_raw_frames'] # all frames have a match 242 | 243 | for i in range(npz_file['num_raw_frames']): 244 | match_idx = matches[i] 245 | npz_file[f'rgb_{i}'] = npz_file[f'rgb_{match_idx}'] 246 | npz_file[f'depth_{i}'] = npz_file[f'depth_{match_idx}'] 247 | 248 | for i in range(npz_file['num_raw_frames'], npz_file['num_rgb_frames']): 249 | del npz_file[f'rgb_{i}'] 250 | del npz_file[f'depth_{i}'] 251 | 252 | npz_file['num_rgb_frames'] = npz_file['num_depth_frames'] = npz_file['num_raw_frames'] 253 | 254 | raw_timestamps = np.array([npz_file[f'raw_{i}']['timestamp'] for i in range(npz_file['num_raw_frames'])]) 255 | raw_timestamps = np.around(raw_timestamps, 3) 256 | rgb_timestamps = np.array([npz_file[f'rgb_{i}']['timestamp'] for i in range(npz_file['num_rgb_frames'])]) 257 | rgb_timestamps = np.around(rgb_timestamps, 3) 258 | depth_timestamps = np.array([npz_file[f'depth_{i}']['timestamp'] for i in range(npz_file['num_depth_frames'])]) 259 | depth_timestamps = np.around(depth_timestamps, 3) 260 | assert (rgb_timestamps == raw_timestamps).all() 261 | 262 | def main(): 263 | global is_iphone12 264 | parser = argparse.ArgumentParser() 265 | parser.add_argument('-d', default=None, type=str, required=True, help='Data directory') 266 | parser.add_argument('-iphone12', action='store_true', help='Flag that this is an iPhone 12 to rotate bayer array.') 267 | args = parser.parse_args() 268 | is_iphone12 = args.iphone12 269 | 270 | if "bundle-" not in args.d: 271 | bundle_names = natsorted(glob(join(args.d, "bundle*"))) 272 | else: 273 | bundle_names = [args.d] 274 | 275 | for bundle_name in bundle_names: 276 | print(f"Processing {split(bundle_name)[-1]}.") 277 | 278 | if "processed-" in bundle_name: 279 | continue # already processed, skip 280 | 281 | if "-motion" not in bundle_name: 282 | # Process image + depth bundle 283 | motion_name = join(bundle_name, "motion.bin") 284 | rgb_name = join(bundle_name, "imageRGB.bin") 285 | raw_name = join(bundle_name, "imageRAW.bin") 286 | depth_name = join(bundle_name, "depth.bin") 287 | 288 | npz_file = {} 289 | 290 | process_depth(npz_file, depth_name) 291 | 
process_motion(npz_file, motion_name) 292 | process_rgb(npz_file, rgb_name) 293 | process_raw(npz_file, raw_name) 294 | try: 295 | match_timestamps(npz_file) 296 | except Exception as e: 297 | print(f"Skipping {bundle_name} due to:\n{e}.") 298 | continue 299 | 300 | save_path = join(split(bundle_name)[0], "processed-" + split(bundle_name)[1]) 301 | os.makedirs(save_path, exist_ok=True) 302 | 303 | # Save first frame preview 304 | fig = plt.figure(figsize=(14, 30)) 305 | gs = gridspec.GridSpec(1, 3, wspace=0.0, hspace=0.0, width_ratios=[1,1,1.12]) 306 | ax1 = plt.subplot(gs[0,0]) 307 | ax1.imshow(npz_file['rgb_0']['rgb']) 308 | ax1.axis('off') 309 | ax1.set_title("RGB") 310 | ax2 = plt.subplot(gs[0,1]) 311 | ax2.imshow(npz_file['raw_0']['raw'], cmap="gray") 312 | ax2.axis('off') 313 | ax2.set_title("RAW") 314 | ax3 = plt.subplot(gs[0,2]) 315 | d = ax3.imshow(npz_file['depth_0']['depth'], cmap="Spectral", vmin=0, vmax=5) 316 | ax3.axis('off') 317 | ax3.set_title("Depth") 318 | fig.colorbar(d, fraction=0.055, label="Depth [m]") 319 | plt.savefig(join(save_path, "frame_first.png"), bbox_inches='tight', pad_inches=0.05, facecolor='white') 320 | plt.close() 321 | 322 | # Save last frame preview 323 | fig = plt.figure(figsize=(14, 30)) 324 | gs = gridspec.GridSpec(1, 3, wspace=0.0, hspace=0.0, width_ratios=[1,1,1.12]) 325 | ax1 = plt.subplot(gs[0,0]) 326 | ax1.imshow(npz_file[f'rgb_{npz_file["num_raw_frames"] - 1}']['rgb']) 327 | ax1.axis('off') 328 | ax1.set_title("RGB") 329 | ax2 = plt.subplot(gs[0,1]) 330 | ax2.imshow(npz_file[f'raw_{npz_file["num_raw_frames"] - 1}']['raw'], cmap="gray") 331 | ax2.axis('off') 332 | ax2.set_title("RAW") 333 | ax3 = plt.subplot(gs[0,2]) 334 | d = ax3.imshow(npz_file[f'depth_{npz_file["num_raw_frames"] - 1}']['depth'], cmap="Spectral", vmin=0, vmax=5) 335 | ax3.axis('off') 336 | ax3.set_title("Depth") 337 | fig.colorbar(d, fraction=0.055, label="Depth [m]") 338 | plt.savefig(join(save_path, "frame_last.png"), bbox_inches='tight', pad_inches=0.05, facecolor='white') 339 | plt.close() 340 | 341 | # Save bundle 342 | np.savez_compressed(join(save_path, "frame_bundle"), **npz_file) 343 | 344 | else: 345 | # Process only motion bundle 346 | 347 | motion_name = join(bundle_name, "motion.bin") 348 | 349 | save_path = bundle_name.replace("bundle-", "bundle_processed-") 350 | os.makedirs(save_path, exist_ok=True) 351 | 352 | npz_file = {} 353 | 354 | process_motion(npz_file, motion_name) 355 | 356 | # Save bundle 357 | np.savez(join(save_path, "motion_bundle"), **npz_file) 358 | 359 | if __name__ == '__main__': 360 | is_iphone12 = False 361 | main() 362 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/Configuration/SampleCode.xcconfig: -------------------------------------------------------------------------------- 1 | // 2 | // See LICENSE folder for this sample’s licensing information. 3 | // 4 | // SampleCode.xcconfig 5 | // 6 | 7 | // The `SAMPLE_CODE_DISAMBIGUATOR` configuration is to make it easier to build 8 | // and run a sample code project. Once you set your project's development team, 9 | // you'll have a unique bundle identifier. This is because the bundle identifier 10 | // is derived based on the 'SAMPLE_CODE_DISAMBIGUATOR' value. Do not use this 11 | // approach in your own projects—it's only useful for sample code projects because 12 | // they are frequently downloaded and don't have a development team set. 
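// Note: to build on a device you still need to select your own team under Signing & Capabilities
// in Xcode; SAMPLE_CODE_DISAMBIGUATOR then resolves to that team ID.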
13 | SAMPLE_CODE_DISAMBIGUATOR=${DEVELOPMENT_TEAM} 14 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/ImageBundleRecorder.xcodeproj/.xcodesamplecode.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/ImageBundleRecorder.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 55; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | AE9AD78D270ECD01001218B2 /* CameraController.swift in Sources */ = {isa = PBXBuildFile; fileRef = AE9AD78C270ECD01001218B2 /* CameraController.swift */; }; 11 | AED300A5271D6D18008F6007 /* CameraManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = AED300A4271D6D18008F6007 /* CameraManager.swift */; }; 12 | C12A11492857D820009A991A /* MetalTextureViewDepth.swift in Sources */ = {isa = PBXBuildFile; fileRef = C12A11442857D820009A991A /* MetalTextureViewDepth.swift */; }; 13 | C12A114A2857D820009A991A /* MetalViewSample.swift in Sources */ = {isa = PBXBuildFile; fileRef = C12A11452857D820009A991A /* MetalViewSample.swift */; }; 14 | C12A114B2857D820009A991A /* MetalTextureView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C12A11462857D820009A991A /* MetalTextureView.swift */; }; 15 | C12A114C2857D820009A991A /* ImageBundleRecorder.swift in Sources */ = {isa = PBXBuildFile; fileRef = C12A11472857D820009A991A /* ImageBundleRecorder.swift */; }; 16 | C12A114D2857D820009A991A /* shaders.metal in Sources */ = {isa = PBXBuildFile; fileRef = C12A11482857D820009A991A /* shaders.metal */; }; 17 | /* End PBXBuildFile section */ 18 | 19 | /* Begin PBXFileReference section */ 20 | 0C34ECABC9005D93C0DF4297 /* LICENSE.txt */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text; path = LICENSE.txt; sourceTree = ""; }; 21 | 9ED80CBFEB14F0F0F5121BC6 /* SampleCode.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = SampleCode.xcconfig; path = Configuration/SampleCode.xcconfig; sourceTree = ""; }; 22 | AE921D26270D7B02000E95C6 /* ImageBundleRecorder.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = ImageBundleRecorder.app; sourceTree = BUILT_PRODUCTS_DIR; }; 23 | AE921D2D270D7B04000E95C6 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 24 | AE9AD78C270ECD01001218B2 /* CameraController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CameraController.swift; sourceTree = ""; }; 25 | AED300A4271D6D18008F6007 /* CameraManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CameraManager.swift; sourceTree = ""; }; 26 | C12A11442857D820009A991A /* MetalTextureViewDepth.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalTextureViewDepth.swift; sourceTree = ""; }; 27 | C12A11452857D820009A991A /* MetalViewSample.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalViewSample.swift; sourceTree = ""; }; 28 | C12A11462857D820009A991A /* MetalTextureView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = 
MetalTextureView.swift; sourceTree = ""; }; 29 | C12A11472857D820009A991A /* ImageBundleRecorder.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ImageBundleRecorder.swift; sourceTree = ""; }; 30 | C12A11482857D820009A991A /* shaders.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = shaders.metal; sourceTree = ""; }; 31 | C1400FB3287F9BF400CB4E63 /* ImageBundleRecorder.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = ImageBundleRecorder.entitlements; sourceTree = ""; }; 32 | C1400FB42880E45300CB4E63 /* Launch Screen.storyboard */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; path = "Launch Screen.storyboard"; sourceTree = ""; }; 33 | C1970A7928592FC900B08ECB /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = Info.plist; path = /Users/chugunov/source/HandshakeHDR/ImageBundleApp/ImageBundleRecorder/Info.plist; sourceTree = ""; }; 34 | D0C5477C199C5F5C91FCD321 /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; 35 | /* End PBXFileReference section */ 36 | 37 | /* Begin PBXFrameworksBuildPhase section */ 38 | AE921D23270D7B02000E95C6 /* Frameworks */ = { 39 | isa = PBXFrameworksBuildPhase; 40 | buildActionMask = 2147483647; 41 | files = ( 42 | ); 43 | runOnlyForDeploymentPostprocessing = 0; 44 | }; 45 | /* End PBXFrameworksBuildPhase section */ 46 | 47 | /* Begin PBXGroup section */ 48 | AE921D1D270D7B02000E95C6 = { 49 | isa = PBXGroup; 50 | children = ( 51 | C1400FB42880E45300CB4E63 /* Launch Screen.storyboard */, 52 | D0C5477C199C5F5C91FCD321 /* README.md */, 53 | AE921D28270D7B02000E95C6 /* ImageBundleRecorder */, 54 | AE921D27270D7B02000E95C6 /* Products */, 55 | C42CD530FDC5FDCBE24A185E /* Configuration */, 56 | EA03DA1400B7EED6A2A7C7E7 /* LICENSE */, 57 | ); 58 | sourceTree = ""; 59 | }; 60 | AE921D27270D7B02000E95C6 /* Products */ = { 61 | isa = PBXGroup; 62 | children = ( 63 | AE921D26270D7B02000E95C6 /* ImageBundleRecorder.app */, 64 | ); 65 | name = Products; 66 | sourceTree = ""; 67 | }; 68 | AE921D28270D7B02000E95C6 /* ImageBundleRecorder */ = { 69 | isa = PBXGroup; 70 | children = ( 71 | C1400FB3287F9BF400CB4E63 /* ImageBundleRecorder.entitlements */, 72 | AE9AD78C270ECD01001218B2 /* CameraController.swift */, 73 | AED300A4271D6D18008F6007 /* CameraManager.swift */, 74 | C12A11462857D820009A991A /* MetalTextureView.swift */, 75 | C12A11442857D820009A991A /* MetalTextureViewDepth.swift */, 76 | C12A11452857D820009A991A /* MetalViewSample.swift */, 77 | C12A11472857D820009A991A /* ImageBundleRecorder.swift */, 78 | C12A11482857D820009A991A /* shaders.metal */, 79 | AE921D2D270D7B04000E95C6 /* Assets.xcassets */, 80 | C1970A7928592FC900B08ECB /* Info.plist */, 81 | ); 82 | path = ImageBundleRecorder; 83 | sourceTree = ""; 84 | }; 85 | C42CD530FDC5FDCBE24A185E /* Configuration */ = { 86 | isa = PBXGroup; 87 | children = ( 88 | 9ED80CBFEB14F0F0F5121BC6 /* SampleCode.xcconfig */, 89 | ); 90 | name = Configuration; 91 | sourceTree = ""; 92 | }; 93 | EA03DA1400B7EED6A2A7C7E7 /* LICENSE */ = { 94 | isa = PBXGroup; 95 | children = ( 96 | 0C34ECABC9005D93C0DF4297 /* LICENSE.txt */, 97 | ); 98 | path = LICENSE; 99 | sourceTree = ""; 100 | }; 101 | /* End PBXGroup section */ 102 | 103 | /* Begin PBXNativeTarget section */ 104 | AE921D25270D7B02000E95C6 /* ImageBundleRecorder */ = { 105 | isa = 
PBXNativeTarget; 106 | buildConfigurationList = AE921D34270D7B04000E95C6 /* Build configuration list for PBXNativeTarget "ImageBundleRecorder" */; 107 | buildPhases = ( 108 | AE921D22270D7B02000E95C6 /* Sources */, 109 | AE921D23270D7B02000E95C6 /* Frameworks */, 110 | ); 111 | buildRules = ( 112 | ); 113 | dependencies = ( 114 | ); 115 | name = ImageBundleRecorder; 116 | productName = DepthAPISample; 117 | productReference = AE921D26270D7B02000E95C6 /* ImageBundleRecorder.app */; 118 | productType = "com.apple.product-type.application"; 119 | }; 120 | /* End PBXNativeTarget section */ 121 | 122 | /* Begin PBXProject section */ 123 | AE921D1E270D7B02000E95C6 /* Project object */ = { 124 | isa = PBXProject; 125 | attributes = { 126 | BuildIndependentTargetsInParallel = 1; 127 | KnownAssetTags = ( 128 | New, 129 | ); 130 | LastSwiftUpdateCheck = 1320; 131 | LastUpgradeCheck = 1320; 132 | ORGANIZATIONNAME = Apple; 133 | TargetAttributes = { 134 | AE921D25270D7B02000E95C6 = { 135 | CreatedOnToolsVersion = 13.2; 136 | LastSwiftMigration = 1320; 137 | }; 138 | }; 139 | }; 140 | buildConfigurationList = AE921D21270D7B02000E95C6 /* Build configuration list for PBXProject "ImageBundleRecorder" */; 141 | compatibilityVersion = "Xcode 13.0"; 142 | developmentRegion = en; 143 | hasScannedForEncodings = 0; 144 | knownRegions = ( 145 | en, 146 | Base, 147 | ); 148 | mainGroup = AE921D1D270D7B02000E95C6; 149 | productRefGroup = AE921D27270D7B02000E95C6 /* Products */; 150 | projectDirPath = ""; 151 | projectRoot = ""; 152 | targets = ( 153 | AE921D25270D7B02000E95C6 /* ImageBundleRecorder */, 154 | ); 155 | }; 156 | /* End PBXProject section */ 157 | 158 | /* Begin PBXSourcesBuildPhase section */ 159 | AE921D22270D7B02000E95C6 /* Sources */ = { 160 | isa = PBXSourcesBuildPhase; 161 | buildActionMask = 2147483647; 162 | files = ( 163 | AE9AD78D270ECD01001218B2 /* CameraController.swift in Sources */, 164 | C12A114A2857D820009A991A /* MetalViewSample.swift in Sources */, 165 | AED300A5271D6D18008F6007 /* CameraManager.swift in Sources */, 166 | C12A114B2857D820009A991A /* MetalTextureView.swift in Sources */, 167 | C12A114C2857D820009A991A /* ImageBundleRecorder.swift in Sources */, 168 | C12A11492857D820009A991A /* MetalTextureViewDepth.swift in Sources */, 169 | C12A114D2857D820009A991A /* shaders.metal in Sources */, 170 | ); 171 | runOnlyForDeploymentPostprocessing = 0; 172 | }; 173 | /* End PBXSourcesBuildPhase section */ 174 | 175 | /* Begin XCBuildConfiguration section */ 176 | AE921D32270D7B04000E95C6 /* Debug */ = { 177 | isa = XCBuildConfiguration; 178 | baseConfigurationReference = 9ED80CBFEB14F0F0F5121BC6 /* SampleCode.xcconfig */; 179 | buildSettings = { 180 | ALWAYS_SEARCH_USER_PATHS = NO; 181 | CLANG_ANALYZER_NONNULL = YES; 182 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 183 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++17"; 184 | CLANG_CXX_LIBRARY = "libc++"; 185 | CLANG_ENABLE_MODULES = YES; 186 | CLANG_ENABLE_OBJC_ARC = YES; 187 | CLANG_ENABLE_OBJC_WEAK = YES; 188 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 189 | CLANG_WARN_BOOL_CONVERSION = YES; 190 | CLANG_WARN_COMMA = YES; 191 | CLANG_WARN_CONSTANT_CONVERSION = YES; 192 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; 193 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 194 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 195 | CLANG_WARN_EMPTY_BODY = YES; 196 | CLANG_WARN_ENUM_CONVERSION = YES; 197 | CLANG_WARN_INFINITE_RECURSION = YES; 198 | CLANG_WARN_INT_CONVERSION = YES; 199 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 
200 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; 201 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 202 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 203 | CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; 204 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 205 | CLANG_WARN_STRICT_PROTOTYPES = YES; 206 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 207 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 208 | CLANG_WARN_UNREACHABLE_CODE = YES; 209 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 210 | COPY_PHASE_STRIP = NO; 211 | DEBUG_INFORMATION_FORMAT = dwarf; 212 | ENABLE_STRICT_OBJC_MSGSEND = YES; 213 | ENABLE_TESTABILITY = YES; 214 | GCC_C_LANGUAGE_STANDARD = gnu11; 215 | GCC_DYNAMIC_NO_PIC = NO; 216 | GCC_NO_COMMON_BLOCKS = YES; 217 | GCC_OPTIMIZATION_LEVEL = 0; 218 | GCC_PREPROCESSOR_DEFINITIONS = ( 219 | "DEBUG=1", 220 | "$(inherited)", 221 | ); 222 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 223 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 224 | GCC_WARN_UNDECLARED_SELECTOR = YES; 225 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 226 | GCC_WARN_UNUSED_FUNCTION = YES; 227 | GCC_WARN_UNUSED_VARIABLE = YES; 228 | IPHONEOS_DEPLOYMENT_TARGET = 16.1; 229 | MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; 230 | MTL_FAST_MATH = YES; 231 | ONLY_ACTIVE_ARCH = YES; 232 | SDKROOT = iphoneos; 233 | SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; 234 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 235 | }; 236 | name = Debug; 237 | }; 238 | AE921D33270D7B04000E95C6 /* Release */ = { 239 | isa = XCBuildConfiguration; 240 | baseConfigurationReference = 9ED80CBFEB14F0F0F5121BC6 /* SampleCode.xcconfig */; 241 | buildSettings = { 242 | ALWAYS_SEARCH_USER_PATHS = NO; 243 | CLANG_ANALYZER_NONNULL = YES; 244 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 245 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++17"; 246 | CLANG_CXX_LIBRARY = "libc++"; 247 | CLANG_ENABLE_MODULES = YES; 248 | CLANG_ENABLE_OBJC_ARC = YES; 249 | CLANG_ENABLE_OBJC_WEAK = YES; 250 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 251 | CLANG_WARN_BOOL_CONVERSION = YES; 252 | CLANG_WARN_COMMA = YES; 253 | CLANG_WARN_CONSTANT_CONVERSION = YES; 254 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; 255 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 256 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 257 | CLANG_WARN_EMPTY_BODY = YES; 258 | CLANG_WARN_ENUM_CONVERSION = YES; 259 | CLANG_WARN_INFINITE_RECURSION = YES; 260 | CLANG_WARN_INT_CONVERSION = YES; 261 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 262 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; 263 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 264 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 265 | CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; 266 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 267 | CLANG_WARN_STRICT_PROTOTYPES = YES; 268 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 269 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 270 | CLANG_WARN_UNREACHABLE_CODE = YES; 271 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 272 | COPY_PHASE_STRIP = NO; 273 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 274 | ENABLE_NS_ASSERTIONS = NO; 275 | ENABLE_STRICT_OBJC_MSGSEND = YES; 276 | GCC_C_LANGUAGE_STANDARD = gnu11; 277 | GCC_NO_COMMON_BLOCKS = YES; 278 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 279 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 280 | GCC_WARN_UNDECLARED_SELECTOR = YES; 281 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 282 | GCC_WARN_UNUSED_FUNCTION = YES; 283 | GCC_WARN_UNUSED_VARIABLE = YES; 284 | IPHONEOS_DEPLOYMENT_TARGET = 16.1; 285 | MTL_ENABLE_DEBUG_INFO = NO; 286 | MTL_FAST_MATH = YES; 287 | SDKROOT = iphoneos; 288 
| SWIFT_COMPILATION_MODE = wholemodule; 289 | SWIFT_OPTIMIZATION_LEVEL = "-O"; 290 | VALIDATE_PRODUCT = YES; 291 | }; 292 | name = Release; 293 | }; 294 | AE921D35270D7B04000E95C6 /* Debug */ = { 295 | isa = XCBuildConfiguration; 296 | baseConfigurationReference = 9ED80CBFEB14F0F0F5121BC6 /* SampleCode.xcconfig */; 297 | buildSettings = { 298 | ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; 299 | ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; 300 | CODE_SIGN_ENTITLEMENTS = ImageBundleRecorder/ImageBundleRecorder.entitlements; 301 | CODE_SIGN_IDENTITY = "Apple Development"; 302 | CODE_SIGN_STYLE = Automatic; 303 | CURRENT_PROJECT_VERSION = 2; 304 | DEVELOPMENT_ASSET_PATHS = ""; 305 | DEVELOPMENT_TEAM = ""; 306 | ENABLE_PREVIEWS = YES; 307 | GENERATE_INFOPLIST_FILE = YES; 308 | INFOPLIST_FILE = ImageBundleRecorder/Info.plist; 309 | INFOPLIST_KEY_CFBundleDisplayName = "Image Bundle Recorder"; 310 | INFOPLIST_KEY_LSSupportsOpeningDocumentsInPlace = YES; 311 | INFOPLIST_KEY_NSCameraUsageDescription = "This app requires the camera for augmented reality."; 312 | INFOPLIST_KEY_NSLocationWhenInUseUsageDescription = ""; 313 | INFOPLIST_KEY_NSMicrophoneUsageDescription = ""; 314 | INFOPLIST_KEY_NSPhotoLibraryAddUsageDescription = "This app saves captures to phone."; 315 | INFOPLIST_KEY_NSPhotoLibraryUsageDescription = ""; 316 | INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; 317 | INFOPLIST_KEY_UILaunchStoryboardName = "Launch Screen"; 318 | INFOPLIST_KEY_UIRequiredDeviceCapabilities = "armv7 arkit"; 319 | INFOPLIST_KEY_UIRequiresFullScreen = YES; 320 | INFOPLIST_KEY_UIStatusBarHidden = YES; 321 | INFOPLIST_KEY_UIStatusBarStyle = UIStatusBarStyleDarkContent; 322 | INFOPLIST_KEY_UISupportedInterfaceOrientations = UIInterfaceOrientationPortrait; 323 | INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = UIInterfaceOrientationPortrait; 324 | INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = UIInterfaceOrientationPortrait; 325 | INFOPLIST_KEY_UISupportsDocumentBrowser = YES; 326 | IPHONEOS_DEPLOYMENT_TARGET = 16.0; 327 | LD_RUNPATH_SEARCH_PATHS = ( 328 | "$(inherited)", 329 | "@executable_path/Frameworks", 330 | ); 331 | MARKETING_VERSION = 1.0; 332 | PRODUCT_BUNDLE_IDENTIFIER = com.imageBundleRecorder; 333 | PRODUCT_NAME = "$(TARGET_NAME)"; 334 | PROVISIONING_PROFILE_SPECIFIER = ""; 335 | SWIFT_EMIT_LOC_STRINGS = YES; 336 | SWIFT_INSTALL_OBJC_HEADER = NO; 337 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 338 | SWIFT_VERSION = 5.0; 339 | TARGETED_DEVICE_FAMILY = 1; 340 | }; 341 | name = Debug; 342 | }; 343 | AE921D36270D7B04000E95C6 /* Release */ = { 344 | isa = XCBuildConfiguration; 345 | baseConfigurationReference = 9ED80CBFEB14F0F0F5121BC6 /* SampleCode.xcconfig */; 346 | buildSettings = { 347 | ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; 348 | ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; 349 | CODE_SIGN_ENTITLEMENTS = ImageBundleRecorder/ImageBundleRecorder.entitlements; 350 | CODE_SIGN_IDENTITY = "Apple Development"; 351 | CODE_SIGN_STYLE = Automatic; 352 | CURRENT_PROJECT_VERSION = 2; 353 | DEVELOPMENT_ASSET_PATHS = ""; 354 | DEVELOPMENT_TEAM = ""; 355 | ENABLE_PREVIEWS = YES; 356 | GENERATE_INFOPLIST_FILE = YES; 357 | INFOPLIST_FILE = ImageBundleRecorder/Info.plist; 358 | INFOPLIST_KEY_CFBundleDisplayName = "Image Bundle Recorder"; 359 | INFOPLIST_KEY_LSSupportsOpeningDocumentsInPlace = YES; 360 | INFOPLIST_KEY_NSCameraUsageDescription = "This app requires the camera for augmented reality."; 361 | 
INFOPLIST_KEY_NSLocationWhenInUseUsageDescription = ""; 362 | INFOPLIST_KEY_NSMicrophoneUsageDescription = ""; 363 | INFOPLIST_KEY_NSPhotoLibraryAddUsageDescription = "This app saves captures to phone."; 364 | INFOPLIST_KEY_NSPhotoLibraryUsageDescription = ""; 365 | INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; 366 | INFOPLIST_KEY_UILaunchStoryboardName = "Launch Screen"; 367 | INFOPLIST_KEY_UIRequiredDeviceCapabilities = "armv7 arkit"; 368 | INFOPLIST_KEY_UIRequiresFullScreen = YES; 369 | INFOPLIST_KEY_UIStatusBarHidden = YES; 370 | INFOPLIST_KEY_UIStatusBarStyle = UIStatusBarStyleDarkContent; 371 | INFOPLIST_KEY_UISupportedInterfaceOrientations = UIInterfaceOrientationPortrait; 372 | INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = UIInterfaceOrientationPortrait; 373 | INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = UIInterfaceOrientationPortrait; 374 | INFOPLIST_KEY_UISupportsDocumentBrowser = YES; 375 | IPHONEOS_DEPLOYMENT_TARGET = 16.0; 376 | LD_RUNPATH_SEARCH_PATHS = ( 377 | "$(inherited)", 378 | "@executable_path/Frameworks", 379 | ); 380 | MARKETING_VERSION = 1.0; 381 | PRODUCT_BUNDLE_IDENTIFIER = com.imageBundleRecorder; 382 | PRODUCT_NAME = "$(TARGET_NAME)"; 383 | PROVISIONING_PROFILE_SPECIFIER = ""; 384 | SWIFT_EMIT_LOC_STRINGS = YES; 385 | SWIFT_INSTALL_OBJC_HEADER = NO; 386 | SWIFT_VERSION = 5.0; 387 | TARGETED_DEVICE_FAMILY = 1; 388 | }; 389 | name = Release; 390 | }; 391 | /* End XCBuildConfiguration section */ 392 | 393 | /* Begin XCConfigurationList section */ 394 | AE921D21270D7B02000E95C6 /* Build configuration list for PBXProject "ImageBundleRecorder" */ = { 395 | isa = XCConfigurationList; 396 | buildConfigurations = ( 397 | AE921D32270D7B04000E95C6 /* Debug */, 398 | AE921D33270D7B04000E95C6 /* Release */, 399 | ); 400 | defaultConfigurationIsVisible = 0; 401 | defaultConfigurationName = Release; 402 | }; 403 | AE921D34270D7B04000E95C6 /* Build configuration list for PBXNativeTarget "ImageBundleRecorder" */ = { 404 | isa = XCConfigurationList; 405 | buildConfigurations = ( 406 | AE921D35270D7B04000E95C6 /* Debug */, 407 | AE921D36270D7B04000E95C6 /* Release */, 408 | ); 409 | defaultConfigurationIsVisible = 0; 410 | defaultConfigurationName = Release; 411 | }; 412 | /* End XCConfigurationList section */ 413 | }; 414 | rootObject = AE921D1E270D7B02000E95C6 /* Project object */; 415 | } 416 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/ImageBundleRecorder/Assets.xcassets/AccentColor.colorset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "colors" : [ 3 | { 4 | "idiom" : "universal" 5 | } 6 | ], 7 | "info" : { 8 | "author" : "xcode", 9 | "version" : 1 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/ImageBundleRecorder/Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "author" : "xcode", 4 | "version" : 1 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/ImageBundleRecorder/CameraController.swift: -------------------------------------------------------------------------------- 1 | /* 2 | See LICENSE folder for this sample’s licensing information. 3 | 4 | Abstract: 5 | An object that configures and manages the capture pipeline to stream video and LiDAR depth data. 
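It also records synchronized RAW, RGB, depth, and motion data as binary bundles into the app's Documents directory, for later conversion with ConvertBinaries.py.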
6 | */ 7 | 8 | import Foundation 9 | import AVFoundation 10 | import CoreImage 11 | import CoreMotion 12 | 13 | protocol CaptureDataReceiver: AnyObject { 14 | func onNewData(capturedData: CameraCapturedData) 15 | func onNewPhotoData(capturedData: CameraCapturedData) 16 | } 17 | 18 | class CameraController: NSObject, ObservableObject, AVCaptureVideoDataOutputSampleBufferDelegate { 19 | 20 | enum ConfigurationError: Error { 21 | case lidarDeviceUnavailable 22 | case requiredFormatUnavailable 23 | } 24 | 25 | private let preferredWidthResolution = 4032 26 | 27 | private(set) var captureSession: AVCaptureSession! 28 | 29 | private let videoQueue = DispatchQueue(label: "com.example.apple-samplecode.VideoQueue", qos: .userInteractive) 30 | 31 | private var photoOutput: AVCapturePhotoOutput! 32 | private var depthDataOutput: AVCaptureDepthDataOutput! 33 | private var videoDataOutput: AVCaptureVideoDataOutput! 34 | private var outputVideoSync: AVCaptureDataOutputSynchronizer! 35 | private let metalDevice: MTLDevice? 36 | private var timer: Timer? 37 | private var motion: CMMotionManager! 38 | public var device: AVCaptureDevice! 39 | public var savingState = 0 // 0 - not saving, 1 - saving, 2 - error 40 | public var frameCount = 99999 41 | public var bundleSize = 42 42 | public var convertedDepth: AVDepthData! 43 | public var recordScene = false 44 | 45 | public var saveSuffix: String! 46 | public var rawFrameTimes: [Double] = [] 47 | public var rgbFrameTimes: [Double] = [] 48 | public var motionURL: URL! 49 | public var motionData: Data! 50 | public var imageRGBData: Data! 51 | public var imageRGBURL: URL! 52 | public var depthData: Data! 53 | public var depthURL: URL! 54 | public var imageRAWData: Data! 55 | public var imageRAWURL: URL! 56 | 57 | @Published var bundleFolder : URL? 58 | 59 | 60 | private var textureCache: CVMetalTextureCache! 61 | 62 | weak var delegate: CaptureDataReceiver? 
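// The delegate receives every synchronized RGB/depth frame and photo capture through the
// CaptureDataReceiver protocol above. Illustrative wiring, assuming a conforming view model:
//   cameraController.delegate = viewModel   // viewModel implements onNewData / onNewPhotoData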
63 | 64 | var isFilteringEnabled = true 65 | 66 | override init() { 67 | 68 | // create a texture cache to hold sample buffer textures 69 | metalDevice = MTLCreateSystemDefaultDevice() 70 | CVMetalTextureCacheCreate(nil, 71 | nil, 72 | metalDevice!, 73 | nil, 74 | &textureCache) 75 | 76 | super.init() 77 | 78 | do { 79 | try setupSession() 80 | } catch { 81 | fatalError("Unable to configure the capture session.") 82 | } 83 | 84 | 85 | } 86 | 87 | private func setupSession() throws { 88 | captureSession = AVCaptureSession() 89 | 90 | // configure the capture session 91 | captureSession.beginConfiguration() 92 | captureSession.sessionPreset = .photo 93 | 94 | try setupCaptureInput() 95 | setupCaptureOutputs() 96 | 97 | // finalize capture session configuration 98 | captureSession.commitConfiguration() 99 | } 100 | 101 | // MARK: Init Bundle 102 | private func initBundleFolder(suffix: String = "") { 103 | let currDate = Date() 104 | let dateFormatter = DateFormatter() 105 | dateFormatter.dateFormat = "yyyy-MM-dd_HH-mm-ss" 106 | let currDateString = dateFormatter.string(from : currDate) 107 | 108 | let DocumentDirectory = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0] 109 | let DirPath = DocumentDirectory.appendingPathComponent("bundle-" + currDateString + suffix + "/") 110 | 111 | do { 112 | try FileManager.default.createDirectory(atPath: DirPath.path, withIntermediateDirectories: true, attributes: nil) 113 | } catch let error as NSError { 114 | print("Unable to create directory \(error.debugDescription)") 115 | } 116 | 117 | bundleFolder = URL(fileURLWithPath: DirPath.path) 118 | } 119 | 120 | public func recordMotionBundle(saveSuffix: String = ""){ 121 | self.saveSuffix = saveSuffix 122 | 123 | recordScene = false 124 | 125 | motionData = Data.init() 126 | rawFrameTimes = [] 127 | rgbFrameTimes = [] 128 | frameCount = 0 129 | 130 | capturePhoto() 131 | } 132 | 133 | public func recordBundle(saveSuffix: String = ""){ 134 | self.saveSuffix = saveSuffix 135 | 136 | recordScene = true 137 | 138 | motionData = Data.init() 139 | imageRGBData = Data.init() 140 | imageRAWData = Data.init() 141 | depthData = Data.init() 142 | rawFrameTimes = [] 143 | rgbFrameTimes = [] 144 | frameCount = 0 145 | 146 | capturePhoto() 147 | } 148 | 149 | 150 | // MARK: Start Motion Capture 151 | private func startMotionCapture() { 152 | self.motion = CMMotionManager() 153 | 154 | if self.motion.isDeviceMotionAvailable { self.motion!.deviceMotionUpdateInterval = 1.0 / 200.0 // ask for 200Hz but max frequency is 100Hz for 14pro 155 | self.motion.showsDeviceMovementDisplay = true 156 | // get the attitude relative to the magnetic north reference frame 157 | self.motion.startDeviceMotionUpdates(using: .xArbitraryZVertical, 158 | to: OperationQueue(), withHandler: { (data, error) in 159 | // make sure the data is valid before accessing it 160 | if let validData = data { 161 | 162 | let timestamp = validData.timestamp 163 | 164 | let attitude = validData.attitude 165 | let quaternion = validData.attitude.quaternion 166 | let rotationRate = validData.rotationRate 167 | let userAcceleration = validData.userAcceleration 168 | let gravity = validData.gravity 169 | 170 | // generate header information to parse later in python 171 | var header = """ 172 | 173 | frameCount:\(String(describing: self.frameCount)),timestamp:\(String(describing: timestamp)), 174 | quaternionX:\(String(describing: quaternion.x)),quaternionY:\(String(describing: quaternion.y)), 175 | quaternionZ:\(String(describing: 
quaternion.z)),quaternionW:\(String(describing: quaternion.w)), 176 | rotationRateX:\(String(describing: rotationRate.x)),rotationRateY:\(String(describing: rotationRate.y)), 177 | rotationRateZ:\(String(describing: rotationRate.z)),roll:\(String(describing: attitude.roll)), 178 | pitch:\(String(describing: attitude.pitch)),yaw:\(String(describing: attitude.yaw)), 179 | userAccelerationX:\(String(describing: userAcceleration.x)),userAccelerationY:\(String(describing: userAcceleration.y)), 180 | userAccelerationZ:\(String(describing: userAcceleration.z)),gravityX:\(String(describing: gravity.x)), 181 | gravityY:\(String(describing: gravity.y)),gravityZ:\(String(describing: gravity.z)) 182 | 183 | """ 184 | header = header.components(separatedBy: .whitespacesAndNewlines).joined() // remove newlines 185 | let encodedHeader = [UInt8](header.utf8) 186 | 187 | if self.motionData != nil && self.frameCount != 99999 { 188 | self.motionData.append(encodedHeader, count: header.utf8.count) 189 | } 190 | } 191 | }) 192 | } 193 | } 194 | 195 | // MARK: Set Up Capture 196 | private func setupCaptureInput() throws { 197 | 198 | self.startMotionCapture() 199 | 200 | // LiDAR + main wide lens 201 | self.device = AVCaptureDevice.default(.builtInLiDARDepthCamera, for: .video, position: .back) 202 | 203 | guard let format = (self.device.formats.last { format in 204 | format.formatDescription.dimensions.width == preferredWidthResolution && 205 | format.formatDescription.mediaSubType.rawValue == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange && 206 | !format.isVideoBinned && 207 | !format.supportedDepthDataFormats.isEmpty 208 | }) else { 209 | print("No such image format.") 210 | throw ConfigurationError.requiredFormatUnavailable 211 | } 212 | 213 | guard let depthFormat = (format.supportedDepthDataFormats.last { depthFormat in 214 | depthFormat.formatDescription.mediaSubType.rawValue == kCVPixelFormatType_DepthFloat16 215 | }) else { 216 | print("No such depth format.") 217 | throw ConfigurationError.requiredFormatUnavailable 218 | } 219 | 220 | // begin the device configuration 221 | try self.device.lockForConfiguration() 222 | 223 | // configure the device and depth formats 224 | self.device.activeFormat = format 225 | self.device.activeDepthDataFormat = depthFormat 226 | self.device.focusMode = .continuousAutoFocus 227 | self.device.activeVideoMaxFrameDuration = CMTimeMake(value: 1, timescale: 30) // 30 fps 228 | self.device.activeVideoMinFrameDuration = CMTimeMake(value: 1, timescale: 30) // 30 fps 229 | self.device.activeDepthDataMinFrameDuration = CMTimeMake(value: 1, timescale: 30) // 30 fps 230 | 231 | // finish the device configuration 232 | self.device.unlockForConfiguration() 233 | 234 | print("Selected video format: \(self.device.activeFormat)") 235 | print("Selected depth format: \(String(describing: self.device.activeDepthDataFormat))") 236 | 237 | // add a device input to the capture session 238 | let deviceInput = try AVCaptureDeviceInput(device: self.device) 239 | captureSession.addInput(deviceInput) 240 | } 241 | 242 | private func setupCaptureOutputs() { 243 | // create an object to output video sample buffers 244 | videoDataOutput = AVCaptureVideoDataOutput() 245 | videoDataOutput.videoSettings = [(kCVPixelBufferPixelFormatTypeKey as String): NSNumber(value: 1111970369), // BGRA stream 246 | (kCVPixelBufferWidthKey as String): NSNumber(value: 1920), 247 | (kCVPixelBufferHeightKey as String): NSNumber(value: 1440)] 248 | captureSession.addOutput(videoDataOutput) 249 | 250 | // create an 
object to output depth data. 251 | depthDataOutput = AVCaptureDepthDataOutput() 252 | depthDataOutput.isFilteringEnabled = true 253 | captureSession.addOutput(depthDataOutput) 254 | 255 | 256 | // create an object to synchronize the delivery of depth and video data 257 | outputVideoSync = AVCaptureDataOutputSynchronizer(dataOutputs: [depthDataOutput, videoDataOutput]) 258 | outputVideoSync.setDelegate(self, queue: videoQueue) 259 | 260 | // enable camera intrinsics matrix delivery 261 | guard let outputConnection = videoDataOutput.connection(with: .video) else { return } 262 | if outputConnection.isCameraIntrinsicMatrixDeliverySupported { 263 | outputConnection.isCameraIntrinsicMatrixDeliveryEnabled = true 264 | } 265 | 266 | // create an object to output photos 267 | photoOutput = AVCapturePhotoOutput() 268 | captureSession.addOutput(photoOutput) 269 | photoOutput.maxPhotoQualityPrioritization = .speed 270 | photoOutput.isAppleProRAWEnabled = false // if true, captures are extremely slow as they stitch/process images 271 | photoOutput.maxPhotoDimensions = .init(width: 8064, height: 6048) // only gives 4k even if you ask for 8k unless you set proraw true 272 | 273 | // enable delivery of depth data after adding the output to the capture session 274 | photoOutput.isDepthDataDeliveryEnabled = true 275 | } 276 | 277 | func startStream() { 278 | captureSession.startRunning() 279 | } 280 | 281 | func stopStream() { 282 | captureSession.stopRunning() 283 | } 284 | } 285 | 286 | // MARK: Synchronized RGB and Depth 287 | extension CameraController: AVCaptureDataOutputSynchronizerDelegate { 288 | 289 | func dataOutputSynchronizer(_ synchronizer: AVCaptureDataOutputSynchronizer, 290 | didOutput synchronizedDataCollection: AVCaptureSynchronizedDataCollection) { 291 | 292 | // retrieve the synchronized depth and sample buffer container objects 293 | guard let syncedDepthData = synchronizedDataCollection.synchronizedData(for: depthDataOutput) as? AVCaptureSynchronizedDepthData, 294 | let syncedVideoData = synchronizedDataCollection.synchronizedData(for: videoDataOutput) as? AVCaptureSynchronizedSampleBufferData else { return } 295 | 296 | guard let pixelBuffer = syncedVideoData.sampleBuffer.imageBuffer else { return } 297 | 298 | let timestamp = syncedDepthData.timestamp.seconds 299 | self.convertedDepth = syncedDepthData.depthData.converting(toDepthDataType: kCVPixelFormatType_DepthFloat16) 300 | var data: CameraCapturedData! 
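// While a long-burst is being recorded (recordScene == true), append this synchronized BGRA
// frame and its depth map to the in-memory buffers and log the rounded timestamp; a preview
// texture is handed to the delegate either way.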
301 | 302 | if (self.frameCount != 99999 && self.recordScene) || (self.recordScene && self.rawFrameTimes.contains(timestamp)){ 303 | // if long-burst being recorded, write data 304 | self.writeImageBGRA(sampleBuffer: syncedVideoData.sampleBuffer, timestamp: timestamp, frameCount: self.frameCount) 305 | self.writeDepth(depthData: syncedDepthData.depthData, timestamp: timestamp, frameCount: self.frameCount) 306 | self.rgbFrameTimes.append(round(timestamp * 1000) / 1000.0) 307 | } 308 | data = CameraCapturedData(depth: self.convertedDepth.depthDataMap.texture(withFormat: .r16Float, planeIndex: 0, addToCache: textureCache), 309 | color: pixelBuffer.texture(withFormat: .bgra8Unorm, planeIndex: 0, addToCache: textureCache), 310 | timestamp: timestamp) 311 | 312 | 313 | delegate?.onNewPhotoData(capturedData: data) 314 | } 315 | } 316 | 317 | 318 | extension CameraController: AVCapturePhotoCaptureDelegate { 319 | 320 | // MARK: Capture Photo 321 | func capturePhoto() { 322 | var photoSettings: AVCapturePhotoSettings 323 | 324 | 325 | // MARK: Terminate Recording 326 | if self.frameCount == self.bundleSize { 327 | 328 | // delay so we catch last RGB/depth pair if it's delayed 329 | DispatchQueue.main.asyncAfter(deadline: .now() + 0.25) { 330 | self.frameCount = 99999 331 | print("Resetting camera back to autoexposure.") 332 | 333 | do{ 334 | try self.device.lockForConfiguration() 335 | } catch { 336 | fatalError("Device could not be locked.") 337 | } 338 | 339 | self.device.exposureMode = .continuousAutoExposure 340 | self.device.focusMode = .continuousAutoFocus 341 | self.device.unlockForConfiguration() 342 | 343 | print("Writing to disk.") 344 | self.savingState = 1 345 | } 346 | 347 | // delay more so UI catches the 'savingData' change 348 | DispatchQueue.main.asyncAfter(deadline: .now() + 0.5) { [self] in 349 | 350 | var missingTimes : [Double] = [] 351 | for elem in self.rawFrameTimes { 352 | if !self.rgbFrameTimes.contains(elem){ 353 | missingTimes.append(elem) 354 | } 355 | } 356 | 357 | if missingTimes.count > 0 || self.rgbFrameTimes.count < self.bundleSize { 358 | // something broke, missing synced frames 359 | print("Missing times: ", missingTimes) 360 | 361 | self.motionData = nil 362 | self.imageRGBData = nil 363 | self.imageRAWData = nil 364 | self.depthData = nil 365 | 366 | self.savingState = 2 // error 367 | DispatchQueue.main.asyncAfter(deadline: .now() + 5) { 368 | self.savingState = 0 // clear error in 5 seconds 369 | } 370 | 371 | return 372 | } 373 | 374 | // make folders to save files to 375 | if self.recordScene { 376 | if self.saveSuffix != "" { 377 | self.initBundleFolder(suffix: "-" + self.saveSuffix) 378 | } else { 379 | self.initBundleFolder() 380 | } 381 | 382 | print("Recording bundle into \(String(describing: self.bundleFolder!.path))") 383 | 384 | self.motionURL = URL(fileURLWithPath: "motion", relativeTo: self.bundleFolder).appendingPathExtension("bin") 385 | self.imageRGBURL = URL(fileURLWithPath: "imageRGB", relativeTo: self.bundleFolder).appendingPathExtension("bin") 386 | self.imageRAWURL = URL(fileURLWithPath: "imageRAW", relativeTo: self.bundleFolder).appendingPathExtension("bin") 387 | self.depthURL = URL(fileURLWithPath: "depth", relativeTo: self.bundleFolder).appendingPathExtension("bin") 388 | 389 | } else { // motion bundle 390 | if self.saveSuffix != "" { 391 | self.initBundleFolder(suffix: "-" + self.saveSuffix + "-motion") 392 | } else { 393 | self.initBundleFolder(suffix: "-motion") 394 | } 395 | 396 | print("Recording motion into \(String(describing: 
self.bundleFolder!.path))") 397 | 398 | self.motionURL = URL(fileURLWithPath: "motion", relativeTo: self.bundleFolder).appendingPathExtension("bin") 399 | } 400 | 401 | 402 | try? self.motionData.write(to: self.motionURL) 403 | 404 | // record to disk 405 | if self.recordScene { 406 | try? self.imageRGBData.write(to: self.imageRGBURL) 407 | try? self.imageRAWData.write(to: self.imageRAWURL) 408 | try? self.depthData.write(to: self.depthURL) 409 | } 410 | 411 | self.recordScene = false 412 | self.savingState = 0 413 | 414 | // clear memory 415 | self.motionData = nil 416 | self.imageRGBData = nil 417 | self.imageRAWData = nil 418 | self.depthData = nil 419 | 420 | print("Done recording bundle.") 421 | } 422 | return 423 | 424 | } else if self.frameCount >= self.bundleSize { 425 | self.frameCount = 99999 426 | return // don't record past bundle size 427 | } 428 | 429 | if photoOutput.availableRawPhotoPixelFormatTypes.count > 0 { 430 | 431 | for format in photoOutput.availableRawPhotoPixelFormatTypes { 432 | print(format) 433 | } 434 | 435 | let rawType = photoOutput.availableRawPhotoPixelFormatTypes.first! 436 | 437 | // set ISO and Exposure Time 438 | do{ 439 | try self.device.lockForConfiguration() 440 | } catch { 441 | fatalError("Device could not be locked.") 442 | } 443 | 444 | let deviceISO = device.iso 445 | let deviceExposureDuration = device.exposureDuration.seconds 446 | 447 | let iso = deviceISO 448 | let maxExposureDuration: CMTime = CMTime(seconds: 0.041, preferredTimescale: CMTimeScale(1000000)) 449 | let exposureDuration: CMTime = min(device.exposureDuration, maxExposureDuration) // Don't drop under 21fps 450 | 451 | self.device.setExposureModeCustom(duration: exposureDuration, iso: iso) 452 | self.device.focusMode = .locked 453 | 454 | self.device.unlockForConfiguration() 455 | 456 | if frameCount == 0 { // sleep for 200 milliseconds to let exposure catch up for first frame 457 | usleep(200000) 458 | } 459 | 460 | photoSettings = AVCapturePhotoSettings(rawPixelFormatType: rawType, processedFormat: nil) 461 | photoSettings.isDepthDataDeliveryEnabled = false 462 | } else { 463 | fatalError("No RAW format found.") 464 | } 465 | 466 | photoOutput.capturePhoto(with: photoSettings, delegate: self) 467 | } 468 | 469 | // MARK: Photo Output 470 | func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) { 471 | 472 | // Retrieve the image and depth data. 
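// photo.pixelBuffer holds the Bayer RAW capture requested in capturePhoto(). While a scene is
// being recorded it is appended to imageRAWData, and each callback immediately triggers the
// next capture, which forms the long-burst loop.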
473 | guard let pixelBuffer = photo.pixelBuffer else {return} 474 | 475 | if self.frameCount >= self.bundleSize { 476 | self.frameCount = 99999 477 | return // don't record past bundle size 478 | } 479 | 480 | 481 | if self.recordScene { 482 | self.writeImageRAW(photo: photo, timestamp: photo.timestamp.seconds, frameCount: self.frameCount) 483 | self.rawFrameTimes.append(round(photo.timestamp.seconds * 1000) / 1000.0) 484 | } 485 | 486 | self.frameCount += 1 487 | self.capturePhoto() 488 | 489 | } 490 | 491 | // MARK: Write Depth 492 | func convertLensDistortionLookupTable(lookupTable: Data) -> [Float] { 493 | let tableLength = lookupTable.count / MemoryLayout.size 494 | var floatArray: [Float] = Array(repeating: 0, count: tableLength) 495 | _ = floatArray.withUnsafeMutableBytes{lookupTable.copyBytes(to: $0)} 496 | return floatArray 497 | } 498 | 499 | func convertIntrinsicMatrix(intrinsicMatrix: simd_float3x3) -> [[Float]]{ 500 | return (0 ..< 3).map{ x in 501 | (0 ..< 3).map{ y in intrinsicMatrix[x][y]} 502 | } 503 | } 504 | 505 | func writeDepth(depthData: AVDepthData, timestamp: Double, frameCount: Int) { 506 | let pixelBuffer = depthData.depthDataMap 507 | guard CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly) == noErr else { return } 508 | defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) } 509 | 510 | guard let srcPtr = CVPixelBufferGetBaseAddress(pixelBuffer) else { 511 | print("Failed to retrieve depth pointer.") 512 | return 513 | } 514 | 515 | let rowBytes : Int = CVPixelBufferGetBytesPerRow(pixelBuffer) 516 | let width = Int(CVPixelBufferGetWidth(pixelBuffer)) 517 | let height = Int(CVPixelBufferGetHeight(pixelBuffer)) 518 | let capacity = CVPixelBufferGetDataSize(pixelBuffer) 519 | let uint8Pointer = srcPtr.bindMemory(to: UInt8.self, capacity: capacity) 520 | 521 | let intrinsicWidth = depthData.cameraCalibrationData!.intrinsicMatrixReferenceDimensions.width 522 | let intrinsicHeight = depthData.cameraCalibrationData!.intrinsicMatrixReferenceDimensions.height 523 | let intrinsicMatrix = depthData.cameraCalibrationData!.intrinsicMatrix 524 | let lensDistortion = depthData.cameraCalibrationData!.lensDistortionLookupTable! 525 | let lensInverseDistortion = depthData.cameraCalibrationData!.inverseLensDistortionLookupTable! 
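// The calibration metadata (reference dimensions, intrinsic matrix, distortion lookup tables)
// is serialized into the plain-text header below so ConvertBinaries.py can recover it alongside
// the float16 depth map.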
526 | let depthAccuracy = depthData.depthDataAccuracy.rawValue 527 | 528 | var header = """ 529 | 530 | description:depthmap, 531 | frameCount:\(String(describing: frameCount)), 532 | timestamp:\(String(describing: timestamp)), 533 | height:\(String(describing: height)), 534 | width:\(String(describing: width)), 535 | rowBytes:\(String(describing: rowBytes)), 536 | intrinsicWidth:\(String(describing: intrinsicWidth)), 537 | intrinsicHeight:\(String(describing: intrinsicHeight)), 538 | intrinsicMatrix:\(String(describing: convertIntrinsicMatrix(intrinsicMatrix: intrinsicMatrix))), 539 | lensDistortion:\(String(describing: convertLensDistortionLookupTable(lookupTable: lensDistortion))), 540 | lensInverseDistortion:\(String(describing: convertLensDistortionLookupTable(lookupTable: lensInverseDistortion))), 541 | depthAccuracy:\(String(describing: depthAccuracy)) 542 | 543 | """ 544 | 545 | header = header.components(separatedBy: .whitespacesAndNewlines).joined() // remove newlines 546 | let encodedHeader = [UInt8](header.utf8) 547 | self.depthData.append(encodedHeader, count: header.utf8.count) 548 | self.depthData.append(uint8Pointer, count: Int(rowBytes * height)) 549 | } 550 | 551 | // MARK: Write RAW 552 | func writeImageRAW(photo: AVCapturePhoto, timestamp: Double, frameCount: Int) { 553 | guard let pixelBuffer = photo.pixelBuffer else { return } 554 | 555 | guard CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly) == noErr else { 556 | print("Failed to retrieve readonly base address for RAW.") 557 | return 558 | } 559 | defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) } 560 | 561 | 562 | guard let srcPtr = CVPixelBufferGetBaseAddress(pixelBuffer) else { 563 | print("Failed to retrieve RAW pointer.") 564 | return 565 | } 566 | 567 | let rowBytes : Int = CVPixelBufferGetBytesPerRow(pixelBuffer) 568 | let width = Int(CVPixelBufferGetWidth(pixelBuffer)) 569 | let height = Int(CVPixelBufferGetHeight(pixelBuffer)) 570 | let capacity = CVPixelBufferGetDataSize(pixelBuffer) 571 | let uint8Pointer = srcPtr.bindMemory(to: UInt8.self, capacity: capacity) 572 | 573 | let exifdata = photo.metadata["{Exif}"] as! NSDictionary 574 | let DNGdata = photo.metadata["{DNG}"] as! NSDictionary 575 | let brightnessValue = exifdata["BrightnessValue"] != nil ? exifdata["BrightnessValue"]! : -1.0 576 | 577 | var header = """ 578 | 579 | description:imageRAW, 580 | frameCount:\(String(describing: frameCount)), 581 | timestamp:\(String(describing: timestamp)), 582 | height:\(String(describing: height)), 583 | width:\(String(describing: width)), 584 | rowBytes:\(String(describing: rowBytes)), 585 | ISO:\(String(describing: (exifdata["ISOSpeedRatings"] as! 
NSArray)[0])), 586 | exposureTime:\(String(describing: exifdata["ExposureTime"]!)), 587 | apertureValue:\(String(describing: exifdata["ApertureValue"]!)), 588 | brightnessValue:\(String(describing: brightnessValue)), 589 | shutterSpeedValue:\(String(describing: exifdata["ShutterSpeedValue"]!)), 590 | pixelXDimension:\(String(describing: exifdata["PixelXDimension"]!)), 591 | pixelYDimension:\(String(describing: exifdata["PixelYDimension"]!)), 592 | blackLevel:\(String(describing: DNGdata["BlackLevel"]!)), 593 | whiteLevel:\(String(describing: DNGdata["WhiteLevel"]!)) 594 | 595 | """ 596 | 597 | header = header.components(separatedBy: .whitespacesAndNewlines).joined() // remove newlines 598 | let encodedHeader = [UInt8](header.utf8) 599 | self.imageRAWData.append(encodedHeader, count: header.utf8.count) 600 | self.imageRAWData.append(uint8Pointer, count: Int(rowBytes * height)) 601 | } 602 | 603 | 604 | // MARK: Write BGRA 605 | func writeImageBGRA(sampleBuffer: CMSampleBuffer, timestamp: Double, frameCount: Int) { 606 | 607 | var intrinsicMatrix: simd_float3x3? 608 | 609 | if let camData = CMGetAttachment(sampleBuffer, key: kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, attachmentModeOut: nil) as? Data { 610 | intrinsicMatrix = camData.withUnsafeBytes { $0.pointee } 611 | } 612 | 613 | guard let pixelBuffer = sampleBuffer.imageBuffer else { return } 614 | 615 | guard CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly) == noErr else { return } 616 | defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) } 617 | 618 | 619 | guard let srcPtr = CVPixelBufferGetBaseAddress(pixelBuffer) else { 620 | print("Failed to retrieve BGRA pointer.") 621 | return 622 | } 623 | 624 | let rowBytes : Int = CVPixelBufferGetBytesPerRow(pixelBuffer) 625 | let width = Int(CVPixelBufferGetWidth(pixelBuffer)) 626 | let height = Int(CVPixelBufferGetHeight(pixelBuffer)) 627 | let capacity = CVPixelBufferGetDataSize(pixelBuffer) 628 | let uint8Pointer = srcPtr.bindMemory(to: UInt8.self, capacity: capacity) 629 | 630 | 631 | var header = """ 632 | 633 | description:imageBGRA, 634 | frameCount:\(String(describing: frameCount)), 635 | timestamp:\(String(describing: timestamp)), 636 | height:\(String(describing: height)), 637 | width:\(String(describing: width)), 638 | rowBytes:\(String(describing: rowBytes)), 639 | intrinsicMatrix:\(String(describing: convertIntrinsicMatrix(intrinsicMatrix: intrinsicMatrix!))) 640 | 641 | """ 642 | 643 | header = header.components(separatedBy: .whitespacesAndNewlines).joined() // remove newlines 644 | let encodedHeader = [UInt8](header.utf8) 645 | self.imageRGBData.append(encodedHeader, count: header.utf8.count) 646 | self.imageRGBData.append(uint8Pointer, count: Int(rowBytes * height)) 647 | 648 | } 649 | } 650 | 651 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/ImageBundleRecorder/CameraManager.swift: -------------------------------------------------------------------------------- 1 | /* 2 | See LICENSE folder for this sample’s licensing information. 3 | 4 | Abstract: 5 | An object that connects the CameraController and the views. 6 | */ 7 | 8 | import Foundation 9 | import SwiftUI 10 | import Combine 11 | import simd 12 | import AVFoundation 13 | 14 | final class MetalTextureContent { 15 | var texture: MTLTexture? 16 | } 17 | 18 | extension CVPixelBuffer { 19 | 20 | func texture(withFormat pixelFormat: MTLPixelFormat, planeIndex: Int, addToCache cache: CVMetalTextureCache) -> MTLTexture? 
{ 21 | 22 | let width = CVPixelBufferGetWidthOfPlane(self, planeIndex) 23 | let height = CVPixelBufferGetHeightOfPlane(self, planeIndex) 24 | 25 | var cvtexture: CVMetalTexture? 26 | CVMetalTextureCacheCreateTextureFromImage(nil, cache, self, nil, pixelFormat, width, height, planeIndex, &cvtexture) 27 | guard let texture = cvtexture else { return nil } 28 | return CVMetalTextureGetTexture(texture) 29 | } 30 | 31 | } 32 | 33 | 34 | class CameraManager: ObservableObject, CaptureDataReceiver { 35 | 36 | var capturedData: CameraCapturedData 37 | @Published var isFilteringDepth: Bool { 38 | didSet { 39 | controller.isFilteringEnabled = isFilteringDepth 40 | } 41 | } 42 | @Published var orientation = UIDevice.current.orientation 43 | 44 | var fpsArray = Array(repeating: 0.0, count: 30) 45 | var fpsCount = 0 46 | var timePrev = 0.0 47 | 48 | var controller: CameraController 49 | var cancellables = Set() 50 | var session: AVCaptureSession { controller.captureSession } 51 | 52 | @Published var iso : Float = 0 53 | @Published var exposureTime : Double = 0 54 | @Published var frameCount = 99999 55 | @Published var savingState = 0 56 | 57 | init() { 58 | // Create an object to store the captured data for the views to present. 59 | capturedData = CameraCapturedData(depth: nil, color: nil, timestamp: 0) 60 | controller = CameraController() 61 | controller.isFilteringEnabled = true 62 | controller.startStream() 63 | isFilteringDepth = controller.isFilteringEnabled 64 | 65 | NotificationCenter.default.publisher(for: UIDevice.orientationDidChangeNotification).sink { _ in 66 | self.orientation = UIDevice.current.orientation 67 | }.store(in: &cancellables) 68 | controller.delegate = self 69 | } 70 | 71 | func resumeStream() { 72 | controller.startStream() 73 | } 74 | 75 | func onNewPhotoData(capturedData: CameraCapturedData) { 76 | // Because the views hold a reference to `capturedData`, the app updates each texture separately. 77 | self.capturedData.depthContent.texture = capturedData.depth 78 | self.capturedData.colorRGBContent.texture = capturedData.color 79 | self.capturedData.timestamp = capturedData.timestamp 80 | 81 | if capturedData.timestamp != nil && 1.0/(capturedData.timestamp! - self.timePrev) < 10000 { // skip double-frames 82 | self.fpsCount += 1 83 | self.fpsArray[self.fpsCount % self.fpsArray.count] = 1.0/(capturedData.timestamp! - self.timePrev) 84 | // print("Current FPS: ", self.fpsArray.reduce(0.0, +)/(Double(self.fpsArray.count))) 85 | self.timePrev = capturedData.timestamp! 86 | } 87 | 88 | DispatchQueue.main.async { // Hacky, for printing to UI 89 | self.iso = self.controller.device.iso 90 | self.exposureTime = self.controller.device.exposureDuration.seconds 91 | self.frameCount = self.controller.frameCount 92 | self.savingState = self.controller.savingState 93 | 94 | } 95 | } 96 | 97 | func onNewData(capturedData: CameraCapturedData) { 98 | // do nothing 99 | } 100 | 101 | } 102 | 103 | class CameraCapturedData { 104 | 105 | var depth: MTLTexture? 106 | var depthContent: MetalTextureContent 107 | var color: MTLTexture? 108 | var colorRGBContent: MetalTextureContent 109 | var timestamp: Double? 110 | 111 | init(depth: MTLTexture?, 112 | color: MTLTexture?, 113 | timestamp: Double?) 
{ 114 | 115 | self.depth = depth 116 | self.depthContent = MetalTextureContent() 117 | self.depthContent.texture = depth 118 | self.color = color 119 | self.colorRGBContent = MetalTextureContent() 120 | self.colorRGBContent.texture = color 121 | self.timestamp = timestamp 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/ImageBundleRecorder/ImageBundleRecorder.entitlements: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | com.apple.developer.kernel.increased-memory-limit 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/ImageBundleRecorder/ImageBundleRecorder.swift: -------------------------------------------------------------------------------- 1 | /* 2 | See LICENSE folder for this sample’s licensing information. 3 | 4 | Abstract: 5 | The single entry point for DepthBundleRecorder. 6 | */ 7 | 8 | import SwiftUI 9 | @main 10 | struct ImageBundleRecorder: App { 11 | var body: some Scene { 12 | WindowGroup { 13 | MetalDepthView() 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/ImageBundleRecorder/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | LSSupportsOpeningDocumentsInPlace 6 | 7 | 8 | 9 | UIApplicationSceneManifest 10 | 11 | UIFileSharingEnabled 12 | 13 | 14 | 15 | UIRequiresFullScreen - 2 16 | 17 | UISupportedInterfaceOrientations~ipad - 2 18 | 19 | UIInterfaceOrientationPortrait 20 | UIInterfaceOrientationPortraitUpsideDown 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/ImageBundleRecorder/MetalTextureView.swift: -------------------------------------------------------------------------------- 1 | /* 2 | See LICENSE folder for this sample’s licensing information. 3 | 4 | Abstract: 5 | A view that displays a Metal-rendered depth visualization. 6 | */ 7 | 8 | import Foundation 9 | import SwiftUI 10 | import MetalKit 11 | import Metal 12 | 13 | // Display `MTLTextures` in an `MTKView` using SwiftUI. 14 | //- Tag: MTKCoordinator` 15 | class MTKCoordinator: NSObject, MTKViewDelegate { 16 | var content: MetalTextureContent 17 | let view: MTKView 18 | var pipelineState: MTLRenderPipelineState! 19 | var metalCommandQueue: MTLCommandQueue! 20 | 21 | init(content: MetalTextureContent, view: MTKView) { 22 | self.content = content 23 | self.view = view 24 | if let metalDevice = MTLCreateSystemDefaultDevice() { 25 | view.device = metalDevice 26 | self.metalCommandQueue = metalDevice.makeCommandQueue()! 
27 | } 28 | super.init() 29 | 30 | prepareFunctions() 31 | } 32 | func prepareFunctions() { 33 | guard let metalDevice = view.device else { fatalError("Expected a Metal device.") } 34 | do { 35 | let library = metalDevice.makeDefaultLibrary() 36 | let pipelineDescriptor = MTLRenderPipelineDescriptor() 37 | pipelineDescriptor.colorAttachments[0].pixelFormat = .bgra8Unorm 38 | pipelineDescriptor.vertexFunction = library!.makeFunction(name: "planeVertexShader") 39 | pipelineDescriptor.fragmentFunction = library!.makeFunction(name: "planeFragmentShader") 40 | pipelineDescriptor.vertexDescriptor = createPlaneMetalVertexDescriptor() 41 | pipelineState = try metalDevice.makeRenderPipelineState(descriptor: pipelineDescriptor) 42 | } catch { 43 | print("Unexpected error: \(error).") 44 | } 45 | } 46 | func createPlaneMetalVertexDescriptor() -> MTLVertexDescriptor { 47 | let mtlVertexDescriptor: MTLVertexDescriptor = MTLVertexDescriptor() 48 | // Store position in `attribute[[0]]`. 49 | mtlVertexDescriptor.attributes[0].format = .float2 50 | mtlVertexDescriptor.attributes[0].offset = 0 51 | mtlVertexDescriptor.attributes[0].bufferIndex = 0 52 | 53 | // Store texture coordinates in `attribute[[1]]`. 54 | mtlVertexDescriptor.attributes[1].format = .float2 55 | mtlVertexDescriptor.attributes[1].offset = 8 56 | mtlVertexDescriptor.attributes[1].bufferIndex = 0 57 | 58 | // Set stride to twice the `float2` bytes per vertex. 59 | mtlVertexDescriptor.layouts[0].stride = 2 * MemoryLayout>.stride 60 | mtlVertexDescriptor.layouts[0].stepRate = 1 61 | mtlVertexDescriptor.layouts[0].stepFunction = .perVertex 62 | 63 | return mtlVertexDescriptor 64 | } 65 | 66 | func mtkView(_ view: MTKView, drawableSizeWillChange size: CGSize) { 67 | 68 | } 69 | 70 | // Draw a textured quad. 71 | func draw(in view: MTKView) { 72 | guard content.texture != nil else { 73 | // print("There's no content to display.") 74 | return 75 | } 76 | guard let commandBuffer = metalCommandQueue.makeCommandBuffer() else { return } 77 | guard let passDescriptor = view.currentRenderPassDescriptor else { return } 78 | guard let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: passDescriptor) else { return } 79 | let vertexData: [Float] = [ -1, -1, 1, 1, 80 | 1, -1, 1, 0, 81 | -1, 1, 0, 1, 82 | 1, 1, 0, 0] 83 | encoder.setVertexBytes(vertexData, length: vertexData.count * MemoryLayout.stride, index: 0) 84 | encoder.setFragmentTexture(content.texture, index: 0) 85 | encoder.setRenderPipelineState(pipelineState) 86 | encoder.drawPrimitives(type: .triangleStrip, vertexStart: 0, vertexCount: 4) 87 | encoder.endEncoding() 88 | commandBuffer.present(view.currentDrawable!) 89 | commandBuffer.commit() 90 | } 91 | 92 | } 93 | //- Tag: MetalTextureView 94 | struct MetalTextureView: UIViewRepresentable { 95 | var mtkView: MTKView 96 | var content: MetalTextureContent 97 | func makeCoordinator() -> MTKCoordinator { 98 | MTKCoordinator(content: content, view: mtkView) 99 | } 100 | func makeUIView(context: UIViewRepresentableContext) -> MTKView { 101 | mtkView.delegate = context.coordinator 102 | mtkView.preferredFramesPerSecond = 120 103 | mtkView.backgroundColor = context.environment.colorScheme == .dark ? 
.black : .white 104 | mtkView.isOpaque = true 105 | mtkView.framebufferOnly = false 106 | mtkView.clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 0) 107 | mtkView.drawableSize = mtkView.frame.size 108 | mtkView.enableSetNeedsDisplay = false 109 | mtkView.colorPixelFormat = .bgra8Unorm 110 | return mtkView 111 | } 112 | 113 | // `UIViewRepresentable` requires this implementation; however, the sample 114 | // app doesn't use it. Instead, `MTKView.delegate` handles display updates. 115 | func updateUIView(_ uiView: MTKView, context: UIViewRepresentableContext) { 116 | 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/ImageBundleRecorder/MetalTextureViewDepth.swift: -------------------------------------------------------------------------------- 1 | /* 2 | See LICENSE folder for this sample’s licensing information. 3 | 4 | Abstract: 5 | A view that displays scene depth information. 6 | */ 7 | 8 | import Foundation 9 | import SwiftUI 10 | import MetalKit 11 | import Metal 12 | 13 | //- Tag: CoordinatorDepth 14 | final class CoordinatorDepth: MTKCoordinator { 15 | @Binding var confSelection: Int 16 | init(mtkView: MTKView, depthContent: MetalTextureContent, confSelection: Binding) { 17 | self._confSelection = confSelection 18 | super.init(content: depthContent, view: mtkView) 19 | } 20 | override func prepareFunctions() { 21 | guard let metalDevice = view.device else { fatalError("Expected a Metal device.") } 22 | do { 23 | let library = metalDevice.makeDefaultLibrary() 24 | let pipelineDescriptor = MTLRenderPipelineDescriptor() 25 | pipelineDescriptor.colorAttachments[0].pixelFormat = .bgra8Unorm 26 | pipelineDescriptor.vertexFunction = library!.makeFunction(name: "planeVertexShader") 27 | pipelineDescriptor.fragmentFunction = library!.makeFunction(name: "planeFragmentShaderDepth") 28 | pipelineDescriptor.vertexDescriptor = createPlaneMetalVertexDescriptor() 29 | pipelineState = try metalDevice.makeRenderPipelineState(descriptor: pipelineDescriptor) 30 | } catch { 31 | print("Unexpected error: \(error).") 32 | } 33 | } 34 | 35 | } 36 | 37 | struct MetalTextureViewDepth: UIViewRepresentable { 38 | var mtkView: MTKView 39 | var content: MetalTextureContent 40 | 41 | @Binding var confSelection: Int 42 | func makeCoordinator() -> CoordinatorDepth { 43 | CoordinatorDepth(mtkView: mtkView, depthContent: content, confSelection: $confSelection) 44 | } 45 | 46 | func makeUIView(context: UIViewRepresentableContext) -> MTKView { 47 | mtkView.delegate = context.coordinator 48 | mtkView.preferredFramesPerSecond = 120 49 | mtkView.backgroundColor = context.environment.colorScheme == .dark ? .black : .white 50 | mtkView.isOpaque = true 51 | mtkView.framebufferOnly = false 52 | mtkView.clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 0) 53 | mtkView.drawableSize = mtkView.frame.size 54 | mtkView.enableSetNeedsDisplay = false 55 | mtkView.colorPixelFormat = .bgra8Unorm 56 | return mtkView 57 | } 58 | 59 | // `UIViewRepresentable` requires this implementation; however, the sample 60 | // app doesn't use it. Instead, `MTKView.delegate` handles display updates. 
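// Typical usage from the main UI (see MetalViewSample.swift):
//     MetalTextureViewDepth(mtkView: MTKView(),
//                           content: manager.capturedData.depthContent,
//                           confSelection: $selectedConfidence)
// The coordinator's draw loop (preferredFramesPerSecond = 120) samples whatever
// texture is currently set on `content`, so nothing needs to happen in
// `updateUIView` below.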
61 | func updateUIView(_ uiView: MTKView, context: UIViewRepresentableContext) { 62 | 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/ImageBundleRecorder/MetalViewSample.swift: -------------------------------------------------------------------------------- 1 | /* 2 | See LICENSE folder for this sample’s licensing information. 3 | 4 | Abstract: 5 | The app's main user interface. 6 | */ 7 | 8 | import Foundation 9 | import SwiftUI 10 | import MetalKit 11 | 12 | // Add a title to a view that enlarges the view to full screen on tap. 13 | struct Texture: ViewModifier { 14 | let height: CGFloat 15 | let width: CGFloat 16 | let title: String 17 | let view: T 18 | func body(content: Content) -> some View { 19 | VStack { 20 | Text(title).foregroundColor(Color.red) 21 | // To display the same view in the navigation, reference the view 22 | // directly versus using the view's `content` property. 23 | NavigationLink(destination: view.aspectRatio(CGSize(width: width, height: height), contentMode: .fill)) { 24 | view.frame(maxWidth: width, maxHeight: height, alignment: .center) 25 | .aspectRatio(CGSize(width: width, height: height), contentMode: .fill) 26 | } 27 | } 28 | } 29 | } 30 | 31 | extension View { 32 | // Apply `zoomOnTapModifier` with a `self` reference to show the same view 33 | // on tap. 34 | func zoomOnTapModifier(height: CGFloat, width: CGFloat, title: String) -> some View { 35 | modifier(Texture(height: height, width: width, title: title, view: self)) 36 | } 37 | } 38 | extension Image { 39 | init(_ texture: MTLTexture, ciContext: CIContext, scale: CGFloat, orientation: Image.Orientation, label: Text) { 40 | let ciimage = CIImage(mtlTexture: texture)! 41 | let cgimage = ciContext.createCGImage(ciimage, from: ciimage.extent) 42 | self.init(cgimage!, scale: 1.0, orientation: .leftMirrored, label: label) 43 | } 44 | } 45 | 46 | 47 | struct MetalDepthView: View { 48 | @ObservedObject var manager = CameraManager() 49 | 50 | // Set the default sizes for the texture views. 51 | let sizeH: CGFloat = 320 52 | let sizeW: CGFloat = 240 53 | 54 | // Manage the AR session and AR data processing. 55 | //- Tag: ARProvider 56 | let ciContext: CIContext = CIContext() 57 | 58 | // Save the user's confidence selection. 
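// View state for the controls below. Note that a `frameCount` of 99999 (published by
// CameraManager) acts as the "idle, not currently recording" sentinel: CameraController
// resets it to 99999 once a bundle is written, and the record buttons only start a new
// capture while it holds that value.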
59 | @State var isPaused = false 60 | @State var selectedConfidence = 0 61 | @State private var scaleMovement: Float = 1.5 62 | @State var saveSuffix: String = "" 63 | @State var numRecordedSceneBundles = 0 64 | @State var numRecordedPoseBundles = 0 65 | let screenWidth = UIScreen.main.bounds.size.width 66 | let fontSize : CGFloat = 22 67 | 68 | var body: some View { 69 | VStack(alignment: .leading, spacing: 0) { 70 | 71 | 72 | // depth and image display 73 | HStack(alignment: .top) { 74 | if manager.savingState == 1 { 75 | Spacer(minLength: 10) 76 | Image(systemName: "square.and.arrow.down.fill").font(.system(size: fontSize + 2)); Text("SAVING DATA TO DISK").font(.system(size: fontSize + 2)) 77 | Spacer(minLength: 10) 78 | } else if manager.savingState == 2 { 79 | Spacer(minLength: 10) 80 | Image(systemName: "exclamationmark.triangle.fill").font(.system(size: fontSize + 2)); Text("SOMETHING WENT WRONG,\nWAIT A MOMENT AND TRY AGAIN").font(.system(size: fontSize + 2)) 81 | Spacer(minLength: 10) 82 | } else { 83 | MetalTextureViewDepth(mtkView: MTKView(), content: manager.capturedData.depthContent, confSelection: $selectedConfidence) 84 | MetalTextureView(mtkView: MTKView(), content: manager.capturedData.colorRGBContent) 85 | } 86 | }.frame(width: screenWidth, height:400) 87 | 88 | HStack() { 89 | if manager.savingState == 0 { 90 | Text("Exposure: \(manager.exposureTime) ISO: \(manager.iso)").font(.system(size: fontSize)) 91 | } 92 | }.frame(width: 400, height: 30) 93 | 94 | // input field 95 | HStack() { 96 | Spacer(minLength: 10) 97 | TextField("Save File Suffix", text: $saveSuffix) 98 | .disableAutocorrection(true) 99 | .border(Color(UIColor.separator)) 100 | .autocapitalization(.none) 101 | .font(.system(size: fontSize)) 102 | Spacer(minLength: 10) 103 | }.frame(width: screenWidth, height: 50) 104 | 105 | // input field 106 | HStack() { 107 | Spacer(minLength: 10) 108 | Text("Recorded \(numRecordedPoseBundles) Motion, \(numRecordedSceneBundles) Scene Bundles").font(.system(size: fontSize)) 109 | Spacer(minLength: 10) 110 | }.frame(width: screenWidth, height: 30) 111 | 112 | // bundle size selector 113 | HStack { 114 | Spacer(minLength: 10) 115 | Text("Bundle Size:").font(.system(size: fontSize)) 116 | Picker(selection: $manager.controller.bundleSize, label: Text("Bundle Size:")) { 117 | Text("1").tag(1) 118 | Text("15").tag(15) 119 | Text("30").tag(30) 120 | Text("42").tag(42) 121 | }.pickerStyle(SegmentedPickerStyle()) 122 | Spacer(minLength: 10) 123 | }.frame(width: screenWidth, height:50) 124 | 125 | // buttons for stream interaction 126 | HStack() { 127 | Spacer(minLength: 20) 128 | Button(action: { 129 | manager.controller.frameCount = 99999 130 | manager.controller.stopStream() 131 | usleep(100000) 132 | manager.controller.startStream() 133 | }) { 134 | Image(systemName: "exclamationmark.arrow.circlepath").font(.system(size: 40)) 135 | } 136 | Spacer(minLength: 80) 137 | Button(action: { 138 | if manager.controller.frameCount == 99999 { 139 | manager.controller.recordBundle(saveSuffix: saveSuffix) 140 | numRecordedSceneBundles += 1 141 | } 142 | }) { 143 | Image(systemName: (manager.frameCount == 99999) ? "record.circle.fill" : "" ).font(.system(size: 40)) 144 | } 145 | Spacer(minLength: 80) 146 | Button(action: { 147 | if manager.controller.frameCount == 99999 { 148 | manager.controller.recordMotionBundle(saveSuffix: saveSuffix) 149 | numRecordedPoseBundles += 1 150 | } 151 | }) { 152 | Image(systemName: (manager.frameCount == 99999) ? 
"move.3d" : "" ).font(.system(size: 40)) 153 | } 154 | Spacer(minLength: 20) 155 | }.frame(width: screenWidth, height: 90) 156 | }.frame(maxWidth: .infinity, maxHeight: .infinity) 157 | .background(Color(CGColor(red: 0, green: 0, blue: 0, alpha: 0.3))) 158 | .ignoresSafeArea() 159 | } 160 | 161 | } 162 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/ImageBundleRecorder/shaders.metal: -------------------------------------------------------------------------------- 1 | /* 2 | See LICENSE folder for this sample’s licensing information. 3 | 4 | Abstract: 5 | The sample app's Metal shaders. 6 | */ 7 | 8 | #include 9 | 10 | using namespace metal; 11 | 12 | 13 | typedef struct 14 | { 15 | float2 position [[attribute(0)]]; 16 | float2 texCoord [[attribute(1)]]; 17 | } Vertex; 18 | 19 | typedef struct 20 | { 21 | float4 position [[position]]; 22 | float2 texCoord; 23 | } ColorInOut; 24 | 25 | 26 | 27 | // Display a 2D texture. 28 | vertex ColorInOut planeVertexShader(Vertex in [[stage_in]]) 29 | { 30 | ColorInOut out; 31 | out.position = float4(in.position, 0.0f, 1.0f); 32 | out.texCoord = in.texCoord; 33 | return out; 34 | } 35 | 36 | // Shade a 2D plane by passing through the texture inputs. 37 | fragment float4 planeFragmentShader(ColorInOut in [[stage_in]], texture2d textureIn [[ texture(0) ]]) 38 | { 39 | constexpr sampler colorSampler(address::clamp_to_edge, filter::linear); 40 | float4 sample = textureIn.sample(colorSampler, in.texCoord); 41 | return sample; 42 | } 43 | 44 | // Convert a color value to RGB using a Jet color scheme. 45 | static half4 getJetColorsFromNormalizedVal(half val) { 46 | half4 res ; 47 | if(val <= 0.01h) 48 | return half4(); 49 | res.r = 1.5h - fabs(4.0h * val - 3.0h); 50 | res.g = 1.5h - fabs(4.0h * val - 2.0h); 51 | res.b = 1.5h - fabs(4.0h * val - 1.0h); 52 | res.a = 1.0h; 53 | res = clamp(res,0.0h,1.0h); 54 | return res; 55 | } 56 | 57 | // Shade a texture with depth values using a Jet color scheme. 58 | //- Tag: planeFragmentShaderDepth 59 | fragment half4 planeFragmentShaderDepth(ColorInOut in [[stage_in]], texture2d textureDepth [[ texture(0) ]]) 60 | { 61 | constexpr sampler colorSampler(address::clamp_to_edge, filter::nearest); 62 | float4 s = textureDepth.sample(colorSampler, in.texCoord); 63 | 64 | // Size the color gradient to a maximum distance of 2.5 meters. 65 | // The LiDAR Scanner supports a value no larger than 5.0; the 66 | // sample app uses a value of 2.5 to better distinguish depth 67 | // in smaller environments. 68 | half val = s.r / 2.5h; 69 | half4 res = getJetColorsFromNormalizedVal(val); 70 | return res; 71 | } 72 | 73 | // Shade a texture with confidence levels low, medium, and high to red, green, and blue, respectively. 74 | fragment half4 planeFragmentShaderConfidence(ColorInOut in [[stage_in]], texture2d textureIn [[ texture(0) ]]) 75 | { 76 | constexpr sampler colorSampler(address::clamp_to_edge, filter::nearest); 77 | float4 s = textureIn.sample(colorSampler, in.texCoord); 78 | float res = round( 255.0f*(s.r) ) ; 79 | int resI = int(res); 80 | half4 color = half4(0.0h, 0.0h, 0.0h, 0.0h); 81 | if (resI == 0) 82 | color = half4(1.0h, 0.0h, 0.0h, 1.0h); 83 | else if (resI == 1) 84 | color = half4(0.0h, 1.0h, 0.0h, 1.0h); 85 | else if (resI == 2) 86 | color = half4(0.0h, 0.0h, 1.0h, 1.0h); 87 | return color; 88 | } 89 | 90 | 91 | // Declare a particle class that the `pointCloudVertexShader` inputs 92 | // to `pointCloudFragmentShader`. 
93 | typedef struct 94 | { 95 | float4 clipSpacePosition [[position]]; 96 | float2 coor; 97 | float pSize [[point_size]]; 98 | float depth; 99 | half4 color; 100 | } ParticleVertexInOut; 101 | 102 | 103 | // Position vertices for the point cloud view. Filters out points with 104 | // confidence below the selected confidence value and calculates the color of a 105 | // particle using the color Y and CbCr per vertex. Use `viewMatrix` and 106 | // `cameraIntrinsics` to calculate the world point location of each vertex in 107 | // the depth map. 108 | //- Tag: pointCloudVertexShader 109 | vertex ParticleVertexInOut pointCloudVertexShader( 110 | uint vertexID [[ vertex_id ]], 111 | texture2d depthTexture [[ texture(0) ]], 112 | texture2d confTexture [[ texture(1) ]], 113 | constant float4x4& viewMatrix [[ buffer(0) ]], 114 | constant float3x3& cameraIntrinsics [[ buffer(1) ]], 115 | constant int &confFilterMode [[ buffer(2) ]], 116 | texture2d colorYtexture [[ texture(2) ]], 117 | texture2d colorCbCrtexture [[ texture(3) ]] 118 | ) 119 | { // ... 120 | ParticleVertexInOut out; 121 | uint2 pos; 122 | // Count the rows that are depth-texture-width wide to determine the y-value. 123 | pos.y = vertexID / depthTexture.get_width(); 124 | 125 | // The x-position is the remainder of the y-value division. 126 | pos.x = vertexID % depthTexture.get_width(); 127 | //get depth in [mm] 128 | float depth = depthTexture.read(pos).x * 1000.0f; 129 | 130 | // Convert confidence from normalized `float` to `int`. 131 | float4 conf = confTexture.read(pos); 132 | int confInt = int(round( 255.0f*(conf.r) )) ; 133 | 134 | // Filter points by confidence level. 135 | const auto visibility = confInt >= confFilterMode; 136 | if(visibility == false) 137 | depth = 0.0f; 138 | 139 | // Calculate the vertex's world coordinates. 140 | float xrw = ((int)pos.x - cameraIntrinsics[2][0]) * depth / cameraIntrinsics[0][0]; 141 | float yrw = ((int)pos.y - cameraIntrinsics[2][1]) * depth / cameraIntrinsics[1][1]; 142 | float4 xyzw = { xrw, yrw, depth, 1.f }; 143 | 144 | // Project the coordinates to the view. 145 | float4 vecout = viewMatrix * xyzw; 146 | 147 | // Color the vertex. 148 | constexpr sampler textureSampler (mag_filter::linear, 149 | min_filter::linear); 150 | out.coor = { pos.x / (depthTexture.get_width() - 1.0f), pos.y / (depthTexture.get_height() - 1.0f) }; 151 | half y = colorYtexture.sample(textureSampler, out.coor).r; 152 | half2 uv = colorCbCrtexture.sample(textureSampler, out.coor).rg - half2(0.5h, 0.5h); 153 | // Convert YUV to RGB inline. 154 | half4 rgbaResult = half4(y + 1.402h * uv.y, y - 0.7141h * uv.y - 0.3441h * uv.x, y + 1.772h * uv.x, 1.0h); 155 | 156 | out.color = rgbaResult; 157 | out.clipSpacePosition = vecout; 158 | out.depth = depth; 159 | // Set the particle display size. 160 | out.pSize = 5.0f; 161 | 162 | return out; 163 | } 164 | 165 | // Shade the point cloud points by using quad particles. 166 | fragment half4 pointCloudFragmentShader( 167 | ParticleVertexInOut in [[stage_in]]) 168 | { 169 | // Avoid drawing particles that are too close, or filtered particles that 170 | // have zero depth. 171 | if (in.depth < 1.0f) 172 | discard_fragment(); 173 | else 174 | { 175 | return in.color; 176 | } 177 | return half4(); 178 | } 179 | 180 | 181 | // Convert the Y and CbCr textures into a single RGBA texture. 
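// Back-projection used by pointCloudVertexShader above: each depth sample at pixel
// (u, v) is lifted to a camera-space point with the pinhole model,
//     x = (u - cx) * depth / fx,   y = (v - cy) * depth / fy,   z = depth,
// with cx, cy, fx, fy taken from cameraIntrinsics, and is then multiplied by
// viewMatrix to land in clip space.
//
// The compute kernel below applies a full-range BT.601-style YCbCr-to-RGB conversion;
// the 4x4 matrix folds the -0.5 chroma offsets into its last column, and chroma is
// read at half resolution (gid / 2) to match the biplanar 4:2:0 camera format. Per
// channel, with Cb' = Cb - 0.5 and Cr' = Cr - 0.5, this is:
//     R = Y + 1.402  * Cr'
//     G = Y - 0.3441 * Cb' - 0.7141 * Cr'
//     B = Y + 1.772  * Cb'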
182 | kernel void convertYCbCrToRGBA(texture2d colorYtexture [[texture(0)]], 183 | texture2d colorCbCrtexture [[texture(1)]], 184 | texture2d colorRGBTexture [[texture(2)]], 185 | uint2 gid [[thread_position_in_grid]]) 186 | { 187 | float y = colorYtexture.read(gid).r; 188 | float2 uv = colorCbCrtexture.read(gid / 2).rg; 189 | 190 | const float4x4 ycbcrToRGBTransform = float4x4( 191 | float4(+1.0000f, +1.0000f, +1.0000f, +0.0000f), 192 | float4(+0.0000f, -0.3441f, +1.7720f, +0.0000f), 193 | float4(+1.4020f, -0.7141f, +0.0000f, +0.0000f), 194 | float4(-0.7010f, +0.5291f, -0.8860f, +1.0000f) 195 | ); 196 | 197 | // Sample Y and CbCr textures to get the YCbCr color at the given texture 198 | // coordinate. 199 | float4 ycbcr = float4(y, uv.x, uv.y, 1.0f); 200 | 201 | // Return the converted RGB color. 202 | float4 colorSample = ycbcrToRGBTransform * ycbcr; 203 | colorRGBTexture.write(colorSample, uint2(gid.xy)); 204 | 205 | } 206 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/LICENSE/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright © 2022 Apple Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | 9 | -------------------------------------------------------------------------------- /!App/ImageBundleApp/Launch Screen.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 24 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /!App/README.md: -------------------------------------------------------------------------------- 1 | # iOS App for RAW Long-Burst Recording 2 | 3 | This app is part of the official code repository for the work: [Shakes on a Plane: Unsupervised Depth Estimation from Unstabilized Photography](https://light.princeton.edu/publication/soap/) 4 | 5 | If you use parts of this work, or otherwise take inspiration from it, please consider citing our paper: 6 | ``` 7 | @InProceedings{Chugunov_2023_CVPR, 8 | author = {Chugunov, Ilya and Zhang, Yuxuan and Heide, Felix}, 9 | title = {Shakes on a Plane: Unsupervised Depth Estimation From Unstabilized Photography}, 10 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 11 | month = {June}, 12 | year = {2023}, 13 | pages = {13240-13251} 14 | } 15 | ``` 16 | 17 | The code is partially derived from [this sample code for recording depth with AVFoundation](https://developer.apple.com/documentation/avfoundation/additional_data_capture/capturing_depth_using_the_lidar_camera). We highly recommend you read through that example to familiarize yourself with the structure and function of portions of this app. 18 | 19 | ## Getting the App Running 20 | 1. Open the `.xcodeproj` project file with Xcode (tested for Xcode 14). 21 | 2. Click on the project and select `Signing & Capabilities` to provision the app. You'll have to make an [Apple developer account](https://developer.apple.com/) if you don't have one already. Check the `Automatically manage signing` box, select your team (likely your personal Apple developer account), and you should be done. 22 | 23 | ![xcode](!figs/signing.png) 24 | 25 | 3. Plug in your device (in our case an iPhone 14 Pro), and trust it. It should appear in the list of devices at the top of Xcode. Select it as the device to build/deploy to. 26 | 4. Press the play button at the top of Xcode to build the app; its icon should now appear on your phone. 27 | 5. As this is an app not from the App Store, you will have to trust the developer in your settings under `Settings / General / VPN & Device Management` (this location may depend on your iOS version). 28 | 29 | ![app](!figs/app.png) 30 | 31 | 6. You should now be able to run the app. 32 | 33 | ## Using the App 34 | 35 | ![interface](!figs/interface.png) 36 | 37 | 1. Enter a suffix (or leave blank). Captured long-burst data will be saved into folders named `bundle-{capture_time}-{suffix}`. 38 | 2. Use the sliding bar to select how many frames to record per capture: 1, 15, 30, or 42. The app records at ~21 fps, so the longest recording length is approximately 2 seconds. 39 | 3. Left button resets the video stream. Middle button captures a long-burst with RAW, RGB, Depth, and motion (gyro/accelerometer) data. Right button records only motion data. 40 | 4. If the recording was successful, a message will pop up stating the device is "Saving Data to Disk". Otherwise an error message will appear and no data will be written to the device. 41 | 5.
If the video stream stops or appears to have high latency, try restarting the app and closing other apps that may be taking up phone memory. 42 | 6. ! **careful** ! : This app records completely uncompressed 14-bit, 12-megapixel RAWs; so a 42-frame recording is like 2 gigabytes of data. It's really easy to quickly fill up your phone's entire storage, so remember to delete unneeded captures and empty the `Recently Deleted` folder. 43 | 44 | ## Processing the Recorded Long-Bursts 45 | 1. Airdrop is the easiest way to move recorded long-burst bundles from the phone to your computer. Navigate in the `Files` app to `On My iPhone` and you should see an `Image Bundle Recorder` folder. Then select and airdrop the desired data to your device: 46 | 47 | ![airdrop](!figs/airdrop.png) 48 | 49 | 2. Place all these bundles into a folder, and convert them to `.npz` dictionaries with: 50 | ```python ConvertBinaries.py -d {folder_containing_bundles}```. This will populate the parent directory with folders containing the processed `.npz` data and preview first/last images of the recorded data. 51 | 52 | 3. See [0_data_format.ipynb](https://github.com/princeton-computational-imaging/SoaP/blob/main/0_data_format.ipynb) in the main repo to understand what's inside this `.npz` data and [1_reconstruction.ipynb](https://github.com/princeton-computational-imaging/SoaP/blob/main/1_reconstruction.ipynb) to learn how to train a SoaP model with it. 53 | 54 | Best fishes, 55 | Ilya 56 | 57 | -------------------------------------------------------------------------------- /!figs/experiments-thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/SoaP/857dda0f7578126ed9feb8410eedeef053679f9e/!figs/experiments-thumb.png -------------------------------------------------------------------------------- /!figs/extra-thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/SoaP/857dda0f7578126ed9feb8410eedeef053679f9e/!figs/extra-thumb.png -------------------------------------------------------------------------------- /!figs/scenes-thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/SoaP/857dda0f7578126ed9feb8410eedeef053679f9e/!figs/scenes-thumb.png -------------------------------------------------------------------------------- /!figs/synth-thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/SoaP/857dda0f7578126ed9feb8410eedeef053679f9e/!figs/synth-thumb.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Ilya Chugunov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included 
in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Shakes on a Plane: Unsupervised Depth Estimation from Unstabilized Photography 3 | 4 | Open In Colab 5 | 6 | 7 | 8 | This is the official code repository for the work: [Shakes on a Plane: Unsupervised Depth Estimation from Unstabilized Photography](https://light.princeton.edu/publication/soap/), presented at CVPR 2023. 9 | 10 | If you use parts of this work, or otherwise take inspiration from it, please consider citing our paper: 11 | ``` 12 | @InProceedings{Chugunov_2023_CVPR, 13 | author = {Chugunov, Ilya and Zhang, Yuxuan and Heide, Felix}, 14 | title = {Shakes on a Plane: Unsupervised Depth Estimation From Unstabilized Photography}, 15 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 16 | month = {June}, 17 | year = {2023}, 18 | pages = {13240-13251} 19 | } 20 | ``` 21 | 22 | ## Requirements: 23 | - Developed using PyTorch 1.13.0 and PyTorch Lightning 1.8.3 on a Linux x64 machine 24 | - Condensed package requirements are in `\requirements.txt`. Note that this contains the exact package versions at the time of publishing. Code will likely work with newer versions of the libraries, but you will need to watch out for changes in class/function calls. 25 | 26 | This code also requires tiny-cuda-nn; see [NVlabs/tiny-cuda-nn](https://github.com/NVlabs/tiny-cuda-nn) for installation instructions (we used Version 1.6 at the time of publishing). 27 | 28 | ## Project Structure: 29 | ```cpp 30 | SoaP 31 | ├── checkpoints 32 | │ └── // folder for network checkpoints 33 | ├── config 34 | │ ├── config_depth.json // depth MLP configuration 35 | │ └── config_rgb.json // image MLP configuration 36 | ├── data 37 | │ └── // folder for long-burst data 38 | ├── utils 39 | │ └── utils.py // network helper functions (e.g. camera projection, spline interpolation) 40 | ├── tutorial.ipynb // interactive tutorial for training and depth reconstruction 41 | ├── README.md // <- You Are Here 42 | ├── requirements.txt // frozen package requirements 43 | └── train.py // dataloader, network, visualization, and trainer code 44 | ``` 45 | ## Getting Started: 46 | We recommend you start by going through `tutorial.ipynb` to download a sample long-burst, familiarize yourself with the data it contains, and learn how to train a model with it. This tutorial is also available as a [Colab notebook](https://colab.research.google.com/github/princeton-computational-imaging/SoaP/blob/main/tutorial.ipynb). 47 | 48 | For other training arguments, see the argument parser section of `\train.py`. 49 | 50 | ## Data: 51 | You can download the long-burst data used in the paper (and extra bonus scenes) via the following links: 52 | 53 | 1.
Shade map used for lens shading compensation (**important**, see paper supplemental for more information): [shade_map.npy](https://soap.cs.princeton.edu/shade_map.npy) 54 | 55 | 2. Main scenes: [scenes.zip](https://soap.cs.princeton.edu/scenes.zip) 56 | ![xcode](!figs/scenes-thumb.png) 57 | Model checkpoints: [scenes-checkpoints.zip](https://soap.cs.princeton.edu/scenes-checkpoints.zip) 58 | These checkpoints may require you to download the full scene data in order to properly load them. 59 | 60 | 3. Supplemental experiment scenes: [experiments.zip](https://soap.cs.princeton.edu/experiments.zip) 61 | ![xcode](!figs/experiments-thumb.png) 62 | 63 | 4. Extra un-used (but neat) scenes: [extra.zip](https://soap.cs.princeton.edu/extra.zip) 64 | ![xcode](!figs/extra-thumb.png) 65 | 66 | 5. Synthetic rendered data (with scanned object meshes): [synthetic.zip](https://soap.cs.princeton.edu/synthetic.zip) 67 | ![xcode](!figs/synth-thumb.png) 68 | 69 | 70 | We recommend you unzip these folders and place them into `\data` 71 | 72 | ## App: 73 | Want to record your own long-burst data? See [!App](https://github.com/princeton-computational-imaging/SoaP/tree/main/!App) for details! 74 | 75 | 76 | Good luck have fun, 77 | Ilya 78 | -------------------------------------------------------------------------------- /checkpoints/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/SoaP/857dda0f7578126ed9feb8410eedeef053679f9e/checkpoints/__init__.py -------------------------------------------------------------------------------- /config/config_depth.json: -------------------------------------------------------------------------------- 1 | { 2 | "encoding": { 3 | "otype": "HashGrid", 4 | "n_levels": 8, 5 | "n_features_per_level": 4, 6 | "log2_hashmap_size": 14, 7 | "base_resolution": 8, 8 | "per_level_scale": 1.4 9 | }, 10 | "network": { 11 | "otype": "FullyFusedMLP", 12 | "activation": "ReLU", 13 | "output_activation": "None", 14 | "n_neurons": 128, 15 | "n_hidden_layers": 5 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /config/config_rgb.json: -------------------------------------------------------------------------------- 1 | { 2 | "encoding": { 3 | "otype": "HashGrid", 4 | "n_levels": 16, 5 | "n_features_per_level": 4, 6 | "log2_hashmap_size": 22, 7 | "base_resolution": 8, 8 | "per_level_scale": 1.5 9 | }, 10 | "network": { 11 | "otype": "FullyFusedMLP", 12 | "activation": "ReLU", 13 | "output_activation": "None", 14 | "n_neurons": 128, 15 | "n_hidden_layers": 5 16 | } 17 | } -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/SoaP/857dda0f7578126ed9feb8410eedeef053679f9e/data/__init__.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | commentjson>=0.9.0 2 | matplotlib>=3.6.2 3 | numpy>=1.22.3 4 | pytorch_lightning>=1.8.3.post1 5 | torch>=1.13.0 6 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import commentjson as json 3 | import numpy as np 4 | import os 5 | 6 | import 
tinycudann as tcnn 7 | 8 | from utils import utils 9 | from utils.utils import debatch 10 | 11 | import torch 12 | from torch.nn import functional as F 13 | from torch.utils.data import Dataset 14 | from torch.utils.data import DataLoader 15 | import pytorch_lightning as pl 16 | 17 | ######################################################################################################### 18 | ################################################ DATASET ################################################ 19 | ######################################################################################################### 20 | 21 | class BundleDataset(Dataset): 22 | def __init__(self, args): 23 | bundle = dict(np.load(args.bundle_path, allow_pickle=True)) 24 | utils.de_item(bundle) 25 | 26 | if not args.no_raw: 27 | raw_frames = torch.tensor(np.array([bundle[f'raw_{i}']['raw'] for i in range(bundle['num_raw_frames'])]).astype(np.int32))[None] # B,T,H,W 28 | if args.no_shade_map or args.no_raw: 29 | pass # no shade map needed 30 | else: 31 | shade_map = torch.tensor(np.load(os.path.join(os.getcwd(), "data/shade_map.npy")))[None,None,:,:] # 1,1,H,W, compensation for lens shading 32 | raw_frames = raw_frames * shade_map 33 | 34 | self.motion = bundle['motion'] 35 | if args.no_device_rotations: 36 | self.frame_timestamps = torch.tensor(np.linspace(0,1, bundle['num_rgb_frames'])) 37 | self.motion_timestamps = torch.tensor(np.linspace(0,1, bundle['num_rgb_frames'])) 38 | self.quaternions = torch.tensor(np.repeat([[0,0,0,1.0]], bundle['num_rgb_frames'], axis=0)).float() 39 | else: 40 | self.frame_timestamps = torch.tensor([bundle[f'raw_{i}']['timestamp'] for i in range(bundle['num_rgb_frames'])]) 41 | self.motion_timestamps = torch.tensor(self.motion['timestamp']) 42 | self.quaternions = torch.tensor(self.motion['quaternion']) # T',4, has different timestamps from frames 43 | 44 | self.reference_quaternion = utils.multi_interp(self.frame_timestamps[0:1], self.motion_timestamps, self.quaternions) # quaternion at frame 0 45 | self.reference_rotation = utils.convert_quaternions_to_rot(self.reference_quaternion) 46 | 47 | self.processed_rgb_volume = torch.tensor(np.array([bundle[f'rgb_{i}']['rgb'] for i in range(bundle['num_rgb_frames'])])) 48 | self.processed_rgb_volume = (self.processed_rgb_volume[:,:,:,:3].permute(0,3,1,2)).float() # remove alpha, make: T,C,H,W 49 | self.processed_rgb_volume = self.processed_rgb_volume / self.processed_rgb_volume[0].max() # scale 0-1 50 | 51 | intrinsics_ratio = 1.0 52 | if args.no_phone_depth and not args.no_raw: # intrinsics from RGB, but img from RAW, rescale 53 | intrinsics_ratio = bundle['raw_0']['height'] / bundle['rgb_0']['height'] 54 | elif not args.no_phone_depth and args.no_raw: # intrinsics from depth, img from processed RGB 55 | intrinsics_ratio = bundle['rgb_0']['height'] / bundle['raw_0']['height'] 56 | 57 | if args.no_phone_depth: 58 | self.intrinsics = torch.tensor(np.array([bundle[f'rgb_{i}']['intrinsics'] for i in range(bundle['num_rgb_frames'])])).float() # T,3,3 59 | else: 60 | self.intrinsics = torch.tensor(np.array([bundle[f'depth_{i}']['intrinsics'] for i in range(bundle['num_depth_frames'])])) 61 | 62 | self.intrinsics[:,:3,:2] = self.intrinsics[:,:3,:2] * intrinsics_ratio 63 | 64 | if args.no_raw: # use processed RGB 65 | self.rgb_volume = (self.processed_rgb_volume).float() 66 | self.rgb_volume = self.rgb_volume - self.rgb_volume.min() 67 | self.rgb_volume = self.rgb_volume/self.rgb_volume.max() 68 | 69 | else: # use minimally processed RAW 70 | 
self.rgb_volume = (utils.raw_to_rgb(raw_frames)).float() # T,C,H,W 71 | self.rgb_volume = self.rgb_volume - self.rgb_volume.min() 72 | self.rgb_volume = self.rgb_volume/self.rgb_volume.max() 73 | if args.dark: # cut off highlights for scaling (long-tail-distribution) 74 | self.rgb_volume = self.rgb_volume/np.percentile(self.rgb_volume, 98) 75 | self.rgb_volume = self.rgb_volume.clamp(0,1) 76 | 77 | self.reference_intrinsics = self.intrinsics[0:1] 78 | 79 | if args.no_phone_depth: 80 | self.depth_volume = torch.zeros(bundle['num_rgb_frames'], 1, 64, 64, dtype=torch.float32) # placeholder depth 81 | else: 82 | self.depth_volume = torch.tensor(np.array([bundle[f'depth_{i}']['depth'] for i in range(bundle['num_depth_frames'])])) 83 | self.depth_volume = 1/(self.depth_volume[:,:,:,None].permute(0,3,1,2)).float() # T,C,H,W; lidar has inverse depth 84 | 85 | T,C,H,W = self.rgb_volume.shape 86 | self.num_frames, self.img_channels, self.img_height, self.img_width = T,C,H,W 87 | 88 | self.point_batch_size = args.point_batch_size 89 | self.num_batches = args.num_batches 90 | 91 | def __len__(self): 92 | return self.num_batches # arbitrary as we continuously generate random samples 93 | 94 | def __getitem__(self, idx): 95 | # create uniform u,v between 0.025 and 0.975 to preserve edges 96 | uv = torch.rand(self.point_batch_size, 2) * torch.tensor([[0.95,0.95]]) + torch.tensor([[0.025,0.025]]) 97 | 98 | # t is time for all frames, looks like [0, 0,... 0, 1/41, 1/41, ..., 1/41, 2/41, 2/41, ..., 2/41, etc.] 99 | t = torch.linspace(0,1,self.num_frames).repeat_interleave(uv.shape[0])[:,None] # num_frames * point_batch_size, 1 100 | 101 | return self.sample_grid(uv, t, frame=0, sample_depth=True, sample_rgb=True, sample_processed_rgb=False) 102 | 103 | def sample_grid(self, uv, t, frame, sample_depth=False, sample_rgb=False, sample_processed_rgb=False): 104 | """ Return TUV grid, interpolated rotation, intrinsics, depth, rgb samples 105 | """ 106 | 107 | lidar_samples, rgb_samples, rgb_processed_samples = -1, -1, -1 108 | 109 | # convert to frame times [0-1] -> (seconds) 110 | t_frame = torch.tensor(np.interp(t, np.linspace(0,1,len(self.frame_timestamps)), self.frame_timestamps)).squeeze() 111 | # grab linearly interpolated quaternions at those timestamps 112 | quaternions = utils.multi_interp(t_frame, self.motion_timestamps, self.quaternions) 113 | # grab linearly interpolated intrinsics at those timestamps 114 | intrinsics = utils.multi_interp(t_frame, self.frame_timestamps, self.intrinsics.view(-1,9)).reshape(-1,3,3) 115 | 116 | if sample_depth: 117 | # grid_sample uses coordinates [-1,1] whereas MLP uses [0,1], hence rescaling 118 | grid_uv = ((uv - 0.5) * 2)[None,:,None,:] # 1,point_batch_size,1,2 119 | lidar_samples = F.grid_sample(self.depth_volume[frame:frame+1], grid_uv, mode="bilinear", padding_mode="border", align_corners=True) 120 | lidar_samples = lidar_samples.squeeze()[:,None] # point_batch_size, C 121 | 122 | if sample_rgb: 123 | grid_uv = ((uv - 0.5) * 2)[None,:,None,:] # 1,point_batch_size,1,2 124 | rgb_samples = F.grid_sample(self.rgb_volume[frame:frame+1], grid_uv, mode="bilinear", padding_mode="border", align_corners=True) 125 | rgb_samples = rgb_samples.squeeze().permute(1,0) # point_batch_size, C 126 | 127 | if sample_processed_rgb: 128 | grid_uv = ((uv - 0.5) * 2)[None,:,None,:] # 1,point_batch_size,1,2 129 | rgb_processed_samples = F.grid_sample(self.processed_rgb_volume[frame:frame+1], grid_uv, mode="bilinear", padding_mode="border", align_corners=True) 130 | rgb_processed_samples 
= rgb_processed_samples.squeeze().permute(1,0) # point_batch_size, C 131 | 132 | return t, uv, quaternions, intrinsics, lidar_samples, rgb_samples, rgb_processed_samples 133 | 134 | ######################################################################################################### 135 | ################################################ MODELS #################$############################### 136 | ######################################################################################################### 137 | 138 | class PlaneModel(pl.LightningModule): 139 | def __init__(self, depth): 140 | super().__init__() 141 | # ax + by + c 142 | self.plane_coefs = torch.nn.Parameter(data=torch.tensor([1/10,1/10,depth/5]), requires_grad=True) 143 | # increase effective learning rate of plane without custom lr scheduler 144 | self.scale_factor = torch.nn.Parameter(data=torch.tensor([5.0,5.0,5.0]), requires_grad=False) 145 | 146 | def forward(self, uv): 147 | uv_homogenous = torch.cat((uv, torch.ones_like(uv[:,:1])), dim=1) 148 | plane = uv_homogenous * self.plane_coefs * self.scale_factor 149 | return torch.sum(plane, dim=1, keepdims=True) 150 | 151 | 152 | class LearnedRotationModel(pl.LightningModule): 153 | def __init__(self, args): 154 | super().__init__() 155 | self.args = args 156 | self.rotation_betas = torch.nn.Parameter(data=torch.zeros(args.control_points_motion, 3, 1, dtype=torch.float32), requires_grad=True) 157 | 158 | def forward(self, quaternions, t): 159 | # use de casteljau algorithm for interpolation 160 | rotation_deltas = utils.de_casteljau(self.rotation_betas, t) 161 | rx, ry, rz = rotation_deltas[:,0], rotation_deltas[:,1], rotation_deltas[:,2] 162 | r1 = torch.ones_like(rx) 163 | 164 | # identity rotation eye(3) plus small rotational offsets 165 | rotations = torch.stack([torch.stack([ r1, -rz, ry], dim=-1), 166 | torch.stack([ rz, r1, -rx], dim=-1), 167 | torch.stack([-ry, rx, r1], dim=-1)], dim=-1) 168 | 169 | return rotations 170 | 171 | 172 | class DeviceRotationModel(pl.LightningModule): 173 | def __init__(self, args, reference_rotation): 174 | super().__init__() 175 | self.args = args 176 | self.reference_rotation = reference_rotation 177 | self.rotation_betas = torch.nn.Parameter(data=torch.zeros(args.control_points_motion, 3, 1, dtype=torch.float32), requires_grad=True) 178 | 179 | def forward(self, quaternions, t): 180 | rotations = torch.inverse(self.reference_rotation) @ utils.convert_quaternions_to_rot(quaternions) # from gyro 181 | 182 | rotation_deltas = utils.de_casteljau(self.rotation_betas, t) 183 | rx, ry, rz = rotation_deltas[:,0], rotation_deltas[:,1], rotation_deltas[:,2] 184 | r0 = torch.zeros_like(rx) 185 | 186 | rotation_offsets = torch.stack([torch.stack([ r0, -rz, ry], dim=-1), 187 | torch.stack([ rz, r0, -rx], dim=-1), 188 | torch.stack([-ry, rx, r0], dim=-1)], dim=-1) 189 | 190 | return rotations + self.args.rotation_weight * rotation_offsets 191 | 192 | 193 | class TranslationModel(pl.LightningModule): 194 | def __init__(self, args): 195 | super().__init__() 196 | self.args = args 197 | self.translation_betas = torch.nn.Parameter(data=torch.zeros(args.control_points_motion, 3, 1, dtype=torch.float32), requires_grad=True) 198 | 199 | def forward(self, t): 200 | return self.args.translation_weight * utils.de_casteljau(self.translation_betas, t) 201 | 202 | class IntrinsicsModel(pl.LightningModule): 203 | def __init__(self, args, reference_intrinsics): 204 | super().__init__() 205 | self.args = args 206 | self.intrinsic_betas = 
torch.nn.Parameter(data=torch.zeros(args.control_points_intrinsics, 1, 1, dtype=torch.float32), requires_grad=True) 207 | self.focal = torch.nn.Parameter(data=torch.tensor([reference_intrinsics[0,0,0]]), requires_grad=True) 208 | self.cy = reference_intrinsics[0,2,0] 209 | self.cx = reference_intrinsics[0,2,1] 210 | 211 | def forward(self, t): 212 | f_deltas = utils.de_casteljau(self.intrinsic_betas, t) 213 | 214 | cy = self.cy * torch.ones_like(t) 215 | cx = self.cx * torch.ones_like(t) 216 | f = (self.focal * torch.ones_like(t)) + f_deltas 217 | f0 = torch.zeros_like(t) 218 | f1 = torch.ones_like(t) 219 | 220 | intrinsics = torch.stack([torch.stack([f, f0, cy], dim=-1), 221 | torch.stack([f0, f, cx], dim=-1), 222 | torch.stack([f0, f0, f1], dim=-1)], dim=-1) 223 | return intrinsics.squeeze(dim=1) 224 | 225 | ######################################################################################################### 226 | ################################################ NETWORK ################################################ 227 | ######################################################################################################### 228 | 229 | class BundleMLP(pl.LightningModule): 230 | def __init__(self, args): 231 | super().__init__() 232 | # load network configs 233 | with open(args.config_path_depth) as config_depth: 234 | config_depth = json.load(config_depth) 235 | with open(args.config_path_rgb) as config_rgb: 236 | config_rgb = json.load(config_rgb) 237 | 238 | self.args = args 239 | 240 | self.encoding_depth = tcnn.Encoding(n_input_dims=2, encoding_config=config_depth["encoding"]) 241 | self.network_depth = tcnn.Network(n_input_dims=self.encoding_depth.n_output_dims, n_output_dims=1, network_config=config_depth["network"]) 242 | 243 | self.encoding_rgb = tcnn.Encoding(n_input_dims=2, encoding_config=config_rgb["encoding"]) 244 | self.network_rgb = tcnn.Network(n_input_dims=self.encoding_rgb.n_output_dims, n_output_dims=3, network_config=config_rgb["network"]) 245 | 246 | self.model_translation = TranslationModel(args) 247 | self.model_plane = PlaneModel(depth=1.0) 248 | 249 | self.mask = torch.ones(self.encoding_depth.n_output_dims, dtype=torch.float32) 250 | self.save_hyperparameters() 251 | 252 | bundle = BundleDataset(args) 253 | self.bundle = bundle 254 | self.rgb_volume = bundle.rgb_volume 255 | self.processed_rgb_volume = bundle.processed_rgb_volume 256 | self.reference_intrinsics = bundle.reference_intrinsics 257 | self.reference_rotation = bundle.reference_rotation 258 | 259 | if args.no_device_rotations: # learn rotations from scratch 260 | self.model_rotation = LearnedRotationModel(args) 261 | else: # use gyro data 262 | self.model_rotation = DeviceRotationModel(args, self.reference_rotation) 263 | 264 | self.model_intrinsics = IntrinsicsModel(args, self.reference_intrinsics) 265 | 266 | def sample_volume(self, uv, volume, frame=None): 267 | """ Grid sample from 2D image volume at coordinates (u,v) 268 | If frame=None, sample from all frames, else single frame 269 | """ 270 | pbs = self.bundle.point_batch_size 271 | grid_uv = ((uv - 0.5) * 2) 272 | 273 | if frame is None: 274 | grid_uv = grid_uv.reshape(self.bundle.num_frames, pbs, 1, -1) # frames, pbs, 1, 2 275 | rgb_samples = F.grid_sample(volume, grid_uv, mode="bilinear", padding_mode="border", align_corners=True) 276 | rgb_samples = rgb_samples.squeeze().permute(0,2,1).reshape(pbs * self.bundle.num_frames, -1) 277 | else: 278 | grid_uv = grid_uv[None,:,None,:] # frames, pbs, 1, 2 279 | rgb_samples = 
F.grid_sample(volume[frame:frame+1], grid_uv, mode="bilinear", padding_mode="border", align_corners=True) 280 | rgb_samples = rgb_samples.squeeze().permute(1,0) 281 | 282 | return rgb_samples 283 | 284 | def configure_optimizers(self): 285 | optimizer = torch.optim.Adam(self.parameters(), lr=self.args.lr, betas=(0.9, 0.99), eps=1e-15, weight_decay=0) 286 | scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=self.args.gamma) 287 | return [optimizer], [scheduler] 288 | 289 | def forward(self, t, uv, quaternions, lidar_samples, rgb_samples): 290 | """ Forward model pass, estimate motion, implicit depth + image. 291 | """ 292 | translation = self.model_translation(t) 293 | rotation = self.model_rotation(quaternions, t) 294 | 295 | uv_depth = self.encoding_depth(uv) 296 | uv_rgb = self.encoding_rgb(uv) 297 | 298 | mask = self.mask.to(self.device)[None,:] 299 | 300 | if self.args.allow_negative_depth: # no ReLUs, no clamps, depth can go hog-wild 301 | plane = self.model_plane(uv) 302 | depth = (plane - self.network_depth(uv_depth * mask)) 303 | else: # clamp depth between 0.01 and 10, depth offset must be *in front* of plane 304 | plane = F.relu(self.model_plane(uv)).clamp(0.01, 10.0) 305 | depth = (plane - F.relu(self.network_depth(uv_depth * mask))).clamp(0.01, 10) 306 | 307 | if self.args.fixed_image: # just sample static reference frame 308 | rgb = rgb_samples 309 | else: # sample from RGB MLP 310 | rgb = F.relu(0.5 + self.network_rgb(uv_rgb)).float() 311 | 312 | return rgb, depth, plane, rotation, translation 313 | 314 | def reproject(self, t, uv, depth, rotation, translation, intrinsics): 315 | """ Reproject uv coordinates to new reference frame 316 | """ 317 | if self.args.no_intrinsics: # use learned model 318 | intrinsics = self.model_intrinsics(t) 319 | reference_intrinsics = self.model_intrinsics(torch.zeros_like(t)[0:1]) 320 | else: # use stored intrinsics 321 | reference_intrinsics = self.reference_intrinsics 322 | 323 | uvz = torch.cat((uv, depth), dim=1) 324 | xyz = utils.uvz_to_xyz(uvz, reference_intrinsics, img_width=self.bundle.img_width, img_height=self.bundle.img_height) 325 | xyz = (torch.inverse(rotation) @ xyz[:,:,None])[:,:,0] + translation # project to query 326 | uvz_reprojected = utils.xyz_to_uvz(xyz, intrinsics, img_width=self.bundle.img_width, img_height=self.bundle.img_height) 327 | tuv_reprojected = torch.cat((t, uvz_reprojected[:,0:2]), dim=1) 328 | 329 | return tuv_reprojected 330 | 331 | def training_step(self, train_batch, batch_idx): 332 | N = self.bundle.num_frames 333 | pbs = self.args.point_batch_size 334 | 335 | t, uv, quaternions, intrinsics, lidar_samples, rgb_samples, _ = train_batch # collapse batch + point dimensions 336 | t, uv, quaternions, intrinsics, lidar_samples, rgb_samples = debatch(t), debatch(uv), debatch(quaternions), debatch(intrinsics), debatch(lidar_samples), debatch(rgb_samples) 337 | 338 | rgb, depth, plane, rotation, translation = self.forward(t, uv, quaternions, lidar_samples, rgb_samples) 339 | uv, rgb, depth, plane = uv.repeat(N,1), rgb.repeat(N,1), depth.repeat(N,1), plane.repeat(N,1) 340 | 341 | tuv_plane_reprojected = self.reproject(t, uv, plane, rotation, translation, intrinsics) 342 | tuv_depth_reprojected = self.reproject(t, uv, depth, rotation, translation, intrinsics) 343 | 344 | rgb_plane_reprojected = self.sample_volume(tuv_plane_reprojected[:,1:], self.rgb_volume) # sample all timesteps with u,v 345 | rgb_depth_reprojected = self.sample_volume(tuv_depth_reprojected[:,1:], self.rgb_volume) 346 | 347 | 
loss = 0.0 348 | 349 | # overall depth loss 350 | depth_rgb_loss = ((rgb/(rgb.detach() + 0.001)) - (rgb_depth_reprojected/(rgb.detach() + 0.001))) ** 2 351 | depth_rgb_loss = depth_rgb_loss.mean(dim=1, keepdims=True) # mean over RGB channels 352 | loss += depth_rgb_loss.mean() 353 | 354 | # plane-only loss 355 | plane_rgb_loss = ((rgb/(rgb.detach() + 0.001)) - (rgb_plane_reprojected/(rgb.detach() + 0.001))) ** 2 356 | plane_rgb_loss = plane_rgb_loss.mean(dim=1, keepdims=True) # mean over RGB channels 357 | 358 | # weighted plane loss 359 | plane_depth_loss = (depth/plane - 1) ** 2 360 | weighted_plane_depth_loss = plane_rgb_loss/(depth_rgb_loss + 0.001) * plane_depth_loss 361 | loss += self.args.plane_weight * weighted_plane_depth_loss.mean() 362 | 363 | self.log('loss', loss) 364 | return loss 365 | 366 | def make_grid(self, height, width, u_lims, v_lims): 367 | """ Create (u,v) meshgrid with size (height,width) extent (u_lims, v_lims) 368 | """ 369 | u = torch.linspace(u_lims[0], u_lims[1], width) 370 | v = torch.linspace(v_lims[0], v_lims[1], height) 371 | u_grid, v_grid = torch.meshgrid([u, v], indexing="xy") # u/v grid 372 | return torch.stack((u_grid.flatten(), v_grid.flatten())).t() 373 | 374 | def generate_imgs(self, frame, height=960, width=720, u_lims=[0,1], v_lims=[0,1]): 375 | """ Produce reference images and depth maps for tensorboard/visualization 376 | """ 377 | device = self.device 378 | uv = self.make_grid(height, width, u_lims, v_lims) 379 | t = torch.tensor(frame/(self.bundle.num_frames - 1)).repeat(uv.shape[0])[:,None] # num_points, 1 380 | 381 | batch = self.bundle.sample_grid(uv, t, frame, sample_depth=True, sample_rgb=True, sample_processed_rgb=True) 382 | batch = [elem.to(device) for elem in batch] 383 | t, uv, quaternions, intrinsics, lidar_samples, rgb_samples, rgb_processed_samples = batch 384 | 385 | rgb_raw = rgb_samples.reshape(height, width, 3).permute(2,0,1) # channel first 386 | rgb_processed = rgb_processed_samples.reshape(height, width, 3).permute(2,0,1) # channel first 387 | depth_lidar = lidar_samples.reshape(height, width) 388 | depth_lidar_img = utils.colorize_tensor(depth_lidar, vmin=lidar_samples.min(), vmax=lidar_samples.max(), cmap="RdYlBu") 389 | 390 | return rgb_raw, rgb_processed, depth_lidar, depth_lidar_img 391 | 392 | def generate_outputs(self, frame, height=960, width=720, u_lims=[0,1], v_lims=[0,1]): 393 | """ Use forward model to sample implicit image I(u,v), depth D(u,v) and raw/processed images 394 | at reprojected u,v, coordinates. 
Results should be aligned (sampled at (u',v')) 395 | """ 396 | device = self.device 397 | uv = self.make_grid(height, width, u_lims, v_lims) 398 | t = torch.tensor(frame/(self.bundle.num_frames - 1)).repeat(uv.shape[0])[:,None] # num_points, 1 399 | 400 | batch = self.bundle.sample_grid(uv, t, frame, sample_depth=True, sample_rgb=True, sample_processed_rgb=True) 401 | batch = [elem.to(device) for elem in batch] 402 | t, uv, quaternions, intrinsics, lidar_samples, rgb_samples, rgb_processed_samples = batch 403 | 404 | with torch.no_grad(): 405 | rgb, depth, plane, rotation, translation = self.forward(t, uv, quaternions, lidar_samples, rgb_samples) 406 | tuv_reprojected = self.reproject(t, uv, depth, rotation, translation, intrinsics) 407 | rgb_raw = self.sample_volume(tuv_reprojected[:,1:], self.rgb_volume, frame=frame) 408 | rgb_processed = self.sample_volume(tuv_reprojected[:,1:], self.processed_rgb_volume, frame=frame) 409 | 410 | rgb = rgb.reshape(height, width, 3).permute(2,0,1) # channel first 411 | rgb_raw = rgb_raw.reshape(height, width, 3).permute(2,0,1) # channel first 412 | rgb_processed = rgb_processed.reshape(height, width, 3).permute(2,0,1) 413 | 414 | depth = depth.reshape(height, width) 415 | depth_img = utils.colorize_tensor(depth, vmin=0, vmax=depth.max(), cmap="RdYlBu") 416 | 417 | return rgb, rgb_raw, rgb_processed, depth, depth_img 418 | 419 | ######################################################################################################### 420 | ############################################### VALIDATION ############################################## 421 | ######################################################################################################### 422 | 423 | class ValidationCallback(pl.Callback): 424 | def __init__(self): 425 | super().__init__() 426 | 427 | def on_train_epoch_start(self, trainer, model): 428 | args = model.args 429 | coef = ((model.current_epoch/model.args.max_epochs) * args.mask_k_max) + ((1 - model.current_epoch/model.args.max_epochs) * args.mask_k_min) 430 | model.mask = torch.sigmoid(torch.linspace(args.mask_k_max, coef, len(model.mask))) 431 | print("Mask mean:", model.mask.mean()) 432 | 433 | # let plane train on its own for 10 epochs 434 | if model.current_epoch == 10: 435 | # start training depth 436 | model.encoding_depth.requires_grad_(True) 437 | model.encoding_depth.train(True) 438 | model.network_depth.requires_grad_(True) 439 | model.network_depth.train(True) 440 | if args.no_intrinsics: 441 | model.model_intrinsics.requires_grad_(True) 442 | model.model_intrinsics.train(True) 443 | 444 | for i, frame in enumerate([0]): # can sample more frames 445 | rgb, rgb_raw, rgb_processed, depth, depth_img = model.generate_outputs(frame) 446 | model.logger.experiment.add_image(f'pred/{i}_rgb', rgb, global_step=trainer.global_step) 447 | model.logger.experiment.add_image(f'pred/{i}_raw', rgb_raw, global_step=trainer.global_step) 448 | model.logger.experiment.add_image(f'pred/{i}_processed', rgb_processed, global_step=trainer.global_step) 449 | model.logger.experiment.add_image(f'pred/{i}_depth', depth_img, global_step=trainer.global_step) 450 | 451 | if model.args.save_video: # save the evolution of the model 452 | if i == 0: # save first frame 453 | np.save(f"video/{model.args.name}/{model.current_epoch}_depth.npy", depth.detach().cpu().numpy()) 454 | np.save(f"video/{model.args.name}/{model.current_epoch}_rgb.npy", rgb.detach().cpu().numpy()) 455 | 456 | # zoomed images 457 | # rgb, rgb_raw, rgb_processed, depth, depth_img, 
depth_lidar, depth_lidar_img = model.generate_imgs(frame, u_lims=[0.4,0.6], v_lims=[0.4,0.6]) 458 | # model.logger.experiment.add_image(f'pred/{i}_rgb_zoom', rgb, global_step=trainer.global_step) 459 | # model.logger.experiment.add_image(f'pred/{i}_depth_zoom', depth_img, global_step=trainer.global_step) 460 | 461 | def on_train_start(self, trainer, model): 462 | pl.seed_everything(42) 463 | 464 | # pl doesn't put non-parameters on the right device 465 | model.rgb_volume = model.rgb_volume.to(model.device) 466 | model.processed_rgb_volume = model.processed_rgb_volume.to(model.device) 467 | model.reference_intrinsics = model.reference_intrinsics.to(model.device) 468 | if not model.args.no_device_rotations: 469 | model.model_rotation.reference_rotation = model.model_rotation.reference_rotation.to(model.device) 470 | model.model_intrinsics.focal = model.model_intrinsics.focal.to(model.device) 471 | 472 | model.logger.experiment.add_text("args", str(model.args)) 473 | 474 | rgb_raw, rgb_processed, depth_lidar, depth_lidar_img = model.generate_imgs(0) 475 | model.logger.experiment.add_image('gt/lidar', depth_lidar_img, global_step=trainer.global_step) 476 | 477 | for i, frame in enumerate([0]): 478 | rgb_raw, rgb_processed, depth_lidar, depth_lidar_img = model.generate_imgs(frame) 479 | model.logger.experiment.add_image(f'gt/{i}_rgb_raw', rgb_raw, global_step=trainer.global_step) 480 | model.logger.experiment.add_image(f'gt/{i}_rgb_processed', rgb_processed, global_step=trainer.global_step) 481 | # zoomed images 482 | # rgb, rgb_raw, rgb_processed, depth, depth_img, depth_lidar, depth_lidar_img = model.generate_imgs(frame, u_lims=[0.4,0.6], v_lims=[0.4,0.6]) 483 | # model.logger.experiment.add_image(f'gt/{i}_rgb_raw_zoom', rgb_raw, global_step=trainer.global_step) 484 | # model.logger.experiment.add_image(f'gt/{i}_rgb_processed_zoom', rgb_processed, global_step=trainer.global_step) 485 | 486 | if model.args.save_video: 487 | os.makedirs(f"video/{model.args.name}", exist_ok=True) 488 | 489 | def on_train_end(self, trainer, model): 490 | checkpoint_dir = os.path.join("checkpoints", args.name, "last.ckpt") 491 | trainer.save_checkpoint(checkpoint_dir) 492 | 493 | if __name__ == "__main__": 494 | 495 | # argparse 496 | parser = argparse.ArgumentParser() 497 | 498 | # data 499 | parser.add_argument('--point_batch_size', type=int, default=1024, help="Number of points to sample per dataloader index.") 500 | parser.add_argument('--num_batches', type=int, default=256, help="Number of training batches.") 501 | parser.add_argument('--no_shade_map', action='store_true', help="Don't use shade map, useful for low-light captures.") 502 | parser.add_argument('--no_raw', action='store_true', help="No RAW data available, use RGB volume instead.") 503 | parser.add_argument('--no_device_rotations', action='store_true', help="Learn rotations from scratch, useful if no gyro data available.") 504 | parser.add_argument('--no_intrinsics', action='store_true', help="Learn camera intrinsics from scratch, useful if no camera intrinsics available.") 505 | parser.add_argument('--no_phone_depth', action='store_true', help="No phone depth data in bundle.") 506 | parser.add_argument('--allow_negative_depth', action='store_true', help="Allow negative depth solutions, useful for weird or digitally stabilized data.") 507 | parser.add_argument('--dark', action='store_true', help="Low-light capture, automatically also turns off shade map.") 508 | 509 | # model 510 | parser.add_argument('--control_points_motion', type=int, 
default=21, help="Spline control points for translation/rotation model.") 511 | parser.add_argument('--control_points_intrinsics', type=int, default=4, help="Spline control points for intrinsics model.") 512 | parser.add_argument('--config_path_depth', type=str, default="config/config_depth.json", help="Depth model config.") 513 | parser.add_argument('--config_path_rgb', type=str, default="config/config_rgb.json", help="RGB model config.") 514 | parser.add_argument('--plane_weight', type=float, default=1e-4, help="Depth regularization.") 515 | parser.add_argument('--rotation_weight', type=float, default=1e-1, help="Scale learned rotation.") 516 | parser.add_argument('--translation_weight', type=float, default=1e-1, help="Scale learned translation.") 517 | parser.add_argument('--mask_k_min', type=float, default=-100, help="Mask weight evolution parameter.") 518 | parser.add_argument('--mask_k_max', type=float, default=100, help="Mask weight evolution parameter.") 519 | parser.add_argument('--fixed_image', action='store_true', help="Fix I(u,v) to be the zero-th frame during training.") 520 | 521 | 522 | # training 523 | parser.add_argument('--bundle_path', type=str, required=True, help="Path to frame_bundle.npz") 524 | parser.add_argument('--name', type=str, required=True, help="Experiment name for logs and checkpoints.") 525 | parser.add_argument('--max_epochs', type=int, default=100, help="Number of training epochs.") 526 | parser.add_argument('--gamma', type=float, default=0.98, help="Learning rate decay gamma.") 527 | parser.add_argument('--lr', type=float, default=1e-4, help="Learning rate.") 528 | parser.add_argument('--save_video', action='store_true', help="Store training outputs at each epoch for visualization.") 529 | 530 | 531 | args = parser.parse_args() 532 | if args.dark: 533 | args.no_shade_map = True 534 | 535 | print(args) 536 | 537 | # dataset 538 | bundle_dataset = BundleDataset(args) 539 | train_loader = DataLoader(bundle_dataset, batch_size=1, num_workers=os.cpu_count(), shuffle=True, pin_memory=True) 540 | 541 | # model 542 | model = BundleMLP(args) 543 | # let plane train on its own at the start 544 | model.network_depth.requires_grad_(False) 545 | model.encoding_depth.requires_grad_(False) 546 | model.model_intrinsics.requires_grad_(False) 547 | 548 | # training 549 | # checkpoint_callback = pl.callbacks.ModelCheckpoint(dirpath=os.path.join("checkpoints", args.name), save_top_k=1, save_last=True, monitor="loss") 550 | lr_callback = pl.callbacks.LearningRateMonitor() 551 | logger = pl.loggers.TensorBoardLogger(save_dir=os.getcwd(), version=args.name, name="lightning_logs") 552 | validation_callback = ValidationCallback() 553 | trainer = pl.Trainer(accelerator="auto", strategy="auto", max_epochs=args.max_epochs, 554 | logger=logger, callbacks=[validation_callback, lr_callback], enable_checkpointing=False) 555 | trainer.fit(model, train_loader) 556 | -------------------------------------------------------------------------------- /tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "4J5E1CJukt0H" 7 | }, 8 | "source": [ 9 | "### Section 0: Setup\n", 10 | "If running this in Google Colab, make sure that you are connected to a GPU instance and run the install script below. It should (hopefully) take about 2-5mins to execute." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "id": "AI4QE-aXkt0J" 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "import subprocess\n", 22 | "import os\n", 23 | "# Check if GPU exists\n", 24 | "\n", 25 | "try:\n", 26 | " subprocess.check_output('nvidia-smi')\n", 27 | " print(\"GPU is enabled.\")\n", 28 | " # Check if running in Google Colab\n", 29 | " if 'COLAB_GPU' in os.environ:\n", 30 | " # Instal TinyCuda\n", 31 | " %cd /content/\n", 32 | " # cursed one-line wheel download/install\n", 33 | " !curl -L \"https://github.com/Ilya-Muromets/TinyCudaColab/releases/latest/download/tinycudann-colab-gpu.zip\" -o tinycudann-colab-gpu.zip && unzip -o tinycudann-colab-gpu.zip && WHEEL=$(find . -maxdepth 1 -name \"*.whl\" | head -n 1) && echo \"Found wheel: $WHEEL\" && pip install \"$WHEEL\" --force-reinstall\n", 34 | " !pip install commentjson\n", 35 | " !pip install pytorch_lightning\n", 36 | " !pip install matplotlib==3.7.0\n", 37 | " # broken cuda version\n", 38 | " !pip uninstall -y torchaudio\n", 39 | " else:\n", 40 | " print(\"COLAB_GPU not detected\")\n", 41 | "except FileNotFoundError as e:\n", 42 | " print(\"GPU is not enabled in this notebook.\")\n", 43 | " print(\"Please select 'Runtime -> Change runtime type' and set the hardware accelerator to GPU.\")" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "\n", 51 | "## WARNING:\n", 52 | "### Colab will ask to restart the session after running the above cell (because it pre-loads matplotlib for some reason). You should first restart the session, then continue running the cells below. Do not re-run the cell above after restarting the session." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "id": "AfKFeJKQPPmc" 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "import os\n", 64 | "# Clone repo (Colab only)\n", 65 | "if 'COLAB_GPU' in os.environ:\n", 66 | " !git clone https://github.com/princeton-computational-imaging/SoaP/\n", 67 | " %cd /content/SoaP/" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "id": "866qbOL9kt0K" 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "!wget https://soap.cs.princeton.edu/shade_map.npy -P data/\n", 79 | "!wget https://soap.cs.princeton.edu/demo.zip -P data/\n", 80 | "!unzip data/demo.zip -d data/" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "id": "GB_OzWSbcd83" 87 | }, 88 | "source": [ 89 | "### Section 1: (Optional) What is a `frame_bundle.npz`?\n" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "id": "ut2yAwYZc1Ei" 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "%matplotlib inline\n", 101 | "\n", 102 | "import torch\n", 103 | "import numpy as np\n", 104 | "import matplotlib.pyplot as plt\n", 105 | "from utils import utils" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": { 111 | "id": "r_NBzSevc4pP" 112 | }, 113 | "source": [ 114 | "Load data from disk:" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "id": "bRhSQ01dc6m8" 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "bundle_path = \"data/demo/dragon/compressed_frame_bundle.npz\"\n", 126 | "# convert to dictionary - important, by default npz load as a namespace which can have odd behaviour\n", 127 | "bundle = dict(np.load(bundle_path, allow_pickle=True))\n", 128 | 
"# remove extra dimensions\n", 129 | "utils.de_item(bundle)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": { 135 | "id": "v0FAm169dL_d" 136 | }, 137 | "source": [ 138 | "Our bundle contains four sets of data: \n", 139 | "1. `motion` : device motion data including rotation, gravity, and acceleration \n", 140 | "2. `raw_[x]` : Bayer RAW frames enumerated from `0` to `num_raw_frames - 1`, with associated metadata \n", 141 | "3. `rgb_[x]` : Processed Apple RGB frames enumerated from `0` to `num_rgb_frames - 1`, with associated metadata \n", 142 | "4. `depth_[x]` : Apple depth maps enumerated from `0` to `num_depth_frames - 1`, with associated metadata \n", 143 | "\n", 144 | "Lets take a closer look at this data:" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "id": "RYldYeSxdLCZ" 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "bundle[\"motion\"].keys()" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": { 161 | "id": "PbsVcU-LdQBt" 162 | }, 163 | "source": [ 164 | "The motion data `motion` contains: \n", 165 | "1. `frame_count` : what frame was being recorded when the associated motion data was recorded. There can be multiple motion values for the same frame as the frequency of the accelerometer/gyroscope (100Hz) is higher than the framerate we're recording at (21fps).\n", 166 | "2. `timestamp` : absolute device time at which measurements were recorded\n", 167 | "3. `quaternion` : device relative rotation expressed in quaternion format\n", 168 | "4. `rotation_rate` : velocity of device rotation expressed in roll-pitch-yaw\n", 169 | "5. `roll_pitch_yaw` : device relative rotation expressed in roll-pitch-yaw\n", 170 | "6. `acceleration` : device relative acceleration (with gravity removed) expressed in x-y-z\n", 171 | "7. `gravity` : acceleration due to gravity expressed in x-y-z\n", 172 | "8. 
`num_motion_frames` : number of recorded measurements\n", 173 | "\n", 174 | "As an example let's plot the device roll over time:" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "id": "8xxPIJ6IdYZS" 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "roll_pitch_yaw = bundle[\"motion\"][\"roll_pitch_yaw\"] # [N,3]\n", 186 | "timestamp = bundle[\"motion\"][\"timestamp\"] # [N]\n", 187 | "roll = roll_pitch_yaw[:,0]\n", 188 | "pitch = roll_pitch_yaw[:,1]\n", 189 | "yaw = roll_pitch_yaw[:,2]\n", 190 | "\n", 191 | "plt.plot(timestamp, roll)\n", 192 | "plt.ylabel(\"Roll [Rad]\")\n", 193 | "plt.xlabel(\"Device Time [s]\")\n", 194 | "plt.show()" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": { 200 | "id": "oF1jRJgudb3g" 201 | }, 202 | "source": [ 203 | "RAW image data:" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": { 210 | "id": "c9pcNfyEdaTP" 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "frame = 0 # change this to view other frames\n", 215 | "raw = bundle[f\"raw_{frame}\"]\n", 216 | "rgb = bundle[f\"rgb_{frame}\"]\n", 217 | "depth = bundle[f\"depth_{frame}\"]" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": { 224 | "id": "R9B2Q6kjdPiK" 225 | }, 226 | "outputs": [], 227 | "source": [ 228 | "print(raw.keys())\n", 229 | "print(\"height:\", raw['height'], \"width:\", raw['width'])" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": { 235 | "id": "7x0KZnv9dgDg" 236 | }, 237 | "source": [ 238 | "Each `raw` frame consists of:\n", 239 | "1. `frame_count` : frame number, ranges from 0 - `num_raw_frames`\n", 240 | "2. `timestamp` : absolute device time at which frame was recorded\n", 241 | "3. `height, width` : frame dimensions (**WARNING**: these may not match the expected orientation of the frame, i.e. if you are recording with the phone vertical or horizontal, the `width` does not change and always refers to the long side of the capture)\n", 242 | "4. `ISO`, `exposure_time`, `aperture` : camera ISO, exposure time (seconds), and f-stop used to capture the image\n", 243 | "5. `brightness` : the estimated 'brightness' of the scene, honestly not sure what this is (pls message me if you know)\n", 244 | "6. `shutter_speed` : inverse of `exposure_time`\n", 245 | "7. `black_level`, `white_level`: min and max real RAW values\n", 246 | "8. 
`raw`, 4032 x 3024 single channel, 14-bit mosaiced bayer CFA frame\n", 247 | "\n", 248 | "Lets look at the RAW image data:" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": { 255 | "id": "aaAoETy-dhrm" 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "raw_img = raw[\"raw\"]\n", 260 | "\n", 261 | "# use simple demosaicing the fill gap values (see paper supplemental)\n", 262 | "raw_demosaiced = utils.raw_to_rgb(torch.tensor(raw_img[None,None].astype(np.int32)))[0].permute(1,2,0)\n", 263 | "raw_demosaiced = raw_demosaiced/raw_demosaiced.max()" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": { 270 | "id": "Z-yzxCOrdvK6" 271 | }, 272 | "outputs": [], 273 | "source": [ 274 | "fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))\n", 275 | "axes[0].imshow(raw_img, cmap=\"gray\")\n", 276 | "axes[0].set_title(f\"Frame {frame} Mosaiced Raw\")\n", 277 | "im = axes[1].imshow(raw_demosaiced)\n", 278 | "axes[1].set_title(f\"Frame {frame} De-Mosaiced Raw\")\n", 279 | "\n", 280 | "fig.subplots_adjust(right=0.7)\n", 281 | "plt.show()" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": { 287 | "id": "kDdX44-4dyM8" 288 | }, 289 | "source": [ 290 | "If we zoom into a small patch of the above mosaiced RAW we can see the Bayer CFA pattern:" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": { 297 | "id": "EE62TZPGdxxb" 298 | }, 299 | "outputs": [], 300 | "source": [ 301 | "plt.imshow(raw_img[:8,:8], cmap=\"gray\")\n", 302 | "plt.show()" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": { 308 | "id": "l_s4Xiosd2Op" 309 | }, 310 | "source": [ 311 | "Applying the shade map to this data we see how it corrects for the vignetting on the edges of the scene:" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": { 318 | "id": "jMMBTfHFkt0L" 319 | }, 320 | "outputs": [], 321 | "source": [ 322 | "shade_map = np.load(\"data/shade_map.npy\")\n", 323 | "raw_img_deshade = raw[\"raw\"] * shade_map" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": { 330 | "id": "V4RkBotcd4jn" 331 | }, 332 | "outputs": [], 333 | "source": [ 334 | "fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))\n", 335 | "axes[0].imshow(shade_map, cmap=\"gray\")\n", 336 | "axes[0].set_title(f\"Shade Map\")\n", 337 | "im = axes[1].imshow(raw_img_deshade, cmap=\"gray\")\n", 338 | "axes[1].set_title(f\"Frame {frame} Mosaiced Raw + Shade Map\")\n", 339 | "\n", 340 | "fig.subplots_adjust(right=0.7)\n", 341 | "plt.show()" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": { 347 | "id": "i2yFj30BeBTG" 348 | }, 349 | "source": [ 350 | "Processed RGB and depth data:" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": { 357 | "id": "nVzXhZXheEhv" 358 | }, 359 | "outputs": [], 360 | "source": [ 361 | "print(rgb.keys())\n", 362 | "print(\"height:\", rgb['height'], \"width:\", rgb['width'])" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": { 368 | "id": "oHDdV9gXeGa0" 369 | }, 370 | "source": [ 371 | "Each `rgb` frame contains:\n", 372 | "1. `frame_count`, `timestamp`, `height`, `width` : see `raw` documentation\n", 373 | "2. 
`intrinsics`: 3x3 camera intrinsics, see: [documentation](https://developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881135-intrinsicmatrix)\n", 374 | "3. `rgb`, 1920 x 1440 3 channel, 8-bit processed RGB frame" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": { 381 | "id": "jdU-m4v4eIzo" 382 | }, 383 | "outputs": [], 384 | "source": [ 385 | "print(depth.keys())\n", 386 | "print(\"height:\", depth['height'], \"width:\", depth['width'])" 387 | ] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "metadata": { 392 | "id": "rMYQNoSueK__" 393 | }, 394 | "source": [ 395 | "Each `depth` frame contains:\n", 396 | "1. `frame_count`, `timestamp`, `height`, `width` : see `raw` documentation\n", 397 | "2. `intrinsic_height`, `intrinsic_width`, `intrinsics` : 3x3 camera intrinsics, with associated frame height and width\n", 398 | "3. `lens_distortion` : look-up table for radial distortion correction, see: [documentation](https://developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881129-lensdistortionlookuptable)\n", 399 | "4. `lens_undistortion` : inverse of `lens_distortion`\n", 400 | "5. `depth_accuracy` : [accuracy of depth measurements](https://developer.apple.com/documentation/avfoundation/avdepthdata/accuracy), depends on iPhone/iOS version. `1` -> metric depth, `0` -> relative depth\n", 401 | "6. `depth`, 320 x 240 inverse depth map from monocular cues + LiDAR measurements\n", 402 | "\n", 403 | "Here's a preview of what the RGB and iPhone depth data look like:" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": { 410 | "id": "72Ar-jykeP_a" 411 | }, 412 | "outputs": [], 413 | "source": [ 414 | "fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 8))\n", 415 | "axes[0].imshow(rgb['rgb'])\n", 416 | "axes[0].set_title(\"Frame {0} Image\".format(frame))\n", 417 | "im = axes[1].imshow(depth['depth'], cmap='RdYlBu')\n", 418 | "axes[1].set_title(\"Frame {0} iPhone Depth\".format(frame))\n", 419 | "\n", 420 | "fig.subplots_adjust(right=0.82)\n", 421 | "cbar_ax = fig.add_axes([0.85, 0.15, 0.02, 0.7])\n", 422 | "fig.colorbar(im, cax=cbar_ax, label='Depth [m]')\n", 423 | "plt.show()\n", 424 | "\n", 425 | "print()\n", 426 | "print(\"Camera Info at Frame {0}: \\n\".format(frame))\n", 427 | "print(\"Timestamp:\", rgb['timestamp'], \"\\n\")\n", 428 | "print(\"Camera Intrinsics: \\n\", rgb['intrinsics'])" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": { 434 | "id": "GWrA79XSeTJ4" 435 | }, 436 | "source": [ 437 | "### Section 2: Training on a `frame_bundle.npz`\n", 438 | "This section will cover how to fit our model to an input RAW frame_bundle.npz, monitor the model's training, and plot its outputs.\n" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": { 445 | "id": "Xg4HzP48et2G" 446 | }, 447 | "outputs": [], 448 | "source": [ 449 | "import torch\n", 450 | "import numpy as np\n", 451 | "import matplotlib.pyplot as plt\n", 452 | "\n", 453 | "from train import *\n", 454 | "from utils import utils" 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": { 460 | "id": "TqJAuR2Sf1tt" 461 | }, 462 | "source": [ 463 | "Lets begin by taking a look at the images in our `compressed_frame_bundle.npz` (this is a sub-sampled `frame_bundle.npz` with 9 images instead of 42 to speed up training/download time)" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | 
"execution_count": null, 469 | "metadata": { 470 | "id": "Hma153d3fbOI" 471 | }, 472 | "outputs": [], 473 | "source": [ 474 | "bundle = dict(np.load(\"data/demo/dragon/compressed_frame_bundle.npz\", allow_pickle=True))\n", 475 | "utils.de_item(bundle)\n", 476 | "\n", 477 | "# plot the first 5 images, downsample 2x for speed\n", 478 | "fig, ax = plt.subplots(1,5, figsize=(19.5,5))\n", 479 | "for i in range(5):\n", 480 | " ax[i].imshow(bundle[f\"rgb_{i}\"][\"rgb\"][::2,::2])\n", 481 | " ax[i].set_title(f\"Image {i}\")\n", 482 | "\n", 483 | "# remove ticks\n", 484 | "for a in ax:\n", 485 | " a.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)\n", 486 | "# adjust spacing\n", 487 | "plt.subplots_adjust(wspace=0.0)\n", 488 | "plt.show()" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": { 494 | "id": "0N382cN3eyP9" 495 | }, 496 | "source": [ 497 | "While they barely appear to change, there's actually still more than enough parallax here to recover meaningful depth. \n", 498 | "\n", 499 | "We begin by launching tensorboard so we can see our training progress:" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": { 506 | "id": "_YhuEtN7eyWa" 507 | }, 508 | "outputs": [], 509 | "source": [ 510 | "%load_ext tensorboard\n", 511 | "%tensorboard --logdir lightning_logs" 512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "metadata": { 517 | "id": "sZyyIi2Bkt0L" 518 | }, 519 | "source": [ 520 | "Next we run `train.py`. On an RTX 4090 this should train in a couple minutes, on Colab this will be quite a bit slower. \n", 521 | "\n", 522 | "You can refresh the tensorboard window above to watch the training progress." 523 | ] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "execution_count": null, 528 | "metadata": { 529 | "id": "jz3HSxV5g7Gy" 530 | }, 531 | "outputs": [], 532 | "source": [ 533 | "# only run to 30 epochs to save time, remove the flag to run for default 100 epochs\n", 534 | "!python3 train.py --name dragon-test --bundle_path data/demo/dragon/compressed_frame_bundle.npz --max_epochs 30" 535 | ] 536 | }, 537 | { 538 | "cell_type": "markdown", 539 | "metadata": { 540 | "id": "0EHa5z5djQlI" 541 | }, 542 | "source": [ 543 | "To view our reconstruction we load the model from disk:" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": null, 549 | "metadata": { 550 | "id": "-mWQko-gjT8w" 551 | }, 552 | "outputs": [], 553 | "source": [ 554 | "model = BundleMLP.load_from_checkpoint(\"checkpoints/dragon-test/last.ckpt\", device=\"cuda\")" 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": null, 560 | "metadata": { 561 | "id": "qcG0vZwfjW4A" 562 | }, 563 | "outputs": [], 564 | "source": [ 565 | "# move model components to GPU\n", 566 | "model = model.eval()\n", 567 | "model = model.to('cuda')\n", 568 | "model.rgb_volume = model.rgb_volume.to('cuda')\n", 569 | "model.processed_rgb_volume = model.processed_rgb_volume.to('cuda')\n", 570 | "model.model_rotation = model.model_rotation.to('cuda')\n", 571 | "model.model_translation = model.model_translation.to('cuda')\n", 572 | "model.reference_intrinsics = model.reference_intrinsics.to('cuda')\n", 573 | "model.model_rotation.reference_rotation = model.model_rotation.reference_rotation.to('cuda')\n", 574 | "\n", 575 | "# use all encoding levels for inference\n", 576 | "model.mask = torch.ones_like(model.mask)" 577 | ] 578 | }, 579 | { 580 | "cell_type": "markdown", 581 | "metadata": { 582 | "id": 
"pPbUQXv0jaTa" 583 | }, 584 | "source": [ 585 | "And use `model.generate_outputs` to generate the outputs:" 586 | ] 587 | }, 588 | { 589 | "cell_type": "code", 590 | "execution_count": null, 591 | "metadata": { 592 | "id": "F9uc5Rnyjgap" 593 | }, 594 | "outputs": [], 595 | "source": [ 596 | "rgb, rgb_raw, rgb_processed, depth, depth_img = model.generate_outputs(frame=0, height=1920, width=1440, u_lims=[0.025,0.975], v_lims=[0.025,0.975])" 597 | ] 598 | }, 599 | { 600 | "cell_type": "markdown", 601 | "metadata": { 602 | "id": "nvxRR70Sjksj" 603 | }, 604 | "source": [ 605 | "Outputs:\n", 606 | "1. `rgb` : color values I(u,v) output by implicit image model\n", 607 | "2. `rgb_raw` : corresponding sampled values from bayer RAW volume\n", 608 | "3. `rgb_processed` : corresponding sampled values from processed RGB volume\n", 609 | "4. `depth` : depth values D(u,v) from shakes-on-a-plane implicit depth model\n", 610 | "5. `depth_img` : same as `depth` but with colormap applied for tensorboard visualization" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": null, 616 | "metadata": { 617 | "id": "C-uH6IaejnEV" 618 | }, 619 | "outputs": [], 620 | "source": [ 621 | "fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(14, 8))\n", 622 | "axes[0].imshow((rgb.permute(1,2,0).cpu()).clip(0,1)) # increase brightness\n", 623 | "axes[0].set_title(\"Reconstructed Image I(u,v)\")\n", 624 | "axes[1].imshow(rgb_processed.permute(1,2,0).cpu())\n", 625 | "axes[1].set_title(\"Processed RGB\")\n", 626 | "axes[2].imshow(depth.cpu(), cmap=\"RdYlBu\")\n", 627 | "axes[2].set_title(\"Reconstructed Depth D(u,v)\")\n", 628 | "plt.show()" 629 | ] 630 | }, 631 | { 632 | "cell_type": "markdown", 633 | "metadata": { 634 | "id": "su3XUolojxfm" 635 | }, 636 | "source": [ 637 | "### Section 3: Training on PNGs\n", 638 | "This section is almost identical to the previous one, except we will learn how to convert a stack of `PNGs` into a `frame_bundle.npz` before fitting our model to it." 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": null, 644 | "metadata": { 645 | "id": "TJ0UL1GYkIaj" 646 | }, 647 | "outputs": [], 648 | "source": [ 649 | "import numpy as np\n", 650 | "import matplotlib.pyplot as plt\n", 651 | "import utils.utils as utils\n", 652 | "from glob import glob\n", 653 | "from train import *" 654 | ] 655 | }, 656 | { 657 | "cell_type": "markdown", 658 | "metadata": { 659 | "id": "VWi3iakjkhvE" 660 | }, 661 | "source": [ 662 | "You can replace the code below with any filetype (e.g., load an MP4 with OpenCV), as long as `imgs` is a `NxHxWxC` array, where N is the number of frames." 
663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": null, 668 | "metadata": { 669 | "id": "8NosYD8BkU_8" 670 | }, 671 | "outputs": [], 672 | "source": [ 673 | "imgs = sorted(glob(\"data/demo/dragon-rgb/*.png\")) # change file extension to match your filetypes\n", 674 | "imgs = np.array([plt.imread(img)[:,:,:3] for img in imgs]) # remove alpha channel and load\n", 675 | "\n", 676 | "print(\"Number of images: \", len(imgs))\n", 677 | "# plot first image, last image\n", 678 | "fig, ax = plt.subplots(1,2, figsize=(10,5))\n", 679 | "ax[0].imshow(imgs[0])\n", 680 | "ax[0].set_title(\"Image 0\")\n", 681 | "ax[1].imshow(imgs[-1])\n", 682 | "ax[1].set_title(f\"Image {len(imgs)-1}\")\n", 683 | "plt.show()" 684 | ] 685 | }, 686 | { 687 | "cell_type": "markdown", 688 | "metadata": { 689 | "id": "XLyPF5h3k8Qe" 690 | }, 691 | "source": [ 692 | "For our projective camera model to work we'll need to supply it with [camera intrinsics](https://en.wikipedia.org/wiki/Camera_matrix). Here we'll assume we don't have and calibrated intrinsics and will have to create our own.\n", 693 | "\n", 694 | "We'll set the camera centers `cx` and `cy` to be the center of the image:" 695 | ] 696 | }, 697 | { 698 | "cell_type": "code", 699 | "execution_count": null, 700 | "metadata": { 701 | "id": "sy72i80WkX5z" 702 | }, 703 | "outputs": [], 704 | "source": [ 705 | "cy = imgs.shape[1] // 2 # set centers to the middle of the image\n", 706 | "cx = imgs.shape[2] // 2\n", 707 | "print(\"Center y: \", cy, \"\\nCenter x: \", cx)" 708 | ] 709 | }, 710 | { 711 | "cell_type": "markdown", 712 | "metadata": { 713 | "id": "y4KSGYJzlZl1" 714 | }, 715 | "source": [ 716 | "If we don't know the focal length of the camera, we can use a best guess of its FOV (around 70 degrees for a standard phone camera) to calculate it:" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": null, 722 | "metadata": { 723 | "id": "wlZVRRrplY_i" 724 | }, 725 | "outputs": [], 726 | "source": [ 727 | "focal = min(cx, cy)/np.tan(70 * (np.pi/180/2)) # 70 degree field of view\n", 728 | "print(\"Focal length (pixels): \", focal)" 729 | ] 730 | }, 731 | { 732 | "cell_type": "code", 733 | "execution_count": null, 734 | "metadata": { 735 | "id": "3-P3IkZ2lbtF" 736 | }, 737 | "outputs": [], 738 | "source": [ 739 | "intrinsics = np.array([[focal, 0, 0],\n", 740 | " [0, focal, 0],\n", 741 | " [cx, cy, 1]])" 742 | ] 743 | }, 744 | { 745 | "cell_type": "markdown", 746 | "metadata": { 747 | "id": "awjacroSlgeq" 748 | }, 749 | "source": [ 750 | "These and the images are all we need to make our custom frame bundle, which we save to the same folder as the input data:" 751 | ] 752 | }, 753 | { 754 | "cell_type": "code", 755 | "execution_count": null, 756 | "metadata": { 757 | "id": "97_QQvqhle9E" 758 | }, 759 | "outputs": [], 760 | "source": [ 761 | "rgb_bundle = {}\n", 762 | "for i in range(len(imgs)):\n", 763 | " rgb = {\"rgb\": imgs[i], \"intrinsics\": intrinsics, \"height\": imgs.shape[2], \"width\": imgs.shape[1]}\n", 764 | " rgb_bundle[f'rgb_{i}'] = rgb\n", 765 | "rgb_bundle['num_rgb_frames'] = len(imgs)\n", 766 | "rgb_bundle['num_raw_frames'] = 0\n", 767 | "rgb_bundle['num_depth_frames'] = 0\n", 768 | "rgb_bundle['motion'] = None\n", 769 | "np.savez('data/demo/dragon-rgb/frame_bundle.npz', **rgb_bundle)" 770 | ] 771 | }, 772 | { 773 | "cell_type": "markdown", 774 | "metadata": { 775 | "id": "HfxU3mMQlpWv" 776 | }, 777 | "source": [ 778 | "Now we can train our model as before:" 779 | ] 780 | }, 781 | { 782 | "cell_type": 
"code", 783 | "execution_count": null, 784 | "metadata": { 785 | "id": "JjRx9O-Jlm1K" 786 | }, 787 | "outputs": [], 788 | "source": [ 789 | "%load_ext tensorboard\n", 790 | "%tensorboard --logdir lightning_logs" 791 | ] 792 | }, 793 | { 794 | "cell_type": "markdown", 795 | "metadata": { 796 | "id": "Qv-Ss6PBl6hS" 797 | }, 798 | "source": [ 799 | "However we now have to add flags `--no_device_rotations`, `--no_phone_depth`, and `--no_raw` to let the training code know that we're only passing in RGB data and nothing else." 800 | ] 801 | }, 802 | { 803 | "cell_type": "code", 804 | "execution_count": null, 805 | "metadata": { 806 | "id": "O-RQ4woClzLv" 807 | }, 808 | "outputs": [], 809 | "source": [ 810 | "# only run to 30 epochs to save time, remove the flag to run for default 100 epochs\n", 811 | "!python3 train.py --name dragon-rgb-test --bundle_path data/demo/dragon-rgb/frame_bundle.npz --max_epochs 30 --no_device_rotations --no_phone_depth --no_raw" 812 | ] 813 | }, 814 | { 815 | "cell_type": "markdown", 816 | "metadata": { 817 | "id": "Qv85okr3lzL5" 818 | }, 819 | "source": [ 820 | "To view our reconstruction we load the model from disk:" 821 | ] 822 | }, 823 | { 824 | "cell_type": "code", 825 | "execution_count": null, 826 | "metadata": { 827 | "id": "ZQCjH4B_lzL5" 828 | }, 829 | "outputs": [], 830 | "source": [ 831 | "model = BundleMLP.load_from_checkpoint(\"checkpoints/dragon-rgb-test/last.ckpt\", device=\"cuda\")" 832 | ] 833 | }, 834 | { 835 | "cell_type": "code", 836 | "execution_count": null, 837 | "metadata": { 838 | "id": "vUr4jWsTlzL5" 839 | }, 840 | "outputs": [], 841 | "source": [ 842 | "# move model components to GPU\n", 843 | "model = model.eval()\n", 844 | "model = model.to('cuda')\n", 845 | "model.rgb_volume = model.rgb_volume.to('cuda')\n", 846 | "model.processed_rgb_volume = model.processed_rgb_volume.to('cuda')\n", 847 | "model.model_rotation = model.model_rotation.to('cuda')\n", 848 | "model.model_translation = model.model_translation.to('cuda')\n", 849 | "model.reference_intrinsics = model.reference_intrinsics.to('cuda')\n", 850 | "# model.model_rotation.reference_rotation = model.model_rotation.reference_rotation.to('cuda') # doesnt exist\n", 851 | "\n", 852 | "# use all encoding levels for inference\n", 853 | "model.mask = torch.ones_like(model.mask)" 854 | ] 855 | }, 856 | { 857 | "cell_type": "markdown", 858 | "metadata": { 859 | "id": "BEg6nQvGlzL5" 860 | }, 861 | "source": [ 862 | "And use `model.generate_outputs` to generate the outputs:" 863 | ] 864 | }, 865 | { 866 | "cell_type": "code", 867 | "execution_count": null, 868 | "metadata": { 869 | "id": "E_EQO9S1lzL5" 870 | }, 871 | "outputs": [], 872 | "source": [ 873 | "rgb, rgb_raw, rgb_processed, depth, depth_img = model.generate_outputs(frame=0, height=1920, width=1440, u_lims=[0.025,0.975], v_lims=[0.025,0.975])" 874 | ] 875 | }, 876 | { 877 | "cell_type": "markdown", 878 | "metadata": { 879 | "id": "H2e610IylzL5" 880 | }, 881 | "source": [ 882 | "Outputs:\n", 883 | "1. `rgb` : color values I(u,v) output by implicit image model\n", 884 | "2. `rgb_raw` : corresponding sampled values from bayer RAW volume\n", 885 | "3. `rgb_processed` : corresponding sampled values from processed RGB volume\n", 886 | "4. `depth` : depth values D(u,v) from shakes-on-a-plane implicit depth model\n", 887 | "5. 
`depth_img` : same as `depth` but with colormap applied for tensorboard visualization" 888 | ] 889 | }, 890 | { 891 | "cell_type": "code", 892 | "execution_count": null, 893 | "metadata": { 894 | "id": "OdliAs7blzL5" 895 | }, 896 | "outputs": [], 897 | "source": [ 898 | "fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(14, 8))\n", 899 | "axes[0].imshow((rgb.permute(1,2,0).cpu()).clip(0,1)) # increase brightness\n", 900 | "axes[0].set_title(\"Reconstructed Image I(u,v)\")\n", 901 | "axes[1].imshow(rgb_processed.permute(1,2,0).cpu())\n", 902 | "axes[1].set_title(\"Processed RGB\")\n", 903 | "axes[2].imshow(depth.cpu(), cmap=\"RdYlBu\")\n", 904 | "axes[2].set_title(\"Reconstructed Depth D(u,v)\")\n", 905 | "plt.show()" 906 | ] 907 | } 908 | ], 909 | "metadata": { 910 | "accelerator": "GPU", 911 | "colab": { 912 | "collapsed_sections": [ 913 | "4J5E1CJukt0H", 914 | "GB_OzWSbcd83", 915 | "GWrA79XSeTJ4", 916 | "su3XUolojxfm" 917 | ], 918 | "gpuType": "T4", 919 | "provenance": [] 920 | }, 921 | "kernelspec": { 922 | "display_name": "Python 3", 923 | "name": "python3" 924 | }, 925 | "language_info": { 926 | "codemirror_mode": { 927 | "name": "ipython", 928 | "version": 3 929 | }, 930 | "file_extension": ".py", 931 | "mimetype": "text/x-python", 932 | "name": "python", 933 | "nbconvert_exporter": "python", 934 | "pygments_lexer": "ipython3", 935 | "version": "3.10.9" 936 | } 937 | }, 938 | "nbformat": 4, 939 | "nbformat_minor": 0 940 | } 941 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import torch 4 | 5 | def de_casteljau(betas, t): 6 | """ castle interpolation, for knights 7 | see: https://en.wikipedia.org/wiki/De_Casteljau%27s_algorithm 8 | assumes t in [0,1] 9 | """ 10 | t = t[None,None,:,0] # 1,1,T 11 | 12 | out = betas.clone() 13 | N = betas.shape[0] # number of points 14 | for i in range(1, N): 15 | out = out[:-1,:] * (1-t) + out[1:,:] * t 16 | return out.squeeze(0).permute(1,0) 17 | 18 | def uvz_to_xyz(uvz, intrinsics, img_width, img_height): 19 | """ Get xyz coordinates in meters from uv coordinates [0-1] 20 | iPhone poses are right-handed system, +x is right towards power button, +y is up towards front camera, +z is towards user's face 21 | images are opencv convention right-handed, x to the right, y down, and z into the world (away from face) 22 | """ 23 | u = uvz[:,0:1] * img_width 24 | v = uvz[:,1:2] * img_height 25 | z = uvz[:,2:3] 26 | 27 | # intrinsics are for landscape sensor, top row: y, middle row: x, bottom row: z 28 | fy, cy, fx, cx = intrinsics[:,0,0,None], intrinsics[:,2,0,None], intrinsics[:,1,1,None], intrinsics[:,2,1,None] 29 | 30 | x = (u - cx) * (z/fx) 31 | y = (v - cy) * (z/fy) 32 | 33 | # rotate around the camera's x-axis by 180 degrees 34 | # now point cloud is in y up and z towards face convention 35 | y = -y 36 | z = -z 37 | 38 | # match pose convention (y,x,z) 39 | return torch.cat((x,y,z), dim=1) 40 | 41 | def xyz_to_uvz(uvz, intrinsics, img_width, img_height): 42 | """ Get uv coordinates [0-1] from coordinates rays in meters 43 | """ 44 | fy, cy, fx, cx = intrinsics[:,0,0,None], intrinsics[:,2,0,None], intrinsics[:,1,1,None], intrinsics[:,2,1,None] 45 | x, y, z = uvz[:,0:1], uvz[:,1:2], uvz[:,2:3] 46 | 47 | # undo rotation from convert_px_rays_to_m 48 | y = -y 49 | z = -z 50 | 51 | u = (x * (fx/z) + cx) / img_width 52 | v = (y * (fy/z) + cy) / img_height 53 | 54 | 
return torch.cat((u,v,z), dim=1) 55 | 56 | def convert_quaternions_to_rot(quaternions): 57 | """ Convert quaternions (xyzw) to 3x3 rotation matrices. 58 | Adapted from: https://automaticaddison.com/how-to-convert-a-quaternion-to-a-rotation-matrix 59 | """ 60 | 61 | qx, qy, qz, qw = quaternions[:,0], quaternions[:,1], quaternions[:,2], quaternions[:,3] 62 | 63 | R00 = 2 * ((qw * qw) + (qx * qx)) - 1 64 | R01 = 2 * ((qx * qy) - (qw * qz)) 65 | R02 = 2 * ((qx * qz) + (qw * qy)) 66 | 67 | R10 = 2 * ((qx * qy) + (qw * qz)) 68 | R11 = 2 * ((qw * qw) + (qy * qy)) - 1 69 | R12 = 2 * ((qy * qz) - (qw * qx)) 70 | 71 | R20 = 2 * ((qx * qz) - (qw * qy)) 72 | R21 = 2 * ((qy * qz) + (qw * qx)) 73 | R22 = 2 * ((qw * qw) + (qz * qz)) - 1 74 | 75 | R = torch.stack([R00, R01, R02, R10, R11, R12, R20, R21, R22], dim=-1) 76 | R = R.reshape(-1,3,3) 77 | 78 | return R 79 | 80 | def multi_interp(x, xp, fp): 81 | """ Simple extension of np.interp for independent 82 | linear interpolation of all axes of fp 83 | """ 84 | if torch.is_tensor(fp): 85 | out = [torch.tensor(np.interp(x, xp, fp[:,i]), dtype=fp.dtype) for i in range(fp.shape[-1])] 86 | return torch.stack(out, dim=-1) 87 | else: 88 | out = [np.interp(x, xp, fp[:,i]) for i in range(fp.shape[-1])] 89 | return np.stack(out, axis=-1) 90 | 91 | def raw_to_rgb(raw_frames): 92 | """ Convert RAW mosaic into three-channel RGB volume 93 | by only in-filling empty pixels. 94 | Returns volume of shape: (T, C, H, W) 95 | """ 96 | 97 | B = raw_frames[:,:,0::2,1::2].float() 98 | G1 = raw_frames[:,:,0::2,0::2].float() 99 | G2 = raw_frames[:,:,1::2,1::2].float() 100 | R = raw_frames[:,:,1::2,0::2].float() 101 | 102 | # Blue 103 | B_upsampled = torch.zeros_like(B).repeat(1,1,2,2) 104 | B_left = torch.roll(B, 1, dims=3) 105 | B_down = torch.roll(B, -1, dims=2) 106 | B_diag = torch.roll(B, [-1,1], dims=[2,3]) 107 | 108 | B_upsampled[:,:,0::2,1::2] = B 109 | B_upsampled[:,:,0::2,0::2] = (B + B_left)/2 110 | B_upsampled[:,:,1::2,1::2] = (B + B_down)/2 111 | B_upsampled[:,:,1::2,0::2] = (B + B_down + B_left + B_diag)/4 112 | 113 | # Green 114 | G_upsampled = torch.zeros_like(G1).repeat(1,1,2,2) 115 | G1_right = torch.roll(G1, -1, dims=3) 116 | G1_down = torch.roll(G1, -1, dims=2) 117 | 118 | G2_left = torch.roll(G2, 1, dims=3) 119 | G2_up = torch.roll(G2, 1, dims=2) 120 | 121 | G_upsampled[:,:,0::2,0::2] = G1 122 | G_upsampled[:,:,0::2,1::2] = (G1 + G1_right + G2 + G2_up)/4 123 | G_upsampled[:,:,1::2,0::2] = (G1 + G1_down + G2 + G2_left)/4 124 | G_upsampled[:,:,1::2,1::2] = G2 125 | G_upsampled = G_upsampled 126 | 127 | # Red 128 | R_upsampled = torch.zeros_like(R).repeat(1,1,2,2) 129 | R_right = torch.roll(R, -1, dims=3) 130 | R_up = torch.roll(R, 1, dims=2) 131 | R_diag = torch.roll(R, [1,-1], dims=[2,3]) 132 | 133 | R_upsampled[:,:,1::2,0::2] = R 134 | R_upsampled[:,:,1::2,1::2] = (R + R_right)/2 135 | R_upsampled[:,:,0::2,0::2] = (R + R_up)/2 136 | R_upsampled[:,:,0::2,1::2] = (R + R_up + R_right + R_diag)/4 137 | 138 | rgb_volume = torch.concat([R_upsampled, G_upsampled, B_upsampled], dim=0).permute(1,0,2,3) # T, C, H, W 139 | 140 | return rgb_volume 141 | 142 | def de_item(bundle): 143 | """ Call .item() on all dictionary items 144 | removes unnecessary extra dimension 145 | """ 146 | 147 | bundle['motion'] = bundle['motion'].item() 148 | 149 | if 'num_rgb_frames' not in bundle: 150 | return # motion bundle 151 | 152 | for i in range(bundle['num_rgb_frames']): 153 | bundle[f'rgb_{i}'] = bundle[f'rgb_{i}'].item() 154 | 155 | for i in range(bundle['num_raw_frames']): 156 | 
bundle[f'raw_{i}'] = bundle[f'raw_{i}'].item() 157 | 158 | for i in range(bundle['num_depth_frames']): 159 | bundle[f'depth_{i}'] = bundle[f'depth_{i}'].item() 160 | 161 | def debatch(x): 162 | """ Collapse batch and point dimensions together 163 | """ 164 | 165 | if len(x.shape) <= 1: 166 | raise Exception("This tensor is too small to debatch.") 167 | elif len(x.shape) == 2: 168 | return x.reshape(x.shape[0] * x.shape[1]) 169 | else: 170 | return x.reshape(x.shape[0] * x.shape[1], *x.shape[2:]) 171 | 172 | def colorize_tensor(value, vmin=None, vmax=None, cmap=None, colorbar=False, height=9.6, width=7.2): 173 | """ Convert tensor to 3 channel RGB array according to colors from cmap 174 | similar usage to plt.imshow 175 | """ 176 | assert len(value.shape) == 2 # H x W 177 | 178 | fig, ax = plt.subplots(1,1) 179 | fig.set_size_inches(width,height) 180 | a = ax.imshow(value.detach().cpu(), vmin=vmin, vmax=vmax, cmap=cmap) 181 | ax.set_axis_off() 182 | if colorbar: 183 | cbar = plt.colorbar(a, fraction=0.05) 184 | cbar.ax.tick_params(labelsize=30) 185 | plt.tight_layout() 186 | plt.close() 187 | 188 | # Draw figure on canvas 189 | fig.canvas.draw() 190 | 191 | # Convert the figure to numpy array, read the pixel values and reshape the array 192 | img = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8) 193 | img = img.reshape(fig.canvas.get_width_height()[::-1] + (3,)) 194 | 195 | # Normalize into 0-1 range for TensorBoard(X). Swap axes for newer versions where API expects colors in first dim 196 | img = img / 255.0 197 | 198 | return torch.tensor(img).permute(2,0,1).float() --------------------------------------------------------------------------------
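Note on utils.de_casteljau: the translation, rotation, and intrinsics models in train.py all turn a small set of Bezier control points (defaults: 21 for motion, 4 for intrinsics) into smooth trajectories over t in [0,1] by evaluating this routine. The snippet below is a minimal usage sketch, not part of the repository; the 5 random control points and 7 query times are arbitrary, chosen only to mirror the (control_points, 3, 1) layout that TranslationModel uses for its translation_betas.

    import torch
    from utils import utils

    # Illustrative control points: 5 knots of a 3-D Bezier curve, shape (control_points, 3, 1)
    betas = torch.randn(5, 3, 1)
    t = torch.linspace(0, 1, 7)[:, None]  # query times in [0, 1], shape (T, 1)

    curve = utils.de_casteljau(betas, t)  # -> (T, 3) points along the curve

    # De Casteljau evaluation interpolates its endpoints:
    # the curve at t=0 is the first control point, at t=1 the last.
    assert torch.allclose(curve[0], betas[0, :, 0])
    assert torch.allclose(curve[-1], betas[-1, :, 0])

Because the curve is smooth in t, the motion and intrinsics models get temporally smooth offsets from very few parameters, and the number of control points bounds how much high-frequency motion they can express.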