├── utils
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── Camera.cpython-38.pyc
│   │   ├── __init__.cpython-37.pyc
│   │   ├── __init__.cpython-38.pyc
│   │   ├── ArgParser.cpython-37.pyc
│   │   └── ArgParser.cpython-38.pyc
│   ├── render
│   │   ├── __pycache__
│   │   │   ├── noise.cpython-37.pyc
│   │   │   ├── noise.cpython-38.pyc
│   │   │   ├── openExr.cpython-37.pyc
│   │   │   └── openExr.cpython-38.pyc
│   │   ├── noise.py
│   │   └── openExr.py
│   ├── net
│   │   ├── __pycache__
│   │   │   ├── basic_layer.cpython-37.pyc
│   │   │   └── basic_layer.cpython-38.pyc
│   │   └── basic_layer.py
│   ├── ArgParser.py
│   └── Camera.py
├── dataset
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-38.pyc
│   │   └── dataset.cpython-38.pyc
│   ├── dataset_generator.py
│   ├── dataset.py
│   └── Generator.py
├── Image_formation
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-38.pyc
│   │   └── renderer.cpython-38.pyc
│   └── renderer.py
├── fisheye_mask.npy
├── checkpoint
│   └── pattern.mat
├── model
│   ├── __pycache__
│   │   ├── e2e.cpython-38.pyc
│   │   ├── Metasurface.cpython-37.pyc
│   │   ├── Metasurface.cpython-38.pyc
│   │   └── StereoMatching.cpython-38.pyc
│   ├── Metasurface.py
│   ├── e2e.py
│   └── StereoMatching.py
├── calib_results.txt
├── README.md
├── test.py
├── train.py
├── environment.yaml
└── LICENSE
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/Image_formation/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/fisheye_mask.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/fisheye_mask.npy
--------------------------------------------------------------------------------
/checkpoint/pattern.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/checkpoint/pattern.mat
--------------------------------------------------------------------------------
/model/__pycache__/e2e.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/model/__pycache__/e2e.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/Camera.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/utils/__pycache__/Camera.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/utils/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/utils/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/dataset/__pycache__/__init__.cpython-38.pyc:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/dataset/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /dataset/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/dataset/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/ArgParser.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/utils/__pycache__/ArgParser.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/ArgParser.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/utils/__pycache__/ArgParser.cpython-38.pyc -------------------------------------------------------------------------------- /model/__pycache__/Metasurface.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/model/__pycache__/Metasurface.cpython-37.pyc -------------------------------------------------------------------------------- /model/__pycache__/Metasurface.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/model/__pycache__/Metasurface.cpython-38.pyc -------------------------------------------------------------------------------- /utils/render/__pycache__/noise.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/utils/render/__pycache__/noise.cpython-37.pyc -------------------------------------------------------------------------------- /utils/render/__pycache__/noise.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/utils/render/__pycache__/noise.cpython-38.pyc -------------------------------------------------------------------------------- /model/__pycache__/StereoMatching.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/model/__pycache__/StereoMatching.cpython-38.pyc -------------------------------------------------------------------------------- /utils/net/__pycache__/basic_layer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/utils/net/__pycache__/basic_layer.cpython-37.pyc -------------------------------------------------------------------------------- /utils/net/__pycache__/basic_layer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/utils/net/__pycache__/basic_layer.cpython-38.pyc -------------------------------------------------------------------------------- /utils/render/__pycache__/openExr.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/utils/render/__pycache__/openExr.cpython-37.pyc -------------------------------------------------------------------------------- /utils/render/__pycache__/openExr.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/utils/render/__pycache__/openExr.cpython-38.pyc -------------------------------------------------------------------------------- /Image_formation/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/Image_formation/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Image_formation/__pycache__/renderer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ches00/360-SL-Metasurface/HEAD/Image_formation/__pycache__/renderer.cpython-38.pyc -------------------------------------------------------------------------------- /dataset/dataset_generator.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from dataset.Generator import Generator 4 | from utils.ArgParser import Argument 5 | 6 | if __name__ == "__main__": 7 | 8 | parser = Argument() 9 | args = parser.parse() 10 | 11 | generator = Generator(args) 12 | generator.gen() 13 | 14 | 15 | -------------------------------------------------------------------------------- /utils/render/noise.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class GaussianNoise: 4 | def __init__(self, mean, stddev, device): 5 | self.mean = mean 6 | self.stddev = stddev 7 | self.device = device 8 | 9 | def sample(self, size): 10 | noise = torch.zeros(size, device=self.device).normal_(self.mean, self.stddev) 11 | return noise -------------------------------------------------------------------------------- /calib_results.txt: -------------------------------------------------------------------------------- 1 | #polynomial coefficients for the DIRECT mapping function (ocam_model.ss in MATLAB). These are used by cam2world 2 | 3 | 5 -1.890172e+02 0.000000e+00 2.564080e-03 -5.912461e-06 2.369445e-08 4 | 5 | #polynomial coefficients for the inverse mapping function (ocam_model.invpol in MATLAB). 
These are used by world2cam 6 | 7 | 11 266.128731 140.429458 -8.116038 24.186340 7.952007 -2.508722 8.587325 6.530621 -2.919406 -3.251640 -0.658070 8 | 9 | #center: "row" and "column", starting from 0 (C convention) 10 | 11 | 199.772421 320.450876 12 | 13 | #affine parameters "c", "d", "e" 14 | 15 | 1.000124 -0.000228 -0.000155 16 | 17 | #image size: "height" and "width" 18 | 19 | 400 640 20 | 21 | -------------------------------------------------------------------------------- /model/Metasurface.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | 6 | 7 | class Metasurface: 8 | def __init__(self, opt, device): 9 | self.device = device 10 | self.N_theta = opt.N_theta 11 | self.N_alpha = opt.N_alpha 12 | self.N_phase = opt.N_phase 13 | 14 | self.phase = torch.rand(self.N_phase, self.N_phase).to(device) * torch.pi*2 - torch.pi # initialization 15 | 16 | 17 | self.wl = opt.wave_length 18 | self.p = opt.pixel_pitch 19 | 20 | 21 | 22 | def propagate(self): 23 | return torch.abs(torch.fft.fftshift(torch.fft.fft2(torch.fft.fftshift(torch.exp(1j * self.phase))))) ** 2 24 | 25 | def get_phase(self): 26 | return self.phase 27 | 28 | def update_phase(self, new_phase): 29 | self.phase = new_phase 30 | 31 | 32 | -------------------------------------------------------------------------------- /utils/render/openExr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import OpenEXR 3 | import Imath 4 | 5 | def read_exr_as_np(path): 6 | """ Read exr file as numpy array 7 | """ 8 | 9 | f = OpenEXR.InputFile(path) 10 | channels = f.header()['channels'] 11 | 12 | dw = f.header()['dataWindow'] 13 | size = (dw.max.x - dw.min.x + 1, dw.max.y - dw.min.y + 1) 14 | 15 | ch_names = [] 16 | image = np.zeros((size[1], size[0], len(channels) - 1)) 17 | 18 | for i, ch_name in enumerate(channels): 19 | ch_names.append(ch_name) 20 | ch_dtype = channels[ch_name].type 21 | ch_str = f.channel(ch_name, ch_dtype) 22 | 23 | if ch_dtype == Imath.PixelType(Imath.PixelType.FLOAT): 24 | np_dtype = np.float32 25 | else: 26 | np_dtype = np.float16 27 | image_ch = np.fromstring(ch_str, dtype=np_dtype) 28 | image_ch.shape = (size[1], size[0]) 29 | 30 | if ch_name == "A": 31 | continue 32 | else: 33 | image[:, :, 3-i] = image_ch 34 | 35 | return image 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /model/e2e.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | 6 | 7 | class E2E(nn.Module): 8 | def __init__(self, metasurface, renderer, estimator): 9 | super().__init__() 10 | self.metasurface = metasurface 11 | self.renderer = renderer 12 | self.estimator = estimator 13 | 14 | self.num = 0 15 | 16 | 17 | 18 | 19 | def forward(self, ref_im_list, depth_map_list, occ_im_list, normal_im_list): 20 | synthetic_images, illum_img = self.renderer.render(ref_im_list, depth_map_list, occ_im_list, normal_im_list) 21 | 22 | pred_depth = self.estimator(synthetic_images) 23 | return pred_depth, synthetic_images 24 | 25 | def get_meta_phase(self): 26 | return self.metasurface.get_phase() 27 | 28 | def update_phase(self, new_phase): 29 | self.metasurface.update_phase(new_phase) 30 | 31 | def get_pattern(self): 32 | return self.metasurface.propagate() 33 | 34 | def get_estimator(self): 35 | return self.estimator 36 | 37 | 
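38 | # Usage sketch: the wiring below mirrors how train.py and test.py assemble the
39 | # end-to-end model. The camera poses and argument names are taken from train.py
40 | # and are assumptions to adapt for your own setup.
41 | #
42 | #   import math
43 | #   from utils.ArgParser import Argument
44 | #   from utils.Camera import FisheyeCam, PanoramaCam
45 | #   from Image_formation.renderer import ActiveStereoRenderer
46 | #   from model.Metasurface import Metasurface
47 | #   from model.StereoMatching import DepthEstimator
48 | #
49 | #   args = Argument().parse()
50 | #   device = torch.device(args.device)
51 | #   metasurface = Metasurface(args, device)
52 | #   r90 = math.radians(90)
53 | #   cams = [FisheyeCam(args, (0.05, 0.05, 0), (r90, 0, 0), 'cam1', device, args.cam_config_path),
54 | #           FisheyeCam(args, (-0.05, 0.05, 0), (r90, 0, 0), 'cam2', device, args.cam_config_path)]
55 | #   renderer = ActiveStereoRenderer(args, metasurface, cams, device)
56 | #   pano_cam = PanoramaCam(args, (0, 0, 0), (r90, 0, 0), 'pano', device)
57 | #   estimator = DepthEstimator(pano_cam, cams, device, args)
58 | #   model = E2E(metasurface, renderer, estimator).to(device)
59 | #   # ref_im_list, depth_map_list, occ_im_list, normal_im_list come from a
60 | #   # dataset/dataset.py CreateSyntheticDataset batch.
61 | #   pred_depth, rendered = model(ref_im_list, depth_map_list, occ_im_list, normal_im_list)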
-------------------------------------------------------------------------------- /utils/net/basic_layer.py: -------------------------------------------------------------------------------- 1 | from torch import nn as nn 2 | import torch 3 | #from kornia.filters import box_blur 4 | from collections import namedtuple 5 | from torchvision import models 6 | 7 | 8 | def convbn(in_channel, out_channel, kernel_size, stride, pad, dilation): 9 | return nn.Sequential( 10 | nn.Conv2d( 11 | in_channel, 12 | out_channel, 13 | kernel_size=kernel_size, 14 | stride=stride, 15 | padding=dilation if dilation > 1 else pad, 16 | dilation=dilation), 17 | nn.BatchNorm2d(out_channel)) 18 | 19 | 20 | def convbn_3d(in_channel, out_channel, kernel_size, stride, pad): 21 | return nn.Sequential( 22 | nn.Conv3d( 23 | in_channel, 24 | out_channel, 25 | kernel_size=kernel_size, 26 | padding=pad, 27 | stride=stride), 28 | nn.BatchNorm3d(out_channel)) 29 | 30 | 31 | 32 | class Vgg16(torch.nn.Module): 33 | def __init__(self, requires_grad=False): 34 | super(Vgg16, self).__init__() 35 | vgg_pretrained_features = models.vgg16(pretrained=True).features 36 | self.slice1 = torch.nn.Sequential() 37 | self.slice2 = torch.nn.Sequential() 38 | self.slice3 = torch.nn.Sequential() 39 | self.slice4 = torch.nn.Sequential() 40 | for x in range(4): 41 | self.slice1.add_module(str(x), vgg_pretrained_features[x]) 42 | for x in range(4, 9): 43 | self.slice2.add_module(str(x), vgg_pretrained_features[x]) 44 | for x in range(9, 16): 45 | self.slice3.add_module(str(x), vgg_pretrained_features[x]) 46 | for x in range(16, 23): 47 | self.slice4.add_module(str(x), vgg_pretrained_features[x]) 48 | if not requires_grad: 49 | for param in self.parameters(): 50 | param.requires_grad = False 51 | 52 | def forward(self, X): 53 | h = self.slice1(X) 54 | h_relu1_2 = h 55 | h = self.slice2(h) 56 | h_relu2_2 = h 57 | h = self.slice3(h) 58 | h_relu3_3 = h 59 | h = self.slice4(h) 60 | h_relu4_3 = h 61 | vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3']) 62 | out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3) 63 | return out 64 | 65 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 360° Structured Light with Learned Metasurfaces 2 | 3 | ### [Project Page](https://eschoi.com/360-SL-Metasurface/) | [Paper](https://www.nature.com/articles/s41566-024-01450-x) | [Data](https://doi.org/10.5281/zenodo.11518075) | [Arxiv](https://arxiv.org/abs/2306.13361) 4 | 5 | [Eunsue Choi*](https://eschoi.com), [Gyeongtae Kim*](https://scholar.google.co.kr/citations?user=0rZekfsAAAAJ), [Jooyeong Yun](https://scholar.google.com/citations?user=iw2cTTYAAAAJ), [Yujin Jeon](https://scholar.google.com/citations?user=M9ZnHHoAAAAJ), [Junsuk Rho+](https://sites.google.com/site/junsukrho/), [Seung-Hwan Baek+](https://www.shbaek.com/) 6 | 7 | dataset: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11518075.svg)](https://doi.org/10.5281/zenodo.11518075) 8 | 9 | 10 | 11 | This repository contains the implementation and supplementary files of the paper: 12 | 13 | **360° Structured Light with Learned Metasurfaces** 14 | 15 | Eunsue Choi *, Gyeongtae Kim *, Jooyeong Yun, Yujin Jeon, Junsuk Rho+, Seung-Hwan Baek+ 16 | 17 | ***Nature Photonics, 2024*** 18 | 19 | 20 | 21 | This code implements a 180° full-space light propagation model for simulating 360° structured light, differentiable fish-eye camera rendering and 360° 
depth reconstruction, all in PyTorch. These components are optimized end-to-end using machine-learning optimizers.
22 | 
23 | 
24 | 
25 | ## Dataset
26 | 
27 | We provide synthetic fish-eye images rendered in Blender.
28 | 
29 | You can download the train and test datasets from [this link](https://zenodo.org/records/5637679) and place them in the corresponding folders.
30 | 
31 | We have also included the fish-eye camera configuration for this dataset in the file "calib_results.txt" in this repository.
32 | 
33 | If you wish to use your own dataset, please replace this configuration file with your own calibration results.
34 | 
35 | 
36 | 
37 | ## Training
38 | 
39 | To perform end-to-end training of the metasurface and the 3D reconstructor, execute `train.py`:
40 | 
41 | ```bash
42 | python train.py
43 | ```
44 | 
45 | Please refer to utils/ArgParser.py for details of the arguments.
46 | 
47 | We provide several supplementary files for training:
48 | 
49 | - synthetic fisheye dataset
50 | - __checkpoint/pattern.mat__: optimized metasurface phase map
51 | - __calib_results.txt__: calibration results for the dataset
52 | - __fisheye_mask.npy__: valid-pixel mask of the fisheye camera for the given lens
53 | 
54 | If you perform your own end-to-end training, please replace these files with your own.
55 | 
56 | 
57 | 
58 | ## Testing
59 | 
60 | Our implementation supports two types of inference: on synthetic images and on real-world captures. To run inference on your images, execute `test.py`:
61 | 
62 | ```bash
63 | python test.py
64 | ```
65 | 
66 | Please refer to utils/ArgParser.py for details of the arguments.
67 | 
68 | 
69 | 
70 | ## Requirements
71 | 
72 | This code has been trained and tested on Linux with an NVIDIA A6000 GPU with 48 GB of VRAM.
73 | 
74 | We recommend using a Conda environment. Build a virtual environment from the YAML configuration file provided in this repository:
75 | 
76 | ```bash
77 | conda env create --file environment.yaml
78 | ```
79 | 
80 | 
81 | 
82 | ## Citation
83 | 
84 | If you find our work useful in your research, please cite:
85 | 
86 | ```
87 | TBU
88 | ```
89 | 
90 | 
91 | 
92 | ## Acknowledgement
93 | 
94 | Parts of our code are based on the following works: [sphere-stereo](https://github.com/KAIST-VCLAB/sphere-stereo), [OcamCalib](https://sites.google.com/site/scarabotix/ocamcalib-omnidirectional-camera-calibration-toolbox-for-matlab), [Omnimvs](https://github.com/hyu-cvlab/omnimvs-pytorch), and [polka_lines](https://openaccess.thecvf.com/content/CVPR2021/html/Baek_Polka_Lines_Learning_Structured_Illumination_and_Reconstruction_for_Active_Stereo_CVPR_2021_paper.html).
95 | 96 | Our dataset was rendered with 3D object from [ShapeNet](https://shapenet.org/) 97 | -------------------------------------------------------------------------------- /dataset/dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import os 4 | import numpy as np 5 | 6 | from utils.render.openExr import read_exr_as_np 7 | 8 | 9 | class CreateSyntheticDataset(torch.utils.data.Dataset): 10 | def __init__(self, path, mode): 11 | self.path = path # 12 | 13 | fisheye_path = os.path.join(self.path, "fisheye") 14 | pano_path = os.path.join(self.path, "pano") 15 | 16 | self.reflectance_path = os.path.join(fisheye_path, "Albedo") 17 | self.depth_path = os.path.join(fisheye_path, "Depth") 18 | self.normal_path = os.path.join(fisheye_path, "Normal") 19 | self.occlusion_path = os.path.join(fisheye_path, "Occlusion") 20 | 21 | self.gt_path = os.path.join(pano_path, "Depth/pano_cam/") 22 | 23 | self.file_list = os.listdir(os.path.join(self.reflectance_path, 'cam1')) 24 | 25 | if 'Thumbs.db' in self.file_list: 26 | self.file_list.remove('Thumbs.db') 27 | 28 | self.N = len(self.file_list) 29 | 30 | def __getitem__(self, i): 31 | 32 | ref_im_list = [] 33 | depth_im_list = [] 34 | occ_im_list = [] 35 | normal_im_list = [] 36 | 37 | idx = self.file_list[i][:-4] 38 | png_name = idx + '.png' 39 | exr_name = idx + '.exr' 40 | #png_name = '%d.png'%(idx) 41 | #exr_name = '%d.exr'%(idx) 42 | 43 | for idx in range(1, 3): 44 | 45 | cam_name = 'cam%d'%(idx) 46 | 47 | ref_im = cv2.imread(os.path.join(self.reflectance_path, cam_name, png_name)) 48 | ref_im_torch = torch.from_numpy(ref_im).float() 49 | ref_im_list.append(ref_im_torch) 50 | 51 | occ_im = cv2.imread(os.path.join(self.occlusion_path, cam_name, png_name)) 52 | occ_im_torch = torch.from_numpy(occ_im)[..., 0].float() # 1 x H x W x 3 53 | occ_im_list.append(occ_im_torch) 54 | 55 | depth_im = read_exr_as_np(os.path.join(self.depth_path, cam_name, exr_name)) 56 | depth_torch = torch.from_numpy(depth_im)[..., 0].float() 57 | depth_im_list.append(depth_torch) 58 | 59 | normal_im = read_exr_as_np(os.path.join(self.normal_path, cam_name, exr_name)) 60 | normal_torch = torch.from_numpy(normal_im).float() 61 | normal_torch[normal_torch.isnan()] = 0. 62 | normal_im_list.append(normal_torch) 63 | 64 | 65 | 66 | input_dict = { 67 | 'ref_im_list': ref_im_list, 68 | 'depth_im_list': depth_im_list, 69 | 'occ_im_list': occ_im_list, 70 | 'normal_im_list': normal_im_list, 71 | #'gt': None, #gt_depth_torch, 72 | 'name': png_name 73 | } 74 | return input_dict 75 | 76 | 77 | def __len__(self): 78 | return self.N 79 | 80 | 81 | 82 | class RealDataset(torch.utils.data.Dataset): 83 | def __init__(self, path, mode): 84 | self.path = path # "matching/ " 85 | 86 | self.cam3_list = os.listdir(os.path.join(self.path, 'cam3')) 87 | self.cam4_list = os.listdir(os.path.join(self.path, 'cam4')) 88 | self.cam3_list.sort() 89 | self.cam4_list.sort() 90 | 91 | 92 | self.N = len(self.cam3_list)#file_list) 93 | 94 | def __getitem__(self, i): 95 | 96 | im_list = [] 97 | 98 | 99 | for idx2 in range(1, 2): 100 | 101 | cam_name = 'cam%d'%(idx2) 102 | if idx2 == 3: 103 | png_name = self.cam3_list[i] 104 | else: 105 | png_name = self.cam4_list[i] 106 | img = cv2.resize(cv2.imread(os.path.join(self.path, cam_name, png_name)), (640, 400)) 107 | img_torch = torch.from_numpy(img) / 255. 
#term 108 | im_list.append(img_torch ) 109 | 110 | 111 | 112 | input_dict = { 113 | 'img': im_list, 114 | 'name': png_name[:-4] #idx 115 | } 116 | 117 | 118 | return input_dict 119 | 120 | 121 | def __len__(self): 122 | return self.N 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /utils/ArgParser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | 5 | class Argument: 6 | def __init__(self): 7 | self.parser = argparse.ArgumentParser() 8 | 9 | # for init scene 10 | self.parser.add_argument('--N_scene', type=int, default=2) 11 | self.parser.add_argument('--N_obj', type=int, default=60) 12 | self.parser.add_argument('--max_dist_bg', type=int, default=6) 13 | self.parser.add_argument('--min_dist_bg', type=int, default=4) 14 | 15 | # data directory path6.6 16 | self.parser.add_argument('--abs') 17 | self.parser.add_argument('--obj_path', type=str, default="models-OBJ\\models\\models") 18 | self.parser.add_argument('--tex_path', type=str, default="texture\\models-textures\\textures") 19 | self.parser.add_argument('--save_fisheye_path', type=str, default="/data/fisheye") 20 | self.parser.add_argument('--save_pano_path', type=str, default="/data/pano") 21 | 22 | # resolution for rendering 23 | self.parser.add_argument('--fisheye_resolution_x', type=int, default=640) 24 | self.parser.add_argument('--fisheye_resolution_y', type=int, default=400) 25 | 26 | self.parser.add_argument('--pano_resolution_x', type=int, default=800) 27 | self.parser.add_argument('--pano_resolution_y', type=int, default=400) 28 | 29 | # for Camera calibaration 30 | self.parser.add_argument('--fov', type=float, default=185.0) 31 | self.parser.add_argument('--focal_length', type=float, default=1.80) 32 | self.parser.add_argument('--sensor_width', type=float, default=6.17) # 1/2.3 inch 33 | self.parser.add_argument('--sensor_height', type=float, default=4.55) 34 | self.parser.add_argument('--baseline', type=float, default=0.1) # 10cm 35 | 36 | 37 | # for image formation 38 | self.parser.add_argument('--device', type=str, default="cuda:1") 39 | self.parser.add_argument('--ambient_light_off', type=bool, default=False) 40 | self.parser.add_argument('--noise_gaussian_stddev', type=float, default=2e-2) 41 | self.parser.add_argument('--ambient_power_max', type=float, default=0.6) 42 | self.parser.add_argument('--ambient_power_min', type=float, default=0.6) 43 | self.parser.add_argument('--laser_power_min', type=float, default=1e-1, help='previous default: 5e-1') 44 | self.parser.add_argument('--laser_power_max', type=float, default=1.5e-0, help='previous default: 5e-1') 45 | 46 | self.parser.add_argument('--cam_config_path', type=str, default="./calib_results.txt") 47 | 48 | 49 | # for Metasurface 50 | self.parser.add_argument('--N_phase', type=int, default=1000) 51 | self.parser.add_argument('--N_supercell', type=int, default=10) 52 | self.parser.add_argument('--N_theta', type=int, default=300) 53 | self.parser.add_argument('--N_alpha', type=int, default=100) 54 | self.parser.add_argument('--wave_length', type=float, default=532e-9) # mono-chromatic structured light 55 | self.parser.add_argument('--pixel_pitch', type=float, default=260e-9) # Metasurface pixel pitch 56 | 57 | # 58 | # for stereo matching 59 | self.parser.add_argument('--N_depth_candidate', type=int, default=90) 60 | self.parser.add_argument('--max_depth', type=float, default=5.0) # unit: [m] 61 | self.parser.add_argument('--min_depth', type=float, 
default=0.3) 62 | 63 | # for optimization 64 | self.parser.add_argument('--lr', type=float, default=3e-4) 65 | self.parser.add_argument('--momentum', type=float, default=0.9) 66 | self.parser.add_argument('--input_path', type=str, default='./data') 67 | self.parser.add_argument('--log', type=str, default="./log/") 68 | self.parser.add_argument('--batch_size', type=int, default=8) 69 | 70 | self.parser.add_argument('--train_path', type=str, default="./data/train") 71 | self.parser.add_argument('--valid_path', type=str, default="./data/test") 72 | 73 | 74 | 75 | # for test 76 | self.parser.add_argument('--num_gpu', type=int, default=1) 77 | self.parser.add_argument('--pattern_path', type=str, default="./checkpoint/pattern.mat") 78 | self.parser.add_argument('--test_path', type=str, default='./data/test') 79 | self.parser.add_argument('--single_img', type=bool, default=True) 80 | self.parser.add_argument('--test_save_path', type=str, default='./log/inference') 81 | self.parser.add_argument('--chk_path', type=str, default='./checkpoint/model.pth') 82 | self.parser.add_argument('--front_right_config', type=str, default='./front_cam.npy') 83 | self.parser.add_argument('--back_right_config', type=str, default='./back_cam.npy') 84 | 85 | 86 | 87 | 88 | 89 | 90 | def parse(self): 91 | return self.parser.parse_args() 92 | 93 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from utils.ArgParser import Argument 4 | from dataset.dataset import * 5 | from model.Metasurface import Metasurface 6 | from utils.Camera import * 7 | from Image_formation.renderer import * 8 | from model.StereoMatching import DepthEstimator 9 | import scipy.io 10 | 11 | 12 | 13 | import matplotlib.pyplot as plt 14 | from torch.autograd import Variable 15 | 16 | import GPUtil, os 17 | from model.e2e import * 18 | 19 | 20 | def grad_loss(output, gt): 21 | def one_grad(shift): 22 | ox = output[:, shift:] - output[:, :-shift] 23 | oy = output[:, :, shift:] - output[:, :, :-shift] 24 | gx = gt[:, shift:] - gt[:, :-shift] 25 | gy = gt[:, :, shift:] - gt[:, :, :-shift] 26 | loss = (ox - gx).abs().mean() + (oy - gy).abs().mean() 27 | return loss 28 | loss = (one_grad(1) + one_grad(2) + one_grad(3)) / 3. 
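    # The value above averages the L1 difference between the finite-difference
    # gradients of the prediction and of the ground truth over pixel shifts 1-3,
    # acting as a TV-style edge term alongside the masked L1 depth loss.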
29 | return loss 30 | 31 | 32 | def test(opt, model, dataset_path): 33 | if opt.test_real_scene : 34 | dataset_test = RealDataset(opt.test_path, 'test') 35 | else: 36 | dataset_test = CreateSyntheticDataset(opt.test_path, 'test') 37 | dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size=opt.batch_size, shuffle=False, num_workers=8) 38 | 39 | l1_loss = torch.nn.L1Loss() 40 | l1_loss.requires_grad = False 41 | 42 | device = torch.device(opt.device) 43 | model = model.to(device) 44 | model.load_state_dict(torch.load(opt.chk_path)) 45 | 46 | 47 | 48 | fisheye_mask = torch.from_numpy(np.load("./fisheye_mask.npy")).to(device) 49 | model.eval() 50 | 51 | 52 | if opt.test_real_scene : # captured by your real system 53 | with torch.no_grad(): 54 | for i, data in enumerate(dataloader_test): 55 | 56 | img_list = data['img'] 57 | 58 | for j, img in enumerate(img_list): 59 | img_list[j] = img.to(device) 60 | 61 | result = model.estimator(img_list)[0] 62 | name = data['name'] 63 | 64 | B = result.shape[0] 65 | 66 | # TODO: saving process 67 | for b in range(B): 68 | plt.imsave(os.path.join(opt.test_save_path, name[b] + '.png'), result[b].cpu().numpy(), cmap='inferno') 69 | 70 | 71 | 72 | 73 | else: # synthetic dataset 74 | with torch.no_grad(): 75 | losses = [] 76 | inv_losses = [] 77 | for i, data in enumerate(dataloader_test): 78 | B = opt.batch_size 79 | 80 | ref_im_list = data['ref_im_list'] 81 | depth_map_list = data['depth_im_list'] 82 | occ_im_list = data['occ_im_list'] 83 | normal_im_list = data['normal_im_list'] 84 | 85 | 86 | gt = 1.0 / (depth_map_list[0].to(device).float() * 10) 87 | inv_depth_pred, _ = model(ref_im_list, depth_map_list, occ_im_list, normal_im_list) 88 | 89 | front_l1loss = l1_loss(gt[:, fisheye_mask], inv_depth_pred[0][:, fisheye_mask]) 90 | front_tvloss = grad_loss(gt, inv_depth_pred[0]) 91 | 92 | 93 | # saving predicted inverse depth 94 | #np.save(os.path.join(opt.test_save_path, 'ours_%d.npy'%(i)), inv_depth_pred[0].cpu().numpy()) 95 | 96 | 97 | 98 | 99 | 100 | 101 | if __name__ == "__main__": 102 | parser = Argument() 103 | parser.parser.add_argument('--test_real_scene', type=bool, default=False) 104 | parser.parser.add_argument('--use_extrinsic', type=bool, default=False) 105 | 106 | args = parser.parse() 107 | 108 | device = torch.device(args.device) 109 | 110 | import pickle 111 | metasurface = Metasurface(args, device) 112 | 113 | optimized_phase = scipy.io.loadmat(args.pattern_path)['phasemap'] 114 | metasurface.update_phase(torch.from_numpy(optimized_phase).float().to(device)) 115 | 116 | radian_90 = math.radians(90) 117 | cam1 = FisheyeCam(args, (0.05, 0.05, 0), (radian_90, 0, 0), 'cam1', device, args.cam_config_path) 118 | cam2 = FisheyeCam(args, (-0.05, 0.05, 0), (radian_90, 0, 0), 'cam2', device, args.cam_config_path) 119 | cam_calib = [cam1, cam2] 120 | 121 | if args.use_extrinsic: # for real_scene 122 | 123 | cam2_ext = torch.from_numpy(np.load(args.front_right_config)).to(device).float() 124 | cam4_ext = torch.from_numpy(np.load(args.back_right_config)).to(device).float() 125 | 126 | cam1.set_extrinsic(torch.Tensor([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]).to(device)) 127 | cam3.set_extrinsic(torch.Tensor([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]).to(device)) 128 | cam2.set_extrinsic(cam2_ext) 129 | cam4.set_extrinsic(cam4_ext) 130 | 131 | cam_calib = [cam1, cam2, cam3, cam4] 132 | 133 | 134 | 135 | renderer = ActiveStereoRenderer(args, metasurface, cam_calib, device) 136 | pano_cam = PanoramaCam(args, (0, 0, 
0), (radian_90, 0, 0), 'pano', device) 137 | estimator = DepthEstimator(pano_cam, cam_calib, device, args) 138 | 139 | e2e_model = E2E(metasurface, renderer, estimator) 140 | 141 | 142 | test(args, e2e_model, args.input_path) 143 | 144 | -------------------------------------------------------------------------------- /Image_formation/renderer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import numpy as np 4 | import cv2 5 | import os, math 6 | 7 | from utils.Camera import FisheyeCam 8 | from utils.ArgParser import Argument 9 | from utils.render.noise import GaussianNoise 10 | from utils.render.openExr import read_exr_as_np 11 | from model.Metasurface import Metasurface 12 | 13 | 14 | 15 | def img_show(img): 16 | img_np = (img).cpu().numpy() 17 | 18 | cv2.imshow('test', img_np) 19 | cv2.waitKey(0) 20 | cv2.destroyAllWindows() 21 | 22 | 23 | def dir_to_sin(pts, num_cells): 24 | ### pts : B x 3 x N 25 | x = pts[:, 0, :] 26 | y = pts[:, 1, :] 27 | z = pts[:, 2, :] 28 | 29 | 30 | sin_phi = x / torch.sqrt(x**2 + y**2) # N [-1, 1] 31 | sin_theta = -z / torch.norm(pts, dim=1) # N 32 | 33 | u = (sin_phi + 1) * (num_cells/2) 34 | v = (sin_theta +1) * (num_cells/2) 35 | 36 | return (u, v) 37 | 38 | 39 | 40 | class ActiveStereoRenderer: 41 | def __init__(self, opt, metasurface, fisheye_cams, device): 42 | 43 | self.opt = opt 44 | self.device = device 45 | 46 | self.metasurface = metasurface 47 | self.cam_calib = fisheye_cams 48 | 49 | self.resolution_x = opt.fisheye_resolution_x 50 | self.resolution_y = opt.fisheye_resolution_y 51 | 52 | 53 | self.ambient_light_off = opt.ambient_light_off # True or False 54 | self.noise = GaussianNoise(0, opt.noise_gaussian_stddev, self.device) 55 | 56 | def find_pattern(self, pts, pattern): 57 | ### pts : B x (3xN) 58 | ### pattern : n_phase x n_phase 59 | 60 | x = -pts[:, 0, :] 61 | y = pts[:, 2, :] 62 | z = pts[:, 1, :] 63 | 64 | B = x.shape[0] 65 | 66 | n_phase = self.opt.N_phase 67 | wvl = self.opt.wave_length 68 | p = self.opt.pixel_pitch 69 | 70 | norm = torch.sqrt(x**2 + y**2 + z**2) 71 | 72 | fx = (x/norm) / wvl * p 73 | fy = (y/norm) / wvl * p 74 | 75 | dist_factor = 1/(norm**2) 76 | 77 | H, W = self.resolution_y, self.resolution_x 78 | 79 | x_base = fx.reshape(B, H, W) 80 | y_base = fy.reshape(B, H, W) 81 | grid = torch.stack((x_base, y_base), dim=-1) * 2 # B x W x H x 2 82 | 83 | output = F.grid_sample(pattern.repeat(B, 1, 1, 1), grid, mode='bilinear', padding_mode='zeros') 84 | 85 | return output[:, 0, ...] * dist_factor.reshape(B, H, W) # B x H x W 86 | 87 | 88 | 89 | def find_pattern_sin(self, pts, pattern): 90 | resolution = self.opt.N_phase * self.opt.N_supercell * 0.5 91 | u, v = dir_to_sin(pts, resolution) 92 | 93 | pattern_flatten = pattern.flatten().float() 94 | 95 | # grid_sample test 96 | B = pts.shape[0] 97 | H, W = self.resolution_y, self.resolution_x 98 | x_base = u.reshape(B, H, W) / resolution 99 | y_base = v.reshape(B, H, W) / resolution 100 | 101 | grid = torch.stack((x_base, y_base), dim=3) 102 | 103 | output = F.grid_sample(pattern.repeat(B, 1, 1, 1), 2*grid-1, mode='bilinear', padding_mode='zeros') 104 | result = output[:, 0, ...] 
# B x H x W 105 | 106 | return result 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | def render(self, ref_im_list, depth_map_list, occ_list, normal_list, pattern_fixed=None): 115 | #---------------------------SCENE SETUP----------------------- 116 | # scene 117 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 118 | # cam_1 MetaSurface cam_2 119 | # light source 120 | # cam_3 cam_4 121 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 122 | # scene 123 | num_cameras = len(self.cam_calib) 124 | result = [] 125 | 126 | # make panorama illumination image 127 | pattern_360 = self.metasurface.propagate()**2 128 | if pattern_360.shape[0] > 500: 129 | pattern_360 = torch.nn.functional.interpolate(pattern_360.unsqueeze(0).unsqueeze(0), size=(500, 500), mode='bilinear')[0][0] 130 | 131 | 132 | for i in range(2): 133 | 134 | cam = self.cam_calib[i] 135 | 136 | 137 | ref_im = ref_im_list[i].to(self.device)/255. 138 | depth_map = depth_map_list[i].to(self.device) 139 | occ = occ_list[i].to(self.device)/255. 140 | normal = normal_list[i].to(self.device) 141 | 142 | B = ref_im.shape[0] # batch size 143 | 144 | ambient_power = torch.tensor(np.random.uniform(low=self.opt.ambient_power_min, high=self.opt.ambient_power_max, size=(B, 1, 1)).astype(np.float32)).to(self.device) 145 | laser_power = 8e-8 #1e-7 - ours #5e-8 #tv2 : 8e-8#3e-8#0.8 torch.tensor(np.random.uniform(low=self.opt.laser_power_min, high=self.opt.laser_power_max, size=(B, 1, 1)).astype(np.float32), device=self.opt.device) 146 | 147 | xyz_pts_norm = cam.get_whole_pts().unsqueeze(0) # 1 x 3 x N 148 | xyz_pts = xyz_pts_norm * depth_map.reshape(B, -1).unsqueeze(1) * 10 149 | 150 | w = torch.ones(B, 1, xyz_pts.shape[2]).to(self.device) # B x 1 x N 151 | homo_coord_xyz = torch.cat([xyz_pts, w], dim=1) # B x 4 x N 152 | xyz_pts_o = torch.matmul(cam.get_extrinsic(), homo_coord_xyz.permute(1, 2, 0).reshape(4, -1)) # 153 | 154 | xyz_pts_o = (xyz_pts_o / xyz_pts_o[-1])[:3].reshape(3, -1, B) #3 x N x B 155 | xyz_pts_o = xyz_pts_o 156 | 157 | # Far-field propagation 158 | pattern = self.find_pattern(xyz_pts_o.permute(2, 0, 1), pattern_360).float() 159 | pattern_img = occ * (laser_power * pattern) 160 | pattern_occ = pattern_img + ambient_power # + 0.04 161 | 162 | R_img = ref_im[..., 0] * (ambient_power) # * occ) 163 | G_img = ref_im[..., 1] * pattern_occ + 0.2 # Green laser 164 | B_img = ref_im[..., 2] * (ambient_power) # * occ) 165 | 166 | im_sim = torch.stack([R_img, G_img, B_img], axis=-1) 167 | 168 | 169 | normal_origin = normal.reshape(B, -1, 3) * 2 - 1 # B x (w x H) x 3 170 | dot_result = normal_origin * -xyz_pts_o.permute(2, 1, 0) # B x (w x H) x 3 171 | normal_size = torch.norm(normal_origin, dim=2) 172 | xyz_size = torch.norm(dot_result, dim=2) 173 | normal_size[normal_size==0] = 1 174 | xyz_size[xyz_size==0] = 1 175 | cos_theta = dot_result.sum(dim=2) / (normal_size * xyz_size) 176 | 177 | im_sim_normal = im_sim * cos_theta.reshape(im_sim.shape[:-1]).unsqueeze(-1) # (B, H, W, 3) * (B, H, W, 1 ) 178 | 179 | 180 | noise = self.noise.sample(ref_im.shape) 181 | im_sim_noisy = im_sim_normal + noise 182 | 183 | #sensor clamping 184 | im_sim_noisy_clamped = torch.clamp(im_sim_noisy, min=0, max=1) 185 | result.append(im_sim_noisy_clamped) 186 | 187 | return result, None#illum_img 188 | 189 | 190 | 191 | 192 | 193 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import 
torch.nn as nn 3 | from torch.utils.tensorboard import SummaryWriter 4 | from utils.ArgParser import Argument 5 | from dataset.dataset import CreateSyntheticDataset 6 | from model.Metasurface import Metasurface 7 | from utils.Camera import * 8 | from Image_formation.renderer import * 9 | from model.StereoMatching import DepthEstimator 10 | import scipy.io 11 | 12 | 13 | import matplotlib.pyplot as plt 14 | from torch.autograd import Variable 15 | 16 | import GPUtil, os 17 | from model.e2e import * 18 | 19 | 20 | def grad_loss(output, gt): 21 | def one_grad(shift): 22 | ox = output[:, shift:] - output[:, :-shift] 23 | oy = output[:, :, shift:] - output[:, :, :-shift] 24 | gx = gt[:, shift:] - gt[:, :-shift] 25 | gy = gt[:, :, shift:] - gt[:, :, :-shift] 26 | loss = (ox - gx).abs().mean() + (oy - gy).abs().mean() 27 | return loss 28 | loss = (one_grad(1) + one_grad(2) + one_grad(3)) / 3. 29 | return loss 30 | 31 | 32 | def illum_tv(output): 33 | def one_grad(shift): 34 | ox = output[shift:] - output[ :-shift] 35 | oy = output[ :, shift:] - output[ :, :-shift] 36 | 37 | loss = ox.abs().mean() + oy.abs().mean() 38 | return loss 39 | loss = (one_grad(1) + one_grad(2) + one_grad(3)) / 3. 40 | return loss 41 | 42 | 43 | def train(opt, model, dataset_path): 44 | dataset_train = CreateSyntheticDataset(opt.train_path, 'train') # path 45 | dataset_test = CreateSyntheticDataset(opt.valid_path, 'valid') 46 | dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size=opt.batch_size, shuffle=True, num_workers=4) 47 | dataloader_valid = torch.utils.data.DataLoader(dataset_test, batch_size=opt.batch_size, shuffle=False, num_workers=4) 48 | 49 | meta_phase = torch.autograd.Variable(model.get_meta_phase(), requires_grad=True) 50 | optimizer_meta = torch.optim.Adam([meta_phase], lr=opt.lr) 51 | optimizer_net = torch.optim.Adam(list(model.parameters()), lr=opt.lr) 52 | 53 | scheduler_meta = torch.optim.lr_scheduler.StepLR(optimizer_meta, step_size=200, gamma=0.2) 54 | scheduler_net = torch.optim.lr_scheduler.StepLR(optimizer_net, step_size=350, gamma=0.4) 55 | 56 | 57 | l1_loss = torch.nn.L1Loss() 58 | l1_loss.requires_grad = True 59 | 60 | device = torch.device(opt.device) 61 | model = model.to(device) 62 | 63 | writer = SummaryWriter(log_dir=opt.log) 64 | fisheye_mask = torch.from_numpy(np.load("./fisheye_mask.npy")).to(device) 65 | 66 | 67 | 68 | for epoch in range(1000): 69 | losses = [] 70 | model.train() 71 | # minibatch 72 | for i, data in enumerate(dataloader_train): 73 | B = opt.batch_size 74 | 75 | ref_im_list = data['ref_im_list'] 76 | depth_map_list = data['depth_im_list'] 77 | occ_im_list = data['occ_im_list'] 78 | normal_im_list = data['normal_im_list'] 79 | 80 | # update meta-surface phase 81 | if optimizer_meta: 82 | model.update_phase(meta_phase) 83 | 84 | gt = 1.0 / (depth_map_list[0].to(device).float() * 10) 85 | inv_depth_pred, synthetic_images = model(ref_im_list, depth_map_list, occ_im_list, normal_im_list) 86 | 87 | front_l1loss = l1_loss(gt[:, fisheye_mask], inv_depth_pred[0][:, fisheye_mask]) 88 | front_tvloss = grad_loss(gt, inv_depth_pred[0]) 89 | 90 | # pattern loss 91 | #pattern = model.get_pattern() 92 | #illum = torch.nn.functional.grid_sample(pattern.repeat(1, 1, 1, 1), grid, mode='bilinear', padding_mode='zeros').squeeze(0).squeeze(0) 93 | #illum_loss = 1 / illum_tv(illum / illum.max()) 94 | 95 | 96 | loss = front_l1loss + front_tvloss * 0.4 # + 0.01 * illum_loss 97 | print("{0}th iter : {1}".format(i, loss.item())) 98 | losses.append(loss.item()) 99 | 100 | 101 | 
if optimizer_meta : 102 | optimizer_meta.zero_grad() 103 | if optimizer_net : 104 | optimizer_net.zero_grad() 105 | loss.backward() 106 | if optimizer_meta : 107 | optimizer_meta.step() 108 | if optimizer_net : 109 | optimizer_net.step() 110 | 111 | 112 | print("[{0}/1000 epoch - Train loss : {1}".format(epoch, sum(losses)/len(losses))) 113 | 114 | 115 | # Test 116 | model.eval() 117 | losses = [] 118 | with torch.no_grad(): 119 | for j, data in enumerate(dataloader_valid): 120 | B = opt.batch_size 121 | 122 | ref_im_list = data['ref_im_list'] 123 | depth_map_list = data['depth_im_list'] 124 | occ_im_list = data['occ_im_list'] 125 | normal_im_list = data['normal_im_list'] 126 | 127 | gt = 1.0 / (depth_map_list[0].to(device).float() * 10) 128 | inv_depth_pred, _ = model(ref_im_list, depth_map_list, occ_im_list, normal_im_list) 129 | 130 | front_l1loss = l1_loss(gt[:, fisheye_mask], inv_depth_pred[0][:, fisheye_mask]) 131 | front_tvloss = grad_loss(gt, inv_depth_pred[0]) 132 | 133 | 134 | 135 | loss = front_l1loss + front_tvloss * 0.4 136 | losses.append(loss.item()) 137 | 138 | print("[{0}/1000 epoch - validation loss : {1}".format(epoch, sum(losses)/len(losses))) 139 | 140 | 141 | 142 | 143 | if epoch % 10 == 0: 144 | torch.save(model.state_dict(), os.path.join(opt.log, "model_epoch_%d.pth"%(epoch))) 145 | np.save(os.path.join(opt.log, "phase_epoch_%d.npy"%(epoch)), meta_phase.detach().cpu().numpy()) 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | if __name__ == "__main__": 156 | parser = Argument() 157 | args = parser.parse() 158 | 159 | device = torch.device(args.device) 160 | 161 | metasurface = Metasurface(args, device) 162 | 163 | 164 | radian_90 = math.radians(90) 165 | 166 | 167 | cam1 = FisheyeCam(args, (0.05, 0.05, 0), (radian_90, 0, 0), 'cam1', device, args.cam_config_path) 168 | cam2 = FisheyeCam(args, (-0.05, 0.05, 0), (radian_90, 0, 0), 'cam2', device, args.cam_config_path) 169 | 170 | 171 | # Front-back / in training time, we just trained cam1-cam2 front system. 
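    # cam1/cam2 are the front-facing stereo pair (offsets of +/-0.05 m, i.e. a 0.1 m
    # baseline); the back-facing pair is only set up at test time for real captures
    # (see test.py with --use_extrinsic).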
172 | cam_calib = [cam1, cam2] 173 | 174 | renderer = ActiveStereoRenderer(args, metasurface, cam_calib, device) 175 | pano_cam = PanoramaCam(args, (0, 0, 0), (radian_90, 0, 0), 'pano', device) 176 | estimator = DepthEstimator(pano_cam, cam_calib, device, args) 177 | 178 | e2e_model = E2E(metasurface, renderer, estimator) 179 | 180 | train(args, e2e_model, args.input_path) 181 | 182 | -------------------------------------------------------------------------------- /model/StereoMatching.py: -------------------------------------------------------------------------------- 1 | import torch, gc 2 | import torch.nn as nn 3 | from utils.Camera import * 4 | from utils.ArgParser import Argument 5 | from utils.render.openExr import read_exr_as_np 6 | 7 | import matplotlib.pyplot as plt 8 | from utils.render.openExr import read_exr_as_np 9 | import torchvision.transforms as T 10 | 11 | from utils.net.basic_layer import convbn, convbn_3d 12 | import torch.nn.functional as F 13 | 14 | 15 | 16 | 17 | class EdgeAwareRefinement(nn.Module): 18 | def __init__(self, in_channel): 19 | super().__init__() 20 | 21 | self.edge_in = nn.Sequential( 22 | convbn(4, 32, kernel_size=3, stride=1, pad=1, dilation=1), 23 | nn.LeakyReLU(negative_slope=0.2, inplace=True) 24 | ) 25 | 26 | self.edge_filter = nn.ModuleList() 27 | for i in range(5): 28 | self.edge_filter.append( 29 | BasicBlock( 30 | 32, 32, stride=1, downsample=None, pad=1, dilation=1 31 | ) 32 | ) 33 | 34 | self.edge_out = nn.Conv2d(32, 1, kernel_size=3, stride=1, padding=1) 35 | 36 | 37 | 38 | self.conv2d_feature = nn.Sequential( 39 | convbn(2, 32, kernel_size=3, stride=1, pad=1, dilation=1), 40 | nn.LeakyReLU(negative_slope=0.2, inplace=True) 41 | ) 42 | 43 | 44 | 45 | self.residual_astrous_blocks = nn.ModuleList() 46 | astrous_list = [1, 2, 3, 4, 3, 2, 1] 47 | for di in astrous_list: 48 | self.residual_astrous_blocks.append( 49 | BasicBlock( 50 | 32, 32, stride=1, downsample=None, pad=1, dilation=di)) 51 | 52 | self.conv2d_out = nn.Conv2d(32, 1, kernel_size=3, stride=1, padding=1) 53 | 54 | def forward(self, low_disparity, corresponding_rgb): 55 | #output = torch.unsqueeze(low_disparity, dim=1) 56 | twice_disparity = F.interpolate( 57 | low_disparity, 58 | size=corresponding_rgb.size()[-2:], 59 | mode='bilinear', 60 | align_corners=False) 61 | 62 | edge_input = self.edge_in(torch.cat([corresponding_rgb, twice_disparity], dim=1)) 63 | 64 | for filter in self.edge_filter: 65 | edge_input = filter(edge_input) 66 | 67 | edge_map = self.edge_out(edge_input) 68 | 69 | 70 | output = self.conv2d_feature( 71 | torch.cat([twice_disparity, edge_map], dim=1)) 72 | 73 | for astrous_block in self.residual_astrous_blocks: 74 | output = astrous_block(output) 75 | 76 | return nn.ReLU(inplace=True)(torch.squeeze( 77 | twice_disparity + self.conv2d_out(output) * 0.01, dim=1)) 78 | 79 | 80 | class BasicBlock(nn.Module): 81 | def __init__(self, in_channel, out_channel, stride, downsample, pad, dilation): 82 | super().__init__() 83 | self.conv1 = nn.Sequential( 84 | convbn(in_channel, out_channel, 3, stride, pad, dilation), 85 | nn.LeakyReLU(negative_slope=0.2, inplace=True)) 86 | 87 | self.downsample = downsample 88 | self.stride = stride 89 | 90 | def forward(self, x): 91 | out = self.conv1(x) 92 | 93 | if self.downsample is not None: 94 | x = self.downsample(x) 95 | 96 | out = x + out 97 | return out 98 | 99 | class FeatureExtractor(nn.Module): 100 | def __init__(self, k, in_channel=3, out_channel=32): 101 | super().__init__() 102 | self.k = k 103 | self.downsample = 
nn.ModuleList() 104 | self.in_channel = in_channel 105 | self.out_channel = out_channel 106 | 107 | for _ in range(k): 108 | self.downsample.append( 109 | nn.Conv2d( 110 | self.in_channel, 111 | self.out_channel, 112 | kernel_size=5, 113 | stride=2, 114 | padding=2 115 | ) 116 | ) 117 | self.in_channel = self.out_channel 118 | 119 | self.residual_blocks = nn.ModuleList() 120 | for _ in range(6): 121 | self.residual_blocks.append( 122 | BasicBlock(self.out_channel, self.out_channel, stride=1, downsample=None, pad=1, dilation=1) 123 | ) 124 | self.conv_alone = nn.Conv2d(self.out_channel, self.out_channel, kernel_size=3, stride=1, padding=1) 125 | 126 | def forward(self, img): 127 | output = img 128 | for d in self.downsample: 129 | output = d(output) 130 | 131 | for block in self.residual_blocks: 132 | output = block(output) 133 | 134 | return self.conv_alone(output) 135 | 136 | 137 | class DepthEstimator(nn.Module): 138 | def __init__(self, pano_cam, fish_cam, device, opt) : 139 | super().__init__() 140 | self.pano_cam = pano_cam 141 | self.fisheye_cams = fish_cam 142 | 143 | self.r_min = opt.min_depth 144 | self.r_max = opt.max_depth 145 | self.count_candidate = opt.N_depth_candidate # 146 | 147 | self.device = device 148 | 149 | self.fisheye_resolution_x = opt.fisheye_resolution_x 150 | self.fisheye_resolution_y = opt.fisheye_resolution_y 151 | 152 | self.pano_resolution_x = opt.pano_resolution_x 153 | self.pano_resolution_y = opt.pano_resolution_y 154 | 155 | self.num_features = 34 156 | 157 | 158 | self.filter = nn.ModuleList() 159 | for _ in range(7): 160 | self.filter.append( 161 | nn.Sequential( 162 | convbn_3d(self.num_features, self.num_features, kernel_size=3, stride=1, pad=1), 163 | nn.LeakyReLU(negative_slope=0.2, inplace=True) 164 | ) 165 | ) 166 | 167 | self.conv3d_alone = nn.Conv3d(self.num_features, 1, kernel_size=3, stride=1, padding=1) 168 | 169 | self.feature_extractor = FeatureExtractor(2, in_channel=3, out_channel=self.num_features) 170 | self.edge_aware_refinements = EdgeAwareRefinement(4) 171 | 172 | 173 | 174 | 175 | 176 | 177 | # test reference spherical sweeping 178 | def forward(self, img_list): 179 | self.device = img_list[0].device 180 | feature_list = [] 181 | R = self.count_candidate 182 | B = img_list[0].shape[0] 183 | 184 | r_range = 1/torch.linspace(1/self.r_max, 1/self.r_min, R).to(self.device).flip(0) 185 | high_resolution_pred = [] 186 | for img in img_list: 187 | feature = self.feature_extractor(img.permute(0, -1, 1, 2) ** 1/(2.2)) 188 | feature_list.append(feature) 189 | 190 | for i, j in [(0, 1)]: #, (1, 0)] 191 | ref_cam = self.fisheye_cams[i] 192 | target_cam = self.fisheye_cams[j] 193 | 194 | norm_pts = ref_cam.get_whole_pts().unsqueeze(-1) 195 | 196 | sweeping_pts = norm_pts *r_range # 3 x N x R 197 | sweeping_pts = sweeping_pts.reshape(3, -1) 198 | w = torch.ones(sweeping_pts.shape[1]).unsqueeze(0).to(self.device) 199 | pts = torch.cat([sweeping_pts, w]) 200 | 201 | world_pts = torch.matmul(ref_cam.get_extrinsic(), pts) 202 | world_pts = (world_pts/world_pts[-1]) 203 | world_pts = torch.matmul(torch.inverse(target_cam.get_extrinsic()), world_pts)#torch.inverse(target_cam.get_extrinsic()), pts) 204 | world_pts = (world_pts/world_pts[-1])[:3] 205 | 206 | uv = target_cam.world2pixel(world_pts.reshape(3, -1)) 207 | 208 | u = uv[0] 209 | v = uv[1] 210 | 211 | H, W = self.fisheye_resolution_y, self.fisheye_resolution_x 212 | x_base = u.reshape(1, H, W, -1) / self.fisheye_resolution_x 213 | x_base = x_base.permute(0, -1, 1, 2) # B x D x H x W 214 | 
y_base = v.reshape(1, H, W, -1) / self.fisheye_resolution_y 215 | y_base = y_base.permute(0, -1, 1, 2) # B x D x H x W 216 | z_base = torch.zeros_like(x_base) 217 | 218 | grid = torch.stack((x_base, y_base, z_base), dim=4).repeat(B, 1, 1, 1, 1) # 1 x D x H x W x 3 219 | 220 | 221 | target_img = feature_list[j].unsqueeze(-1) 222 | target_img = target_img.permute(0, 1, -1, 2, 3) 223 | 224 | intensity = torch.nn.functional.grid_sample(target_img, 2*grid-1, mode='bilinear', padding_mode='zeros') # B x C x R x H x W 225 | intensity = torch.nn.functional.interpolate(intensity.reshape(B, -1, H, W), size=(H//4, W//4), mode='bilinear').reshape(B, -1, R, H//4, W//4) 226 | 227 | src_img = feature_list[i].unsqueeze(2) 228 | 229 | cost_volume = torch.abs(src_img - intensity) # B x C x R x H x W 230 | 231 | for f in self.filter: 232 | cost_volume = f(cost_volume) 233 | 234 | # softmax 235 | cost_volume_filtered = self.conv3d_alone(cost_volume).squeeze(1) 236 | 237 | prob = torch.nn.functional.softmax(cost_volume_filtered, dim=1) 238 | expectation = prob * r_range.reshape(1, R, 1, 1) 239 | inv_depth_pred = 1.0 / expectation.sum(dim=1).unsqueeze(1) 240 | result = self.edge_aware_refinements(inv_depth_pred, img_list[i].permute(0, -1, 1, 2)) 241 | high_resolution_pred.append(result) 242 | 243 | return high_resolution_pred 244 | 245 | -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: metaPolka 2 | channels: 3 | - pytorch 4 | - nvidia 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1=conda_forge 9 | - _openmp_mutex=4.5=2_kmp_llvm 10 | - _tflow_select=2.3.0=mkl 11 | - abseil-cpp=20211102.0=hd4dd3e8_0 12 | - absl-py=1.3.0=py38h06a4308_0 13 | - aiohttp=3.8.3=py38h5eee18b_0 14 | - aiosignal=1.2.0=pyhd3eb1b0_0 15 | - asttokens=2.0.5=pyhd3eb1b0_0 16 | - astunparse=1.6.3=py_0 17 | - async-timeout=4.0.2=py38h06a4308_0 18 | - attrs=22.1.0=py38h06a4308_0 19 | - backcall=0.2.0=pyhd3eb1b0_0 20 | - blas=1.0=mkl 21 | - blinker=1.4=py38h06a4308_0 22 | - brotli=1.0.9=h5eee18b_7 23 | - brotli-bin=1.0.9=h5eee18b_7 24 | - brotlipy=0.7.0=py38h27cfd23_1003 25 | - bzip2=1.0.8=h7b6447c_0 26 | - c-ares=1.18.1=h7f8727e_0 27 | - ca-certificates=2022.10.11=h06a4308_0 28 | - cachetools=5.2.0=pyhd8ed1ab_0 29 | - certifi=2022.9.24=py38h06a4308_0 30 | - cffi=1.15.1=py38h5eee18b_2 31 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 32 | - click=8.0.4=py38h06a4308_0 33 | - cryptography=38.0.1=py38h9ce1e76_0 34 | - cuda=11.6.2=0 35 | - cuda-cccl=11.6.55=hf6102b2_0 36 | - cuda-command-line-tools=11.6.2=0 37 | - cuda-compiler=11.6.2=0 38 | - cuda-cudart=11.6.55=he381448_0 39 | - cuda-cudart-dev=11.6.55=h42ad0f4_0 40 | - cuda-cuobjdump=11.6.124=h2eeebcb_0 41 | - cuda-cupti=11.6.124=h86345e5_0 42 | - cuda-cuxxfilt=11.6.124=hecbf4f6_0 43 | - cuda-driver-dev=11.6.55=0 44 | - cuda-gdb=11.8.86=0 45 | - cuda-libraries=11.6.2=0 46 | - cuda-libraries-dev=11.6.2=0 47 | - cuda-memcheck=11.8.86=0 48 | - cuda-nsight=11.8.86=0 49 | - cuda-nsight-compute=11.8.0=0 50 | - cuda-nvcc=11.6.124=hbba6d2d_0 51 | - cuda-nvdisasm=11.8.86=0 52 | - cuda-nvml-dev=11.6.55=haa9ef22_0 53 | - cuda-nvprof=11.8.87=0 54 | - cuda-nvprune=11.6.124=he22ec0a_0 55 | - cuda-nvrtc=11.6.124=h020bade_0 56 | - cuda-nvrtc-dev=11.6.124=h249d397_0 57 | - cuda-nvtx=11.6.124=h0630a44_0 58 | - cuda-nvvp=11.8.87=0 59 | - cuda-runtime=11.6.2=0 60 | - cuda-samples=11.6.101=h8efea70_0 61 | - cuda-sanitizer-api=11.8.86=0 62 | - 
cuda-toolkit=11.6.2=0 63 | - cuda-tools=11.6.2=0 64 | - cuda-visual-tools=11.6.2=0 65 | - curl=7.86.0=h5eee18b_0 66 | - cycler=0.11.0=pyhd3eb1b0_0 67 | - dataclasses=0.8=pyh6d0b6a4_7 68 | - dbus=1.13.18=hb2f20db_0 69 | - debugpy=1.5.1=py38h295c915_0 70 | - decorator=5.1.1=pyhd3eb1b0_0 71 | - entrypoints=0.4=py38h06a4308_0 72 | - executing=0.8.3=pyhd3eb1b0_0 73 | - expat=2.4.9=h6a678d5_0 74 | - ffmpeg=4.2.2=h20bf706_0 75 | - fftw=3.3.9=h27cfd23_1 76 | - flatbuffers=2.0.0=h2531618_0 77 | - flit-core=3.6.0=pyhd3eb1b0_0 78 | - fontconfig=2.14.1=hef1e5e3_0 79 | - fonttools=4.25.0=pyhd3eb1b0_0 80 | - freetype=2.12.1=h4a9f257_0 81 | - frozenlist=1.3.3=py38h5eee18b_0 82 | - gast=0.4.0=pyhd3eb1b0_0 83 | - gdbm=1.18=hd4cb3f1_4 84 | - gds-tools=1.4.0.31=0 85 | - gettext=0.21.0=hf68c758_0 86 | - giflib=5.2.1=h7b6447c_0 87 | - git=2.34.1=pl5262hc120c5b_0 88 | - glib=2.69.1=he621ea3_2 89 | - gmp=6.2.1=h295c915_3 90 | - gnutls=3.6.15=he1e5248_0 91 | - google-auth=2.15.0=pyh1a96a4e_0 92 | - google-auth-oauthlib=0.4.6=pyhd8ed1ab_0 93 | - google-pasta=0.2.0=pyhd3eb1b0_0 94 | - grpc-cpp=1.46.1=h33aed49_0 95 | - grpcio=1.46.3=py38ha0cdfde_0 96 | - gst-plugins-base=1.14.0=h8213a91_2 97 | - gstreamer=1.14.0=h28cd5cc_2 98 | - h5py=3.7.0=py38h737f45e_0 99 | - hdf5=1.10.6=h3ffc7dd_1 100 | - icu=58.2=he6710b0_3 101 | - idna=3.4=py38h06a4308_0 102 | - imath=3.1.6=h6239696_1 103 | - importlib-metadata=5.2.0=pyha770c72_0 104 | - intel-openmp=2021.4.0=h06a4308_3561 105 | - ipykernel=6.15.2=py38h06a4308_0 106 | - ipython=8.6.0=py38h06a4308_0 107 | - jedi=0.18.1=py38h06a4308_1 108 | - jpeg=9e=h7f8727e_0 109 | - jupyter_client=7.4.7=py38h06a4308_0 110 | - jupyter_core=4.11.2=py38h06a4308_0 111 | - keras=2.10.0=py38h06a4308_0 112 | - keras-preprocessing=1.1.2=pyhd3eb1b0_0 113 | - kiwisolver=1.4.2=py38h295c915_0 114 | - krb5=1.19.2=hac12032_0 115 | - lame=3.100=h7b6447c_0 116 | - lcms2=2.12=h3be6417_0 117 | - ld_impl_linux-64=2.38=h1181459_1 118 | - lerc=3.0=h295c915_0 119 | - libblas=3.9.0=12_linux64_mkl 120 | - libbrotlicommon=1.0.9=h5eee18b_7 121 | - libbrotlidec=1.0.9=h5eee18b_7 122 | - libbrotlienc=1.0.9=h5eee18b_7 123 | - libcblas=3.9.0=12_linux64_mkl 124 | - libclang=10.0.1=default_hb85057a_2 125 | - libcublas=11.11.3.6=0 126 | - libcublas-dev=11.11.3.6=0 127 | - libcufft=10.9.0.58=0 128 | - libcufft-dev=10.9.0.58=0 129 | - libcufile=1.4.0.31=0 130 | - libcufile-dev=1.4.0.31=0 131 | - libcurand=10.3.0.86=0 132 | - libcurand-dev=10.3.0.86=0 133 | - libcurl=7.86.0=h91b91d3_0 134 | - libcusolver=11.4.1.48=0 135 | - libcusolver-dev=11.4.1.48=0 136 | - libcusparse=11.7.5.86=0 137 | - libcusparse-dev=11.7.5.86=0 138 | - libdeflate=1.8=h7f8727e_5 139 | - libedit=3.1.20210910=h7f8727e_0 140 | - libev=4.33=h7f8727e_1 141 | - libevent=2.1.12=h8f2d780_0 142 | - libffi=3.4.2=h6a678d5_6 143 | - libgcc-ng=12.2.0=h65d4601_19 144 | - libgfortran-ng=11.2.0=h00389a5_1 145 | - libgfortran5=11.2.0=h1234567_1 146 | - libidn2=2.3.2=h7f8727e_0 147 | - liblapack=3.9.0=12_linux64_mkl 148 | - libllvm10=10.0.1=hbcb73fb_5 149 | - libnghttp2=1.46.0=hce63b2e_0 150 | - libnpp=11.8.0.86=0 151 | - libnpp-dev=11.8.0.86=0 152 | - libnvjpeg=11.9.0.86=0 153 | - libnvjpeg-dev=11.9.0.86=0 154 | - libopus=1.3.1=h7b6447c_0 155 | - libpng=1.6.37=hbc83047_0 156 | - libpq=12.9=h16c4e8d_3 157 | - libprotobuf=3.20.1=h4ff587b_0 158 | - libsodium=1.0.18=h7b6447c_0 159 | - libssh2=1.10.0=h8f2d780_0 160 | - libstdcxx-ng=12.2.0=h46fd767_19 161 | - libtasn1=4.16.0=h27cfd23_0 162 | - libtiff=4.4.0=hecacb30_2 163 | - libunistring=0.9.10=h27cfd23_0 164 | - 
libvpx=1.7.0=h439df22_0 165 | - libwebp=1.2.4=h11a3e52_0 166 | - libwebp-base=1.2.4=h5eee18b_0 167 | - libxcb=1.15=h7f8727e_0 168 | - libxkbcommon=1.0.1=hfa300c1_0 169 | - libxml2=2.9.14=h74e7548_0 170 | - libxslt=1.1.35=h4e12654_0 171 | - libzlib=1.2.13=h166bdaf_4 172 | - llvm-openmp=14.0.6=h9e868ea_0 173 | - lz4-c=1.9.3=h295c915_1 174 | - markdown=3.4.1=pyhd8ed1ab_0 175 | - markupsafe=2.1.1=py38h0a891b7_2 176 | - matplotlib=3.5.3=py38h06a4308_0 177 | - matplotlib-base=3.5.3=py38hf590b9c_0 178 | - matplotlib-inline=0.1.6=py38h06a4308_0 179 | - mkl=2021.4.0=h06a4308_640 180 | - mkl-service=2.4.0=py38h7f8727e_0 181 | - mkl_fft=1.3.1=py38hd3c417c_0 182 | - mkl_random=1.2.2=py38h51133e4_0 183 | - multidict=6.0.2=py38h5eee18b_0 184 | - munkres=1.1.4=py_0 185 | - ncurses=6.3=h5eee18b_3 186 | - nest-asyncio=1.5.5=py38h06a4308_0 187 | - nettle=3.7.3=hbbd107a_1 188 | - nsight-compute=2022.3.0.22=0 189 | - nspr=4.33=h295c915_0 190 | - nss=3.74=h0370c37_0 191 | - numpy=1.23.4=py38h14f4228_0 192 | - numpy-base=1.23.4=py38h31eccc5_0 193 | - oauthlib=3.2.2=pyhd8ed1ab_0 194 | - openexr=3.1.5=he0ac6c6_1 195 | - openexr-python=1.3.9=py38h094547f_1 196 | - openh264=2.1.1=h4ff587b_0 197 | - openssl=1.1.1s=h7f8727e_0 198 | - opt_einsum=3.3.0=pyhd3eb1b0_1 199 | - packaging=21.3=pyhd3eb1b0_0 200 | - parso=0.8.3=pyhd3eb1b0_0 201 | - pcre=8.45=h295c915_0 202 | - pcre2=10.37=he7ceb23_1 203 | - perl=5.34.0=h5eee18b_2 204 | - pexpect=4.8.0=pyhd3eb1b0_3 205 | - pickleshare=0.7.5=pyhd3eb1b0_1003 206 | - pillow=9.2.0=py38hace64e9_1 207 | - pip=22.2.2=py38h06a4308_0 208 | - ply=3.11=py38_0 209 | - prompt-toolkit=3.0.20=pyhd3eb1b0_0 210 | - protobuf=3.20.1=py38hfa26641_0 211 | - psutil=5.9.0=py38h5eee18b_0 212 | - ptyprocess=0.7.0=pyhd3eb1b0_2 213 | - pure_eval=0.2.2=pyhd3eb1b0_0 214 | - pyasn1=0.4.8=pyhd3eb1b0_0 215 | - pyasn1-modules=0.2.7=py_0 216 | - pycparser=2.21=pyhd3eb1b0_0 217 | - pygments=2.11.2=pyhd3eb1b0_0 218 | - pyjwt=2.4.0=py38h06a4308_0 219 | - pyopenssl=22.0.0=pyhd3eb1b0_0 220 | - pyparsing=3.0.9=py38h06a4308_0 221 | - pyqt=5.15.7=py38h6a678d5_1 222 | - pyqt5-sip=12.11.0=py38h6a678d5_1 223 | - pysocks=1.7.1=py38h06a4308_0 224 | - python=3.8.15=h7a1cb2a_2 225 | - python-dateutil=2.8.2=pyhd3eb1b0_0 226 | - python-flatbuffers=2.0=pyhd3eb1b0_0 227 | - python_abi=3.8=2_cp38 228 | - pytorch=1.13.0=py3.8_cuda11.6_cudnn8.3.2_0 229 | - pytorch-cuda=11.6=h867d48c_0 230 | - pytorch-mutex=1.0=cuda 231 | - pyu2f=0.1.5=pyhd8ed1ab_0 232 | - pyzmq=23.2.0=py38h6a678d5_0 233 | - qt-main=5.15.2=h327a75a_7 234 | - qt-webengine=5.15.9=hd2b0992_4 235 | - qtwebkit=5.212=h4eab89a_4 236 | - re2=2022.04.01=h295c915_0 237 | - readline=8.2=h5eee18b_0 238 | - requests=2.28.1=py38h06a4308_0 239 | - requests-oauthlib=1.3.1=pyhd8ed1ab_0 240 | - rsa=4.9=pyhd8ed1ab_0 241 | - scipy=1.9.3=py38h8ce737c_2 242 | - setuptools=65.5.0=py38h06a4308_0 243 | - sip=6.6.2=py38h6a678d5_0 244 | - six=1.16.0=pyhd3eb1b0_1 245 | - snappy=1.1.9=h295c915_0 246 | - sqlite=3.40.0=h5082296_0 247 | - stack_data=0.2.0=pyhd3eb1b0_0 248 | - tensorboard=2.10.1=pyhd8ed1ab_0 249 | - tensorboard-data-server=0.6.1=py38h52d8a92_0 250 | - tensorboard-plugin-wit=1.8.1=py38h06a4308_0 251 | - tensorflow=2.10.0=mkl_py38hd2379f1_0 252 | - tensorflow-base=2.10.0=mkl_py38hb9daa73_0 253 | - tensorflow-estimator=2.10.0=py38h06a4308_0 254 | - termcolor=2.1.0=py38h06a4308_0 255 | - tk=8.6.12=h1ccaba5_0 256 | - toml=0.10.2=pyhd3eb1b0_0 257 | - torchaudio=0.13.0=py38_cu116 258 | - torchvision=0.14.0=py38_cu116 259 | - tornado=6.2=py38h5eee18b_0 260 | - traitlets=5.1.1=pyhd3eb1b0_0 
261 | - typing_extensions=4.4.0=py38h06a4308_0 262 | - urllib3=1.26.12=py38h06a4308_0 263 | - wcwidth=0.2.5=pyhd3eb1b0_0 264 | - werkzeug=2.2.2=pyhd8ed1ab_0 265 | - wheel=0.37.1=pyhd3eb1b0_0 266 | - wrapt=1.14.1=py38h5eee18b_0 267 | - x264=1!157.20191217=h7b6447c_0 268 | - xz=5.2.8=h5eee18b_0 269 | - yarl=1.8.1=py38h5eee18b_0 270 | - zeromq=4.3.4=h2531618_0 271 | - zipp=3.11.0=pyhd8ed1ab_0 272 | - zlib=1.2.13=h166bdaf_4 273 | - zstd=1.5.2=ha4553b6_0 274 | - pip: 275 | - easydict==1.10 276 | - gputil==1.4.0 277 | - opencv-python==4.6.0.66 278 | - pandas==1.5.2 279 | - pytz==2022.7 280 | - torch-tb-profiler==0.4.0 281 | prefix: /opt/conda/envs/metaPolka 282 | -------------------------------------------------------------------------------- /utils/Camera.py: -------------------------------------------------------------------------------- 1 | #import bpy 2 | import math 3 | import torch, gc 4 | import numpy as np 5 | 6 | class Camera: 7 | def __init__(self, cam_type, fov, focal_length, location, rotation, name, sensor_width, sensor_height, resolution_x, resolution_y, device): 8 | self.type = cam_type # 'Fisheye', 'Pano' 9 | self.fov = fov # camera.angle 10 | self.focal_length = focal_length 11 | self.location = location 12 | self.rotation = rotation 13 | self.name = name 14 | self.device = device 15 | 16 | self.sensor_width = sensor_width 17 | self.sensor_height = sensor_height 18 | 19 | self.resolution_x = resolution_x 20 | self.resolution_y = resolution_y 21 | 22 | self.create_camera() 23 | 24 | def set_device(self, device): 25 | self.device = device 26 | 27 | def set_extrinsic(self, extrinsic): 28 | self.extrinsic = extrinsic 29 | 30 | def create_camera(self): 31 | ''' 32 | camera_data = bpy.data.cameras.new(name=self.name) 33 | camera_obj = bpy.data.objects.new(self.name, camera_data) 34 | self.obj = camera_obj 35 | 36 | self.obj.location = self.location 37 | self.obj.rotation_euler = self.rotation 38 | self.obj.data.type = 'PANO' 39 | 40 | self.obj.data.lens_unit = 'MILLIMETERS' 41 | self.obj.data.angle = self.focal_length 42 | 43 | self.obj.data.sensor_width = self.sensor_width 44 | self.obj.data.sensor_height = self.sensor_height 45 | 46 | self.set_lens_type() 47 | 48 | bpy.context.scene.collection.objects.link(self.obj) 49 | bpy.context.view_layer.update() 50 | ''' 51 | self.extrinsic = None 52 | 53 | ''' 54 | if self.device : 55 | self.extrinsic = torch.Tensor(self.obj.matrix_world).to(self.device) 56 | else: 57 | self.extrinsic = np.zeros((4, 4)) 58 | ''' 59 | 60 | def set_cam_type(self, cam_type): 61 | self.type = cam_type 62 | self.set_lens_type() 63 | 64 | def set_lens_type(self): 65 | if self.type == 'Fisheye': 66 | self.obj.data.cycles.panorama_type = 'FISHEYE_EQUISOLID' 67 | self.obj.data.cycles.fisheye_lens = self.focal_length 68 | 69 | else: 70 | self.obj.data.cycles.panorama_type = 'EQUIRECTANGULAR' 71 | 72 | def get_name(self): 73 | return self.name 74 | 75 | def get_cam_obj(self): 76 | return self.obj 77 | 78 | def get_extrinsic(self): 79 | if self.extrinsic is None: 80 | x, y, z = self.rotation 81 | cos_x, sin_x = math.cos(x), math.sin(x) 82 | Rx = torch.Tensor([[1, 0, 0], [0, cos_x, -sin_x], [0, sin_x, cos_x]]) 83 | cos_y, sin_y = math.cos(y), math.sin(y) 84 | Ry = torch.Tensor([[cos_y, 0, sin_y], [0, 1, 0], [-sin_y, 0, cos_y]]) 85 | cos_z, sin_z = math.cos(z), math.sin(z) 86 | Rz = torch.Tensor([[cos_z, -sin_z, 0], [sin_z, cos_z, 0], [0, 0, 1]]) 87 | 88 | rot = Rz @ Ry @ Rx 89 | t = torch.Tensor(self.location).reshape(3, 1) 90 | 91 | mat = torch.hstack([rot, t]) # 3x4 92 
| line = torch.Tensor([[0, 0, 0, 1]]) 93 | mat = torch.vstack([mat, line]) 94 | 95 | if self.device : 96 | mat = mat.to(self.device) 97 | 98 | self.extrinsic = mat 99 | 100 | return self.extrinsic 101 | 102 | def get_whole_pts(self): 103 | return 0 104 | 105 | def whole_pixel2world(self): 106 | return 0 107 | 108 | def pixel2world(self, pixels): 109 | return 0 110 | 111 | def get_resolution(self): 112 | return (self.resolution_x, self.resolution_y) 113 | 114 | 115 | 116 | class FisheyeCam(Camera): 117 | def __init__(self, opt, location, rotation, name, device, config_path): 118 | super().__init__("Fisheye", opt.fov, opt.focal_length, location, rotation, name, opt.sensor_width, opt.sensor_height, opt.fisheye_resolution_x, opt.fisheye_resolution_y, device) 119 | 120 | self.device = device 121 | self.config_path = config_path 122 | self.pts = None 123 | 124 | self.read_config_from_file() 125 | 126 | def update_intrinsic(self, poly_coef, inv_poly_coef, center, affine): 127 | self.poly_coef = poly_coef 128 | self.inv_poly_coef = inv_poly_coef 129 | self.center_x = center[0] 130 | self.center_y = center[1] 131 | self.c, self.d, self.e = affine 132 | 133 | 134 | 135 | def read_config_from_file(self): 136 | 137 | with open(self.config_path) as f: 138 | lines = f.readlines() 139 | 140 | calib_data = [] 141 | for line in lines: 142 | if (line[0] == '#' or line[0] == '\n'): 143 | continue 144 | calib_data.append([float(i) for i in line.split()]) 145 | 146 | # polynopmial coeeficients for the DIRECT mapping function 147 | self.num_poly = int(calib_data[0][0]) 148 | self.poly_coef = torch.tensor(calib_data[0][1:]).to(self.device) 149 | 150 | # polynomial coefficients for the inverse mapping function 151 | self.inv_poly_coef = torch.tensor(calib_data[1][1:])#.to(self.device) 152 | self.num_inv_poly = int(calib_data[1][0]) 153 | 154 | # center: 155 | self.center_x = calib_data[2][1] 156 | self.center_y = calib_data[2][0] 157 | 158 | # affine parameters "c", "d", "e" 159 | self.c, self.d, self.e = calib_data[3] 160 | 161 | 162 | def whole_pixel2world(self): 163 | # torch array of pixels # 2 X N 164 | x = torch.range(0, self.resolution_x-1).to(self.device) 165 | y = torch.range(0, self.resolution_y-1).to(self.device) 166 | grid_x, grid_y = torch.meshgrid(y, x) 167 | pixels = torch.stack((grid_y, grid_x)).reshape(2, -1) # 2 x N 168 | 169 | 170 | self.pts = self.pixel2world(pixels) 171 | 172 | 173 | def get_whole_pts(self): 174 | if self.pts is None: 175 | self.whole_pixel2world() 176 | 177 | return self.pts 178 | 179 | 180 | # unproject pixel coordinate to world (spherical ray) 181 | def pixel2world(self, input_pixels): 182 | """ 183 | pixel2world unprojects a 2D pixel point onto the unit sphere 184 | you can find out the world coordinate point by multiplying depth value 185 | """ 186 | # pixel * 3 187 | 188 | 189 | pixels = (input_pixels) - torch.Tensor([[self.center_x], [self.center_y]]).to(self.device) 190 | 191 | affine_mat = torch.tensor([[self.c, self.d], [self.e, 1.]]).to(self.device) 192 | inv_affine_mat = torch.inverse(affine_mat) 193 | sensor_coord = torch.matmul(inv_affine_mat, pixels) 194 | 195 | r = torch.sqrt(sensor_coord[0]**2 + sensor_coord[1]**2) # N 196 | r_poly = torch.stack([r**i for i in range(self.num_poly)]) 197 | 198 | z = -1 * torch.matmul(self.poly_coef, r_poly) 199 | 200 | z = torch.unsqueeze(z, 0) 201 | 202 | pts = torch.cat([pixels, z], dim=0) 203 | pts_norm = pts / torch.linalg.norm(pts, axis=0) 204 | 205 | rot_axis = torch.Tensor([[1, 0, 0], [0, -1, 0], [0, 0, 
-1]]).to(self.device) 206 | 207 | result = torch.matmul(rot_axis, pts_norm) 208 | 209 | del r_poly 210 | 211 | return result 212 | 213 | 214 | def world2pixel(self, pts): 215 | #pts : 3xN 216 | # rotate axis (x->x, y->-y, z-> -z) 217 | rot_axis = torch.Tensor([[1, 0, 0], [0, -1, 0], [0, 0, -1]]).to(pts.device) 218 | rot_pts = torch.matmul(rot_axis, pts) 219 | 220 | x = rot_pts[0] 221 | y = rot_pts[1] 222 | z = rot_pts[2] 223 | 224 | norm = torch.hypot(x, y) 225 | theta = torch.arctan2(-z, norm) 226 | 227 | # test 228 | rho = 0 229 | for i in range(self.num_inv_poly): 230 | rho += theta**i * self.inv_poly_coef[i].to(pts.device) 231 | 232 | u = x/norm * rho 233 | v = y/norm * rho 234 | 235 | pixels = torch.stack([u, v]) # 2 x N 236 | 237 | 238 | affine_mat = torch.tensor([[self.c, self.d], [self.e, 1.]]).to(pts.device) 239 | pixel_affine = (torch.matmul(affine_mat, pixels) + torch.Tensor([[self.center_x], [self.center_y]]).to(pts.device)) #/ 3. 240 | 241 | max_theta = math.radians(self.fov) / 2.0 242 | theta = theta + math.pi/2 # zenith angle from optical axis 243 | 244 | valid = torch.ones(theta.shape).to(pts.device) 245 | valid[theta > max_theta] = 0 246 | 247 | valid[pixel_affine[0] >= self.resolution_x] = 0 248 | valid[pixel_affine[0] <0] = 0 249 | valid[pixel_affine[1] >= self.resolution_y] = 0 250 | valid[pixel_affine[1] <0] = 0 251 | 252 | pixel_affine[0][valid == 0] = 0 253 | pixel_affine[1][valid == 0] = 0 254 | 255 | 256 | pixel_result = torch.cat([pixel_affine, valid.unsqueeze(0)]) #valid.unsqueeze(0)]) 257 | 258 | return pixel_result# 3 x N [u, v, valid(valid fov)] 259 | 260 | def world2sensor(self, pts): 261 | pixel = self.world2pixel(pts) 262 | 263 | u = pixel[0] 264 | v = pixel[1] 265 | 266 | u = u - self.center_x 267 | v = v - self.center_y 268 | 269 | u = 2 * u / self.resolution_x 270 | v = 2 * v / self.resolution_y 271 | 272 | pixel[0] = u 273 | pixel[1] = v 274 | 275 | return pixel 276 | 277 | 278 | 279 | 280 | 281 | class PanoramaCam(Camera): 282 | def __init__(self, opt, location, rotation, name, device): 283 | super().__init__("Pano", opt.fov, opt.focal_length, location, rotation, name, opt.sensor_width, opt.sensor_height, opt.pano_resolution_x, opt.pano_resolution_y, device) 284 | 285 | self.device = device 286 | self.pts = None 287 | 288 | 289 | 290 | def whole_pixel2world(self): 291 | # torch array of pixels # 2 X N 292 | x = torch.range(0, self.resolution_x-1).to(self.device) 293 | y = torch.range(0, self.resolution_y-1).to(self.device) 294 | grid_x, grid_y = torch.meshgrid(y, x) 295 | pixels = torch.stack((grid_y, grid_x)).reshape(2, -1) # 2 x N 296 | 297 | self.pts = self.pixel2world(pixels) 298 | 299 | 300 | def get_whole_pts(self): 301 | if self.pts is None: 302 | self.whole_pixel2world() 303 | 304 | return self.pts 305 | 306 | 307 | # unproject pixel coordinate to world (spherical ray) 308 | def pixel2world(self, pixels): 309 | """ 310 | pixel2world unprojects a 2D pixel point onto the unit sphere 311 | you can find out the world coordinate point by multiplying depth value 312 | """ 313 | u = pixels[0] 314 | v = pixels[1] 315 | 316 | # normalized to [-0.5, 0.5] 317 | u_norm = u / self.resolution_x - 0.5 318 | v_norm = v / self.resolution_y - 0.5 319 | 320 | phi = u_norm * 2 * math.pi 321 | theta = v_norm * math.pi 322 | 323 | z = -torch.sin(theta) 324 | 325 | r = torch.cos(theta) 326 | x = r * torch.sin(phi) 327 | y = r * torch.cos(phi) 328 | 329 | 330 | result = torch.stack([x, y, z]) 331 | 332 | return result 333 | 334 | def world2pixel(self, pts): 335 | x = 
pts[0] 336 | y = pts[1] 337 | z = pts[2] 338 | 339 | 340 | phi = torch.arctan2(x, y) 341 | theta = -torch.arctan(z/torch.hypot(x, y)) 342 | 343 | u_norm = phi/(math.pi*2) + 0.5 344 | v_norm = theta/math.pi + 0.5 345 | 346 | u = u_norm * self.resolution_x 347 | v = v_norm * self.resolution_y 348 | 349 | result = torch.stack([u, v]) 350 | 351 | return result -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /dataset/Generator.py: -------------------------------------------------------------------------------- 1 | import bpy 2 | import os, math, random 3 | 4 | from utils.Camera import * 5 | 6 | class Generator: 7 | def __init__(self, arg): 8 | self.N_scene = arg.N_scene 9 | self.N_obj = arg.N_obj 10 | 11 | self.max_dist_bg = arg.max_dist_bg 12 | self.min_dist_bg = arg.min_dist_bg 13 | 14 | self.obj_path = arg.obj_path 15 | self.tex_path = arg.tex_path 16 | 17 | self.save_fisheye_path = arg.save_fisheye_path 18 | self.save_pano_path = arg.save_pano_path 19 | 20 | self.fisheye_resolution_x = arg.fisheye_resolution_x 21 | self.fisheye_resolution_y = arg.fisheye_resolution_y 22 | 23 | self.pano_resolution_x = arg.pano_resolution_x 24 | self.pano_resolution_y = arg.pano_resolution_y 25 | 26 | self.fov = arg.fov 27 | self.focal_length = arg.focal_length 28 | 29 | self.sensor_width = arg.sensor_width 30 | self.sensor_height = arg.sensor_height 31 | self.baseline = arg.baseline 32 | 33 | self.arg = arg 34 | 35 | 36 | 37 | 38 | def gen(self): 39 | self.scene_init() 40 | 41 | for i in range(self.N_scene): 42 | self.clean_up_scene() 43 | 44 | bg_size = self.create_background() 45 | 46 | if i%2 == 0: 47 | self.create_random_object(bg_size, 'TEXTURE') 48 | else: 49 | self.create_random_object(bg_size, 'NONE') 50 | 51 | 52 | self.render_init() 53 | 54 | # PNG file 55 | self.render('RGB', i) 56 | self.links.clear() 57 | self.render('Albedo', i) 58 | self.links.clear() 59 | self.render('Occlusion', i) 60 | self.links.clear() 61 | 62 | # OpenEXR file 63 | self.scene.render.image_settings.file_format = 'OPEN_EXR' 64 | self.render('Depth', i) 65 | self.links.clear() 66 | self.render('Normal', i) 67 | self.links.clear() 68 | 69 | 70 | def scene_init(self): 71 | self.scene = bpy.context.scene 72 | 73 | # to reduce noise - fixed value 74 | self.scene.cycles.samples = 1000 75 | self.scene.cycles.sample_clamp_indirect = 1.0 76 | 77 | # to use node for rendering 78 | self.scene.use_nodes = True 79 | self.nodes = self.scene.node_tree.nodes 80 | self.links = self.scene.node_tree.links 81 | 82 | 83 | ############################ 84 | ## Camera Setting - share 85 | ############################ 86 | 87 | radians_90 = math.radians(90) 88 | unit = self.baseline / 2 89 | 90 | Camera1 = FisheyeCam(self.arg, (unit, unit, 0),(radians_90, 0, math.radians(-45)),'cam1', None) 91 | Camera2 = FisheyeCam(self.arg, (-unit, unit, 0),(radians_90, 0, math.radians(45)), 'cam2', None) 92 | Camera3 = FisheyeCam(self.arg, (-unit, -unit, 0),(radians_90, 0, math.radians(135)), 'cam3', None) 93 | Camera4 = FisheyeCam(self.arg, (unit, -unit, 0),(radians_90, 0, math.radians(-135)), 'cam4', None) 94 | 95 | Camera_pano = Camera('Pano', self.fov, self.focal_length, (0, 0, 0), (radians_90, 0, 0), 'pano_cam', self.sensor_width, self.sensor_height, self.pano_resolution_x, self.pano_resolution_y, None) 96 | self.cam_list = [Camera1, Camera2, Camera3, Camera4] 97 | self.pano_cam_list = [Camera_pano] 98 | 99 | #for cam in self.cam_list: 100 | #self.scene.collection.objects.link(cam.get_cam_obj()) 101 | 102 | #for cam in self.pano_cam_list: 103 | #self.scene.collection.objects.link(cam.get_cam_obj()) 104 | 105 | ############################ 106 | ## Light Setting - share 107 | ############################ 108 | light = bpy.data.objects['Light'] 109 | #light.location = (1, 3, 2) 110 | light.location = (0, 0, 0) 111 | light.data.shadow_soft_size = 0 112 | 
#light.data.cycles.cast_shadow = False # doesn't project shadow on the scene 113 | 114 | 115 | def clean_up_scene(self): 116 | 117 | for node in self.nodes: 118 | self.nodes.remove(node) 119 | 120 | for mat in bpy.data.materials: 121 | mat.user_clear() 122 | bpy.data.materials.remove(mat) 123 | 124 | for texture in bpy.data.textures: 125 | texture.user_clear() 126 | bpy.data.textures.remove(texture) 127 | 128 | bpy.ops.object.select_all(action='DESELECT') 129 | 130 | for item in bpy.data.objects: 131 | if item.type == "MESH": 132 | bpy.data.objects[item.name].select_set(True) 133 | bpy.ops.object.delete() 134 | 135 | for item in bpy.data.meshes: 136 | bpy.data.meshes.remove(item) 137 | 138 | 139 | 140 | 141 | def create_background(self): 142 | """ randomly create background (sphere or cube) 143 | 144 | Return: 145 | size : the maximum size of background 146 | 147 | """ 148 | bg_type = random.choice(['SPHERE', 'CUBE']) # randomly choose the background type 149 | size = random.uniform(self.min_dist_bg, self.max_dist_bg) 150 | #size=10 151 | #bg_type = 'CUBE'# 152 | tex_list = os.listdir(self.tex_path) 153 | tex_name = random.choice(tex_list) 154 | imported_tex_path = os.path.join(self.tex_path, tex_name) 155 | 156 | if bg_type == 'SPHERE': 157 | bpy.ops.mesh.primitive_uv_sphere_add(location=(0, 0, 0)) 158 | sphere = bpy.data.objects['Sphere'] 159 | sphere.select_set(True) 160 | bpy.ops.object.shade_smooth() 161 | sphere.scale = (size, size, size) 162 | self.apply_texture_to_object(imported_tex_path, sphere) 163 | 164 | else: 165 | bpy.ops.mesh.primitive_cube_add(location=(0, 0, 0)) 166 | cube = bpy.data.objects['Cube'] 167 | cube.select_set(True) 168 | bpy.ops.object.shade_smooth() 169 | cube.scale = (size, size, size) 170 | self.apply_texture_to_object(imported_tex_path, cube) 171 | 172 | 173 | return size 174 | 175 | 176 | def create_random_object(self, size, type='NONE'): 177 | """ randomly import obj files and put/rotate/resize them on the scene 178 | 179 | Args: 180 | size : the maximum size of the background 181 | type: ['TEXTURE', 'NONE'] TEXTURE - draw a texture image onto object 182 | 183 | """ 184 | obj_list = os.listdir(self.obj_path) 185 | obj_list = [file for file in obj_list if file.endswith(".obj")] 186 | obj_choice = random.sample(obj_list, self.N_obj) 187 | 188 | tex_list = os.listdir(self.tex_path) 189 | tex_choice = random.sample(tex_list, self.N_obj) 190 | 191 | radian_360 = math.radians(360) 192 | 193 | for (obj_name, tex_name) in zip(obj_choice, tex_choice): 194 | prev_objects = set(self.scene.objects.keys()) 195 | 196 | path = os.path.join(self.obj_path, obj_name) 197 | bpy.ops.import_scene.obj(filepath=path) 198 | imported_obj_name = list(set(self.scene.objects.keys()) - prev_objects)[0] 199 | imported_obj = bpy.data.objects[imported_obj_name] 200 | 201 | # object location range [-size, -1.5] and [1.5, size], to guarantee the clear view of the camera (camera is located at (0, 0, 0)) 202 | loc_x, loc_y, loc_z = random.uniform(0.4, 2), random.uniform(0.4, 2), random.uniform(0.4, 2) 203 | imported_obj.location = (random.choice([1, -1])*loc_x, random.choice([1, -1])*loc_y, random.choice([1, -1])*loc_z) 204 | 205 | #mag = math.sqrt(loc_x**2 + loc_y**2 + loc_z**2) 206 | #scale = random.uniform(mag/80, mag/120) 207 | scale_x, scale_y, scale_z = random.uniform(0.8, 2), random.uniform(0.8, 2), random.uniform(0.8, 2) 208 | 209 | imported_obj.scale = (scale_x, scale_y, scale_z) 210 | imported_obj.rotation_euler = (random.uniform(0, radian_360), random.uniform(0, radian_360), 
random.uniform(0, radian_360)) 211 | 212 | if type == 'TEXTURE': 213 | imported_tex_path = os.path.join(self.tex_path, tex_name) 214 | self.apply_texture_to_object(imported_tex_path, imported_obj) 215 | 216 | 217 | 218 | def apply_texture_to_object(self, tex_path, obj): 219 | """ randomly import image texture and add to the object 220 | 221 | Args: 222 | tex_path : directory path containing texture img files 223 | obj : a target bpy object to use the texture 224 | 225 | """ 226 | # Create material for texture 227 | mat = bpy.data.materials.new(obj.data.name + '_texture') 228 | obj.data.materials.clear() 229 | obj.data.materials.append(mat) 230 | 231 | mat.use_nodes = True 232 | mat_nodes = mat.node_tree.nodes 233 | 234 | bsdf_node = mat_nodes['Principled BSDF'] 235 | mat_out_node = mat_nodes['Material Output'] 236 | 237 | tex_node = mat_nodes.new('ShaderNodeTexImage') 238 | tex_img = bpy.data.images.load(tex_path) 239 | tex_node.image = tex_img 240 | 241 | # Material shading node linking 242 | mat.node_tree.links.new(tex_node.outputs['Color'], bsdf_node.inputs['Base Color']) 243 | mat.node_tree.links.new(bsdf_node.outputs['BSDF'], mat_out_node.inputs['Surface']) 244 | 245 | def render_init(self): 246 | self.scene.render.image_settings.file_format = 'PNG' 247 | self.scene.render.engine = 'CYCLES' 248 | 249 | # using GPU 250 | 251 | self.scene.cycles.device = 'GPU' 252 | bpy.context.preferences.addons['cycles'].preferences.compute_device_type = 'CUDA' 253 | 254 | for device in bpy.context.preferences.addons['cycles'].preferences.devices: 255 | if device.type == 'CUDA': 256 | device.use = True 257 | print("Device '{}' type {} : {}".format(device.name, device.type, device.use)) 258 | 259 | 260 | # rendering image resolution 261 | self.render_layer_node = self.nodes.new('CompositorNodeRLayers') 262 | self.compositor_node = self.nodes.new('CompositorNodeComposite') 263 | self.map_value_node = self.nodes.new('CompositorNodeMapValue') 264 | 265 | self.alpha_node = self.nodes.new("CompositorNodeSetAlpha") 266 | 267 | # for occlusion map 268 | self.math_node = self.nodes.new('CompositorNodeMath') 269 | self.math_node.operation = "GREATER_THAN" 270 | self.math_node.inputs[1].default_value = 0.5 271 | 272 | 273 | # Those values would be paramaterized 274 | self.map_value_node.offset[0] = 0 275 | self.map_value_node.size[0] = 0.1 276 | self.map_value_node.use_min = True 277 | self.map_value_node.use_max = True 278 | self.map_value_node.min[0] = 0.0 279 | self.map_value_node.max[0] = 1. 
280 | 281 | # for rendering normal map 282 | # normal map nodes 283 | 284 | self.seperate_RGBA_node = self.nodes.new("CompositorNodeSepRGBA") 285 | self.add_node_R = self.nodes.new("CompositorNodeMath") 286 | self.add_node_R.operation = "ADD" 287 | self.add_node_R.inputs[1].default_value = 1 288 | 289 | self.add_node_G = self.nodes.new("CompositorNodeMath") 290 | self.add_node_G.operation = "ADD" 291 | self.add_node_G.inputs[1].default_value = 1 292 | 293 | self.add_node_B = self.nodes.new("CompositorNodeMath") 294 | self.add_node_B.operation = "ADD" 295 | self.add_node_B.inputs[1].default_value = 1 296 | 297 | self.divide_node_R = self.nodes.new("CompositorNodeMath") 298 | self.divide_node_R.operation = "DIVIDE" 299 | self.divide_node_R.inputs[1].default_value = 2 300 | 301 | self.divide_node_G = self.nodes.new("CompositorNodeMath") 302 | self.divide_node_G.operation = "DIVIDE" 303 | self.divide_node_G.inputs[1].default_value = 2 304 | 305 | self.divide_node_B = self.nodes.new("CompositorNodeMath") 306 | self.divide_node_B.operation = "DIVIDE" 307 | self.divide_node_B.inputs[1].default_value = 2 308 | 309 | self.combine_RBGA_node = self.nodes.new("CompositorNodeCombRGBA") 310 | 311 | 312 | 313 | def render(self, render_type, i_th): 314 | """ 315 | Arg: 316 | render_type: ['RGB', 'Depth', 'Albedo'] 317 | """ 318 | 319 | if render_type == "RGB": 320 | self.links.new(self.render_layer_node.outputs[0], self.compositor_node.inputs[0]) 321 | elif render_type == "Albedo": 322 | bpy.context.view_layer.use_pass_diffuse_color = True 323 | self.links.new(self.render_layer_node.outputs['DiffCol'], self.alpha_node.inputs['Image']) 324 | self.links.new(self.render_layer_node.outputs['Alpha'], self.alpha_node.inputs['Alpha']) 325 | self.links.new(self.alpha_node.outputs['Image'], self.compositor_node.inputs[0]) 326 | elif render_type == "Depth": 327 | bpy.context.view_layer.use_pass_z = True 328 | self.links.new(self.render_layer_node.outputs['Depth'], self.map_value_node.inputs[0]) 329 | self.links.new(self.map_value_node.outputs[0], self.compositor_node.inputs[0]) 330 | elif render_type == "Occlusion": 331 | bpy.context.view_layer.use_pass_shadow = True 332 | self.links.new(self.render_layer_node.outputs['Shadow'], self.math_node.inputs[0]) 333 | self.links.new(self.math_node.outputs[0], self.compositor_node.inputs[0]) 334 | 335 | elif render_type == "Normal": 336 | bpy.context.view_layer.use_pass_normal = True 337 | # R 338 | self.links.new(self.render_layer_node.outputs["Normal"], self.seperate_RGBA_node.inputs[0]) 339 | self.links.new(self.seperate_RGBA_node.outputs['R'], self.add_node_R.inputs["Value"]) 340 | self.links.new(self.add_node_R.outputs["Value"], self.divide_node_R.inputs['Value']) 341 | self.links.new(self.divide_node_R.outputs["Value"], self.combine_RBGA_node.inputs['R']) 342 | self.links.new(self.combine_RBGA_node.outputs[0], self.compositor_node.inputs[0]) 343 | 344 | # G 345 | self.links.new(self.seperate_RGBA_node.outputs['G'], self.add_node_G.inputs["Value"]) 346 | self.links.new(self.add_node_G.outputs["Value"], self.divide_node_G.inputs['Value']) 347 | self.links.new(self.divide_node_G.outputs["Value"], self.combine_RBGA_node.inputs['G']) 348 | self.links.new(self.combine_RBGA_node.outputs[0], self.compositor_node.inputs[0]) 349 | 350 | #B 351 | self.links.new(self.seperate_RGBA_node.outputs['B'], self.add_node_B.inputs["Value"]) 352 | self.links.new(self.add_node_B.outputs["Value"], self.divide_node_B.inputs['Value']) 353 | self.links.new(self.divide_node_B.outputs["Value"], 
self.combine_RBGA_node.inputs['B']) 354 | self.links.new(self.combine_RBGA_node.outputs[0], self.compositor_node.inputs[0]) 355 | 356 | else: 357 | print("Unsupported function!") 358 | 359 | 360 | self.scene.render.resolution_x = self.fisheye_resolution_x 361 | self.scene.render.resolution_y = self.fisheye_resolution_y 362 | 363 | img_name = '{0:05d}'.format(i_th) 364 | 365 | for cam in self.cam_list: 366 | img_path = os.path.join(render_type, os.path.join(cam.get_name(), img_name)) 367 | self.scene.render.filepath = os.path.join(self.save_fisheye_path, img_path) 368 | self.scene.camera = cam.get_cam_obj() 369 | bpy.ops.render.render(write_still=True) 370 | 371 | 372 | self.scene.render.resolution_x = self.pano_resolution_x 373 | self.scene.render.resolution_y = self.pano_resolution_y 374 | 375 | if render_type == "Occlusion": 376 | return 377 | 378 | for cam in self.pano_cam_list: 379 | img_path = os.path.join(render_type, os.path.join(cam.get_name(), img_name)) 380 | self.scene.render.filepath = os.path.join(self.save_pano_path, img_path) 381 | self.scene.camera = cam.get_cam_obj() 382 | bpy.ops.render.render(write_still=True) 383 | 384 | 385 | 386 | 387 | --------------------------------------------------------------------------------
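A note on the camera model in utils/Camera.py: FisheyeCam implements an omnidirectional polynomial projection (in the style of Scaramuzza/OCamCalib calibration files), reading the forward and inverse polynomial coefficients, the distortion center, and the affine parameters c, d, e from a plain-text calibration file, and mapping between pixels and unit rays with pixel2world / world2pixel. Below is a minimal, hypothetical round-trip sketch: only the attribute names that FisheyeCam actually reads from its option object are assumed, and every numeric value is an illustrative placeholder rather than the configuration used by the authors.

# Hypothetical usage sketch; field names in `opt` mirror what FisheyeCam reads,
# the numeric values are placeholders.
import math
from argparse import Namespace

from utils.Camera import FisheyeCam

opt = Namespace(fov=185.0, focal_length=1.8,
                sensor_width=6.17, sensor_height=4.55,
                fisheye_resolution_x=1024, fisheye_resolution_y=1024)

cam = FisheyeCam(opt, location=(0.0, 0.0, 0.0),
                 rotation=(math.radians(90), 0.0, 0.0),
                 name='cam1', device='cpu',
                 config_path='calib_results.txt')

rays = cam.get_whole_pts()   # 3 x N unit rays, one per fisheye pixel
uv = cam.world2pixel(rays)   # 3 x N: [u, v, valid] reprojection

Note that world2pixel returns a per-ray valid flag, so points outside the fisheye field of view or the image bounds can be masked before any sampling step.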
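dataset/Generator.py must run inside Blender's Python, since it drives bpy and the Cycles renderer: it builds N_scene randomized scenes, then renders RGB/Albedo/Occlusion as PNG and Depth/Normal as OpenEXR for the four fisheye cameras plus the panoramic reference camera. Two things would need to be reconciled before running this version as-is: scene_init constructs FisheyeCam without the config_path argument that FisheyeCam.__init__ requires, and the bpy camera creation in Camera.create_camera is commented out, so get_cam_obj() has no object to return. With those resolved, a minimal, hypothetical driver (launched, e.g., via blender --background --python some_script.py) could look like the sketch below; only the attribute names that Generator.__init__ reads are assumed, and all paths and values are placeholders.

# Hypothetical driver; run from inside Blender's Python. Attribute names mirror
# what Generator.__init__ reads; every value below is a placeholder.
from argparse import Namespace

from dataset.Generator import Generator

arg = Namespace(
    N_scene=10, N_obj=8,
    min_dist_bg=4.0, max_dist_bg=10.0,
    obj_path='/path/to/obj_files', tex_path='/path/to/texture_images',
    save_fisheye_path='/path/to/output/fisheye',
    save_pano_path='/path/to/output/pano',
    fisheye_resolution_x=1024, fisheye_resolution_y=1024,
    pano_resolution_x=2048, pano_resolution_y=1024,
    fov=185.0, focal_length=1.8,
    sensor_width=6.17, sensor_height=4.55,
    baseline=0.4,
)

Generator(arg).gen()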