├── .DS_Store
├── 3dv
│   ├── .DS_Store
│   ├── 3dv-poster.pdf
│   ├── 3dv-poster.png
│   ├── architecture.png
│   ├── data_format.png
│   ├── discriminators.png
│   ├── overview.png
│   ├── qualitative_results.png
│   └── video.gif
├── README.md
├── config.py
├── config_test.py
├── data
│   ├── bin2camera.py
│   ├── depthbin2npy.py
│   └── depthbin2npy_tsdf.py
├── depth-tsdf
│   ├── .DS_Store
│   ├── README.md
│   ├── back-project
│   ├── back-project.cu
│   ├── compile.sh
│   ├── data
│   │   ├── .DS_Store
│   │   ├── camera-intrinsics.txt
│   │   ├── camera
│   │   │   ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.txt
│   │   │   ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.txt
│   │   │   ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.txt
│   │   │   └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.txt
│   │   ├── depth_real_png
│   │   │   ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png
│   │   │   ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.png
│   │   │   ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.png
│   │   │   └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.png
│   │   ├── depth_rgb_png
│   │   │   ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png
│   │   │   ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.png
│   │   │   ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.png
│   │   │   └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.png
│   │   ├── depth_tsdf_bin
│   │   │   ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.bin
│   │   │   ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.bin
│   │   │   ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.bin
│   │   │   └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.bin
│   │   ├── depth_tsdf_occluded_npy
│   │   │   ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.npy
│   │   │   ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.npy
│   │   │   ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.npy
│   │   │   └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.npy
│   │   ├── depth_tsdf_ply
│   │   │   ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.ply
│   │   │   ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.ply
│   │   │   ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.ply
│   │   │   └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.ply
│   │   └── origin
│   │       ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.txt
│   │       ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.txt
│   │       ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.txt
│   │       └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.txt
│   ├── depth_yida.png
│   ├── run.sh
│   ├── tsdf2mesh.m
│   └── utils.hpp
├── evaluate.py
├── main.py
├── model.py
├── train.py
├── tsdf.ply
├── util.py
└── visualization
    └── voxviz.py

/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/.DS_Store
--------------------------------------------------------------------------------
/3dv/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/.DS_Store
--------------------------------------------------------------------------------
/3dv/3dv-poster.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/3dv-poster.pdf
--------------------------------------------------------------------------------
/3dv/3dv-poster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/3dv-poster.png
--------------------------------------------------------------------------------
/3dv/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/architecture.png
--------------------------------------------------------------------------------
/3dv/data_format.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/data_format.png
--------------------------------------------------------------------------------
/3dv/discriminators.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/discriminators.png
--------------------------------------------------------------------------------
/3dv/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/overview.png
--------------------------------------------------------------------------------
/3dv/qualitative_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/qualitative_results.png
--------------------------------------------------------------------------------
/3dv/video.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/video.gif
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Adversarial Semantic Scene Completion from a Single Depth Image
2 | 
3 | ## Authors
4 | **[Yida Wang](https://wangyida.github.io/#about), David Tan, Nassir Navab and [Federico Tombari](http://campar.in.tum.de/Main/FedericoTombari)**
5 | 
6 | *International Conference on 3D Vision*, IEEE
7 | 
8 | ## Showcase
9 | ![road condition](3dv/video.gif)
10 | 
11 | ## Overview
12 | ![](3dv/overview.png)
13 | We introduce a direct reconstruction method that maps a single 2.5D depth image to a 3D voxel volume that is both shape-completed and semantically segmented. It relies on a deep architecture based on a 3D VAE, trained adversarially to improve performance on this task.
14 | 
15 | ## Architecture
16 | ![](3dv/architecture.png)
17 | We utilize the latent representation of a 3D auto-encoder to help train a latent representation from a depth image; the 3D auto-encoder is removed after the parametric model is trained. During training, this pipeline jointly optimizes the encoders for the depth image and for the 3D volumetric data, together with the shared generator.
18 |
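The latent alignment described above can be summarized in a few lines of code. The following is a minimal numpy sketch, not this repository's model code: the linear maps stand in for the real convolutional encoders, and all names (`W_depth`, `W_vox`, `encode_depth`, `encode_vox`) are hypothetical. The dimensions follow `config.py` (`NET.DIM_Z = 16`, `CONST.N_DEP = [320, 240, 1]`, `CONST.N_VOX = [80, 48, 80]`).

```python
import numpy as np

rng = np.random.default_rng(0)
W_depth = rng.standard_normal((16, 320 * 240)) * 0.01   # stands in for the depth encoder
W_vox = rng.standard_normal((16, 80 * 48 * 80)) * 0.01  # stands in for the 3D encoder

def encode_depth(depth):
    # hypothetical encoder: map a 320x240 depth image to a 16-D latent code
    return W_depth @ depth.ravel()

def encode_vox(vox):
    # hypothetical encoder: map an 80x48x80 volume to a 16-D latent code
    return W_vox @ vox.ravel()

depth = rng.standard_normal((320, 240))
vox = rng.standard_normal((80, 48, 80))

# Alignment objective: pull the depth latent toward the latent of the 3D
# auto-encoder so that the shared generator can decode either one; the 3D
# branch is discarded once training is finished.
z_depth, z_vox = encode_depth(depth), encode_vox(vox)
latent_loss = np.mean((z_depth - z_vox) ** 2)
print(latent_loss)
```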
19 | 20 | ## Discriminators 21 | ![](3dv/discriminators.png) 22 | To make the latent representation and the reconstructed 3D scene similar to each others, we apply two discriminators for both targets. In this manner, the latent representation of the depth produces the expected target more precisely compared to the latent representation of the ground truth volumetric data. 23 | 24 | ## Our data format 25 | ![](3dv/data_format.png) 26 | 27 | ## Qualitative results 28 | ![](3dv/qualitative_results.png) 29 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | __C = edict() 4 | cfg = __C 5 | 6 | # 7 | # Common 8 | # 9 | __C.SUB_CONFIG_FILE = [] 10 | 11 | __C.CONST = edict() 12 | __C.CONST.N_VOX = [80, 48, 80] 13 | __C.CONST.N_DEP = [320, 240, 1] 14 | __C.CONST.BATCH_SIZE = 16 15 | __C.SAVER_MAX = 1000 16 | __C.CHECK_FREQ = 1000 17 | __C.RECORD_VOX_NUM = 10 18 | __C.SWITCHING_ITE = 75001 19 | 20 | # Network 21 | __C.NET = edict() 22 | __C.NET.DIM_Z = 16 23 | __C.NET.DIM = [512, 256, 128, 64, 12] 24 | __C.NET.START_VOX = [5, 3, 5] 25 | __C.NET.KERNEL = [[5, 5, 5, 5, 5], [3, 3, 3, 3, 3], [5, 5, 5, 5, 5]] 26 | __C.NET.STRIDE = [1, 2, 2, 2, 1] 27 | __C.NET.REFINE_CH = 32 28 | __C.NET.REFINE_KERNEL = 3 29 | 30 | # 31 | # Directories 32 | # 33 | __C.DIR = edict() 34 | # Path where taxonomy.json is stored 35 | # __C.DIR.SCENE_ID_PATH = '../3D-FCR-alphaGAN/Scenevox' 36 | # __C.DIR.VOXEL_PATH = '../3D-FCR-alphaGAN/Scenevox/%s/%s' 37 | __C.DIR.ROOT_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/train/voxel_semantic_npy' 38 | __C.DIR.VOXEL_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/train/voxel_semantic_npy/%s' 39 | # depth--start 40 | __C.DIR.DEPTH_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/train/depth_npy/%s' 41 | __C.DIR.TSDF_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/train/depth_tsdf_npy/%s' 42 | # depth--end 43 | __C.DIR.CHECK_POINT_PATH = './Checkpt' 44 | __C.DIR.CHECK_PT_PATH = './Checkpt/checkpoint' 45 | __C.DIR.TRAIN_OBJ_PATH = './train_vox' 46 | __C.DIR.EVAL_PATH = './eval' 47 | __C.DIR.LOG_PATH = './log' 48 | 49 | # 50 | # Training 51 | # 52 | __C.TRAIN = edict() 53 | 54 | __C.TRAIN.DATASET_PORTION = [0, 0.9] 55 | __C.TRAIN.NUM_EPOCH = 500 # maximum number of training epochs 56 | 57 | # Learning 58 | __C.LEARNING_RATE_G = 0.0001 59 | __C.LEARNING_RATE_D = 0.0001 60 | __C.LEARNING_RATE_V = [0.0001, 1000, 0.0001] 61 | __C.TRAIN.ADAM_BETA_G = 0.5 62 | __C.TRAIN.ADAM_BETA_D = 0.5 63 | __C.LAMDA_RECONS = 1 64 | __C.LAMDA_GAMMA = 0.97 65 | 66 | 67 | def cfg_from_file(filename): 68 | """Load a config file and merge it into the default options.""" 69 | import yaml 70 | with open(filename, 'r') as f: 71 | yaml_cfg = edict(yaml.load(f)) 72 | 73 | _merge_a_into_b(yaml_cfg, __C) 74 | 75 | 76 | def cfg_from_list(cfg_list): 77 | """Set config keys via list (e.g., from command line).""" 78 | from ast import literal_eval 79 | assert len(cfg_list) % 2 == 0 80 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 81 | key_list = k.split('.') 82 | d = __C 83 | for subkey in key_list[:-1]: 84 | assert subkey in d.keys() 85 | d = d[subkey] 86 | subkey = key_list[-1] 87 | assert subkey in d.keys() 88 | try: 89 | value = literal_eval(v) 90 | except: 91 | # handle the case when v is a string literal 92 | value = v 93 | assert type(value) == type(d[subkey]), \ 94 | 'type {} does not match original type {}'.format( 95 | 
--------------------------------------------------------------------------------
/config_test.py:
--------------------------------------------------------------------------------
1 | from easydict import EasyDict as edict
2 | 
3 | __C = edict()
4 | cfg_test = __C
5 | 
6 | #
7 | # Common
8 | #
9 | __C.SUB_CONFIG_FILE = []
10 | 
11 | __C.CONST = edict()
12 | __C.CONST.N_VOX = [80, 48, 80]
13 | __C.CONST.N_DEP = [320, 240, 1]
14 | __C.CONST.BATCH_SIZE = 2
15 | __C.SAVER_MAX = 100
16 | __C.CHECK_FREQ = 100
17 | __C.RECORD_VOX_NUM = 10
18 | __C.SWITCHING_ITE = 75001
19 | 
20 | # Network
21 | __C.NET = edict()
22 | __C.NET.DIM_Z = 16
23 | __C.NET.DIM = [512, 256, 128, 64, 12]
24 | __C.NET.START_VOX = [5, 3, 5]
25 | __C.NET.KERNEL = [[5, 5, 5, 5, 5], [3, 3, 3, 3, 3], [5, 5, 5, 5, 5]]
26 | __C.NET.STRIDE = [1, 2, 2, 2, 1]
27 | __C.NET.REFINE_CH = 32
28 | __C.NET.REFINE_KERNEL = 3
29 | 
30 | #
31 | # Directories
32 | #
33 | __C.DIR = edict()
34 | # Path where taxonomy.json is stored
35 | # __C.DIR.SCENE_ID_PATH = '../3D-FCR-alphaGAN/Scenevox'
36 | # __C.DIR.VOXEL_PATH = '../3D-FCR-alphaGAN/Scenevox/%s/%s'
37 | __C.DIR.ROOT_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/test/voxel_semantic_npy'
38 | __C.DIR.VOXEL_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/test/voxel_semantic_npy/%s'
39 | # depth--start
40 | __C.DIR.DEPTH_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/test/depth_npy/%s'
41 | __C.DIR.TSDF_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/test/depth_tsdf_npy/%s'
42 | # depth--end
43 | __C.DIR.CHECK_POINT_PATH = './Checkpt'
44 | __C.DIR.CHECK_PT_PATH = './Checkpt/checkpoint'
45 | __C.DIR.TRAIN_OBJ_PATH = './test_vox'
46 | __C.DIR.EVAL_PATH = './eval'
47 | __C.DIR.LOG_PATH = './log'
48 | 
49 | #
50 | # Training
51 | #
52 | __C.TRAIN = edict()
53 | 
54 | __C.TRAIN.DATASET_PORTION = [0, 0.8]
55 | __C.TRAIN.NUM_EPOCH = 500  # maximum number of training epochs
56 | 
57 | # Learning
58 | __C.LEARNING_RATE_G = 0.0001
59 | __C.LEARNING_RATE_D = 0.0001
60 | __C.LEARNING_RATE_V = [0.0001, 1000, 0.0001]
61 | __C.TRAIN.ADAM_BETA_G = 0.5
62 | __C.TRAIN.ADAM_BETA_D = 0.5
63 | __C.LAMDA_RECONS = 1
64 | __C.LAMDA_GAMMA = 0.97
65 | 
66 | 
67 | def cfg_from_file(filename):
68 |     """Load a config file and merge it into the default options."""
69 |     import yaml
70 |     with open(filename, 'r') as f:
71 |         yaml_cfg = edict(yaml.safe_load(f))
72 | 
73 |     _merge_a_into_b(yaml_cfg, __C)
74 | 
75 | 
76 | def cfg_from_list(cfg_list):
77 |     """Set config keys via list (e.g., from command line)."""
78 |     from ast import literal_eval
79 |     assert len(cfg_list) % 2 == 0
80 |     for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
81 |         key_list = k.split('.')
82 |         d = __C
83 |         for subkey in key_list[:-1]:
84 |             assert subkey in d.keys()
85 |             d = d[subkey]
86 |         subkey = key_list[-1]
87 |         assert subkey in d.keys()
88 |         try:
89 |             value = literal_eval(v)
90 |         except (ValueError, SyntaxError):
91 |             # handle the case when v is a string literal
92 |             value = v
93 |         assert type(value) == type(d[subkey]), \
94 |             'type {} does not match original type {}'.format(
95 |                 type(value), type(d[subkey]))
96 |         d[subkey] = value
97 | 
--------------------------------------------------------------------------------
/data/bin2camera.py:
--------------------------------------------------------------------------------
1 | from struct import *
2 | import numpy as np
3 | # I considered using the multiprocessing package, but this single-process version works fine.
4 | # Contributions that use multiprocessing to speed up the reading are welcome.
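# Layout of each .bin file read below, inferred from the unpack calls:
#   bytes 0-11  : three float32 values, the voxel grid origin ("cors")
#   bytes 12-75 : sixteen float32 values, a 4x4 camera pose (row-major)
# The pose and origin text files written out here are later consumed by
# depth-tsdf/back-project.cu as the base2world transform and grid origin.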
5 | # from joblib import Parallel, delayed 6 | import multiprocessing 7 | import time 8 | from scipy import misc 9 | import os 10 | import argparse 11 | from progressbar import ProgressBar 12 | from skimage.measure import block_reduce 13 | 14 | 15 | def bin2camera(file): 16 | start_time = time.time() 17 | with open(file, 'r') as f: 18 | float_size = 4 19 | uint_size = 4 20 | total_count = 0 21 | cor = f.read(float_size * 3) 22 | cors = unpack('fff', cor) 23 | # print("cors is {}",cors) 24 | cam = f.read(float_size * 16) 25 | cams = unpack('ffffffffffffffff', cam) 26 | cams = np.array(cams) 27 | cams = np.reshape(cams, [4, 4]) 28 | # cams = np.linalg.inv(cams) 29 | # print("cams %16f",cams) 30 | f.close() 31 | # print "reading voxel file takes {} mins".format((time.time()-start_time)/60) 32 | return cams, cors 33 | 34 | 35 | class ScanFile(object): 36 | def __init__(self, directory, prefix=None, postfix='.bin'): 37 | self.directory = directory 38 | self.prefix = prefix 39 | self.postfix = postfix 40 | 41 | def scan_files(self): 42 | files_list = [] 43 | 44 | for dirpath, dirnames, filenames in os.walk(self.directory): 45 | for special_file in filenames: 46 | if self.postfix: 47 | if special_file.endswith(self.postfix): 48 | files_list.append(os.path.join(dirpath, special_file)) 49 | elif self.prefix: 50 | if special_file.startswith(self.prefix): 51 | files_list.append(os.path.join(dirpath, special_file)) 52 | else: 53 | files_list.append(os.path.join(dirpath, special_file)) 54 | 55 | return files_list 56 | 57 | def scan_subdir(self): 58 | subdir_list = [] 59 | for dirpath, dirnames, files in os.walk(self.directory): 60 | subdir_list.append(dirpath) 61 | return subdir_list 62 | 63 | 64 | if __name__ == "__main__": 65 | 66 | parser = argparse.ArgumentParser(description='Parser added') 67 | parser.add_argument( 68 | '-s', 69 | action="store", 70 | dest="dir_src", 71 | default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000", 72 | help='folder of paired depth and voxel') 73 | parser.add_argument( 74 | '-t', 75 | action="store", 76 | dest="dir_tar", 77 | default="/media/wangyida/D0-P1/database/SUNCG_Yida/train", 78 | help='for storing generated npy') 79 | parser.print_help() 80 | results = parser.parse_args() 81 | 82 | # folder of paired depth and voxel 83 | dir_src = results.dir_src 84 | # for storing generated npy 85 | dir_tar = results.dir_tar 86 | 87 | # scan for semantic voxel files 88 | dir_camera = dir_tar + '/camera/' 89 | dir_origin = dir_tar + '/origin/' 90 | scan_bin = ScanFile(directory=dir_src, postfix='.bin') 91 | files_bin = scan_bin.scan_files() 92 | 93 | # making directories 94 | try: 95 | os.stat(dir_camera) 96 | except: 97 | os.mkdir(dir_camera) 98 | 99 | try: 100 | os.stat(dir_origin) 101 | except: 102 | os.mkdir(dir_origin) 103 | 104 | # save voxel as npy files 105 | pbar1 = ProgressBar() 106 | for file_bin in pbar1(files_bin): 107 | cams, cors = bin2camera(file=file_bin) 108 | name_start = int(file_bin.rfind('/')) 109 | name_end = int(file_bin.find('.', name_start)) 110 | np.savetxt(dir_camera + file_bin[name_start:name_end] + '.txt', cams) 111 | np.savetxt(dir_origin + file_bin[name_start:name_end] + '.txt', cors) 112 | -------------------------------------------------------------------------------- /data/depthbin2npy.py: -------------------------------------------------------------------------------- 1 | from struct import * 2 | import numpy as np 3 | # I considered using multiprocessing package, but I find this code version is fine. 
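# The semantic voxel .bin files decoded below contain, after a 3-float origin
# and a 16-float camera pose, a run-length-encoded volume: pairs of uint32
# (value, repeat) which expand to a 240 x 144 x 240 label grid. bin2array()
# then max-pools it with 3 x 3 x 3 blocks down to the 80 x 48 x 80 resolution
# used for training (cfg.CONST.N_VOX).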
4 | # Welcome for your version with multiprocessing to make the reading faster. 5 | # from joblib import Parallel, delayed 6 | import multiprocessing 7 | import time 8 | from scipy import misc 9 | import os 10 | import argparse 11 | from progressbar import ProgressBar 12 | from skimage.measure import block_reduce 13 | 14 | 15 | def bin2array(file): 16 | start_time = time.time() 17 | with open(file, 'r') as f: 18 | float_size = 4 19 | uint_size = 4 20 | total_count = 0 21 | cor = f.read(float_size * 3) 22 | cors = unpack('fff', cor) 23 | # print("cors is {}",cors) 24 | cam = f.read(float_size * 16) 25 | cams = unpack('ffffffffffffffff', cam) 26 | # print("cams %16f",cams) 27 | vox = f.read() 28 | numC = len(vox) / uint_size 29 | # print('numC is {}'.format(numC)) 30 | checkVoxValIter = unpack('I' * numC, vox) 31 | checkVoxVal = checkVoxValIter[0::2] 32 | checkVoxIter = checkVoxValIter[1::2] 33 | checkVox = [ 34 | i for (val, repeat) in zip(checkVoxVal, checkVoxIter) 35 | for i in np.tile(val, repeat) 36 | ] 37 | # print('checkVox shape is {}'.format(len(checkVox))) 38 | checkVox = np.reshape(checkVox, (240, 144, 240)) 39 | checkVox = block_reduce(checkVox, block_size=(3, 3, 3), func=np.max) 40 | f.close() 41 | # print "reading voxel file takes {} mins".format((time.time()-start_time)/60) 42 | return checkVox 43 | 44 | 45 | def png2array(file): 46 | image = misc.imread(file) 47 | image = misc.imresize(image, 50) 48 | return image 49 | 50 | 51 | class ScanFile(object): 52 | def __init__(self, directory, prefix=None, postfix='.bin'): 53 | self.directory = directory 54 | self.prefix = prefix 55 | self.postfix = postfix 56 | 57 | def scan_files(self): 58 | files_list = [] 59 | 60 | for dirpath, dirnames, filenames in os.walk(self.directory): 61 | for special_file in filenames: 62 | if self.postfix: 63 | if special_file.endswith(self.postfix): 64 | files_list.append(os.path.join(dirpath, special_file)) 65 | elif self.prefix: 66 | if special_file.startswith(self.prefix): 67 | files_list.append(os.path.join(dirpath, special_file)) 68 | else: 69 | files_list.append(os.path.join(dirpath, special_file)) 70 | 71 | return files_list 72 | 73 | def scan_subdir(self): 74 | subdir_list = [] 75 | for dirpath, dirnames, files in os.walk(self.directory): 76 | subdir_list.append(dirpath) 77 | return subdir_list 78 | 79 | 80 | if __name__ == "__main__": 81 | 82 | parser = argparse.ArgumentParser(description='Parser added') 83 | parser.add_argument( 84 | '-s', 85 | action="store", 86 | dest="dir_src", 87 | default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000", 88 | help='folder of paired depth and voxel') 89 | parser.add_argument( 90 | '-td', 91 | action="store", 92 | dest="dir_tar_depth", 93 | default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000_depvox", 94 | help='for storing generated npy') 95 | parser.add_argument( 96 | '-tv', 97 | action="store", 98 | dest="dir_tar_voxel", 99 | default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000_depvox", 100 | help='for storing generated npy') 101 | parser.print_help() 102 | results = parser.parse_args() 103 | 104 | # folder of paired depth and voxel 105 | dir_src = results.dir_src 106 | # for storing generated npy 107 | dir_tar_depth = results.dir_tar_depth 108 | dir_tar_voxel = results.dir_tar_voxel 109 | 110 | # scan for depth files 111 | scan_png = ScanFile(directory=dir_src, postfix='.png') 112 | files_png = scan_png.scan_files() 113 | 114 | # scan for semantic voxel files 115 | scan_bin = ScanFile(directory=dir_src, postfix='.bin') 116 | 
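    # scan_files() walks dir_src recursively and collects every '.bin' path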
files_bin = scan_bin.scan_files() 117 | 118 | # making directories 119 | try: 120 | os.stat(dir_tar_voxel) 121 | except: 122 | os.mkdir(dir_tar_voxel) 123 | try: 124 | os.stat(dir_tar_depth) 125 | except: 126 | os.mkdir(dir_tar_depth) 127 | 128 | pbar1 = ProgressBar() 129 | # save depth as npy files 130 | for file_png in pbar1(files_png): 131 | depth = png2array(file=file_png) 132 | name_start = int(file_png.rfind('/')) 133 | name_end = int(file_png.find('.', name_start)) 134 | np.save(dir_tar_depth + file_png[name_start:name_end] + '.npy', depth) 135 | 136 | # save voxel as npy files 137 | pbar2 = ProgressBar() 138 | for file_bin in pbar2(files_bin): 139 | voxel = bin2array(file=file_bin) 140 | name_start = int(file_bin.rfind('/')) 141 | name_end = int(file_bin.find('.', name_start)) 142 | np.save(dir_tar_voxel + file_bin[name_start:name_end] + '.npy', voxel) 143 | -------------------------------------------------------------------------------- /data/depthbin2npy_tsdf.py: -------------------------------------------------------------------------------- 1 | from struct import * 2 | from subprocess import call 3 | import numpy as np 4 | # I considered using multiprocessing package, but I find this code version is fine. 5 | # Welcome for your version with multiprocessing to make the reading faster. 6 | # from joblib import Parallel, delayed 7 | import multiprocessing 8 | import time 9 | from scipy import misc 10 | import os 11 | import argparse 12 | from progressbar import ProgressBar 13 | from skimage.measure import block_reduce 14 | 15 | 16 | def bin2array(file): 17 | start_time = time.time() 18 | with open(file, 'r') as f: 19 | float_size = 4 20 | uint_size = 4 21 | total_count = 0 22 | """ 23 | cor = f.read(float_size*3) 24 | cors = unpack('fff', cor) 25 | print("cors is {}",cors) 26 | tmp = f.read(float_size*5) 27 | tmps = unpack('f'*5, tmp) 28 | print("cams %16f",cams) 29 | """ 30 | vox = f.read() 31 | numC = len(vox) / float_size 32 | # print('numC is {}'.format(numC)) 33 | checkVox = unpack('I' * numC, vox) 34 | # print('checkVox shape is {}'.format(len(checkVox))) 35 | checkVox = np.reshape(checkVox, (48, 80, 80)) 36 | checkVox = np.swapaxes(checkVox, 0, 1) 37 | checkVox = np.swapaxes(checkVox, 0, 2) 38 | # checkVox = np.flip(checkVox, 0) 39 | checkVox = np.where(checkVox < 1.0, 1, 0) 40 | # checkVox = block_reduce(checkVox, block_size=(3, 3, 3), func=np.max) 41 | f.close() 42 | # print "reading voxel file takes {} mins".format((time.time()-start_time)/60) 43 | return checkVox 44 | 45 | 46 | def png2array(file): 47 | image = misc.imread(file) 48 | image = misc.imresize(image, 50) 49 | return image 50 | 51 | 52 | class ScanFile(object): 53 | def __init__(self, directory, prefix=None, postfix='.bin'): 54 | self.directory = directory 55 | self.prefix = prefix 56 | self.postfix = postfix 57 | 58 | def scan_files(self): 59 | files_list = [] 60 | 61 | for dirpath, dirnames, filenames in os.walk(self.directory): 62 | for special_file in filenames: 63 | if self.postfix: 64 | if special_file.endswith(self.postfix): 65 | files_list.append(os.path.join(dirpath, special_file)) 66 | elif self.prefix: 67 | if special_file.startswith(self.prefix): 68 | files_list.append(os.path.join(dirpath, special_file)) 69 | else: 70 | files_list.append(os.path.join(dirpath, special_file)) 71 | 72 | return files_list 73 | 74 | def scan_subdir(self): 75 | subdir_list = [] 76 | for dirpath, dirnames, files in os.walk(self.directory): 77 | subdir_list.append(dirpath) 78 | return subdir_list 79 | 80 | 81 | def 
process_data(file_depth): 82 | img_path = file_depth 83 | camera_intrinsic = "./depth-tsdf/data/camera-intrinsics.txt" 84 | camera_extrinsic = img_path.replace("depth_real_png", "camera") 85 | camera_extrinsic = camera_extrinsic.replace(".png", ".txt") 86 | camera_origin = camera_extrinsic.replace("camera", "origin") 87 | call([ 88 | "./depth-tsdf/demo", camera_intrinsic, camera_origin, camera_extrinsic, 89 | img_path 90 | ]) 91 | voxel = bin2array(file="./tsdf.bin") 92 | name_start = int(img_path.rfind('/')) 93 | name_end = int(img_path.find('.', name_start)) 94 | # save numpy 95 | np.save(dir_voxel + img_path[name_start:name_end] + '.npy', voxel) 96 | 97 | # save ply 98 | call( 99 | ["cp", "./tsdf.ply", dir_ply + img_path[name_start:name_end] + '.ply']) 100 | 101 | 102 | if __name__ == "__main__": 103 | 104 | parser = argparse.ArgumentParser(description='Parser added') 105 | parser.add_argument( 106 | '-s', 107 | action="store", 108 | dest="dir_src", 109 | default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000", 110 | help='folder of paired depth and voxel') 111 | parser.add_argument( 112 | '-tv', 113 | action="store", 114 | dest="dir_tar", 115 | default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000_depvox", 116 | help='for storing generated npy') 117 | parser.add_argument( 118 | '-tp', 119 | action="store", 120 | dest="dir_ply", 121 | default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000_depvox", 122 | help='for storing generated ply') 123 | parser.print_help() 124 | results = parser.parse_args() 125 | 126 | # folder of paired depth and voxel 127 | dir_src = results.dir_src 128 | # for storing generated npy 129 | dir_voxel = results.dir_tar 130 | dir_ply = results.dir_ply 131 | 132 | # scan for depth files 133 | scan_png = ScanFile(directory=dir_src, postfix='.png') 134 | files_png = scan_png.scan_files() 135 | 136 | # making directories 137 | try: 138 | os.stat(dir_voxel) 139 | except: 140 | os.mkdir(dir_voxel) 141 | 142 | try: 143 | os.stat(dir_ply) 144 | except: 145 | os.mkdir(dir_ply) 146 | 147 | # save voxel as npy files 148 | pbar = ProgressBar() 149 | """ 150 | from joblib import Parallel, delayed 151 | import multiprocessing 152 | num_cores = multiprocessing.cpu_count() 153 | Parallel(n_jobs=num_cores)(delayed(process_data(file_depth)) for file_depth in pbar(files_png)) 154 | """ 155 | for file_depth in pbar(files_png): 156 | process_data(file_depth) 157 | -------------------------------------------------------------------------------- /depth-tsdf/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/.DS_Store -------------------------------------------------------------------------------- /depth-tsdf/README.md: -------------------------------------------------------------------------------- 1 | # Volumetric TSDF Fusion of Multiple Depth Maps 2 | 3 | ![Teaser](teaser.jpg?raw=true) 4 | 5 | CUDA/C++ code to fuse multiple registered depth maps into a projective truncated signed distance function (TSDF) voxel volume, which can then be used to create high quality 3D surface meshes and point clouds. Tested on Ubuntu 14.04 and 16.04. 6 | 7 | Looking for an older version? See [here](old-version). 8 | 9 | This repository is a part of [Andy's Code Collection](http://andyzeng.github.io/). 10 | 11 | ## Change Log 12 | * **Nov. 
1, 2017.** Bug fix: `tsdf2mesh.m` now properly generates a mesh in camera coordinates instead of voxel coordinates.
13 | * **Oct. 30, 2017.** Notice: changed default weight threshold for `SaveVoxelGrid2SurfacePointCloud` in demo code to enable creating point cloud visualizations with only one depth frame.
14 | * **Aug. 30, 2017.** Bug fix: remove deprecated offsets from surface distance compute during integration.
15 | 
16 | ## Requirements
17 | * NVIDIA GPU with [CUDA](https://developer.nvidia.com/cuda-downloads) support
18 | * [OpenCV](http://opencv.org/) (tested with OpenCV 2.4.11)
19 | 
20 | ## Demo
21 | This demo fuses 50 registered depth maps from directory `data/rgbd-frames` into a projective TSDF voxel volume, and creates a 3D surface point cloud `tsdf.ply`, which can be visualized with a 3D viewer like [Meshlab](http://www.meshlab.net/).
22 | 
23 | **Note**: Input depth maps should be saved in format: 16-bit PNG, depth in millimeters.
24 | 
25 | ```shell
26 | ./compile.sh # compiles demo executable
27 | ./demo # 3D point cloud saved to tsdf.ply and voxel grid saved to tsdf.bin
28 | ```
29 | 
30 | [Optional] This demo also saves the computed voxel volume into a binary file `tsdf.bin`. Run the following script in Matlab to create a 3D surface mesh `mesh.ply`, which can be visualized with [Meshlab](http://www.meshlab.net/).
31 | 
32 | ```matlab
33 | tsdf2mesh; % 3D mesh saved to mesh.ply
34 | ```
35 | 
36 | ## Seen in
37 | * [3DMatch: Learning Local Geometric Descriptors from RGB-D Reconstructions (CVPR 2017)](http://3dmatch.cs.princeton.edu/)
38 | * [Semantic Scene Completion from a Single Depth Image (CVPR 2017)](http://sscnet.cs.princeton.edu/)
39 | * [Deep Sliding Shapes for Amodal 3D Object Detection in RGB-D Images (CVPR 2016)](http://dss.cs.princeton.edu/)
40 | 
41 | ## References
42 | * [A Volumetric Method for Building Complex Models from Range Images (SIGGRAPH 1996)](https://graphics.stanford.edu/papers/volrange/volrange.pdf)
43 | * [KinectFusion: Real-Time Dense Surface Mapping and Tracking (ISMAR 2011)](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/ismar2011.pdf)
44 | * [Scene Coordinate Regression Forests for Camera Relocalization in RGB-D Images (CVPR 2013)](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/RelocForests.pdf)
45 | 
--------------------------------------------------------------------------------
/depth-tsdf/back-project:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/back-project
--------------------------------------------------------------------------------
/depth-tsdf/back-project.cu:
--------------------------------------------------------------------------------
1 | // ---------------------------------------------------------
2 | // Author: Andy Zeng, Princeton University, 2016
3 | // ---------------------------------------------------------
4 | 
5 | #include <iostream>
6 | #include <fstream>
7 | #include <iomanip>
8 | #include <sstream>
9 | #include <string>
10 | #include "utils.hpp"
11 | 
12 | // CUDA kernel function to integrate a TSDF voxel volume given depth images
13 | __global__
14 | void Integrate(float * cam_K, float * cam2base, float * depth_im,
15 |                int im_height, int im_width, int voxel_grid_dim_x, int voxel_grid_dim_y, int voxel_grid_dim_z,
16 |                float voxel_grid_origin_x, float voxel_grid_origin_y, float voxel_grid_origin_z, float voxel_size, float trunc_margin,
17 |                float * voxel_grid_TSDF) {
18 | 
19 |   int
pt_grid_z = blockIdx.x; 20 | int pt_grid_y = threadIdx.x; 21 | 22 | for (int pt_grid_x = 0; pt_grid_x < voxel_grid_dim_x; ++pt_grid_x) { 23 | 24 | // Convert voxel center from grid coordinates to base frame camera coordinates 25 | float pt_base_x = voxel_grid_origin_x + pt_grid_x * voxel_size; 26 | float pt_base_y = voxel_grid_origin_y + pt_grid_y * voxel_size; 27 | float pt_base_z = voxel_grid_origin_z + pt_grid_z * voxel_size; 28 | 29 | // Convert from base frame camera coordinates to current frame camera coordinates 30 | float tmp_pt[3] = {0}; 31 | tmp_pt[0] = pt_base_x - cam2base[0 * 4 + 3]; 32 | tmp_pt[1] = pt_base_y - cam2base[1 * 4 + 3]; 33 | tmp_pt[2] = pt_base_z - cam2base[2 * 4 + 3]; 34 | float pt_cam_x = cam2base[0 * 4 + 0] * tmp_pt[0] + cam2base[1 * 4 + 0] * tmp_pt[1] + cam2base[2 * 4 + 0] * tmp_pt[2]; 35 | float pt_cam_y = cam2base[0 * 4 + 1] * tmp_pt[0] + cam2base[1 * 4 + 1] * tmp_pt[1] + cam2base[2 * 4 + 1] * tmp_pt[2]; 36 | float pt_cam_z = cam2base[0 * 4 + 2] * tmp_pt[0] + cam2base[1 * 4 + 2] * tmp_pt[1] + cam2base[2 * 4 + 2] * tmp_pt[2]; 37 | 38 | int volume_idx = pt_grid_z * voxel_grid_dim_y * voxel_grid_dim_x + pt_grid_y * voxel_grid_dim_x + pt_grid_x; 39 | if (pt_cam_z <= 0) { 40 | voxel_grid_TSDF[volume_idx] = -2.0f; 41 | continue; 42 | } 43 | 44 | int pt_pix_x = roundf(cam_K[0 * 3 + 0] * (pt_cam_x / pt_cam_z) + cam_K[0 * 3 + 2]); 45 | int pt_pix_y = roundf(cam_K[1 * 3 + 1] * (pt_cam_y / pt_cam_z) + cam_K[1 * 3 + 2]); 46 | if (pt_pix_x < 0 || pt_pix_x >= im_width || pt_pix_y < 0 || pt_pix_y >= im_height) { 47 | voxel_grid_TSDF[volume_idx] = -2.0f; 48 | continue; 49 | } 50 | 51 | float depth_val = depth_im[pt_pix_y * im_width + pt_pix_x]; 52 | 53 | if (depth_val > 8) { 54 | voxel_grid_TSDF[volume_idx] = -2.0f; 55 | continue; 56 | } 57 | 58 | float diff = depth_val - pt_cam_z; 59 | 60 | // This is for labeling the -1 space (occluded space) 61 | if (diff < -0.1 || depth_val == 0.0) { 62 | voxel_grid_TSDF[volume_idx] = 2.0f; 63 | continue; 64 | } 65 | 66 | // This is for labeling the empty space 67 | if (diff > 0.1) { 68 | voxel_grid_TSDF[volume_idx] = -1.0f; 69 | continue; 70 | } 71 | 72 | // Integrate 73 | // float dist = fmin(1.0f, diff / trunc_margin); 74 | // float weight_old = voxel_grid_weight[volume_idx]; 75 | // float weight_new = weight_old + 1.0f; 76 | // voxel_grid_weight[volume_idx] = weight_new; 77 | // voxel_grid_TSDF[volume_idx] = (voxel_grid_TSDF[volume_idx] * weight_old + dist) / weight_new; 78 | if (abs(diff) < 0.1) { 79 | voxel_grid_TSDF[volume_idx] = 1.0f; 80 | } 81 | } 82 | } 83 | 84 | // Loads a binary file with depth data and generates a TSDF voxel volume (5m x 5m x 5m at 1cm resolution) 85 | // Volume is aligned with respect to the camera coordinates of the first frame (a.k.a. 
base frame) 86 | int main(int argc, char * argv[]) { 87 | 88 | // Location of camera intrinsic file 89 | std::string cam_K_file = "data/camera-intrinsics.txt"; 90 | std::string cam_origin_file = "data/origin/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.txt"; 91 | std::string base2world_file = "data/camera/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.txt"; 92 | std::string depth_im_file = "data/depth_real_png/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png"; 93 | std::string tsdf_bin_file = "tsdf.bin"; 94 | 95 | // Location of folder containing RGB-D frames and camera pose files 96 | // std::string data_path = "data/rgbd-frames-yida"; 97 | 98 | float cam_K[3 * 3]; 99 | float cam_origin[3 * 1]; 100 | float base2world[4 * 4]; 101 | float cam2base[4 * 4]; 102 | float cam2world[4 * 4]; 103 | int im_width = 640; 104 | int im_height = 480; 105 | float depth_im[im_height * im_width]; 106 | 107 | // Voxel grid parameters (change these to change voxel grid resolution, etc.) 108 | float voxel_grid_origin_x = 43.15f; // Location of voxel grid origin in base frame camera coordinates 109 | float voxel_grid_origin_y = 50.88f; 110 | float voxel_grid_origin_z = 0.05f; 111 | float voxel_size = 0.06f; 112 | float trunc_margin = 0.72f;//voxel_size * 5; 113 | int voxel_grid_dim_x = 80; 114 | int voxel_grid_dim_y = 80; 115 | int voxel_grid_dim_z = 48; 116 | 117 | // Manual parameters 118 | if (argc > 1) { 119 | cam_K_file = argv[1]; 120 | cam_origin_file = argv[2]; 121 | base2world_file = argv[3]; 122 | depth_im_file = argv[4]; 123 | tsdf_bin_file = argv[5]; 124 | } 125 | 126 | // Read camera intrinsics 127 | std::vector cam_K_vec = LoadMatrixFromFile(cam_K_file, 3, 3); 128 | std::copy(cam_K_vec.begin(), cam_K_vec.end(), cam_K); 129 | std::vector cam_origin_vec = LoadMatrixFromFile(cam_origin_file, 3, 1); 130 | std::copy(cam_origin_vec.begin(), cam_origin_vec.end(), cam_origin); 131 | voxel_grid_origin_x = cam_origin[0]; 132 | voxel_grid_origin_y = cam_origin[1]; 133 | voxel_grid_origin_z = cam_origin[2]; 134 | 135 | // Read base frame camera pose 136 | std::ostringstream base_frame_prefix; 137 | // base_frame_prefix << std::setw(6) << std::setfill('0') << base_frame_idx; 138 | // std::string base2world_file = data_path + "/frame-" + base_frame_prefix.str() + ".pose.txt"; 139 | std::vector base2world_vec = LoadMatrixFromFile(base2world_file, 4, 4); 140 | std::copy(base2world_vec.begin(), base2world_vec.end(), base2world); 141 | 142 | // Invert base frame camera pose to get world-to-base frame transform 143 | float base2world_inv[16] = {0}; 144 | invert_matrix(base2world, base2world_inv); 145 | 146 | // Initialize voxel grid 147 | float * voxel_grid_TSDF = new float[voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z]; 148 | for (int i = 0; i < voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z; ++i) 149 | voxel_grid_TSDF[i] = 0.0f; 150 | 151 | // Load variables to GPU memory 152 | float * gpu_voxel_grid_TSDF; 153 | cudaMalloc(&gpu_voxel_grid_TSDF, voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z * sizeof(float)); 154 | checkCUDA(__LINE__, cudaGetLastError()); 155 | cudaMemcpy(gpu_voxel_grid_TSDF, voxel_grid_TSDF, voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z * sizeof(float), cudaMemcpyHostToDevice); 156 | checkCUDA(__LINE__, cudaGetLastError()); 157 | float * gpu_cam_K; 158 | float * gpu_cam2base; 159 | float * gpu_depth_im; 160 | cudaMalloc(&gpu_cam_K, 3 * 3 * sizeof(float)); 161 | cudaMemcpy(gpu_cam_K, cam_K, 3 * 3 * sizeof(float), 
cudaMemcpyHostToDevice); 162 | cudaMalloc(&gpu_cam2base, 4 * 4 * sizeof(float)); 163 | cudaMalloc(&gpu_depth_im, im_height * im_width * sizeof(float)); 164 | checkCUDA(__LINE__, cudaGetLastError()); 165 | 166 | // Loop through each depth frame and integrate TSDF voxel grid 167 | 168 | // std::ostringstream curr_frame_prefix; 169 | // curr_frame_prefix << std::setw(6) << std::setfill('0') << frame_idx; 170 | 171 | // // Read current frame depth 172 | // std::string depth_im_file = data_path + "/frame-" + curr_frame_prefix.str() + ".depth.png"; 173 | ReadDepth(depth_im_file, im_height, im_width, depth_im); 174 | 175 | // Read base frame camera pose 176 | std::string cam2world_file = base2world_file; //data_path + "/frame-" + curr_frame_prefix.str() + ".pose.txt"; 177 | std::vector cam2world_vec = LoadMatrixFromFile(cam2world_file, 4, 4); 178 | std::copy(cam2world_vec.begin(), cam2world_vec.end(), cam2world); 179 | 180 | // Compute relative camera pose (camera-to-base frame) 181 | multiply_matrix(base2world_inv, cam2world, cam2base); 182 | 183 | // yida: here we should use base2world for rotation for alignment of the ground 184 | cudaMemcpy(gpu_cam2base, base2world, 4 * 4 * sizeof(float), cudaMemcpyHostToDevice); 185 | cudaMemcpy(gpu_depth_im, depth_im, im_height * im_width * sizeof(float), cudaMemcpyHostToDevice); 186 | checkCUDA(__LINE__, cudaGetLastError()); 187 | 188 | // std::cout << "Fusing: " << depth_im_file << std::endl; 189 | 190 | Integrate <<< voxel_grid_dim_z, voxel_grid_dim_y >>>(gpu_cam_K, gpu_cam2base, gpu_depth_im, 191 | im_height, im_width, voxel_grid_dim_x, voxel_grid_dim_y, voxel_grid_dim_z, 192 | voxel_grid_origin_x, voxel_grid_origin_y, voxel_grid_origin_z, voxel_size, trunc_margin, 193 | gpu_voxel_grid_TSDF); 194 | 195 | // Load TSDF voxel grid from GPU to CPU memory 196 | cudaMemcpy(voxel_grid_TSDF, gpu_voxel_grid_TSDF, voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z * sizeof(float), cudaMemcpyDeviceToHost); 197 | // cudaMemcpy(voxel_grid_weight, gpu_voxel_grid_weight, voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z * sizeof(float), cudaMemcpyDeviceToHost); 198 | checkCUDA(__LINE__, cudaGetLastError()); 199 | 200 | // Compute surface points from TSDF voxel grid and save to point cloud .ply file 201 | // std::cout << "Saving surface point cloud (tsdf.ply)..." << std::endl; 202 | 203 | SaveVoxelGrid2SurfacePointCloud("tsdf.ply", voxel_grid_dim_x, voxel_grid_dim_y, voxel_grid_dim_z, 204 | voxel_size, voxel_grid_origin_x, voxel_grid_origin_y, voxel_grid_origin_z, 205 | voxel_grid_TSDF); 206 | 207 | // Save TSDF voxel grid and its parameters to disk as binary file (float array) 208 | // std::cout << "Saving TSDF voxel grid values to disk (tsdf.bin)..." 
<< std::endl; 209 | std::ofstream outFile(tsdf_bin_file, std::ios::binary | std::ios::out); 210 | /* 211 | float voxel_grid_dim_xf = (float) voxel_grid_dim_x; 212 | float voxel_grid_dim_yf = (float) voxel_grid_dim_y; 213 | float voxel_grid_dim_zf = (float) voxel_grid_dim_z; 214 | outFile.write((char*)&voxel_grid_dim_xf, sizeof(float)); 215 | outFile.write((char*)&voxel_grid_dim_yf, sizeof(float)); 216 | outFile.write((char*)&voxel_grid_dim_zf, sizeof(float)); 217 | outFile.write((char*)&voxel_grid_origin_x, sizeof(float)); 218 | outFile.write((char*)&voxel_grid_origin_y, sizeof(float)); 219 | outFile.write((char*)&voxel_grid_origin_z, sizeof(float)); 220 | outFile.write((char*)&voxel_size, sizeof(float)); 221 | outFile.write((char*)&trunc_margin, sizeof(float)); 222 | */ 223 | for (int i = 0; i < voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z; ++i) { 224 | outFile.write((char*)&voxel_grid_TSDF[i], sizeof(float)); 225 | } 226 | outFile.close(); 227 | 228 | return 0; 229 | } 230 | -------------------------------------------------------------------------------- /depth-tsdf/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PATH=$PATH:/usr/local/cuda/bin 4 | 5 | if uname | grep -q Darwin; then 6 | CUDA_LIB_DIR=/usr/local/cuda/lib 7 | elif uname | grep -q Linux; then 8 | CUDA_LIB_DIR=/usr/local/cuda/lib64 9 | fi 10 | 11 | nvcc -std=c++11 -O3 -o back-project back-project.cu -I/usr/local/cuda/include -L$CUDA_LIB_DIR -lcudart -lcublas -lcurand -D_MWAITXINTRIN_H_INCLUDED `pkg-config --cflags --libs opencv` 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /depth-tsdf/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/.DS_Store -------------------------------------------------------------------------------- /depth-tsdf/data/camera-intrinsics.txt: -------------------------------------------------------------------------------- 1 | 518.8579 0 320 2 | 0 518.8579 240 3 | 0 0 1 4 | -------------------------------------------------------------------------------- /depth-tsdf/data/camera/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.txt: -------------------------------------------------------------------------------- 1 | -9.947609901428222656e-01 -4.861999768763780594e-03 1.021080017089843750e-01 4.428060531616210938e+01 2 | -1.022230014204978943e-01 4.731500148773193359e-02 -9.936360120773315430e-01 4.011987304687500000e+01 3 | 0.000000000000000000e+00 -9.988679885864257812e-01 -4.756399989128112793e-02 1.281923055648803711e+00 4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 5 | -------------------------------------------------------------------------------- /depth-tsdf/data/camera/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.txt: -------------------------------------------------------------------------------- 1 | 1.216949969530105591e-01 9.431800246238708496e-02 -9.880759716033935547e-01 4.156637573242187500e+01 2 | 9.925680160522460938e-01 -1.156399957835674286e-02 1.211439967155456543e-01 3.953329467773437500e+01 3 | -0.000000000000000000e+00 -9.954749941825866699e-01 -9.502399712800979614e-02 1.549054980278015137e+00 4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
1.000000000000000000e+00 5 | -------------------------------------------------------------------------------- /depth-tsdf/data/camera/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.txt: -------------------------------------------------------------------------------- 1 | -5.403019785881042480e-01 -2.791469991207122803e-01 7.938200235366821289e-01 4.199525833129882812e+01 2 | -8.414710164070129395e-01 1.792380064725875854e-01 -5.097060203552246094e-01 4.885494995117187500e+01 3 | 0.000000000000000000e+00 -9.433720111846923828e-01 -3.317370116710662842e-01 9.233530163764953613e-01 4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 5 | -------------------------------------------------------------------------------- /depth-tsdf/data/camera/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.txt: -------------------------------------------------------------------------------- 1 | 3.210160136222839355e-01 -2.677600085735321045e-02 9.466950297355651855e-01 5.375836944580078125e+01 2 | -9.470739960670471191e-01 -9.076000191271305084e-03 3.208869993686676025e-01 4.655792999267578125e+01 3 | 0.000000000000000000e+00 -9.995999932289123535e-01 -2.827299945056438446e-02 1.377272009849548340e+00 4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 5 | -------------------------------------------------------------------------------- /depth-tsdf/data/depth_real_png/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_real_png/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png -------------------------------------------------------------------------------- /depth-tsdf/data/depth_real_png/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_real_png/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.png -------------------------------------------------------------------------------- /depth-tsdf/data/depth_real_png/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_real_png/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.png -------------------------------------------------------------------------------- /depth-tsdf/data/depth_real_png/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_real_png/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.png -------------------------------------------------------------------------------- /depth-tsdf/data/depth_rgb_png/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_rgb_png/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png -------------------------------------------------------------------------------- /depth-tsdf/data/depth_rgb_png/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_rgb_png/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.png -------------------------------------------------------------------------------- /depth-tsdf/data/depth_rgb_png/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_rgb_png/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.png -------------------------------------------------------------------------------- /depth-tsdf/data/depth_rgb_png/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_rgb_png/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.png -------------------------------------------------------------------------------- /depth-tsdf/data/depth_tsdf_bin/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_bin/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.bin -------------------------------------------------------------------------------- /depth-tsdf/data/depth_tsdf_bin/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_bin/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.bin -------------------------------------------------------------------------------- /depth-tsdf/data/depth_tsdf_bin/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_bin/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.bin -------------------------------------------------------------------------------- /depth-tsdf/data/depth_tsdf_bin/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_bin/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.bin -------------------------------------------------------------------------------- 
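The `.bin` files listed above are raw dumps of the float32 TSDF volume written by `back-project.cu`; the header write in that file is commented out, so only the 80 x 80 x 48 grid values are stored, in the z-major index order used by the `Integrate` kernel. A minimal numpy loader sketch under those assumptions (the path in the usage example is hypothetical):

```python
import numpy as np

def load_tsdf_bin(path, dims=(48, 80, 80)):
    """Read a raw float32 TSDF volume laid out as (z, y, x), matching the
    volume_idx = z * dim_y * dim_x + y * dim_x + x order in back-project.cu."""
    vol = np.fromfile(path, dtype=np.float32)
    assert vol.size == np.prod(dims), 'unexpected voxel count'
    return vol.reshape(dims)

# Example (hypothetical file name):
# tsdf = load_tsdf_bin('depth-tsdf/data/depth_tsdf_bin/scene_0000.bin')
```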
/depth-tsdf/data/depth_tsdf_occluded_npy/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_occluded_npy/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.npy -------------------------------------------------------------------------------- /depth-tsdf/data/depth_tsdf_occluded_npy/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_occluded_npy/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.npy -------------------------------------------------------------------------------- /depth-tsdf/data/depth_tsdf_occluded_npy/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_occluded_npy/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.npy -------------------------------------------------------------------------------- /depth-tsdf/data/depth_tsdf_occluded_npy/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_occluded_npy/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.npy -------------------------------------------------------------------------------- /depth-tsdf/data/depth_tsdf_ply/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_ply/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.ply -------------------------------------------------------------------------------- /depth-tsdf/data/depth_tsdf_ply/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_ply/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.ply -------------------------------------------------------------------------------- /depth-tsdf/data/depth_tsdf_ply/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_ply/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.ply -------------------------------------------------------------------------------- /depth-tsdf/data/depth_tsdf_ply/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.ply: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_ply/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.ply -------------------------------------------------------------------------------- /depth-tsdf/data/origin/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.txt: -------------------------------------------------------------------------------- 1 | 4.222777557373046875e+01 2 | 3.434151077270507812e+01 3 | -5.000000074505805969e-02 4 | -------------------------------------------------------------------------------- /depth-tsdf/data/origin/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.txt: -------------------------------------------------------------------------------- 1 | 3.580691528320312500e+01 2 | 3.754518508911132812e+01 3 | -5.000000074505805969e-02 4 | -------------------------------------------------------------------------------- /depth-tsdf/data/origin/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.txt: -------------------------------------------------------------------------------- 1 | 4.229424667358398438e+01 2 | 4.472195053100585938e+01 3 | -5.000000074505805969e-02 4 | -------------------------------------------------------------------------------- /depth-tsdf/data/origin/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.txt: -------------------------------------------------------------------------------- 1 | 5.457713317871093750e+01 2 | 4.524894332885742188e+01 3 | -5.000000074505805969e-02 4 | -------------------------------------------------------------------------------- /depth-tsdf/depth_yida.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/depth_yida.png -------------------------------------------------------------------------------- /depth-tsdf/run.sh: -------------------------------------------------------------------------------- 1 | ./demo \ 2 | data/camera-intrinsics.txt \ 3 | data/rgbd-frames-yida \ 4 | 150 \ 5 | 150 \ 6 | 1 \ 7 | 36.0f \ 8 | 40.5f \ 9 | -0.05f \ 10 | 0.02f \ 11 | 0.1f 12 | 13 | -------------------------------------------------------------------------------- /depth-tsdf/tsdf2mesh.m: -------------------------------------------------------------------------------- 1 | % --------------------------------------------------------- 2 | % Loads a TSDF voxel grid from a binary file (tsdf.bin) and 3 | % creates a mesh (saved to mesh.ply), which can be viewed 4 | % with a 3D viewer like Meshlab. 
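% Note: this script expects tsdf.bin to begin with an 8-float header
% (grid dimensions, origin, voxel size, truncation margin). back-project.cu
% in this repository comments out writing that header and dumps only the raw
% TSDF values, so this script is written for the upstream tsdf-fusion demo
% output rather than the headerless .bin files produced here.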
5 | % 6 | % Author: Andy Zeng, Princeton University, 2016 7 | % --------------------------------------------------------- 8 | 9 | % Load TSDF voxel grid from binary file 10 | fid = fopen('tsdf.bin','rb'); 11 | tsdfHeader = fread(fid,8,'single'); 12 | voxelGridDim = tsdfHeader(1:3); 13 | voxelGridOrigin = tsdfHeader(4:6); 14 | voxelSize = tsdfHeader(7); 15 | truncMargin = tsdfHeader(8); 16 | tsdf = fread(fid,voxelGridDim(1)*voxelGridDim(2)*voxelGridDim(3),'single'); 17 | fclose(fid); 18 | 19 | % Convert from TSDF to mesh 20 | tsdf = reshape(tsdf,[voxelGridDim(1),voxelGridDim(2),voxelGridDim(3)]); 21 | fv = isosurface(tsdf,0); 22 | points = fv.vertices'; 23 | faces = fv.faces'; 24 | 25 | % Set mesh color (light blue) 26 | color = uint8(repmat([175;198;233],1,size(points,2))); 27 | 28 | % Transform mesh from voxel coordinates to camera coordinates 29 | meshPoints(1,:) = voxelGridOrigin(1) + points(2,:)*voxelSize; % x y axes are swapped from isosurface 30 | meshPoints(2,:) = voxelGridOrigin(2) + points(1,:)*voxelSize; 31 | meshPoints(3,:) = voxelGridOrigin(3) + points(3,:)*voxelSize; 32 | 33 | % Write header for mesh file 34 | data = reshape(typecast(reshape(single(meshPoints),1,[]),'uint8'),3*4,[]); 35 | data = [data; color]; 36 | fid = fopen('mesh.ply','w'); 37 | fprintf (fid, 'ply\n'); 38 | fprintf (fid, 'format binary_little_endian 1.0\n'); 39 | fprintf (fid, 'element vertex %d\n', size(data,2)); 40 | fprintf (fid, 'property float x\n'); 41 | fprintf (fid, 'property float y\n'); 42 | fprintf (fid, 'property float z\n'); 43 | fprintf (fid, 'property uchar red\n'); 44 | fprintf (fid, 'property uchar green\n'); 45 | fprintf (fid, 'property uchar blue\n'); 46 | fprintf (fid, 'element face %d\n', size(faces,2)); 47 | fprintf (fid, 'property list uchar int vertex_index\n'); 48 | fprintf (fid, 'end_header\n'); 49 | 50 | % Write vertices 51 | fwrite(fid, data,'uint8'); 52 | 53 | % Write faces 54 | faces = faces([3 2 1],:); % reverse vertex order to flip the face normals 55 | faces_data = int32(faces-1); 56 | faces_data = reshape(typecast(reshape(faces_data,1,[]),'uint8'),3*4,[]); 57 | faces_data = [uint32(ones(1,size(faces,2))*3); faces_data]; 58 | fwrite(fid, faces_data,'uint8'); 59 | 60 | fclose(fid); 61 | -------------------------------------------------------------------------------- /depth-tsdf/utils.hpp: -------------------------------------------------------------------------------- 1 | // --------------------------------------------------------- 2 | // Author: Andy Zeng, Princeton University, 2016 3 | // --------------------------------------------------------- 4 | 5 | #include <opencv2/opencv.hpp> 6 | #include <vector> 7 | 8 | // Compute surface points from TSDF voxel grid and save points to point cloud file 9 | void SaveVoxelGrid2SurfacePointCloud(const std::string &file_name, int voxel_grid_dim_x, int voxel_grid_dim_y, int voxel_grid_dim_z, 10 | float voxel_size, float voxel_grid_origin_x, float voxel_grid_origin_y, float voxel_grid_origin_z, 11 | float * voxel_grid_TSDF) { 12 | 13 | // Count total number of points in point cloud 14 | int num_pts = 0; 15 | for (int i = 0; i < voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z; i++) 16 | if (voxel_grid_TSDF[i] == 1.0f) 17 | num_pts++; 18 | 19 | // Create header for .ply file 20 | FILE *fp = fopen(file_name.c_str(), "w"); 21 | fprintf(fp, "ply\n"); 22 | fprintf(fp, "format binary_little_endian 1.0\n"); 23 | fprintf(fp, "element vertex %d\n", num_pts); 24 | fprintf(fp, "property float x\n"); 25 | fprintf(fp, "property float y\n"); 26 | fprintf(fp, "property float z\n");
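// Only x/y/z vertex properties are declared in the header; the loop below writes exactly one binary float triplet per surface voxel, so num_pts matches the declared vertex count.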
27 | fprintf(fp, "end_header\n"); 28 | 29 | // Create point cloud content for ply file 30 | for (int i = 0; i < voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z; i++) { 31 | 32 | // If voxel is marked as surface (TSDF value == 1.0f), add its coordinates to the point cloud 33 | if (voxel_grid_TSDF[i] == 1.0f) { 34 | 35 | // Recover integer voxel indices (x, y, z) from the flat array index 36 | int z = floor(i / (voxel_grid_dim_x * voxel_grid_dim_y)); 37 | int y = floor((i - (z * voxel_grid_dim_x * voxel_grid_dim_y)) / voxel_grid_dim_x); 38 | int x = i - (z * voxel_grid_dim_x * voxel_grid_dim_y) - (y * voxel_grid_dim_x); 39 | 40 | // Convert voxel indices to float, and save coordinates to ply file 41 | float pt_base_x = voxel_grid_origin_x + (float) x * voxel_size; 42 | float pt_base_y = voxel_grid_origin_y + (float) y * voxel_size; 43 | float pt_base_z = voxel_grid_origin_z + (float) z * voxel_size; 44 | fwrite(&pt_base_x, sizeof(float), 1, fp); 45 | fwrite(&pt_base_y, sizeof(float), 1, fp); 46 | fwrite(&pt_base_z, sizeof(float), 1, fp); 47 | } 48 | } 49 | fclose(fp); 50 | } 51 | 52 | // Load an M x N matrix from a text file (numbers delimited by spaces/tabs) 53 | // Return the matrix as a float vector of the matrix in row-major order 54 | std::vector<float> LoadMatrixFromFile(std::string filename, int M, int N) { 55 | std::vector<float> matrix; 56 | FILE *fp = fopen(filename.c_str(), "r"); 57 | for (int i = 0; i < M * N; i++) { 58 | float tmp; 59 | int iret = fscanf(fp, "%f", &tmp); 60 | matrix.push_back(tmp); 61 | } 62 | fclose(fp); 63 | return matrix; 64 | } 65 | 66 | // Read a depth image with size H x W and save the depth values (in meters) into a float array (in row-major order) 67 | // The depth image file is assumed to be in 16-bit PNG format, depth in millimeters 68 | void ReadDepth(std::string filename, int H, int W, float * depth) { 69 | cv::Mat depth_mat = cv::imread(filename, -1); 70 | if (depth_mat.empty()) { 71 | std::cout << "Error: depth image file not read!"
<< std::endl; 72 | cv::waitKey(0); 73 | } 74 | for (int r = 0; r < H; ++r) 75 | for (int c = 0; c < W; ++c) { 76 | depth[r * W + c] = (float)(depth_mat.at<unsigned short>(r, c)) / 1000.0f; 77 | if (depth[r * W + c] > 6.0f) // Only consider depth < 6m 78 | depth[r * W + c] = 0; 79 | } 80 | } 81 | 82 | // 4x4 matrix multiplication (matrices are stored as float arrays in row-major order) 83 | void multiply_matrix(const float m1[16], const float m2[16], float mOut[16]) { 84 | mOut[0] = m1[0] * m2[0] + m1[1] * m2[4] + m1[2] * m2[8] + m1[3] * m2[12]; 85 | mOut[1] = m1[0] * m2[1] + m1[1] * m2[5] + m1[2] * m2[9] + m1[3] * m2[13]; 86 | mOut[2] = m1[0] * m2[2] + m1[1] * m2[6] + m1[2] * m2[10] + m1[3] * m2[14]; 87 | mOut[3] = m1[0] * m2[3] + m1[1] * m2[7] + m1[2] * m2[11] + m1[3] * m2[15]; 88 | 89 | mOut[4] = m1[4] * m2[0] + m1[5] * m2[4] + m1[6] * m2[8] + m1[7] * m2[12]; 90 | mOut[5] = m1[4] * m2[1] + m1[5] * m2[5] + m1[6] * m2[9] + m1[7] * m2[13]; 91 | mOut[6] = m1[4] * m2[2] + m1[5] * m2[6] + m1[6] * m2[10] + m1[7] * m2[14]; 92 | mOut[7] = m1[4] * m2[3] + m1[5] * m2[7] + m1[6] * m2[11] + m1[7] * m2[15]; 93 | 94 | mOut[8] = m1[8] * m2[0] + m1[9] * m2[4] + m1[10] * m2[8] + m1[11] * m2[12]; 95 | mOut[9] = m1[8] * m2[1] + m1[9] * m2[5] + m1[10] * m2[9] + m1[11] * m2[13]; 96 | mOut[10] = m1[8] * m2[2] + m1[9] * m2[6] + m1[10] * m2[10] + m1[11] * m2[14]; 97 | mOut[11] = m1[8] * m2[3] + m1[9] * m2[7] + m1[10] * m2[11] + m1[11] * m2[15]; 98 | 99 | mOut[12] = m1[12] * m2[0] + m1[13] * m2[4] + m1[14] * m2[8] + m1[15] * m2[12]; 100 | mOut[13] = m1[12] * m2[1] + m1[13] * m2[5] + m1[14] * m2[9] + m1[15] * m2[13]; 101 | mOut[14] = m1[12] * m2[2] + m1[13] * m2[6] + m1[14] * m2[10] + m1[15] * m2[14]; 102 | mOut[15] = m1[12] * m2[3] + m1[13] * m2[7] + m1[14] * m2[11] + m1[15] * m2[15]; 103 | } 104 | 105 | // 4x4 matrix inversion (matrices are stored as float arrays in row-major order) 106 | bool invert_matrix(const float m[16], float invOut[16]) { 107 | float inv[16], det; 108 | int i; 109 | inv[0] = m[5] * m[10] * m[15] - 110 | m[5] * m[11] * m[14] - 111 | m[9] * m[6] * m[15] + 112 | m[9] * m[7] * m[14] + 113 | m[13] * m[6] * m[11] - 114 | m[13] * m[7] * m[10]; 115 | 116 | inv[4] = -m[4] * m[10] * m[15] + 117 | m[4] * m[11] * m[14] + 118 | m[8] * m[6] * m[15] - 119 | m[8] * m[7] * m[14] - 120 | m[12] * m[6] * m[11] + 121 | m[12] * m[7] * m[10]; 122 | 123 | inv[8] = m[4] * m[9] * m[15] - 124 | m[4] * m[11] * m[13] - 125 | m[8] * m[5] * m[15] + 126 | m[8] * m[7] * m[13] + 127 | m[12] * m[5] * m[11] - 128 | m[12] * m[7] * m[9]; 129 | 130 | inv[12] = -m[4] * m[9] * m[14] + 131 | m[4] * m[10] * m[13] + 132 | m[8] * m[5] * m[14] - 133 | m[8] * m[6] * m[13] - 134 | m[12] * m[5] * m[10] + 135 | m[12] * m[6] * m[9]; 136 | 137 | inv[1] = -m[1] * m[10] * m[15] + 138 | m[1] * m[11] * m[14] + 139 | m[9] * m[2] * m[15] - 140 | m[9] * m[3] * m[14] - 141 | m[13] * m[2] * m[11] + 142 | m[13] * m[3] * m[10]; 143 | 144 | inv[5] = m[0] * m[10] * m[15] - 145 | m[0] * m[11] * m[14] - 146 | m[8] * m[2] * m[15] + 147 | m[8] * m[3] * m[14] + 148 | m[12] * m[2] * m[11] - 149 | m[12] * m[3] * m[10]; 150 | 151 | inv[9] = -m[0] * m[9] * m[15] + 152 | m[0] * m[11] * m[13] + 153 | m[8] * m[1] * m[15] - 154 | m[8] * m[3] * m[13] - 155 | m[12] * m[1] * m[11] + 156 | m[12] * m[3] * m[9]; 157 | 158 | inv[13] = m[0] * m[9] * m[14] - 159 | m[0] * m[10] * m[13] - 160 | m[8] * m[1] * m[14] + 161 | m[8] * m[2] * m[13] + 162 | m[12] * m[1] * m[10] - 163 | m[12] * m[2] * m[9]; 164 | 165 | inv[2] = m[1] * m[6] * m[15] - 166 | m[1] * m[7] * m[14] - 167 | m[5] * m[2] *
m[15] + 168 | m[5] * m[3] * m[14] + 169 | m[13] * m[2] * m[7] - 170 | m[13] * m[3] * m[6]; 171 | 172 | inv[6] = -m[0] * m[6] * m[15] + 173 | m[0] * m[7] * m[14] + 174 | m[4] * m[2] * m[15] - 175 | m[4] * m[3] * m[14] - 176 | m[12] * m[2] * m[7] + 177 | m[12] * m[3] * m[6]; 178 | 179 | inv[10] = m[0] * m[5] * m[15] - 180 | m[0] * m[7] * m[13] - 181 | m[4] * m[1] * m[15] + 182 | m[4] * m[3] * m[13] + 183 | m[12] * m[1] * m[7] - 184 | m[12] * m[3] * m[5]; 185 | 186 | inv[14] = -m[0] * m[5] * m[14] + 187 | m[0] * m[6] * m[13] + 188 | m[4] * m[1] * m[14] - 189 | m[4] * m[2] * m[13] - 190 | m[12] * m[1] * m[6] + 191 | m[12] * m[2] * m[5]; 192 | 193 | inv[3] = -m[1] * m[6] * m[11] + 194 | m[1] * m[7] * m[10] + 195 | m[5] * m[2] * m[11] - 196 | m[5] * m[3] * m[10] - 197 | m[9] * m[2] * m[7] + 198 | m[9] * m[3] * m[6]; 199 | 200 | inv[7] = m[0] * m[6] * m[11] - 201 | m[0] * m[7] * m[10] - 202 | m[4] * m[2] * m[11] + 203 | m[4] * m[3] * m[10] + 204 | m[8] * m[2] * m[7] - 205 | m[8] * m[3] * m[6]; 206 | 207 | inv[11] = -m[0] * m[5] * m[11] + 208 | m[0] * m[7] * m[9] + 209 | m[4] * m[1] * m[11] - 210 | m[4] * m[3] * m[9] - 211 | m[8] * m[1] * m[7] + 212 | m[8] * m[3] * m[5]; 213 | 214 | inv[15] = m[0] * m[5] * m[10] - 215 | m[0] * m[6] * m[9] - 216 | m[4] * m[1] * m[10] + 217 | m[4] * m[2] * m[9] + 218 | m[8] * m[1] * m[6] - 219 | m[8] * m[2] * m[5]; 220 | 221 | det = m[0] * inv[0] + m[1] * inv[4] + m[2] * inv[8] + m[3] * inv[12]; 222 | 223 | if (det == 0) 224 | return false; 225 | 226 | det = 1.0 / det; 227 | 228 | for (i = 0; i < 16; i++) 229 | invOut[i] = inv[i] * det; 230 | 231 | return true; 232 | } 233 | 234 | void FatalError(const int lineNumber = 0) { 235 | std::cerr << "FatalError"; 236 | if (lineNumber != 0) std::cerr << " at LINE " << lineNumber; 237 | std::cerr << ". Program Terminated." 
<< std::endl; 238 | cudaDeviceReset(); 239 | exit(EXIT_FAILURE); 240 | } 241 | 242 | void checkCUDA(const int lineNumber, cudaError_t status) { 243 | if (status != cudaSuccess) { 244 | std::cerr << "CUDA failure at LINE " << lineNumber << ": " << status << std::endl; 245 | FatalError(); 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from config_test import cfg_test 5 | from model import FCR_aGAN 6 | from util import DataProcess, scene_model_id_pair, onehot, scene_model_id_pair_test 7 | from sklearn.metrics import average_precision_score 8 | import copy 9 | 10 | 11 | def evaluate(batch_size, checknum, mode): 12 | 13 | n_vox = cfg_test.CONST.N_VOX 14 | dim = cfg_test.NET.DIM 15 | vox_shape = [n_vox[0], n_vox[1], n_vox[2], dim[4]] 16 | dim_z = cfg_test.NET.DIM_Z 17 | start_vox_size = cfg_test.NET.START_VOX 18 | kernel = cfg_test.NET.KERNEL 19 | stride = cfg_test.NET.STRIDE 20 | freq = cfg_test.CHECK_FREQ 21 | refine_ch = cfg_test.NET.REFINE_CH 22 | refine_kernel = cfg_test.NET.REFINE_KERNEL 23 | 24 | save_path = cfg_test.DIR.EVAL_PATH 25 | chckpt_path = cfg_test.DIR.CHECK_PT_PATH + str( 26 | checknum) #+ '-' + str(checknum * freq) 27 | 28 | fcr_agan_model = FCR_aGAN( 29 | batch_size=batch_size, 30 | vox_shape=vox_shape, 31 | dim_z=dim_z, 32 | dim=dim, 33 | start_vox_size=start_vox_size, 34 | kernel=kernel, 35 | stride=stride, 36 | refine_ch=refine_ch, 37 | refine_kernel=refine_kernel, 38 | ) 39 | 40 | 41 | Z_tf, z_enc_tf, vox_tf, vox_gen_tf, vox_gen_decode_tf, vox_refine_dec_tf, vox_refine_gen_tf,\ 42 | recons_loss_tf, code_encode_loss_tf, gen_loss_tf, discrim_loss_tf, recons_loss_refine_tfs, gen_loss_refine_tf, discrim_loss_refine_tf,\ 43 | cost_enc_tf, cost_code_tf, cost_gen_tf, cost_discrim_tf, cost_gen_ref_tf, cost_discrim_ref_tf, summary_tf,\ 44 | tsdf_tf = fcr_agan_model.build_model() 45 | """ 46 | z_enc_dep_tf, dep_tf, vox_gen_decode_dep_tf,\ 47 | recons_dep_loss_tf, code_encode_dep_loss_tf, gen_dep_loss_tf, discrim_dep_loss_tf,\ 48 | cost_enc_dep_tf, cost_code_dep_tf, cost_gen_dep_tf, cost_discrim_dep_tf, cost_code_compare_tf,\ 49 | """ 50 | Z_tf_sample, vox_tf_sample = fcr_agan_model.samples_generator( 51 | visual_size=batch_size) 52 | sample_vox_tf, sample_refine_vox_tf = fcr_agan_model.refine_generator( 53 | visual_size=batch_size) 54 | sess = tf.InteractiveSession() 55 | saver = tf.train.Saver() 56 | 57 | # Restore variables from disk. 
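# chckpt_path selects a single checkpoint by its index (checknum); tf.train.Saver.restore() loads every saved variable from that checkpoint into the live session, so the graph built above must match the one used at training time.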
58 | saver.restore(sess, chckpt_path) 59 | 60 | print("...Weights restored.") 61 | 62 | if mode == 'recons': 63 | #reconstruction and generation from normal distribution evaluation 64 | #generator from random distribution 65 | for i in np.arange(batch_size): 66 | Z_np_sample = np.random.normal( 67 | size=(1, start_vox_size[0], start_vox_size[1], 68 | start_vox_size[2], dim_z)).astype(np.float32) 69 | if i == 0: 70 | Z_var_np_sample = Z_np_sample 71 | else: 72 | Z_var_np_sample = np.concatenate( 73 | (Z_var_np_sample, Z_np_sample), axis=0) 74 | np.save(save_path + '/sample_z.npy', Z_var_np_sample) 75 | 76 | generated_voxs_fromrand = sess.run( 77 | vox_tf_sample, feed_dict={Z_tf_sample: Z_var_np_sample}) 78 | vox_models_cat = np.argmax(generated_voxs_fromrand, axis=4) 79 | np.save(save_path + '/generate.npy', vox_models_cat) 80 | 81 | refined_voxs_fromrand = sess.run( 82 | sample_refine_vox_tf, 83 | feed_dict={sample_vox_tf: generated_voxs_fromrand}) 84 | vox_models_cat = np.argmax(refined_voxs_fromrand, axis=4) 85 | np.save(save_path + '/generate_refine.npy', vox_models_cat) 86 | 87 | #evaluation for reconstruction 88 | voxel_test, tsdf_test, num = scene_model_id_pair_test( 89 | dataset_portion=cfg_test.TRAIN.DATASET_PORTION) 90 | num = voxel_test.shape[0] 91 | print("test voxels loaded") 92 | for i in np.arange(int(num / batch_size)): 93 | batch_voxel_test = voxel_test[i * batch_size:i * batch_size + 94 | batch_size] 95 | # depth--start 96 | """ 97 | batch_depth_test = depth_test[i*batch_size:i*batch_size+batch_size] 98 | """ 99 | # depth--end 100 | batch_tsdf_test = tsdf_test[i * batch_size:i * batch_size + 101 | batch_size] 102 | 103 | batch_generated_voxs, batch_enc_Z = sess.run( 104 | [vox_gen_decode_tf, z_enc_tf], 105 | feed_dict={tsdf_tf: batch_tsdf_test}) 106 | # depth--start 107 | """ 108 | batch_dep_generated_voxs, batch_enc_dep_Z = sess.run( 109 | [vox_gen_decode_dep_tf, z_enc_dep_tf], 110 | feed_dict={dep_tf:batch_depth_test}) 111 | """ 112 | # depth--end 113 | batch_refined_vox = sess.run( 114 | sample_refine_vox_tf, 115 | feed_dict={sample_vox_tf: batch_generated_voxs}) 116 | 117 | if i == 0: 118 | generated_voxs = batch_generated_voxs 119 | # generated_deps = batch_dep_generated_voxs 120 | refined_voxs = batch_refined_vox 121 | enc_Z = batch_enc_Z 122 | else: 123 | generated_voxs = np.concatenate( 124 | (generated_voxs, batch_generated_voxs), axis=0) 125 | # generated_deps = np.concatenate((generated_deps, batch_dep_generated_voxs), axis=0) 126 | refined_voxs = np.concatenate( 127 | (refined_voxs, batch_refined_vox), axis=0) 128 | enc_Z = np.concatenate((enc_Z, batch_enc_Z), axis=0) 129 | 130 | print("forwarded") 131 | 132 | #real 133 | vox_models_cat = voxel_test 134 | np.save(save_path + '/real.npy', vox_models_cat) 135 | tsdf_models_cat = tsdf_test 136 | np.save(save_path + '/tsdf.npy', tsdf_models_cat) 137 | 138 | #decoded 139 | vox_models_cat = np.argmax(generated_voxs, axis=4) 140 | np.save(save_path + '/recons.npy', vox_models_cat) 141 | """ 142 | vox_models_cat = np.argmax(generated_deps, axis=4) 143 | np.save(save_path + '/gens_dep.npy', vox_models_cat) 144 | """ 145 | vox_models_cat = np.argmax(refined_voxs, axis=4) 146 | np.save(save_path + '/recons_refine.npy', vox_models_cat) 147 | np.save(save_path + '/decode_z.npy', enc_Z) 148 | 149 | print("voxels saved") 150 | 151 | #numerical evaluation 152 | on_real = onehot(voxel_test, vox_shape[3]) 153 | on_recons = onehot(np.argmax(generated_voxs, axis=4), vox_shape[3]) 154 | # on_gens_dep =
onehot(np.argmax(generated_deps, axis=4),vox_shape[3]) 155 | 156 | #calc_IoU 157 | IoU_class = np.zeros([vox_shape[3] + 1]) 158 | for class_n in np.arange(vox_shape[3]): 159 | on_recons_ = on_recons[:, :, :, :, class_n] 160 | on_real_ = on_real[:, :, :, :, class_n] 161 | mother = np.sum(np.add(on_recons_, on_real_), (1, 2, 3)) 162 | child = np.sum(np.multiply(on_recons_, on_real_), (1, 2, 3)) 163 | count = 0 164 | IoU_element = 0 165 | for i in np.arange(num): 166 | if mother[i] != 0: 167 | IoU_element += child[i] / mother[i] 168 | count += 1 169 | IoU_calc = np.round(IoU_element / count, 3) 170 | IoU_class[class_n] = IoU_calc 171 | print 'IoU class ' + str(class_n) + '=' + str(IoU_calc) 172 | 173 | on_recons_ = on_recons[:, :, :, :, 1:vox_shape[3]] 174 | on_real_ = on_real[:, :, :, :, 1:vox_shape[3]] 175 | mother = np.sum(np.add(on_recons_, on_real_), (1, 2, 3, 4)) 176 | child = np.sum(np.multiply(on_recons_, on_real_), (1, 2, 3, 4)) 177 | count = 0 178 | IoU_element = 0 179 | for i in np.arange(num): 180 | if mother[i] != 0: 181 | IoU_element += child[i] / mother[i] 182 | count += 1 183 | IoU_calc = np.round(IoU_element / count, 3) 184 | IoU_class[vox_shape[3]] = IoU_calc 185 | print 'IoU all =' + str(IoU_calc) 186 | np.savetxt(save_path + '/IoU.csv', IoU_class, delimiter=",") 187 | 188 | #calc_AP 189 | AP_class = np.zeros([vox_shape[3] + 1]) 190 | for class_n in np.arange(vox_shape[3]): 191 | on_recons_ = generated_voxs[:, :, :, :, class_n] 192 | on_real_ = on_real[:, :, :, :, class_n] 193 | 194 | AP = 0. 195 | for i in np.arange(num): 196 | y_true = np.reshape(on_real_[i], [-1]) 197 | y_scores = np.reshape(on_recons_[i], [-1]) 198 | if np.sum(y_true) > 0.: 199 | AP += average_precision_score(y_true, y_scores) 200 | AP = np.round(AP / num, 3) 201 | AP_class[class_n] = AP 202 | print 'AP class ' + str(class_n) + '=' + str(AP) 203 | 204 | on_recons_ = generated_voxs[:, :, :, :, 1:vox_shape[3]] 205 | on_real_ = on_real[:, :, :, :, 1:vox_shape[3]] 206 | AP = 0. 
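# Overall AP: classes 1..N-1 are pooled (class 0 is sliced away above); samples with no positive voxel are skipped inside the loop so average_precision_score stays well-defined, while the mean below still divides by num.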
207 | for i in np.arange(num): 208 | y_true = np.reshape(on_real_[i], [-1]) 209 | y_scores = np.reshape(on_recons_[i], [-1]) 210 | if np.sum(y_true) > 0.: 211 | AP += average_precision_score(y_true, y_scores) 212 | 213 | AP = np.round(AP / num, 3) 214 | AP_class[vox_shape[3]] = AP 215 | print 'AP all =' + str(AP) 216 | np.savetxt(save_path + '/AP.csv', AP_class, delimiter=",") 217 | 218 | #Refine 219 | #calc_IoU 220 | on_recons = onehot(np.argmax(refined_voxs, axis=4), vox_shape[3]) 221 | 222 | IoU_class = np.zeros([vox_shape[3] + 1]) 223 | for class_n in np.arange(vox_shape[3]): 224 | on_recons_ = on_recons[:, :, :, :, class_n] 225 | on_real_ = on_real[:, :, :, :, class_n] 226 | mother = np.sum(np.add(on_recons_, on_real_), (1, 2, 3)) 227 | child = np.sum(np.multiply(on_recons_, on_real_), (1, 2, 3)) 228 | count = 0 229 | IoU_element = 0 230 | for i in np.arange(num): 231 | if mother[i] != 0: 232 | IoU_element += child[i] / mother[i] 233 | count += 1 234 | IoU_calc = np.round(IoU_element / count, 3) 235 | IoU_class[class_n] = IoU_calc 236 | print 'IoU class ' + str(class_n) + '=' + str(IoU_calc) 237 | 238 | on_recons_ = on_recons[:, :, :, :, 1:vox_shape[3]] 239 | on_real_ = on_real[:, :, :, :, 1:vox_shape[3]] 240 | mother = np.sum(np.add(on_recons_, on_real_), (1, 2, 3, 4)) 241 | child = np.sum(np.multiply(on_recons_, on_real_), (1, 2, 3, 4)) 242 | count = 0 243 | IoU_element = 0 244 | for i in np.arange(num): 245 | if mother[i] != 0: 246 | IoU_element += child[i] / mother[i] 247 | count += 1 248 | IoU_calc = np.round(IoU_element / count, 3) 249 | IoU_class[vox_shape[3]] = IoU_calc 250 | print 'IoU all =' + str(IoU_calc) 251 | np.savetxt(save_path + '/IoU_refine.csv', IoU_class, delimiter=",") 252 | 253 | #calc_AP 254 | AP_class = np.zeros([vox_shape[3] + 1]) 255 | for class_n in np.arange(vox_shape[3]): 256 | on_recons_ = refined_voxs[:, :, :, :, class_n] 257 | on_real_ = on_real[:, :, :, :, class_n] 258 | 259 | AP = 0. 260 | for i in np.arange(num): 261 | y_true = np.reshape(on_real_[i], [-1]) 262 | y_scores = np.reshape(on_recons_[i], [-1]) 263 | if np.sum(y_true) > 0.: 264 | AP += average_precision_score(y_true, y_scores) 265 | AP = np.round(AP / num, 3) 266 | AP_class[class_n] = AP 267 | print 'AP class ' + str(class_n) + '=' + str(AP) 268 | 269 | on_recons_ = refined_voxs[:, :, :, :, 1:vox_shape[3]] 270 | on_real_ = on_real[:, :, :, :, 1:vox_shape[3]] 271 | AP = 0. 
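# Same AP protocol as above, repeated on the refined predictions.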
272 | for i in np.arange(num): 273 | y_true = np.reshape(on_real_[i], [-1]) 274 | y_scores = np.reshape(on_recons_[i], [-1]) 275 | if np.sum(y_true) > 0.: 276 | AP += average_precision_score(y_true, y_scores) 277 | 278 | AP = np.round(AP / num, 3) 279 | AP_class[vox_shape[3]] = AP 280 | print 'AP all =' + str(AP) 281 | np.savetxt(save_path + '/AP_refine.csv', AP_class, delimiter=",") 282 | 283 | #interpolation evaluation 284 | if mode == 'interpolate': 285 | interpolate_num = 30 286 | #interpolation of latent vectors 287 | decode_z = np.load(save_path + '/decode_z.npy') 288 | decode_z = decode_z[:batch_size] 289 | for l in np.arange(batch_size): 290 | for r in np.arange(batch_size): 291 | if l != r: 292 | print l, r 293 | base_num_left = l 294 | base_num_right = r 295 | left = np.reshape(decode_z[base_num_left], [ 296 | 1, start_vox_size[0], start_vox_size[1], 297 | start_vox_size[2], dim_z 298 | ]) 299 | right = np.reshape(decode_z[base_num_right], [ 300 | 1, start_vox_size[0], start_vox_size[1], 301 | start_vox_size[2], dim_z 302 | ]) 303 | 304 | duration = (right - left) / (interpolate_num - 1) 305 | if base_num_left == 0: 306 | Z_np_sample = decode_z[1:] 307 | elif base_num_left == batch_size - 1: 308 | Z_np_sample = decode_z[:batch_size - 1] 309 | else: 310 | Z_np_sample_before = np.reshape( 311 | decode_z[:base_num_left], [ 312 | base_num_left, start_vox_size[0], 313 | start_vox_size[1], start_vox_size[2], dim_z 314 | ]) 315 | Z_np_sample_after = np.reshape( 316 | decode_z[base_num_left + 1:], [ 317 | batch_size - base_num_left - 1, 318 | start_vox_size[0], start_vox_size[1], 319 | start_vox_size[2], dim_z 320 | ]) 321 | Z_np_sample = np.concatenate( 322 | [Z_np_sample_before, Z_np_sample_after], axis=0) 323 | for i in np.arange(interpolate_num): 324 | if i == 0: 325 | Z = copy.copy(left) 326 | interpolate_z = copy.copy(Z) 327 | else: 328 | Z = Z + duration 329 | interpolate_z = np.concatenate([interpolate_z, Z], 330 | axis=0) 331 | Z_var_np_sample = np.concatenate([Z, Z_np_sample], 332 | axis=0) 333 | generated_voxs_fromrand = sess.run( 334 | vox_tf_sample, 335 | feed_dict={Z_tf_sample: Z_var_np_sample}) 336 | refined_voxs_fromrand = sess.run( 337 | sample_refine_vox_tf, 338 | feed_dict={sample_vox_tf: generated_voxs_fromrand}) 339 | interpolate_vox = np.reshape( 340 | refined_voxs_fromrand[0], [ 341 | 1, vox_shape[0], vox_shape[1], vox_shape[2], 342 | vox_shape[3] 343 | ]) 344 | if i == 0: 345 | generated_voxs = interpolate_vox 346 | else: 347 | generated_voxs = np.concatenate( 348 | [generated_voxs, interpolate_vox], axis=0) 349 | 350 | np.save( 351 | save_path + '/interpolation_z' + str(l) + '-' + str(r) 352 | + '.npy', interpolate_z) 353 | 354 | vox_models_cat = np.argmax(generated_voxs, axis=4) 355 | np.save( 356 | save_path + '/interpolation' + str(l) + '-' + str(r) + 357 | '.npy', vox_models_cat) 358 | print("voxels saved") 359 | 360 | #add noise evaluation 361 | if mode == 'noise': 362 | decode_z = np.load(save_path + '/decode_z.npy') 363 | decode_z = decode_z[:batch_size] 364 | noise_num = 10 365 | for base_num in np.arange(batch_size): 366 | print base_num 367 | base = np.reshape(decode_z[base_num], [ 368 | 1, start_vox_size[0], start_vox_size[1], start_vox_size[2], 369 | dim_z 370 | ]) 371 | eps = np.random.normal(size=(noise_num - 1, 372 | dim_z)).astype(np.float32) 373 | 374 | if base_num == 0: 375 | Z_np_sample = decode_z[1:] 376 | elif base_num == batch_size - 1: 377 | Z_np_sample = decode_z[:batch_size - 1] 378 | else: 379 | Z_np_sample_before =
np.reshape(decode_z[:base_num], [ 380 | base_num, start_vox_size[0], start_vox_size[1], 381 | start_vox_size[2], dim_z 382 | ]) 383 | Z_np_sample_after = np.reshape(decode_z[base_num + 1:], [ 384 | batch_size - base_num - 1, start_vox_size[0], 385 | start_vox_size[1], start_vox_size[2], dim_z 386 | ]) 387 | Z_np_sample = np.concatenate( 388 | [Z_np_sample_before, Z_np_sample_after], axis=0) 389 | 390 | for c in np.arange(start_vox_size[0]): 391 | for l in np.arange(start_vox_size[1]): 392 | for d in np.arange(start_vox_size[2]): 393 | 394 | for i in np.arange(noise_num): 395 | if i == 0: 396 | Z = copy.copy(base) 397 | noise_z = copy.copy(Z) 398 | else: 399 | Z = copy.copy(base) 400 | Z[0, c, l, d, :] += eps[i - 1] 401 | noise_z = np.concatenate([noise_z, Z], axis=0) 402 | Z_var_np_sample = np.concatenate([Z, Z_np_sample], 403 | axis=0) 404 | generated_voxs_fromrand = sess.run( 405 | vox_tf_sample, 406 | feed_dict={Z_tf_sample: Z_var_np_sample}) 407 | refined_voxs_fromrand = sess.run( 408 | sample_refine_vox_tf, 409 | feed_dict={ 410 | sample_vox_tf: generated_voxs_fromrand 411 | }) 412 | noise_vox = np.reshape(refined_voxs_fromrand[0], [ 413 | 1, vox_shape[0], vox_shape[1], vox_shape[2], 414 | vox_shape[3] 415 | ]) 416 | if i == 0: 417 | generated_voxs = noise_vox 418 | else: 419 | generated_voxs = np.concatenate( 420 | [generated_voxs, noise_vox], axis=0) 421 | 422 | np.save( 423 | save_path + '/noise_z' + str(base_num) + '_' + 424 | str(c) + str(l) + str(d) + '.npy', noise_z) 425 | 426 | vox_models_cat = np.argmax(generated_voxs, axis=4) 427 | np.save( 428 | save_path + '/noise' + str(base_num) + '_' + str(c) 429 | + str(l) + str(d) + '.npy', vox_models_cat) 430 | 431 | print("voxels saved") 432 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from train import train 5 | from config import cfg 6 | from config_test import cfg_test 7 | import tensorflow as tf 8 | 9 | flags = tf.app.flags 10 | flags.DEFINE_integer("epoch", cfg.TRAIN.NUM_EPOCH, 11 | "Epoch to train [15]") #n_epochs = cfg.TRAIN.NUM_EPOCH 12 | flags.DEFINE_float("learning_rate_G", cfg.LEARNING_RATE_G, 13 | "Learning rate for Generator of adam [0.0001]" 14 | ) #learning_rate_G = cfg.LEARNING_RATE_G 15 | flags.DEFINE_float("learning_rate_D", cfg.LEARNING_RATE_D, 16 | "Learning rate for Discriminator of adam [0.0001]" 17 | ) #learning_rate_D = cfg.LEARNING_RATE_D 18 | flags.DEFINE_integer( 19 | "batch_size", cfg.CONST.BATCH_SIZE, 20 | "The size of batch voxels [100]") #batch_size = cfg.CONST.BATCH_SIZE 21 | flags.DEFINE_integer( 22 | "batch_size_test", cfg_test.CONST.BATCH_SIZE, 23 | "The size of batch voxels [100]") #batch_size = cfg.CONST.BATCH_SIZE 24 | 25 | flags.DEFINE_boolean("middle_start", False, 26 | "True for starting from the middle [False]") 27 | flags.DEFINE_integer( 28 | "ini_epoch", 0, 29 | "The number of initial epoch --if middle_start: False -> 0, True -> must assign the number [0]" 30 | ) 31 | flags.DEFINE_string( 32 | "mode", 'train', 33 | "Execute mode: train/evaluate_recons/evaluate_interpolate/evaluate_noise") 34 | flags.DEFINE_integer( 35 | "conf_epoch", 10000, 36 | "The number of confirmation epoch to evaluate interpolate, reconstruction etc [100]" 37 | ) 38 | 39 | FLAGS = flags.FLAGS 40 | 41 | 42 | def main(): 43 | if not os.path.exists(cfg.DIR.CHECK_POINT_PATH): 44 | os.makedirs(cfg.DIR.CHECK_POINT_PATH) 45 | if not 
os.path.exists(cfg.DIR.TRAIN_OBJ_PATH): 46 | os.makedirs(cfg.DIR.TRAIN_OBJ_PATH) 47 | if not os.path.exists(cfg.DIR.EVAL_PATH): 48 | os.makedirs(cfg.DIR.EVAL_PATH) 49 | if FLAGS.middle_start: 50 | print 'middle_start' 51 | 52 | if FLAGS.mode == 'train': 53 | train(FLAGS.epoch, FLAGS.learning_rate_G, FLAGS.learning_rate_D, 54 | FLAGS.batch_size, FLAGS.middle_start, FLAGS.ini_epoch) 55 | elif FLAGS.mode in ('evaluate_recons', 'evaluate_interpolate', 'evaluate_noise'): 56 | from evaluate import evaluate 57 | if FLAGS.mode == 'evaluate_recons': 58 | mode = 'recons' 59 | elif FLAGS.mode == 'evaluate_interpolate': 60 | mode = 'interpolate' 61 | else: 62 | mode = 'noise' 63 | evaluate(FLAGS.batch_size_test, FLAGS.conf_epoch, mode) 64 | 65 | 66 | if __name__ == '__main__': 67 | #tf.app.run() 68 | main() 69 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from config import cfg 4 | import tensorflow as tf 5 | from util import * 6 | 7 | 8 | def batchnormalize(X, eps=1e-5, g=None, b=None, batch_size=10): 9 | if X.get_shape().ndims == 5: 10 | if batch_size == 1: 11 | mean = 0 12 | std = 1 - eps 13 | else: 14 | mean = tf.reduce_mean(X, [0, 1, 2, 3]) 15 | std = tf.reduce_mean(tf.square(X - mean), [0, 1, 2, 3]) 16 | X = (X - mean) / tf.sqrt(std + eps) 17 | 18 | if g is not None and b is not None: 19 | g = tf.reshape(g, [1, 1, 1, 1, -1]) 20 | b = tf.reshape(b, [1, 1, 1, 1, -1]) 21 | X = X * g + b 22 | 23 | # depth--start 24 | elif X.get_shape().ndims == 4: 25 | if batch_size == 1: 26 | mean = 0 27 | std = 1 - eps 28 | else: 29 | mean = tf.reduce_mean(X, [0, 1, 2]) 30 | std = tf.reduce_mean(tf.square(X - mean), [0, 1, 2]) 31 | X = (X - mean) / tf.sqrt(std + eps) 32 | 33 | if g is not None and b is not None: 34 | g = tf.reshape(g, [1, 1, 1, -1]) 35 | b = tf.reshape(b, [1, 1, 1, -1]) 36 | X = X * g + b 37 | # depth--end 38 | 39 | elif X.get_shape().ndims == 2: 40 | if batch_size == 1: 41 | mean = 0 42 | std = 1 - eps 43 | else: 44 | mean = tf.reduce_mean(X, 0) 45 | std = tf.reduce_mean(tf.square(X - mean), 0) 46 | X = (X - mean) / tf.sqrt(std + eps) #std 47 | 48 | if g is not None and b is not None: 49 | g = tf.reshape(g, [1, -1]) 50 | b = tf.reshape(b, [1, -1]) 51 | X = X * g + b 52 | 53 | else: 54 | raise NotImplementedError 55 | 56 | return X 57 | 58 | 59 | def layernormalize(X, eps=1e-5, g=None, b=None): 60 | if X.get_shape().ndims == 5: 61 | mean, std = tf.nn.moments(X, [1, 2, 3, 4], keep_dims=True) 62 | X = (X - mean) / tf.sqrt(std + eps) 63 | 64 | if g is not None and b is not None: 65 | X = X * g + b 66 | 67 | elif X.get_shape().ndims == 2: 68 | mean = tf.reduce_mean(X, 1) 69 | std = tf.reduce_mean(tf.square(X - mean), 1) 70 | X = (X - mean) / tf.sqrt(std + eps) #std 71 | 72 | if g is not None and b is not None: 73 | X = X * g + b 74 | 75 | else: 76 | raise NotImplementedError 77 | 78 | return X 79 | 80 | 81 | def lrelu(X, leak=0.2): 82 | return tf.maximum(X, leak * X) 83 | 84 | 85 | def softmax(X, batch_size, vox_shape): 86 | c = tf.reduce_max(X, 4) 87 | c = tf.reshape(c, 88 | [batch_size, vox_shape[0], vox_shape[1], vox_shape[2], 1]) 89 | exp = tf.exp(tf.subtract(X, c)) 90 | expsum = tf.reduce_sum(exp, 4) 91 | expsum = tf.reshape( 92 | expsum, [batch_size, vox_shape[0], vox_shape[1], vox_shape[2], 1]) 93 | soft = tf.div(exp, expsum) 94 | 95 | return soft 96 | 97 | 98 | class FCR_aGAN(): 99 | def __init__( 100 | self, 101 | batch_size=20, 102
| vox_shape=[80, 48, 80, 12], 103 | dep_shape=[320, 240, 1], 104 | dim_z=16, 105 | dim=[512, 256, 128, 64, 12], 106 | start_vox_size=[5, 3, 5], 107 | kernel=[[5, 5, 5, 5, 5], [3, 3, 3, 3, 3], [5, 5, 5, 5, 5]], 108 | stride=[1, 2, 2, 2, 1], 109 | dim_code=750, 110 | refine_ch=32, 111 | refine_kernel=3, 112 | ): 113 | 114 | self.batch_size = batch_size 115 | self.vox_shape = vox_shape 116 | # depth--start 117 | self.dep_shape = dep_shape 118 | # depth--end 119 | self.n_class = vox_shape[3] 120 | self.dim_z = dim_z 121 | self.dim_W1 = dim[0] 122 | self.dim_W2 = dim[1] 123 | self.dim_W3 = dim[2] 124 | self.dim_W4 = dim[3] 125 | self.dim_W5 = dim[4] 126 | self.start_vox_size = np.array(start_vox_size) 127 | self.kernel = np.array(kernel) 128 | self.kernel1 = self.kernel[:, 0] 129 | self.kernel2 = self.kernel[:, 1] 130 | self.kernel3 = self.kernel[:, 2] 131 | self.kernel4 = self.kernel[:, 3] 132 | self.kernel5 = self.kernel[:, 4] 133 | self.stride = stride 134 | # depth--start 135 | self.stride_dep = [1, 2, 2, 1] 136 | # depth--end 137 | 138 | self.lamda_recons = cfg.LAMDA_RECONS 139 | self.lamda_gamma = cfg.LAMDA_GAMMA 140 | 141 | self.dim_code = dim_code 142 | self.refine_ch = refine_ch 143 | self.refine_kernel = refine_kernel 144 | 145 | self.gen_W1 = tf.Variable( 146 | tf.random_normal([ 147 | self.dim_z * self.start_vox_size[0] * self.start_vox_size[1] * 148 | self.start_vox_size[2], self.dim_W1 * self.start_vox_size[0] * 149 | self.start_vox_size[1] * self.start_vox_size[2] 150 | ], 151 | stddev=0.02), 152 | name='gen_W1') 153 | self.gen_bn_g1 = tf.Variable( 154 | tf.random_normal([ 155 | self.dim_W1 * self.start_vox_size[0] * self.start_vox_size[1] * 156 | self.start_vox_size[2] 157 | ], 158 | mean=1.0, 159 | stddev=0.02), 160 | name='gen_bn_g1') 161 | self.gen_bn_b1 = tf.Variable( 162 | tf.zeros([ 163 | self.dim_W1 * self.start_vox_size[0] * self.start_vox_size[1] * 164 | self.start_vox_size[2] 165 | ]), 166 | name='gen_bn_b1') 167 | 168 | self.gen_W2 = tf.Variable( 169 | tf.random_normal([ 170 | self.kernel2[0], self.kernel2[1], self.kernel2[2], self.dim_W2, 171 | self.dim_W1 172 | ], 173 | stddev=0.02), 174 | name='gen_W2') 175 | self.gen_bn_g2 = tf.Variable( 176 | tf.random_normal([self.dim_W2], mean=1.0, stddev=0.02), 177 | name='gen_bn_g2') 178 | self.gen_bn_b2 = tf.Variable(tf.zeros([self.dim_W2]), name='gen_bn_b2') 179 | 180 | self.gen_W3 = tf.Variable( 181 | tf.random_normal([ 182 | self.kernel3[0], self.kernel3[1], self.kernel3[2], self.dim_W3, 183 | self.dim_W2 184 | ], 185 | stddev=0.02), 186 | name='gen_W3') 187 | self.gen_bn_g3 = tf.Variable( 188 | tf.random_normal([self.dim_W3], mean=1.0, stddev=0.02), 189 | name='gen_bn_g3') 190 | self.gen_bn_b3 = tf.Variable(tf.zeros([self.dim_W3]), name='gen_bn_b3') 191 | 192 | self.gen_W4 = tf.Variable( 193 | tf.random_normal([ 194 | self.kernel4[0], self.kernel4[1], self.kernel4[2], self.dim_W4, 195 | self.dim_W3 196 | ], 197 | stddev=0.02), 198 | name='gen_W4') 199 | self.gen_bn_g4 = tf.Variable( 200 | tf.random_normal([self.dim_W4], mean=1.0, stddev=0.02), 201 | name='gen_bn_g4') 202 | self.gen_bn_b4 = tf.Variable(tf.zeros([self.dim_W4]), name='gen_bn_b4') 203 | 204 | self.gen_W5 = tf.Variable( 205 | tf.random_normal([ 206 | self.kernel5[0], self.kernel5[1], self.kernel5[2], self.dim_W5, 207 | self.dim_W4 208 | ], 209 | stddev=0.02), 210 | name='gen_W5') 211 | self.gen_bn_g5 = tf.Variable( 212 | tf.random_normal([self.dim_W5], mean=1.0, stddev=0.02), 213 | name='gen_bn_g5') 214 | self.gen_bn_b5 = tf.Variable(tf.zeros([self.dim_W5]), 
name='gen_bn_b5') 215 | 216 | self.encode_W1 = tf.Variable( 217 | tf.random_normal([ 218 | self.kernel5[0], self.kernel5[1], self.kernel5[2], 1, 219 | self.dim_W4 220 | ], 221 | stddev=0.02), 222 | name='encode_W1') 223 | self.encode_bn_g1 = tf.Variable( 224 | tf.random_normal([self.dim_W4], mean=1.0, stddev=0.02), 225 | name='encode_bn_g1') 226 | self.encode_bn_b1 = tf.Variable( 227 | tf.zeros([self.dim_W4]), name='encode_bn_b1') 228 | 229 | self.encode_W2 = tf.Variable( 230 | tf.random_normal([ 231 | self.kernel4[0], self.kernel4[1], self.kernel4[2], self.dim_W4, 232 | self.dim_W3 233 | ], 234 | stddev=0.02), 235 | name='encode_W2') 236 | self.encode_bn_g2 = tf.Variable( 237 | tf.random_normal([self.dim_W3], mean=1.0, stddev=0.02), 238 | name='encode_bn_g2') 239 | self.encode_bn_b2 = tf.Variable( 240 | tf.zeros([self.dim_W3]), name='encode_bn_b2') 241 | 242 | self.encode_W3 = tf.Variable( 243 | tf.random_normal([ 244 | self.kernel3[0], self.kernel3[1], self.kernel3[2], self.dim_W3, 245 | self.dim_W2 246 | ], 247 | stddev=0.02), 248 | name='encode_W3') 249 | self.encode_bn_g3 = tf.Variable( 250 | tf.random_normal([self.dim_W2], mean=1.0, stddev=0.02), 251 | name='encode_bn_g3') 252 | self.encode_bn_b3 = tf.Variable( 253 | tf.zeros([self.dim_W2]), name='encode_bn_b3') 254 | 255 | self.encode_W4 = tf.Variable( 256 | tf.random_normal([ 257 | self.kernel2[0], self.kernel2[1], self.kernel2[2], self.dim_W2, 258 | self.dim_W1 259 | ], 260 | stddev=0.02), 261 | name='encode_W4') 262 | self.encode_bn_g4 = tf.Variable( 263 | tf.random_normal([self.dim_W1], mean=1.0, stddev=0.02), 264 | name='encode_bn_g4') 265 | self.encode_bn_b4 = tf.Variable( 266 | tf.zeros([self.dim_W1]), name='encode_bn_b4') 267 | 268 | self.encode_W5 = tf.Variable( 269 | tf.random_normal([1, 1, 1, self.dim_W1, self.dim_z], stddev=0.02), 270 | name='encode_W5') 271 | self.encode_W5_sigma = tf.Variable( 272 | tf.random_normal([1, 1, 1, self.dim_W1, self.dim_z], stddev=0.02), 273 | name='encode_W5_sigma') 274 | 275 | # depth--start 276 | self.encode_dep_W1 = tf.Variable( 277 | tf.random_normal( 278 | [self.kernel5[0], self.kernel5[1], 1, self.dim_W4], 279 | stddev=0.02), 280 | name='depthproject_W1') 281 | self.encode_dep_bn_g1 = tf.Variable( 282 | tf.random_normal([self.dim_W4], mean=1.0, stddev=0.02), 283 | name='depthproject_bn_g1') 284 | self.encode_dep_bn_b1 = tf.Variable( 285 | tf.zeros([self.dim_W4]), name='depthproject_bn_b1') 286 | 287 | self.encode_dep_W2 = tf.Variable( 288 | tf.random_normal( 289 | [self.kernel4[0], self.kernel4[1], self.dim_W4, self.dim_W3], 290 | stddev=0.02), 291 | name='depthproject_W2') 292 | self.encode_dep_bn_g2 = tf.Variable( 293 | tf.random_normal([self.dim_W3], mean=1.0, stddev=0.02), 294 | name='depthproject_bn_g2') 295 | self.encode_dep_bn_b2 = tf.Variable( 296 | tf.zeros([self.dim_W3]), name='depthproject_bn_b2') 297 | 298 | self.encode_dep_W3 = tf.Variable( 299 | tf.random_normal( 300 | [self.kernel3[0], self.kernel3[1], self.dim_W3, self.dim_W2], 301 | stddev=0.02), 302 | name='depthproject_W3') 303 | self.encode_dep_bn_g3 = tf.Variable( 304 | tf.random_normal([self.dim_W2], mean=1.0, stddev=0.02), 305 | name='depthproject_bn_g3') 306 | self.encode_dep_bn_b3 = tf.Variable( 307 | tf.zeros([self.dim_W2]), name='depthproject_bn_b3') 308 | 309 | self.encode_dep_W4 = tf.Variable( 310 | tf.random_normal( 311 | [self.kernel2[0], self.kernel2[1], self.dim_W2, self.dim_W1], 312 | stddev=0.02), 313 | name='depthproject_W4') 314 | self.encode_dep_bn_g4 = tf.Variable( 315 | 
tf.random_normal([self.dim_W1], mean=1.0, stddev=0.02), 316 | name='depthproject_bn_g4') 317 | self.encode_dep_bn_b4 = tf.Variable( 318 | tf.zeros([self.dim_W1]), name='depthproject_bn_b4') 319 | 320 | self.encode_dep_W5 = tf.Variable( 321 | tf.random_normal( 322 | [self.kernel2[0], self.kernel2[1], self.dim_W1, self.dim_W1], 323 | stddev=0.02), 324 | name='depthproject_W5') 325 | self.encode_dep_bn_g5 = tf.Variable( 326 | tf.random_normal([self.dim_W1], mean=1.0, stddev=0.02), 327 | name='depthproject_bn_g5') 328 | self.encode_dep_bn_b5 = tf.Variable( 329 | tf.zeros([self.dim_W1]), name='depthproject_bn_b5') 330 | 331 | self.encode_dep_W6 = tf.Variable( 332 | tf.random_normal( 333 | [self.kernel2[0], self.kernel2[1], self.dim_W1, 256], 334 | stddev=0.02), 335 | name='depthproject_W6') 336 | self.encode_dep_bn_g6 = tf.Variable( 337 | tf.random_normal([256], mean=1.0, stddev=0.02), 338 | name='depthproject_bn_g6') 339 | self.encode_dep_bn_b6 = tf.Variable( 340 | tf.zeros([256]), name='depthproject_bn_b6') 341 | self.encode_dep_W7 = tf.Variable( 342 | tf.random_normal([ 343 | 5 * 4 * 256, self.start_vox_size[0] * self.start_vox_size[1] * 344 | self.start_vox_size[2] * self.dim_W1 345 | ], 346 | stddev=0.02), 347 | name='depthproject_W7') 348 | 349 | self.encode_dep_W8 = tf.Variable( 350 | tf.random_normal([1, 1, 1, self.dim_W1, self.dim_z], stddev=0.02), 351 | name='depthproject_W8') 352 | self.encode_dep_W8_sigma = tf.Variable( 353 | tf.random_normal([1, 1, 1, self.dim_W1, self.dim_z], stddev=0.02), 354 | name='depthproject_W8_sigma') 355 | # depth--end 356 | 357 | self.discrim_W1 = tf.Variable( 358 | tf.random_normal([ 359 | self.kernel5[0], self.kernel5[1], self.kernel5[2], self.dim_W5, 360 | self.dim_W4 361 | ], 362 | stddev=0.02), 363 | name='discrim_vox_W1') 364 | self.discrim_bn_g1 = tf.Variable( 365 | tf.random_normal([1], mean=1.0, stddev=0.02), 366 | name='discrim_vox_bn_g1') 367 | self.discrim_bn_b1 = tf.Variable( 368 | tf.zeros([1]), name='discrim_vox_bn_b1') 369 | 370 | self.discrim_W2 = tf.Variable( 371 | tf.random_normal([ 372 | self.kernel4[0], self.kernel4[1], self.kernel4[2], self.dim_W4, 373 | self.dim_W3 374 | ], 375 | stddev=0.02), 376 | name='discrim_vox_W2') 377 | self.discrim_bn_g2 = tf.Variable( 378 | tf.random_normal([1], mean=1.0, stddev=0.02), 379 | name='discrim_vox_bn_g2') 380 | self.discrim_bn_b2 = tf.Variable( 381 | tf.zeros([1]), name='discrim_vox_bn_b2') 382 | 383 | self.discrim_W3 = tf.Variable( 384 | tf.random_normal([ 385 | self.kernel3[0], self.kernel3[1], self.kernel3[2], self.dim_W3, 386 | self.dim_W2 387 | ], 388 | stddev=0.02), 389 | name='discrim_vox_W3') 390 | self.discrim_bn_g3 = tf.Variable( 391 | tf.random_normal([1], mean=1.0, stddev=0.02), 392 | name='discrim_vox_bn_g3') 393 | self.discrim_bn_b3 = tf.Variable( 394 | tf.zeros([1]), name='discrim_vox_bn_b3') 395 | 396 | self.discrim_W4 = tf.Variable( 397 | tf.random_normal([ 398 | self.kernel2[0], self.kernel2[1], self.kernel2[2], self.dim_W2, 399 | self.dim_W1 400 | ], 401 | stddev=0.02), 402 | name='discrim_vox_W4') 403 | self.discrim_bn_g4 = tf.Variable( 404 | tf.random_normal([1], mean=1.0, stddev=0.02), 405 | name='discrim_vox_bn_g4') 406 | self.discrim_bn_b4 = tf.Variable( 407 | tf.zeros([1]), name='discrim_vox_bn_b4') 408 | 409 | self.discrim_W5 = tf.Variable( 410 | tf.random_normal([ 411 | self.start_vox_size[0] * self.start_vox_size[1] * 412 | self.start_vox_size[2] * self.dim_W1, 1 413 | ], 414 | stddev=0.02), 415 | name='discrim_vox_W5') 416 | 417 | # depth--start 418 | 
self.discrim_dep_W1 = tf.Variable( 419 | tf.random_normal([ 420 | self.kernel5[0], self.kernel5[1], self.kernel5[2], self.dim_W5, 421 | self.dim_W4 422 | ], 423 | stddev=0.02), 424 | name='discrim_dep_W1') 425 | self.discrim_dep_bn_g1 = tf.Variable( 426 | tf.random_normal([1], mean=1.0, stddev=0.02), 427 | name='discrim_dep_bn_g1') 428 | self.discrim_dep_bn_b1 = tf.Variable( 429 | tf.zeros([1]), name='discrim_dep_bn_b1') 430 | 431 | self.discrim_dep_W2 = tf.Variable( 432 | tf.random_normal([ 433 | self.kernel4[0], self.kernel4[1], self.kernel4[2], self.dim_W4, 434 | self.dim_W3 435 | ], 436 | stddev=0.02), 437 | name='discrim_dep_W2') 438 | self.discrim_dep_bn_g2 = tf.Variable( 439 | tf.random_normal([1], mean=1.0, stddev=0.02), 440 | name='discrim_dep_bn_g2') 441 | self.discrim_dep_bn_b2 = tf.Variable( 442 | tf.zeros([1]), name='discrim_dep_bn_b2') 443 | 444 | self.discrim_dep_W3 = tf.Variable( 445 | tf.random_normal([ 446 | self.kernel3[0], self.kernel3[1], self.kernel3[2], self.dim_W3, 447 | self.dim_W2 448 | ], 449 | stddev=0.02), 450 | name='discrim_dep_W3') 451 | self.discrim_dep_bn_g3 = tf.Variable( 452 | tf.random_normal([1], mean=1.0, stddev=0.02), 453 | name='discrim_dep_bn_g3') 454 | self.discrim_dep_bn_b3 = tf.Variable( 455 | tf.zeros([1]), name='discrim_dep_bn_b3') 456 | 457 | self.discrim_dep_W4 = tf.Variable( 458 | tf.random_normal([ 459 | self.kernel2[0], self.kernel2[1], self.kernel2[2], self.dim_W2, 460 | self.dim_W1 461 | ], 462 | stddev=0.02), 463 | name='discrim_dep_W4') 464 | self.discrim_dep_bn_g4 = tf.Variable( 465 | tf.random_normal([1], mean=1.0, stddev=0.02), 466 | name='discrim_dep_bn_g4') 467 | self.discrim_dep_bn_b4 = tf.Variable( 468 | tf.zeros([1]), name='discrim_dep_bn_b4') 469 | 470 | self.discrim_dep_W5 = tf.Variable( 471 | tf.random_normal([ 472 | self.start_vox_size[0] * self.start_vox_size[1] * 473 | self.start_vox_size[2] * self.dim_W1, 1 474 | ], 475 | stddev=0.02), 476 | name='discrim_dep_W5') 477 | # depth--end 478 | 479 | self.cod_W1 = tf.Variable( 480 | tf.random_normal([ 481 | self.dim_z * self.start_vox_size[0] * self.start_vox_size[1] * 482 | self.start_vox_size[2], self.dim_code 483 | ], 484 | stddev=0.02), 485 | name='cod_W1') 486 | self.cod_bn_g1 = tf.Variable( 487 | tf.random_normal([dim_code], mean=1.0, stddev=0.02), 488 | name='cod_bn_g1') 489 | self.cod_bn_b1 = tf.Variable(tf.zeros([dim_code]), name='cod_bn_b1') 490 | 491 | self.cod_W2 = tf.Variable( 492 | tf.random_normal([dim_code, dim_code], stddev=0.02), name='cod_W2') 493 | self.cod_bn_g2 = tf.Variable( 494 | tf.random_normal([dim_code], mean=1.0, stddev=0.02), 495 | name='cod_bn_g2') 496 | self.cod_bn_b2 = tf.Variable(tf.zeros([dim_code]), name='cod_bn_b2') 497 | 498 | self.cod_W3 = tf.Variable( 499 | tf.random_normal([dim_code, 1], stddev=0.02), name='cod_W3') 500 | 501 | self.refine_W1 = tf.Variable( 502 | tf.random_normal([ 503 | self.refine_kernel, self.refine_kernel, self.refine_kernel, 504 | self.dim_W5, self.refine_ch 505 | ], 506 | stddev=0.02), 507 | name='refine_W1') 508 | self.refine_res1_W1 = tf.Variable( 509 | tf.random_normal([ 510 | self.refine_kernel, self.refine_kernel, self.refine_kernel, 511 | self.refine_ch, self.refine_ch 512 | ], 513 | stddev=0.02), 514 | name='refine__res1_W1') 515 | self.refine_res1_W2 = tf.Variable( 516 | tf.random_normal([ 517 | self.refine_kernel, self.refine_kernel, self.refine_kernel, 518 | self.refine_ch, self.refine_ch 519 | ], 520 | stddev=0.02), 521 | name='refine__res1_W2') 522 | 523 | self.refine_res2_W1 = tf.Variable( 524 | 
tf.random_normal([ 525 | self.refine_kernel, self.refine_kernel, self.refine_kernel, 526 | self.refine_ch, self.refine_ch 527 | ], 528 | stddev=0.02), 529 | name='refine__res2_W1') 530 | self.refine_res2_W2 = tf.Variable( 531 | tf.random_normal([ 532 | self.refine_kernel, self.refine_kernel, self.refine_kernel, 533 | self.refine_ch, self.refine_ch 534 | ], 535 | stddev=0.02), 536 | name='refine__res2_W2') 537 | 538 | self.refine_res3_W1 = tf.Variable( 539 | tf.random_normal([ 540 | self.refine_kernel, self.refine_kernel, self.refine_kernel, 541 | self.refine_ch, self.refine_ch 542 | ], 543 | stddev=0.02), 544 | name='refine__res3_W1') 545 | self.refine_res3_W2 = tf.Variable( 546 | tf.random_normal([ 547 | self.refine_kernel, self.refine_kernel, self.refine_kernel, 548 | self.refine_ch, self.refine_ch 549 | ], 550 | stddev=0.02), 551 | name='refine__res3_W2') 552 | 553 | self.refine_res4_W1 = tf.Variable( 554 | tf.random_normal([ 555 | self.refine_kernel, self.refine_kernel, self.refine_kernel, 556 | self.refine_ch, self.refine_ch 557 | ], 558 | stddev=0.02), 559 | name='refine__res4_W1') 560 | self.refine_res4_W2 = tf.Variable( 561 | tf.random_normal([ 562 | self.refine_kernel, self.refine_kernel, self.refine_kernel, 563 | self.refine_ch, self.refine_ch 564 | ], 565 | stddev=0.02), 566 | name='refine__res4_W2') 567 | 568 | self.refine_W2 = tf.Variable( 569 | tf.random_normal([ 570 | self.refine_kernel, self.refine_kernel, self.refine_kernel, 571 | self.refine_ch, self.dim_W5 572 | ], 573 | stddev=0.02), 574 | name='refine_W2') 575 | 576 | self.saver = tf.train.Saver() 577 | 578 | def build_model(self): 579 | 580 | vox_real_ = tf.placeholder(tf.int32, [ 581 | self.batch_size, self.vox_shape[0], self.vox_shape[1], 582 | self.vox_shape[2] 583 | ]) 584 | vox_real = tf.one_hot(vox_real_, self.n_class) 585 | vox_real = tf.cast(vox_real, tf.float32) 586 | # depth--start 587 | """ 588 | dep_real = tf.placeholder( 589 | tf.float32, 590 | [self.batch_size, self.dep_shape[0], self.dep_shape[1], self.dep_shape[2]]) 591 | # depth--end 592 | """ 593 | # tsdf--start 594 | tsdf_real_ = tf.placeholder(tf.int32, [ 595 | self.batch_size, self.vox_shape[0], self.vox_shape[1], 596 | self.vox_shape[2] 597 | ]) 598 | tsdf_real = tf.one_hot(tsdf_real_, 1) 599 | tsdf_real = tf.cast(tsdf_real, tf.float32) 600 | # tsdf--end 601 | Z = tf.placeholder(tf.float32, [ 602 | self.batch_size, self.start_vox_size[0], self.start_vox_size[1], 603 | self.start_vox_size[2], self.dim_z 604 | ]) 605 | 606 | filter_bilateral = tf.placeholder( 607 | tf.float32, [self.batch_size] + 608 | [self.vox_shape[0], self.vox_shape[1], self.vox_shape[2], 4]) 609 | mean, sigma = self.encoder(tsdf_real) 610 | Z_encode = mean 611 | # depth--start 612 | """ 613 | mean_dep, sigma_dep = self.encoder_dep(dep_real) 614 | Z_encode_dep = mean_dep 615 | """ 616 | # depth--end 617 | 618 | #code_discriminator 619 | p_code_encode, h_code_encode = self.code_discriminator(Z_encode) 620 | p_code_real, h_code_real = self.code_discriminator(Z) 621 | # depth--start 622 | """ 623 | p_code_encode_dep, h_code_encode_dep = self.code_discriminator(Z_encode_dep) 624 | """ 625 | # depth--start 626 | 627 | code_encode_loss = tf.reduce_mean( 628 | tf.reduce_sum( 629 | tf.nn.sigmoid_cross_entropy_with_logits( 630 | logits=h_code_encode, labels=tf.ones_like(h_code_encode)), 631 | [1])) 632 | code_discrim_loss = tf.reduce_mean( 633 | tf.reduce_sum( 634 | tf.nn.sigmoid_cross_entropy_with_logits( 635 | logits=h_code_real, labels=tf.ones_like(h_code_real)), 636 | [1])) + 
tf.reduce_mean( 637 | tf.reduce_sum( 638 | tf.nn.sigmoid_cross_entropy_with_logits( 639 | logits=h_code_encode, 640 | labels=tf.zeros_like(h_code_encode)), [1])) 641 | 642 | # depth--start 643 | """ 644 | code_encode_dep_loss = tf.reduce_mean( 645 | tf.reduce_sum( 646 | tf.nn.sigmoid_cross_entropy_with_logits( 647 | logits=h_code_encode_dep, 648 | labels=tf.ones_like(h_code_encode_dep)), 649 | [1])) 650 | code_discrim_dep_loss = tf.reduce_mean( 651 | tf.reduce_sum( 652 | tf.nn.sigmoid_cross_entropy_with_logits( 653 | logits=h_code_real, 654 | labels=tf.ones_like(h_code_real)), 655 | [1])) + tf.reduce_mean( 656 | tf.reduce_sum( 657 | tf.nn.sigmoid_cross_entropy_with_logits( 658 | logits=h_code_encode_dep, 659 | labels=tf.zeros_like(h_code_encode_dep)), 660 | [1])) 661 | code_compare_loss = tf.reduce_mean( 662 | tf.reduce_sum( 663 | tf.squared_difference( 664 | Z_encode_dep, 665 | Z_encode), 666 | [1,2,3,4])) 667 | """ 668 | # depth--end 669 | 670 | #reconstruction 671 | vox_gen_decode, _ = self.generate(Z_encode) 672 | """ 673 | vox_gen_decode_dep, _ = self.generate(Z_encode_dep) 674 | """ 675 | batch_mean_vox_real = tf.reduce_mean(vox_real, [0, 1, 2, 3]) 676 | # batch_mean_vox_real ranges from 0 to 1 677 | ones = tf.ones_like(batch_mean_vox_real) 678 | # inverse ranges from 0.5 to 1 679 | inverse = tf.div(ones, tf.add(batch_mean_vox_real, ones)) 680 | # inverse ranges from 1/1.1 to 10 681 | inverse = tf.div(ones, batch_mean_vox_real + 0.1) 682 | weight = inverse * tf.div(1., tf.reduce_sum(inverse)) 683 | recons_loss = -tf.reduce_sum( 684 | self.lamda_gamma * vox_real * tf.log(1e-6 + vox_gen_decode) + 685 | (1 - self.lamda_gamma) * 686 | (1 - vox_real) * tf.log(1e-6 + 1 - vox_gen_decode), [1, 2, 3]) 687 | recons_loss = tf.reduce_mean(tf.reduce_sum(recons_loss * weight, 1)) 688 | # Completion loss 689 | vox_real_complete = tf.stack([ 690 | vox_real[:, :, :, :, 0], 691 | tf.reduce_sum(vox_real[:, :, :, :, 1:], 4) 692 | ], 4) 693 | vox_gen_complete = tf.stack([ 694 | vox_gen_decode[:, :, :, :, 0], 695 | tf.reduce_sum(vox_gen_decode[:, :, :, :, 1:], 4) 696 | ], 4) 697 | complete_loss = -tf.reduce_sum( 698 | self.lamda_gamma * vox_real_complete * 699 | tf.log(1e-6 + vox_gen_complete) + (1 - self.lamda_gamma) * 700 | (1 - vox_real_complete) * tf.log(1e-6 + 1 - vox_gen_complete), 701 | [1, 2, 3]) 702 | weight_complete = tf.stack([weight[0], tf.reduce_sum(weight[1:])]) 703 | recons_loss += tf.reduce_mean( 704 | tf.reduce_sum(complete_loss * weight_complete, 1)) 705 | """ 706 | recons_dep_loss = -tf.reduce_sum( 707 | self.lamda_gamma *vox_real * tf.log(1e-6 + vox_gen_decode_dep) + (1- self.lamda_gamma) * (1-vox_real) * tf.log(1e-6 + 1-vox_gen_decode_dep), 708 | [1,2,3]) 709 | recons_dep_loss = tf.reduce_mean( 710 | tf.reduce_sum( 711 | recons_dep_loss * weight, 1)) 712 | """ 713 | #Refiner 714 | vox_after_refine_dec = self.refine(vox_gen_decode) 715 | 716 | recons_loss_refine = -tf.reduce_sum( 717 | self.lamda_gamma * vox_real * tf.log(1e-6 + vox_after_refine_dec) + 718 | (1 - self.lamda_gamma) * 719 | (1 - vox_real) * tf.log(1e-6 + 1 - vox_after_refine_dec), 720 | [1, 2, 3]) 721 | recons_loss_refine = tf.reduce_mean( 722 | tf.reduce_sum(recons_loss_refine * weight, 1)) 723 | 724 | #GAN_generate 725 | vox_gen, _ = self.generate(Z) 726 | vox_after_refine_gen = self.refine(vox_gen) 727 | 728 | p_real, h_real = self.discriminate(vox_real) 729 | p_gen, h_gen = self.discriminate(vox_gen) 730 | p_gen_dec, h_gen_dec = self.discriminate(vox_gen_decode) 731 | # depth--start 732 | """ 733 | p_real_dep, 
h_real_dep = self.discriminate_dep(vox_real) 734 | p_gen_dep, h_gen_dep = self.discriminate_dep(vox_gen) 735 | p_gen_dec_dep, h_gen_dec_dep = self.discriminate_dep(vox_gen_decode_dep) 736 | """ 737 | # depth--end 738 | p_gen_ref, h_gen_ref = self.discriminate(vox_after_refine_gen) 739 | p_gen_dec_ref, h_gen_dec_ref = self.discriminate(vox_after_refine_dec) 740 | 741 | #Standard_GAN_Loss 742 | discrim_loss = tf.reduce_mean( 743 | tf.nn.sigmoid_cross_entropy_with_logits( 744 | logits=h_real, labels=tf.ones_like(h_real))) + tf.reduce_mean( 745 | tf.nn.sigmoid_cross_entropy_with_logits( 746 | logits=h_gen, 747 | labels=tf.zeros_like(h_gen))) + tf.reduce_mean( 748 | tf.nn.sigmoid_cross_entropy_with_logits( 749 | logits=h_gen_dec, 750 | labels=tf.zeros_like(h_gen_dec))) 751 | 752 | gen_loss = tf.reduce_mean( 753 | tf.nn.sigmoid_cross_entropy_with_logits( 754 | logits=h_gen, labels=tf.ones_like(h_gen))) + tf.reduce_mean( 755 | tf.nn.sigmoid_cross_entropy_with_logits( 756 | logits=h_gen_dec, labels=tf.ones_like(h_gen_dec))) 757 | 758 | # depth--start 759 | """ 760 | discrim_dep_loss = tf.reduce_mean( 761 | tf.nn.sigmoid_cross_entropy_with_logits( 762 | logits=h_real_dep, 763 | labels=tf.ones_like(h_real_dep))) + tf.reduce_mean( 764 | tf.nn.sigmoid_cross_entropy_with_logits( 765 | logits=h_gen_dep, 766 | labels=tf.zeros_like(h_gen_dep))) + tf.reduce_mean( 767 | tf.nn.sigmoid_cross_entropy_with_logits( 768 | logits=h_gen_dec_dep, 769 | labels=tf.zeros_like(h_gen_dec_dep))) 770 | 771 | gen_dep_loss = tf.reduce_mean( 772 | tf.nn.sigmoid_cross_entropy_with_logits( 773 | logits=h_gen_dep, 774 | labels=tf.ones_like(h_gen_dep))) + tf.reduce_mean( 775 | tf.nn.sigmoid_cross_entropy_with_logits( 776 | logits=h_gen_dec_dep, 777 | labels=tf.ones_like(h_gen_dec_dep))) 778 | """ 779 | # depth--end 780 | #for refine 781 | discrim_loss_refine = tf.reduce_mean( 782 | tf.nn.sigmoid_cross_entropy_with_logits( 783 | logits=h_real, labels=tf.ones_like(h_real))) + tf.reduce_mean( 784 | tf.nn.sigmoid_cross_entropy_with_logits( 785 | logits=h_gen_ref, 786 | labels=tf.zeros_like(h_gen_ref))) + tf.reduce_mean( 787 | tf.nn.sigmoid_cross_entropy_with_logits( 788 | logits=h_gen_dec_ref, 789 | labels=tf.zeros_like(h_gen_dec_ref))) 790 | 791 | gen_loss_refine = tf.reduce_mean( 792 | tf.nn.sigmoid_cross_entropy_with_logits( 793 | logits=h_gen_ref, 794 | labels=tf.ones_like(h_gen_ref))) + tf.reduce_mean( 795 | tf.nn.sigmoid_cross_entropy_with_logits( 796 | logits=h_gen_dec_ref, 797 | labels=tf.ones_like(h_gen_dec_ref))) 798 | """ 799 | #LS_GAN_Loss 800 | a=-1 801 | b=1 802 | c=0 803 | 804 | discrim_loss = tf.reduce_mean(0.5*((h_real-b)**2) + 0.5*((h_gen-a)**2) + 0.5*((h_gen_dec-a)**2)) 805 | gen_loss = tf.reduce_mean(0.5*((h_gen-c)**2) + 0.5*((h_gen_dec-c)**2)) 806 | """ 807 | 808 | #Cost 809 | cost_enc = code_encode_loss + self.lamda_recons * recons_loss 810 | cost_gen = self.lamda_recons * recons_loss + gen_loss 811 | cost_discrim = discrim_loss 812 | cost_code = code_discrim_loss 813 | cost_gen_ref = self.lamda_recons * recons_loss_refine + gen_loss_refine 814 | cost_discrim_ref = discrim_loss_refine 815 | """ 816 | cost_enc_dep = code_encode_dep_loss + self.lamda_recons*recons_dep_loss 817 | cost_gen_dep = self.lamda_recons*recons_dep_loss + gen_dep_loss 818 | cost_discrim_dep = discrim_dep_loss 819 | cost_code_dep = code_discrim_dep_loss 820 | """ 821 | 822 | tf.summary.scalar("recons_loss", tf.reduce_mean(recons_loss)) 823 | tf.summary.scalar("gen_loss", tf.reduce_mean(gen_loss)) 824 | tf.summary.scalar("discrim_loss", 
tf.reduce_mean(discrim_loss)) 825 | tf.summary.scalar("code_encode_loss", tf.reduce_mean(code_encode_loss)) 826 | tf.summary.scalar("code_discrim_loss", 827 | tf.reduce_mean(code_discrim_loss)) 828 | 829 | summary_op = tf.summary.merge_all() 830 | 831 | return Z, Z_encode, vox_real_, vox_gen, vox_gen_decode, vox_after_refine_dec, vox_after_refine_gen,\ 832 | recons_loss, code_encode_loss, gen_loss, discrim_loss, recons_loss_refine, gen_loss_refine, discrim_loss_refine,\ 833 | cost_enc, cost_code, cost_gen, cost_discrim, cost_gen_ref, cost_discrim_ref, summary_op,\ 834 | tsdf_real 835 | """ 836 | Z_encode_dep, dep_real, vox_gen_decode_dep,\ 837 | recons_dep_loss, code_encode_dep_loss, gen_dep_loss, discrim_dep_loss,\ 838 | cost_enc_dep, cost_code_dep, cost_gen_dep, cost_discrim_dep, code_compare_loss,\ 839 | """ 840 | 841 | def encoder(self, vox): 842 | 843 | h1 = lrelu( 844 | tf.nn.conv3d( 845 | vox, self.encode_W1, strides=self.stride, padding='SAME')) 846 | h2 = lrelu( 847 | batchnormalize( 848 | tf.nn.conv3d( 849 | h1, self.encode_W2, strides=self.stride, padding='SAME'), 850 | g=self.encode_bn_g2, 851 | b=self.encode_bn_b2, 852 | batch_size=self.batch_size)) 853 | h3 = lrelu( 854 | batchnormalize( 855 | tf.nn.conv3d( 856 | h2, self.encode_W3, strides=self.stride, padding='SAME'), 857 | g=self.encode_bn_g3, 858 | b=self.encode_bn_b3, 859 | batch_size=self.batch_size)) 860 | h4 = lrelu( 861 | batchnormalize( 862 | tf.nn.conv3d( 863 | h3, self.encode_W4, strides=self.stride, padding='SAME'), 864 | g=self.encode_bn_g4, 865 | b=self.encode_bn_b4, 866 | batch_size=self.batch_size)) 867 | h5 = tf.nn.conv3d( 868 | h4, self.encode_W5, strides=[1, 1, 1, 1, 1], padding='SAME') 869 | h5_sigma = tf.nn.conv3d( 870 | h4, self.encode_W5_sigma, strides=[1, 1, 1, 1, 1], padding='SAME') 871 | 872 | return h5, h5_sigma 873 | 874 | def encoder_dep(self, dep): 875 | 876 | h1 = lrelu( 877 | tf.nn.conv2d( 878 | dep, 879 | self.encode_dep_W1, 880 | strides=self.stride_dep, 881 | padding='SAME')) 882 | h2 = lrelu( 883 | batchnormalize( 884 | tf.nn.conv2d( 885 | h1, 886 | self.encode_dep_W2, 887 | strides=self.stride_dep, 888 | padding='SAME'), 889 | g=self.encode_dep_bn_g2, 890 | b=self.encode_dep_bn_b2, 891 | batch_size=self.batch_size)) 892 | h3 = lrelu( 893 | batchnormalize( 894 | tf.nn.conv2d( 895 | h2, 896 | self.encode_dep_W3, 897 | strides=self.stride_dep, 898 | padding='SAME'), 899 | g=self.encode_dep_bn_g3, 900 | b=self.encode_dep_bn_b3, 901 | batch_size=self.batch_size)) 902 | h4 = lrelu( 903 | batchnormalize( 904 | tf.nn.conv2d( 905 | h3, 906 | self.encode_dep_W4, 907 | strides=self.stride_dep, 908 | padding='SAME'), 909 | g=self.encode_dep_bn_g4, 910 | b=self.encode_dep_bn_b4, 911 | batch_size=self.batch_size)) 912 | h5 = lrelu( 913 | batchnormalize( 914 | tf.nn.conv2d( 915 | h4, 916 | self.encode_dep_W5, 917 | strides=self.stride_dep, 918 | padding='SAME'), 919 | g=self.encode_dep_bn_g5, 920 | b=self.encode_dep_bn_b5, 921 | batch_size=self.batch_size)) 922 | h6 = lrelu( 923 | batchnormalize( 924 | tf.nn.conv2d( 925 | h5, 926 | self.encode_dep_W6, 927 | strides=self.stride_dep, 928 | padding='SAME'), 929 | g=self.encode_dep_bn_g6, 930 | b=self.encode_dep_bn_b6, 931 | batch_size=self.batch_size)) 932 | h6 = tf.reshape(h6, [self.batch_size, -1]) 933 | h7 = tf.matmul(h6, self.encode_dep_W7) 934 | h7 = tf.reshape(h7, [ 935 | self.batch_size, self.start_vox_size[0], self.start_vox_size[1], 936 | self.start_vox_size[2], self.dim_W1 937 | ]) 938 | h8 = tf.nn.conv3d( 939 | h7, self.encode_dep_W8, 
strides=[1, 1, 1, 1, 1], padding='SAME') 940 | h8_sigma = tf.nn.conv3d( 941 | h7, 942 | self.encode_dep_W8_sigma, 943 | strides=[1, 1, 1, 1, 1], 944 | padding='SAME') 945 | 946 | return h8, h8_sigma 947 | 948 | def discriminate(self, vox): 949 | 950 | h1 = lrelu( 951 | tf.nn.conv3d( 952 | vox, self.discrim_W1, strides=self.stride, padding='SAME')) 953 | h2 = lrelu( 954 | layernormalize( 955 | tf.nn.conv3d( 956 | h1, self.discrim_W2, strides=self.stride, padding='SAME'), 957 | g=self.discrim_bn_g2, 958 | b=self.discrim_bn_b2)) 959 | h3 = lrelu( 960 | layernormalize( 961 | tf.nn.conv3d( 962 | h2, self.discrim_W3, strides=self.stride, padding='SAME'), 963 | g=self.discrim_bn_g3, 964 | b=self.discrim_bn_b3)) 965 | h4 = lrelu( 966 | layernormalize( 967 | tf.nn.conv3d( 968 | h3, self.discrim_W4, strides=self.stride, padding='SAME'), 969 | g=self.discrim_bn_g4, 970 | b=self.discrim_bn_b4)) 971 | h4 = tf.reshape(h4, [self.batch_size, -1]) 972 | h5 = tf.matmul(h4, self.discrim_W5) 973 | y = tf.nn.sigmoid(h5) 974 | 975 | return y, h5 976 | 977 | def discriminate_dep(self, vox): 978 | 979 | h1 = lrelu( 980 | tf.nn.conv3d( 981 | vox, self.discrim_dep_W1, strides=self.stride, padding='SAME')) 982 | h2 = lrelu( 983 | layernormalize( 984 | tf.nn.conv3d( 985 | h1, 986 | self.discrim_dep_W2, 987 | strides=self.stride, 988 | padding='SAME'), 989 | g=self.discrim_dep_bn_g2, 990 | b=self.discrim_dep_bn_b2)) 991 | h3 = lrelu( 992 | layernormalize( 993 | tf.nn.conv3d( 994 | h2, 995 | self.discrim_dep_W3, 996 | strides=self.stride, 997 | padding='SAME'), 998 | g=self.discrim_dep_bn_g3, 999 | b=self.discrim_dep_bn_b3)) 1000 | h4 = lrelu( 1001 | layernormalize( 1002 | tf.nn.conv3d( 1003 | h3, 1004 | self.discrim_dep_W4, 1005 | strides=self.stride, 1006 | padding='SAME'), 1007 | g=self.discrim_dep_bn_g4, 1008 | b=self.discrim_dep_bn_b4)) 1009 | h4 = tf.reshape(h4, [self.batch_size, -1]) 1010 | h5 = tf.matmul(h4, self.discrim_dep_W5) 1011 | y = tf.nn.sigmoid(h5) 1012 | 1013 | return y, h5 1014 | 1015 | def code_discriminator(self, Z): 1016 | Z_ = tf.reshape(Z, [self.batch_size, -1]) 1017 | h1 = tf.nn.relu( 1018 | batchnormalize( 1019 | tf.matmul(Z_, self.cod_W1), g=self.cod_bn_g1, 1020 | b=self.cod_bn_b1)) 1021 | h2 = tf.nn.relu( 1022 | batchnormalize( 1023 | tf.matmul(h1, self.cod_W2), g=self.cod_bn_g2, 1024 | b=self.cod_bn_b2)) 1025 | h3 = tf.matmul(h2, self.cod_W3) 1026 | y = tf.nn.sigmoid(h3) 1027 | return y, h3 1028 | 1029 | def generate(self, Z): 1030 | 1031 | Z_ = tf.reshape(Z, [self.batch_size, -1]) 1032 | h1 = tf.nn.relu( 1033 | batchnormalize( 1034 | tf.matmul(Z_, self.gen_W1), g=self.gen_bn_g1, 1035 | b=self.gen_bn_b1)) 1036 | h1 = tf.reshape(h1, [ 1037 | self.batch_size, self.start_vox_size[0], self.start_vox_size[1], 1038 | self.start_vox_size[2], self.dim_W1 1039 | ]) 1040 | 1041 | vox_size_l2 = self.start_vox_size * 2 1042 | output_shape_l2 = [ 1043 | self.batch_size, vox_size_l2[0], vox_size_l2[1], vox_size_l2[2], 1044 | self.dim_W2 1045 | ] 1046 | h2 = tf.nn.conv3d_transpose( 1047 | h1, self.gen_W2, output_shape=output_shape_l2, strides=self.stride) 1048 | h2 = tf.nn.relu( 1049 | batchnormalize( 1050 | h2, 1051 | g=self.gen_bn_g2, 1052 | b=self.gen_bn_b2, 1053 | batch_size=self.batch_size)) 1054 | 1055 | vox_size_l3 = self.start_vox_size * 4 1056 | output_shape_l3 = [ 1057 | self.batch_size, vox_size_l3[0], vox_size_l3[1], vox_size_l3[2], 1058 | self.dim_W3 1059 | ] 1060 | h3 = tf.nn.conv3d_transpose( 1061 | h2, self.gen_W3, output_shape=output_shape_l3, strides=self.stride) 1062 | h3 = tf.nn.relu( 
1063 | batchnormalize( 1064 | h3, 1065 | g=self.gen_bn_g3, 1066 | b=self.gen_bn_b3, 1067 | batch_size=self.batch_size)) 1068 | 1069 | vox_size_l4 = self.start_vox_size * 8 1070 | output_shape_l4 = [ 1071 | self.batch_size, vox_size_l4[0], vox_size_l4[1], vox_size_l4[2], 1072 | self.dim_W4 1073 | ] 1074 | h4 = tf.nn.conv3d_transpose( 1075 | h3, self.gen_W4, output_shape=output_shape_l4, strides=self.stride) 1076 | h4 = tf.nn.relu( 1077 | batchnormalize( 1078 | h4, 1079 | g=self.gen_bn_g4, 1080 | b=self.gen_bn_b4, 1081 | batch_size=self.batch_size)) 1082 | 1083 | vox_size_l5 = self.start_vox_size * 16 1084 | output_shape_l5 = [ 1085 | self.batch_size, vox_size_l5[0], vox_size_l5[1], vox_size_l5[2], 1086 | self.dim_W5 1087 | ] 1088 | h5 = tf.nn.conv3d_transpose( 1089 | h4, self.gen_W5, output_shape=output_shape_l5, strides=self.stride) 1090 | 1091 | x = softmax(h5, self.batch_size, self.vox_shape) 1092 | return x, h5 1093 | 1094 | def refine(self, vox): 1095 | base = tf.nn.relu( 1096 | tf.nn.conv3d( 1097 | vox, self.refine_W1, strides=[1, 1, 1, 1, 1], padding='SAME')) 1098 | 1099 | #res1 1100 | res1_1 = tf.nn.relu( 1101 | tf.nn.conv3d( 1102 | base, 1103 | self.refine_res1_W1, 1104 | strides=[1, 1, 1, 1, 1], 1105 | padding='SAME')) 1106 | res1_2 = tf.nn.conv3d( 1107 | res1_1, 1108 | self.refine_res1_W2, 1109 | strides=[1, 1, 1, 1, 1], 1110 | padding='SAME') 1111 | 1112 | res1 = tf.nn.relu(tf.add(base, res1_2)) 1113 | 1114 | #res2 1115 | res2_1 = tf.nn.relu( 1116 | tf.nn.conv3d( 1117 | res1, 1118 | self.refine_res2_W1, 1119 | strides=[1, 1, 1, 1, 1], 1120 | padding='SAME')) 1121 | res2_2 = tf.nn.conv3d( 1122 | res2_1, 1123 | self.refine_res2_W2, 1124 | strides=[1, 1, 1, 1, 1], 1125 | padding='SAME') 1126 | 1127 | res2 = tf.nn.relu(tf.add(res1, res2_2)) 1128 | 1129 | #res3 1130 | res3_1 = tf.nn.relu( 1131 | tf.nn.conv3d( 1132 | res2, 1133 | self.refine_res3_W1, 1134 | strides=[1, 1, 1, 1, 1], 1135 | padding='SAME')) 1136 | res3_2 = tf.nn.conv3d( 1137 | res3_1, 1138 | self.refine_res3_W2, 1139 | strides=[1, 1, 1, 1, 1], 1140 | padding='SAME') 1141 | 1142 | res3 = tf.nn.relu(tf.add(res2, res3_2)) 1143 | 1144 | #res4 1145 | res4_1 = tf.nn.relu( 1146 | tf.nn.conv3d( 1147 | res3, 1148 | self.refine_res4_W1, 1149 | strides=[1, 1, 1, 1, 1], 1150 | padding='SAME')) 1151 | res4_2 = tf.nn.conv3d( 1152 | res4_1, 1153 | self.refine_res4_W2, 1154 | strides=[1, 1, 1, 1, 1], 1155 | padding='SAME') 1156 | 1157 | res4 = tf.nn.relu(tf.add(res3, res4_2)) 1158 | 1159 | out = tf.nn.conv3d( 1160 | res4, self.refine_W2, strides=[1, 1, 1, 1, 1], padding='SAME') 1161 | x_refine = softmax(out, self.batch_size, self.vox_shape) 1162 | 1163 | return x_refine 1164 | 1165 | def samples_generator(self, visual_size): 1166 | 1167 | Z = tf.placeholder(tf.float32, [ 1168 | visual_size, self.start_vox_size[0], self.start_vox_size[1], 1169 | self.start_vox_size[2], self.dim_z 1170 | ]) 1171 | 1172 | Z_ = tf.reshape(Z, [visual_size, -1]) 1173 | h1 = tf.nn.relu( 1174 | batchnormalize( 1175 | tf.matmul(Z_, self.gen_W1), g=self.gen_bn_g1, 1176 | b=self.gen_bn_b1)) 1177 | h1 = tf.reshape(h1, [ 1178 | visual_size, self.start_vox_size[0], self.start_vox_size[1], 1179 | self.start_vox_size[2], self.dim_W1 1180 | ]) 1181 | 1182 | vox_size_l2 = self.start_vox_size * 2 1183 | output_shape_l2 = [ 1184 | visual_size, vox_size_l2[0], vox_size_l2[1], vox_size_l2[2], 1185 | self.dim_W2 1186 | ] 1187 | h2 = tf.nn.conv3d_transpose( 1188 | h1, self.gen_W2, output_shape=output_shape_l2, strides=self.stride) 1189 | h2 = tf.nn.relu( 1190 | 
batchnormalize( 1191 | h2, 1192 | g=self.gen_bn_g2, 1193 | b=self.gen_bn_b2, 1194 | batch_size=visual_size))  # these activations have visual_size as their batch dimension 1195 | 1196 | vox_size_l3 = self.start_vox_size * 4 1197 | output_shape_l3 = [ 1198 | visual_size, vox_size_l3[0], vox_size_l3[1], vox_size_l3[2], 1199 | self.dim_W3 1200 | ] 1201 | h3 = tf.nn.conv3d_transpose( 1202 | h2, self.gen_W3, output_shape=output_shape_l3, strides=self.stride) 1203 | h3 = tf.nn.relu( 1204 | batchnormalize( 1205 | h3, 1206 | g=self.gen_bn_g3, 1207 | b=self.gen_bn_b3, 1208 | batch_size=visual_size)) 1209 | 1210 | vox_size_l4 = self.start_vox_size * 8 1211 | output_shape_l4 = [ 1212 | visual_size, vox_size_l4[0], vox_size_l4[1], vox_size_l4[2], 1213 | self.dim_W4 1214 | ] 1215 | h4 = tf.nn.conv3d_transpose( 1216 | h3, self.gen_W4, output_shape=output_shape_l4, strides=self.stride) 1217 | h4 = tf.nn.relu( 1218 | batchnormalize( 1219 | h4, 1220 | g=self.gen_bn_g4, 1221 | b=self.gen_bn_b4, 1222 | batch_size=visual_size)) 1223 | 1224 | vox_size_l5 = self.start_vox_size * 16 1225 | output_shape_l5 = [ 1226 | visual_size, vox_size_l5[0], vox_size_l5[1], vox_size_l5[2], 1227 | self.dim_W5 1228 | ] 1229 | h5 = tf.nn.conv3d_transpose( 1230 | h4, self.gen_W5, output_shape=output_shape_l5, strides=self.stride) 1231 | 1232 | x = softmax(h5, visual_size, self.vox_shape) 1233 | return Z, x 1234 | 1235 | def refine_generator(self, visual_size): 1236 | vox = tf.placeholder(tf.float32, [ 1237 | visual_size, self.vox_shape[0], self.vox_shape[1], 1238 | self.vox_shape[2], self.vox_shape[3] 1239 | ]) 1240 | 1241 | base = tf.nn.relu( 1242 | tf.nn.conv3d( 1243 | vox, self.refine_W1, strides=[1, 1, 1, 1, 1], padding='SAME')) 1244 | 1245 | #res1 1246 | res1_1 = tf.nn.relu( 1247 | tf.nn.conv3d( 1248 | base, 1249 | self.refine_res1_W1, 1250 | strides=[1, 1, 1, 1, 1], 1251 | padding='SAME')) 1252 | res1_2 = tf.nn.conv3d( 1253 | res1_1, 1254 | self.refine_res1_W2, 1255 | strides=[1, 1, 1, 1, 1], 1256 | padding='SAME') 1257 | 1258 | res1 = tf.nn.relu(tf.add(base, res1_2)) 1259 | 1260 | #res2 1261 | res2_1 = tf.nn.relu( 1262 | tf.nn.conv3d( 1263 | res1, 1264 | self.refine_res2_W1, 1265 | strides=[1, 1, 1, 1, 1], 1266 | padding='SAME')) 1267 | res2_2 = tf.nn.conv3d( 1268 | res2_1, 1269 | self.refine_res2_W2, 1270 | strides=[1, 1, 1, 1, 1], 1271 | padding='SAME') 1272 | 1273 | res2 = tf.nn.relu(tf.add(res1, res2_2)) 1274 | 1275 | #res3 1276 | res3_1 = tf.nn.relu( 1277 | tf.nn.conv3d( 1278 | res2, 1279 | self.refine_res3_W1, 1280 | strides=[1, 1, 1, 1, 1], 1281 | padding='SAME')) 1282 | res3_2 = tf.nn.conv3d( 1283 | res3_1, 1284 | self.refine_res3_W2, 1285 | strides=[1, 1, 1, 1, 1], 1286 | padding='SAME') 1287 | 1288 | res3 = tf.nn.relu(tf.add(res2, res3_2)) 1289 | 1290 | #res4 1291 | res4_1 = tf.nn.relu( 1292 | tf.nn.conv3d( 1293 | res3, 1294 | self.refine_res4_W1, 1295 | strides=[1, 1, 1, 1, 1], 1296 | padding='SAME')) 1297 | res4_2 = tf.nn.conv3d( 1298 | res4_1, 1299 | self.refine_res4_W2, 1300 | strides=[1, 1, 1, 1, 1], 1301 | padding='SAME') 1302 | 1303 | res4 = tf.nn.relu(tf.add(res3, res4_2)) 1304 | 1305 | out = tf.nn.conv3d( 1306 | res4, self.refine_W2, strides=[1, 1, 1, 1, 1], padding='SAME') 1307 | x_refine = softmax(out, visual_size, self.vox_shape)  # match the visual_size placeholder above 1308 | 1309 | return vox, x_refine 1310 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from config import cfg 5 | from util import 
DataProcess, scene_model_id_pair 6 | from model import FCR_aGAN 7 | 8 | 9 | def learning_rate(rate, step): 10 | if step < rate[1]: 11 | lr = rate[0] 12 | else: 13 | lr = rate[2] 14 | return lr 15 | 16 | 17 | def train(n_epochs, learning_rate_G, learning_rate_D, batch_size, mid_flag, 18 | check_num): 19 | beta_G = cfg.TRAIN.ADAM_BETA_G 20 | beta_D = cfg.TRAIN.ADAM_BETA_D 21 | n_vox = cfg.CONST.N_VOX 22 | dim = cfg.NET.DIM 23 | vox_shape = [n_vox[0], n_vox[1], n_vox[2], dim[4]] 24 | dim_z = cfg.NET.DIM_Z 25 | start_vox_size = cfg.NET.START_VOX 26 | kernel = cfg.NET.KERNEL 27 | stride = cfg.NET.STRIDE 28 | freq = cfg.CHECK_FREQ 29 | record_vox_num = cfg.RECORD_VOX_NUM 30 | refine_ch = cfg.NET.REFINE_CH 31 | refine_kernel = cfg.NET.REFINE_KERNEL 32 | 33 | refine_start = cfg.SWITCHING_ITE 34 | 35 | fcr_agan_model = FCR_aGAN( 36 | batch_size=batch_size, 37 | vox_shape=vox_shape, 38 | dim_z=dim_z, 39 | dim=dim, 40 | start_vox_size=start_vox_size, 41 | kernel=kernel, 42 | stride=stride, 43 | refine_ch=refine_ch, 44 | refine_kernel=refine_kernel, 45 | ) 46 | 47 | Z_tf, z_enc_tf, vox_tf, vox_gen_tf, vox_gen_decode_tf, vox_refine_dec_tf, vox_refine_gen_tf,\ 48 | recons_loss_tf, code_encode_loss_tf, gen_loss_tf, discrim_loss_tf, recons_loss_refine_tf, gen_loss_refine_tf, discrim_loss_refine_tf,\ 49 | cost_enc_tf, cost_code_tf, cost_gen_tf, cost_discrim_tf, cost_gen_ref_tf, cost_discrim_ref_tf, summary_tf,\ 50 | tsdf_tf = fcr_agan_model.build_model() 51 | """ 52 | z_enc_dep_tf, dep_tf, vox_gen_decode_dep_tf,\ 53 | recons_dep_loss_tf, code_encode_dep_loss_tf, gen_dep_loss_tf, discrim_dep_loss_tf,\ 54 | cost_enc_dep_tf, cost_code_dep_tf, cost_gen_dep_tf, cost_discrim_dep_tf, cost_code_compare_tf,\ 55 | """ 56 | config = tf.ConfigProto() 57 | config.gpu_options.allow_growth = True 58 | sess = tf.InteractiveSession(config=config) 59 | global_step = tf.Variable(0, name='global_step', trainable=False) 60 | saver = tf.train.Saver(max_to_keep=cfg.SAVER_MAX) 61 | 62 | data_paths = scene_model_id_pair(dataset_portion=cfg.TRAIN.DATASET_PORTION) 63 | print '---amount of data:' + str(len(data_paths)) 64 | data_process = DataProcess(data_paths, batch_size, repeat=True) 65 | 66 | encode_vars = filter(lambda x: x.name.startswith('enc'), 67 | tf.trainable_variables()) 68 | discrim_vars = filter(lambda x: x.name.startswith('discrim_vox'), 69 | tf.trainable_variables()) 70 | # depth--start 71 | """ 72 | depth_vars = filter(lambda x: x.name.startswith('dep'), tf.trainable_variables()) 73 | discrim_dep_vars = filter(lambda x: x.name.startswith('discrim_dep'), tf.trainable_variables()) 74 | """ 75 | # depth--end 76 | gen_vars = filter(lambda x: x.name.startswith('gen'), 77 | tf.trainable_variables()) 78 | code_vars = filter(lambda x: x.name.startswith('cod'), 79 | tf.trainable_variables()) 80 | refine_vars = filter(lambda x: x.name.startswith('refine'), 81 | tf.trainable_variables()) 82 | 83 | lr_VAE = tf.placeholder(tf.float32, shape=[]) 84 | train_op_encode = tf.train.AdamOptimizer( 85 | lr_VAE, beta1=beta_D, beta2=0.9).minimize( 86 | cost_enc_tf, var_list=encode_vars) 87 | train_op_discrim = tf.train.AdamOptimizer( 88 | learning_rate_D, beta1=beta_D, beta2=0.9).minimize( 89 | cost_discrim_tf, var_list=discrim_vars, global_step=global_step) 90 | train_op_gen = tf.train.AdamOptimizer( 91 | learning_rate_G, beta1=beta_G, beta2=0.9).minimize( 92 | cost_gen_tf, var_list=gen_vars) 93 | train_op_code = tf.train.AdamOptimizer( 94 | lr_VAE, beta1=beta_G, beta2=0.9).minimize( 95 | cost_code_tf, var_list=code_vars) 96 | # 
depth--start 97 | """ 98 | train_op_latent_depvox = tf.train.AdamOptimizer( 99 | lr_VAE, beta1=beta_G, beta2=0.9).minimize( 100 | cost_code_compare_tf, var_list=depth_vars) 101 | train_op_encode_dep=tf.train.AdamOptimizer( 102 | lr_VAE, beta1=beta_D, beta2=0.9).minimize( 103 | cost_enc_dep_tf, var_list=depth_vars) 104 | train_op_discrim_dep = tf.train.AdamOptimizer( 105 | learning_rate_D, beta1=beta_D, beta2=0.9).minimize( 106 | cost_discrim_dep_tf, var_list=discrim_dep_vars) 107 | train_op_gen_dep = tf.train.AdamOptimizer( 108 | learning_rate_G, beta1=beta_G, beta2=0.9).minimize( 109 | cost_gen_dep_tf, var_list=gen_vars) 110 | train_op_code_dep = tf.train.AdamOptimizer( 111 | lr_VAE, beta1=beta_G, beta2=0.9).minimize( 112 | cost_code_dep_tf, var_list=code_vars) 113 | """ 114 | # depth--end 115 | train_op_refine = tf.train.AdamOptimizer( 116 | lr_VAE, beta1=beta_G, beta2=0.9).minimize( 117 | cost_gen_ref_tf, var_list=refine_vars) 118 | train_op_discrim_refine = tf.train.AdamOptimizer( 119 | learning_rate_D, beta1=beta_D, beta2=0.9).minimize( 120 | cost_discrim_ref_tf, 121 | var_list=discrim_vars, 122 | global_step=global_step) 123 | 124 | Z_tf_sample, vox_tf_sample = fcr_agan_model.samples_generator( 125 | visual_size=batch_size) 126 | sample_vox_tf, sample_refine_vox_tf = fcr_agan_model.refine_generator( 127 | visual_size=batch_size) 128 | writer = tf.summary.FileWriter(cfg.DIR.LOG_PATH, sess.graph_def) 129 | tf.initialize_all_variables().run() 130 | 131 | if mid_flag: 132 | chckpt_path = cfg.DIR.CHECK_PT_PATH + str( 133 | check_num) #+ '-' + str(check_num * freq) 134 | saver.restore(sess, chckpt_path) 135 | Z_var_np_sample = np.load(cfg.DIR.TRAIN_OBJ_PATH + 136 | '/sample_z.npy').astype(np.float32) 137 | Z_var_np_sample = Z_var_np_sample[:batch_size] 138 | print '---weights restored' 139 | else: 140 | Z_var_np_sample = np.random.normal( 141 | size=(batch_size, start_vox_size[0], start_vox_size[1], 142 | start_vox_size[2], dim_z)).astype(np.float32) 143 | np.save(cfg.DIR.TRAIN_OBJ_PATH + '/sample_z.npy', Z_var_np_sample) 144 | 145 | ite = check_num * freq + 1 146 | cur_epochs = int(ite / int(len(data_paths) / batch_size)) 147 | 148 | #training 149 | for epoch in np.arange(cur_epochs, n_epochs): 150 | epoch_flag = True 151 | while epoch_flag: 152 | print '=iteration:%d, epoch:%d' % (ite, epoch) 153 | db_inds, epoch_flag = data_process.get_next_minibatch() 154 | batch_voxel = data_process.get_voxel(db_inds) 155 | batch_voxel_train = batch_voxel 156 | batch_tsdf = data_process.get_tsdf(db_inds) 157 | batch_tsdf_train = np.expand_dims(batch_tsdf, axis=-1) 158 | """ 159 | batch_depth = data_process.get_depth(db_inds) 160 | batch_depth_train = batch_depth / 255.0 161 | """ 162 | lr = learning_rate(cfg.LEARNING_RATE_V, ite) 163 | 164 | batch_z_var = np.random.normal( 165 | size=(batch_size, start_vox_size[0], start_vox_size[1], 166 | start_vox_size[2], dim_z)).astype(np.float32) 167 | 168 | if ite < refine_start: 169 | for s in np.arange(2): 170 | _, recons_loss_val, code_encode_loss_val, cost_enc_val = sess.run( 171 | [ 172 | train_op_encode, recons_loss_tf, 173 | code_encode_loss_tf, cost_enc_tf 174 | ], 175 | feed_dict={ 176 | vox_tf: batch_voxel_train, 177 | tsdf_tf: batch_tsdf_train, 178 | Z_tf: batch_z_var, 179 | lr_VAE: lr 180 | }, 181 | ) 182 | 183 | _, gen_loss_val, cost_gen_val = sess.run( 184 | [train_op_gen, gen_loss_tf, cost_gen_tf], 185 | feed_dict={ 186 | Z_tf: batch_z_var, 187 | vox_tf: batch_voxel_train, 188 | tsdf_tf: batch_tsdf_train, 189 | lr_VAE: lr 190 | }, 191 | ) 192 | # 
depth--start 193 | """ 194 | _, cost_code_compare_val = sess.run( 195 | [train_op_latent_depvox, cost_code_compare_tf], 196 | feed_dict={vox_tf:batch_voxel_train, dep_tf:batch_depth_train, lr_VAE:lr}, 197 | ) 198 | _, recons_dep_loss_val, code_encode_dep_loss_val, cost_enc_dep_val = sess.run( 199 | [train_op_encode_dep, recons_dep_loss_tf, code_encode_dep_loss_tf, cost_enc_dep_tf], 200 | feed_dict={vox_tf:batch_voxel_train, dep_tf:batch_depth_train, tsdf_tf:batch_tsdf_train, Z_tf:batch_z_var, lr_VAE:lr}, 201 | ) 202 | 203 | _, gen_dep_loss_val, cost_gen_dep_val = sess.run( 204 | [train_op_gen_dep, gen_dep_loss_tf, cost_gen_dep_tf], 205 | feed_dict={Z_tf:batch_z_var, vox_tf:batch_voxel_train, dep_tf:batch_depth_train, tsdf_tf:batch_tsdf_train, lr_VAE:lr}, 206 | ) 207 | # depth--end 208 | """ 209 | _, discrim_loss_val, cost_discrim_val = sess.run( 210 | [train_op_discrim, discrim_loss_tf, cost_discrim_tf], 211 | feed_dict={ 212 | Z_tf: batch_z_var, 213 | vox_tf: batch_voxel_train, 214 | tsdf_tf: batch_tsdf_train 215 | }, 216 | ) 217 | 218 | _, cost_code_val, z_enc_val = sess.run( 219 | [train_op_code, cost_code_tf, z_enc_tf], 220 | feed_dict={ 221 | Z_tf: batch_z_var, 222 | vox_tf: batch_voxel_train, 223 | tsdf_tf: batch_tsdf_train, 224 | lr_VAE: lr 225 | }, 226 | ) 227 | """ 228 | # depth--start 229 | _, discrim_dep_loss_val, cost_discrim_dep_val = sess.run( 230 | [train_op_discrim_dep, discrim_dep_loss_tf, cost_discrim_dep_tf], 231 | feed_dict={Z_tf:batch_z_var, vox_tf:batch_voxel_train, dep_tf:batch_depth_train, tsdf_tf:batch_tsdf_train}, 232 | ) 233 | 234 | 235 | _, cost_code_dep_val, z_enc_dep_val= sess.run( 236 | [train_op_code_dep, cost_code_dep_tf, z_enc_dep_tf], 237 | feed_dict={Z_tf:batch_z_var, dep_tf:batch_depth_train, tsdf_tf:batch_tsdf_train, lr_VAE:lr}, 238 | ) 239 | """ 240 | # depth--end 241 | summary = sess.run( 242 | summary_tf, 243 | feed_dict={ 244 | Z_tf: batch_z_var, 245 | vox_tf: batch_voxel_train, 246 | tsdf_tf: batch_tsdf_train, 247 | lr_VAE: lr 248 | }, 249 | ) 250 | 251 | print 'reconstruction loss:', recons_loss_val if ( 252 | 'recons_loss_val' in locals()) else 'None' 253 | # print ' (depth):', recons_dep_loss_val if ('recons_dep_loss_val' in locals()) else 'None' 254 | 255 | print ' code encode loss:', code_encode_loss_val if ( 256 | 'code_encode_loss_val' in locals()) else 'None' 257 | 258 | # print ' (depth):', code_encode_dep_loss_val if ('code_encode_dep_loss_val' in locals()) else 'None' 259 | 260 | print ' gen loss:', gen_loss_val if ( 261 | 'gen_loss_val' in locals()) else 'None' 262 | 263 | # print ' (depth):', gen_dep_loss_val if ('gen_dep_loss_val' in locals()) else 'None' 264 | 265 | print ' cost_encoder:', cost_enc_val if ( 266 | 'cost_enc_val' in locals()) else 'None' 267 | 268 | # print ' (depth):', cost_enc_dep_val if ('cost_enc_dep_val' in locals()) else 'None' 269 | 270 | print ' cost_generator:', cost_gen_val if ( 271 | 'cost_gen_val' in locals()) else 'None' 272 | 273 | # print ' (depth):', cost_gen_dep_val if ('cost_gen_dep_val' in locals()) else 'None' 274 | 275 | print ' cost_discriminator:', cost_discrim_val if ( 276 | 'cost_discrim_val' in locals()) else 'None' 277 | 278 | # print ' (depth):', cost_discrim_dep_val if ('cost_discrim_dep_val' in locals()) else 'None' 279 | 280 | print ' cost_code:', cost_code_val if ( 281 | 'cost_code_val' in locals()) else 'None' 282 | 283 | # print ' (depth):', cost_code_dep_val if ('cost_code_dep_val' in locals()) else 'None' 284 | 285 | # print ' diff_codes_vox_dep:', cost_code_compare_val if 
('cost_code_compare_val' in locals()) else 'None' 286 | 287 | print ' average of enc_z:', np.mean(np.mean( 288 | z_enc_val, 4)) if ('z_enc_val' in locals()) else 'None' 289 | 290 | print ' std of enc_z:', np.mean(np.std( 291 | z_enc_val, 4)) if ('z_enc_val' in locals()) else 'None' 292 | 293 | # print 'average of enc_z_dep:', np.mean(np.mean(z_enc_dep_val,4)) if ('z_enc_dep_val' in locals()) else 'None' 294 | 295 | # print ' std of enc_z_dep:', np.mean(np.std(z_enc_dep_val,4)) if ('z_enc_dep_val' in locals()) else 'None' 296 | 297 | if np.mod(ite, freq) == 0: 298 | vox_models = sess.run( 299 | vox_tf_sample, 300 | feed_dict={Z_tf_sample: Z_var_np_sample}, 301 | ) 302 | vox_models_cat = np.argmax(vox_models, axis=4) 303 | record_vox = vox_models_cat[:record_vox_num] 304 | np.save( 305 | cfg.DIR.TRAIN_OBJ_PATH + '/' + str(ite / freq) + 306 | '.npy', record_vox) 307 | save_path = saver.save( 308 | sess, 309 | cfg.DIR.CHECK_PT_PATH + str(ite / freq), 310 | global_step=None) 311 | 312 | else: 313 | _, recons_loss_val, recons_loss_refine_val, gen_loss_refine_val, cost_gen_ref_val = sess.run( 314 | [ 315 | train_op_refine, recons_loss_tf, recons_loss_refine_tf, 316 | gen_loss_refine_tf, cost_gen_ref_tf 317 | ], 318 | feed_dict={ 319 | Z_tf: batch_z_var, 320 | vox_tf: batch_voxel_train, 321 | tsdf_tf: batch_tsdf_train, 322 | lr_VAE: lr 323 | }, 324 | ) 325 | 326 | _, discrim_loss_refine_val, cost_discrim_ref_val, summary = sess.run( 327 | [ 328 | train_op_discrim_refine, discrim_loss_refine_tf, 329 | cost_discrim_ref_tf, summary_tf 330 | ], 331 | feed_dict={ 332 | Z_tf: batch_z_var, 333 | vox_tf: batch_voxel_train, 334 | tsdf_tf: batch_tsdf_train 335 | }, 336 | ) 337 | 338 | print 'reconstruction loss:', recons_loss_val 339 | print ' recons refine loss:', recons_loss_refine_val 340 | print ' gen loss:', gen_loss_refine_val 341 | print ' cost_discriminator:', cost_discrim_ref_val 342 | 343 | if np.mod(ite, freq) == 0: 344 | vox_models = sess.run( 345 | vox_tf_sample, 346 | feed_dict={Z_tf_sample: Z_var_np_sample}, 347 | ) 348 | refined_models = sess.run( 349 | sample_refine_vox_tf, 350 | feed_dict={sample_vox_tf: vox_models}) 351 | vox_models_cat = np.argmax(vox_models, axis=4) 352 | record_vox = vox_models_cat[:record_vox_num] 353 | np.save( 354 | cfg.DIR.TRAIN_OBJ_PATH + '/' + str(ite / freq) + 355 | '.npy', record_vox) 356 | 357 | vox_models_cat = np.argmax(refined_models, axis=4) 358 | record_vox = vox_models_cat[:record_vox_num] 359 | np.save( 360 | cfg.DIR.TRAIN_OBJ_PATH + '/' + str(ite / freq) + 361 | '_refine.npy', record_vox) 362 | save_path = saver.save( 363 | sess, 364 | cfg.DIR.CHECK_PT_PATH + str(ite / freq), 365 | global_step=None) 366 | 367 | writer.add_summary(summary, global_step=ite) 368 | 369 | ite += 1 370 | -------------------------------------------------------------------------------- /tsdf.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/tsdf.ply -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import random 4 | 5 | from config import cfg 6 | 7 | 8 | class DataProcess(): 9 | def __init__(self, data_paths, batch_size, repeat=True): 10 | self.data_paths = data_paths 11 | self.num_data = len(data_paths) 12 | self.repeat = repeat 13 | 14 | self.batch_size = batch_size 15 | 
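        # build the initial random permutation of sample indices and reset the read cursor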
self.shuffle_db_inds() 16 | self.n_vox = cfg.CONST.N_VOX 17 | # self.n_dep = cfg.CONST.N_DEP 18 | 19 | def shuffle_db_inds(self): 20 | # Randomly permute the training roidb 21 | if self.repeat: 22 | self.perm = np.random.permutation(np.arange(self.num_data)) 23 | else: 24 | self.perm = np.arange(self.num_data) 25 | self.cur = 0 26 | 27 | def get_next_minibatch(self): 28 | flag = True 29 | if (self.cur + self.batch_size) >= self.num_data and self.repeat: 30 | self.shuffle_db_inds() 31 | flag = False 32 | 33 | db_inds = self.perm[self.cur:min(self.cur + 34 | self.batch_size, self.num_data)] 35 | self.cur += self.batch_size 36 | return db_inds, flag 37 | 38 | def get_tsdf(self, db_inds): 39 | batch_tsdf = np.zeros( 40 | (self.batch_size, self.n_vox[0], self.n_vox[1], self.n_vox[2]), 41 | dtype=np.float32) 42 | 43 | for batch_id, db_ind in enumerate(db_inds): 44 | sceneId, model_id = self.data_paths[db_ind] 45 | 46 | tsdf_fn = cfg.DIR.TSDF_PATH % (model_id) 47 | tsdf_data = np.load(tsdf_fn) 48 | 49 | batch_tsdf[batch_id, :, :, :] = tsdf_data 50 | return batch_tsdf 51 | 52 | def get_voxel(self, db_inds): 53 | batch_voxel = np.zeros( 54 | (self.batch_size, self.n_vox[0], self.n_vox[1], self.n_vox[2]), 55 | dtype=np.float32) 56 | 57 | for batch_id, db_ind in enumerate(db_inds): 58 | sceneId, model_id = self.data_paths[db_ind] 59 | 60 | voxel_fn = cfg.DIR.VOXEL_PATH % (model_id) 61 | voxel_data = np.load(voxel_fn) 62 | 63 | batch_voxel[batch_id, :, :, :] = voxel_data 64 | return batch_voxel 65 | 66 | """ 67 | def get_depth(self, db_inds): 68 | batch_depth = np.zeros( 69 | (self.batch_size, self.n_dep[0], self.n_dep[1], self.n_dep[2]), dtype=np.float32) 70 | 71 | for batch_id, db_ind in enumerate(db_inds): 72 | sceneId, model_id = self.data_paths[db_ind] 73 | 74 | depth_fn = cfg.DIR.DEPTH_PATH % (model_id) 75 | depth_data = np.load(depth_fn) 76 | 77 | batch_depth[batch_id, :, :, :] = np.reshape(depth_data, [self.n_dep[0], self.n_dep[1], self.n_dep[2]]) 78 | return batch_depth 79 | """ 80 | 81 | 82 | def scene_model_id_pair(dataset_portion=[]): 83 | ''' 84 | Load sceneId, model names from a suncg dataset. 
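    Returns (model_path, model_id) tuples for the slice of the dataset selected by dataset_portion.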
85 | ''' 86 | 87 | scene_name_pair = [] # full path of the objs files 88 | 89 | model_path = cfg.DIR.ROOT_PATH 90 | models = os.listdir(model_path) 91 | 92 | scene_name_pair.extend([(model_path, model_id) for model_id in models]) 93 | 94 | num_models = len(scene_name_pair) 95 | portioned_scene_name_pair = scene_name_pair[int( 96 | num_models * dataset_portion[0]):int(num_models * dataset_portion[1])] 97 | 98 | return portioned_scene_name_pair 99 | 100 | 101 | def scene_model_id_pair_test(dataset_portion=[]): 102 | 103 | amount_of_test_sample = 200 104 | 105 | scene_name_pair = [] # full path of the objs files 106 | 107 | model_path = cfg.DIR.ROOT_PATH 108 | models = os.listdir(model_path) 109 | 110 | scene_name_pair.extend([(model_path, model_id) for model_id in models]) 111 | 112 | num_models = len(scene_name_pair) 113 | data_paths_test = scene_name_pair[int(num_models * dataset_portion[1]) + 114 | 1:] 115 | # random.shuffle(data_paths_test) 116 | #data_paths = scene_name_pair[int(num_models * dataset_portion[1])+1:int(num_models * dataset_portion[1])+amount_of_test_sample+1] 117 | data_paths = data_paths_test[:amount_of_test_sample] 118 | 119 | num_models = len(data_paths) 120 | print '---amount of test data:' + str(num_models) 121 | 122 | n_vox = cfg.CONST.N_VOX 123 | 124 | batch_voxel = np.zeros((num_models, n_vox[0], n_vox[1], n_vox[2]), 125 | dtype=np.float32) 126 | # depth--start 127 | """ 128 | n_dep = cfg.CONST.N_DEP 129 | 130 | batch_depth = np.zeros( 131 | (num_models, n_dep[0], n_dep[1], n_dep[2]), dtype=np.float32) 132 | """ 133 | # depth--end 134 | batch_tsdf = np.zeros((num_models, n_vox[0], n_vox[1], n_vox[2], 1), 135 | dtype=np.float32) 136 | 137 | for i in np.arange(num_models): 138 | sceneId, model_id = data_paths[i] 139 | 140 | voxel_fn = cfg.DIR.VOXEL_PATH % (model_id) 141 | voxel_data = np.load(voxel_fn) 142 | 143 | batch_voxel[i, :, :, :] = voxel_data 144 | # depth--start 145 | """ 146 | depth_fn = cfg.DIR.DEPTH_PATH % (model_id) 147 | depth_data = np.load(depth_fn) 148 | batch_depth[i, :, :, :] = np.reshape(depth_data, [n_dep[0], n_dep[1], n_dep[2]]) 149 | """ 150 | # depth--end 151 | 152 | tsdf_fn = cfg.DIR.TSDF_PATH % (model_id) 153 | tsdf_data = np.load(tsdf_fn) 154 | batch_tsdf[i, :, :, :, :] = np.reshape( 155 | tsdf_data, [n_vox[0], n_vox[1], n_vox[2], 1]) 156 | 157 | return batch_voxel, batch_tsdf, num_models 158 | 159 | 160 | def onehot(voxel, class_num): 161 | onehot_voxels = np.zeros((voxel.shape[0], voxel.shape[1], voxel.shape[2], 162 | voxel.shape[3], class_num)) 163 | for i in np.arange(class_num): 164 | onehot_voxel = np.zeros(voxel.shape) 165 | onehot_voxel[np.where(voxel == i)] = 1 166 | onehot_voxels[:, :, :, :, i] = onehot_voxel[:, :, :, :] 167 | return onehot_voxels 168 | -------------------------------------------------------------------------------- /visualization/voxviz.py: -------------------------------------------------------------------------------- 1 | import os 2 | import matplotlib as mpl 3 | if os.environ.get('DISPLAY', '') == '': 4 | print('no display found. 
Using non-interactive Agg backend') 5 | mpl.use('Agg') 6 | import matplotlib.pyplot as plt 7 | import os 8 | import numpy as np 9 | from matplotlib import cm 10 | from skimage.transform import resize 11 | import argparse 12 | from progressbar import ProgressBar 13 | 14 | 15 | class ScanFile(object): 16 | def __init__(self, directory, prefix=None, postfix='.jpg'): 17 | self.directory = directory 18 | self.prefix = prefix 19 | self.postfix = postfix 20 | 21 | def scan_files(self): 22 | files_list = [] 23 | 24 | for dirpath, dirnames, filenames in os.walk(self.directory): 25 | ''' 26 | dirpath is a string, the path to the directory. 27 | dirnames is a list of the names of the subdirectories in dirpath 28 | (excluding '.' and '..'). 29 | filenames is a list of the names of the non-directory files 30 | in dirpath. 31 | ''' 32 | for special_file in filenames: 33 | if self.postfix: 34 | if special_file.endswith(self.postfix): 35 | files_list.append(os.path.join(dirpath, special_file)) 36 | elif self.prefix: 37 | if special_file.startswith(self.prefix): 38 | files_list.append(os.path.join(dirpath, special_file)) 39 | else: 40 | files_list.append(os.path.join(dirpath, special_file)) 41 | 42 | return files_list 43 | 44 | def scan_subdir(self): 45 | subdir_list = [] 46 | for dirpath, dirnames, files in os.walk(self.directory): 47 | subdir_list.append(dirpath) 48 | return subdir_list 49 | 50 | 85 | 86 | 87 | def normalize(arr): 88 | arr_min = np.min(arr) 89 | return (arr - arr_min) / (np.max(arr) - arr_min) 90 | 91 | 92 | def show_histogram(values): 93 | n, bins, patches = plt.hist(values.reshape(-1), 50, normed=1) 94 | bin_centers = 0.5 * (bins[:-1] + bins[1:]) 95 | 96 | for c, p in zip(normalize(bin_centers), patches): 97 | plt.setp(p, 'facecolor', cm.hsv(c)) 98 | 99 | plt.show() 100 | 101 | 102 | def explode(data): 103 | shape_arr = np.array(data.shape) 104 | size = shape_arr[:3] * 2 - 1 105 | exploded = np.zeros( 106 | np.concatenate([size, shape_arr[3:]]), dtype=data.dtype) 107 | exploded[::2, ::2, ::2] = data 108 | return exploded 109 | 110 | 111 | def expand_coordinates(indices): 112 | x, y, z = indices 113 | x[1::2, :, :] += 1 114 | y[:, 1::2, :] += 1 115 | z[:, :, 1::2] += 1 116 | return x, y, z 117 | 118 | 119 | def scale_by(arr, fac): 120 | mean = np.mean(arr) 121 | return (arr - mean) * fac + mean 122 | 123 | 124 | def plot_image(arr, name='depth.png'): 125 | fig = plt.figure() 126 | ax = fig.add_subplot(111) 127 | # ax.set_axis_off() 128 | arr = (arr - np.min(arr)) / (np.max(arr) - np.min(arr)) * 255 129 | arr = np.uint8(arr) 130 | ax.set_axis_off() 131 | # ax.set_aspect('equal') 132 | 133 | plt.imshow(arr, cmap="hot") 134 | plt.savefig(name, bbox_inches='tight', pad_inches=0, transparent=True) 135 | plt.close(fig) 136 | 137 | 138 | def plot_cube(cube, name='voxel', angle=40, IMG_DIM=80): 139 | from mpl_toolkits.mplot3d import Axes3D 140 | 141 | # cube = normalize(cube) 142 | cube[np.where(cube > 11)] = 0 143 | facecolors = cm.Paired((np.round(cube) / 11)) 144 | # make the alpha channels more similar to each other while 0 stays 0 145 | facecolors[:, :, :, -1] = 0.1 * np.tanh(cube * 1000) 146 | facecolors = explode(facecolors) 147 | filled = facecolors[:, :, :, -1] != 0 148 | 149 | x, y, z = expand_coordinates(np.indices(np.array(filled.shape) + 1)) 150 | 151 | # Here is a loop for generating demo files 152 | for idx, val in enumerate(np.arange(-40, -30, 10)): 153 | fig = plt.figure(figsize=(30 / 2.54, 30 / 2.54)) # , dpi=150) 154 | # plot 155 | ax1 = fig.add_subplot(111, projection='3d') 156 | # For samples in SUNCG, 20, -40 is a good choice for visualization 157 | # ax1.view_init(np.abs(90-val/2), val) 158 | ax1.view_init(angle, val) 159 | ax1.set_xlim(right=IMG_DIM * 2) 160 | ax1.set_ylim(top=IMG_DIM * 2) 161 | ax1.set_zlim(top=48 * 2) 162 | ax1.set_axis_off() 163 | ax1.voxels( 164 | x, 165 | y, 166 | z, 167 | filled, 168 | facecolors=facecolors, 169 | edgecolors=np.clip(2 * facecolors - 0.5, 0, 1)) 170 | 171 | # plt.show() 172 | plt.savefig( 173 | name + '_' + format(idx, '04d') + '.png', 174 | bbox_inches='tight', 175 | pad_inches=0, 176 | transparent=True) 177 | plt.close(fig) 178 | """ 179 | objects_name = ['empty', 'ceiling', 'floor', 'wall', 'window', 'chair', 'bed', 'sofa', 'table', 'tvs', 'furniture', 'object'] 180 | for x in range(1, 11): 181 | fig = plt.figure(figsize=(30/2.54, 30/2.54)) 182 | 
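    # caution if this block is re-enabled: the loop variable x shadows the voxel
    # x-coordinates computed by expand_coordinates above, so the ax1.voxels(x, y, z, ...)
    # call below would receive a class index instead of a coordinate grid; rename the
    # loop variable (e.g. to cls) before using it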
filled = explode(cube) == x 183 | ax1 = fig.add_subplot(111, projection='3d') 184 | ax1.view_init(20, angle) 185 | ax1.set_xlim(right=IMG_DIM*2) 186 | ax1.set_ylim(top=IMG_DIM*2) 187 | ax1.set_zlim(top=48*2) 188 | ax1.set_title(objects_name[x]) 189 | ax1.set_axis_off() 190 | ax1.voxels(x, y, z, filled, facecolors=facecolors) 191 | # plt.show() 192 | plt.savefig(name.replace('.png', '_'+objects_name[x]+'.png'), bbox_inches='tight', pad_inches=0, transparent=True) 193 | plt.close(fig) 194 | """ 195 | 196 | 197 | def plot_depvox(dir_dep, dir_vox, target_folder): 198 | label_start = dir_dep.rfind('/') + 1 199 | label_end = dir_dep.find('.', label_start) 200 | arr = np.load(dir_dep) 201 | plot_image( 202 | arr, 203 | name=target_folder + '/depth/' + dir_dep[label_start:label_end] + 204 | '.png') 205 | 206 | arr = np.load(dir_vox) 207 | 208 | # ignore 255 and replace it with 0 209 | arr[arr == 255] = 0 210 | 211 | # show_histogram(arr) 212 | """ 213 | transformed = np.clip( 214 | scale_by(np.clip(normalize(arr)-0.1, 0, 1)**0.4, 2)-0.1, 215 | 0, 1) 216 | """ 217 | resized = resize(arr, (48, 80, 80), mode='constant') 218 | plot_cube( 219 | np.rollaxis(resized[:, :, :], 2, 0), 220 | name=target_folder + '/voxel/' + dir_dep[label_start:label_end] + 221 | '.png') 222 | 223 | 224 | if __name__ == "__main__": 225 | 226 | parser = argparse.ArgumentParser(description='Parser added') 227 | parser.add_argument( 228 | '-d', 229 | action="store", 230 | dest="dir_dep", 231 | default="./SUNCGtrain_3001_5000", 232 | help='npy file for depth') 233 | parser.add_argument( 234 | '-v', 235 | action="store", 236 | dest="dir_vox", 237 | default="./SUNCGtrain_3001_5000", 238 | help='npy file for voxel') 239 | parser.add_argument( 240 | '-t', 241 | action="store", 242 | dest="target_folder", 243 | default="./target_folder", 244 | help='target folder for vis') 245 | parser.print_help() 246 | results = parser.parse_args() 247 | 248 | dir_dep = results.dir_dep 249 | dir_vox = results.dir_vox 250 | target_folder = results.target_folder 251 | scan = ScanFile(dir_dep) 252 | subdirs = scan.scan_subdir() 253 | files = scan.scan_files() 254 | """ 255 | pbar = ProgressBar() 256 | for file_dep in pbar(files): 257 | file_vox = file_dep.replace(dir_dep, dir_vox, 1) 258 | plot_depvox(file_dep, file_vox, target_folder) 259 | """ 260 | # vis for 3D FGAN 261 | pbar = ProgressBar() 262 | arr = np.load(results.dir_vox) 263 | # arr = np.expand_dims(arr, axis=0) 264 | arr[arr == 255] = 0 265 | for idx in pbar(range( 266 | 0, 267 | arr.shape[0])): #([37, 69, 73, 76, 91, 93, 100, 121, 154, 156]): 268 | resized = arr[idx, :, :, :] 269 | # resized = normalize(resized) 270 | resized = np.squeeze(resized) 271 | # resized = resize(resized, (48, 80, 80), mode='constant') 272 | plot_cube( 273 | np.flip(np.rollaxis(resized[:, :, :], 2, 0), 1), 274 | name=target_folder + '/' + str(idx)) 275 | --------------------------------------------------------------------------------
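For quick inspection, here is a minimal sketch of driving plot_cube from voxviz.py on a synthetic volume; the import path, the output name demo_voxel, and the random labels are illustrative assumptions, while the 0-11 label range and the axis shuffle follow plot_depvox and the cube > 11 clamp above:

import numpy as np
from voxviz import plot_cube  # assuming this runs from inside visualization/

# synthetic semantic volume: integer class ids in [0, 11], shaped like the
# (48, 80, 80) volumes that plot_depvox produces after resizing
vox = np.random.randint(0, 12, size=(48, 80, 80)).astype(np.float32)

# the same axis shuffle plot_depvox applies before rendering
plot_cube(np.rollaxis(vox, 2, 0), name='demo_voxel', angle=40, IMG_DIM=80)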