├── .DS_Store
├── 3dv
│   ├── .DS_Store
│   ├── 3dv-poster.pdf
│   ├── 3dv-poster.png
│   ├── architecture.png
│   ├── data_format.png
│   ├── discriminators.png
│   ├── overview.png
│   ├── qualitative_results.png
│   └── video.gif
├── README.md
├── config.py
├── config_test.py
├── data
│   ├── bin2camera.py
│   ├── depthbin2npy.py
│   └── depthbin2npy_tsdf.py
├── depth-tsdf
│   ├── .DS_Store
│   ├── README.md
│   ├── back-project
│   ├── back-project.cu
│   ├── compile.sh
│   ├── data
│   │   ├── .DS_Store
│   │   ├── camera-intrinsics.txt
│   │   ├── camera
│   │   │   ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.txt
│   │   │   ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.txt
│   │   │   ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.txt
│   │   │   └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.txt
│   │   ├── depth_real_png
│   │   │   ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png
│   │   │   ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.png
│   │   │   ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.png
│   │   │   └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.png
│   │   ├── depth_rgb_png
│   │   │   ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png
│   │   │   ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.png
│   │   │   ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.png
│   │   │   └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.png
│   │   ├── depth_tsdf_bin
│   │   │   ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.bin
│   │   │   ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.bin
│   │   │   ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.bin
│   │   │   └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.bin
│   │   ├── depth_tsdf_occluded_npy
│   │   │   ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.npy
│   │   │   ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.npy
│   │   │   ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.npy
│   │   │   └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.npy
│   │   ├── depth_tsdf_ply
│   │   │   ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.ply
│   │   │   ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.ply
│   │   │   ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.ply
│   │   │   └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.ply
│   │   └── origin
│   │       ├── 00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.txt
│   │       ├── 00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.txt
│   │       ├── 00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.txt
│   │       └── 00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.txt
│   ├── depth_yida.png
│   ├── run.sh
│   ├── tsdf2mesh.m
│   └── utils.hpp
├── evaluate.py
├── main.py
├── model.py
├── train.py
├── tsdf.ply
├── util.py
└── visualization
    └── voxviz.py
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/.DS_Store
--------------------------------------------------------------------------------
/3dv/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/.DS_Store
--------------------------------------------------------------------------------
/3dv/3dv-poster.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/3dv-poster.pdf
--------------------------------------------------------------------------------
/3dv/3dv-poster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/3dv-poster.png
--------------------------------------------------------------------------------
/3dv/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/architecture.png
--------------------------------------------------------------------------------
/3dv/data_format.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/data_format.png
--------------------------------------------------------------------------------
/3dv/discriminators.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/discriminators.png
--------------------------------------------------------------------------------
/3dv/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/overview.png
--------------------------------------------------------------------------------
/3dv/qualitative_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/qualitative_results.png
--------------------------------------------------------------------------------
/3dv/video.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/3dv/video.gif
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Adversarial Semantic Scene Completion from a Single Depth Image
2 |
3 | ## Authors
4 | **[Yida Wang](https://wangyida.github.io/#about), David Tan, Nassir Navab and [Federico Tombari](http://campar.in.tum.de/Main/FedericoTombari)**
5 |
6 | *International Conference on 3D Vision*, IEEE
7 |
8 | ## Showcase
9 |
10 | ![video](3dv/video.gif)
11 | ## Overview
12 | ![overview](3dv/overview.png)
13 | We introduce a direct reconstruction method that maps a single 2.5D depth image to a 3D voxel volume with both shape completion and semantic segmentation. It relies on a deep architecture based on a 3D VAE with adversarial training to improve performance on this task.
14 |
15 | ## Architecture
16 | ![architecture](3dv/architecture.png)
17 | We utilize the latent representation of a 3D auto-encoder to help train a latent representation from a depth image. The 3D auto-encoder is removed once the parametric model is trained. During training, this pipeline optimizes the encoders for both the depth image and the 3D volumetric data, together with the shared
18 | generator.
19 |
20 | ## Discriminators
21 | ![discriminators](3dv/discriminators.png)
22 | To make the latent representation and the reconstructed 3D scene each resemble its target, we apply a separate discriminator to each of the two. In this manner, the latent representation of the depth image produces the expected target more precisely, closing the gap to the latent representation of the ground-truth volumetric data.
23 |
24 | ## Our data format
25 | ![data format](3dv/data_format.png)
26 |
27 | ## Qualitative results
28 | ![qualitative results](3dv/qualitative_results.png)
29 |
--------------------------------------------------------------------------------
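A loose illustration of the adversarial objectives described in the README above (a sketch only; the repository's actual networks are defined in model.py, which is not shown in this dump): the latent discriminator treats codes from the volume encoder as real and codes from the depth encoder as fake, while the scene discriminator does the same for reconstructed volumes. Non-saturating GAN losses over discriminator logits:

```python
import numpy as np

def gan_losses(logits_real, logits_fake, eps=1e-8):
    """Non-saturating GAN losses given discriminator logits (illustrative sketch)."""
    sig = lambda x: 1.0 / (1.0 + np.exp(-x))
    # Discriminator: score real latents/volumes as 1, generated ones as 0.
    d_loss = -np.mean(np.log(sig(logits_real) + eps)
                      + np.log(1.0 - sig(logits_fake) + eps))
    # Generator/encoder: make generated samples score as real.
    g_loss = -np.mean(np.log(sig(logits_fake) + eps))
    return d_loss, g_loss

d_loss, g_loss = gan_losses(np.random.randn(8), np.random.randn(8))  # toy logits
```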
/config.py:
--------------------------------------------------------------------------------
1 | from easydict import EasyDict as edict
2 |
3 | __C = edict()
4 | cfg = __C
5 |
6 | #
7 | # Common
8 | #
9 | __C.SUB_CONFIG_FILE = []
10 |
11 | __C.CONST = edict()
12 | __C.CONST.N_VOX = [80, 48, 80]
13 | __C.CONST.N_DEP = [320, 240, 1]
14 | __C.CONST.BATCH_SIZE = 16
15 | __C.SAVER_MAX = 1000
16 | __C.CHECK_FREQ = 1000
17 | __C.RECORD_VOX_NUM = 10
18 | __C.SWITCHING_ITE = 75001
19 |
20 | # Network
21 | __C.NET = edict()
22 | __C.NET.DIM_Z = 16
23 | __C.NET.DIM = [512, 256, 128, 64, 12]
24 | __C.NET.START_VOX = [5, 3, 5]
25 | __C.NET.KERNEL = [[5, 5, 5, 5, 5], [3, 3, 3, 3, 3], [5, 5, 5, 5, 5]]
26 | __C.NET.STRIDE = [1, 2, 2, 2, 1]
27 | __C.NET.REFINE_CH = 32
28 | __C.NET.REFINE_KERNEL = 3
29 |
30 | #
31 | # Directories
32 | #
33 | __C.DIR = edict()
34 | # Path where taxonomy.json is stored
35 | # __C.DIR.SCENE_ID_PATH = '../3D-FCR-alphaGAN/Scenevox'
36 | # __C.DIR.VOXEL_PATH = '../3D-FCR-alphaGAN/Scenevox/%s/%s'
37 | __C.DIR.ROOT_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/train/voxel_semantic_npy'
38 | __C.DIR.VOXEL_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/train/voxel_semantic_npy/%s'
39 | # depth--start
40 | __C.DIR.DEPTH_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/train/depth_npy/%s'
41 | __C.DIR.TSDF_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/train/depth_tsdf_npy/%s'
42 | # depth--end
43 | __C.DIR.CHECK_POINT_PATH = './Checkpt'
44 | __C.DIR.CHECK_PT_PATH = './Checkpt/checkpoint'
45 | __C.DIR.TRAIN_OBJ_PATH = './train_vox'
46 | __C.DIR.EVAL_PATH = './eval'
47 | __C.DIR.LOG_PATH = './log'
48 |
49 | #
50 | # Training
51 | #
52 | __C.TRAIN = edict()
53 |
54 | __C.TRAIN.DATASET_PORTION = [0, 0.9]
55 | __C.TRAIN.NUM_EPOCH = 500 # maximum number of training epochs
56 |
57 | # Learning
58 | __C.LEARNING_RATE_G = 0.0001
59 | __C.LEARNING_RATE_D = 0.0001
60 | __C.LEARNING_RATE_V = [0.0001, 1000, 0.0001]
61 | __C.TRAIN.ADAM_BETA_G = 0.5
62 | __C.TRAIN.ADAM_BETA_D = 0.5
63 | __C.LAMDA_RECONS = 1
64 | __C.LAMDA_GAMMA = 0.97
65 |
66 |
67 | def cfg_from_file(filename):
68 |     """Load a config file and merge it into the default options."""
69 |     import yaml
70 |     with open(filename, 'r') as f:
71 |         yaml_cfg = edict(yaml.safe_load(f))  # safe_load avoids executing arbitrary YAML tags
72 |
73 |     _merge_a_into_b(yaml_cfg, __C)
74 |
75 |
76 | def cfg_from_list(cfg_list):
77 |     """Set config keys via list (e.g., from command line)."""
78 |     from ast import literal_eval
79 |     assert len(cfg_list) % 2 == 0
80 |     for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
81 |         key_list = k.split('.')
82 |         d = __C
83 |         for subkey in key_list[:-1]:
84 |             assert subkey in d.keys()
85 |             d = d[subkey]
86 |         subkey = key_list[-1]
87 |         assert subkey in d.keys()
88 |         try:
89 |             value = literal_eval(v)
90 |         except (ValueError, SyntaxError):
91 |             # v is a plain string literal
92 |             value = v
93 |         assert type(value) == type(d[subkey]), \
94 |             'type {} does not match original type {}'.format(
95 |                 type(value), type(d[subkey]))
96 |         d[subkey] = value
97 |
--------------------------------------------------------------------------------
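Note that `cfg_from_file` calls `_merge_a_into_b`, which is not defined in this file. A minimal sketch of such a helper, assuming the usual Fast R-CNN-style config pattern (an assumption, not the repository's own code):

```python
def _merge_a_into_b(a, b):
    """Recursively merge config dict a into dict b (sketch; keys must already exist in b)."""
    for k, v in a.items():
        assert k in b, 'unknown config key: {}'.format(k)
        if isinstance(v, dict):
            _merge_a_into_b(v, b[k])  # recurse into nested sections like CONST or NET
        else:
            b[k] = v
```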
/config_test.py:
--------------------------------------------------------------------------------
1 | from easydict import EasyDict as edict
2 |
3 | __C = edict()
4 | cfg_test = __C
5 |
6 | #
7 | # Common
8 | #
9 | __C.SUB_CONFIG_FILE = []
10 |
11 | __C.CONST = edict()
12 | __C.CONST.N_VOX = [80, 48, 80]
13 | __C.CONST.N_DEP = [320, 240, 1]
14 | __C.CONST.BATCH_SIZE = 2
15 | __C.SAVER_MAX = 100
16 | __C.CHECK_FREQ = 100
17 | __C.RECORD_VOX_NUM = 10
18 | __C.SWITCHING_ITE = 75001
19 |
20 | # Network
21 | __C.NET = edict()
22 | __C.NET.DIM_Z = 16
23 | __C.NET.DIM = [512, 256, 128, 64, 12]
24 | __C.NET.START_VOX = [5, 3, 5]
25 | __C.NET.KERNEL = [[5, 5, 5, 5, 5], [3, 3, 3, 3, 3], [5, 5, 5, 5, 5]]
26 | __C.NET.STRIDE = [1, 2, 2, 2, 1]
27 | __C.NET.REFINE_CH = 32
28 | __C.NET.REFINE_KERNEL = 3
29 |
30 | #
31 | # Directories
32 | #
33 | __C.DIR = edict()
34 | # Path where taxonomy.json is stored
35 | # __C.DIR.SCENE_ID_PATH = '../3D-FCR-alphaGAN/Scenevox'
36 | # __C.DIR.VOXEL_PATH = '../3D-FCR-alphaGAN/Scenevox/%s/%s'
37 | __C.DIR.ROOT_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/test/voxel_semantic_npy'
38 | __C.DIR.VOXEL_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/test/voxel_semantic_npy/%s'
39 | # depth--start
40 | __C.DIR.DEPTH_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/test/depth_npy/%s'
41 | __C.DIR.TSDF_PATH = '/media/wangyida/D0-P1/database/SUNCG_Yida/test/depth_tsdf_npy/%s'
42 | # depth--end
43 | __C.DIR.CHECK_POINT_PATH = './Checkpt'
44 | __C.DIR.CHECK_PT_PATH = './Checkpt/checkpoint'
45 | __C.DIR.TRAIN_OBJ_PATH = './test_vox'
46 | __C.DIR.EVAL_PATH = './eval'
47 | __C.DIR.LOG_PATH = './log'
48 |
49 | #
50 | # Training
51 | #
52 | __C.TRAIN = edict()
53 |
54 | __C.TRAIN.DATASET_PORTION = [0, 0.8]
55 | __C.TRAIN.NUM_EPOCH = 500 # maximum number of training epochs
56 |
57 | # Learning
58 | __C.LEARNING_RATE_G = 0.0001
59 | __C.LEARNING_RATE_D = 0.0001
60 | __C.LEARNING_RATE_V = [0.0001, 1000, 0.0001]
61 | __C.TRAIN.ADAM_BETA_G = 0.5
62 | __C.TRAIN.ADAM_BETA_D = 0.5
63 | __C.LAMDA_RECONS = 1
64 | __C.LAMDA_GAMMA = 0.97
65 |
66 |
67 | def cfg_from_file(filename):
68 |     """Load a config file and merge it into the default options."""
69 |     import yaml
70 |     with open(filename, 'r') as f:
71 |         yaml_cfg = edict(yaml.safe_load(f))  # safe_load avoids executing arbitrary YAML tags
72 |
73 |     _merge_a_into_b(yaml_cfg, __C)
74 |
75 |
76 | def cfg_from_list(cfg_list):
77 |     """Set config keys via list (e.g., from command line)."""
78 |     from ast import literal_eval
79 |     assert len(cfg_list) % 2 == 0
80 |     for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
81 |         key_list = k.split('.')
82 |         d = __C
83 |         for subkey in key_list[:-1]:
84 |             assert subkey in d.keys()
85 |             d = d[subkey]
86 |         subkey = key_list[-1]
87 |         assert subkey in d.keys()
88 |         try:
89 |             value = literal_eval(v)
90 |         except (ValueError, SyntaxError):
91 |             # v is a plain string literal
92 |             value = v
93 |         assert type(value) == type(d[subkey]), \
94 |             'type {} does not match original type {}'.format(
95 |                 type(value), type(d[subkey]))
96 |         d[subkey] = value
97 |
--------------------------------------------------------------------------------
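`cfg_from_list` expects alternating key/value pairs and coerces each value to the type already stored in the config; a hypothetical usage:

```python
from config_test import cfg_test, cfg_from_list

# Override the batch size and latent dimensionality, e.g. from command-line args.
cfg_from_list(['CONST.BATCH_SIZE', '4', 'NET.DIM_Z', '32'])
print(cfg_test.CONST.BATCH_SIZE)  # -> 4
```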
/data/bin2camera.py:
--------------------------------------------------------------------------------
1 | from struct import *
2 | import numpy as np
3 | # I considered using the multiprocessing package, but this sequential version is fast enough.
4 | # Contributions of a multiprocessing variant to speed up reading are welcome.
5 | # from joblib import Parallel, delayed
6 | import multiprocessing
7 | import time
8 | from scipy import misc
9 | import os
10 | import argparse
11 | from progressbar import ProgressBar
12 | from skimage.measure import block_reduce
13 |
14 |
15 | def bin2camera(file):
16 |     start_time = time.time()
17 |     with open(file, 'rb') as f:  # binary mode; required on Python 3
18 |         float_size = 4
19 |         uint_size = 4
20 |         total_count = 0
21 |         cor = f.read(float_size * 3)
22 |         cors = unpack('fff', cor)
23 |         # print("cors is {}".format(cors))
24 |         cam = f.read(float_size * 16)
25 |         cams = unpack('f' * 16, cam)
26 |         cams = np.array(cams)
27 |         cams = np.reshape(cams, [4, 4])
28 |         # cams = np.linalg.inv(cams)
29 |         # print("cams %16f", cams)
30 |     # explicit f.close() is unnecessary inside a `with` block
31 |     # print("reading voxel file takes {} mins".format((time.time() - start_time) / 60))
32 |     return cams, cors
33 |
34 |
35 | class ScanFile(object):
36 |     def __init__(self, directory, prefix=None, postfix='.bin'):
37 |         self.directory = directory
38 |         self.prefix = prefix
39 |         self.postfix = postfix
40 |
41 |     def scan_files(self):
42 |         files_list = []
43 |
44 |         for dirpath, dirnames, filenames in os.walk(self.directory):
45 |             for special_file in filenames:
46 |                 if self.postfix:
47 |                     if special_file.endswith(self.postfix):
48 |                         files_list.append(os.path.join(dirpath, special_file))
49 |                 elif self.prefix:
50 |                     if special_file.startswith(self.prefix):
51 |                         files_list.append(os.path.join(dirpath, special_file))
52 |                 else:
53 |                     files_list.append(os.path.join(dirpath, special_file))
54 |
55 |         return files_list
56 |
57 |     def scan_subdir(self):
58 |         subdir_list = []
59 |         for dirpath, dirnames, files in os.walk(self.directory):
60 |             subdir_list.append(dirpath)
61 |         return subdir_list
62 |
63 |
64 | if __name__ == "__main__":
65 |
66 |     parser = argparse.ArgumentParser(description='Parser added')
67 |     parser.add_argument(
68 |         '-s',
69 |         action="store",
70 |         dest="dir_src",
71 |         default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000",
72 |         help='folder of paired depth and voxel')
73 |     parser.add_argument(
74 |         '-t',
75 |         action="store",
76 |         dest="dir_tar",
77 |         default="/media/wangyida/D0-P1/database/SUNCG_Yida/train",
78 |         help='for storing generated npy')
79 |     parser.print_help()
80 |     results = parser.parse_args()
81 |
82 |     # folder of paired depth and voxel
83 |     dir_src = results.dir_src
84 |     # for storing generated npy
85 |     dir_tar = results.dir_tar
86 |
87 |     # scan for semantic voxel files
88 |     dir_camera = dir_tar + '/camera/'
89 |     dir_origin = dir_tar + '/origin/'
90 |     scan_bin = ScanFile(directory=dir_src, postfix='.bin')
91 |     files_bin = scan_bin.scan_files()
92 |
93 |     # making directories
94 |     try:
95 |         os.stat(dir_camera)
96 |     except OSError:
97 |         os.mkdir(dir_camera)
98 |
99 |     try:
100 |         os.stat(dir_origin)
101 |     except OSError:
102 |         os.mkdir(dir_origin)
103 |
104 |     # save camera poses and origins as text files
105 |     pbar1 = ProgressBar()
106 |     for file_bin in pbar1(files_bin):
107 |         cams, cors = bin2camera(file=file_bin)
108 |         name_start = file_bin.rfind('/')
109 |         name_end = file_bin.find('.', name_start)
110 |         np.savetxt(dir_camera + file_bin[name_start:name_end] + '.txt', cams)
111 |         np.savetxt(dir_origin + file_bin[name_start:name_end] + '.txt', cors)
112 |
--------------------------------------------------------------------------------
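The header layout parsed by `bin2camera` is three float32 values (the voxel-grid origin) followed by a row-major 4x4 float32 camera pose. The text files it writes can be read back with NumPy; the paths below are hypothetical:

```python
import numpy as np

cams = np.loadtxt('SUNCG_Yida/train/camera/scene_0000.txt')  # (4, 4) camera pose
cors = np.loadtxt('SUNCG_Yida/train/origin/scene_0000.txt')  # (3,) voxel-grid origin
assert cams.shape == (4, 4) and cors.shape == (3,)
```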
/data/depthbin2npy.py:
--------------------------------------------------------------------------------
1 | from struct import *
2 | import numpy as np
3 | # I considered using the multiprocessing package, but this sequential version is fast enough.
4 | # Contributions of a multiprocessing variant to speed up reading are welcome.
5 | # from joblib import Parallel, delayed
6 | import multiprocessing
7 | import time
8 | from scipy import misc
9 | import os
10 | import argparse
11 | from progressbar import ProgressBar
12 | from skimage.measure import block_reduce
13 |
14 |
15 | def bin2array(file):
16 |     start_time = time.time()
17 |     with open(file, 'rb') as f:  # binary mode; required on Python 3
18 |         float_size = 4
19 |         uint_size = 4
20 |         total_count = 0
21 |         cor = f.read(float_size * 3)
22 |         cors = unpack('fff', cor)
23 |         # print("cors is {}".format(cors))
24 |         cam = f.read(float_size * 16)
25 |         cams = unpack('f' * 16, cam)
26 |         # print("cams %16f", cams)
27 |         vox = f.read()
28 |         numC = len(vox) // uint_size  # integer division for Python 3
29 |         # print('numC is {}'.format(numC))
30 |         checkVoxValIter = unpack('I' * numC, vox)
31 |         checkVoxVal = checkVoxValIter[0::2]   # run-length values
32 |         checkVoxIter = checkVoxValIter[1::2]  # run-length counts
33 |         checkVox = [
34 |             i for (val, repeat) in zip(checkVoxVal, checkVoxIter)
35 |             for i in np.tile(val, repeat)
36 |         ]
37 |         # print('checkVox shape is {}'.format(len(checkVox)))
38 |         checkVox = np.reshape(checkVox, (240, 144, 240))
39 |         checkVox = block_reduce(checkVox, block_size=(3, 3, 3), func=np.max)
40 |     # explicit f.close() is unnecessary inside a `with` block
41 |     # print("reading voxel file takes {} mins".format((time.time() - start_time) / 60))
42 |     return checkVox
43 |
44 |
45 | def png2array(file):
46 |     image = misc.imread(file)
47 |     image = misc.imresize(image, 50)  # 50% downscale (scipy.misc API, removed in newer SciPy)
48 |     return image
49 |
50 |
51 | class ScanFile(object):
52 |     def __init__(self, directory, prefix=None, postfix='.bin'):
53 |         self.directory = directory
54 |         self.prefix = prefix
55 |         self.postfix = postfix
56 |
57 |     def scan_files(self):
58 |         files_list = []
59 |
60 |         for dirpath, dirnames, filenames in os.walk(self.directory):
61 |             for special_file in filenames:
62 |                 if self.postfix:
63 |                     if special_file.endswith(self.postfix):
64 |                         files_list.append(os.path.join(dirpath, special_file))
65 |                 elif self.prefix:
66 |                     if special_file.startswith(self.prefix):
67 |                         files_list.append(os.path.join(dirpath, special_file))
68 |                 else:
69 |                     files_list.append(os.path.join(dirpath, special_file))
70 |
71 |         return files_list
72 |
73 |     def scan_subdir(self):
74 |         subdir_list = []
75 |         for dirpath, dirnames, files in os.walk(self.directory):
76 |             subdir_list.append(dirpath)
77 |         return subdir_list
78 |
79 |
80 | if __name__ == "__main__":
81 |
82 |     parser = argparse.ArgumentParser(description='Parser added')
83 |     parser.add_argument(
84 |         '-s',
85 |         action="store",
86 |         dest="dir_src",
87 |         default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000",
88 |         help='folder of paired depth and voxel')
89 |     parser.add_argument(
90 |         '-td',
91 |         action="store",
92 |         dest="dir_tar_depth",
93 |         default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000_depvox",
94 |         help='for storing generated npy')
95 |     parser.add_argument(
96 |         '-tv',
97 |         action="store",
98 |         dest="dir_tar_voxel",
99 |         default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000_depvox",
100 |         help='for storing generated npy')
101 |     parser.print_help()
102 |     results = parser.parse_args()
103 |
104 |     # folder of paired depth and voxel
105 |     dir_src = results.dir_src
106 |     # for storing generated npy
107 |     dir_tar_depth = results.dir_tar_depth
108 |     dir_tar_voxel = results.dir_tar_voxel
109 |
110 |     # scan for depth files
111 |     scan_png = ScanFile(directory=dir_src, postfix='.png')
112 |     files_png = scan_png.scan_files()
113 |
114 |     # scan for semantic voxel files
115 |     scan_bin = ScanFile(directory=dir_src, postfix='.bin')
116 |     files_bin = scan_bin.scan_files()
117 |
118 |     # making directories
119 |     try:
120 |         os.stat(dir_tar_voxel)
121 |     except OSError:
122 |         os.mkdir(dir_tar_voxel)
123 |     try:
124 |         os.stat(dir_tar_depth)
125 |     except OSError:
126 |         os.mkdir(dir_tar_depth)
127 |
128 |     pbar1 = ProgressBar()
129 |     # save depth as npy files
130 |     for file_png in pbar1(files_png):
131 |         depth = png2array(file=file_png)
132 |         name_start = file_png.rfind('/')
133 |         name_end = file_png.find('.', name_start)
134 |         np.save(dir_tar_depth + file_png[name_start:name_end] + '.npy', depth)
135 |
136 |     # save voxel as npy files
137 |     pbar2 = ProgressBar()
138 |     for file_bin in pbar2(files_bin):
139 |         voxel = bin2array(file=file_bin)
140 |         name_start = file_bin.rfind('/')
141 |         name_end = file_bin.find('.', name_start)
142 |         np.save(dir_tar_voxel + file_bin[name_start:name_end] + '.npy', voxel)
143 |
--------------------------------------------------------------------------------
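The list comprehension in `bin2array` expands interleaved (value, count) run-length pairs one element at a time; `np.repeat` performs the same decoding in one vectorized call, and the 3x3x3 max-pooling via `block_reduce` brings the 240x144x240 grid down to the 80x48x80 resolution set as `N_VOX` in config.py. A sketch of the vectorized variant:

```python
import numpy as np
from skimage.measure import block_reduce

def rle_to_voxels(pairs):
    """Decode interleaved (value, count) run-length pairs into an 80x48x80 label grid."""
    vals = np.asarray(pairs[0::2])    # semantic label of each run
    counts = np.asarray(pairs[1::2])  # length of each run
    vox = np.repeat(vals, counts).reshape(240, 144, 240)
    return block_reduce(vox, block_size=(3, 3, 3), func=np.max)
```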
/data/depthbin2npy_tsdf.py:
--------------------------------------------------------------------------------
1 | from struct import *
2 | from subprocess import call
3 | import numpy as np
4 | # I considered using the multiprocessing package, but this sequential version is fast enough.
5 | # Contributions of a multiprocessing variant to speed up reading are welcome.
6 | # from joblib import Parallel, delayed
7 | import multiprocessing
8 | import time
9 | from scipy import misc
10 | import os
11 | import argparse
12 | from progressbar import ProgressBar
13 | from skimage.measure import block_reduce
14 |
15 |
16 | def bin2array(file):
17 |     start_time = time.time()
18 |     with open(file, 'rb') as f:  # binary mode; required on Python 3
19 |         float_size = 4
20 |         uint_size = 4
21 |         total_count = 0
22 |         """
23 |         cor = f.read(float_size*3)
24 |         cors = unpack('fff', cor)
25 |         print("cors is {}",cors)
26 |         tmp = f.read(float_size*5)
27 |         tmps = unpack('f'*5, tmp)
28 |         print("cams %16f",cams)
29 |         """
30 |         vox = f.read()
31 |         numC = len(vox) // float_size  # integer division for Python 3
32 |         # print('numC is {}'.format(numC))
33 |         checkVox = unpack('I' * numC, vox)
34 |         # print('checkVox shape is {}'.format(len(checkVox)))
35 |         checkVox = np.reshape(checkVox, (48, 80, 80))
36 |         checkVox = np.swapaxes(checkVox, 0, 1)
37 |         checkVox = np.swapaxes(checkVox, 0, 2)
38 |         # checkVox = np.flip(checkVox, 0)
39 |         checkVox = np.where(checkVox < 1.0, 1, 0)
40 |         # checkVox = block_reduce(checkVox, block_size=(3, 3, 3), func=np.max)
41 |     # explicit f.close() is unnecessary inside a `with` block
42 |     # print("reading voxel file takes {} mins".format((time.time() - start_time) / 60))
43 |     return checkVox
44 |
45 |
46 | def png2array(file):
47 |     image = misc.imread(file)
48 |     image = misc.imresize(image, 50)  # 50% downscale (scipy.misc API, removed in newer SciPy)
49 |     return image
50 |
51 |
52 | class ScanFile(object):
53 |     def __init__(self, directory, prefix=None, postfix='.bin'):
54 |         self.directory = directory
55 |         self.prefix = prefix
56 |         self.postfix = postfix
57 |
58 |     def scan_files(self):
59 |         files_list = []
60 |
61 |         for dirpath, dirnames, filenames in os.walk(self.directory):
62 |             for special_file in filenames:
63 |                 if self.postfix:
64 |                     if special_file.endswith(self.postfix):
65 |                         files_list.append(os.path.join(dirpath, special_file))
66 |                 elif self.prefix:
67 |                     if special_file.startswith(self.prefix):
68 |                         files_list.append(os.path.join(dirpath, special_file))
69 |                 else:
70 |                     files_list.append(os.path.join(dirpath, special_file))
71 |
72 |         return files_list
73 |
74 |     def scan_subdir(self):
75 |         subdir_list = []
76 |         for dirpath, dirnames, files in os.walk(self.directory):
77 |             subdir_list.append(dirpath)
78 |         return subdir_list
79 |
80 |
81 | def process_data(file_depth):
82 |     img_path = file_depth
83 |     camera_intrinsic = "./depth-tsdf/data/camera-intrinsics.txt"
84 |     camera_extrinsic = img_path.replace("depth_real_png", "camera")
85 |     camera_extrinsic = camera_extrinsic.replace(".png", ".txt")
86 |     camera_origin = camera_extrinsic.replace("camera", "origin")
87 |     call([
88 |         "./depth-tsdf/demo", camera_intrinsic, camera_origin, camera_extrinsic,
89 |         img_path
90 |     ])
91 |     voxel = bin2array(file="./tsdf.bin")
92 |     name_start = img_path.rfind('/')
93 |     name_end = img_path.find('.', name_start)
94 |     # save numpy
95 |     np.save(dir_voxel + img_path[name_start:name_end] + '.npy', voxel)
96 |
97 |     # save ply
98 |     call(
99 |         ["cp", "./tsdf.ply", dir_ply + img_path[name_start:name_end] + '.ply'])
100 |
101 |
102 | if __name__ == "__main__":
103 |
104 |     parser = argparse.ArgumentParser(description='Parser added')
105 |     parser.add_argument(
106 |         '-s',
107 |         action="store",
108 |         dest="dir_src",
109 |         default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000",
110 |         help='folder of paired depth and voxel')
111 |     parser.add_argument(
112 |         '-tv',
113 |         action="store",
114 |         dest="dir_tar",
115 |         default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000_depvox",
116 |         help='for storing generated npy')
117 |     parser.add_argument(
118 |         '-tp',
119 |         action="store",
120 |         dest="dir_ply",
121 |         default="/media/wangyida/D0-P1/database/SUNCGtrain_3001_5000_depvox",
122 |         help='for storing generated ply')
123 |     parser.print_help()
124 |     results = parser.parse_args()
125 |
126 |     # folder of paired depth and voxel
127 |     dir_src = results.dir_src
128 |     # for storing generated npy
129 |     dir_voxel = results.dir_tar
130 |     dir_ply = results.dir_ply
131 |
132 |     # scan for depth files
133 |     scan_png = ScanFile(directory=dir_src, postfix='.png')
134 |     files_png = scan_png.scan_files()
135 |
136 |     # making directories
137 |     try:
138 |         os.stat(dir_voxel)
139 |     except OSError:
140 |         os.mkdir(dir_voxel)
141 |
142 |     try:
143 |         os.stat(dir_ply)
144 |     except OSError:
145 |         os.mkdir(dir_ply)
146 |
147 |     # save voxel as npy files
148 |     pbar = ProgressBar()
149 |     """
150 |     from joblib import Parallel, delayed
151 |     import multiprocessing
152 |     num_cores = multiprocessing.cpu_count()
153 |     Parallel(n_jobs=num_cores)(delayed(process_data)(file_depth) for file_depth in pbar(files_png))
154 |     """
155 |     for file_depth in pbar(files_png):
156 |         process_data(file_depth)
157 |
--------------------------------------------------------------------------------
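Each depth map is processed independently, so this loop parallelizes naturally; the joblib snippet above (corrected to `delayed(process_data)(file_depth)`, since `delayed(process_data(file_depth))` would run the function eagerly) is one option, and `multiprocessing.Pool` is another. One caveat grounded in the sources here: `back-project.cu` takes the output `.bin` path as its fifth argument but always writes `tsdf.ply` to the working directory, so a fully parallel run would also need per-worker handling for the `.ply`. A sketch under these assumptions, swapping the `demo` binary for the `back-project` target built by compile.sh:

```python
import glob
import os
from multiprocessing import Pool
from subprocess import call

def process_one(img_path):
    # Derive pose and origin files from the depth image path, mirroring process_data.
    camera_extrinsic = img_path.replace("depth_real_png", "camera").replace(".png", ".txt")
    camera_origin = camera_extrinsic.replace("camera", "origin")
    # Per-worker output path so workers do not clobber a shared ./tsdf.bin.
    out_bin = "/tmp/tsdf_%d.bin" % os.getpid()
    call(["./depth-tsdf/back-project",
          "./depth-tsdf/data/camera-intrinsics.txt",
          camera_origin, camera_extrinsic, img_path, out_bin])
    return out_bin

if __name__ == "__main__":
    files_png = glob.glob("./depth-tsdf/data/depth_real_png/*.png")
    with Pool() as pool:
        out_bins = pool.map(process_one, files_png)
```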
/depth-tsdf/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/.DS_Store
--------------------------------------------------------------------------------
/depth-tsdf/README.md:
--------------------------------------------------------------------------------
1 | # Volumetric TSDF Fusion of Multiple Depth Maps
2 |
3 | 
4 |
5 | CUDA/C++ code to fuse multiple registered depth maps into a projective truncated signed distance function (TSDF) voxel volume, which can then be used to create high quality 3D surface meshes and point clouds. Tested on Ubuntu 14.04 and 16.04.
6 |
7 | Looking for an older version? See [here](old-version).
8 |
9 | This repository is a part of [Andy's Code Collection](http://andyzeng.github.io/).
10 |
11 | ## Change Log
12 | * **Nov. 1, 2017.** Bug fix: `tsdf2mesh.m` now properly generates a mesh in camera coordinates instead of voxel coordinates.
13 | * **Oct. 30, 2017.** Notice: changed default weight threshold for `SaveVoxelGrid2SurfacePointCloud` in demo code to enable creating point cloud visualizations with only one depth frame.
14 | * **Aug. 30, 2017.** Bug fix: remove deprecated offsets from surface distance compute during integration.
15 |
16 | ## Requirements
17 | * NVIDIA GPU with [CUDA](https://developer.nvidia.com/cuda-downloads) support
18 | * [OpenCV](http://opencv.org/) (tested with OpenCV 2.4.11)
19 |
20 | ## Demo
21 | This demo fuses 50 registered depth maps from directory `data/rgbd-frames` into a projective TSDF voxel volume, and creates a 3D surface point cloud `tsdf.ply`, which can be visualized with a 3D viewer like [Meshlab](http://www.meshlab.net/).
22 |
23 | **Note**: Input depth maps should be saved in format: 16-bit PNG, depth in millimeters.
24 |
25 | ```shell
26 | ./compile.sh # compiles demo executable
27 | ./demo # 3D point cloud saved to tsdf.ply and voxel grid saved to tsdf.bin
28 | ```
29 |
30 | [Optional] This demo also saves the computed voxel volume into a binary file `tsdf.bin`. Run the following script in Matlab to create a 3D surface mesh `mesh.ply`, which can be visualized with [Meshlab](http://www.meshlab.net/).
31 |
32 | ```matlab
33 | tsdf2mesh; % 3D mesh saved to mesh.ply
34 | ```
35 |
36 | ## Seen in
37 | * [3DMatch: Learning Local Geometric Descriptors from RGB-D Reconstructions (CVPR 2017)](http://3dmatch.cs.princeton.edu/)
38 | * [Semantic Scene Completion from a Single Depth Image (CVPR 2017)](http://sscnet.cs.princeton.edu/)
39 | * [Deep Sliding Shapes for Amodal 3D Object Detection in RGB-D Images (CVPR 2016)](http://dss.cs.princeton.edu/)
40 |
41 | ## References
42 | * [A Volumetric Method for Building Complex Models from Range Images (SIGGRAPH 1996)](https://graphics.stanford.edu/papers/volrange/volrange.pdf)
43 | * [KinectFusion: Real-Time Dense Surface Mapping and Tracking (ISMAR 2011)](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/ismar2011.pdf)
44 | * [Scene Coordinate Regression Forests for Camera Relocalization in RGB-D Images (CVPR 2013)](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/RelocForests.pdf)
45 |
--------------------------------------------------------------------------------
/depth-tsdf/back-project:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/back-project
--------------------------------------------------------------------------------
/depth-tsdf/back-project.cu:
--------------------------------------------------------------------------------
1 | // ---------------------------------------------------------
2 | // Author: Andy Zeng, Princeton University, 2016
3 | // ---------------------------------------------------------
4 |
5 | #include <iostream>
6 | #include <fstream>
7 | #include <iomanip>
8 | #include <sstream>
9 | #include <string>
10 | #include "utils.hpp"
11 |
12 | // CUDA kernel function to integrate a TSDF voxel volume given depth images
13 | __global__
14 | void Integrate(float * cam_K, float * cam2base, float * depth_im,
15 | int im_height, int im_width, int voxel_grid_dim_x, int voxel_grid_dim_y, int voxel_grid_dim_z,
16 | float voxel_grid_origin_x, float voxel_grid_origin_y, float voxel_grid_origin_z, float voxel_size, float trunc_margin,
17 | float * voxel_grid_TSDF) {
18 |
19 | int pt_grid_z = blockIdx.x;
20 | int pt_grid_y = threadIdx.x;
21 |
22 | for (int pt_grid_x = 0; pt_grid_x < voxel_grid_dim_x; ++pt_grid_x) {
23 |
24 | // Convert voxel center from grid coordinates to base frame camera coordinates
25 | float pt_base_x = voxel_grid_origin_x + pt_grid_x * voxel_size;
26 | float pt_base_y = voxel_grid_origin_y + pt_grid_y * voxel_size;
27 | float pt_base_z = voxel_grid_origin_z + pt_grid_z * voxel_size;
28 |
29 | // Convert from base frame camera coordinates to current frame camera coordinates
30 | float tmp_pt[3] = {0};
31 | tmp_pt[0] = pt_base_x - cam2base[0 * 4 + 3];
32 | tmp_pt[1] = pt_base_y - cam2base[1 * 4 + 3];
33 | tmp_pt[2] = pt_base_z - cam2base[2 * 4 + 3];
34 | float pt_cam_x = cam2base[0 * 4 + 0] * tmp_pt[0] + cam2base[1 * 4 + 0] * tmp_pt[1] + cam2base[2 * 4 + 0] * tmp_pt[2];
35 | float pt_cam_y = cam2base[0 * 4 + 1] * tmp_pt[0] + cam2base[1 * 4 + 1] * tmp_pt[1] + cam2base[2 * 4 + 1] * tmp_pt[2];
36 | float pt_cam_z = cam2base[0 * 4 + 2] * tmp_pt[0] + cam2base[1 * 4 + 2] * tmp_pt[1] + cam2base[2 * 4 + 2] * tmp_pt[2];
37 |
38 | int volume_idx = pt_grid_z * voxel_grid_dim_y * voxel_grid_dim_x + pt_grid_y * voxel_grid_dim_x + pt_grid_x;
39 | if (pt_cam_z <= 0) {
40 | voxel_grid_TSDF[volume_idx] = -2.0f;
41 | continue;
42 | }
43 |
44 | int pt_pix_x = roundf(cam_K[0 * 3 + 0] * (pt_cam_x / pt_cam_z) + cam_K[0 * 3 + 2]);
45 | int pt_pix_y = roundf(cam_K[1 * 3 + 1] * (pt_cam_y / pt_cam_z) + cam_K[1 * 3 + 2]);
46 | if (pt_pix_x < 0 || pt_pix_x >= im_width || pt_pix_y < 0 || pt_pix_y >= im_height) {
47 | voxel_grid_TSDF[volume_idx] = -2.0f;
48 | continue;
49 | }
50 |
51 | float depth_val = depth_im[pt_pix_y * im_width + pt_pix_x];
52 |
53 | if (depth_val > 8) {
54 | voxel_grid_TSDF[volume_idx] = -2.0f;
55 | continue;
56 | }
57 |
58 | float diff = depth_val - pt_cam_z;
59 |
60 | // This is for labeling the -1 space (occluded space)
61 | if (diff < -0.1 || depth_val == 0.0) {
62 | voxel_grid_TSDF[volume_idx] = 2.0f;
63 | continue;
64 | }
65 |
66 | // This is for labeling the empty space
67 | if (diff > 0.1) {
68 | voxel_grid_TSDF[volume_idx] = -1.0f;
69 | continue;
70 | }
71 |
72 | // Integrate
73 | // float dist = fmin(1.0f, diff / trunc_margin);
74 | // float weight_old = voxel_grid_weight[volume_idx];
75 | // float weight_new = weight_old + 1.0f;
76 | // voxel_grid_weight[volume_idx] = weight_new;
77 | // voxel_grid_TSDF[volume_idx] = (voxel_grid_TSDF[volume_idx] * weight_old + dist) / weight_new;
78 | if (abs(diff) < 0.1) {
79 | voxel_grid_TSDF[volume_idx] = 1.0f;
80 | }
81 | }
82 | }
83 |
84 | // Loads a binary file with depth data and generates a TSDF voxel volume (5m x 5m x 5m at 1cm resolution)
85 | // Volume is aligned with respect to the camera coordinates of the first frame (a.k.a. base frame)
86 | int main(int argc, char * argv[]) {
87 |
88 | // Location of camera intrinsic file
89 | std::string cam_K_file = "data/camera-intrinsics.txt";
90 | std::string cam_origin_file = "data/origin/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.txt";
91 | std::string base2world_file = "data/camera/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.txt";
92 | std::string depth_im_file = "data/depth_real_png/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png";
93 | std::string tsdf_bin_file = "tsdf.bin";
94 |
95 | // Location of folder containing RGB-D frames and camera pose files
96 | // std::string data_path = "data/rgbd-frames-yida";
97 |
98 | float cam_K[3 * 3];
99 | float cam_origin[3 * 1];
100 | float base2world[4 * 4];
101 | float cam2base[4 * 4];
102 | float cam2world[4 * 4];
103 | int im_width = 640;
104 | int im_height = 480;
105 | float depth_im[im_height * im_width];
106 |
107 | // Voxel grid parameters (change these to change voxel grid resolution, etc.)
108 | float voxel_grid_origin_x = 43.15f; // Location of voxel grid origin in base frame camera coordinates
109 | float voxel_grid_origin_y = 50.88f;
110 | float voxel_grid_origin_z = 0.05f;
111 | float voxel_size = 0.06f;
112 | float trunc_margin = 0.72f;//voxel_size * 5;
113 | int voxel_grid_dim_x = 80;
114 | int voxel_grid_dim_y = 80;
115 | int voxel_grid_dim_z = 48;
116 |
117 | // Manual parameters
118 | if (argc > 1) {
119 | cam_K_file = argv[1];
120 | cam_origin_file = argv[2];
121 | base2world_file = argv[3];
122 | depth_im_file = argv[4];
123 | tsdf_bin_file = argv[5];
124 | }
125 |
126 | // Read camera intrinsics
127 | std::vector<float> cam_K_vec = LoadMatrixFromFile(cam_K_file, 3, 3);
128 | std::copy(cam_K_vec.begin(), cam_K_vec.end(), cam_K);
129 | std::vector<float> cam_origin_vec = LoadMatrixFromFile(cam_origin_file, 3, 1);
130 | std::copy(cam_origin_vec.begin(), cam_origin_vec.end(), cam_origin);
131 | voxel_grid_origin_x = cam_origin[0];
132 | voxel_grid_origin_y = cam_origin[1];
133 | voxel_grid_origin_z = cam_origin[2];
134 |
135 | // Read base frame camera pose
136 | std::ostringstream base_frame_prefix;
137 | // base_frame_prefix << std::setw(6) << std::setfill('0') << base_frame_idx;
138 | // std::string base2world_file = data_path + "/frame-" + base_frame_prefix.str() + ".pose.txt";
139 | std::vector<float> base2world_vec = LoadMatrixFromFile(base2world_file, 4, 4);
140 | std::copy(base2world_vec.begin(), base2world_vec.end(), base2world);
141 |
142 | // Invert base frame camera pose to get world-to-base frame transform
143 | float base2world_inv[16] = {0};
144 | invert_matrix(base2world, base2world_inv);
145 |
146 | // Initialize voxel grid
147 | float * voxel_grid_TSDF = new float[voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z];
148 | for (int i = 0; i < voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z; ++i)
149 | voxel_grid_TSDF[i] = 0.0f;
150 |
151 | // Load variables to GPU memory
152 | float * gpu_voxel_grid_TSDF;
153 | cudaMalloc(&gpu_voxel_grid_TSDF, voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z * sizeof(float));
154 | checkCUDA(__LINE__, cudaGetLastError());
155 | cudaMemcpy(gpu_voxel_grid_TSDF, voxel_grid_TSDF, voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z * sizeof(float), cudaMemcpyHostToDevice);
156 | checkCUDA(__LINE__, cudaGetLastError());
157 | float * gpu_cam_K;
158 | float * gpu_cam2base;
159 | float * gpu_depth_im;
160 | cudaMalloc(&gpu_cam_K, 3 * 3 * sizeof(float));
161 | cudaMemcpy(gpu_cam_K, cam_K, 3 * 3 * sizeof(float), cudaMemcpyHostToDevice);
162 | cudaMalloc(&gpu_cam2base, 4 * 4 * sizeof(float));
163 | cudaMalloc(&gpu_depth_im, im_height * im_width * sizeof(float));
164 | checkCUDA(__LINE__, cudaGetLastError());
165 |
166 | // Loop through each depth frame and integrate TSDF voxel grid
167 |
168 | // std::ostringstream curr_frame_prefix;
169 | // curr_frame_prefix << std::setw(6) << std::setfill('0') << frame_idx;
170 |
171 | // // Read current frame depth
172 | // std::string depth_im_file = data_path + "/frame-" + curr_frame_prefix.str() + ".depth.png";
173 | ReadDepth(depth_im_file, im_height, im_width, depth_im);
174 |
175 | // Read base frame camera pose
176 | std::string cam2world_file = base2world_file; //data_path + "/frame-" + curr_frame_prefix.str() + ".pose.txt";
177 | std::vector<float> cam2world_vec = LoadMatrixFromFile(cam2world_file, 4, 4);
178 | std::copy(cam2world_vec.begin(), cam2world_vec.end(), cam2world);
179 |
180 | // Compute relative camera pose (camera-to-base frame)
181 | multiply_matrix(base2world_inv, cam2world, cam2base);
182 |
183 | // yida: here we should use base2world for rotation for alignment of the ground
184 | cudaMemcpy(gpu_cam2base, base2world, 4 * 4 * sizeof(float), cudaMemcpyHostToDevice);
185 | cudaMemcpy(gpu_depth_im, depth_im, im_height * im_width * sizeof(float), cudaMemcpyHostToDevice);
186 | checkCUDA(__LINE__, cudaGetLastError());
187 |
188 | // std::cout << "Fusing: " << depth_im_file << std::endl;
189 |
190 | Integrate <<< voxel_grid_dim_z, voxel_grid_dim_y >>>(gpu_cam_K, gpu_cam2base, gpu_depth_im,
191 | im_height, im_width, voxel_grid_dim_x, voxel_grid_dim_y, voxel_grid_dim_z,
192 | voxel_grid_origin_x, voxel_grid_origin_y, voxel_grid_origin_z, voxel_size, trunc_margin,
193 | gpu_voxel_grid_TSDF);
194 |
195 | // Load TSDF voxel grid from GPU to CPU memory
196 | cudaMemcpy(voxel_grid_TSDF, gpu_voxel_grid_TSDF, voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z * sizeof(float), cudaMemcpyDeviceToHost);
197 | // cudaMemcpy(voxel_grid_weight, gpu_voxel_grid_weight, voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z * sizeof(float), cudaMemcpyDeviceToHost);
198 | checkCUDA(__LINE__, cudaGetLastError());
199 |
200 | // Compute surface points from TSDF voxel grid and save to point cloud .ply file
201 | // std::cout << "Saving surface point cloud (tsdf.ply)..." << std::endl;
202 |
203 | SaveVoxelGrid2SurfacePointCloud("tsdf.ply", voxel_grid_dim_x, voxel_grid_dim_y, voxel_grid_dim_z,
204 | voxel_size, voxel_grid_origin_x, voxel_grid_origin_y, voxel_grid_origin_z,
205 | voxel_grid_TSDF);
206 |
207 | // Save TSDF voxel grid and its parameters to disk as binary file (float array)
208 | // std::cout << "Saving TSDF voxel grid values to disk (tsdf.bin)..." << std::endl;
209 | std::ofstream outFile(tsdf_bin_file, std::ios::binary | std::ios::out);
210 | /*
211 | float voxel_grid_dim_xf = (float) voxel_grid_dim_x;
212 | float voxel_grid_dim_yf = (float) voxel_grid_dim_y;
213 | float voxel_grid_dim_zf = (float) voxel_grid_dim_z;
214 | outFile.write((char*)&voxel_grid_dim_xf, sizeof(float));
215 | outFile.write((char*)&voxel_grid_dim_yf, sizeof(float));
216 | outFile.write((char*)&voxel_grid_dim_zf, sizeof(float));
217 | outFile.write((char*)&voxel_grid_origin_x, sizeof(float));
218 | outFile.write((char*)&voxel_grid_origin_y, sizeof(float));
219 | outFile.write((char*)&voxel_grid_origin_z, sizeof(float));
220 | outFile.write((char*)&voxel_size, sizeof(float));
221 | outFile.write((char*)&trunc_margin, sizeof(float));
222 | */
223 | for (int i = 0; i < voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z; ++i) {
224 | outFile.write((char*)&voxel_grid_TSDF[i], sizeof(float));
225 | }
226 | outFile.close();
227 |
228 | return 0;
229 | }
230 |
--------------------------------------------------------------------------------
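The `Integrate` kernel writes one float per voxel using a small label alphabet rather than a conventional TSDF: -2 for voxels behind the camera, outside the image, or with depth beyond 8 m; 2 for occluded space behind the observed surface; -1 for observed empty space; 1 for the surface band (|diff| < 0.1 m); and 0 where nothing was written. Since the linear index is `z * dimY * dimX + y * dimX + x` with (dimX, dimY, dimZ) = (80, 80, 48), the raw file reshapes as (48, 80, 80). A sketch for inspecting `tsdf.bin` (the header-writing block is commented out, so the file is a bare float32 array):

```python
import numpy as np

vol = np.fromfile("tsdf.bin", dtype=np.float32).reshape(48, 80, 80)  # (z, y, x)
labels, counts = np.unique(vol, return_counts=True)
print(dict(zip(labels.tolist(), counts.tolist())))
# expected keys drawn from {-2.0, -1.0, 0.0, 1.0, 2.0}
```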
/depth-tsdf/compile.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export PATH=$PATH:/usr/local/cuda/bin
4 |
5 | if uname | grep -q Darwin; then
6 | CUDA_LIB_DIR=/usr/local/cuda/lib
7 | elif uname | grep -q Linux; then
8 | CUDA_LIB_DIR=/usr/local/cuda/lib64
9 | fi
10 |
11 | nvcc -std=c++11 -O3 -o back-project back-project.cu -I/usr/local/cuda/include -L$CUDA_LIB_DIR -lcudart -lcublas -lcurand -D_MWAITXINTRIN_H_INCLUDED `pkg-config --cflags --libs opencv`
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/depth-tsdf/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/.DS_Store
--------------------------------------------------------------------------------
/depth-tsdf/data/camera-intrinsics.txt:
--------------------------------------------------------------------------------
1 | 518.8579 0 320
2 | 0 518.8579 240
3 | 0 0 1
4 |
--------------------------------------------------------------------------------
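These intrinsics are a standard pinhole matrix (fx = fy = 518.8579, principal point at 320, 240 for 640x480 images), matching the projection in the `Integrate` kernel. Back-projecting a pixel (u, v) with metric depth d inverts that projection:

```python
import numpy as np

K = np.loadtxt("data/camera-intrinsics.txt")  # 3x3 pinhole intrinsics
fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

def backproject(u, v, d):
    """Pixel (u, v) with depth d (meters) -> point in camera coordinates."""
    return np.array([(u - cx) * d / fx, (v - cy) * d / fy, d])

print(backproject(320, 240, 2.0))  # principal point lies on the optical axis: [0, 0, 2]
```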
/depth-tsdf/data/camera/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.txt:
--------------------------------------------------------------------------------
1 | -9.947609901428222656e-01 -4.861999768763780594e-03 1.021080017089843750e-01 4.428060531616210938e+01
2 | -1.022230014204978943e-01 4.731500148773193359e-02 -9.936360120773315430e-01 4.011987304687500000e+01
3 | 0.000000000000000000e+00 -9.988679885864257812e-01 -4.756399989128112793e-02 1.281923055648803711e+00
4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00
5 |
--------------------------------------------------------------------------------
/depth-tsdf/data/camera/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.txt:
--------------------------------------------------------------------------------
1 | 1.216949969530105591e-01 9.431800246238708496e-02 -9.880759716033935547e-01 4.156637573242187500e+01
2 | 9.925680160522460938e-01 -1.156399957835674286e-02 1.211439967155456543e-01 3.953329467773437500e+01
3 | -0.000000000000000000e+00 -9.954749941825866699e-01 -9.502399712800979614e-02 1.549054980278015137e+00
4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00
5 |
--------------------------------------------------------------------------------
/depth-tsdf/data/camera/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.txt:
--------------------------------------------------------------------------------
1 | -5.403019785881042480e-01 -2.791469991207122803e-01 7.938200235366821289e-01 4.199525833129882812e+01
2 | -8.414710164070129395e-01 1.792380064725875854e-01 -5.097060203552246094e-01 4.885494995117187500e+01
3 | 0.000000000000000000e+00 -9.433720111846923828e-01 -3.317370116710662842e-01 9.233530163764953613e-01
4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00
5 |
--------------------------------------------------------------------------------
/depth-tsdf/data/camera/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.txt:
--------------------------------------------------------------------------------
1 | 3.210160136222839355e-01 -2.677600085735321045e-02 9.466950297355651855e-01 5.375836944580078125e+01
2 | -9.470739960670471191e-01 -9.076000191271305084e-03 3.208869993686676025e-01 4.655792999267578125e+01
3 | 0.000000000000000000e+00 -9.995999932289123535e-01 -2.827299945056438446e-02 1.377272009849548340e+00
4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00
5 |
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_real_png/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_real_png/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_real_png/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_real_png/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.png
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_real_png/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_real_png/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.png
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_real_png/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_real_png/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.png
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_rgb_png/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_rgb_png/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.png
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_rgb_png/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_rgb_png/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.png
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_rgb_png/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_rgb_png/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.png
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_rgb_png/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_rgb_png/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.png
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_tsdf_bin/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_bin/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.bin
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_tsdf_bin/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_bin/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.bin
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_tsdf_bin/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_bin/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.bin
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_tsdf_bin/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_bin/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.bin
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_tsdf_occluded_npy/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_occluded_npy/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.npy
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_tsdf_occluded_npy/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_occluded_npy/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.npy
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_tsdf_occluded_npy/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_occluded_npy/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.npy
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_tsdf_occluded_npy/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_occluded_npy/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.npy
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_tsdf_ply/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.ply:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_ply/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.ply
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_tsdf_ply/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.ply:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_ply/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.ply
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_tsdf_ply/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.ply:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_ply/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.ply
--------------------------------------------------------------------------------
/depth-tsdf/data/depth_tsdf_ply/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.ply:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/data/depth_tsdf_ply/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.ply
--------------------------------------------------------------------------------
/depth-tsdf/data/origin/00017227_01e40e56e7c4006efc920560ac4d26b9_fl001_rm0004_0000.txt:
--------------------------------------------------------------------------------
1 | 4.222777557373046875e+01
2 | 3.434151077270507812e+01
3 | -5.000000074505805969e-02
4 |
--------------------------------------------------------------------------------
/depth-tsdf/data/origin/00017227_17756eab966537ff81f8071e78d4402f_fl003_rm0002_0000.txt:
--------------------------------------------------------------------------------
1 | 3.580691528320312500e+01
2 | 3.754518508911132812e+01
3 | -5.000000074505805969e-02
4 |
--------------------------------------------------------------------------------
/depth-tsdf/data/origin/00017227_1cf7ccef62c8d06d587302f215ef1a1c_fl001_rm0009_0000.txt:
--------------------------------------------------------------------------------
1 | 4.229424667358398438e+01
2 | 4.472195053100585938e+01
3 | -5.000000074505805969e-02
4 |
--------------------------------------------------------------------------------
/depth-tsdf/data/origin/00017227_22261817deb874fc5216dd284bfc02e9_fl001_rm0009_0000.txt:
--------------------------------------------------------------------------------
1 | 5.457713317871093750e+01
2 | 4.524894332885742188e+01
3 | -5.000000074505805969e-02
4 |
--------------------------------------------------------------------------------
/depth-tsdf/depth_yida.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/depth-tsdf/depth_yida.png
--------------------------------------------------------------------------------
/depth-tsdf/run.sh:
--------------------------------------------------------------------------------
1 | ./demo \
2 | data/camera-intrinsics.txt \
3 | data/rgbd-frames-yida \
4 | 150 \
5 | 150 \
6 | 1 \
7 | 36.0f \
8 | 40.5f \
9 | -0.05f \
10 | 0.02f \
11 | 0.1f
12 |
13 |
--------------------------------------------------------------------------------
/depth-tsdf/tsdf2mesh.m:
--------------------------------------------------------------------------------
1 | % ---------------------------------------------------------
2 | % Loads a TSDF voxel grid from a binary file (tsdf.bin) and
3 | % creates a mesh (saved to mesh.ply), which can be viewed
4 | % with a 3D viewer like Meshlab.
5 | %
6 | % Author: Andy Zeng, Princeton University, 2016
7 | % ---------------------------------------------------------
8 |
9 | % Load TSDF voxel grid from binary file
10 | fid = fopen('tsdf.bin','rb');
11 | tsdfHeader = fread(fid,8,'single');
12 | voxelGridDim = tsdfHeader(1:3);
13 | voxelGridOrigin = tsdfHeader(4:6);
14 | voxelSize = tsdfHeader(7);
15 | truncMargin = tsdfHeader(8);
16 | tsdf = fread(fid,voxelGridDim(1)*voxelGridDim(2)*voxelGridDim(3),'single');
17 | fclose(fid);
18 |
19 | % Convert from TSDF to mesh
20 | tsdf = reshape(tsdf,[voxelGridDim(1),voxelGridDim(2),voxelGridDim(3)]);
21 | fv = isosurface(tsdf,0);
22 | points = fv.vertices';
23 | faces = fv.faces';
24 |
25 | % Set mesh color (light blue)
26 | color = uint8(repmat([175;198;233],1,size(points,2)));
27 |
28 | % Transform mesh from voxel coordinates to camera coordinates
29 | meshPoints(1,:) = voxelGridOrigin(1) + points(2,:)*voxelSize; % x y axes are swapped from isosurface
30 | meshPoints(2,:) = voxelGridOrigin(2) + points(1,:)*voxelSize;
31 | meshPoints(3,:) = voxelGridOrigin(3) + points(3,:)*voxelSize;
32 |
33 | % Pack vertex positions and colors, then write header for mesh file
34 | data = reshape(typecast(reshape(single(meshPoints),1,[]),'uint8'),3*4,[]);
35 | data = [data; color];
36 | fid = fopen('mesh.ply','w');
37 | fprintf (fid, 'ply\n');
38 | fprintf (fid, 'format binary_little_endian 1.0\n');
39 | fprintf (fid, 'element vertex %d\n', size(data,2));
40 | fprintf (fid, 'property float x\n');
41 | fprintf (fid, 'property float y\n');
42 | fprintf (fid, 'property float z\n');
43 | fprintf (fid, 'property uchar red\n');
44 | fprintf (fid, 'property uchar green\n');
45 | fprintf (fid, 'property uchar blue\n');
46 | fprintf (fid, 'element face %d\n', size(faces,2));
47 | fprintf (fid, 'property list uchar int vertex_index\n');
48 | fprintf (fid, 'end_header\n');
49 |
50 | % Write vertices
51 | fwrite(fid, data,'uint8');
52 |
53 | % Write faces
54 | faces = faces([3 2 1],:); % reverse the order to get a better normal
55 | faces_data = int32(faces-1);
56 | faces_data = reshape(typecast(reshape(faces_data,1,[]),'uint8'),3*4,[]);
57 | faces_data = [uint8(ones(1,size(faces,2))*3); faces_data]; % uchar face-size prefix; must be uint8 to concatenate with the byte rows
58 | fwrite(fid, faces_data,'uint8');
59 |
60 | fclose(fid);
61 |
--------------------------------------------------------------------------------
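For readers without MATLAB, the tsdf.bin layout that tsdf2mesh.m documents (an 8-float32 header: grid dims x/y/z, grid origin x/y/z, voxel size, truncation margin, followed by dimX*dimY*dimZ float32 TSDF values) can be loaded with numpy. Only the file layout comes from the script above; the helper itself is a sketch:

import numpy as np

def load_tsdf(path='tsdf.bin'):
    with open(path, 'rb') as f:
        header = np.fromfile(f, dtype=np.float32, count=8)
        dims = tuple(int(d) for d in header[:3])       # voxelGridDim
        origin = header[3:6]                           # voxelGridOrigin
        voxel_size, trunc_margin = header[6], header[7]
        tsdf = np.fromfile(f, dtype=np.float32,
                           count=dims[0] * dims[1] * dims[2])
    # MATLAB's reshape is column-major; order='F' reproduces its layout
    tsdf = tsdf.reshape(dims, order='F')
    return tsdf, origin, voxel_size, trunc_margin

A zero-level surface can then be extracted with, e.g., scikit-image's measure.marching_cubes in place of MATLAB's isosurface.

--------------------------------------------------------------------------------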
/depth-tsdf/utils.hpp:
--------------------------------------------------------------------------------
1 | // ---------------------------------------------------------
2 | // Author: Andy Zeng, Princeton University, 2016
3 | // ---------------------------------------------------------
4 |
5 | #include <opencv2/opencv.hpp> // cv::Mat, cv::imread, cv::waitKey below
6 | #include <vector>             // std::vector in LoadMatrixFromFile
7 |
8 | // Compute surface points from TSDF voxel grid and save points to point cloud file
9 | void SaveVoxelGrid2SurfacePointCloud(const std::string &file_name, int voxel_grid_dim_x, int voxel_grid_dim_y, int voxel_grid_dim_z,
10 | float voxel_size, float voxel_grid_origin_x, float voxel_grid_origin_y, float voxel_grid_origin_z,
11 | float * voxel_grid_TSDF) {
12 |
13 | // Count total number of points in point cloud
14 | int num_pts = 0;
15 | for (int i = 0; i < voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z; i++)
16 | if (voxel_grid_TSDF[i] == 1.0f)
17 | num_pts++;
18 |
19 | // Create header for .ply file
20 | FILE *fp = fopen(file_name.c_str(), "w");
21 | fprintf(fp, "ply\n");
22 | fprintf(fp, "format binary_little_endian 1.0\n");
23 | fprintf(fp, "element vertex %d\n", num_pts);
24 | fprintf(fp, "property float x\n");
25 | fprintf(fp, "property float y\n");
26 | fprintf(fp, "property float z\n");
27 | fprintf(fp, "end_header\n");
28 |
29 | // Create point cloud content for ply file
30 | for (int i = 0; i < voxel_grid_dim_x * voxel_grid_dim_y * voxel_grid_dim_z; i++) {
31 |
32 |     // If the TSDF value of the voxel equals 1.0f (its surface marker), add its coordinates to the point cloud
33 | if (voxel_grid_TSDF[i] == 1.0f) {
34 |
35 |       // Recover integer 3D voxel indices (x, y, z) from the linear index i
36 | int z = floor(i / (voxel_grid_dim_x * voxel_grid_dim_y));
37 | int y = floor((i - (z * voxel_grid_dim_x * voxel_grid_dim_y)) / voxel_grid_dim_x);
38 | int x = i - (z * voxel_grid_dim_x * voxel_grid_dim_y) - (y * voxel_grid_dim_x);
39 |
40 | // Convert voxel indices to float, and save coordinates to ply file
41 | float pt_base_x = voxel_grid_origin_x + (float) x * voxel_size;
42 | float pt_base_y = voxel_grid_origin_y + (float) y * voxel_size;
43 | float pt_base_z = voxel_grid_origin_z + (float) z * voxel_size;
44 | fwrite(&pt_base_x, sizeof(float), 1, fp);
45 | fwrite(&pt_base_y, sizeof(float), 1, fp);
46 | fwrite(&pt_base_z, sizeof(float), 1, fp);
47 | }
48 | }
49 | fclose(fp);
50 | }
51 |
52 | // Load an M x N matrix from a text file (numbers delimited by spaces/tabs)
53 | // Return the matrix as a float vector of the matrix in row-major order
54 | std::vector<float> LoadMatrixFromFile(std::string filename, int M, int N) {
55 |   std::vector<float> matrix;
56 | FILE *fp = fopen(filename.c_str(), "r");
57 | for (int i = 0; i < M * N; i++) {
58 | float tmp;
59 | int iret = fscanf(fp, "%f", &tmp);
60 | matrix.push_back(tmp);
61 | }
62 | fclose(fp);
63 | return matrix;
64 | }
65 |
66 | // Read a depth image with size H x W and save the depth values (in meters) into a float array (in row-major order)
67 | // The depth image file is assumed to be in 16-bit PNG format, depth in millimeters
68 | void ReadDepth(std::string filename, int H, int W, float * depth) {
69 | cv::Mat depth_mat = cv::imread(filename, -1);
70 | if (depth_mat.empty()) {
71 | std::cout << "Error: depth image file not read!" << std::endl;
72 | cv::waitKey(0);
73 | }
74 | for (int r = 0; r < H; ++r)
75 | for (int c = 0; c < W; ++c) {
76 |       depth[r * W + c] = (float)(depth_mat.at<unsigned short>(r, c)) / 1000.0f;
77 | if (depth[r * W + c] > 6.0f) // Only consider depth < 6m
78 | depth[r * W + c] = 0;
79 | }
80 | }
81 |
82 | // 4x4 matrix multiplication (matrices are stored as float arrays in row-major order)
83 | void multiply_matrix(const float m1[16], const float m2[16], float mOut[16]) {
84 | mOut[0] = m1[0] * m2[0] + m1[1] * m2[4] + m1[2] * m2[8] + m1[3] * m2[12];
85 | mOut[1] = m1[0] * m2[1] + m1[1] * m2[5] + m1[2] * m2[9] + m1[3] * m2[13];
86 | mOut[2] = m1[0] * m2[2] + m1[1] * m2[6] + m1[2] * m2[10] + m1[3] * m2[14];
87 | mOut[3] = m1[0] * m2[3] + m1[1] * m2[7] + m1[2] * m2[11] + m1[3] * m2[15];
88 |
89 | mOut[4] = m1[4] * m2[0] + m1[5] * m2[4] + m1[6] * m2[8] + m1[7] * m2[12];
90 | mOut[5] = m1[4] * m2[1] + m1[5] * m2[5] + m1[6] * m2[9] + m1[7] * m2[13];
91 | mOut[6] = m1[4] * m2[2] + m1[5] * m2[6] + m1[6] * m2[10] + m1[7] * m2[14];
92 | mOut[7] = m1[4] * m2[3] + m1[5] * m2[7] + m1[6] * m2[11] + m1[7] * m2[15];
93 |
94 | mOut[8] = m1[8] * m2[0] + m1[9] * m2[4] + m1[10] * m2[8] + m1[11] * m2[12];
95 | mOut[9] = m1[8] * m2[1] + m1[9] * m2[5] + m1[10] * m2[9] + m1[11] * m2[13];
96 | mOut[10] = m1[8] * m2[2] + m1[9] * m2[6] + m1[10] * m2[10] + m1[11] * m2[14];
97 | mOut[11] = m1[8] * m2[3] + m1[9] * m2[7] + m1[10] * m2[11] + m1[11] * m2[15];
98 |
99 | mOut[12] = m1[12] * m2[0] + m1[13] * m2[4] + m1[14] * m2[8] + m1[15] * m2[12];
100 | mOut[13] = m1[12] * m2[1] + m1[13] * m2[5] + m1[14] * m2[9] + m1[15] * m2[13];
101 | mOut[14] = m1[12] * m2[2] + m1[13] * m2[6] + m1[14] * m2[10] + m1[15] * m2[14];
102 | mOut[15] = m1[12] * m2[3] + m1[13] * m2[7] + m1[14] * m2[11] + m1[15] * m2[15];
103 | }
104 |
105 | // 4x4 matrix inversion (matrices are stored as float arrays in row-major order)
106 | bool invert_matrix(const float m[16], float invOut[16]) {
107 | float inv[16], det;
108 | int i;
109 | inv[0] = m[5] * m[10] * m[15] -
110 | m[5] * m[11] * m[14] -
111 | m[9] * m[6] * m[15] +
112 | m[9] * m[7] * m[14] +
113 | m[13] * m[6] * m[11] -
114 | m[13] * m[7] * m[10];
115 |
116 | inv[4] = -m[4] * m[10] * m[15] +
117 | m[4] * m[11] * m[14] +
118 | m[8] * m[6] * m[15] -
119 | m[8] * m[7] * m[14] -
120 | m[12] * m[6] * m[11] +
121 | m[12] * m[7] * m[10];
122 |
123 | inv[8] = m[4] * m[9] * m[15] -
124 | m[4] * m[11] * m[13] -
125 | m[8] * m[5] * m[15] +
126 | m[8] * m[7] * m[13] +
127 | m[12] * m[5] * m[11] -
128 | m[12] * m[7] * m[9];
129 |
130 | inv[12] = -m[4] * m[9] * m[14] +
131 | m[4] * m[10] * m[13] +
132 | m[8] * m[5] * m[14] -
133 | m[8] * m[6] * m[13] -
134 | m[12] * m[5] * m[10] +
135 | m[12] * m[6] * m[9];
136 |
137 | inv[1] = -m[1] * m[10] * m[15] +
138 | m[1] * m[11] * m[14] +
139 | m[9] * m[2] * m[15] -
140 | m[9] * m[3] * m[14] -
141 | m[13] * m[2] * m[11] +
142 | m[13] * m[3] * m[10];
143 |
144 | inv[5] = m[0] * m[10] * m[15] -
145 | m[0] * m[11] * m[14] -
146 | m[8] * m[2] * m[15] +
147 | m[8] * m[3] * m[14] +
148 | m[12] * m[2] * m[11] -
149 | m[12] * m[3] * m[10];
150 |
151 | inv[9] = -m[0] * m[9] * m[15] +
152 | m[0] * m[11] * m[13] +
153 | m[8] * m[1] * m[15] -
154 | m[8] * m[3] * m[13] -
155 | m[12] * m[1] * m[11] +
156 | m[12] * m[3] * m[9];
157 |
158 | inv[13] = m[0] * m[9] * m[14] -
159 | m[0] * m[10] * m[13] -
160 | m[8] * m[1] * m[14] +
161 | m[8] * m[2] * m[13] +
162 | m[12] * m[1] * m[10] -
163 | m[12] * m[2] * m[9];
164 |
165 | inv[2] = m[1] * m[6] * m[15] -
166 | m[1] * m[7] * m[14] -
167 | m[5] * m[2] * m[15] +
168 | m[5] * m[3] * m[14] +
169 | m[13] * m[2] * m[7] -
170 | m[13] * m[3] * m[6];
171 |
172 | inv[6] = -m[0] * m[6] * m[15] +
173 | m[0] * m[7] * m[14] +
174 | m[4] * m[2] * m[15] -
175 | m[4] * m[3] * m[14] -
176 | m[12] * m[2] * m[7] +
177 | m[12] * m[3] * m[6];
178 |
179 | inv[10] = m[0] * m[5] * m[15] -
180 | m[0] * m[7] * m[13] -
181 | m[4] * m[1] * m[15] +
182 | m[4] * m[3] * m[13] +
183 | m[12] * m[1] * m[7] -
184 | m[12] * m[3] * m[5];
185 |
186 | inv[14] = -m[0] * m[5] * m[14] +
187 | m[0] * m[6] * m[13] +
188 | m[4] * m[1] * m[14] -
189 | m[4] * m[2] * m[13] -
190 | m[12] * m[1] * m[6] +
191 | m[12] * m[2] * m[5];
192 |
193 | inv[3] = -m[1] * m[6] * m[11] +
194 | m[1] * m[7] * m[10] +
195 | m[5] * m[2] * m[11] -
196 | m[5] * m[3] * m[10] -
197 | m[9] * m[2] * m[7] +
198 | m[9] * m[3] * m[6];
199 |
200 | inv[7] = m[0] * m[6] * m[11] -
201 | m[0] * m[7] * m[10] -
202 | m[4] * m[2] * m[11] +
203 | m[4] * m[3] * m[10] +
204 | m[8] * m[2] * m[7] -
205 | m[8] * m[3] * m[6];
206 |
207 | inv[11] = -m[0] * m[5] * m[11] +
208 | m[0] * m[7] * m[9] +
209 | m[4] * m[1] * m[11] -
210 | m[4] * m[3] * m[9] -
211 | m[8] * m[1] * m[7] +
212 | m[8] * m[3] * m[5];
213 |
214 | inv[15] = m[0] * m[5] * m[10] -
215 | m[0] * m[6] * m[9] -
216 | m[4] * m[1] * m[10] +
217 | m[4] * m[2] * m[9] +
218 | m[8] * m[1] * m[6] -
219 | m[8] * m[2] * m[5];
220 |
221 | det = m[0] * inv[0] + m[1] * inv[4] + m[2] * inv[8] + m[3] * inv[12];
222 |
223 | if (det == 0)
224 | return false;
225 |
226 | det = 1.0 / det;
227 |
228 | for (i = 0; i < 16; i++)
229 | invOut[i] = inv[i] * det;
230 |
231 | return true;
232 | }
233 |
234 | void FatalError(const int lineNumber = 0) {
235 | std::cerr << "FatalError";
236 | if (lineNumber != 0) std::cerr << " at LINE " << lineNumber;
237 | std::cerr << ". Program Terminated." << std::endl;
238 | cudaDeviceReset();
239 | exit(EXIT_FAILURE);
240 | }
241 |
242 | void checkCUDA(const int lineNumber, cudaError_t status) {
243 | if (status != cudaSuccess) {
244 | std::cerr << "CUDA failure at LINE " << lineNumber << ": " << status << std::endl;
245 | FatalError();
246 | }
247 | }
248 |
--------------------------------------------------------------------------------
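The point-cloud .ply files written by SaveVoxelGrid2SurfacePointCloud above consist of an ASCII header terminated by end_header, followed by num_pts little-endian float32 (x, y, z) triples and no face data. A minimal reader for that exact layout (a sketch, not repo code):

import numpy as np

def read_surface_ply(path):
    with open(path, 'rb') as f:
        num_pts = 0
        while True:
            line = f.readline().decode('ascii').strip()
            if line.startswith('element vertex'):
                num_pts = int(line.split()[-1])
            if line == 'end_header':
                break
        pts = np.fromfile(f, dtype='<f4', count=num_pts * 3)
    return pts.reshape(-1, 3)  # one (x, y, z) row per surface voxel

--------------------------------------------------------------------------------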
/evaluate.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | from config_test import cfg_test
5 | from model import FCR_aGAN
6 | from util import DataProcess, scene_model_id_pair, onehot, scene_model_id_pair_test
7 | from sklearn.metrics import average_precision_score
8 | import copy
9 |
10 |
11 | def evaluate(batch_size, checknum, mode):
12 |
13 | n_vox = cfg_test.CONST.N_VOX
14 | dim = cfg_test.NET.DIM
15 | vox_shape = [n_vox[0], n_vox[1], n_vox[2], dim[4]]
16 | dim_z = cfg_test.NET.DIM_Z
17 | start_vox_size = cfg_test.NET.START_VOX
18 | kernel = cfg_test.NET.KERNEL
19 | stride = cfg_test.NET.STRIDE
20 | freq = cfg_test.CHECK_FREQ
21 | refine_ch = cfg_test.NET.REFINE_CH
22 | refine_kernel = cfg_test.NET.REFINE_KERNEL
23 |
24 | save_path = cfg_test.DIR.EVAL_PATH
25 | chckpt_path = cfg_test.DIR.CHECK_PT_PATH + str(
26 | checknum) #+ '-' + str(checknum * freq)
27 |
28 | fcr_agan_model = FCR_aGAN(
29 | batch_size=batch_size,
30 | vox_shape=vox_shape,
31 | dim_z=dim_z,
32 | dim=dim,
33 | start_vox_size=start_vox_size,
34 | kernel=kernel,
35 | stride=stride,
36 | refine_ch=refine_ch,
37 | refine_kernel=refine_kernel,
38 | )
39 |
40 |
41 | Z_tf, z_enc_tf, vox_tf, vox_gen_tf, vox_gen_decode_tf, vox_refine_dec_tf, vox_refine_gen_tf,\
42 | recons_loss_tf, code_encode_loss_tf, gen_loss_tf, discrim_loss_tf, recons_loss_refine_tfs, gen_loss_refine_tf, discrim_loss_refine_tf,\
43 | cost_enc_tf, cost_code_tf, cost_gen_tf, cost_discrim_tf, cost_gen_ref_tf, cost_discrim_ref_tf, summary_tf,\
44 | tsdf_tf = fcr_agan_model.build_model()
45 | """
46 | z_enc_dep_tf, dep_tf, vox_gen_decode_dep_tf,\
47 | recons_dep_loss_tf, code_encode_dep_loss_tf, gen_dep_loss_tf, discrim_dep_loss_tf,\
48 | cost_enc_dep_tf, cost_code_dep_tf, cost_gen_dep_tf, cost_discrim_dep_tf, cost_code_compare_tf,\
49 | """
50 | Z_tf_sample, vox_tf_sample = fcr_agan_model.samples_generator(
51 | visual_size=batch_size)
52 | sample_vox_tf, sample_refine_vox_tf = fcr_agan_model.refine_generator(
53 | visual_size=batch_size)
54 | sess = tf.InteractiveSession()
55 | saver = tf.train.Saver()
56 |
57 | # Restore variables from disk.
58 | saver.restore(sess, chckpt_path)
59 |
60 | print("...Weights restored.")
61 |
62 | if mode == 'recons':
63 |         # evaluate reconstruction, plus generation from the normal distribution
64 |         # first: generate from random latent codes
65 | for i in np.arange(batch_size):
66 | Z_np_sample = np.random.normal(
67 | size=(1, start_vox_size[0], start_vox_size[1],
68 | start_vox_size[2], dim_z)).astype(np.float32)
69 | if i == 0:
70 | Z_var_np_sample = Z_np_sample
71 | else:
72 | Z_var_np_sample = np.concatenate(
73 | (Z_var_np_sample, Z_np_sample), axis=0)
74 | np.save(save_path + '/sample_z.npy', Z_var_np_sample)
75 |
76 | generated_voxs_fromrand = sess.run(
77 | vox_tf_sample, feed_dict={Z_tf_sample: Z_var_np_sample})
78 | vox_models_cat = np.argmax(generated_voxs_fromrand, axis=4)
79 | np.save(save_path + '/generate.npy', vox_models_cat)
80 |
81 | refined_voxs_fromrand = sess.run(
82 | sample_refine_vox_tf,
83 | feed_dict={sample_vox_tf: generated_voxs_fromrand})
84 | vox_models_cat = np.argmax(refined_voxs_fromrand, axis=4)
85 | np.save(save_path + '/generate_refine.npy', vox_models_cat)
86 |
87 | #evaluation for reconstruction
88 | voxel_test, tsdf_test, num = scene_model_id_pair_test(
89 | dataset_portion=cfg_test.TRAIN.DATASET_PORTION)
90 |         num = voxel_test.shape[0]  # overrides the count returned above
91 | print("test voxels loaded")
92 | for i in np.arange(int(num / batch_size)):
93 | batch_voxel_test = voxel_test[i * batch_size:i * batch_size +
94 | batch_size]
95 | # depth--start
96 | """
97 | batch_depth_test = depth_test[i*batch_size:i*batch_size+batch_size]
98 | """
99 | # depth--end
100 | batch_tsdf_test = tsdf_test[i * batch_size:i * batch_size +
101 | batch_size]
102 |
103 | batch_generated_voxs, batch_enc_Z = sess.run(
104 | [vox_gen_decode_tf, z_enc_tf],
105 | feed_dict={tsdf_tf: batch_tsdf_test})
106 | # depth--start
107 | """
108 | batch_dep_generated_voxs, batch_enc_dep_Z = sess.run(
109 | [vox_gen_decode_dep_tf, z_enc_dep_tf],
110 | feed_dict={dep_tf:batch_depth_test})
111 | """
112 | # depth--end
113 | batch_refined_vox = sess.run(
114 | sample_refine_vox_tf,
115 | feed_dict={sample_vox_tf: batch_generated_voxs})
116 |
117 | if i == 0:
118 | generated_voxs = batch_generated_voxs
119 | # generated_deps = batch_dep_generated_voxs
120 | refined_voxs = batch_refined_vox
121 | enc_Z = batch_enc_Z
122 | else:
123 | generated_voxs = np.concatenate(
124 | (generated_voxs, batch_generated_voxs), axis=0)
125 | # generated_deps = np.concatenate((generated_deps, batch_dep_generated_voxs), axis=0)
126 | refined_voxs = np.concatenate(
127 | (refined_voxs, batch_refined_vox), axis=0)
128 | enc_Z = np.concatenate((enc_Z, batch_enc_Z), axis=0)
129 |
130 | print("forwarded")
131 |
132 | #real
133 | vox_models_cat = voxel_test
134 | np.save(save_path + '/real.npy', vox_models_cat)
135 | tsdf_models_cat = tsdf_test
136 | np.save(save_path + '/tsdf.npy', tsdf_models_cat)
137 |
138 | #decoded
139 | vox_models_cat = np.argmax(generated_voxs, axis=4)
140 | np.save(save_path + '/recons.npy', vox_models_cat)
141 | """
142 | vox_models_cat = np.argmax(generated_deps, axis=4)
143 | np.save(save_path + '/gens_dep.npy', vox_models_cat)
144 | """
145 | vox_models_cat = np.argmax(refined_voxs, axis=4)
146 | np.save(save_path + '/recons_refine.npy', vox_models_cat)
147 | np.save(save_path + '/decode_z.npy', enc_Z)
148 |
149 | print("voxels saved")
150 |
151 |         #numerical evaluation
152 | on_real = onehot(voxel_test, vox_shape[3])
153 | on_recons = onehot(np.argmax(generated_voxs, axis=4), vox_shape[3])
154 | # on_gens_dep = onehot(np.argmax(generated_deps, axis=4),vox_shape[3])
155 |
156 | #calc_IoU
157 | IoU_class = np.zeros([vox_shape[3] + 1])
158 | for class_n in np.arange(vox_shape[3]):
159 | on_recons_ = on_recons[:, :, :, :, class_n]
160 | on_real_ = on_real[:, :, :, :, class_n]
161 | mother = np.sum(np.add(on_recons_, on_real_), (1, 2, 3))
162 | child = np.sum(np.multiply(on_recons_, on_real_), (1, 2, 3))
163 | count = 0
164 | IoU_element = 0
165 | for i in np.arange(num):
166 | if mother[i] != 0:
167 | IoU_element += child[i] / mother[i]
168 | count += 1
169 | IoU_calc = np.round(IoU_element / count, 3)
170 | IoU_class[class_n] = IoU_calc
171 |             print('IoU class ' + str(class_n) + '=' + str(IoU_calc))
172 |
173 | on_recons_ = on_recons[:, :, :, :, 1:vox_shape[3]]
174 | on_real_ = on_real[:, :, :, :, 1:vox_shape[3]]
175 | mother = np.sum(np.add(on_recons_, on_real_), (1, 2, 3, 4))
176 | child = np.sum(np.multiply(on_recons_, on_real_), (1, 2, 3, 4))
177 | count = 0
178 | IoU_element = 0
179 | for i in np.arange(num):
180 | if mother[i] != 0:
181 | IoU_element += child[i] / mother[i]
182 | count += 1
183 | IoU_calc = np.round(IoU_element / count, 3)
184 | IoU_class[vox_shape[3]] = IoU_calc
185 |         print('IoU all =' + str(IoU_calc))
186 | np.savetxt(save_path + '/IoU.csv', IoU_class, delimiter=",")
187 |
188 | #calc_AP
189 | AP_class = np.zeros([vox_shape[3] + 1])
190 | for class_n in np.arange(vox_shape[3]):
191 | on_recons_ = generated_voxs[:, :, :, :, class_n]
192 | on_real_ = on_real[:, :, :, :, class_n]
193 |
194 | AP = 0.
195 | for i in np.arange(num):
196 | y_true = np.reshape(on_real_[i], [-1])
197 | y_scores = np.reshape(on_recons_[i], [-1])
198 | if np.sum(y_true) > 0.:
199 | AP += average_precision_score(y_true, y_scores)
200 | AP = np.round(AP / num, 3)
201 | AP_class[class_n] = AP
202 |             print('AP class ' + str(class_n) + '=' + str(AP))
203 |
204 | on_recons_ = generated_voxs[:, :, :, :, 1:vox_shape[3]]
205 | on_real_ = on_real[:, :, :, :, 1:vox_shape[3]]
206 | AP = 0.
207 | for i in np.arange(num):
208 | y_true = np.reshape(on_real_[i], [-1])
209 | y_scores = np.reshape(on_recons_[i], [-1])
210 | if np.sum(y_true) > 0.:
211 | AP += average_precision_score(y_true, y_scores)
212 |
213 | AP = np.round(AP / num, 3)
214 | AP_class[vox_shape[3]] = AP
215 |         print('AP all =' + str(AP))
216 | np.savetxt(save_path + '/AP.csv', AP_class, delimiter=",")
217 |
218 | #Refine
219 | #calc_IoU
220 | on_recons = onehot(np.argmax(refined_voxs, axis=4), vox_shape[3])
221 |
222 | IoU_class = np.zeros([vox_shape[3] + 1])
223 | for class_n in np.arange(vox_shape[3]):
224 | on_recons_ = on_recons[:, :, :, :, class_n]
225 | on_real_ = on_real[:, :, :, :, class_n]
226 | mother = np.sum(np.add(on_recons_, on_real_), (1, 2, 3))
227 | child = np.sum(np.multiply(on_recons_, on_real_), (1, 2, 3))
228 | count = 0
229 | IoU_element = 0
230 | for i in np.arange(num):
231 | if mother[i] != 0:
232 | IoU_element += child[i] / mother[i]
233 | count += 1
234 | IoU_calc = np.round(IoU_element / count, 3)
235 | IoU_class[class_n] = IoU_calc
236 |             print('IoU class ' + str(class_n) + '=' + str(IoU_calc))
237 |
238 | on_recons_ = on_recons[:, :, :, :, 1:vox_shape[3]]
239 | on_real_ = on_real[:, :, :, :, 1:vox_shape[3]]
240 | mother = np.sum(np.add(on_recons_, on_real_), (1, 2, 3, 4))
241 | child = np.sum(np.multiply(on_recons_, on_real_), (1, 2, 3, 4))
242 | count = 0
243 | IoU_element = 0
244 | for i in np.arange(num):
245 | if mother[i] != 0:
246 | IoU_element += child[i] / mother[i]
247 | count += 1
248 | IoU_calc = np.round(IoU_element / count, 3)
249 | IoU_class[vox_shape[3]] = IoU_calc
250 |         print('IoU all =' + str(IoU_calc))
251 | np.savetxt(save_path + '/IoU_refine.csv', IoU_class, delimiter=",")
252 |
253 | #calc_AP
254 | AP_class = np.zeros([vox_shape[3] + 1])
255 | for class_n in np.arange(vox_shape[3]):
256 | on_recons_ = refined_voxs[:, :, :, :, class_n]
257 | on_real_ = on_real[:, :, :, :, class_n]
258 |
259 | AP = 0.
260 | for i in np.arange(num):
261 | y_true = np.reshape(on_real_[i], [-1])
262 | y_scores = np.reshape(on_recons_[i], [-1])
263 | if np.sum(y_true) > 0.:
264 | AP += average_precision_score(y_true, y_scores)
265 | AP = np.round(AP / num, 3)
266 | AP_class[class_n] = AP
267 |             print('AP class ' + str(class_n) + '=' + str(AP))
268 |
269 | on_recons_ = refined_voxs[:, :, :, :, 1:vox_shape[3]]
270 | on_real_ = on_real[:, :, :, :, 1:vox_shape[3]]
271 | AP = 0.
272 | for i in np.arange(num):
273 | y_true = np.reshape(on_real_[i], [-1])
274 | y_scores = np.reshape(on_recons_[i], [-1])
275 | if np.sum(y_true) > 0.:
276 | AP += average_precision_score(y_true, y_scores)
277 |
278 | AP = np.round(AP / num, 3)
279 | AP_class[vox_shape[3]] = AP
280 |         print('AP all =' + str(AP))
281 | np.savetxt(save_path + '/AP_refine.csv', AP_class, delimiter=",")
282 |
283 | #interpolation evaluation
284 | if mode == 'interpolate':
285 | interpolate_num = 30
286 |         #interpolation between latent vectors
287 | decode_z = np.load(save_path + '/decode_z.npy')
288 | decode_z = decode_z[:batch_size]
289 | for l in np.arange(batch_size):
290 | for r in np.arange(batch_size):
291 | if l != r:
292 |                     print(l, r)
293 | base_num_left = l
294 | base_num_right = r
295 | left = np.reshape(decode_z[base_num_left], [
296 | 1, start_vox_size[0], start_vox_size[1],
297 | start_vox_size[2], dim_z
298 | ])
299 | right = np.reshape(decode_z[base_num_right], [
300 | 1, start_vox_size[0], start_vox_size[1],
301 | start_vox_size[2], dim_z
302 | ])
303 |
304 | duration = (right - left) / (interpolate_num - 1)
305 | if base_num_left == 0:
306 | Z_np_sample = decode_z[1:]
307 | elif base_num_left == batch_size - 1:
308 | Z_np_sample = decode_z[:batch_size - 1]
309 | else:
310 | Z_np_sample_before = np.reshape(
311 | decode_z[:base_num_left], [
312 | base_num_left, start_vox_size[0],
313 | start_vox_size[1], start_vox_size[2], dim_z
314 | ])
315 | Z_np_sample_after = np.reshape(
316 | decode_z[base_num_left + 1:], [
317 | batch_size - base_num_left - 1,
318 | start_vox_size[0], start_vox_size[1],
319 | start_vox_size[2], dim_z
320 | ])
321 | Z_np_sample = np.concatenate(
322 | [Z_np_sample_before, Z_np_sample_after], axis=0)
323 | for i in np.arange(interpolate_num):
324 | if i == 0:
325 | Z = copy.copy(left)
326 | interpolate_z = copy.copy(Z)
327 | else:
328 | Z = Z + duration
329 | interpolate_z = np.concatenate([interpolate_z, Z],
330 | axis=0)
331 | Z_var_np_sample = np.concatenate([Z, Z_np_sample],
332 | axis=0)
333 | generated_voxs_fromrand = sess.run(
334 | vox_tf_sample,
335 | feed_dict={Z_tf_sample: Z_var_np_sample})
336 | refined_voxs_fromrand = sess.run(
337 | sample_refine_vox_tf,
338 | feed_dict={sample_vox_tf: generated_voxs_fromrand})
339 | interpolate_vox = np.reshape(
340 | refined_voxs_fromrand[0], [
341 | 1, vox_shape[0], vox_shape[1], vox_shape[2],
342 | vox_shape[3]
343 | ])
344 | if i == 0:
345 | generated_voxs = interpolate_vox
346 | else:
347 | generated_voxs = np.concatenate(
348 | [generated_voxs, interpolate_vox], axis=0)
349 |
350 | np.save(
351 | save_path + '/interpolation_z' + str(l) + '-' + str(r)
352 | + '.npy', interpolate_z)
353 |
354 | vox_models_cat = np.argmax(generated_voxs, axis=4)
355 | np.save(
356 | save_path + '/interpolation' + str(l) + '-' + str(r) +
357 | '.npy', vox_models_cat)
358 | print("voxels saved")
359 |
360 |     # noise-perturbation evaluation
361 | if mode == 'noise':
362 | decode_z = np.load(save_path + '/decode_z.npy')
363 | decode_z = decode_z[:batch_size]
364 | noise_num = 10
365 | for base_num in np.arange(batch_size):
366 |             print(base_num)
367 | base = np.reshape(decode_z[base_num], [
368 | 1, start_vox_size[0], start_vox_size[1], start_vox_size[2],
369 | dim_z
370 | ])
371 | eps = np.random.normal(size=(noise_num - 1,
372 | dim_z)).astype(np.float32)
373 |
374 | if base_num == 0:
375 | Z_np_sample = decode_z[1:]
376 | elif base_num == batch_size - 1:
377 | Z_np_sample = decode_z[:batch_size - 1]
378 | else:
379 | Z_np_sample_before = np.reshape(decode_z[:base_num], [
380 | base_num, start_vox_size[0], start_vox_size[1],
381 | start_vox_size[2], dim_z
382 | ])
383 | Z_np_sample_after = np.reshape(decode_z[base_num + 1:], [
384 | batch_size - base_num - 1, start_vox_size[0],
385 | start_vox_size[1], start_vox_size[2], dim_z
386 | ])
387 | Z_np_sample = np.concatenate(
388 | [Z_np_sample_before, Z_np_sample_after], axis=0)
389 |
390 | for c in np.arange(start_vox_size[0]):
391 | for l in np.arange(start_vox_size[1]):
392 | for d in np.arange(start_vox_size[2]):
393 |
394 | for i in np.arange(noise_num):
395 | if i == 0:
396 | Z = copy.copy(base)
397 | noise_z = copy.copy(Z)
398 | else:
399 | Z = copy.copy(base)
400 | Z[0, c, l, d, :] += eps[i - 1]
401 | noise_z = np.concatenate([noise_z, Z], axis=0)
402 | Z_var_np_sample = np.concatenate([Z, Z_np_sample],
403 | axis=0)
404 | generated_voxs_fromrand = sess.run(
405 | vox_tf_sample,
406 | feed_dict={Z_tf_sample: Z_var_np_sample})
407 | refined_voxs_fromrand = sess.run(
408 | sample_refine_vox_tf,
409 | feed_dict={
410 | sample_vox_tf: generated_voxs_fromrand
411 | })
412 | noise_vox = np.reshape(refined_voxs_fromrand[0], [
413 | 1, vox_shape[0], vox_shape[1], vox_shape[2],
414 | vox_shape[3]
415 | ])
416 | if i == 0:
417 | generated_voxs = noise_vox
418 | else:
419 | generated_voxs = np.concatenate(
420 | [generated_voxs, noise_vox], axis=0)
421 |
422 | np.save(
423 | save_path + '/noise_z' + str(base_num) + '_' +
424 | str(c) + str(l) + str(d) + '.npy', noise_z)
425 |
426 | vox_models_cat = np.argmax(generated_voxs, axis=4)
427 | np.save(
428 | save_path + '/noise' + str(base_num) + '_' + str(c)
429 | + str(l) + str(d) + '.npy', vox_models_cat)
430 |
431 | print("voxels saved")
432 |
--------------------------------------------------------------------------------
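Every per-class loop in evaluate() above follows the same pattern: 'child' is the voxel-wise intersection of the one-hot prediction and ground truth, 'mother' is the element-wise sum of the two masks (note: |A| + |B|, not the set union |A| + |B| - |A and B|), and the ratio is averaged over samples with a non-zero denominator. Condensed into one numpy helper (illustrative only, not used by the repo):

import numpy as np

def per_class_score(on_recons, on_real, class_n):
    # on_recons, on_real: one-hot arrays of shape [N, X, Y, Z, C]
    pred = on_recons[..., class_n].astype(np.float64)
    real = on_real[..., class_n].astype(np.float64)
    denom = (pred + real).sum(axis=(1, 2, 3))   # 'mother' in evaluate.py
    inter = (pred * real).sum(axis=(1, 2, 3))   # 'child'  in evaluate.py
    valid = denom != 0
    return np.round((inter[valid] / denom[valid]).mean(), 3)

--------------------------------------------------------------------------------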
/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 |
4 | from train import train
5 | from config import cfg
6 | from config_test import cfg_test
7 | import tensorflow as tf
8 |
9 | flags = tf.app.flags
10 | flags.DEFINE_integer("epoch", cfg.TRAIN.NUM_EPOCH,
11 | "Epoch to train [15]") #n_epochs = cfg.TRAIN.NUM_EPOCH
12 | flags.DEFINE_float("learning_rate_G", cfg.LEARNING_RATE_G,
13 | "Learning rate for Generator of adam [0.0001]"
14 | ) #learning_rate_G = cfg.LEARNING_RATE_G
15 | flags.DEFINE_float("learning_rate_D", cfg.LEARNING_RATE_D,
16 | "Learning rate for Discriminator of adam [0.0001]"
17 | ) #learning_rate_D = cfg.LEARNING_RATE_D
18 | flags.DEFINE_integer(
19 | "batch_size", cfg.CONST.BATCH_SIZE,
20 | "The size of batch voxels [100]") #batch_size = cfg.CONST.BATCH_SIZE
21 | flags.DEFINE_integer(
22 | "batch_size_test", cfg_test.CONST.BATCH_SIZE,
23 | "The size of batch voxels [100]") #batch_size = cfg.CONST.BATCH_SIZE
24 |
25 | flags.DEFINE_boolean("middle_start", False,
26 | "True for starting from the middle [False]")
27 | flags.DEFINE_integer(
28 | "ini_epoch", 0,
29 | "The number of initial epoch --if middle_start: False -> 0, True -> must assign the number [0]"
30 | )
31 | flags.DEFINE_string(
32 | "mode", 'train',
33 | "Execute mode: train/evaluate_recons/evaluate_interpolate/evaluate_noise")
34 | flags.DEFINE_integer(
35 | "conf_epoch", 10000,
36 | "The number of confirmation epoch to evaluate interpolate, reconstruction etc [100]"
37 | )
38 |
39 | FLAGS = flags.FLAGS
40 |
41 |
42 | def main():
43 | if not os.path.exists(cfg.DIR.CHECK_POINT_PATH):
44 | os.makedirs(cfg.DIR.CHECK_POINT_PATH)
45 | if not os.path.exists(cfg.DIR.TRAIN_OBJ_PATH):
46 | os.makedirs(cfg.DIR.TRAIN_OBJ_PATH)
47 | if not os.path.exists(cfg.DIR.EVAL_PATH):
48 | os.makedirs(cfg.DIR.EVAL_PATH)
49 | if FLAGS.middle_start:
50 |         print('middle_start')
51 |
52 | if FLAGS.mode == 'train':
53 | train(FLAGS.epoch, FLAGS.learning_rate_G, FLAGS.learning_rate_D,
54 | FLAGS.batch_size, FLAGS.middle_start, FLAGS.ini_epoch)
55 |     elif FLAGS.mode in ('evaluate_recons', 'evaluate_interpolate', 'evaluate_noise'):
56 | from evaluate import evaluate
57 | if FLAGS.mode == 'evaluate_recons':
58 | mode = 'recons'
59 | elif FLAGS.mode == 'evaluate_interpolate':
60 | mode = 'interpolate'
61 | else:
62 | mode = 'noise'
63 | evaluate(FLAGS.batch_size_test, FLAGS.conf_epoch, mode)
64 |
65 |
66 | if __name__ == '__main__':
67 | #tf.app.run()
68 | main()
69 |
--------------------------------------------------------------------------------
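The mode dispatch in main() boils down to stripping the 'evaluate_' prefix and forwarding the test batch size plus the --conf_epoch checkpoint number to evaluate(); the direct equivalent of 'python main.py --mode evaluate_recons --conf_epoch 10000' is the following sketch:

from config_test import cfg_test
from evaluate import evaluate

evaluate(cfg_test.CONST.BATCH_SIZE, checknum=10000, mode='recons')

--------------------------------------------------------------------------------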
/model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from config import cfg
4 | import tensorflow as tf
5 | from util import *
6 |
7 |
8 | def batchnormalize(X, eps=1e-5, g=None, b=None, batch_size=10):
9 | if X.get_shape().ndims == 5:
10 | if batch_size == 1:
11 | mean = 0
12 | std = 1 - eps
13 | else:
14 | mean = tf.reduce_mean(X, [0, 1, 2, 3])
15 | std = tf.reduce_mean(tf.square(X - mean), [0, 1, 2, 3])
16 | X = (X - mean) / tf.sqrt(std + eps)
17 |
18 | if g is not None and b is not None:
19 | g = tf.reshape(g, [1, 1, 1, 1, -1])
20 | b = tf.reshape(b, [1, 1, 1, 1, -1])
21 | X = X * g + b
22 |
23 | # depth--start
24 | elif X.get_shape().ndims == 4:
25 | if batch_size == 1:
26 | mean = 0
27 | std = 1 - eps
28 | else:
29 | mean = tf.reduce_mean(X, [0, 1, 2])
30 | std = tf.reduce_mean(tf.square(X - mean), [0, 1, 2])
31 | X = (X - mean) / tf.sqrt(std + eps)
32 |
33 | if g is not None and b is not None:
34 | g = tf.reshape(g, [1, 1, 1, -1])
35 | b = tf.reshape(b, [1, 1, 1, -1])
36 | X = X * g + b
37 | # depth--end
38 |
39 | elif X.get_shape().ndims == 2:
40 | if batch_size == 1:
41 | mean = 0
42 | std = 1 - eps
43 | else:
44 | mean = tf.reduce_mean(X, 0)
45 | std = tf.reduce_mean(tf.square(X - mean), 0)
46 | X = (X - mean) / tf.sqrt(std + eps) #std
47 |
48 | if g is not None and b is not None:
49 | g = tf.reshape(g, [1, -1])
50 | b = tf.reshape(b, [1, -1])
51 | X = X * g + b
52 |
53 | else:
54 | raise NotImplementedError
55 |
56 | return X
57 |
58 |
59 | def layernormalize(X, eps=1e-5, g=None, b=None):
60 | if X.get_shape().ndims == 5:
61 | mean, std = tf.nn.moments(X, [1, 2, 3, 4], keep_dims=True)
62 | X = (X - mean) / tf.sqrt(std + eps)
63 |
64 | if g is not None and b is not None:
65 | X = X * g + b
66 |
67 | elif X.get_shape().ndims == 2:
68 |         mean = tf.reduce_mean(X, 1, keep_dims=True)
69 |         std = tf.reduce_mean(tf.square(X - mean), 1, keep_dims=True)
70 |         X = (X - mean) / tf.sqrt(std + eps)  # keep_dims lets the [N, 1] stats broadcast over [N, D]
71 |
72 | if g is not None and b is not None:
73 | X = X * g + b
74 |
75 | else:
76 | raise NotImplementedError
77 |
78 | return X
79 |
80 |
81 | def lrelu(X, leak=0.2):
82 | return tf.maximum(X, leak * X)
83 |
84 |
85 | def softmax(X, batch_size, vox_shape):
86 | c = tf.reduce_max(X, 4)
87 | c = tf.reshape(c,
88 | [batch_size, vox_shape[0], vox_shape[1], vox_shape[2], 1])
89 | exp = tf.exp(tf.subtract(X, c))
90 | expsum = tf.reduce_sum(exp, 4)
91 | expsum = tf.reshape(
92 | expsum, [batch_size, vox_shape[0], vox_shape[1], vox_shape[2], 1])
93 | soft = tf.div(exp, expsum)
94 |
95 | return soft
96 |
97 |
98 | class FCR_aGAN():
99 | def __init__(
100 | self,
101 | batch_size=20,
102 | vox_shape=[80, 48, 80, 12],
103 | dep_shape=[320, 240, 1],
104 | dim_z=16,
105 | dim=[512, 256, 128, 64, 12],
106 | start_vox_size=[5, 3, 5],
107 | kernel=[[5, 5, 5, 5, 5], [3, 3, 3, 3, 3], [5, 5, 5, 5, 5]],
108 | stride=[1, 2, 2, 2, 1],
109 | dim_code=750,
110 | refine_ch=32,
111 | refine_kernel=3,
112 | ):
113 |
114 | self.batch_size = batch_size
115 | self.vox_shape = vox_shape
116 | # depth--start
117 | self.dep_shape = dep_shape
118 | # depth--end
119 | self.n_class = vox_shape[3]
120 | self.dim_z = dim_z
121 | self.dim_W1 = dim[0]
122 | self.dim_W2 = dim[1]
123 | self.dim_W3 = dim[2]
124 | self.dim_W4 = dim[3]
125 | self.dim_W5 = dim[4]
126 | self.start_vox_size = np.array(start_vox_size)
127 | self.kernel = np.array(kernel)
128 | self.kernel1 = self.kernel[:, 0]
129 | self.kernel2 = self.kernel[:, 1]
130 | self.kernel3 = self.kernel[:, 2]
131 | self.kernel4 = self.kernel[:, 3]
132 | self.kernel5 = self.kernel[:, 4]
133 | self.stride = stride
134 | # depth--start
135 | self.stride_dep = [1, 2, 2, 1]
136 | # depth--end
137 |
138 | self.lamda_recons = cfg.LAMDA_RECONS
139 | self.lamda_gamma = cfg.LAMDA_GAMMA
140 |
141 | self.dim_code = dim_code
142 | self.refine_ch = refine_ch
143 | self.refine_kernel = refine_kernel
144 |
145 | self.gen_W1 = tf.Variable(
146 | tf.random_normal([
147 | self.dim_z * self.start_vox_size[0] * self.start_vox_size[1] *
148 | self.start_vox_size[2], self.dim_W1 * self.start_vox_size[0] *
149 | self.start_vox_size[1] * self.start_vox_size[2]
150 | ],
151 | stddev=0.02),
152 | name='gen_W1')
153 | self.gen_bn_g1 = tf.Variable(
154 | tf.random_normal([
155 | self.dim_W1 * self.start_vox_size[0] * self.start_vox_size[1] *
156 | self.start_vox_size[2]
157 | ],
158 | mean=1.0,
159 | stddev=0.02),
160 | name='gen_bn_g1')
161 | self.gen_bn_b1 = tf.Variable(
162 | tf.zeros([
163 | self.dim_W1 * self.start_vox_size[0] * self.start_vox_size[1] *
164 | self.start_vox_size[2]
165 | ]),
166 | name='gen_bn_b1')
167 |
168 | self.gen_W2 = tf.Variable(
169 | tf.random_normal([
170 | self.kernel2[0], self.kernel2[1], self.kernel2[2], self.dim_W2,
171 | self.dim_W1
172 | ],
173 | stddev=0.02),
174 | name='gen_W2')
175 | self.gen_bn_g2 = tf.Variable(
176 | tf.random_normal([self.dim_W2], mean=1.0, stddev=0.02),
177 | name='gen_bn_g2')
178 | self.gen_bn_b2 = tf.Variable(tf.zeros([self.dim_W2]), name='gen_bn_b2')
179 |
180 | self.gen_W3 = tf.Variable(
181 | tf.random_normal([
182 | self.kernel3[0], self.kernel3[1], self.kernel3[2], self.dim_W3,
183 | self.dim_W2
184 | ],
185 | stddev=0.02),
186 | name='gen_W3')
187 | self.gen_bn_g3 = tf.Variable(
188 | tf.random_normal([self.dim_W3], mean=1.0, stddev=0.02),
189 | name='gen_bn_g3')
190 | self.gen_bn_b3 = tf.Variable(tf.zeros([self.dim_W3]), name='gen_bn_b3')
191 |
192 | self.gen_W4 = tf.Variable(
193 | tf.random_normal([
194 | self.kernel4[0], self.kernel4[1], self.kernel4[2], self.dim_W4,
195 | self.dim_W3
196 | ],
197 | stddev=0.02),
198 | name='gen_W4')
199 | self.gen_bn_g4 = tf.Variable(
200 | tf.random_normal([self.dim_W4], mean=1.0, stddev=0.02),
201 | name='gen_bn_g4')
202 | self.gen_bn_b4 = tf.Variable(tf.zeros([self.dim_W4]), name='gen_bn_b4')
203 |
204 | self.gen_W5 = tf.Variable(
205 | tf.random_normal([
206 | self.kernel5[0], self.kernel5[1], self.kernel5[2], self.dim_W5,
207 | self.dim_W4
208 | ],
209 | stddev=0.02),
210 | name='gen_W5')
211 | self.gen_bn_g5 = tf.Variable(
212 | tf.random_normal([self.dim_W5], mean=1.0, stddev=0.02),
213 | name='gen_bn_g5')
214 | self.gen_bn_b5 = tf.Variable(tf.zeros([self.dim_W5]), name='gen_bn_b5')
215 |
216 | self.encode_W1 = tf.Variable(
217 | tf.random_normal([
218 | self.kernel5[0], self.kernel5[1], self.kernel5[2], 1,
219 | self.dim_W4
220 | ],
221 | stddev=0.02),
222 | name='encode_W1')
223 | self.encode_bn_g1 = tf.Variable(
224 | tf.random_normal([self.dim_W4], mean=1.0, stddev=0.02),
225 | name='encode_bn_g1')
226 | self.encode_bn_b1 = tf.Variable(
227 | tf.zeros([self.dim_W4]), name='encode_bn_b1')
228 |
229 | self.encode_W2 = tf.Variable(
230 | tf.random_normal([
231 | self.kernel4[0], self.kernel4[1], self.kernel4[2], self.dim_W4,
232 | self.dim_W3
233 | ],
234 | stddev=0.02),
235 | name='encode_W2')
236 | self.encode_bn_g2 = tf.Variable(
237 | tf.random_normal([self.dim_W3], mean=1.0, stddev=0.02),
238 | name='encode_bn_g2')
239 | self.encode_bn_b2 = tf.Variable(
240 | tf.zeros([self.dim_W3]), name='encode_bn_b2')
241 |
242 | self.encode_W3 = tf.Variable(
243 | tf.random_normal([
244 | self.kernel3[0], self.kernel3[1], self.kernel3[2], self.dim_W3,
245 | self.dim_W2
246 | ],
247 | stddev=0.02),
248 | name='encode_W3')
249 | self.encode_bn_g3 = tf.Variable(
250 | tf.random_normal([self.dim_W2], mean=1.0, stddev=0.02),
251 | name='encode_bn_g3')
252 | self.encode_bn_b3 = tf.Variable(
253 | tf.zeros([self.dim_W2]), name='encode_bn_b3')
254 |
255 | self.encode_W4 = tf.Variable(
256 | tf.random_normal([
257 | self.kernel2[0], self.kernel2[1], self.kernel2[2], self.dim_W2,
258 | self.dim_W1
259 | ],
260 | stddev=0.02),
261 | name='encode_W4')
262 | self.encode_bn_g4 = tf.Variable(
263 | tf.random_normal([self.dim_W1], mean=1.0, stddev=0.02),
264 | name='encode_bn_g4')
265 | self.encode_bn_b4 = tf.Variable(
266 | tf.zeros([self.dim_W1]), name='encode_bn_b4')
267 |
268 | self.encode_W5 = tf.Variable(
269 | tf.random_normal([1, 1, 1, self.dim_W1, self.dim_z], stddev=0.02),
270 | name='encode_W5')
271 | self.encode_W5_sigma = tf.Variable(
272 | tf.random_normal([1, 1, 1, self.dim_W1, self.dim_z], stddev=0.02),
273 | name='encode_W5_sigma')
274 |
275 | # depth--start
276 | self.encode_dep_W1 = tf.Variable(
277 | tf.random_normal(
278 | [self.kernel5[0], self.kernel5[1], 1, self.dim_W4],
279 | stddev=0.02),
280 | name='depthproject_W1')
281 | self.encode_dep_bn_g1 = tf.Variable(
282 | tf.random_normal([self.dim_W4], mean=1.0, stddev=0.02),
283 | name='depthproject_bn_g1')
284 | self.encode_dep_bn_b1 = tf.Variable(
285 | tf.zeros([self.dim_W4]), name='depthproject_bn_b1')
286 |
287 | self.encode_dep_W2 = tf.Variable(
288 | tf.random_normal(
289 | [self.kernel4[0], self.kernel4[1], self.dim_W4, self.dim_W3],
290 | stddev=0.02),
291 | name='depthproject_W2')
292 | self.encode_dep_bn_g2 = tf.Variable(
293 | tf.random_normal([self.dim_W3], mean=1.0, stddev=0.02),
294 | name='depthproject_bn_g2')
295 | self.encode_dep_bn_b2 = tf.Variable(
296 | tf.zeros([self.dim_W3]), name='depthproject_bn_b2')
297 |
298 | self.encode_dep_W3 = tf.Variable(
299 | tf.random_normal(
300 | [self.kernel3[0], self.kernel3[1], self.dim_W3, self.dim_W2],
301 | stddev=0.02),
302 | name='depthproject_W3')
303 | self.encode_dep_bn_g3 = tf.Variable(
304 | tf.random_normal([self.dim_W2], mean=1.0, stddev=0.02),
305 | name='depthproject_bn_g3')
306 | self.encode_dep_bn_b3 = tf.Variable(
307 | tf.zeros([self.dim_W2]), name='depthproject_bn_b3')
308 |
309 | self.encode_dep_W4 = tf.Variable(
310 | tf.random_normal(
311 | [self.kernel2[0], self.kernel2[1], self.dim_W2, self.dim_W1],
312 | stddev=0.02),
313 | name='depthproject_W4')
314 | self.encode_dep_bn_g4 = tf.Variable(
315 | tf.random_normal([self.dim_W1], mean=1.0, stddev=0.02),
316 | name='depthproject_bn_g4')
317 | self.encode_dep_bn_b4 = tf.Variable(
318 | tf.zeros([self.dim_W1]), name='depthproject_bn_b4')
319 |
320 | self.encode_dep_W5 = tf.Variable(
321 | tf.random_normal(
322 | [self.kernel2[0], self.kernel2[1], self.dim_W1, self.dim_W1],
323 | stddev=0.02),
324 | name='depthproject_W5')
325 | self.encode_dep_bn_g5 = tf.Variable(
326 | tf.random_normal([self.dim_W1], mean=1.0, stddev=0.02),
327 | name='depthproject_bn_g5')
328 | self.encode_dep_bn_b5 = tf.Variable(
329 | tf.zeros([self.dim_W1]), name='depthproject_bn_b5')
330 |
331 | self.encode_dep_W6 = tf.Variable(
332 | tf.random_normal(
333 | [self.kernel2[0], self.kernel2[1], self.dim_W1, 256],
334 | stddev=0.02),
335 | name='depthproject_W6')
336 | self.encode_dep_bn_g6 = tf.Variable(
337 | tf.random_normal([256], mean=1.0, stddev=0.02),
338 | name='depthproject_bn_g6')
339 | self.encode_dep_bn_b6 = tf.Variable(
340 | tf.zeros([256]), name='depthproject_bn_b6')
341 | self.encode_dep_W7 = tf.Variable(
342 | tf.random_normal([
343 | 5 * 4 * 256, self.start_vox_size[0] * self.start_vox_size[1] *
344 | self.start_vox_size[2] * self.dim_W1
345 | ],
346 | stddev=0.02),
347 | name='depthproject_W7')
348 |
349 | self.encode_dep_W8 = tf.Variable(
350 | tf.random_normal([1, 1, 1, self.dim_W1, self.dim_z], stddev=0.02),
351 | name='depthproject_W8')
352 | self.encode_dep_W8_sigma = tf.Variable(
353 | tf.random_normal([1, 1, 1, self.dim_W1, self.dim_z], stddev=0.02),
354 | name='depthproject_W8_sigma')
355 | # depth--end
356 |
357 | self.discrim_W1 = tf.Variable(
358 | tf.random_normal([
359 | self.kernel5[0], self.kernel5[1], self.kernel5[2], self.dim_W5,
360 | self.dim_W4
361 | ],
362 | stddev=0.02),
363 | name='discrim_vox_W1')
364 | self.discrim_bn_g1 = tf.Variable(
365 | tf.random_normal([1], mean=1.0, stddev=0.02),
366 | name='discrim_vox_bn_g1')
367 | self.discrim_bn_b1 = tf.Variable(
368 | tf.zeros([1]), name='discrim_vox_bn_b1')
369 |
370 | self.discrim_W2 = tf.Variable(
371 | tf.random_normal([
372 | self.kernel4[0], self.kernel4[1], self.kernel4[2], self.dim_W4,
373 | self.dim_W3
374 | ],
375 | stddev=0.02),
376 | name='discrim_vox_W2')
377 | self.discrim_bn_g2 = tf.Variable(
378 | tf.random_normal([1], mean=1.0, stddev=0.02),
379 | name='discrim_vox_bn_g2')
380 | self.discrim_bn_b2 = tf.Variable(
381 | tf.zeros([1]), name='discrim_vox_bn_b2')
382 |
383 | self.discrim_W3 = tf.Variable(
384 | tf.random_normal([
385 | self.kernel3[0], self.kernel3[1], self.kernel3[2], self.dim_W3,
386 | self.dim_W2
387 | ],
388 | stddev=0.02),
389 | name='discrim_vox_W3')
390 | self.discrim_bn_g3 = tf.Variable(
391 | tf.random_normal([1], mean=1.0, stddev=0.02),
392 | name='discrim_vox_bn_g3')
393 | self.discrim_bn_b3 = tf.Variable(
394 | tf.zeros([1]), name='discrim_vox_bn_b3')
395 |
396 | self.discrim_W4 = tf.Variable(
397 | tf.random_normal([
398 | self.kernel2[0], self.kernel2[1], self.kernel2[2], self.dim_W2,
399 | self.dim_W1
400 | ],
401 | stddev=0.02),
402 | name='discrim_vox_W4')
403 | self.discrim_bn_g4 = tf.Variable(
404 | tf.random_normal([1], mean=1.0, stddev=0.02),
405 | name='discrim_vox_bn_g4')
406 | self.discrim_bn_b4 = tf.Variable(
407 | tf.zeros([1]), name='discrim_vox_bn_b4')
408 |
409 | self.discrim_W5 = tf.Variable(
410 | tf.random_normal([
411 | self.start_vox_size[0] * self.start_vox_size[1] *
412 | self.start_vox_size[2] * self.dim_W1, 1
413 | ],
414 | stddev=0.02),
415 | name='discrim_vox_W5')
416 |
417 | # depth--start
418 | self.discrim_dep_W1 = tf.Variable(
419 | tf.random_normal([
420 | self.kernel5[0], self.kernel5[1], self.kernel5[2], self.dim_W5,
421 | self.dim_W4
422 | ],
423 | stddev=0.02),
424 | name='discrim_dep_W1')
425 | self.discrim_dep_bn_g1 = tf.Variable(
426 | tf.random_normal([1], mean=1.0, stddev=0.02),
427 | name='discrim_dep_bn_g1')
428 | self.discrim_dep_bn_b1 = tf.Variable(
429 | tf.zeros([1]), name='discrim_dep_bn_b1')
430 |
431 | self.discrim_dep_W2 = tf.Variable(
432 | tf.random_normal([
433 | self.kernel4[0], self.kernel4[1], self.kernel4[2], self.dim_W4,
434 | self.dim_W3
435 | ],
436 | stddev=0.02),
437 | name='discrim_dep_W2')
438 | self.discrim_dep_bn_g2 = tf.Variable(
439 | tf.random_normal([1], mean=1.0, stddev=0.02),
440 | name='discrim_dep_bn_g2')
441 | self.discrim_dep_bn_b2 = tf.Variable(
442 | tf.zeros([1]), name='discrim_dep_bn_b2')
443 |
444 | self.discrim_dep_W3 = tf.Variable(
445 | tf.random_normal([
446 | self.kernel3[0], self.kernel3[1], self.kernel3[2], self.dim_W3,
447 | self.dim_W2
448 | ],
449 | stddev=0.02),
450 | name='discrim_dep_W3')
451 | self.discrim_dep_bn_g3 = tf.Variable(
452 | tf.random_normal([1], mean=1.0, stddev=0.02),
453 | name='discrim_dep_bn_g3')
454 | self.discrim_dep_bn_b3 = tf.Variable(
455 | tf.zeros([1]), name='discrim_dep_bn_b3')
456 |
457 | self.discrim_dep_W4 = tf.Variable(
458 | tf.random_normal([
459 | self.kernel2[0], self.kernel2[1], self.kernel2[2], self.dim_W2,
460 | self.dim_W1
461 | ],
462 | stddev=0.02),
463 | name='discrim_dep_W4')
464 | self.discrim_dep_bn_g4 = tf.Variable(
465 | tf.random_normal([1], mean=1.0, stddev=0.02),
466 | name='discrim_dep_bn_g4')
467 | self.discrim_dep_bn_b4 = tf.Variable(
468 | tf.zeros([1]), name='discrim_dep_bn_b4')
469 |
470 | self.discrim_dep_W5 = tf.Variable(
471 | tf.random_normal([
472 | self.start_vox_size[0] * self.start_vox_size[1] *
473 | self.start_vox_size[2] * self.dim_W1, 1
474 | ],
475 | stddev=0.02),
476 | name='discrim_dep_W5')
477 | # depth--end
478 |
479 | self.cod_W1 = tf.Variable(
480 | tf.random_normal([
481 | self.dim_z * self.start_vox_size[0] * self.start_vox_size[1] *
482 | self.start_vox_size[2], self.dim_code
483 | ],
484 | stddev=0.02),
485 | name='cod_W1')
486 | self.cod_bn_g1 = tf.Variable(
487 | tf.random_normal([dim_code], mean=1.0, stddev=0.02),
488 | name='cod_bn_g1')
489 | self.cod_bn_b1 = tf.Variable(tf.zeros([dim_code]), name='cod_bn_b1')
490 |
491 | self.cod_W2 = tf.Variable(
492 | tf.random_normal([dim_code, dim_code], stddev=0.02), name='cod_W2')
493 | self.cod_bn_g2 = tf.Variable(
494 | tf.random_normal([dim_code], mean=1.0, stddev=0.02),
495 | name='cod_bn_g2')
496 | self.cod_bn_b2 = tf.Variable(tf.zeros([dim_code]), name='cod_bn_b2')
497 |
498 | self.cod_W3 = tf.Variable(
499 | tf.random_normal([dim_code, 1], stddev=0.02), name='cod_W3')
500 |
501 | self.refine_W1 = tf.Variable(
502 | tf.random_normal([
503 | self.refine_kernel, self.refine_kernel, self.refine_kernel,
504 | self.dim_W5, self.refine_ch
505 | ],
506 | stddev=0.02),
507 | name='refine_W1')
508 | self.refine_res1_W1 = tf.Variable(
509 | tf.random_normal([
510 | self.refine_kernel, self.refine_kernel, self.refine_kernel,
511 | self.refine_ch, self.refine_ch
512 | ],
513 | stddev=0.02),
514 | name='refine__res1_W1')
515 | self.refine_res1_W2 = tf.Variable(
516 | tf.random_normal([
517 | self.refine_kernel, self.refine_kernel, self.refine_kernel,
518 | self.refine_ch, self.refine_ch
519 | ],
520 | stddev=0.02),
521 | name='refine__res1_W2')
522 |
523 | self.refine_res2_W1 = tf.Variable(
524 | tf.random_normal([
525 | self.refine_kernel, self.refine_kernel, self.refine_kernel,
526 | self.refine_ch, self.refine_ch
527 | ],
528 | stddev=0.02),
529 | name='refine__res2_W1')
530 | self.refine_res2_W2 = tf.Variable(
531 | tf.random_normal([
532 | self.refine_kernel, self.refine_kernel, self.refine_kernel,
533 | self.refine_ch, self.refine_ch
534 | ],
535 | stddev=0.02),
536 | name='refine__res2_W2')
537 |
538 | self.refine_res3_W1 = tf.Variable(
539 | tf.random_normal([
540 | self.refine_kernel, self.refine_kernel, self.refine_kernel,
541 | self.refine_ch, self.refine_ch
542 | ],
543 | stddev=0.02),
544 | name='refine__res3_W1')
545 | self.refine_res3_W2 = tf.Variable(
546 | tf.random_normal([
547 | self.refine_kernel, self.refine_kernel, self.refine_kernel,
548 | self.refine_ch, self.refine_ch
549 | ],
550 | stddev=0.02),
551 | name='refine__res3_W2')
552 |
553 | self.refine_res4_W1 = tf.Variable(
554 | tf.random_normal([
555 | self.refine_kernel, self.refine_kernel, self.refine_kernel,
556 | self.refine_ch, self.refine_ch
557 | ],
558 | stddev=0.02),
559 | name='refine__res4_W1')
560 | self.refine_res4_W2 = tf.Variable(
561 | tf.random_normal([
562 | self.refine_kernel, self.refine_kernel, self.refine_kernel,
563 | self.refine_ch, self.refine_ch
564 | ],
565 | stddev=0.02),
566 | name='refine__res4_W2')
567 |
568 | self.refine_W2 = tf.Variable(
569 | tf.random_normal([
570 | self.refine_kernel, self.refine_kernel, self.refine_kernel,
571 | self.refine_ch, self.dim_W5
572 | ],
573 | stddev=0.02),
574 | name='refine_W2')
575 |
576 | self.saver = tf.train.Saver()
577 |
578 | def build_model(self):
579 |
580 | vox_real_ = tf.placeholder(tf.int32, [
581 | self.batch_size, self.vox_shape[0], self.vox_shape[1],
582 | self.vox_shape[2]
583 | ])
584 | vox_real = tf.one_hot(vox_real_, self.n_class)
585 | vox_real = tf.cast(vox_real, tf.float32)
586 | # depth--start
587 | """
588 | dep_real = tf.placeholder(
589 | tf.float32,
590 | [self.batch_size, self.dep_shape[0], self.dep_shape[1], self.dep_shape[2]])
591 | # depth--end
592 | """
593 | # tsdf--start
594 | tsdf_real_ = tf.placeholder(tf.int32, [
595 | self.batch_size, self.vox_shape[0], self.vox_shape[1],
596 | self.vox_shape[2]
597 | ])
598 | tsdf_real = tf.one_hot(tsdf_real_, 1)
599 | tsdf_real = tf.cast(tsdf_real, tf.float32)
600 | # tsdf--end
601 | Z = tf.placeholder(tf.float32, [
602 | self.batch_size, self.start_vox_size[0], self.start_vox_size[1],
603 | self.start_vox_size[2], self.dim_z
604 | ])
605 |
606 | filter_bilateral = tf.placeholder(
607 | tf.float32, [self.batch_size] +
608 | [self.vox_shape[0], self.vox_shape[1], self.vox_shape[2], 4])
609 | mean, sigma = self.encoder(tsdf_real)
610 | Z_encode = mean
611 | # depth--start
612 | """
613 | mean_dep, sigma_dep = self.encoder_dep(dep_real)
614 | Z_encode_dep = mean_dep
615 | """
616 | # depth--end
617 |
618 | #code_discriminator
619 | p_code_encode, h_code_encode = self.code_discriminator(Z_encode)
620 | p_code_real, h_code_real = self.code_discriminator(Z)
621 | # depth--start
622 | """
623 | p_code_encode_dep, h_code_encode_dep = self.code_discriminator(Z_encode_dep)
624 | """
625 | # depth--start
626 |
627 | code_encode_loss = tf.reduce_mean(
628 | tf.reduce_sum(
629 | tf.nn.sigmoid_cross_entropy_with_logits(
630 | logits=h_code_encode, labels=tf.ones_like(h_code_encode)),
631 | [1]))
632 | code_discrim_loss = tf.reduce_mean(
633 | tf.reduce_sum(
634 | tf.nn.sigmoid_cross_entropy_with_logits(
635 | logits=h_code_real, labels=tf.ones_like(h_code_real)),
636 | [1])) + tf.reduce_mean(
637 | tf.reduce_sum(
638 | tf.nn.sigmoid_cross_entropy_with_logits(
639 | logits=h_code_encode,
640 | labels=tf.zeros_like(h_code_encode)), [1]))
641 |
642 | # depth--start
643 | """
644 | code_encode_dep_loss = tf.reduce_mean(
645 | tf.reduce_sum(
646 | tf.nn.sigmoid_cross_entropy_with_logits(
647 | logits=h_code_encode_dep,
648 | labels=tf.ones_like(h_code_encode_dep)),
649 | [1]))
650 | code_discrim_dep_loss = tf.reduce_mean(
651 | tf.reduce_sum(
652 | tf.nn.sigmoid_cross_entropy_with_logits(
653 | logits=h_code_real,
654 | labels=tf.ones_like(h_code_real)),
655 | [1])) + tf.reduce_mean(
656 | tf.reduce_sum(
657 | tf.nn.sigmoid_cross_entropy_with_logits(
658 | logits=h_code_encode_dep,
659 | labels=tf.zeros_like(h_code_encode_dep)),
660 | [1]))
661 | code_compare_loss = tf.reduce_mean(
662 | tf.reduce_sum(
663 | tf.squared_difference(
664 | Z_encode_dep,
665 | Z_encode),
666 | [1,2,3,4]))
667 | """
668 | # depth--end
669 |
670 | #reconstruction
671 | vox_gen_decode, _ = self.generate(Z_encode)
672 | """
673 | vox_gen_decode_dep, _ = self.generate(Z_encode_dep)
674 | """
675 | batch_mean_vox_real = tf.reduce_mean(vox_real, [0, 1, 2, 3])
676 | # batch_mean_vox_real ranges from 0 to 1
677 | ones = tf.ones_like(batch_mean_vox_real)
678 |         # first weighting variant (ranges from 0.5 to 1); immediately overridden below
679 |         inverse = tf.div(ones, tf.add(batch_mean_vox_real, ones))
680 | # inverse ranges from 1/1.1 to 10
681 | inverse = tf.div(ones, batch_mean_vox_real + 0.1)
682 | weight = inverse * tf.div(1., tf.reduce_sum(inverse))
683 | recons_loss = -tf.reduce_sum(
684 | self.lamda_gamma * vox_real * tf.log(1e-6 + vox_gen_decode) +
685 | (1 - self.lamda_gamma) *
686 | (1 - vox_real) * tf.log(1e-6 + 1 - vox_gen_decode), [1, 2, 3])
687 | recons_loss = tf.reduce_mean(tf.reduce_sum(recons_loss * weight, 1))
688 | # Completion loss
689 | vox_real_complete = tf.stack([
690 | vox_real[:, :, :, :, 0],
691 | tf.reduce_sum(vox_real[:, :, :, :, 1:], 4)
692 | ], 4)
693 | vox_gen_complete = tf.stack([
694 | vox_gen_decode[:, :, :, :, 0],
695 | tf.reduce_sum(vox_gen_decode[:, :, :, :, 1:], 4)
696 | ], 4)
697 | complete_loss = -tf.reduce_sum(
698 | self.lamda_gamma * vox_real_complete *
699 | tf.log(1e-6 + vox_gen_complete) + (1 - self.lamda_gamma) *
700 | (1 - vox_real_complete) * tf.log(1e-6 + 1 - vox_gen_complete),
701 | [1, 2, 3])
702 | weight_complete = tf.stack([weight[0], tf.reduce_sum(weight[1:])])
703 | recons_loss += tf.reduce_mean(
704 | tf.reduce_sum(complete_loss * weight_complete, 1))
705 | """
706 | recons_dep_loss = -tf.reduce_sum(
707 | self.lamda_gamma *vox_real * tf.log(1e-6 + vox_gen_decode_dep) + (1- self.lamda_gamma) * (1-vox_real) * tf.log(1e-6 + 1-vox_gen_decode_dep),
708 | [1,2,3])
709 | recons_dep_loss = tf.reduce_mean(
710 | tf.reduce_sum(
711 | recons_dep_loss * weight, 1))
712 | """
713 | #Refiner
714 | vox_after_refine_dec = self.refine(vox_gen_decode)
715 |
716 | recons_loss_refine = -tf.reduce_sum(
717 | self.lamda_gamma * vox_real * tf.log(1e-6 + vox_after_refine_dec) +
718 | (1 - self.lamda_gamma) *
719 | (1 - vox_real) * tf.log(1e-6 + 1 - vox_after_refine_dec),
720 | [1, 2, 3])
721 | recons_loss_refine = tf.reduce_mean(
722 | tf.reduce_sum(recons_loss_refine * weight, 1))
723 |
724 | #GAN_generate
725 | vox_gen, _ = self.generate(Z)
726 | vox_after_refine_gen = self.refine(vox_gen)
727 |
728 | p_real, h_real = self.discriminate(vox_real)
729 | p_gen, h_gen = self.discriminate(vox_gen)
730 | p_gen_dec, h_gen_dec = self.discriminate(vox_gen_decode)
731 | # depth--start
732 | """
733 | p_real_dep, h_real_dep = self.discriminate_dep(vox_real)
734 | p_gen_dep, h_gen_dep = self.discriminate_dep(vox_gen)
735 | p_gen_dec_dep, h_gen_dec_dep = self.discriminate_dep(vox_gen_decode_dep)
736 | """
737 | # depth--end
738 | p_gen_ref, h_gen_ref = self.discriminate(vox_after_refine_gen)
739 | p_gen_dec_ref, h_gen_dec_ref = self.discriminate(vox_after_refine_dec)
740 |
741 | #Standard_GAN_Loss
742 | discrim_loss = tf.reduce_mean(
743 | tf.nn.sigmoid_cross_entropy_with_logits(
744 | logits=h_real, labels=tf.ones_like(h_real))) + tf.reduce_mean(
745 | tf.nn.sigmoid_cross_entropy_with_logits(
746 | logits=h_gen,
747 | labels=tf.zeros_like(h_gen))) + tf.reduce_mean(
748 | tf.nn.sigmoid_cross_entropy_with_logits(
749 | logits=h_gen_dec,
750 | labels=tf.zeros_like(h_gen_dec)))
751 |
752 | gen_loss = tf.reduce_mean(
753 | tf.nn.sigmoid_cross_entropy_with_logits(
754 | logits=h_gen, labels=tf.ones_like(h_gen))) + tf.reduce_mean(
755 | tf.nn.sigmoid_cross_entropy_with_logits(
756 | logits=h_gen_dec, labels=tf.ones_like(h_gen_dec)))
757 |
758 | # depth--start
759 | """
760 | discrim_dep_loss = tf.reduce_mean(
761 | tf.nn.sigmoid_cross_entropy_with_logits(
762 | logits=h_real_dep,
763 | labels=tf.ones_like(h_real_dep))) + tf.reduce_mean(
764 | tf.nn.sigmoid_cross_entropy_with_logits(
765 | logits=h_gen_dep,
766 | labels=tf.zeros_like(h_gen_dep))) + tf.reduce_mean(
767 | tf.nn.sigmoid_cross_entropy_with_logits(
768 | logits=h_gen_dec_dep,
769 | labels=tf.zeros_like(h_gen_dec_dep)))
770 |
771 | gen_dep_loss = tf.reduce_mean(
772 | tf.nn.sigmoid_cross_entropy_with_logits(
773 | logits=h_gen_dep,
774 | labels=tf.ones_like(h_gen_dep))) + tf.reduce_mean(
775 | tf.nn.sigmoid_cross_entropy_with_logits(
776 | logits=h_gen_dec_dep,
777 | labels=tf.ones_like(h_gen_dec_dep)))
778 | """
779 | # depth--end
780 | #for refine
781 | discrim_loss_refine = tf.reduce_mean(
782 | tf.nn.sigmoid_cross_entropy_with_logits(
783 | logits=h_real, labels=tf.ones_like(h_real))) + tf.reduce_mean(
784 | tf.nn.sigmoid_cross_entropy_with_logits(
785 | logits=h_gen_ref,
786 | labels=tf.zeros_like(h_gen_ref))) + tf.reduce_mean(
787 | tf.nn.sigmoid_cross_entropy_with_logits(
788 | logits=h_gen_dec_ref,
789 | labels=tf.zeros_like(h_gen_dec_ref)))
790 |
791 | gen_loss_refine = tf.reduce_mean(
792 | tf.nn.sigmoid_cross_entropy_with_logits(
793 | logits=h_gen_ref,
794 | labels=tf.ones_like(h_gen_ref))) + tf.reduce_mean(
795 | tf.nn.sigmoid_cross_entropy_with_logits(
796 | logits=h_gen_dec_ref,
797 | labels=tf.ones_like(h_gen_dec_ref)))
798 | """
799 | #LS_GAN_Loss
800 | a=-1
801 | b=1
802 | c=0
803 |
804 | discrim_loss = tf.reduce_mean(0.5*((h_real-b)**2) + 0.5*((h_gen-a)**2) + 0.5*((h_gen_dec-a)**2))
805 | gen_loss = tf.reduce_mean(0.5*((h_gen-c)**2) + 0.5*((h_gen_dec-c)**2))
806 | """
807 |
808 | #Cost
809 | cost_enc = code_encode_loss + self.lamda_recons * recons_loss
810 | cost_gen = self.lamda_recons * recons_loss + gen_loss
811 | cost_discrim = discrim_loss
812 | cost_code = code_discrim_loss
813 | cost_gen_ref = self.lamda_recons * recons_loss_refine + gen_loss_refine
814 | cost_discrim_ref = discrim_loss_refine
815 | """
816 | cost_enc_dep = code_encode_dep_loss + self.lamda_recons*recons_dep_loss
817 | cost_gen_dep = self.lamda_recons*recons_dep_loss + gen_dep_loss
818 | cost_discrim_dep = discrim_dep_loss
819 | cost_code_dep = code_discrim_dep_loss
820 | """
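|         # Each sub-network minimises its own weighted sum: encoder and
|         # generator share the reconstruction term scaled by
|         # self.lamda_recons, the discriminators keep purely adversarial
|         # costs, and the refiner reuses the refined reconstruction loss.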
821 |
822 | tf.summary.scalar("recons_loss", tf.reduce_mean(recons_loss))
823 | tf.summary.scalar("gen_loss", tf.reduce_mean(gen_loss))
824 | tf.summary.scalar("discrim_loss", tf.reduce_mean(discrim_loss))
825 | tf.summary.scalar("code_encode_loss", tf.reduce_mean(code_encode_loss))
826 | tf.summary.scalar("code_discrim_loss",
827 | tf.reduce_mean(code_discrim_loss))
828 |
829 | summary_op = tf.summary.merge_all()
830 |
831 | return Z, Z_encode, vox_real_, vox_gen, vox_gen_decode, vox_after_refine_dec, vox_after_refine_gen,\
832 | recons_loss, code_encode_loss, gen_loss, discrim_loss, recons_loss_refine, gen_loss_refine, discrim_loss_refine,\
833 | cost_enc, cost_code, cost_gen, cost_discrim, cost_gen_ref, cost_discrim_ref, summary_op,\
834 | tsdf_real
835 | """
836 | Z_encode_dep, dep_real, vox_gen_decode_dep,\
837 | recons_dep_loss, code_encode_dep_loss, gen_dep_loss, discrim_dep_loss,\
838 | cost_enc_dep, cost_code_dep, cost_gen_dep, cost_discrim_dep, code_compare_loss,\
839 | """
840 |
841 | def encoder(self, vox):
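|         # Four stride-2 3-D convolutions (lrelu, batch norm) compress the
|         # volume to start_vox_size resolution; two parallel stride-1
|         # convolutions then emit the latent mean (h5) and what is
|         # presumably its (log-)sigma (h5_sigma) for reparameterisation.
|         # E.g. if n_vox were [80, 48, 80], h4 would come out at
|         # [batch, 5, 3, 5, dim_W1] after the four halvings.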
842 |
843 | h1 = lrelu(
844 | tf.nn.conv3d(
845 | vox, self.encode_W1, strides=self.stride, padding='SAME'))
846 | h2 = lrelu(
847 | batchnormalize(
848 | tf.nn.conv3d(
849 | h1, self.encode_W2, strides=self.stride, padding='SAME'),
850 | g=self.encode_bn_g2,
851 | b=self.encode_bn_b2,
852 | batch_size=self.batch_size))
853 | h3 = lrelu(
854 | batchnormalize(
855 | tf.nn.conv3d(
856 | h2, self.encode_W3, strides=self.stride, padding='SAME'),
857 | g=self.encode_bn_g3,
858 | b=self.encode_bn_b3,
859 | batch_size=self.batch_size))
860 | h4 = lrelu(
861 | batchnormalize(
862 | tf.nn.conv3d(
863 | h3, self.encode_W4, strides=self.stride, padding='SAME'),
864 | g=self.encode_bn_g4,
865 | b=self.encode_bn_b4,
866 | batch_size=self.batch_size))
867 | h5 = tf.nn.conv3d(
868 | h4, self.encode_W5, strides=[1, 1, 1, 1, 1], padding='SAME')
869 | h5_sigma = tf.nn.conv3d(
870 | h4, self.encode_W5_sigma, strides=[1, 1, 1, 1, 1], padding='SAME')
871 |
872 | return h5, h5_sigma
873 |
874 | def encoder_dep(self, dep):
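|         # Depth branch: six stride-2 2-D convolutions over the depth map,
|         # a fully connected projection into the start_vox_size grid, and
|         # two 3-D convolution heads yielding a mean/sigma code pair with
|         # the same shape as the voxel encoder's output.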
875 |
876 | h1 = lrelu(
877 | tf.nn.conv2d(
878 | dep,
879 | self.encode_dep_W1,
880 | strides=self.stride_dep,
881 | padding='SAME'))
882 | h2 = lrelu(
883 | batchnormalize(
884 | tf.nn.conv2d(
885 | h1,
886 | self.encode_dep_W2,
887 | strides=self.stride_dep,
888 | padding='SAME'),
889 | g=self.encode_dep_bn_g2,
890 | b=self.encode_dep_bn_b2,
891 | batch_size=self.batch_size))
892 | h3 = lrelu(
893 | batchnormalize(
894 | tf.nn.conv2d(
895 | h2,
896 | self.encode_dep_W3,
897 | strides=self.stride_dep,
898 | padding='SAME'),
899 | g=self.encode_dep_bn_g3,
900 | b=self.encode_dep_bn_b3,
901 | batch_size=self.batch_size))
902 | h4 = lrelu(
903 | batchnormalize(
904 | tf.nn.conv2d(
905 | h3,
906 | self.encode_dep_W4,
907 | strides=self.stride_dep,
908 | padding='SAME'),
909 | g=self.encode_dep_bn_g4,
910 | b=self.encode_dep_bn_b4,
911 | batch_size=self.batch_size))
912 | h5 = lrelu(
913 | batchnormalize(
914 | tf.nn.conv2d(
915 | h4,
916 | self.encode_dep_W5,
917 | strides=self.stride_dep,
918 | padding='SAME'),
919 | g=self.encode_dep_bn_g5,
920 | b=self.encode_dep_bn_b5,
921 | batch_size=self.batch_size))
922 | h6 = lrelu(
923 | batchnormalize(
924 | tf.nn.conv2d(
925 | h5,
926 | self.encode_dep_W6,
927 | strides=self.stride_dep,
928 | padding='SAME'),
929 | g=self.encode_dep_bn_g6,
930 | b=self.encode_dep_bn_b6,
931 | batch_size=self.batch_size))
932 | h6 = tf.reshape(h6, [self.batch_size, -1])
933 | h7 = tf.matmul(h6, self.encode_dep_W7)
934 | h7 = tf.reshape(h7, [
935 | self.batch_size, self.start_vox_size[0], self.start_vox_size[1],
936 | self.start_vox_size[2], self.dim_W1
937 | ])
938 | h8 = tf.nn.conv3d(
939 | h7, self.encode_dep_W8, strides=[1, 1, 1, 1, 1], padding='SAME')
940 | h8_sigma = tf.nn.conv3d(
941 | h7,
942 | self.encode_dep_W8_sigma,
943 | strides=[1, 1, 1, 1, 1],
944 | padding='SAME')
945 |
946 | return h8, h8_sigma
947 |
948 | def discriminate(self, vox):
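|         # Volume discriminator: four stride-2 3-D convolutions with layer
|         # normalisation, flattened to a single logit per sample; returns
|         # the sigmoid probability together with the raw logit consumed by
|         # the losses in build_model.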
949 |
950 | h1 = lrelu(
951 | tf.nn.conv3d(
952 | vox, self.discrim_W1, strides=self.stride, padding='SAME'))
953 | h2 = lrelu(
954 | layernormalize(
955 | tf.nn.conv3d(
956 | h1, self.discrim_W2, strides=self.stride, padding='SAME'),
957 | g=self.discrim_bn_g2,
958 | b=self.discrim_bn_b2))
959 | h3 = lrelu(
960 | layernormalize(
961 | tf.nn.conv3d(
962 | h2, self.discrim_W3, strides=self.stride, padding='SAME'),
963 | g=self.discrim_bn_g3,
964 | b=self.discrim_bn_b3))
965 | h4 = lrelu(
966 | layernormalize(
967 | tf.nn.conv3d(
968 | h3, self.discrim_W4, strides=self.stride, padding='SAME'),
969 | g=self.discrim_bn_g4,
970 | b=self.discrim_bn_b4))
971 | h4 = tf.reshape(h4, [self.batch_size, -1])
972 | h5 = tf.matmul(h4, self.discrim_W5)
973 | y = tf.nn.sigmoid(h5)
974 |
975 | return y, h5
976 |
977 | def discriminate_dep(self, vox):
978 |
979 | h1 = lrelu(
980 | tf.nn.conv3d(
981 | vox, self.discrim_dep_W1, strides=self.stride, padding='SAME'))
982 | h2 = lrelu(
983 | layernormalize(
984 | tf.nn.conv3d(
985 | h1,
986 | self.discrim_dep_W2,
987 | strides=self.stride,
988 | padding='SAME'),
989 | g=self.discrim_dep_bn_g2,
990 | b=self.discrim_dep_bn_b2))
991 | h3 = lrelu(
992 | layernormalize(
993 | tf.nn.conv3d(
994 | h2,
995 | self.discrim_dep_W3,
996 | strides=self.stride,
997 | padding='SAME'),
998 | g=self.discrim_dep_bn_g3,
999 | b=self.discrim_dep_bn_b3))
1000 | h4 = lrelu(
1001 | layernormalize(
1002 | tf.nn.conv3d(
1003 | h3,
1004 | self.discrim_dep_W4,
1005 | strides=self.stride,
1006 | padding='SAME'),
1007 | g=self.discrim_dep_bn_g4,
1008 | b=self.discrim_dep_bn_b4))
1009 | h4 = tf.reshape(h4, [self.batch_size, -1])
1010 | h5 = tf.matmul(h4, self.discrim_dep_W5)
1011 | y = tf.nn.sigmoid(h5)
1012 |
1013 | return y, h5
1014 |
1015 | def code_discriminator(self, Z):
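|         # Latent-code discriminator: two batch-normalised fully connected
|         # layers plus a linear logit, separating encoder codes from prior
|         # samples Z -- apparently the adversarial stand-in for an explicit
|         # KL term.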
1016 | Z_ = tf.reshape(Z, [self.batch_size, -1])
1017 | h1 = tf.nn.relu(
1018 | batchnormalize(
1019 | tf.matmul(Z_, self.cod_W1), g=self.cod_bn_g1,
1020 | b=self.cod_bn_b1))
1021 | h2 = tf.nn.relu(
1022 | batchnormalize(
1023 | tf.matmul(h1, self.cod_W2), g=self.cod_bn_g2,
1024 | b=self.cod_bn_b2))
1025 | h3 = tf.matmul(h2, self.cod_W3)
1026 | y = tf.nn.sigmoid(h3)
1027 | return y, h3
1028 |
1029 | def generate(self, Z):
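|         # Generator/decoder: a fully connected layer reshapes the code to
|         # the start_vox_size grid, four stride-2 transposed 3-D
|         # convolutions upsample it 16x to vox_shape, and a voxel-wise
|         # softmax over the class channel produces the semantic volume.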
1030 |
1031 | Z_ = tf.reshape(Z, [self.batch_size, -1])
1032 | h1 = tf.nn.relu(
1033 | batchnormalize(
1034 | tf.matmul(Z_, self.gen_W1), g=self.gen_bn_g1,
1035 | b=self.gen_bn_b1))
1036 | h1 = tf.reshape(h1, [
1037 | self.batch_size, self.start_vox_size[0], self.start_vox_size[1],
1038 | self.start_vox_size[2], self.dim_W1
1039 | ])
1040 |
1041 | vox_size_l2 = self.start_vox_size * 2
1042 | output_shape_l2 = [
1043 | self.batch_size, vox_size_l2[0], vox_size_l2[1], vox_size_l2[2],
1044 | self.dim_W2
1045 | ]
1046 | h2 = tf.nn.conv3d_transpose(
1047 | h1, self.gen_W2, output_shape=output_shape_l2, strides=self.stride)
1048 | h2 = tf.nn.relu(
1049 | batchnormalize(
1050 | h2,
1051 | g=self.gen_bn_g2,
1052 | b=self.gen_bn_b2,
1053 | batch_size=self.batch_size))
1054 |
1055 | vox_size_l3 = self.start_vox_size * 4
1056 | output_shape_l3 = [
1057 | self.batch_size, vox_size_l3[0], vox_size_l3[1], vox_size_l3[2],
1058 | self.dim_W3
1059 | ]
1060 | h3 = tf.nn.conv3d_transpose(
1061 | h2, self.gen_W3, output_shape=output_shape_l3, strides=self.stride)
1062 | h3 = tf.nn.relu(
1063 | batchnormalize(
1064 | h3,
1065 | g=self.gen_bn_g3,
1066 | b=self.gen_bn_b3,
1067 | batch_size=self.batch_size))
1068 |
1069 | vox_size_l4 = self.start_vox_size * 8
1070 | output_shape_l4 = [
1071 | self.batch_size, vox_size_l4[0], vox_size_l4[1], vox_size_l4[2],
1072 | self.dim_W4
1073 | ]
1074 | h4 = tf.nn.conv3d_transpose(
1075 | h3, self.gen_W4, output_shape=output_shape_l4, strides=self.stride)
1076 | h4 = tf.nn.relu(
1077 | batchnormalize(
1078 | h4,
1079 | g=self.gen_bn_g4,
1080 | b=self.gen_bn_b4,
1081 | batch_size=self.batch_size))
1082 |
1083 | vox_size_l5 = self.start_vox_size * 16
1084 | output_shape_l5 = [
1085 | self.batch_size, vox_size_l5[0], vox_size_l5[1], vox_size_l5[2],
1086 | self.dim_W5
1087 | ]
1088 | h5 = tf.nn.conv3d_transpose(
1089 | h4, self.gen_W5, output_shape=output_shape_l5, strides=self.stride)
1090 |
1091 | x = softmax(h5, self.batch_size, self.vox_shape)
1092 | return x, h5
1093 |
1094 | def refine(self, vox):
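|         # Residual refiner: one 3-D convolution into refine_ch channels,
|         # four two-convolution residual blocks with identity skips, and a
|         # final convolution back to the class channels, closed by softmax.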
1095 | base = tf.nn.relu(
1096 | tf.nn.conv3d(
1097 | vox, self.refine_W1, strides=[1, 1, 1, 1, 1], padding='SAME'))
1098 |
1099 | #res1
1100 | res1_1 = tf.nn.relu(
1101 | tf.nn.conv3d(
1102 | base,
1103 | self.refine_res1_W1,
1104 | strides=[1, 1, 1, 1, 1],
1105 | padding='SAME'))
1106 | res1_2 = tf.nn.conv3d(
1107 | res1_1,
1108 | self.refine_res1_W2,
1109 | strides=[1, 1, 1, 1, 1],
1110 | padding='SAME')
1111 |
1112 | res1 = tf.nn.relu(tf.add(base, res1_2))
1113 |
1114 | #res2
1115 | res2_1 = tf.nn.relu(
1116 | tf.nn.conv3d(
1117 | res1,
1118 | self.refine_res2_W1,
1119 | strides=[1, 1, 1, 1, 1],
1120 | padding='SAME'))
1121 | res2_2 = tf.nn.conv3d(
1122 | res2_1,
1123 | self.refine_res2_W2,
1124 | strides=[1, 1, 1, 1, 1],
1125 | padding='SAME')
1126 |
1127 | res2 = tf.nn.relu(tf.add(res1, res2_2))
1128 |
1129 | #res3
1130 | res3_1 = tf.nn.relu(
1131 | tf.nn.conv3d(
1132 | res2,
1133 | self.refine_res3_W1,
1134 | strides=[1, 1, 1, 1, 1],
1135 | padding='SAME'))
1136 | res3_2 = tf.nn.conv3d(
1137 | res3_1,
1138 | self.refine_res3_W2,
1139 | strides=[1, 1, 1, 1, 1],
1140 | padding='SAME')
1141 |
1142 | res3 = tf.nn.relu(tf.add(res2, res3_2))
1143 |
1144 | #res4
1145 | res4_1 = tf.nn.relu(
1146 | tf.nn.conv3d(
1147 | res3,
1148 | self.refine_res4_W1,
1149 | strides=[1, 1, 1, 1, 1],
1150 | padding='SAME'))
1151 | res4_2 = tf.nn.conv3d(
1152 | res4_1,
1153 | self.refine_res4_W2,
1154 | strides=[1, 1, 1, 1, 1],
1155 | padding='SAME')
1156 |
1157 | res4 = tf.nn.relu(tf.add(res3, res4_2))
1158 |
1159 | out = tf.nn.conv3d(
1160 | res4, self.refine_W2, strides=[1, 1, 1, 1, 1], padding='SAME')
1161 | x_refine = softmax(out, self.batch_size, self.vox_shape)
1162 |
1163 | return x_refine
1164 |
1165 | def samples_generator(self, visual_size):
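|         # Rebuilds the generator graph on a placeholder Z with batch size
|         # visual_size, sharing the trained gen_* weights, so samples can
|         # be drawn during training without disturbing the training graph.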
1166 |
1167 | Z = tf.placeholder(tf.float32, [
1168 | visual_size, self.start_vox_size[0], self.start_vox_size[1],
1169 | self.start_vox_size[2], self.dim_z
1170 | ])
1171 |
1172 | Z_ = tf.reshape(Z, [visual_size, -1])
1173 | h1 = tf.nn.relu(
1174 | batchnormalize(
1175 | tf.matmul(Z_, self.gen_W1), g=self.gen_bn_g1,
1176 | b=self.gen_bn_b1))
1177 | h1 = tf.reshape(h1, [
1178 | visual_size, self.start_vox_size[0], self.start_vox_size[1],
1179 | self.start_vox_size[2], self.dim_W1
1180 | ])
1181 |
1182 | vox_size_l2 = self.start_vox_size * 2
1183 | output_shape_l2 = [
1184 | visual_size, vox_size_l2[0], vox_size_l2[1], vox_size_l2[2],
1185 | self.dim_W2
1186 | ]
1187 | h2 = tf.nn.conv3d_transpose(
1188 | h1, self.gen_W2, output_shape=output_shape_l2, strides=self.stride)
1189 | h2 = tf.nn.relu(
1190 | batchnormalize(
1191 | h2,
1192 | g=self.gen_bn_g2,
1193 | b=self.gen_bn_b2,
1194 |                 batch_size=visual_size))
1195 |
1196 | vox_size_l3 = self.start_vox_size * 4
1197 | output_shape_l3 = [
1198 | visual_size, vox_size_l3[0], vox_size_l3[1], vox_size_l3[2],
1199 | self.dim_W3
1200 | ]
1201 | h3 = tf.nn.conv3d_transpose(
1202 | h2, self.gen_W3, output_shape=output_shape_l3, strides=self.stride)
1203 | h3 = tf.nn.relu(
1204 | batchnormalize(
1205 | h3,
1206 | g=self.gen_bn_g3,
1207 | b=self.gen_bn_b3,
1208 |                 batch_size=visual_size))
1209 |
1210 | vox_size_l4 = self.start_vox_size * 8
1211 | output_shape_l4 = [
1212 | visual_size, vox_size_l4[0], vox_size_l4[1], vox_size_l4[2],
1213 | self.dim_W4
1214 | ]
1215 | h4 = tf.nn.conv3d_transpose(
1216 | h3, self.gen_W4, output_shape=output_shape_l4, strides=self.stride)
1217 | h4 = tf.nn.relu(
1218 | batchnormalize(
1219 | h4,
1220 | g=self.gen_bn_g4,
1221 | b=self.gen_bn_b4,
1222 |                 batch_size=visual_size))
1223 |
1224 | vox_size_l5 = self.start_vox_size * 16
1225 | output_shape_l5 = [
1226 | visual_size, vox_size_l5[0], vox_size_l5[1], vox_size_l5[2],
1227 | self.dim_W5
1228 | ]
1229 | h5 = tf.nn.conv3d_transpose(
1230 | h4, self.gen_W5, output_shape=output_shape_l5, strides=self.stride)
1231 |
1232 | x = softmax(h5, visual_size, self.vox_shape)
1233 | return Z, x
1234 |
1235 | def refine_generator(self, visual_size):
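|         # Same residual refiner as refine(), but fed from a placeholder so
|         # previously generated volumes can be refined at sampling time.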
1236 | vox = tf.placeholder(tf.float32, [
1237 | visual_size, self.vox_shape[0], self.vox_shape[1],
1238 | self.vox_shape[2], self.vox_shape[3]
1239 | ])
1240 |
1241 | base = tf.nn.relu(
1242 | tf.nn.conv3d(
1243 | vox, self.refine_W1, strides=[1, 1, 1, 1, 1], padding='SAME'))
1244 |
1245 | #res1
1246 | res1_1 = tf.nn.relu(
1247 | tf.nn.conv3d(
1248 | base,
1249 | self.refine_res1_W1,
1250 | strides=[1, 1, 1, 1, 1],
1251 | padding='SAME'))
1252 | res1_2 = tf.nn.conv3d(
1253 | res1_1,
1254 | self.refine_res1_W2,
1255 | strides=[1, 1, 1, 1, 1],
1256 | padding='SAME')
1257 |
1258 | res1 = tf.nn.relu(tf.add(base, res1_2))
1259 |
1260 | #res2
1261 | res2_1 = tf.nn.relu(
1262 | tf.nn.conv3d(
1263 | res1,
1264 | self.refine_res2_W1,
1265 | strides=[1, 1, 1, 1, 1],
1266 | padding='SAME'))
1267 | res2_2 = tf.nn.conv3d(
1268 | res2_1,
1269 | self.refine_res2_W2,
1270 | strides=[1, 1, 1, 1, 1],
1271 | padding='SAME')
1272 |
1273 | res2 = tf.nn.relu(tf.add(res1, res2_2))
1274 |
1275 | #res3
1276 | res3_1 = tf.nn.relu(
1277 | tf.nn.conv3d(
1278 | res2,
1279 | self.refine_res3_W1,
1280 | strides=[1, 1, 1, 1, 1],
1281 | padding='SAME'))
1282 | res3_2 = tf.nn.conv3d(
1283 | res3_1,
1284 | self.refine_res3_W2,
1285 | strides=[1, 1, 1, 1, 1],
1286 | padding='SAME')
1287 |
1288 | res3 = tf.nn.relu(tf.add(res2, res3_2))
1289 |
1290 | #res4
1291 | res4_1 = tf.nn.relu(
1292 | tf.nn.conv3d(
1293 | res3,
1294 | self.refine_res4_W1,
1295 | strides=[1, 1, 1, 1, 1],
1296 | padding='SAME'))
1297 | res4_2 = tf.nn.conv3d(
1298 | res4_1,
1299 | self.refine_res4_W2,
1300 | strides=[1, 1, 1, 1, 1],
1301 | padding='SAME')
1302 |
1303 | res4 = tf.nn.relu(tf.add(res3, res4_2))
1304 |
1305 | out = tf.nn.conv3d(
1306 | res4, self.refine_W2, strides=[1, 1, 1, 1, 1], padding='SAME')
1307 |         x_refine = softmax(out, visual_size, self.vox_shape)
1308 |
1309 | return vox, x_refine
1310 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | from config import cfg
5 | from util import DataProcess, scene_model_id_pair
6 | from model import FCR_aGAN
7 |
8 |
9 | def learning_rate(rate, step):
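|     # Piecewise-constant schedule: rate = [lr_before, switch_step,
|     # lr_after]. For illustration, learning_rate([1e-4, 30000, 5e-5], ite)
|     # would return 1e-4 until iteration 30000 and 5e-5 afterwards (these
|     # numbers are hypothetical; the real values come from
|     # cfg.LEARNING_RATE_V).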
10 | if step < rate[1]:
11 | lr = rate[0]
12 | else:
13 | lr = rate[2]
14 | return lr
15 |
16 |
17 | def train(n_epochs, learning_rate_G, learning_rate_D, batch_size, mid_flag,
18 | check_num):
19 | beta_G = cfg.TRAIN.ADAM_BETA_G
20 | beta_D = cfg.TRAIN.ADAM_BETA_D
21 | n_vox = cfg.CONST.N_VOX
22 | dim = cfg.NET.DIM
23 | vox_shape = [n_vox[0], n_vox[1], n_vox[2], dim[4]]
24 | dim_z = cfg.NET.DIM_Z
25 | start_vox_size = cfg.NET.START_VOX
26 | kernel = cfg.NET.KERNEL
27 | stride = cfg.NET.STRIDE
28 | freq = cfg.CHECK_FREQ
29 | record_vox_num = cfg.RECORD_VOX_NUM
30 | refine_ch = cfg.NET.REFINE_CH
31 | refine_kernel = cfg.NET.REFINE_KERNEL
32 |
33 | refine_start = cfg.SWITCHING_ITE
34 |
35 | fcr_agan_model = FCR_aGAN(
36 | batch_size=batch_size,
37 | vox_shape=vox_shape,
38 | dim_z=dim_z,
39 | dim=dim,
40 | start_vox_size=start_vox_size,
41 | kernel=kernel,
42 | stride=stride,
43 | refine_ch=refine_ch,
44 | refine_kernel=refine_kernel,
45 | )
46 |
47 | Z_tf, z_enc_tf, vox_tf, vox_gen_tf, vox_gen_decode_tf, vox_refine_dec_tf, vox_refine_gen_tf,\
48 | recons_loss_tf, code_encode_loss_tf, gen_loss_tf, discrim_loss_tf, recons_loss_refine_tf, gen_loss_refine_tf, discrim_loss_refine_tf,\
49 | cost_enc_tf, cost_code_tf, cost_gen_tf, cost_discrim_tf, cost_gen_ref_tf, cost_discrim_ref_tf, summary_tf,\
50 | tsdf_tf = fcr_agan_model.build_model()
51 | """
52 | z_enc_dep_tf, dep_tf, vox_gen_decode_dep_tf,\
53 | recons_dep_loss_tf, code_encode_dep_loss_tf, gen_dep_loss_tf, discrim_dep_loss_tf,\
54 | cost_enc_dep_tf, cost_code_dep_tf, cost_gen_dep_tf, cost_discrim_dep_tf, cost_code_compare_tf,\
55 | """
56 | config = tf.ConfigProto()
57 | config.gpu_options.allow_growth = True
58 | sess = tf.InteractiveSession(config=config)
59 | global_step = tf.Variable(0, name='global_step', trainable=False)
60 | saver = tf.train.Saver(max_to_keep=cfg.SAVER_MAX)
61 |
62 | data_paths = scene_model_id_pair(dataset_portion=cfg.TRAIN.DATASET_PORTION)
63 | print '---amount of data:' + str(len(data_paths))
64 | data_process = DataProcess(data_paths, batch_size, repeat=True)
65 |
66 | encode_vars = filter(lambda x: x.name.startswith('enc'),
67 | tf.trainable_variables())
68 | discrim_vars = filter(lambda x: x.name.startswith('discrim_vox'),
69 | tf.trainable_variables())
70 | # depth--start
71 | """
72 | depth_vars = filter(lambda x: x.name.startswith('dep'), tf.trainable_variables())
73 | discrim_dep_vars = filter(lambda x: x.name.startswith('discrim_dep'), tf.trainable_variables())
74 | """
75 | # depth--end
76 | gen_vars = filter(lambda x: x.name.startswith('gen'),
77 | tf.trainable_variables())
78 | code_vars = filter(lambda x: x.name.startswith('cod'),
79 | tf.trainable_variables())
80 | refine_vars = filter(lambda x: x.name.startswith('refine'),
81 | tf.trainable_variables())
82 |
83 | lr_VAE = tf.placeholder(tf.float32, shape=[])
84 | train_op_encode = tf.train.AdamOptimizer(
85 | lr_VAE, beta1=beta_D, beta2=0.9).minimize(
86 | cost_enc_tf, var_list=encode_vars)
87 | train_op_discrim = tf.train.AdamOptimizer(
88 | learning_rate_D, beta1=beta_D, beta2=0.9).minimize(
89 | cost_discrim_tf, var_list=discrim_vars, global_step=global_step)
90 | train_op_gen = tf.train.AdamOptimizer(
91 | learning_rate_G, beta1=beta_G, beta2=0.9).minimize(
92 | cost_gen_tf, var_list=gen_vars)
93 | train_op_code = tf.train.AdamOptimizer(
94 | lr_VAE, beta1=beta_G, beta2=0.9).minimize(
95 | cost_code_tf, var_list=code_vars)
96 | # depth--start
97 | """
98 | train_op_latent_depvox = tf.train.AdamOptimizer(
99 | lr_VAE, beta1=beta_G, beta2=0.9).minimize(
100 | cost_code_compare_tf, var_list=depth_vars)
101 | train_op_encode_dep=tf.train.AdamOptimizer(
102 | lr_VAE, beta1=beta_D, beta2=0.9).minimize(
103 | cost_enc_dep_tf, var_list=depth_vars)
104 | train_op_discrim_dep = tf.train.AdamOptimizer(
105 | learning_rate_D, beta1=beta_D, beta2=0.9).minimize(
106 | cost_discrim_dep_tf, var_list=discrim_dep_vars)
107 | train_op_gen_dep = tf.train.AdamOptimizer(
108 | learning_rate_G, beta1=beta_G, beta2=0.9).minimize(
109 | cost_gen_dep_tf, var_list=gen_vars)
110 | train_op_code_dep = tf.train.AdamOptimizer(
111 | lr_VAE, beta1=beta_G, beta2=0.9).minimize(
112 | cost_code_dep_tf, var_list=code_vars)
113 | """
114 | # depth--end
115 | train_op_refine = tf.train.AdamOptimizer(
116 | lr_VAE, beta1=beta_G, beta2=0.9).minimize(
117 | cost_gen_ref_tf, var_list=refine_vars)
118 | train_op_discrim_refine = tf.train.AdamOptimizer(
119 | learning_rate_D, beta1=beta_D, beta2=0.9).minimize(
120 | cost_discrim_ref_tf,
121 | var_list=discrim_vars,
122 | global_step=global_step)
123 |
124 | Z_tf_sample, vox_tf_sample = fcr_agan_model.samples_generator(
125 | visual_size=batch_size)
126 | sample_vox_tf, sample_refine_vox_tf = fcr_agan_model.refine_generator(
127 | visual_size=batch_size)
128 |     writer = tf.summary.FileWriter(cfg.DIR.LOG_PATH, sess.graph)
129 |     tf.global_variables_initializer().run()
130 |
131 | if mid_flag:
132 | chckpt_path = cfg.DIR.CHECK_PT_PATH + str(
133 | check_num) #+ '-' + str(check_num * freq)
134 | saver.restore(sess, chckpt_path)
135 | Z_var_np_sample = np.load(cfg.DIR.TRAIN_OBJ_PATH +
136 | '/sample_z.npy').astype(np.float32)
137 | Z_var_np_sample = Z_var_np_sample[:batch_size]
138 | print '---weights restored'
139 | else:
140 | Z_var_np_sample = np.random.normal(
141 | size=(batch_size, start_vox_size[0], start_vox_size[1],
142 | start_vox_size[2], dim_z)).astype(np.float32)
143 | np.save(cfg.DIR.TRAIN_OBJ_PATH + '/sample_z.npy', Z_var_np_sample)
144 |
145 | ite = check_num * freq + 1
146 | cur_epochs = int(ite / int(len(data_paths) / batch_size))
147 |
148 | #training
149 | for epoch in np.arange(cur_epochs, n_epochs):
150 | epoch_flag = True
151 | while epoch_flag:
152 | print '=iteration:%d, epoch:%d' % (ite, epoch)
153 | db_inds, epoch_flag = data_process.get_next_minibatch()
154 | batch_voxel = data_process.get_voxel(db_inds)
155 | batch_voxel_train = batch_voxel
156 | batch_tsdf = data_process.get_tsdf(db_inds)
157 | batch_tsdf_train = np.expand_dims(batch_tsdf, axis=-1)
158 | """
159 | batch_depth = data_process.get_depth(db_inds)
160 | batch_depth_train = batch_depth / 255.0
161 | """
162 | lr = learning_rate(cfg.LEARNING_RATE_V, ite)
163 |
164 | batch_z_var = np.random.normal(
165 | size=(batch_size, start_vox_size[0], start_vox_size[1],
166 | start_vox_size[2], dim_z)).astype(np.float32)
167 |
168 | if ite < refine_start:
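|                 # Before the refiner switch (cfg.SWITCHING_ITE), each
|                 # iteration updates the encoder twice, then the generator,
|                 # the volume discriminator and the code discriminator once
|                 # each.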
169 | for s in np.arange(2):
170 | _, recons_loss_val, code_encode_loss_val, cost_enc_val = sess.run(
171 | [
172 | train_op_encode, recons_loss_tf,
173 | code_encode_loss_tf, cost_enc_tf
174 | ],
175 | feed_dict={
176 | vox_tf: batch_voxel_train,
177 | tsdf_tf: batch_tsdf_train,
178 | Z_tf: batch_z_var,
179 | lr_VAE: lr
180 | },
181 | )
182 |
183 | _, gen_loss_val, cost_gen_val = sess.run(
184 | [train_op_gen, gen_loss_tf, cost_gen_tf],
185 | feed_dict={
186 | Z_tf: batch_z_var,
187 | vox_tf: batch_voxel_train,
188 | tsdf_tf: batch_tsdf_train,
189 | lr_VAE: lr
190 | },
191 | )
192 | # depth--start
193 | """
194 | _, cost_code_compare_val = sess.run(
195 | [train_op_latent_depvox, cost_code_compare_tf],
196 | feed_dict={vox_tf:batch_voxel_train, dep_tf:batch_depth_train, lr_VAE:lr},
197 | )
198 | _, recons_dep_loss_val, code_encode_dep_loss_val, cost_enc_dep_val = sess.run(
199 | [train_op_encode_dep, recons_dep_loss_tf, code_encode_dep_loss_tf, cost_enc_dep_tf],
200 | feed_dict={vox_tf:batch_voxel_train, dep_tf:batch_depth_train, tsdf_tf:batch_tsdf_train, Z_tf:batch_z_var, lr_VAE:lr},
201 | )
202 |
203 | _, gen_dep_loss_val, cost_gen_dep_val = sess.run(
204 | [train_op_gen_dep, gen_dep_loss_tf, cost_gen_dep_tf],
205 | feed_dict={Z_tf:batch_z_var, vox_tf:batch_voxel_train, dep_tf:batch_depth_train, tsdf_tf:batch_tsdf_train, lr_VAE:lr},
206 | )
207 | # depth--end
208 | """
209 | _, discrim_loss_val, cost_discrim_val = sess.run(
210 | [train_op_discrim, discrim_loss_tf, cost_discrim_tf],
211 | feed_dict={
212 | Z_tf: batch_z_var,
213 | vox_tf: batch_voxel_train,
214 | tsdf_tf: batch_tsdf_train
215 | },
216 | )
217 |
218 | _, cost_code_val, z_enc_val = sess.run(
219 | [train_op_code, cost_code_tf, z_enc_tf],
220 | feed_dict={
221 | Z_tf: batch_z_var,
222 | vox_tf: batch_voxel_train,
223 | tsdf_tf: batch_tsdf_train,
224 | lr_VAE: lr
225 | },
226 | )
227 | """
228 | # depth--start
229 | _, discrim_dep_loss_val, cost_discrim_dep_val = sess.run(
230 | [train_op_discrim_dep, discrim_dep_loss_tf, cost_discrim_dep_tf],
231 | feed_dict={Z_tf:batch_z_var, vox_tf:batch_voxel_train, dep_tf:batch_depth_train, tsdf_tf:batch_tsdf_train},
232 | )
233 |
234 |
235 | _, cost_code_dep_val, z_enc_dep_val= sess.run(
236 | [train_op_code_dep, cost_code_dep_tf, z_enc_dep_tf],
237 | feed_dict={Z_tf:batch_z_var, dep_tf:batch_depth_train, tsdf_tf:batch_tsdf_train, lr_VAE:lr},
238 | )
239 | """
240 | # depth--end
241 | summary = sess.run(
242 | summary_tf,
243 | feed_dict={
244 | Z_tf: batch_z_var,
245 | vox_tf: batch_voxel_train,
246 | tsdf_tf: batch_tsdf_train,
247 | lr_VAE: lr
248 | },
249 | )
250 |
251 | print 'reconstruction loss:', recons_loss_val if (
252 | 'recons_loss_val' in locals()) else 'None'
253 | # print ' (depth):', recons_dep_loss_val if ('recons_dep_loss_val' in locals()) else 'None'
254 |
255 | print ' code encode loss:', code_encode_loss_val if (
256 | 'code_encode_loss_val' in locals()) else 'None'
257 |
258 | # print ' (depth):', code_encode_dep_loss_val if ('code_encode_dep_loss_val' in locals()) else 'None'
259 |
260 | print ' gen loss:', gen_loss_val if (
261 | 'gen_loss_val' in locals()) else 'None'
262 |
263 | # print ' (depth):', gen_dep_loss_val if ('gen_dep_loss_val' in locals()) else 'None'
264 |
265 | print ' cost_encoder:', cost_enc_val if (
266 | 'cost_enc_val' in locals()) else 'None'
267 |
268 | # print ' (depth):', cost_enc_dep_val if ('cost_enc_dep_val' in locals()) else 'None'
269 |
270 | print ' cost_generator:', cost_gen_val if (
271 | 'cost_gen_val' in locals()) else 'None'
272 |
273 | # print ' (depth):', cost_gen_dep_val if ('cost_gen_dep_val' in locals()) else 'None'
274 |
275 | print ' cost_discriminator:', cost_discrim_val if (
276 | 'cost_discrim_val' in locals()) else 'None'
277 |
278 | # print ' (depth):', cost_discrim_dep_val if ('cost_discrim_dep_val' in locals()) else 'None'
279 |
280 | print ' cost_code:', cost_code_val if (
281 | 'cost_code_val' in locals()) else 'None'
282 |
283 | # print ' (depth):', cost_code_dep_val if ('cost_code_dep_val' in locals()) else 'None'
284 |
285 | # print ' diff_codes_vox_dep:', cost_code_compare_val if ('cost_code_compare_val' in locals()) else 'None'
286 |
287 |             print '   average of enc_z:', np.mean(np.mean(
288 |                 z_enc_val, 4)) if ('z_enc_val' in locals()) else 'None'
289 |
290 |             print '       std of enc_z:', np.mean(np.std(
291 |                 z_enc_val, 4)) if ('z_enc_val' in locals()) else 'None'
292 |
293 |             # print 'average of enc_z_dep:', np.mean(np.mean(z_enc_dep_val,4)) if ('z_enc_dep_val' in locals()) else 'None'
294 |
295 |             # print '    std of enc_z_dep:', np.mean(np.std(z_enc_dep_val,4)) if ('z_enc_dep_val' in locals()) else 'None'
296 |
297 | if np.mod(ite, freq) == 0:
298 | vox_models = sess.run(
299 | vox_tf_sample,
300 | feed_dict={Z_tf_sample: Z_var_np_sample},
301 | )
302 | vox_models_cat = np.argmax(vox_models, axis=4)
303 | record_vox = vox_models_cat[:record_vox_num]
304 | np.save(
305 | cfg.DIR.TRAIN_OBJ_PATH + '/' + str(ite / freq) +
306 | '.npy', record_vox)
307 | save_path = saver.save(
308 | sess,
309 | cfg.DIR.CHECK_PT_PATH + str(ite / freq),
310 | global_step=None)
311 |
312 | else:
313 | _, recons_loss_val, recons_loss_refine_val, gen_loss_refine_val, cost_gen_ref_val = sess.run(
314 | [
315 | train_op_refine, recons_loss_tf, recons_loss_refine_tf,
316 | gen_loss_refine_tf, cost_gen_ref_tf
317 | ],
318 | feed_dict={
319 | Z_tf: batch_z_var,
320 | vox_tf: batch_voxel_train,
321 | tsdf_tf: batch_tsdf_train,
322 | lr_VAE: lr
323 | },
324 | )
325 |
326 | _, discrim_loss_refine_val, cost_discrim_ref_val, summary = sess.run(
327 | [
328 | train_op_discrim_refine, discrim_loss_refine_tf,
329 | cost_discrim_ref_tf, summary_tf
330 | ],
331 | feed_dict={
332 | Z_tf: batch_z_var,
333 | vox_tf: batch_voxel_train,
334 | tsdf_tf: batch_tsdf_train
335 | },
336 | )
337 |
338 | print 'reconstruction loss:', recons_loss_val
339 | print ' recons refine loss:', recons_loss_refine_val
340 | print ' gen loss:', gen_loss_refine_val
341 | print ' cost_discriminator:', cost_discrim_ref_val
342 |
343 | if np.mod(ite, freq) == 0:
344 | vox_models = sess.run(
345 | vox_tf_sample,
346 | feed_dict={Z_tf_sample: Z_var_np_sample},
347 | )
348 | refined_models = sess.run(
349 | sample_refine_vox_tf,
350 | feed_dict={sample_vox_tf: vox_models})
351 | vox_models_cat = np.argmax(vox_models, axis=4)
352 | record_vox = vox_models_cat[:record_vox_num]
353 | np.save(
354 | cfg.DIR.TRAIN_OBJ_PATH + '/' + str(ite / freq) +
355 | '.npy', record_vox)
356 |
357 | vox_models_cat = np.argmax(refined_models, axis=4)
358 | record_vox = vox_models_cat[:record_vox_num]
359 | np.save(
360 | cfg.DIR.TRAIN_OBJ_PATH + '/' + str(ite / freq) +
361 | '_refine.npy', record_vox)
362 | save_path = saver.save(
363 | sess,
364 | cfg.DIR.CHECK_PT_PATH + str(ite / freq),
365 | global_step=None)
366 |
367 | writer.add_summary(summary, global_step=ite)
368 |
369 | ite += 1
370 |
--------------------------------------------------------------------------------
/tsdf.ply:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyida/gan-depth-semantic3d/c009ecbfd72761241cd124934509179acfa8f7c9/tsdf.ply
--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import random
4 |
5 | from config import cfg
6 |
7 |
8 | class DataProcess():
9 | def __init__(self, data_paths, batch_size, repeat=True):
10 | self.data_paths = data_paths
11 | self.num_data = len(data_paths)
12 | self.repeat = repeat
13 |
14 | self.batch_size = batch_size
15 | self.shuffle_db_inds()
16 | self.n_vox = cfg.CONST.N_VOX
17 | # self.n_dep = cfg.CONST.N_DEP
18 |
19 | def shuffle_db_inds(self):
20 | # Randomly permute the training roidb
21 | if self.repeat:
22 | self.perm = np.random.permutation(np.arange(self.num_data))
23 | else:
24 | self.perm = np.arange(self.num_data)
25 | self.cur = 0
26 |
27 | def get_next_minibatch(self):
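|         # Returns the next slice of permuted indices plus a flag that goes
|         # False once the permutation wraps (and is reshuffled), which
|         # train.py uses as its end-of-epoch signal.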
28 | flag = True
29 | if (self.cur + self.batch_size) >= self.num_data and self.repeat:
30 | self.shuffle_db_inds()
31 | flag = False
32 |
33 | db_inds = self.perm[self.cur:min(self.cur +
34 | self.batch_size, self.num_data)]
35 | self.cur += self.batch_size
36 | return db_inds, flag
37 |
38 | def get_tsdf(self, db_inds):
39 | batch_tsdf = np.zeros(
40 | (self.batch_size, self.n_vox[0], self.n_vox[1], self.n_vox[2]),
41 | dtype=np.float32)
42 |
43 | for batch_id, db_ind in enumerate(db_inds):
44 | sceneId, model_id = self.data_paths[db_ind]
45 |
46 | tsdf_fn = cfg.DIR.TSDF_PATH % (model_id)
47 | tsdf_data = np.load(tsdf_fn)
48 |
49 | batch_tsdf[batch_id, :, :, :] = tsdf_data
50 | return batch_tsdf
51 |
52 | def get_voxel(self, db_inds):
53 | batch_voxel = np.zeros(
54 | (self.batch_size, self.n_vox[0], self.n_vox[1], self.n_vox[2]),
55 | dtype=np.float32)
56 |
57 | for batch_id, db_ind in enumerate(db_inds):
58 | sceneId, model_id = self.data_paths[db_ind]
59 |
60 | voxel_fn = cfg.DIR.VOXEL_PATH % (model_id)
61 | voxel_data = np.load(voxel_fn)
62 |
63 | batch_voxel[batch_id, :, :, :] = voxel_data
64 | return batch_voxel
65 |
66 | """
67 | def get_depth(self, db_inds):
68 | batch_depth = np.zeros(
69 | (self.batch_size, self.n_dep[0], self.n_dep[1], self.n_dep[2]), dtype=np.float32)
70 |
71 | for batch_id, db_ind in enumerate(db_inds):
72 | sceneId, model_id = self.data_paths[db_ind]
73 |
74 | depth_fn = cfg.DIR.DEPTH_PATH % (model_id)
75 | depth_data = np.load(depth_fn)
76 |
77 | batch_depth[batch_id, :, :, :] = np.reshape(depth_data, [self.n_dep[0], self.n_dep[1], self.n_dep[2]])
78 | return batch_depth
79 | """
80 |
81 |
82 | def scene_model_id_pair(dataset_portion=[]):
83 | '''
84 |     Load (sceneId, model name) pairs from a SUNCG dataset.
85 | '''
86 |
87 |     scene_name_pair = []  # (model_path, model_id) pairs
88 |
89 | model_path = cfg.DIR.ROOT_PATH
90 | models = os.listdir(model_path)
91 |
92 | scene_name_pair.extend([(model_path, model_id) for model_id in models])
93 |
94 | num_models = len(scene_name_pair)
95 | portioned_scene_name_pair = scene_name_pair[int(
96 | num_models * dataset_portion[0]):int(num_models * dataset_portion[1])]
97 |
98 | return portioned_scene_name_pair
99 |
100 |
101 | def scene_model_id_pair_test(dataset_portion=[]):
102 |
103 | amount_of_test_sample = 200
104 |
105 |     scene_name_pair = []  # (model_path, model_id) pairs
106 |
107 | model_path = cfg.DIR.ROOT_PATH
108 | models = os.listdir(model_path)
109 |
110 | scene_name_pair.extend([(model_path, model_id) for model_id in models])
111 |
112 | num_models = len(scene_name_pair)
113 | data_paths_test = scene_name_pair[int(num_models * dataset_portion[1]) +
114 | 1:]
115 | # random.shuffle(data_paths_test)
116 | #data_paths = scene_name_pair[int(num_models * dataset_portion[1])+1:int(num_models * dataset_portion[1])+amount_of_test_sample+1]
117 | data_paths = data_paths_test[:amount_of_test_sample]
118 |
119 | num_models = len(data_paths)
120 | print '---amount of test data:' + str(num_models)
121 |
122 | n_vox = cfg.CONST.N_VOX
123 |
124 | batch_voxel = np.zeros((num_models, n_vox[0], n_vox[1], n_vox[2]),
125 | dtype=np.float32)
126 | # depth--start
127 | """
128 | n_dep = cfg.CONST.N_DEP
129 |
130 | batch_depth = np.zeros(
131 | (num_models, n_dep[0], n_dep[1], n_dep[2]), dtype=np.float32)
132 | """
133 | # depth--end
134 | batch_tsdf = np.zeros((num_models, n_vox[0], n_vox[1], n_vox[2], 1),
135 | dtype=np.float32)
136 |
137 | for i in np.arange(num_models):
138 | sceneId, model_id = data_paths[i]
139 |
140 | voxel_fn = cfg.DIR.VOXEL_PATH % (model_id)
141 | voxel_data = np.load(voxel_fn)
142 |
143 | batch_voxel[i, :, :, :] = voxel_data
144 | # depth--start
145 | """
146 | depth_fn = cfg.DIR.DEPTH_PATH % (model_id)
147 | depth_data = np.load(depth_fn)
148 | batch_depth[i, :, :, :] = np.reshape(depth_data, [n_dep[0], n_dep[1], n_dep[2]])
149 | """
150 | # depth--end
151 |
152 | tsdf_fn = cfg.DIR.TSDF_PATH % (model_id)
153 | tsdf_data = np.load(tsdf_fn)
154 | batch_tsdf[i, :, :, :, :] = np.reshape(
155 | tsdf_data, [n_vox[0], n_vox[1], n_vox[2], 1])
156 |
157 | return batch_voxel, batch_tsdf, num_models
158 |
159 |
160 | def onehot(voxel, class_num):
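|     # Expands integer labels to a one-hot channel, e.g. a [B, X, Y, Z]
|     # label volume becomes [B, X, Y, Z, class_num] with channel i set to 1
|     # wherever the label equals i.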
161 | onehot_voxels = np.zeros((voxel.shape[0], voxel.shape[1], voxel.shape[2],
162 | voxel.shape[3], class_num))
163 | for i in np.arange(class_num):
164 | onehot_voxel = np.zeros(voxel.shape)
165 | onehot_voxel[np.where(voxel == i)] = 1
166 | onehot_voxels[:, :, :, :, i] = onehot_voxel[:, :, :, :]
167 | return onehot_voxels
168 |
--------------------------------------------------------------------------------
/visualization/voxviz.py:
--------------------------------------------------------------------------------
1 | import os
2 | import matplotlib as mpl
3 | if os.environ.get('DISPLAY', '') == '':
4 | print('no display found. Using non-interactive Agg backend')
5 | mpl.use('Agg')
6 | import matplotlib.pyplot as plt
8 | import numpy as np
9 | from matplotlib import cm
10 | from skimage.transform import resize
11 | import argparse
12 | from progressbar import ProgressBar
13 |
14 |
15 | class ScanFile(object):
16 | def __init__(self, directory, prefix=None, postfix='.jpg'):
17 | self.directory = directory
18 | self.prefix = prefix
19 | self.postfix = postfix
20 |
21 | def scan_files(self):
22 | files_list = []
23 |
24 | for dirpath, dirnames, filenames in os.walk(self.directory):
25 |             '''
26 | dirpath is a string, the path to the directory.
27 | dirnames is a list of the names of the subdirectories in dirpath
28 | (excluding '.' and '..').
29 | filenames is a list of the names of the non-directory files
30 | in dirpath.
31 | '''
32 |             for special_file in filenames:
33 |                 if self.postfix:
34 |                     if special_file.endswith(self.postfix):
35 |                         files_list.append(os.path.join(dirpath, special_file))
36 |                 elif self.prefix:
37 |                     if special_file.startswith(self.prefix):
38 |                         files_list.append(os.path.join(dirpath, special_file))
39 |                 else:
40 |                     files_list.append(os.path.join(dirpath, special_file))
41 |
42 | return files_list
43 |
44 | def scan_subdir(self):
45 | subdir_list = []
46 | for dirpath, dirnames, files in os.walk(self.directory):
47 | subdir_list.append(dirpath)
48 | return subdir_list
49 |
86 |
87 | def normalize(arr):
88 | arr_min = np.min(arr)
89 | return (arr - arr_min) / (np.max(arr) - arr_min)
90 |
91 |
92 | def show_histogram(values):
93 | n, bins, patches = plt.hist(values.reshape(-1), 50, normed=1)
94 | bin_centers = 0.5 * (bins[:-1] + bins[1:])
95 |
96 | for c, p in zip(normalize(bin_centers), patches):
97 | plt.setp(p, 'facecolor', cm.hsv(c))
98 |
99 | plt.show()
100 |
101 |
102 | def explode(data):
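|     # Doubles the grid with one-cell gaps: the data lands on the even
|     # indices of a (2n-1)-sized grid, which (with expand_coordinates
|     # below) makes matplotlib's voxels() draw visible seams between cells.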
103 | shape_arr = np.array(data.shape)
104 | size = shape_arr[:3] * 2 - 1
105 | exploded = np.zeros(
106 | np.concatenate([size, shape_arr[3:]]), dtype=data.dtype)
107 | exploded[::2, ::2, ::2] = data
108 | return exploded
109 |
110 |
111 | def expand_coordinates(indices):
112 | x, y, z = indices
113 | x[1::2, :, :] += 1
114 | y[:, 1::2, :] += 1
115 | z[:, :, 1::2] += 1
116 | return x, y, z
117 |
118 |
119 | def scale_by(arr, fac):
120 | mean = np.mean(arr)
121 | return (arr - mean) * fac + mean
122 |
123 |
124 | def plot_image(arr, name='depth.png'):
125 | fig = plt.figure()
126 | ax = fig.add_subplot(111)
127 | # ax.set_axis_off()
128 | arr = (arr - np.min(arr)) / (np.max(arr) - np.min(arr)) * 255
129 | arr = np.uint8(arr)
130 | ax.set_axis_off()
131 | # ax.set_aspect('equal')
132 |
133 | plt.imshow(arr, cmap="hot")
134 | plt.savefig(name, bbox_inches='tight', pad_inches=0, transparent=True)
135 | plt.close(fig)
136 |
137 |
138 | def plot_cube(cube, name='voxel', angle=40, IMG_DIM=80):
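|     # Renders a semantic voxel cube: labels above 11 are zeroed out, the
|     # 12 classes are coloured via cm.Paired, and alpha is ~0.1 for any
|     # occupied voxel (0.1 * tanh(1000 * label)) so the render stays
|     # translucent.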
139 | from mpl_toolkits.mplot3d import Axes3D
140 |
141 | # cube = normalize(cube)
142 | cube[np.where(cube > 11)] = 0
143 | facecolors = cm.Paired((np.round(cube) / 11))
144 | # make the alpha channel more similar to each others while 0 is still 0
145 | facecolors[:, :, :, -1] = 0.1 * np.tanh(cube * 1000)
146 | facecolors = explode(facecolors)
147 | filled = facecolors[:, :, :, -1] != 0
148 |
149 | x, y, z = expand_coordinates(np.indices(np.array(filled.shape) + 1))
150 |
151 | # Here is a loop for generating demo files
152 | for idx, val in enumerate(np.arange(-40, -30, 10)):
153 | fig = plt.figure(figsize=(30 / 2.54, 30 / 2.54)) # , dpi=150)
154 | # plot
155 | ax1 = fig.add_subplot(111, projection='3d')
156 | # For samples in SUNCG, 20, -40 is a good choice for visualization
157 | # ax1.view_init(np.abs(90-val/2), val)
158 | ax1.view_init(angle, val)
159 | ax1.set_xlim(right=IMG_DIM * 2)
160 | ax1.set_ylim(top=IMG_DIM * 2)
161 | ax1.set_zlim(top=48 * 2)
162 | ax1.set_axis_off()
163 | ax1.voxels(
164 | x,
165 | y,
166 | z,
167 | filled,
168 | facecolors=facecolors,
169 | edgecolors=np.clip(2 * facecolors - 0.5, 0, 1))
170 |
171 | # plt.show()
172 | plt.savefig(
173 | name + '_' + format(idx, '04d') + '.png',
174 | bbox_inches='tight',
175 | pad_inches=0,
176 | transparent=True)
177 | plt.close(fig)
178 | """
179 |     objects_name = ['empty', 'ceiling', 'floor', 'wall', 'window', 'chair', 'bed', 'sofa', 'table', 'tvs', 'furniture', 'object']
180 | for x in range(1, 11):
181 | fig = plt.figure(figsize=(30/2.54, 30/2.54))
182 | filled = explode(cube) == x
183 | ax1 = fig.add_subplot(111, projection='3d')
184 | ax1.view_init(20, angle)
185 | ax1.set_xlim(right=IMG_DIM*2)
186 | ax1.set_ylim(top=IMG_DIM*2)
187 | ax1.set_zlim(top=48*2)
188 | ax1.set_title(objects_name[x])
189 | ax1.set_axis_off()
190 | ax1.voxels(x, y, z, filled, facecolors=facecolors)
191 | # plt.show()
192 | plt.savefig(name.replace('.png', '_'+objects_name[x]+'.png'), bbox_inches='tight', pad_inches=0, transparent=True)
193 | plt.close(fig)
194 | """
195 |
196 |
197 | def plot_depvox(dir_dep, dir_vox, target_folder):
198 | label_start = dir_dep.rfind('/') + 1
199 | label_end = dir_dep.find('.', label_start)
200 | arr = np.load(dir_dep)
201 | plot_image(
202 | arr,
203 | name=target_folder + '/depth/' + dir_dep[label_start:label_end] +
204 | '.png')
205 |
206 | arr = np.load(dir_vox)
207 |
208 | # ignore 255 and replace it with 0
209 | arr[arr == 255] = 0
210 |
211 | # show_histogram(arr)
212 | """
213 | transformed = np.clip(
214 | scale_by(np.clip(normalize(arr)-0.1, 0, 1)**0.4, 2)-0.1,
215 | 0, 1)
216 | """
217 | resized = resize(arr, (48, 80, 80), mode='constant')
218 | plot_cube(
219 | np.rollaxis(resized[:, :, :], 2, 0),
220 | name=target_folder + '/voxel/' + dir_dep[label_start:label_end] +
221 | '.png')
222 |
223 |
224 | if __name__ == "__main__":
225 |
226 |     parser = argparse.ArgumentParser(
227 |         description='Render depth maps and semantic voxel volumes from .npy files')
227 | parser.add_argument(
228 | '-d',
229 | action="store",
230 | dest="dir_dep",
231 | default="./SUNCGtrain_3001_5000",
232 | help='npy file for depth')
233 | parser.add_argument(
234 | '-v',
235 | action="store",
236 | dest="dir_vox",
237 | default="./SUNCGtrain_3001_5000",
238 | help='npy file for voxel')
239 | parser.add_argument(
240 | '-t',
241 | action="store",
242 | dest="target_folder",
243 | default="./target_folder",
244 | help='target folder for vis')
245 | parser.print_help()
246 | results = parser.parse_args()
247 |
248 | dir_dep = results.dir_dep
249 | dir_vox = results.dir_vox
250 | target_folder = results.target_folder
251 | scan = ScanFile(dir_dep)
252 | subdirs = scan.scan_subdir()
253 | files = scan.scan_files()
254 | """
255 | pbar = ProgressBar()
256 | for file_dep in pbar(files):
257 | file_vox = file_dep.replace(dir_dep, dir_vox, 1)
258 | plot_depvox(file_dep, file_vox, target_folder)
259 | """
260 | # vis for 3D FGAN
261 | pbar = ProgressBar()
262 | arr = np.load(results.dir_vox)
263 | # arr = np.expand_dims(arr, axis=0)
264 | arr[arr == 255] = 0
265 | for idx in pbar(range(
266 | 0,
267 | arr.shape[0])): #([37, 69, 73, 76, 91, 93, 100, 121, 154, 156]):
268 | resized = arr[idx, :, :, :]
269 | # resized = normalize(resized)
270 | resized = np.squeeze(resized)
271 | # resized = resize(resized, (48, 80, 80), mode='constant')
272 | plot_cube(
273 | np.flip(np.rollaxis(resized[:, :, :], 2, 0), 1),
274 | name=target_folder + '/' + str(idx))
275 |
--------------------------------------------------------------------------------