├── framework.png
├── VGG
│   ├── 0000011.jpg
│   ├── 0000041.jpg
│   ├── perceptual_loss_test.py
│   ├── perceptual_loss.py
│   └── vgg.py
├── script3
│   ├── run1.sh
│   ├── run.sh
│   ├── model22.py
│   ├── model23.py
│   ├── model.py
│   ├── baseline22.py
│   ├── baseline23.py
│   └── main.py
├── evaluation_metrics
│   ├── run.sh
│   ├── metrics_tf.py
│   └── metrics_tf_cvact.py
├── README.md
├── geometry
│   ├── projector.py
│   ├── test_geometry.py
│   ├── utils.py
│   └── Geometry.py
└── load_data
    ├── load_data_op.py
    ├── load_data_cvusa.py
    ├── load_data_cvact.py
    ├── load_data_cvact_unaligned.py
    └── load_data_cvact_half.py
/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YujiaoShi/Sat2StrPanoramaSynthesis/HEAD/framework.png -------------------------------------------------------------------------------- /VGG/0000011.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YujiaoShi/Sat2StrPanoramaSynthesis/HEAD/VGG/0000011.jpg -------------------------------------------------------------------------------- /VGG/0000041.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YujiaoShi/Sat2StrPanoramaSynthesis/HEAD/VGG/0000041.jpg -------------------------------------------------------------------------------- /VGG/perceptual_loss_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' 3 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' 4 | 5 | 6 | import tensorflow as tf 7 | from VGG.vgg import build_vgg19 8 | import cv2 9 | import numpy as np 10 | 11 | x = np.arange(0, 224*224*3).reshape((1, 224, 224, 3))/(224*224*3)*255 12 | 13 | x1 = tf.constant(x) 14 | 15 | vgg_model_file = './imagenet-vgg-verydeep-19.mat' 16 | 17 | net = build_vgg19(x1, vgg_model_file) 18 | 19 | a = 1 20 | 21 | # 22 | # img1 = cv2.resize(cv2.imread('0000011.jpg'), (256, 256)).astype(np.float32)[np.newaxis,...] 23 | # img2 = cv2.resize(cv2.imread('0000011.jpg'), (256, 256)).astype(np.float32)[np.newaxis,...]
24 | # 25 | # real_img = tf.constant(img1) 26 | # fake_img = tf.constant(img2) 27 | # 28 | # def compute_error(real, fake): 29 | # return tf.reduce_mean(tf.abs(fake - real)) 30 | # vgg_model_file = './imagenet-vgg-verydeep-19.mat' 31 | # 32 | # vgg_real = build_vgg19(real_img, vgg_model_file) 33 | # vgg_fake = build_vgg19(fake_img, vgg_model_file) 34 | # 35 | # p0 = compute_error(vgg_real['input'], vgg_fake['input']) 36 | # p1 = compute_error(vgg_real['conv1_2'], vgg_fake['conv1_2']) / 2.6 37 | # p2 = compute_error(vgg_real['conv2_2'], vgg_fake['conv2_2']) / 4.8 38 | # p3 = compute_error(vgg_real['conv3_2'], vgg_fake['conv3_2']) / 3.7 39 | # p4 = compute_error(vgg_real['conv4_2'], vgg_fake['conv4_2']) / 5.6 40 | # p5 = compute_error(vgg_real['conv5_2'], vgg_fake['conv5_2']) * 10 / 1.5 41 | # total_loss = p0 + p1 + p2 + p3 + p4 + p5 42 | # 43 | # sess = tf.Session() 44 | # 45 | # loss = sess.run(total_loss) -------------------------------------------------------------------------------- /script3/run1.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 1 #--mode test --checkpoint tt 3 | CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVUSA --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 #--mode test --checkpoint tt 4 | 5 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0 --heightPlaneNum 32 --mode test --checkpoint tt 6 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0 --heightPlaneNum 64 --mode test --checkpoint tt 7 | # 8 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 1 --heightPlaneNum 32 --mode test --checkpoint tt 9 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 1 --heightPlaneNum 64 --mode test --checkpoint tt 10 | # 11 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 1 --heightPlaneNum 32 --mode test --checkpoint tt 12 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 1 --heightPlaneNum 64 --mode test --checkpoint tt 13 | 14 | 15 | 16 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACTunaligned --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 1 --heightPlaneNum 1 # --mode test --checkpoint tt 17 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACTunaligned --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 1 --heightPlaneNum 64 #--mode test --checkpoint tt 18 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACTunaligned --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 1 --heightPlaneNum 1 #--mode test --checkpoint tt 19 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACTunaligned --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 1 --heightPlaneNum 64 #--mode test --checkpoint tt 20 | -------------------------------------------------------------------------------- /script3/run.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACT --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 --mode test --checkpoint tt 3 | CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 0 
--perceptual_weight_grd 1 --skip 0 --heightPlaneNum 1 #--mode test --checkpoint tt 4 | 5 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0 --heightPlaneNum 32 --mode test --checkpoint tt 6 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0 --heightPlaneNum 64 --mode test --checkpoint tt 7 | # 8 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 1 --heightPlaneNum 32 --mode test --checkpoint tt 9 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 1 --heightPlaneNum 64 --mode test --checkpoint tt 10 | # 11 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 1 --heightPlaneNum 32 --mode test --checkpoint tt 12 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 1 --heightPlaneNum 64 --mode test --checkpoint tt 13 | 14 | 15 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACTunaligned --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 1 # --mode test --checkpoint tt 16 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACTunaligned --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 #--mode test --checkpoint tt 17 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACTunaligned --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0 --heightPlaneNum 1 #--mode test --checkpoint tt 18 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACTunaligned --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0 --heightPlaneNum 64 #--mode test --checkpoint tt 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /VGG/perceptual_loss.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | from .vgg import build_vgg19 4 | 5 | 6 | 7 | def compute_error(real, fake): 8 | return tf.reduce_mean(tf.abs(fake - real)) 9 | 10 | 11 | def perceptual_loss(real_img, fake_img): 12 | real_img = (real_img+1.)/2. * 255. 13 | fake_img = (fake_img+1.)/2. * 255. 14 | vgg_model_file = '../VGG/imagenet-vgg-verydeep-19.mat' 15 | 16 | vgg_real = build_vgg19(real_img, vgg_model_file) 17 | vgg_fake = build_vgg19(fake_img, vgg_model_file) 18 | 19 | p0 = compute_error(vgg_real['input'], vgg_fake['input']) 20 | p1 = compute_error(vgg_real['conv1_2'], vgg_fake['conv1_2']) / 2.6 21 | p2 = compute_error(vgg_real['conv2_2'], vgg_fake['conv2_2']) / 4.8 22 | p3 = compute_error(vgg_real['conv3_2'], vgg_fake['conv3_2']) / 3.7 23 | p4 = compute_error(vgg_real['conv4_2'], vgg_fake['conv4_2']) / 5.6 24 | p5 = compute_error(vgg_real['conv5_2'], vgg_fake['conv5_2']) * 10 / 1.5 25 | total_loss = p0 + p1 + p2 + p3 + p4 + p5 26 | 27 | return total_loss 28 | 29 | 30 | def perceptual_loss_n(real_img, fake_imgs): 31 | 32 | vgg_model_file = '../VGG/imagenet-vgg-verydeep-19.mat' 33 | 34 | real_img = (real_img + 1.) / 2. * 255. 35 | vgg_real = build_vgg19(real_img, vgg_model_file) 36 | 37 | loss = [] 38 | 39 | for fake in fake_imgs: 40 | fake = (fake + 1.) / 2. * 255. 
41 | 42 | vgg_fake = build_vgg19(fake, vgg_model_file) 43 | 44 | p0 = compute_error(vgg_real['input'], vgg_fake['input']) 45 | p1 = compute_error(vgg_real['conv1_2'], vgg_fake['conv1_2']) / 2.6 46 | p2 = compute_error(vgg_real['conv2_2'], vgg_fake['conv2_2']) / 4.8 47 | p3 = compute_error(vgg_real['conv3_2'], vgg_fake['conv3_2']) / 3.7 48 | p4 = compute_error(vgg_real['conv4_2'], vgg_fake['conv4_2']) / 5.6 49 | p5 = compute_error(vgg_real['conv5_2'], vgg_fake['conv5_2']) * 10 / 1.5 50 | 51 | loss.append(p0 + p1 + p2 + p3 + p4 + p5) 52 | 53 | total_loss = tf.stack(loss) 54 | min_loss = tf.reduce_min(total_loss) 55 | 56 | return min_loss 57 | 58 | -------------------------------------------------------------------------------- /evaluation_metrics/run.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../stoa/GeneratedData/CVUSA/pix2pix1_aer_L1Grd_100.0_PerGrd_0.0/ 3 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../stoa/GeneratedData/CVUSA/pix2pix1_aer_L1Grd_0.0_PerGrd_1.0/ 4 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../stoa/GeneratedData/CVUSA/pix2pix_aer_L1Grd_100.0_PerGrd_0.0/ 5 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../stoa/GeneratedData/CVUSA/pix2pix_aer_L1Grd_0.0_PerGrd_1.0/ 6 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_1/image/ 7 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_1/image/ 8 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_1/image/ 9 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_1/image/ 10 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_0/image/ 11 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_0/image/ 12 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_0/image/ 13 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_0/image/ 14 | 15 | 16 | 17 | 18 | #CUDA_VISIBLE_DEVICES=1 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_1/image/ 19 | #CUDA_VISIBLE_DEVICES=1 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_1/image/ 20 | ##CUDA_VISIBLE_DEVICES=0 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_1/image/ 21 | ##CUDA_VISIBLE_DEVICES=0 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_1/image/ 22 | #CUDA_VISIBLE_DEVICES=1 python metrics_tf_cvact.py --dir 
../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_0/image/ 23 | #CUDA_VISIBLE_DEVICES=1 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_0/image/ 24 | #CUDA_VISIBLE_DEVICES=1 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_0/image/ 25 | #CUDA_VISIBLE_DEVICES=1 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_0/image/ 26 | 27 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sat2StrPanoramaSynthesis 2 | Geometry-Guided Street-View Panorama Synthesis from Satellite Imagery, TPAMI 2022 3 | ![alt text](./framework.png) 4 | 5 | # Abstract 6 | This paper presents a new approach for synthesizing a novel street-view panorama given a satellite image, as if captured from the geographical location at the center of the satellite image. Existing works approach this as an image generation problem, adopting generative adversarial networks to implicitly learn the cross-view transformations while ignoring the geometric constraints. 7 | In this paper, we make the geometric correspondences between the satellite and street-view images explicit so as to facilitate the transfer of information between domains. 8 | Specifically, we observe that when a 3D point is visible in both views, and the height of the point relative to the camera is known, there is a deterministic mapping between the projected points in the images. 9 | Motivated by this, we develop a novel satellite-to-street-view projection (S2SP) module which learns the height map and projects the satellite image to the ground-level viewpoint, explicitly connecting corresponding pixels. 10 | With these projected satellite images as input, we next employ a generator to synthesize realistic street-view panoramas that are geometrically consistent with the satellite images. 11 | Our S2SP module is differentiable and the whole framework is trained in an end-to-end manner. 12 | Extensive experimental results on two cross-view benchmark datasets demonstrate that our method generates more accurate and consistent images than existing approaches. 13 | 14 | ### Experiment Dataset 15 | 16 | Our experiments are conducted on the CVUSA and CVACT datasets. Our processed data can be downloaded [here](https://anu365-my.sharepoint.com/:f:/g/personal/u6293587_anu_edu_au/EuOBUDUQNClJvCpQ8bD1hnoBjdRBWxsHOVp946YVahiMGg?e=F4yRAC). 17 | 18 | (The link may expire from time to time. If it does not work, please drop me an email: yujiao.shi@anu.edu.au.) 19 | 20 | ### Preparation 21 | 22 | Please download the VGG model pretrained on ImageNet from [here](https://anu365-my.sharepoint.com/:u:/g/personal/u6293587_anu_edu_au/EVueknEGIBpKolDJ3JrqEjsBey5P12JFuR36xpO-inhXHg?e=kkmD4r). It is used for the VGG perceptual loss.
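
### S2SP geometry (illustrative)

The abstract notes that, once the height of a 3D point relative to the camera is assumed, the mapping between its projection in the satellite image and its projection in the street-view panorama is deterministic. The snippet below is a minimal, self-contained sketch of that mapping for a single panorama pixel, assuming an equirectangular panorama, a north-aligned satellite image centred on the camera, and a fixed ground resolution; all function and parameter names here are illustrative only and are not part of this repository's interface. The full, differentiable projection used for training is implemented in `geometry/Geometry.py`.

    import numpy as np

    def pano_pixel_to_sat_pixel(u, v, height, pano_w=512, pano_h=128,
                                sat_size=256, meters_per_pixel=0.5, vfov=np.pi):
        # Azimuth grows with the panorama column, measured clockwise from north.
        theta = 2.0 * np.pi * u / pano_w
        # Elevation: +vfov/2 at the top row, -vfov/2 at the bottom row.
        phi = vfov * (0.5 - v / pano_h)
        # A point at `height` metres relative to the camera, seen at elevation `phi`,
        # lies at horizontal distance r = height / tan(phi); the horizon (phi = 0)
        # has no finite intersection and is not handled in this sketch.
        r = height / np.tan(phi)
        # Convert the polar offset (r, theta) to satellite pixel coordinates.
        cx = cy = sat_size / 2.0
        x = cx + (r / meters_per_pixel) * np.sin(theta)   # satellite column
        y = cy - (r / meters_per_pixel) * np.cos(theta)   # satellite row
        return x, y

Sampling the satellite image at the mapped coordinates, for a set of candidate heights (controlled by `--heightPlaneNum` below), is roughly how the projected satellite images fed to the generator are formed.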
23 | 24 | ### Codes 25 | 26 | #### Training 27 | 28 | cd script3 29 | 30 | CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 --mode train 31 | 32 | CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACT --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 --mode train 33 | 34 | 35 | #### Testing 36 | 37 | cd script3 38 | 39 | CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 --mode test 40 | 41 | CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACT --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 --mode test 42 | 43 | 44 | ### Publications 45 | This work is published in TPAMI 2022: 46 | [Geometry-Guided Street-View Panorama Synthesis from Satellite Imagery] 47 | 48 | If you find our work or code useful, please cite the following publication: 49 | *Yujiao Shi, Dylan Campbell, Xin Yu, and Hongdong Li. Geometry-Guided Street-View Panorama Synthesis from Satellite Imagery. TPAMI 2022.* 50 | 51 | @article{shi2020where, 52 | title={Geometry-Guided Street-View Panorama Synthesis from Satellite Imagery}, 53 | author={Shi, Yujiao and Campbell, Dylan and Yu, Xin and Li, Hongdong}, 54 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)}, 55 | year={2022} 56 | } 57 | 58 | 59 | 60 |
-------------------------------------------------------------------------------- /geometry/projector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow.compat.v1 as tf 3 | tf.disable_v2_behavior() 4 | from MIP import tf_shape 5 | 6 | 7 | def over_composite(rgbas): 8 | """Combines a list of RGBA images using the over operation. 9 | 10 | Combines RGBA images from back to front with the over operation. 11 | The alpha image of the first image is ignored and assumed to be 1.0. 12 | 13 | Args: 14 | rgbas: A list of [batch, H, W, 4] RGBA images, combined from back to front. 15 | Returns: 16 | Composited RGB image. 17 | """ 18 | for i in range(len(rgbas)): 19 | rgb = rgbas[i][:, :, :, 0:3] 20 | alpha = rgbas[i][:, :, :, 3:] 21 | if i == 0: 22 | output = rgb 23 | else: 24 | rgb_by_alpha = rgb * alpha 25 | output = rgb_by_alpha + output * (1.0 - alpha) 26 | return output 27 | 28 | 29 | def mpi_render_grd_view(batch_rgbas, share_alpha=True): 30 | 31 | batch, height, width, channel = batch_rgbas.get_shape().as_list() 32 | 33 | if share_alpha: 34 | num_mpi_planes = int(channel/4) 35 | rgba_layers = tf.reshape(batch_rgbas, [-1, height, width, num_mpi_planes, 4]) 36 | rgb = rgba_layers[..., :3] 37 | alpha = tf.expand_dims(rgba_layers[..., -1], axis=-1) 38 | else: 39 | num_mpi_planes = int(channel / 5) 40 | rgba_layers = tf.reshape(batch_rgbas, [-1, height, width, num_mpi_planes, 5]) 41 | rgb = rgba_layers[..., :3] 42 | alpha = tf.expand_dims(rgba_layers[..., 4], axis=-1) 43 | 44 | alpha = (alpha + 1.)/2.
45 | rgba_layers = tf.transpose(tf.concat([rgb, alpha], axis=-1), [3, 0, 1, 2, 4]) 46 | 47 | rgba_list = [] 48 | for i in range(int(num_mpi_planes)): 49 | rgba_list.append(rgba_layers[i]) 50 | 51 | synthesis_image = over_composite(rgba_list) 52 | # shape = [batch, height, width, 3] 53 | 54 | return synthesis_image 55 | 56 | 57 | def mpi_render_aer_view(batch_rgbas, share_alpha=True): 58 | batch, height, width, channel = batch_rgbas.get_shape().as_list() 59 | 60 | if share_alpha: 61 | num_mpi_planes = int(channel / 4) 62 | rgba_layers = tf.reshape(batch_rgbas, [-1, height, width, num_mpi_planes, 4]) 63 | rgb = rgba_layers[..., :3] 64 | alpha = tf.expand_dims(rgba_layers[..., -1], axis=-1) 65 | else: 66 | num_mpi_planes = int(channel / 5) 67 | rgba_layers = tf.reshape(batch_rgbas, [-1, height, width, num_mpi_planes, 5]) 68 | rgb = rgba_layers[..., :3] 69 | alpha = tf.expand_dims(rgba_layers[..., -1], axis=-1) 70 | alpha = (alpha + 1.) / 2. 71 | rgba_layers = tf.transpose(tf.concat([rgb, alpha], axis=-1), [1, 0, 2, 3, 4]) 72 | # shape = [height, batch, width, num_mpi_planes, 4] 73 | 74 | rgba_list = [] 75 | for i in range(int(height)): 76 | rgba_list.append(rgba_layers[i]) 77 | 78 | rgba_list = rgba_list[::-1][:int(height//2)] 79 | 80 | synthesis_image = over_composite(rgba_list) 81 | # shape = [batch, width, num_mpi_planes, 3] 82 | 83 | return synthesis_image 84 | 85 | 86 | def rtheta2uv(athetaimage, aer_size): 87 | ''' 88 | :param athetaimage: shape = [batch, width, PlaneNum, 3] width-->theta PlaneNum-->radius 89 | :param aer_size: 90 | :return: 91 | ''' 92 | batch, width, PlaneNum, channel = tf_shape(athetaimage, 4) 93 | i = np.arange(aer_size) 94 | j = np.arange(aer_size) 95 | jj, ii = np.meshgrid(j, i) 96 | 97 | center = aer_size / 2 - 0.5 98 | theta = np.arctan(-(jj - center) / (ii - center)) 99 | theta[np.where(ii < center)] += np.pi 100 | theta[np.where((ii >= center) & (jj >= center))] += 2 * np.pi 101 | theta = theta/(2 * np.pi)*width 102 | 103 | RadiusByPixel = np.sqrt((ii - center) ** 2 + (jj - center) ** 2) 104 | RadiusByPixel = (1-RadiusByPixel/aer_size*2)*PlaneNum 105 | 106 | uv = np.stack([RadiusByPixel, theta], axis=-1) 107 | uv = uv.astype(np.float32) 108 | warp = tf.stack([uv] * batch, axis=0) 109 | 110 | sampler_output = tf.contrib.resampler.resampler(athetaimage, warp) 111 | # shape = [batch, aer_size, aer_size, 3] 112 | 113 | return sampler_output 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /VGG/vgg.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # VGG network definition from: 3 | # 4 | # https://github.com/CQFIO/PhotographicImageSynthesis/blob/master/demo_1024p.py 5 | # 6 | # Released under an MIT License. 7 | """VGG network definition. 
8 | """ 9 | 10 | from __future__ import division 11 | import numpy as np 12 | import tensorflow as tf 13 | 14 | 15 | def build_net(ntype, nin, nwb=None, name=None): 16 | if ntype == 'conv': 17 | return tf.nn.relu( 18 | tf.nn.conv2d( 19 | nin, nwb[0], strides=[1, 1, 1, 1], padding='SAME', name=name) + 20 | nwb[1]) 21 | elif ntype == 'pool': 22 | return tf.nn.avg_pool( 23 | nin, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 24 | 25 | 26 | def get_weight_bias(vgg_layers, i): 27 | weights = vgg_layers[i][0][0][2][0][0] 28 | weights = tf.constant(weights) 29 | bias = vgg_layers[i][0][0][2][0][1] 30 | bias = tf.constant(np.reshape(bias, (bias.size))) 31 | return weights, bias 32 | 33 | 34 | def build_vgg19(input, model_filepath, reuse=False): 35 | with tf.variable_scope('vgg', reuse=reuse): 36 | net = {} 37 | input = tf.cast(input, tf.float32) 38 | import scipy.io as sio 39 | # with open(model_filepath, 'r') as f: 40 | vgg_rawnet = sio.loadmat(model_filepath) 41 | vgg_layers = vgg_rawnet['layers'][0] 42 | imagenet_mean = tf.constant( 43 | [123.6800, 116.7790, 103.9390], shape=[1, 1, 1, 3]) 44 | net['input'] = input - imagenet_mean 45 | net['conv1_1'] = build_net( 46 | 'conv', 47 | net['input'], 48 | get_weight_bias(vgg_layers, 0), 49 | name='vgg_conv1_1') 50 | net['conv1_2'] = build_net( 51 | 'conv', 52 | net['conv1_1'], 53 | get_weight_bias(vgg_layers, 2), 54 | name='vgg_conv1_2') 55 | net['pool1'] = build_net('pool', net['conv1_2']) 56 | net['conv2_1'] = build_net( 57 | 'conv', 58 | net['pool1'], 59 | get_weight_bias(vgg_layers, 5), 60 | name='vgg_conv2_1') 61 | net['conv2_2'] = build_net( 62 | 'conv', 63 | net['conv2_1'], 64 | get_weight_bias(vgg_layers, 7), 65 | name='vgg_conv2_2') 66 | net['pool2'] = build_net('pool', net['conv2_2']) 67 | net['conv3_1'] = build_net( 68 | 'conv', 69 | net['pool2'], 70 | get_weight_bias(vgg_layers, 10), 71 | name='vgg_conv3_1') 72 | net['conv3_2'] = build_net( 73 | 'conv', 74 | net['conv3_1'], 75 | get_weight_bias(vgg_layers, 12), 76 | name='vgg_conv3_2') 77 | net['conv3_3'] = build_net( 78 | 'conv', 79 | net['conv3_2'], 80 | get_weight_bias(vgg_layers, 14), 81 | name='vgg_conv3_3') 82 | net['conv3_4'] = build_net( 83 | 'conv', 84 | net['conv3_3'], 85 | get_weight_bias(vgg_layers, 16), 86 | name='vgg_conv3_4') 87 | net['pool3'] = build_net('pool', net['conv3_4']) 88 | net['conv4_1'] = build_net( 89 | 'conv', 90 | net['pool3'], 91 | get_weight_bias(vgg_layers, 19), 92 | name='vgg_conv4_1') 93 | net['conv4_2'] = build_net( 94 | 'conv', 95 | net['conv4_1'], 96 | get_weight_bias(vgg_layers, 21), 97 | name='vgg_conv4_2') 98 | net['conv4_3'] = build_net( 99 | 'conv', 100 | net['conv4_2'], 101 | get_weight_bias(vgg_layers, 23), 102 | name='vgg_conv4_3') 103 | net['conv4_4'] = build_net( 104 | 'conv', 105 | net['conv4_3'], 106 | get_weight_bias(vgg_layers, 25), 107 | name='vgg_conv4_4') 108 | net['pool4'] = build_net('pool', net['conv4_4']) 109 | net['conv5_1'] = build_net( 110 | 'conv', 111 | net['pool4'], 112 | get_weight_bias(vgg_layers, 28), 113 | name='vgg_conv5_1') 114 | net['conv5_2'] = build_net( 115 | 'conv', 116 | net['conv5_1'], 117 | get_weight_bias(vgg_layers, 30), 118 | name='vgg_conv5_2') 119 | return net 120 | 121 | # ****************************************************************************** 122 | -------------------------------------------------------------------------------- /load_data/load_data_op.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import 
tensorflow.compat.v1 as tf 3 | tf.disable_v2_behavior() 4 | import math 5 | 6 | # Examples = collections.namedtuple("Examples", "paths, aer, pano, mask, count, steps_per_epoch") 7 | Examples = collections.namedtuple("Examples", "paths, aer, pano, tanpolar, count, steps_per_epoch") 8 | 9 | 10 | def preprocess(image): 11 | with tf.name_scope("preprocess"): 12 | # [0, 1] => [-1, 1] 13 | return image * 2 - 1 14 | 15 | 16 | def load_examples(mode='train', batch_size=2): 17 | 18 | if mode=='train': 19 | file_list = '../../../Data/OP/splits/train_split.txt' 20 | else: 21 | file_list = '../../../Data/OP/splits/test_split.txt' 22 | img_root = '../../../Data/OP/' 23 | 24 | data_list = [] 25 | 26 | with open(file_list, 'r') as f: 27 | lines = f.readlines() 28 | for line in lines: 29 | items = line.split(',') 30 | if mode == 'train' and items[0].replace('_nadir', '') != items[2]: 31 | continue 32 | else: 33 | data_list.append([img_root + 'aerial/' + items[0], 34 | img_root + 'panorama/' + items[1].replace('\n', ''), 35 | # img_root + 'refinenetSeman/aerial/' + items[0], 36 | # img_root + 'refinenetSeman/panorama_visualize/' + items[1].replace('\n', ''), 37 | img_root + 'tanpolar/' + items[0], 38 | items[1].replace('\n', '')]) 39 | 40 | aer_list = [item[0] for item in data_list] 41 | pano_list = [item[1] for item in data_list] 42 | # mask_list = [item[2] for item in data_list] 43 | tanpolar_list = [item[2] for item in data_list] 44 | # polar_list = [item[4] for item in data_list] 45 | 46 | aer_queue = tf.train.string_input_producer(aer_list, shuffle=mode=='train', seed=2020) 47 | pano_queue = tf.train.string_input_producer(pano_list, shuffle=mode=='train', seed=2020) 48 | # mask_queue = tf.train.string_input_producer(mask_list, shuffle=mode=='train', seed=2020) 49 | tanpolar_queue = tf.train.string_input_producer(tanpolar_list, shuffle=mode == 'train', seed=2020) 50 | # polar_queue = tf.train.string_input_producer(polar_list, shuffle=mode == 'train', seed=2020) 51 | 52 | reader = tf.WholeFileReader() 53 | aer_paths, aer_contents = reader.read(aer_queue) 54 | pano_paths, pano_contents = reader.read(pano_queue) 55 | # mask_paths, mask_contents = reader.read(mask_queue) 56 | tanpolar_paths, tanpolar_contents = reader.read(tanpolar_queue) 57 | # polar_paths, polar_contents = reader.read(polar_queue) 58 | 59 | aer = tf.image.decode_jpeg(aer_contents) 60 | panos = tf.image.decode_jpeg(pano_contents) 61 | # mask = tf.image.decode_png(mask_contents) 62 | tanpolar = tf.image.decode_png(tanpolar_contents) 63 | # polar = tf.image.decode_png(polar_contents) 64 | 65 | aer = tf.image.convert_image_dtype(aer, tf.float32) 66 | panos = tf.image.convert_image_dtype(panos, tf.float32) 67 | tanpolar = tf.image.convert_image_dtype(tanpolar, tf.float32) 68 | # mask = tf.image.convert_image_dtype(mask, tf.float32) 69 | # polar = tf.image.convert_image_dtype(polar, tf.float32) 70 | 71 | aer = preprocess(aer) 72 | panos = preprocess(panos) 73 | tanpolar = preprocess(tanpolar) 74 | # mask = preprocess(mask) 75 | # polar = preprocess(polar) 76 | 77 | aer.set_shape([None, None, 3]) 78 | panos.set_shape([None, None, 3]) 79 | # mask.set_shape([None, None, 3]) 80 | tanpolar.set_shape([None, None, 3]) 81 | # polar.set_shape([None, None, 3]) 82 | 83 | aer = tf.image.resize_images(aer, [256, 256], method=tf.image.ResizeMethod.AREA) 84 | panos = tf.image.resize_images(panos, [128, 512], method=tf.image.ResizeMethod.AREA) 85 | # mask = tf.image.resize_images(mask, [128, 512], method=tf.image.ResizeMethod.AREA) 86 | # mask = 
tf.cast(tf.image.resize_images(mask, [128, 512], method=tf.image.ResizeMethod.AREA), tf.int32) 87 | # mask = 0.9 * tf.one_hot(tf.squeeze(mask, axis=-1), depth=4) 88 | tanpolar = tf.image.resize_images(tanpolar, [128, 512], method=tf.image.ResizeMethod.AREA) 89 | # polar = tf.image.resize_images(polar, [128, 512], method=tf.image.ResizeMethod.AREA) 90 | 91 | aer_batch, panos_batch, pano_paths_batch, tanpolar_batch = \ 92 | tf.train.batch([aer, panos, pano_paths, tanpolar], batch_size=batch_size) 93 | # aer_batch, panos_batch, mask_batch, aer_paths_batch = \ 94 | # tf.train.batch([aer, panos, mask, aer_paths], batch_size=batch_size) 95 | 96 | steps_per_epoch = int(math.ceil(len(data_list) / batch_size)) 97 | 98 | return Examples( 99 | paths=pano_paths_batch, 100 | aer=aer_batch, 101 | pano=panos_batch, 102 | # mask=mask_batch, 103 | tanpolar=tanpolar_batch, 104 | # polar=polar_batch, 105 | count=len(data_list), 106 | steps_per_epoch=steps_per_epoch, 107 | ) 108 | 109 | 110 | -------------------------------------------------------------------------------- /load_data/load_data_cvusa.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import os.path 3 | 4 | import tensorflow.compat.v1 as tf 5 | tf.disable_v2_behavior() 6 | import math 7 | 8 | # Examples = collections.namedtuple("Examples", "paths, aer, pano, mask, count, steps_per_epoch") 9 | Examples = collections.namedtuple("Examples", "paths, aer, pano, tanpolar, polar, count, steps_per_epoch") 10 | 11 | 12 | def preprocess(image): 13 | with tf.name_scope("preprocess"): 14 | # [0, 1] => [-1, 1] 15 | return image * 2 - 1 16 | 17 | 18 | def load_examples(mode='train', batch_size=2): 19 | 20 | img_root = '../../../Data/CVUSA/' 21 | 22 | if mode=='train': 23 | file_list = os.path.join(img_root, 'splits/train-19zl.csv') 24 | elif mode=='test': 25 | file_list = os.path.join(img_root, 'splits/val-19zl.csv') 26 | 27 | data_list = [] 28 | 29 | with open(file_list, 'r') as f: 30 | for line in f: 31 | data = line.split(',') 32 | # data_list.append([img_root + data[0], img_root + data[1], img_root + data[2][:-1]]) 33 | data_list.append([img_root + data[0], img_root + data[1], 34 | # img_root + data[2][:-1].replace('annotations', 'annotations_visualize'), 35 | img_root + data[0].replace('bingmap/19', 'a2g').replace('jpg', 'png'), 36 | img_root + data[0].replace('bing', 'polar').replace('jpg', 'png')]) 37 | 38 | aer_list = [item[0] for item in data_list] 39 | pano_list = [item[1] for item in data_list] 40 | # mask_list = [item[2] for item in data_list] 41 | tanpolar_list = [item[2] for item in data_list] 42 | polar_list = [item[3] for item in data_list] 43 | 44 | aer_queue = tf.train.string_input_producer(aer_list, shuffle=mode=='train', seed=2020) 45 | pano_queue = tf.train.string_input_producer(pano_list, shuffle=mode=='train', seed=2020) 46 | tanpolar_queue = tf.train.string_input_producer(tanpolar_list, shuffle=mode == 'train', seed=2020) 47 | polar_queue = tf.train.string_input_producer(polar_list, shuffle=mode == 'train', seed=2020) 48 | 49 | # aer_queue = tf.data.Dataset.from_tensor_slices(aer_list) 50 | # pano_queue = tf.data.Dataset.from_tensor_slices(pano_list) 51 | # tanpolar_queue = tf.data.Dataset.from_tensor_slices(tanpolar_list) 52 | # polar_queue = tf.data.Dataset.from_tensor_slices(polar_list) 53 | # if mode=='train': 54 | # buffer_size = len(data_list) 55 | # aer_queue = aer_queue.shuffle(buffer_size, seed=2020) 56 | # pano_queue = pano_queue.shuffle(buffer_size, seed=2020) 57 | 
# tanpolar_queue = tanpolar_queue.shuffle(buffer_size, seed=2020) 58 | # polar_queue = polar_queue.shuffle(buffer_size, seed=2020) 59 | 60 | reader = tf.WholeFileReader() 61 | aer_paths, aer_contents = reader.read(aer_queue) 62 | pano_paths, pano_contents = reader.read(pano_queue) 63 | # mask_paths, mask_contents = reader.read(mask_queue) 64 | tanpolar_paths, tanpolar_contents = reader.read(tanpolar_queue) 65 | polar_paths, polar_contents = reader.read(polar_queue) 66 | 67 | aer = tf.image.decode_jpeg(aer_contents) 68 | panos = tf.image.decode_jpeg(pano_contents) 69 | # mask = tf.image.decode_png(mask_contents) 70 | tanpolar = tf.image.decode_png(tanpolar_contents) 71 | polar = tf.image.decode_png(polar_contents) 72 | 73 | aer = tf.image.convert_image_dtype(aer, tf.float32) 74 | panos = tf.image.convert_image_dtype(panos, tf.float32) 75 | # mask = tf.image.convert_image_dtype(mask, tf.float32) 76 | tanpolar = tf.image.convert_image_dtype(tanpolar, tf.float32) 77 | polar = tf.image.convert_image_dtype(polar, tf.float32) 78 | 79 | aer = preprocess(aer) 80 | panos = preprocess(panos) 81 | # mask = preprocess(mask) 82 | tanpolar = preprocess(tanpolar) 83 | polar = preprocess(polar) 84 | 85 | aer.set_shape([None, None, 3]) 86 | panos.set_shape([None, None, 3]) 87 | # mask.set_shape([None, None, 3]) 88 | tanpolar.set_shape([None, None, 3]) 89 | polar.set_shape([None, None, 3]) 90 | 91 | aer = tf.image.resize_images(aer, [256, 256], method=tf.image.ResizeMethod.AREA) 92 | panos = tf.image.resize_images(panos, [128, 512], method=tf.image.ResizeMethod.AREA) 93 | # mask = tf.image.resize_images(mask, [128, 512], method=tf.image.ResizeMethod.AREA) 94 | # mask = tf.cast(tf.image.resize_images(mask, [128, 512], method=tf.image.ResizeMethod.AREA), tf.int32) 95 | # mask = 0.9 * tf.one_hot(tf.squeeze(mask, axis=-1), depth=4) 96 | tanpolar = tf.image.resize_images(tanpolar, [128, 512], method=tf.image.ResizeMethod.AREA) 97 | polar = tf.image.resize_images(polar, [128, 512], method=tf.image.ResizeMethod.AREA) 98 | 99 | # aer_batch, panos_batch, mask_batch, aer_paths_batch, tanpolar_batch, polar_batch = \ 100 | # tf.train.batch([aer, panos, mask, aer_paths, tanpolar, polar], batch_size=batch_size) 101 | aer_batch, panos_batch, aer_paths_batch, tanpolar_batch, polar_batch = \ 102 | tf.train.batch([aer, panos, aer_paths, tanpolar, polar], batch_size=batch_size) 103 | 104 | steps_per_epoch = int(math.ceil(len(data_list) / batch_size)) 105 | 106 | return Examples( 107 | paths=aer_paths_batch, 108 | aer=aer_batch, 109 | pano=panos_batch, 110 | # mask=mask_batch, 111 | tanpolar=tanpolar_batch, 112 | polar=polar_batch, 113 | count=len(data_list), 114 | steps_per_epoch=steps_per_epoch, 115 | ) 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /evaluation_metrics/metrics_tf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # from skimage.measure import compare_ssim, compare_psnr, compare_mse, compare_nrmse 3 | import cv2 4 | 5 | import os 6 | # os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' 7 | # os.environ['CUDA_VISIBLE_DEVICES'] = '1' 8 | import tensorflow.compat.v1 as tf 9 | tf.disable_v2_behavior() 10 | from tensorflow.python.ops import math_ops 11 | 12 | import argparse 13 | 14 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | parser.add_argument('--dir', type=str, default='../scri/GeneratedData/CVUSA/pix2pix_tanpolar_L1Grd_0.0_PerGrd_1.0/') 16 | 17 | 
opt = parser.parse_args() 18 | 19 | input_dir = opt.dir 20 | 21 | def safe_divide(numerator, denominator, name='safe_divide'): 22 | 23 | return tf.where(math_ops.greater(denominator, 0), 24 | math_ops.divide(numerator, denominator), 25 | tf.zeros_like(numerator), name=name) 26 | 27 | 28 | def RMSE(input, target): 29 | return tf.sqrt(tf.reduce_mean((input - target)**2, axis=(1, 2, 3))) 30 | 31 | 32 | def SharpDiff(inputs, targets): 33 | ''' 34 | :param inputs: shape = [batch, height, width, channel] 35 | :param target: shape = [batch, height, width, channel] 36 | :param eps: 37 | :return: 38 | ''' 39 | s = inputs.get_shape().as_list() 40 | gradx_in, grady_in = tf.image.image_gradients(inputs) 41 | gradx_ta, grady_ta = tf.image.image_gradients(targets) 42 | diff_gradients = tf.abs(gradx_in - gradx_ta)[:, 1: s[1]-1, 1: s[2]-1, :] + tf.abs(grady_in - grady_ta)[:, 1: s[1]-1, 1: s[2]-1, :] 43 | prediction_error = 64* tf.reduce_mean(diff_gradients, axis=[1, 2, 3]) 44 | 45 | sharpdiff = 10 * tf.log(255.*255./prediction_error)/tf.log(10.) 46 | 47 | return sharpdiff 48 | 49 | 50 | def get_val_id_list(): 51 | val_file = '../../../Data/CVUSA/splits/val-19zl.csv' 52 | 53 | id_list = [] 54 | with open(val_file, 'r') as f: 55 | for line in f: 56 | data = line.split(',') 57 | pano_id = (data[0].split('/')[-1]).split('.')[0] 58 | id_list.append(pano_id) 59 | 60 | return id_list 61 | 62 | 63 | def input_data_generator(input_dir, target_dir='../../../Data/CVUSA/streetview/targets/1/', batch_size=1): 64 | id_list = get_val_id_list() 65 | 66 | num_batches = len(id_list)//batch_size 67 | 68 | for i in range(num_batches + 1): 69 | 70 | input_list = [] 71 | target_list = [] 72 | 73 | img_num_per_batch = batch_size if i [-1, 1] 15 | return image * 2 - 1 16 | 17 | 18 | def load_examples(mode='train', batch_size=2): 19 | 20 | # allDataList = '../OriNet_CVACT/CVACT_orientations/ACT_data.mat' 21 | img_root = '../../../Data/CVACT/' 22 | allDataList = os.path.join(img_root, 'ACT_data.mat') 23 | 24 | exist_aer_list = os.listdir(img_root + 'satview_correct') 25 | exist_grd_list = os.listdir(img_root + 'streetview') 26 | 27 | __cur_allid = 0 # for training 28 | 29 | # load the mat 30 | anuData = sio.loadmat(allDataList) 31 | 32 | data_list = [] 33 | for i in range(0, len(anuData['panoIds'])): 34 | # grd_id_align = img_root + 'streetview/' + anuData['panoIds'][i] + '_grdView.png' 35 | # sat_id_ori = img_root + 'satview_polish/' + anuData['panoIds'][i] + '_satView_polish.png' 36 | grd_id_align = anuData['panoIds'][i] + '_grdView.png' 37 | sat_id_ori = anuData['panoIds'][i] + '_satView_polish.png' 38 | data_list.append([grd_id_align, sat_id_ori]) 39 | 40 | if mode=='train': 41 | training_inds = anuData['trainSet']['trainInd'][0][0] - 1 42 | trainNum = len(training_inds) 43 | trainList = [] 44 | for k in range(trainNum): 45 | trainList.append(data_list[training_inds[k][0]]) 46 | pano_list = [img_root + 'streetview/' + item[0] for item in trainList if item[0] in exist_grd_list and item[1] in exist_aer_list] 47 | aer_list = [img_root + 'satview_correct/' + item[1] for item in trainList if item[0] in exist_grd_list and item[1] in exist_aer_list] 48 | # pano_seman_list = [img_root + 'streetseman_visualize/' + item[0] for item in trainList if 49 | # item[0] in exist_grd_list and item[1] in exist_aer_list] 50 | tanpolar_list = [img_root + 'a2g_correct/' + item[1] for item in trainList if 51 | item[0] in exist_grd_list and item[1] in exist_aer_list] 52 | polar_list = [img_root + 'polarmap/' + item[1] for item in trainList if 53 | 
item[0] in exist_grd_list and item[1] in exist_aer_list] 54 | 55 | 56 | else: 57 | 58 | val_inds = anuData['valSet']['valInd'][0][0] - 1 59 | valNum = len(val_inds) 60 | valList = [] 61 | for k in range(valNum): 62 | valList.append(data_list[val_inds[k][0]]) 63 | pano_list = [img_root + 'streetview/' + item[0] for item in valList if item[0] in exist_grd_list and item[1] in exist_aer_list] 64 | aer_list = [img_root + 'satview_correct/' + item[1] for item in valList if item[0] in exist_grd_list and item[1] in exist_aer_list] 65 | # pano_seman_list = [img_root + 'streetseman_visualize/' + item[0] for item in valList if 66 | # item[0] in exist_grd_list and item[1] in exist_aer_list] 67 | # aer_seman_list = [img_root + 'satseman/' + item[1] for item in valList if 68 | # item[0] in exist_grd_list and item[1] in exist_aer_list] 69 | tanpolar_list = [img_root + 'a2g_correct/' + item[1] for item in valList if 70 | item[0] in exist_grd_list and item[1] in exist_aer_list] 71 | polar_list = [img_root + 'polarmap/' + item[1] for item in valList if 72 | item[0] in exist_grd_list and item[1] in exist_aer_list] 73 | 74 | aer_queue = tf.train.string_input_producer(aer_list, shuffle=mode == 'train', seed=2020) 75 | pano_queue = tf.train.string_input_producer(pano_list, shuffle=mode == 'train', seed=2020) 76 | # pano_seman_queue = tf.train.string_input_producer(pano_seman_list, shuffle=mode == 'train', seed=2020) 77 | tanpolar_queue = tf.train.string_input_producer(tanpolar_list, shuffle=mode == 'train', seed=2020) 78 | polar_queue = tf.train.string_input_producer(polar_list, shuffle=mode == 'train', seed=2020) 79 | 80 | reader = tf.WholeFileReader() 81 | aer_paths, aer_contents = reader.read(aer_queue) 82 | pano_paths, pano_contents = reader.read(pano_queue) 83 | # pano_seman_paths, pano_seman_contents = reader.read(pano_seman_queue) 84 | tanpolar_paths, tanpolar_contents = reader.read(tanpolar_queue) 85 | polar_paths, polar_contents = reader.read(polar_queue) 86 | 87 | aer = tf.image.decode_png(aer_contents) 88 | panos = tf.image.decode_png(pano_contents) 89 | # panos_seman = tf.image.decode_png(pano_seman_contents) 90 | tanpolar = tf.image.decode_png(tanpolar_contents) 91 | polar = tf.image.decode_png(polar_contents) 92 | 93 | aer = tf.image.convert_image_dtype(aer, tf.float32) 94 | panos = tf.image.convert_image_dtype(panos, tf.float32) 95 | # panos_seman = tf.image.convert_image_dtype(panos_seman, tf.float32) 96 | tanpolar = tf.image.convert_image_dtype(tanpolar, tf.float32) 97 | polar = tf.image.convert_image_dtype(polar, tf.float32) 98 | 99 | aer = preprocess(aer) 100 | panos = preprocess(panos) 101 | # panos_seman = preprocess(panos_seman) 102 | tanpolar = preprocess(tanpolar) 103 | polar = preprocess(polar) 104 | 105 | aer.set_shape([None, None, 3]) 106 | panos.set_shape([None, None, 3]) 107 | # panos_seman.set_shape([None, None, 3]) 108 | tanpolar.set_shape([None, None, 3]) 109 | polar.set_shape([None, None, 3]) 110 | 111 | aer = tf.image.resize_images(aer, [256, 256], method=tf.image.ResizeMethod.AREA) 112 | panos = tf.image.resize_images(panos, [128, 512], method=tf.image.ResizeMethod.AREA) 113 | # panos_seman = tf.image.resize_images(panos_seman, [128, 512], method=tf.image.ResizeMethod.AREA) 114 | # panos_seman = tf.cast(tf.image.resize_images(panos_seman, [128, 512], method=tf.image.ResizeMethod.AREA), tf.int32) 115 | # panos_seman = tf.one_hot(tf.squeeze(panos_seman, axis=-1), depth=4) 116 | tanpolar = tf.image.resize_images(tanpolar, [128, 512], method=tf.image.ResizeMethod.AREA) 117 | polar 
= tf.image.resize_images(polar, [128, 512], method=tf.image.ResizeMethod.AREA) 118 | 119 | aer_batch, panos_batch, grd_paths_batch, tanpolar_batch, polar_batch = \ 120 | tf.train.batch([aer, panos, pano_paths, tanpolar, polar], batch_size=batch_size) 121 | 122 | steps_per_epoch = int(math.ceil(len(pano_list) / batch_size)) 123 | 124 | return Examples( 125 | paths=grd_paths_batch, 126 | aer=aer_batch, 127 | pano=panos_batch, 128 | # mask=panos_seman_batch, 129 | tanpolar=tanpolar_batch, 130 | polar = polar_batch, 131 | count=len(pano_list), 132 | steps_per_epoch=steps_per_epoch, 133 | ) 134 | -------------------------------------------------------------------------------- /load_data/load_data_cvact_unaligned.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import tensorflow.compat.v1 as tf 3 | tf.disable_v2_behavior() 4 | import math 5 | import scipy.io as sio 6 | import os 7 | 8 | # Examples = collections.namedtuple("Examples", "paths, aer, pano, mask, count, steps_per_epoch, tanpolar, polar") 9 | Examples = collections.namedtuple("Examples", "paths, aer, pano, count, steps_per_epoch, tanpolar, polar") 10 | 11 | 12 | def preprocess(image): 13 | with tf.name_scope("preprocess"): 14 | # [0, 1] => [-1, 1] 15 | return image * 2 - 1 16 | 17 | 18 | def load_examples(mode='train', batch_size=2): 19 | 20 | # allDataList = '../OriNet_CVACT/CVACT_orientations/ACT_data.mat' 21 | img_root = '../../../Data/CVACT/' 22 | allDataList = os.path.join(img_root, 'ACT_data.mat') 23 | 24 | exist_aer_list = os.listdir(img_root + 'satview_correct') 25 | exist_grd_list = os.listdir(img_root + 'streetview') 26 | 27 | __cur_allid = 0 # for training 28 | 29 | # load the mat 30 | anuData = sio.loadmat(allDataList) 31 | 32 | data_list = [] 33 | for i in range(0, len(anuData['panoIds'])): 34 | # grd_id_align = img_root + 'streetview/' + anuData['panoIds'][i] + '_grdView.png' 35 | # sat_id_ori = img_root + 'satview_polish/' + anuData['panoIds'][i] + '_satView_polish.png' 36 | grd_id_align = anuData['panoIds'][i] + '_grdView.png' 37 | sat_id_ori = anuData['panoIds'][i] + '_satView_polish.png' 38 | data_list.append([grd_id_align, sat_id_ori]) 39 | 40 | if mode=='train': 41 | training_inds = anuData['trainSet']['trainInd'][0][0] - 1 42 | trainNum = len(training_inds) 43 | trainList = [] 44 | for k in range(trainNum): 45 | trainList.append(data_list[training_inds[k][0]]) 46 | pano_list = [img_root + 'streetview/' + item[0] for item in trainList if item[0] in exist_grd_list and item[1] in exist_aer_list] 47 | aer_list = [img_root + 'satview_correct/' + item[1] for item in trainList if item[0] in exist_grd_list and item[1] in exist_aer_list] 48 | # pano_seman_list = [img_root + 'streetseman_visualize/' + item[0] for item in trainList if 49 | # item[0] in exist_grd_list and item[1] in exist_aer_list] 50 | tanpolar_list = [img_root + 'a2g_correct/' + item[1] for item in trainList if 51 | item[0] in exist_grd_list and item[1] in exist_aer_list] 52 | polar_list = [img_root + 'polarmap/' + item[1] for item in trainList if 53 | item[0] in exist_grd_list and item[1] in exist_aer_list] 54 | 55 | 56 | else: 57 | 58 | val_inds = anuData['valSet']['valInd'][0][0] - 1 59 | valNum = len(val_inds) 60 | valList = [] 61 | for k in range(valNum): 62 | valList.append(data_list[val_inds[k][0]]) 63 | pano_list = [img_root + 'streetview/' + item[0] for item in valList if item[0] in exist_grd_list and item[1] in exist_aer_list] 64 | aer_list = [img_root + 'satview_polish/' + item[1] for 
item in valList if item[0] in exist_grd_list and item[1] in exist_aer_list] 65 | # pano_seman_list = [img_root + 'streetseman_visualize/' + item[0] for item in valList if 66 | # item[0] in exist_grd_list and item[1] in exist_aer_list] 67 | # aer_seman_list = [img_root + 'satseman/' + item[1] for item in valList if 68 | # item[0] in exist_grd_list and item[1] in exist_aer_list] 69 | tanpolar_list = [img_root + 'a2g_origin/' + item[1] for item in valList if 70 | item[0] in exist_grd_list and item[1] in exist_aer_list] 71 | polar_list = [img_root + 'polarmap/' + item[1] for item in valList if 72 | item[0] in exist_grd_list and item[1] in exist_aer_list] 73 | 74 | aer_queue = tf.train.string_input_producer(aer_list, shuffle=mode == 'train', seed=2020) 75 | pano_queue = tf.train.string_input_producer(pano_list, shuffle=mode == 'train', seed=2020) 76 | # pano_seman_queue = tf.train.string_input_producer(pano_seman_list, shuffle=mode == 'train', seed=2020) 77 | tanpolar_queue = tf.train.string_input_producer(tanpolar_list, shuffle=mode == 'train', seed=2020) 78 | polar_queue = tf.train.string_input_producer(polar_list, shuffle=mode == 'train', seed=2020) 79 | 80 | reader = tf.WholeFileReader() 81 | aer_paths, aer_contents = reader.read(aer_queue) 82 | pano_paths, pano_contents = reader.read(pano_queue) 83 | # pano_seman_paths, pano_seman_contents = reader.read(pano_seman_queue) 84 | tanpolar_paths, tanpolar_contents = reader.read(tanpolar_queue) 85 | polar_paths, polar_contents = reader.read(polar_queue) 86 | 87 | aer = tf.image.decode_png(aer_contents) 88 | panos = tf.image.decode_png(pano_contents) 89 | # panos_seman = tf.image.decode_png(pano_seman_contents) 90 | tanpolar = tf.image.decode_png(tanpolar_contents) 91 | polar = tf.image.decode_png(polar_contents) 92 | 93 | aer = tf.image.convert_image_dtype(aer, tf.float32) 94 | panos = tf.image.convert_image_dtype(panos, tf.float32) 95 | # panos_seman = tf.image.convert_image_dtype(panos_seman, tf.float32) 96 | tanpolar = tf.image.convert_image_dtype(tanpolar, tf.float32) 97 | polar = tf.image.convert_image_dtype(polar, tf.float32) 98 | 99 | aer = preprocess(aer) 100 | panos = preprocess(panos) 101 | # panos_seman = preprocess(panos_seman) 102 | tanpolar = preprocess(tanpolar) 103 | polar = preprocess(polar) 104 | 105 | aer.set_shape([None, None, 3]) 106 | panos.set_shape([None, None, 3]) 107 | # panos_seman.set_shape([None, None, 3]) 108 | tanpolar.set_shape([None, None, 3]) 109 | polar.set_shape([None, None, 3]) 110 | 111 | aer = tf.image.resize_images(aer, [256, 256], method=tf.image.ResizeMethod.AREA) 112 | panos = tf.image.resize_images(panos, [128, 512], method=tf.image.ResizeMethod.AREA) 113 | # panos_seman = tf.image.resize_images(panos_seman, [128, 512], method=tf.image.ResizeMethod.AREA) 114 | # panos_seman = tf.cast(tf.image.resize_images(panos_seman, [128, 512], method=tf.image.ResizeMethod.AREA), tf.int32) 115 | # panos_seman = tf.one_hot(tf.squeeze(panos_seman, axis=-1), depth=4) 116 | tanpolar = tf.image.resize_images(tanpolar, [128, 512], method=tf.image.ResizeMethod.AREA) 117 | polar = tf.image.resize_images(polar, [128, 512], method=tf.image.ResizeMethod.AREA) 118 | 119 | aer_batch, panos_batch, grd_paths_batch, tanpolar_batch, polar_batch = \ 120 | tf.train.batch([aer, panos, pano_paths, tanpolar, polar], batch_size=batch_size) 121 | 122 | steps_per_epoch = int(math.ceil(len(pano_list) / batch_size)) 123 | 124 | return Examples( 125 | paths=grd_paths_batch, 126 | aer=aer_batch, 127 | pano=panos_batch, 128 | # 
mask=panos_seman_batch, 129 | tanpolar=tanpolar_batch, 130 | polar = polar_batch, 131 | count=len(pano_list), 132 | steps_per_epoch=steps_per_epoch, 133 | ) 134 | -------------------------------------------------------------------------------- /load_data/load_data_cvact_half.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import tensorflow.compat.v1 as tf 3 | tf.disable_v2_behavior() 4 | import math 5 | import scipy.io as sio 6 | import os 7 | 8 | Examples = collections.namedtuple("Examples", "paths, aer, pano, mask, count, steps_per_epoch, tanpolar, polar") 9 | 10 | 11 | def preprocess(image): 12 | with tf.name_scope("preprocess"): 13 | # [0, 1] => [-1, 1] 14 | return image * 2 - 1 15 | 16 | 17 | def load_examples(mode='train', batch_size=2): 18 | 19 | allDataList = '../OriNet_CVACT/CVACT_orientations/ACT_data.mat' 20 | img_root = '../../Data/ANU_data_small/' 21 | 22 | exist_aer_list = os.listdir(img_root + 'satview_polish') 23 | exist_grd_list = os.listdir(img_root + 'streetview') 24 | 25 | __cur_allid = 0 # for training 26 | 27 | # load the mat 28 | anuData = sio.loadmat(allDataList) 29 | 30 | data_list = [] 31 | for i in range(0, len(anuData['panoIds'])): 32 | # grd_id_align = img_root + 'streetview/' + anuData['panoIds'][i] + '_grdView.png' 33 | # sat_id_ori = img_root + 'satview_polish/' + anuData['panoIds'][i] + '_satView_polish.png' 34 | grd_id_align = anuData['panoIds'][i] + '_grdView.png' 35 | sat_id_ori = anuData['panoIds'][i] + '_satView_polish.png' 36 | data_list.append([grd_id_align, sat_id_ori]) 37 | 38 | if mode=='train': 39 | training_inds = anuData['trainSet']['trainInd'][0][0] - 1 40 | trainNum = len(training_inds) 41 | trainList = [] 42 | for k in range(trainNum): 43 | trainList.append(data_list[training_inds[k][0]]) 44 | pano_list = [img_root + 'streetview/' + item[0] for item in trainList if item[0] in exist_grd_list and item[1] in exist_aer_list] 45 | aer_list = [img_root + 'satview_polish/' + item[1] for item in trainList if item[0] in exist_grd_list and item[1] in exist_aer_list] 46 | pano_seman_list = [img_root + 'streetseman_visualize/' + item[0] for item in trainList if 47 | item[0] in exist_grd_list and item[1] in exist_aer_list] 48 | aer_seman_list = [img_root + 'satseman/' + item[1] for item in trainList if 49 | item[0] in exist_grd_list and item[1] in exist_aer_list] 50 | tanpolar_list = [img_root + 'tanpolarmap/' + item[1] for item in trainList if 51 | item[0] in exist_grd_list and item[1] in exist_aer_list] 52 | polar_list = [img_root + 'polarmap/' + item[1] for item in trainList if 53 | item[0] in exist_grd_list and item[1] in exist_aer_list] 54 | 55 | 56 | else: 57 | 58 | val_inds = anuData['valSet']['valInd'][0][0] - 1 59 | valNum = len(val_inds) 60 | valList = [] 61 | for k in range(valNum): 62 | valList.append(data_list[val_inds[k][0]]) 63 | pano_list = [img_root + 'streetview/' + item[0] for item in valList if item[0] in exist_grd_list and item[1] in exist_aer_list] 64 | aer_list = [img_root + 'satview_polish/' + item[1] for item in valList if item[0] in exist_grd_list and item[1] in exist_aer_list] 65 | pano_seman_list = [img_root + 'streetseman_visualize/' + item[0] for item in valList if 66 | item[0] in exist_grd_list and item[1] in exist_aer_list] 67 | aer_seman_list = [img_root + 'satseman/' + item[1] for item in valList if 68 | item[0] in exist_grd_list and item[1] in exist_aer_list] 69 | tanpolar_list = [img_root + 'tanpolarmap/' + item[1] for item in valList if 70 | item[0] in 
exist_grd_list and item[1] in exist_aer_list] 71 | polar_list = [img_root + 'polarmap/' + item[1] for item in valList if 72 | item[0] in exist_grd_list and item[1] in exist_aer_list] 73 | 74 | aer_queue = tf.train.string_input_producer(aer_list, shuffle=mode == 'train', seed=2020) 75 | pano_queue = tf.train.string_input_producer(pano_list, shuffle=mode == 'train', seed=2020) 76 | pano_seman_queue = tf.train.string_input_producer(pano_seman_list, shuffle=mode == 'train', seed=2020) 77 | tanpolar_queue = tf.train.string_input_producer(tanpolar_list, shuffle=mode == 'train', seed=2020) 78 | polar_queue = tf.train.string_input_producer(polar_list, shuffle=mode == 'train', seed=2020) 79 | 80 | reader = tf.WholeFileReader() 81 | aer_paths, aer_contents = reader.read(aer_queue) 82 | pano_paths, pano_contents = reader.read(pano_queue) 83 | pano_seman_paths, pano_seman_contents = reader.read(pano_seman_queue) 84 | tanpolar_paths, tanpolar_contents = reader.read(tanpolar_queue) 85 | polar_paths, polar_contents = reader.read(polar_queue) 86 | 87 | aer = tf.image.decode_png(aer_contents) 88 | panos = tf.image.decode_png(pano_contents) 89 | panos_seman = tf.image.decode_png(pano_seman_contents) 90 | tanpolar = tf.image.decode_png(tanpolar_contents) 91 | polar = tf.image.decode_png(polar_contents) 92 | 93 | aer = tf.image.convert_image_dtype(aer, tf.float32) 94 | panos = tf.image.convert_image_dtype(panos, tf.float32) 95 | panos_seman = tf.image.convert_image_dtype(panos_seman, tf.float32) 96 | tanpolar = tf.image.convert_image_dtype(tanpolar, tf.float32) 97 | polar = tf.image.convert_image_dtype(polar, tf.float32) 98 | 99 | aer = preprocess(aer) 100 | panos = preprocess(panos) 101 | panos_seman = preprocess(panos_seman) 102 | tanpolar = preprocess(tanpolar) 103 | polar = preprocess(polar) 104 | 105 | aer.set_shape([None, None, 3]) 106 | panos.set_shape([None, None, 3]) 107 | panos_seman.set_shape([None, None, 3]) 108 | tanpolar.set_shape([None, None, 3]) 109 | polar.set_shape([None, None, 3]) 110 | 111 | aer = tf.image.resize_images(aer, [256, 256], method=tf.image.ResizeMethod.AREA) 112 | panos = tf.image.resize_images(panos, [128, 512], method=tf.image.ResizeMethod.AREA) 113 | panos_seman = tf.image.resize_images(panos_seman, [128, 512], method=tf.image.ResizeMethod.AREA) 114 | # panos_seman = tf.cast(tf.image.resize_images(panos_seman, [128, 512], method=tf.image.ResizeMethod.AREA), tf.int32) 115 | # panos_seman = tf.one_hot(tf.squeeze(panos_seman, axis=-1), depth=4) 116 | tanpolar = tf.image.resize_images(tanpolar, [128, 512], method=tf.image.ResizeMethod.AREA) 117 | polar = tf.image.resize_images(polar, [128, 512], method=tf.image.ResizeMethod.AREA) 118 | 119 | aer_batch, panos_batch, panos_seman_batch, aer_paths_batch, tanpolar_batch, polar_batch = \ 120 | tf.train.batch([aer, panos, panos_seman, aer_paths, tanpolar, polar], batch_size=batch_size) 121 | 122 | steps_per_epoch = int(math.ceil(len(pano_list) / batch_size)) 123 | 124 | return Examples( 125 | paths=aer_paths_batch, 126 | aer=aer_batch, 127 | pano=panos_batch, 128 | mask=panos_seman_batch, 129 | tanpolar=tanpolar_batch, 130 | polar = polar_batch, 131 | count=len(pano_list), 132 | steps_per_epoch=steps_per_epoch, 133 | ) 134 | 135 | -------------------------------------------------------------------------------- /script3/model22.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | 3 | import collections 4 | from VGG.perceptual_loss import perceptual_loss 5 | from 
geometry.Geometry import * 6 | from geometry.projector import * 7 | from geometry.utils import * 8 | 9 | 10 | EPS = 1e-7 11 | 12 | target_height = 128 13 | target_width = 512 14 | aer_size = 256 15 | grd_height = -2 16 | max_height = 30 17 | 18 | Model = collections.namedtuple("Model", "outputs, predict_real, predict_fake, discrim_loss, discrim_grads_and_vars, " 19 | "estimated_height, generator_inputs," 20 | "gen_loss_GAN, gen_loss_L1, gen_loss_perceptual, gen_grads_and_vars, train") 21 | 22 | 23 | def create_generator(generator_inputs, ref_images, a): 24 | 25 | generator_outputs_channels = 3 26 | 27 | ngf = a.ngf 28 | layers = [] 29 | 30 | # encoder_1: [batch, 512, 512, in_channels] => [batch, 256, 256, ngf] 31 | with tf.variable_scope("encoder_1"): 32 | output = gen_conv(generator_inputs, ngf) 33 | layers.append(output) 34 | 35 | layer_specs = [ 36 | ngf * 2, # encoder_2: [batch, 256, 256, ngf] => [batch, 128, 128, ngf * 2] 37 | ngf * 4, # encoder_3: [batch, 128, 128, ngf * 2] => [batch, 64, 64, ngf * 4] 38 | ngf * 8, # encoder_4: [batch, 64, 64, ngf * 4] => [batch, 32, 32, ngf * 8] 39 | ngf * 8, # encoder_5: [batch, 32, 32, ngf * 8] => [batch, 16, 16, ngf * 8] 40 | ngf * 8, # encoder_6: [batch, 16, 16, ngf * 8] => [batch, 8, 8, ngf * 8] 41 | ngf * 8, # encoder_7: [batch, 8, 8, ngf * 8] => [batch, 4, 4, ngf * 8] 42 | # ngf * 8, # encoder_8: [batch, 4, 4, ngf * 8] => [batch, 2, 2, ngf * 8] 43 | ] 44 | 45 | for out_channels in layer_specs: 46 | with tf.variable_scope("encoder_%d" % (len(layers) + 1)): 47 | rectified = lrelu(layers[-1], 0.2) 48 | # [batch, in_height, in_width, in_channels] => [batch, in_height/2, in_width/2, out_channels] 49 | convolved = gen_conv(rectified, out_channels) 50 | output = batchnorm(convolved) 51 | layers.append(output) 52 | 53 | bottleneck = tf.reshape(output, [-1, 1, 4, output.get_shape().as_list()[-1]]) 54 | layers.append(bottleneck) 55 | 56 | layer_specs = [ 57 | # (ngf * 8, 0.5), # decoder_8: [batch, 1, 4, ngf * 8] => [batch, 2, 8, ngf * 8 * 2] 58 | (ngf * 8, 0.5), # decoder_7: [batch, 2, 8, ngf * 8 * 2] => [batch, 4, 16, ngf * 8 * 2] 59 | (ngf * 8, 0.5), # decoder_6: [batch, 4, 16, ngf * 8 * 2] => [batch, 8, 32, ngf * 8 * 2] 60 | (ngf * 8, 0.0), # decoder_5: [batch, 8, 32, ngf * 8 * 2] => [batch, 16, 64, ngf * 8 * 2] 61 | (ngf * 4, 0.0), # decoder_4: [batch, 16, 64, ngf * 8 * 2] => [batch, 32, 128, ngf * 4 * 2] 62 | (ngf * 2, 0.0), # decoder_3: [batch, 32, 128, ngf * 4 * 2] => [batch, 64, 256, ngf * 2 * 2] 63 | (ngf, 0.0), # decoder_2: [batch, 64, 256, ngf * 2 * 2] => [batch, 128, 512, ngf * 2 * 2] 64 | ] 65 | 66 | num_encoder_layers = len(layers) 67 | for decoder_layer, (out_channels, dropout) in enumerate(layer_specs): 68 | skip_layer = num_encoder_layers - decoder_layer - 1 69 | with tf.variable_scope("decoder_%d" % (skip_layer + 1)): 70 | # if decoder_layer == 0: 71 | # # first decoder layer doesn't have skip connections 72 | # # since it is directly connected to the skip_layer 73 | # input = layers[-1] 74 | # else: 75 | # input = tf.concat([layers[-1], layers[skip_layer]], axis=3) 76 | 77 | rectified = tf.nn.relu(layers[-1]) 78 | # [batch, in_height, in_width, in_channels] => [batch, in_height*2, in_width*2, out_channels] 79 | output = gen_deconv(rectified, out_channels) 80 | output = batchnorm(output) 81 | 82 | if dropout > 0.0: 83 | output = tf.nn.dropout(output, keep_prob=1 - dropout) 84 | 85 | layers.append(output) 86 | 87 | # decoder_1: [batch, 128, 512, ngf * 2] => [batch, 256, 1024, generator_outputs_channels] 88 | with 
tf.variable_scope("decoder_1"): 89 | # input = tf.concat([layers[-1], layers[0]], axis=3) 90 | rectified = tf.nn.relu(layers[-1]) 91 | output = gen_deconv(rectified, generator_outputs_channels) 92 | output = tf.tanh(output) 93 | layers.append(output) 94 | 95 | outputs_grd = layers[-1] 96 | 97 | return outputs_grd 98 | 99 | 100 | 101 | def create_discriminator(discrim_inputs, ndf=64): 102 | n_layers = 3 103 | layers = [] 104 | 105 | # layer_1: [batch, 256, 256, in_channels * 2] => [batch, 128, 128, ndf] 106 | with tf.variable_scope("layer_1"): 107 | convolved = discrim_conv(discrim_inputs, ndf, stride=2) 108 | rectified = lrelu(convolved, 0.2) 109 | layers.append(rectified) 110 | 111 | # layer_2: [batch, 128, 128, ndf] => [batch, 64, 64, ndf * 2] 112 | # layer_3: [batch, 64, 64, ndf * 2] => [batch, 32, 32, ndf * 4] 113 | # layer_4: [batch, 32, 32, ndf * 4] => [batch, 31, 31, ndf * 8] 114 | for i in range(n_layers): 115 | with tf.variable_scope("layer_%d" % (len(layers) + 1)): 116 | out_channels = ndf * min(2**(i+1), 8) 117 | stride = 1 if i == n_layers - 1 else 2 # last layer here has stride 1 118 | convolved = discrim_conv(layers[-1], out_channels, stride=stride) 119 | normalized = batchnorm(convolved) 120 | rectified = lrelu(normalized, 0.2) 121 | layers.append(rectified) 122 | 123 | # layer_5: [batch, 31, 31, ndf * 8] => [batch, 30, 30, 1] 124 | with tf.variable_scope("layer_%d" % (len(layers) + 1)): 125 | convolved = discrim_conv(rectified, out_channels=1, stride=1) 126 | output = tf.sigmoid(convolved) 127 | layers.append(output) 128 | 129 | return layers[-1] 130 | 131 | 132 | def create_model(inputs, targets, ref_images, a): 133 | 134 | with tf.variable_scope("generator"): 135 | 136 | batch, height, width, channel = tf_shape(inputs, rank=4) 137 | estimated_height = tf.ones([batch, height, width, a.heightPlaneNum])/a.heightPlaneNum 138 | 139 | generator_inputs = geometry_transform(inputs, estimated_height, target_height, target_width, 140 | a.height_mode, grd_height, max_height, a.method, a.geoout_type, a.dataset) 141 | 142 | outputs_grd = create_generator(generator_inputs, ref_images, a) 143 | 144 | with tf.name_scope("real_discriminator_grd"): 145 | with tf.variable_scope("discriminator_grd"): 146 | # 2x [batch, height, width, channels] => [batch, 30, 30, 1] 147 | predict_real_grd = create_discriminator(targets) 148 | 149 | with tf.name_scope("fake_discriminator_grd"): 150 | with tf.variable_scope("discriminator_grd", reuse=True): 151 | # 2x [batch, height, width, channels] => [batch, 30, 30, 1] 152 | predict_fake_grd = create_discriminator(outputs_grd) 153 | 154 | 155 | with tf.name_scope("discriminator_loss"): 156 | # minimizing -tf.log will try to get inputs to 1 157 | # predict_real => 1 158 | # predict_fake => 0 159 | discrim_loss = 0.5 * (tf.reduce_mean(-(tf.log(predict_real_grd + EPS) + tf.log(1 - predict_fake_grd + EPS)))) 160 | 161 | with tf.name_scope("generator_loss"): 162 | # predict_fake => 1 163 | # abs(targets - outputs) => 0 164 | gen_loss_GAN_grd = tf.reduce_mean(-tf.log(predict_fake_grd + EPS)) 165 | gen_loss_L1_grd = tf.reduce_mean(tf.abs(targets - outputs_grd)) 166 | gen_loss_perceptual_grd = perceptual_loss(targets, outputs_grd) 167 | 168 | gen_loss = gen_loss_GAN_grd * a.gan_weight + \ 169 | gen_loss_perceptual_grd * a.perceptual_weight_grd + \ 170 | gen_loss_L1_grd * a.l1_weight_grd 171 | 172 | with tf.name_scope("discriminator_train"): 173 | discrim_tvars = [var for var in tf.trainable_variables() if var.name.startswith("discriminator")] 174 | discrim_optim = 
tf.train.AdamOptimizer(a.lr, a.beta1) 175 | discrim_grads_and_vars = discrim_optim.compute_gradients(discrim_loss, var_list=discrim_tvars) 176 | discrim_train = discrim_optim.apply_gradients(discrim_grads_and_vars) 177 | 178 | with tf.name_scope("generator_train"): 179 | with tf.control_dependencies([discrim_train]): 180 | gen_tvars = [var for var in tf.trainable_variables() if var.name.startswith("generator")] 181 | gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1) 182 | gen_grads_and_vars = gen_optim.compute_gradients(gen_loss, var_list=gen_tvars) 183 | gen_train = gen_optim.apply_gradients(gen_grads_and_vars) 184 | 185 | ema = tf.train.ExponentialMovingAverage(decay=0.99) 186 | update_losses = ema.apply([discrim_loss, gen_loss_GAN_grd, gen_loss_L1_grd, gen_loss_perceptual_grd]) 187 | 188 | global_step = tf.train.get_or_create_global_step() 189 | incr_global_step = tf.assign(global_step, global_step+1) 190 | 191 | return Model( 192 | predict_real=predict_real_grd, 193 | predict_fake=predict_fake_grd, 194 | discrim_loss=ema.average(discrim_loss), 195 | discrim_grads_and_vars=discrim_grads_and_vars, 196 | gen_loss_GAN=ema.average(gen_loss_GAN_grd), 197 | gen_loss_L1=ema.average(gen_loss_L1_grd), 198 | gen_loss_perceptual=ema.average(gen_loss_perceptual_grd), 199 | gen_grads_and_vars=gen_grads_and_vars, 200 | estimated_height=tf.argmax(estimated_height, axis=-1), 201 | generator_inputs=generator_inputs, 202 | outputs=outputs_grd, 203 | train=tf.group(update_losses, incr_global_step, gen_train), 204 | ) 205 | 206 | -------------------------------------------------------------------------------- /script3/model23.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | 3 | import collections 4 | from VGG.perceptual_loss import perceptual_loss 5 | from geometry.Geometry import * 6 | from geometry.projector import * 7 | from geometry.utils import * 8 | 9 | 10 | EPS = 1e-7 11 | 12 | target_height = 128 13 | target_width = 512 14 | aer_size = 256 15 | grd_height = -2 16 | max_height = 30 17 | 18 | Model = collections.namedtuple("Model", "outputs, predict_real, predict_fake, discrim_loss, discrim_grads_and_vars, " 19 | "estimated_height, generator_inputs," 20 | "gen_loss_GAN, gen_loss_L1, gen_loss_perceptual, gen_grads_and_vars, train") 21 | 22 | def create_generator(generator_inputs, ref_images, a): 23 | 24 | generator_outputs_channels = 3 25 | 26 | ngf = a.ngf 27 | layers = [] 28 | 29 | # encoder_1: [batch, 512, 512, in_channels] => [batch, 256, 256, ngf] 30 | with tf.variable_scope("encoder_1"): 31 | output = gen_conv(generator_inputs, ngf) 32 | layers.append(output) 33 | 34 | layer_specs = [ 35 | ngf * 2, # encoder_2: [batch, 256, 256, ngf] => [batch, 128, 128, ngf * 2] 36 | ngf * 4, # encoder_3: [batch, 128, 128, ngf * 2] => [batch, 64, 64, ngf * 4] 37 | ngf * 8, # encoder_4: [batch, 64, 64, ngf * 4] => [batch, 32, 32, ngf * 8] 38 | ngf * 8, # encoder_5: [batch, 32, 32, ngf * 8] => [batch, 16, 16, ngf * 8] 39 | ngf * 8, # encoder_6: [batch, 16, 16, ngf * 8] => [batch, 8, 8, ngf * 8] 40 | ngf * 8, # encoder_7: [batch, 8, 8, ngf * 8] => [batch, 4, 4, ngf * 8] 41 | # ngf * 8, # encoder_8: [batch, 4, 4, ngf * 8] => [batch, 2, 2, ngf * 8] 42 | ] 43 | 44 | for out_channels in layer_specs: 45 | with tf.variable_scope("encoder_%d" % (len(layers) + 1)): 46 | rectified = lrelu(layers[-1], 0.2) 47 | # [batch, in_height, in_width, in_channels] => [batch, in_height/2, in_width/2, out_channels] 48 | convolved = gen_conv(rectified, 
out_channels) 49 | output = batchnorm(convolved) 50 | layers.append(output) 51 | 52 | bottleneck = tf.reshape(output, [-1, 1, 4, output.get_shape().as_list()[-1]]) 53 | layers.append(bottleneck) 54 | 55 | layer_specs = [ 56 | # (ngf * 8, 0.5), # decoder_8: [batch, 1, 4, ngf * 8] => [batch, 2, 8, ngf * 8 * 2] 57 | (ngf * 8, 0.5), # decoder_7: [batch, 2, 8, ngf * 8 * 2] => [batch, 4, 16, ngf * 8 * 2] 58 | (ngf * 8, 0.5), # decoder_6: [batch, 4, 16, ngf * 8 * 2] => [batch, 8, 32, ngf * 8 * 2] 59 | (ngf * 8, 0.0), # decoder_5: [batch, 8, 32, ngf * 8 * 2] => [batch, 16, 64, ngf * 8 * 2] 60 | (ngf * 4, 0.0), # decoder_4: [batch, 16, 64, ngf * 8 * 2] => [batch, 32, 128, ngf * 4 * 2] 61 | (ngf * 2, 0.0), # decoder_3: [batch, 32, 128, ngf * 4 * 2] => [batch, 64, 256, ngf * 2 * 2] 62 | (ngf, 0.0), # decoder_2: [batch, 64, 256, ngf * 2 * 2] => [batch, 128, 512, ngf * 2 * 2] 63 | ] 64 | 65 | num_encoder_layers = len(layers) 66 | for decoder_layer, (out_channels, dropout) in enumerate(layer_specs): 67 | skip_layer = num_encoder_layers - decoder_layer - 1 68 | with tf.variable_scope("decoder_%d" % (skip_layer + 1)): 69 | if decoder_layer == 0: 70 | # first decoder layer doesn't have skip connections 71 | # since it is directly connected to the skip_layer 72 | input = layers[-1] 73 | else: 74 | batch, height, width, channel = tf_shape(layers[-1], rank=4) 75 | 76 | input = tf.concat([layers[-1], tf.reshape(layers[skip_layer - 1], [batch, height, width, channel])], 77 | axis=3) 78 | 79 | rectified = tf.nn.relu(input) 80 | # [batch, in_height, in_width, in_channels] => [batch, in_height*2, in_width*2, out_channels] 81 | output = gen_deconv(rectified, out_channels) 82 | output = batchnorm(output) 83 | 84 | if dropout > 0.0: 85 | output = tf.nn.dropout(output, keep_prob=1 - dropout) 86 | 87 | layers.append(output) 88 | 89 | # decoder_1: [batch, 128, 512, ngf * 2] => [batch, 256, 1024, generator_outputs_channels] 90 | with tf.variable_scope("decoder_1"): 91 | # input = tf.concat([layers[-1], layers[0]], axis=3) 92 | rectified = tf.nn.relu(layers[-1]) 93 | output = gen_deconv(rectified, generator_outputs_channels) 94 | output = tf.tanh(output) 95 | layers.append(output) 96 | 97 | outputs_grd = layers[-1] 98 | 99 | return outputs_grd 100 | 101 | 102 | def create_discriminator(discrim_inputs, ndf=64): 103 | n_layers = 3 104 | layers = [] 105 | 106 | # layer_1: [batch, 256, 256, in_channels * 2] => [batch, 128, 128, ndf] 107 | with tf.variable_scope("layer_1"): 108 | convolved = discrim_conv(discrim_inputs, ndf, stride=2) 109 | rectified = lrelu(convolved, 0.2) 110 | layers.append(rectified) 111 | 112 | # layer_2: [batch, 128, 128, ndf] => [batch, 64, 64, ndf * 2] 113 | # layer_3: [batch, 64, 64, ndf * 2] => [batch, 32, 32, ndf * 4] 114 | # layer_4: [batch, 32, 32, ndf * 4] => [batch, 31, 31, ndf * 8] 115 | for i in range(n_layers): 116 | with tf.variable_scope("layer_%d" % (len(layers) + 1)): 117 | out_channels = ndf * min(2**(i+1), 8) 118 | stride = 1 if i == n_layers - 1 else 2 # last layer here has stride 1 119 | convolved = discrim_conv(layers[-1], out_channels, stride=stride) 120 | normalized = batchnorm(convolved) 121 | rectified = lrelu(normalized, 0.2) 122 | layers.append(rectified) 123 | 124 | # layer_5: [batch, 31, 31, ndf * 8] => [batch, 30, 30, 1] 125 | with tf.variable_scope("layer_%d" % (len(layers) + 1)): 126 | convolved = discrim_conv(rectified, out_channels=1, stride=1) 127 | output = tf.sigmoid(convolved) 128 | layers.append(output) 129 | 130 | return layers[-1] 131 | 132 | 133 | def 
create_model(inputs, targets, ref_images, a): 134 | 135 | with tf.variable_scope("generator"): 136 | 137 | with tf.variable_scope('height_estimation'): 138 | estimated_height = encoder_decoder(inputs, generator_outputs_channels=a.heightPlaneNum, ngf=4, 139 | activational_layer=tf.nn.softmax) 140 | estimated_height = softargmax(estimated_height) 141 | # print("*******************************",estimated_height.get_shape().as_list()) 142 | 143 | generator_inputs = tf.concat([inputs, estimated_height], axis=-1) 144 | 145 | outputs_grd = create_generator(generator_inputs, ref_images, a) 146 | 147 | with tf.name_scope("real_discriminator_grd"): 148 | with tf.variable_scope("discriminator_grd"): 149 | # 2x [batch, height, width, channels] => [batch, 30, 30, 1] 150 | predict_real_grd = create_discriminator(targets) 151 | 152 | with tf.name_scope("fake_discriminator_grd"): 153 | with tf.variable_scope("discriminator_grd", reuse=True): 154 | # 2x [batch, height, width, channels] => [batch, 30, 30, 1] 155 | predict_fake_grd = create_discriminator(outputs_grd) 156 | 157 | 158 | with tf.name_scope("discriminator_loss"): 159 | # minimizing -tf.log will try to get inputs to 1 160 | # predict_real => 1 161 | # predict_fake => 0 162 | discrim_loss = 0.5 * (tf.reduce_mean(-(tf.log(predict_real_grd + EPS) + tf.log(1 - predict_fake_grd + EPS)))) 163 | 164 | with tf.name_scope("generator_loss"): 165 | # predict_fake => 1 166 | # abs(targets - outputs) => 0 167 | gen_loss_GAN_grd = tf.reduce_mean(-tf.log(predict_fake_grd + EPS)) 168 | gen_loss_L1_grd = tf.reduce_mean(tf.abs(targets - outputs_grd)) 169 | gen_loss_perceptual_grd = perceptual_loss(targets, outputs_grd) 170 | 171 | gen_loss = gen_loss_GAN_grd * a.gan_weight + \ 172 | gen_loss_perceptual_grd * a.perceptual_weight_grd + \ 173 | gen_loss_L1_grd * a.l1_weight_grd 174 | 175 | with tf.name_scope("discriminator_train"): 176 | discrim_tvars = [var for var in tf.trainable_variables() if var.name.startswith("discriminator")] 177 | discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1) 178 | discrim_grads_and_vars = discrim_optim.compute_gradients(discrim_loss, var_list=discrim_tvars) 179 | discrim_train = discrim_optim.apply_gradients(discrim_grads_and_vars) 180 | 181 | with tf.name_scope("generator_train"): 182 | with tf.control_dependencies([discrim_train]): 183 | gen_tvars = [var for var in tf.trainable_variables() if var.name.startswith("generator")] 184 | gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1) 185 | gen_grads_and_vars = gen_optim.compute_gradients(gen_loss, var_list=gen_tvars) 186 | gen_train = gen_optim.apply_gradients(gen_grads_and_vars) 187 | 188 | ema = tf.train.ExponentialMovingAverage(decay=0.99) 189 | update_losses = ema.apply([discrim_loss, gen_loss_GAN_grd, gen_loss_L1_grd, gen_loss_perceptual_grd]) 190 | 191 | global_step = tf.train.get_or_create_global_step() 192 | incr_global_step = tf.assign(global_step, global_step+1) 193 | 194 | return Model( 195 | predict_real=predict_real_grd, 196 | predict_fake=predict_fake_grd, 197 | discrim_loss=ema.average(discrim_loss), 198 | discrim_grads_and_vars=discrim_grads_and_vars, 199 | gen_loss_GAN=ema.average(gen_loss_GAN_grd), 200 | gen_loss_L1=ema.average(gen_loss_L1_grd), 201 | gen_loss_perceptual=ema.average(gen_loss_perceptual_grd), 202 | gen_grads_and_vars=gen_grads_and_vars, 203 | estimated_height=tf.argmax(estimated_height, axis=-1), 204 | generator_inputs=generator_inputs, 205 | outputs=outputs_grd, 206 | train=tf.group(update_losses, incr_global_step, gen_train), 207 | ) 208 | 
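A minimal NumPy sketch (an editorial illustration, not a file in this repository) of the soft-argmax step used in model23.py above: the per-pixel softmax scores over the heightPlaneNum planes are collapsed into an expected plane index, which stays differentiable, and that single-channel map is concatenated with the aerial image to form the generator input. Unlike geometry.utils.softargmax, this version subtracts the per-pixel maximum before exponentiating, purely for numerical stability.

import numpy as np

def soft_argmax_np(x, beta=100.0):
    # x: [..., num_planes] scores; returns the expected plane index along the last axis
    # (keepdims), mirroring softargmax() in geometry/utils.py, with a max-shift added here.
    z = beta * (x - x.max(axis=-1, keepdims=True))
    p = np.exp(z) / np.exp(z).sum(axis=-1, keepdims=True)  # softmax over the height planes
    idx = np.arange(x.shape[-1], dtype=np.float64)
    return (p * idx).sum(axis=-1, keepdims=True)           # expected index, smooth in x

scores = np.array([[0.05, 0.80, 0.15]])   # one pixel, three height planes
print(soft_argmax_np(scores))             # ~[[1.0]]: close to argmax, but differentiable

With a large beta (the TF version defaults to 100) the result approaches a hard argmax while still providing gradients with respect to the plane scores.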
-------------------------------------------------------------------------------- /script3/model.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | 4 | import collections 5 | from VGG.perceptual_loss import perceptual_loss 6 | from geometry.Geometry import * 7 | from geometry.projector import * 8 | from geometry.utils import * 9 | 10 | 11 | EPS = 1e-7 12 | 13 | target_height = 128 14 | target_width = 512 15 | aer_size = 256 16 | grd_height = -2 17 | max_height = 6 18 | 19 | Model = collections.namedtuple("Model", "outputs, predict_real, predict_fake, discrim_loss, discrim_grads_and_vars, " 20 | "estimated_height, generator_inputs," 21 | "gen_loss_GAN, gen_loss_L1, gen_loss_perceptual, gen_grads_and_vars, train") 22 | 23 | def create_generator(generator_inputs, ref_images, a): 24 | 25 | if a.finalout_type == 'image': # ['image', 'rgba', 'fgbg'] 26 | generator_outputs_channels = 3 27 | elif a.finalout_type == 'rgba': 28 | generator_outputs_channels = a.radiusPlaneNum * 4 29 | elif a.finalout_type == 'fgbg': 30 | generator_outputs_channels = a.radiusPlaneNum * 2 + 3 31 | 32 | ngf = a.ngf 33 | layers = [] 34 | 35 | # encoder_1: [batch, 512, 512, in_channels] => [batch, 256, 256, ngf] 36 | with tf.variable_scope("encoder_1"): 37 | output = gen_conv(generator_inputs, ngf) 38 | layers.append(output) 39 | 40 | layer_specs = [ 41 | ngf * 2, # encoder_2: [batch, 256, 256, ngf] => [batch, 128, 128, ngf * 2] 42 | ngf * 4, # encoder_3: [batch, 128, 128, ngf * 2] => [batch, 64, 64, ngf * 4] 43 | ngf * 8, # encoder_4: [batch, 64, 64, ngf * 4] => [batch, 32, 32, ngf * 8] 44 | ngf * 8, # encoder_5: [batch, 32, 32, ngf * 8] => [batch, 16, 16, ngf * 8] 45 | ngf * 8, # encoder_6: [batch, 16, 16, ngf * 8] => [batch, 8, 8, ngf * 8] 46 | ngf * 8, # encoder_7: [batch, 8, 8, ngf * 8] => [batch, 4, 4, ngf * 8] 47 | # ngf * 8, # encoder_8: [batch, 4, 4, ngf * 8] => [batch, 2, 2, ngf * 8] 48 | ] 49 | 50 | for out_channels in layer_specs: 51 | with tf.variable_scope("encoder_%d" % (len(layers) + 1)): 52 | rectified = lrelu(layers[-1], 0.2) 53 | # [batch, in_height, in_width, in_channels] => [batch, in_height/2, in_width/2, out_channels] 54 | convolved = gen_conv(rectified, out_channels) 55 | output = batchnorm(convolved) 56 | layers.append(output) 57 | 58 | bottleneck = tf.reshape(output, [-1, 1, 4, output.get_shape().as_list()[-1]]) 59 | layers.append(bottleneck) 60 | 61 | layer_specs = [ 62 | # (ngf * 8, 0.5), # decoder_8: [batch, 1, 4, ngf * 8] => [batch, 2, 8, ngf * 8 * 2] 63 | (ngf * 8, 0.5), # decoder_7: [batch, 2, 8, ngf * 8 * 2] => [batch, 4, 16, ngf * 8 * 2] 64 | (ngf * 8, 0.5), # decoder_6: [batch, 4, 16, ngf * 8 * 2] => [batch, 8, 32, ngf * 8 * 2] 65 | (ngf * 8, 0.5), # decoder_5: [batch, 8, 32, ngf * 8 * 2] => [batch, 16, 64, ngf * 8 * 2] 66 | (ngf * 4, 0.0), # decoder_4: [batch, 16, 64, ngf * 8 * 2] => [batch, 32, 128, ngf * 4 * 2] 67 | (ngf * 2, 0.0), # decoder_3: [batch, 32, 128, ngf * 4 * 2] => [batch, 64, 256, ngf * 2 * 2] 68 | (ngf, 0.0), # decoder_2: [batch, 64, 256, ngf * 2 * 2] => [batch, 128, 512, ngf * 2 * 2] 69 | ] 70 | 71 | num_encoder_layers = len(layers) 72 | for decoder_layer, (out_channels, dropout) in enumerate(layer_specs): 73 | skip_layer = num_encoder_layers - decoder_layer - 1 74 | with tf.variable_scope("decoder_%d" % (skip_layer + 1)): 75 | 76 | if a.skip: 77 | 78 | if decoder_layer == 0: 79 | # first decoder layer doesn't have skip connections 80 | # since it is directly 
connected to the skip_layer 81 | input = layers[-1] 82 | else: 83 | input = tf.concat([layers[-1], layers[skip_layer-1]], axis=3) 84 | else: 85 | 86 | input = layers[-1] 87 | 88 | rectified = tf.nn.relu(input) 89 | # [batch, in_height, in_width, in_channels] => [batch, in_height*2, in_width*2, out_channels] 90 | output = gen_deconv(rectified, out_channels) 91 | output = batchnorm(output) 92 | 93 | if dropout > 0.0: 94 | output = tf.nn.dropout(output, keep_prob=1 - dropout) 95 | 96 | layers.append(output) 97 | 98 | # decoder_1: [batch, 128, 512, ngf * 2] => [batch, 256, 1024, generator_outputs_channels] 99 | with tf.variable_scope("decoder_1"): 100 | # input = tf.concat([layers[-1], layers[0]], axis=3) 101 | rectified = tf.nn.relu(layers[-1]) 102 | output = gen_deconv(rectified, generator_outputs_channels) 103 | output = tf.tanh(output) 104 | layers.append(output) 105 | 106 | if a.finalout_type == 'image': # ['image', 'rgba', 'fgbg'] 107 | outputs_grd = layers[-1] 108 | 109 | return outputs_grd 110 | 111 | elif a.finalout_type == 'rgba': 112 | outputs_grd = mpi_render_grd_view(layers[-1], share_alpha=True) 113 | outputs_grd = tf.tanh(outputs_grd) 114 | render_aer = mpi_render_aer_view(layers[-1], share_alpha=True) 115 | # render_aer = rtheta2uv(render_aer, a.radiusPlaneNum * 2) 116 | # outputs_aer = refine(render_aer) 117 | render_aer = rtheta2uv(render_aer, 256) 118 | with tf.variable_scope('refine_aer'): 119 | outputs_aer = encoder_decoder(render_aer, 3, ngf=16, activational_layer=tf.nn.tanh) 120 | 121 | return outputs_grd, outputs_aer 122 | 123 | elif a.finalout_type == 'fgbg': 124 | blend_weights = (layers[-1][:, :, :, :a.radiusPlaneNum] + 1.) / 2. 125 | alphas = (layers[-1][:, :, :, a.radiusPlaneNum: 2 * a.radiusPlaneNum] + 1.) / 2. 126 | bg_rgb = layers[-1][..., -3:] 127 | fg_rgb = ref_images 128 | 129 | for i in range(a.radiusPlaneNum): 130 | curr_alpha = tf.expand_dims(alphas[:, :, :, i], -1) 131 | w = tf.expand_dims(blend_weights[:, :, :, i], -1) 132 | curr_rgb = w * fg_rgb + (1 - w) * bg_rgb 133 | curr_rgba = tf.concat([curr_rgb, curr_alpha], axis=3) 134 | if i == 0: 135 | rgba_layers = curr_rgba 136 | else: 137 | rgba_layers = tf.concat([rgba_layers, curr_rgba], axis=3) 138 | 139 | outputs_grd = mpi_render_grd_view(rgba_layers, share_alpha=True) 140 | render_aer = mpi_render_aer_view(rgba_layers, share_alpha=True) 141 | # render_aer = rtheta2uv(render_aer, a.radiusPlaneNum * 2) 142 | # outputs_aer = refine(render_aer) 143 | render_aer = rtheta2uv(render_aer, 256) 144 | with tf.variable_scope('refine_aer'): 145 | outputs_aer = encoder_decoder(render_aer, 3, ngf=4, activational_layer=tf.nn.tanh) 146 | 147 | return outputs_grd, outputs_aer 148 | 149 | 150 | def create_discriminator(discrim_inputs, ndf=64): 151 | n_layers = 3 152 | layers = [] 153 | 154 | # layer_1: [batch, 256, 256, in_channels * 2] => [batch, 128, 128, ndf] 155 | with tf.variable_scope("layer_1"): 156 | convolved = discrim_conv(discrim_inputs, ndf, stride=2) 157 | rectified = lrelu(convolved, 0.2) 158 | layers.append(rectified) 159 | 160 | # layer_2: [batch, 128, 128, ndf] => [batch, 64, 64, ndf * 2] 161 | # layer_3: [batch, 64, 64, ndf * 2] => [batch, 32, 32, ndf * 4] 162 | # layer_4: [batch, 32, 32, ndf * 4] => [batch, 31, 31, ndf * 8] 163 | for i in range(n_layers): 164 | with tf.variable_scope("layer_%d" % (len(layers) + 1)): 165 | out_channels = ndf * min(2**(i+1), 8) 166 | stride = 1 if i == n_layers - 1 else 2 # last layer here has stride 1 167 | convolved = discrim_conv(layers[-1], out_channels, 
stride=stride) 168 | normalized = batchnorm(convolved) 169 | rectified = lrelu(normalized, 0.2) 170 | layers.append(rectified) 171 | 172 | # layer_5: [batch, 31, 31, ndf * 8] => [batch, 30, 30, 1] 173 | with tf.variable_scope("layer_%d" % (len(layers) + 1)): 174 | convolved = discrim_conv(rectified, out_channels=1, stride=1) 175 | output = tf.sigmoid(convolved) 176 | layers.append(output) 177 | 178 | return layers[-1] 179 | 180 | 181 | def create_model(inputs, targets, ref_images, a): 182 | 183 | with tf.variable_scope("generator"): 184 | 185 | with tf.variable_scope('height_estimation'): 186 | 187 | if a.heightPlaneNum > 1: 188 | 189 | estimated_height = encoder_decoder(inputs, generator_outputs_channels=a.heightPlaneNum, ngf=4, 190 | activational_layer=tf.nn.softmax) 191 | else: 192 | estimated_height = tf.concat([tf.zeros(inputs.get_shape().as_list()[:-1] + [63]), 193 | tf.ones(inputs.get_shape().as_list()[:-1] + [1])], axis=-1) 194 | 195 | generator_inputs = geometry_transform(inputs, estimated_height, target_height, target_width, 196 | a.height_mode, grd_height, max_height, a.method, a.geoout_type, a.dataset) 197 | 198 | # height, width = targets.get_shape().as_list()[1:-1] 199 | # concat_inputs = tf.concat([generator_inputs[:, : int(height/2), :, :], ref_images[:, int(height/2):, :, :]], axis=1) 200 | 201 | outputs = create_generator(generator_inputs, ref_images, a) 202 | 203 | if a.finalout_type == 'image': # ['image', 'rgba', 'fgbg'] 204 | outputs_grd = outputs 205 | 206 | else: 207 | outputs_grd, outputs_aer = outputs 208 | 209 | with tf.name_scope("real_discriminator_grd"): 210 | with tf.variable_scope("discriminator_grd"): 211 | # 2x [batch, height, width, channels] => [batch, 30, 30, 1] 212 | predict_real_grd = create_discriminator(targets) 213 | 214 | with tf.name_scope("fake_discriminator_grd"): 215 | with tf.variable_scope("discriminator_grd", reuse=True): 216 | # 2x [batch, height, width, channels] => [batch, 30, 30, 1] 217 | predict_fake_grd = create_discriminator(outputs_grd) 218 | 219 | 220 | with tf.name_scope("discriminator_loss"): 221 | # minimizing -tf.log will try to get inputs to 1 222 | # predict_real => 1 223 | # predict_fake => 0 224 | discrim_loss = 0.5 * (tf.reduce_mean(-(tf.log(predict_real_grd + EPS) + tf.log(1 - predict_fake_grd + EPS)))) 225 | 226 | with tf.name_scope("generator_loss"): 227 | # predict_fake => 1 228 | # abs(targets - outputs) => 0 229 | gen_loss_GAN_grd = tf.reduce_mean(-tf.log(predict_fake_grd + EPS)) 230 | gen_loss_L1_grd = tf.reduce_mean(tf.abs(targets - outputs_grd)) 231 | gen_loss_perceptual_grd = perceptual_loss(targets, outputs_grd) 232 | if a.finalout_type != 'image': 233 | gen_loss_L1_aer = tf.reduce_mean(tf.abs(inputs - outputs_aer)) 234 | gen_loss_perceptual_aer = perceptual_loss(inputs, outputs_aer) 235 | gen_loss = gen_loss_GAN_grd * a.gan_weight + \ 236 | gen_loss_perceptual_grd * a.perceptual_weight_grd + \ 237 | gen_loss_perceptual_aer * a.perceptual_weight_aer + \ 238 | gen_loss_L1_grd * a.l1_weight_grd + \ 239 | gen_loss_L1_aer * a.l1_weight_aer 240 | else: 241 | 242 | gen_loss = gen_loss_GAN_grd * a.gan_weight + \ 243 | gen_loss_perceptual_grd * a.perceptual_weight_grd + \ 244 | gen_loss_L1_grd * a.l1_weight_grd 245 | 246 | with tf.name_scope("discriminator_train"): 247 | discrim_tvars = [var for var in tf.trainable_variables() if var.name.startswith("discriminator")] 248 | discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1) 249 | discrim_grads_and_vars = discrim_optim.compute_gradients(discrim_loss, 
var_list=discrim_tvars) 250 | discrim_train = discrim_optim.apply_gradients(discrim_grads_and_vars) 251 | 252 | with tf.name_scope("generator_train"): 253 | with tf.control_dependencies([discrim_train]): 254 | gen_tvars = [var for var in tf.trainable_variables() if var.name.startswith("generator")] 255 | gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1) 256 | gen_grads_and_vars = gen_optim.compute_gradients(gen_loss, var_list=gen_tvars) 257 | gen_train = gen_optim.apply_gradients(gen_grads_and_vars) 258 | 259 | ema = tf.train.ExponentialMovingAverage(decay=0.99) 260 | update_losses = ema.apply([discrim_loss, gen_loss_GAN_grd, gen_loss_L1_grd, gen_loss_perceptual_grd]) 261 | 262 | global_step = tf.train.get_or_create_global_step() 263 | incr_global_step = tf.assign(global_step, global_step+1) 264 | 265 | return Model( 266 | predict_real=predict_real_grd, 267 | predict_fake=predict_fake_grd, 268 | discrim_loss=ema.average(discrim_loss), 269 | discrim_grads_and_vars=discrim_grads_and_vars, 270 | gen_loss_GAN=ema.average(gen_loss_GAN_grd), 271 | gen_loss_L1=ema.average(gen_loss_L1_grd), 272 | gen_loss_perceptual=ema.average(gen_loss_perceptual_grd), 273 | gen_grads_and_vars=gen_grads_and_vars, 274 | # estimated_height=tf.argmax(estimated_height, axis=-1), 275 | estimated_height=estimated_height, 276 | generator_inputs=generator_inputs, 277 | outputs=outputs_grd, 278 | train=tf.group(update_losses, incr_global_step, gen_train), 279 | ) 280 | 281 | -------------------------------------------------------------------------------- /script3/baseline22.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | 6 | import os 7 | 8 | # os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' 9 | # os.environ['CUDA_VISIBLE_DEVICES'] = '1' 10 | 11 | import sys 12 | sys.path.append('../') 13 | 14 | import tensorflow as tf 15 | import numpy as np 16 | import argparse 17 | import os 18 | import json 19 | 20 | import random 21 | import collections 22 | import math 23 | import time 24 | import PIL.Image as Image 25 | 26 | from model22 import * 27 | 28 | parser = argparse.ArgumentParser() 29 | # parser.add_argument("--input_dir", help="path to folder containing images", default='facades/train') 30 | parser.add_argument("--dataset", help="dataset", default='CVUSA') 31 | parser.add_argument("--mode", choices=["train", "test", "export"], default="train") 32 | parser.add_argument("--output_dir", help="where to put output files", default='pix2pix_perceploss') 33 | parser.add_argument("--seed", type=int) 34 | parser.add_argument("--checkpoint", help="directory with checkpoint to resume training from or use for testing") 35 | 36 | parser.add_argument("--max_steps", type=int, help="number of training steps (0 to disable)") 37 | parser.add_argument("--max_epochs", type=int, default=35, help="number of training epochs") 38 | parser.add_argument("--summary_freq", type=int, default=100, help="update summaries every summary_freq steps") 39 | parser.add_argument("--progress_freq", type=int, default=50, help="display progress every progress_freq steps") 40 | parser.add_argument("--trace_freq", type=int, default=0, help="trace execution every trace_freq steps") 41 | parser.add_argument("--display_freq", type=int, default=0, help="write current training images every display_freq steps") 42 | parser.add_argument("--save_freq", type=int, default=5000, help="save model every save_freq 
steps, 0 to disable") 43 | 44 | parser.add_argument("--separable_conv", action="store_true", help="use separable convolutions in the generator") 45 | parser.add_argument("--aspect_ratio", type=float, default=1.0, help="aspect ratio of output images (width/height)") 46 | parser.add_argument("--lab_colorization", action="store_true", help="split input image into brightness (A) and color (B)") 47 | parser.add_argument("--batch_size", type=int, default=4, help="number of images in batch") 48 | parser.add_argument("--which_direction", type=str, default="AtoG", choices=["AtoG", "GtoA"]) 49 | parser.add_argument("--ngf", type=int, default=64, help="number of generator filters in first conv layer") 50 | parser.add_argument("--ndf", type=int, default=64, help="number of discriminator filters in first conv layer") 51 | parser.add_argument("--scale_size", type=int, default=286, help="scale images to this size before cropping to 256x256") 52 | parser.add_argument("--flip", dest="flip", action="store_true", help="flip images horizontally") 53 | parser.add_argument("--no_flip", dest="flip", action="store_false", help="don't flip images horizontally") 54 | parser.set_defaults(flip=True) 55 | parser.add_argument("--lr", type=float, default=0.0002, help="initial learning rate for adam") 56 | parser.add_argument("--beta1", type=float, default=0.5, help="momentum term of adam") 57 | 58 | parser.add_argument("--inputs_type", choices=["original", "geometry"], default="geometry") 59 | 60 | parser.add_argument("--gan_weight", type=float, default=1.0, help="weight on GAN term for generator gradient") 61 | parser.add_argument("--l1_weight_grd", type=float, default=0.0, help="weight on L1 term for the ground-view generator loss") 62 | parser.add_argument("--l1_weight_aer", type=float, default=10.0, help="weight on L1 term for the aerial-view generator loss") 63 | parser.add_argument("--perceptual_weight_grd", type=float, default=1.0, help="weight on perceptual term for the ground-view generator loss") 64 | parser.add_argument("--perceptual_weight_aer", type=float, default=0.0, help="weight on perceptual term for the aerial-view generator loss") 65 | 66 | parser.add_argument("--heightPlaneNum", type=int, default=32, help="number of height planes used by the geometry transform") 67 | parser.add_argument("--radiusPlaneNum", type=int, default=32, help="number of radius planes used by the geometry transform") 68 | parser.add_argument("--height_mode", choices=['radiusPlaneMethod', 'heightPlaneMethod'], default='radiusPlaneMethod') 69 | # The following two parameters (--method, --geoout_type) are only used when 'height_mode' is 'radiusPlaneMethod'.
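# Example invocation (hypothetical, not taken from the provided run scripts); these three flags
# are forwarded to geometry_transform through model22.create_model:
#   python baseline22.py --dataset CVUSA --height_mode radiusPlaneMethod --method column --geoout_type image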
70 | parser.add_argument("--method", choices=['column', 'point'], default='column') 71 | parser.add_argument("--geoout_type", choices=['volume', 'image'], default='image') 72 | 73 | parser.add_argument("--finalout_type", choices=['image', 'rgba', 'fgbg'], default='image') 74 | 75 | # export options 76 | parser.add_argument("--output_filetype", default="png", choices=["png", "jpeg"]) 77 | a = parser.parse_args() 78 | 79 | EPS = 1e-12 80 | CROP_SIZE = 256 81 | 82 | nameStr = 'baseline22' 83 | 84 | def save_images(fetches, step=None): 85 | 86 | image_dir = os.path.join('./GeneratedData/', a.dataset, nameStr, 'image') 87 | if not os.path.exists(image_dir): 88 | os.makedirs(image_dir) 89 | 90 | filesets = [] 91 | for i, in_path in enumerate(fetches["paths"]): 92 | name, _ = os.path.splitext(os.path.basename(in_path.decode("utf8"))) 93 | fileset = {"name": name, "step": step} 94 | for kind in ["outputs"]: 95 | filename = name + ".png" 96 | if step is not None: 97 | filename = "%08d-%s" % (step, filename) 98 | fileset[kind] = filename 99 | out_path = os.path.join(image_dir, filename) 100 | contents = fetches[kind][i] 101 | with open(out_path, "wb") as f: 102 | f.write(contents) 103 | filesets.append(fileset) 104 | return filesets 105 | 106 | 107 | def main(): 108 | if a.seed is None: 109 | a.seed = random.randint(0, 2**31 - 1) 110 | 111 | tf.set_random_seed(a.seed) 112 | np.random.seed(a.seed) 113 | random.seed(a.seed) 114 | 115 | output_dir = os.path.join(a.dataset, nameStr, 'aer') 116 | 117 | if not os.path.exists(output_dir): 118 | os.makedirs(output_dir) 119 | 120 | if a.mode == "test" or a.mode == "export": 121 | if a.checkpoint is None: 122 | raise Exception("checkpoint required for test mode") 123 | 124 | # load some options from the checkpoint 125 | checkpoint_dir = os.path.join(a.dataset, nameStr, 'aer') 126 | options = {"which_direction", "ngf", "ndf", "lab_colorization"} 127 | with open(os.path.join(checkpoint_dir, "options.json")) as f: 128 | for key, val in json.loads(f.read()).items(): 129 | if key in options: 130 | print("loaded", key, "=", val) 131 | setattr(a, key, val) 132 | # disable these features in test mode 133 | a.scale_size = CROP_SIZE 134 | a.flip = False 135 | 136 | for k, v in a._get_kwargs(): 137 | print(k, "=", v) 138 | 139 | with open(os.path.join(output_dir, "options.json"), "w") as f: 140 | f.write(json.dumps(vars(a), sort_keys=True, indent=4)) 141 | 142 | if a.dataset=='CVUSA': 143 | from load_data.load_data_cvusa import load_examples 144 | elif a.dataset=='CVACT': 145 | from load_data.load_data_cvact import load_examples 146 | elif a.dataset=='CVACThalf': 147 | from load_data.load_data_cvact_half import load_examples 148 | elif a.dataset=='OP': 149 | from load_data.load_data_op import load_examples 150 | 151 | examples = load_examples(a.mode, a.batch_size) 152 | print("examples count = %d" % examples.count) 153 | 154 | inputs = examples.aer 155 | targets = examples.pano 156 | ref_images = examples.tanpolar 157 | 158 | # inputs and targets are [batch_size, height, width, channels] 159 | model = create_model(inputs, targets, ref_images, a) 160 | 161 | inputs = deprocess(inputs) 162 | targets = deprocess(targets) 163 | outputs = deprocess(model.outputs) 164 | 165 | def convert(image): 166 | if a.aspect_ratio != 1.0: 167 | # upscale to correct aspect ratio 168 | size = [CROP_SIZE, int(round(CROP_SIZE * a.aspect_ratio))] 169 | image = tf.image.resize_images(image, size=size, method=tf.image.ResizeMethod.BICUBIC) 170 | 171 | return tf.image.convert_image_dtype(image, 
dtype=tf.uint8, saturate=True) 172 | 173 | # reverse any processing on images so they can be written to disk or displayed to user 174 | with tf.name_scope("convert_inputs"): 175 | converted_inputs = convert(inputs) 176 | 177 | with tf.name_scope("convert_targets"): 178 | converted_targets = convert(targets) 179 | 180 | with tf.name_scope("convert_outputs"): 181 | converted_outputs = convert(outputs) 182 | 183 | with tf.name_scope("encode_images"): 184 | display_fetches = { 185 | "paths": examples.paths, 186 | "inputs": tf.map_fn(tf.image.encode_png, converted_inputs, dtype=tf.string, name="input_pngs"), 187 | "targets": tf.map_fn(tf.image.encode_png, converted_targets, dtype=tf.string, name="target_pngs"), 188 | "outputs": tf.map_fn(tf.image.encode_png, converted_outputs, dtype=tf.string, name="output_pngs"), 189 | } 190 | 191 | # summaries 192 | with tf.name_scope("inputs_summary"): 193 | tf.summary.image("inputs", converted_inputs) 194 | 195 | with tf.name_scope("targets_summary"): 196 | tf.summary.image("targets", converted_targets) 197 | 198 | with tf.name_scope("outputs_summary"): 199 | tf.summary.image("outputs", converted_outputs) 200 | 201 | tf.summary.scalar("discriminator_loss", model.discrim_loss) 202 | tf.summary.scalar("generator_loss_GAN", model.gen_loss_GAN) 203 | tf.summary.scalar("generator_loss_L1", model.gen_loss_L1) 204 | tf.summary.scalar("gen_loss_perceptual", model.gen_loss_perceptual) 205 | 206 | for var in tf.trainable_variables(): 207 | tf.summary.histogram(var.op.name + "/values", var) 208 | 209 | for grad, var in model.discrim_grads_and_vars + model.gen_grads_and_vars: 210 | tf.summary.histogram(var.op.name + "/gradients", grad) 211 | 212 | with tf.name_scope("parameter_count"): 213 | parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()]) 214 | 215 | saver = tf.train.Saver(max_to_keep=1) 216 | 217 | logdir = output_dir if (a.trace_freq > 0 or a.summary_freq > 0) else None 218 | sv = tf.train.Supervisor(logdir=logdir, save_summaries_secs=0, saver=None) 219 | with sv.managed_session() as sess: 220 | print("parameter_count =", sess.run(parameter_count)) 221 | 222 | if a.checkpoint is not None: 223 | print("loading model from checkpoint") 224 | checkpoint_dir = os.path.join(a.dataset, nameStr, 'aer') 225 | checkpoint = tf.train.latest_checkpoint(checkpoint_dir) 226 | saver.restore(sess, checkpoint) 227 | 228 | max_steps = 2**32 229 | if a.max_epochs is not None: 230 | max_steps = examples.steps_per_epoch * a.max_epochs 231 | if a.max_steps is not None: 232 | max_steps = a.max_steps 233 | 234 | if a.mode == "test": 235 | # testing 236 | # at most, process the test data once 237 | start = time.time() 238 | max_steps = min(examples.steps_per_epoch, max_steps) 239 | for step in range(max_steps): 240 | results = sess.run(display_fetches) 241 | filesets = save_images(results) 242 | for i, f in enumerate(filesets): 243 | print("evaluated image", f["name"]) 244 | # index_path = append_index(filesets) 245 | # print("wrote index at", index_path) 246 | print("rate", (time.time() - start) / max_steps) 247 | else: 248 | # training 249 | start = time.time() 250 | 251 | for step in range(max_steps): 252 | def should(freq): 253 | return freq > 0 and ((step + 1) % freq == 0 or step == max_steps - 1) 254 | 255 | options = None 256 | run_metadata = None 257 | if should(a.trace_freq): 258 | options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) 259 | run_metadata = tf.RunMetadata() 260 | 261 | fetches = { 262 | "train": model.train, 263 
| "global_step": sv.global_step, 264 | } 265 | 266 | if should(a.progress_freq): 267 | fetches["discrim_loss"] = model.discrim_loss 268 | fetches["gen_loss_GAN"] = model.gen_loss_GAN 269 | fetches["gen_loss_L1"] = model.gen_loss_L1 270 | fetches["gen_loss_perceptual"] = model.gen_loss_perceptual 271 | 272 | if should(a.summary_freq): 273 | fetches["summary"] = sv.summary_op 274 | 275 | if should(a.display_freq): 276 | fetches["display"] = display_fetches 277 | 278 | results = sess.run(fetches, options=options, run_metadata=run_metadata) 279 | 280 | if should(a.summary_freq): 281 | print("recording summary") 282 | sv.summary_writer.add_summary(results["summary"], results["global_step"]) 283 | 284 | if should(a.trace_freq): 285 | print("recording trace") 286 | sv.summary_writer.add_run_metadata(run_metadata, "step_%d" % results["global_step"]) 287 | 288 | if should(a.progress_freq): 289 | # global_step will have the correct step count if we resume from a checkpoint 290 | train_epoch = math.ceil(results["global_step"] / examples.steps_per_epoch) 291 | train_step = (results["global_step"] - 1) % examples.steps_per_epoch + 1 292 | rate = (step + 1) * a.batch_size / (time.time() - start) 293 | remaining = (max_steps - step) * a.batch_size / rate 294 | print("progress epoch %d step %d image/sec %0.1f remaining %dm" % (train_epoch, train_step, rate, remaining / 60)) 295 | print("discrim_loss", results["discrim_loss"]) 296 | print("gen_loss_GAN", results["gen_loss_GAN"]) 297 | print("gen_loss_L1", results["gen_loss_L1"]) 298 | print("gen_loss_perceptual", results["gen_loss_perceptual"]) 299 | 300 | if should(examples.steps_per_epoch): 301 | print("saving model") 302 | saver.save(sess, os.path.join(output_dir, "model"), global_step=sv.global_step) 303 | 304 | if sv.should_stop(): 305 | break 306 | 307 | 308 | main() 309 | -------------------------------------------------------------------------------- /script3/baseline23.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | 6 | import os 7 | 8 | # os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' 9 | # os.environ['CUDA_VISIBLE_DEVICES'] = '1' 10 | 11 | import sys 12 | sys.path.append('../') 13 | 14 | import tensorflow as tf 15 | import numpy as np 16 | import argparse 17 | import os 18 | import json 19 | 20 | import random 21 | import collections 22 | import math 23 | import time 24 | import PIL.Image as Image 25 | 26 | from model23 import * 27 | 28 | parser = argparse.ArgumentParser() 29 | # parser.add_argument("--input_dir", help="path to folder containing images", default='facades/train') 30 | parser.add_argument("--dataset", help="dataset", default='CVUSA') 31 | parser.add_argument("--mode", choices=["train", "test", "export"], default="train") 32 | parser.add_argument("--output_dir", help="where to put output files", default='pix2pix_perceploss') 33 | parser.add_argument("--seed", type=int) 34 | parser.add_argument("--checkpoint", help="directory with checkpoint to resume training from or use for testing") 35 | 36 | parser.add_argument("--max_steps", type=int, help="number of training steps (0 to disable)") 37 | parser.add_argument("--max_epochs", type=int, default=35, help="number of training epochs") 38 | parser.add_argument("--summary_freq", type=int, default=100, help="update summaries every summary_freq steps") 39 | parser.add_argument("--progress_freq", type=int, default=50, 
help="display progress every progress_freq steps") 40 | parser.add_argument("--trace_freq", type=int, default=0, help="trace execution every trace_freq steps") 41 | parser.add_argument("--display_freq", type=int, default=0, help="write current training images every display_freq steps") 42 | parser.add_argument("--save_freq", type=int, default=5000, help="save model every save_freq steps, 0 to disable") 43 | 44 | parser.add_argument("--separable_conv", action="store_true", help="use separable convolutions in the generator") 45 | parser.add_argument("--aspect_ratio", type=float, default=1.0, help="aspect ratio of output images (width/height)") 46 | parser.add_argument("--lab_colorization", action="store_true", help="split input image into brightness (A) and color (B)") 47 | parser.add_argument("--batch_size", type=int, default=4, help="number of images in batch") 48 | parser.add_argument("--which_direction", type=str, default="AtoG", choices=["AtoG", "GtoA"]) 49 | parser.add_argument("--ngf", type=int, default=64, help="number of generator filters in first conv layer") 50 | parser.add_argument("--ndf", type=int, default=64, help="number of discriminator filters in first conv layer") 51 | parser.add_argument("--scale_size", type=int, default=286, help="scale images to this size before cropping to 256x256") 52 | parser.add_argument("--flip", dest="flip", action="store_true", help="flip images horizontally") 53 | parser.add_argument("--no_flip", dest="flip", action="store_false", help="don't flip images horizontally") 54 | parser.set_defaults(flip=True) 55 | parser.add_argument("--lr", type=float, default=0.0002, help="initial learning rate for adam") 56 | parser.add_argument("--beta1", type=float, default=0.5, help="momentum term of adam") 57 | 58 | parser.add_argument("--inputs_type", choices=["original", "geometry"], default="geometry") 59 | 60 | parser.add_argument("--gan_weight", type=float, default=1.0, help="weight on GAN term for generator gradient") 61 | parser.add_argument("--l1_weight_grd", type=float, default=0.0, help="weight on GAN term for generator gradient") 62 | parser.add_argument("--l1_weight_aer", type=float, default=10.0, help="weight on L1 term for generator gradient") 63 | parser.add_argument("--perceptual_weight_grd", type=float, default=1.0, help="weight on GAN term for generator gradient") 64 | parser.add_argument("--perceptual_weight_aer", type=float, default=0.0, help="weight on GAN term for generator gradient") 65 | 66 | parser.add_argument("--heightPlaneNum", type=int, default=32, help="weight on GAN term for generator gradient") 67 | parser.add_argument("--radiusPlaneNum", type=int, default=32, help="weight on GAN term for generator gradient") 68 | parser.add_argument("--height_mode", choices=['radiusPlaneMethod', 'heightPlaneMethod'], default='radiusPlaneMethod') 69 | # Only when 'height_mode' is 'radiusPlaneMethod', the following two parameters are required. Otherwise not. 
70 | parser.add_argument("--method", choices=['column', 'point'], default='column') 71 | parser.add_argument("--geoout_type", choices=['volume', 'image'], default='image') 72 | 73 | parser.add_argument("--finalout_type", choices=['image', 'rgba', 'fgbg'], default='image') 74 | 75 | # export options 76 | parser.add_argument("--output_filetype", default="png", choices=["png", "jpeg"]) 77 | a = parser.parse_args() 78 | 79 | EPS = 1e-12 80 | CROP_SIZE = 256 81 | 82 | nameStr = 'baseline23' + '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) 83 | 84 | def save_images(fetches, step=None): 85 | cmap = np.load('../cmap.npy') 86 | image_dir = os.path.join('./GeneratedData/', a.dataset, nameStr, 'image') 87 | if not os.path.exists(image_dir): 88 | os.makedirs(image_dir) 89 | 90 | filesets = [] 91 | for i, in_path in enumerate(fetches["paths"]): 92 | name, _ = os.path.splitext(os.path.basename(in_path.decode("utf8"))) 93 | fileset = {"name": name, "step": step} 94 | for kind in ["outputs"]: 95 | filename = name + ".png" 96 | if step is not None: 97 | filename = "%08d-%s" % (step, filename) 98 | fileset[kind] = filename 99 | out_path = os.path.join(image_dir, filename) 100 | contents = fetches[kind][i] 101 | with open(out_path, "wb") as f: 102 | f.write(contents) 103 | 104 | filesets.append(fileset) 105 | return filesets 106 | 107 | 108 | def main(): 109 | if a.seed is None: 110 | a.seed = random.randint(0, 2**31 - 1) 111 | 112 | tf.set_random_seed(a.seed) 113 | np.random.seed(a.seed) 114 | random.seed(a.seed) 115 | 116 | cmap = np.load('../cmap.npy') 117 | 118 | output_dir = os.path.join(a.dataset, nameStr, 'aer') 119 | 120 | if not os.path.exists(output_dir): 121 | os.makedirs(output_dir) 122 | 123 | if a.mode == "test" or a.mode == "export": 124 | if a.checkpoint is None: 125 | raise Exception("checkpoint required for test mode") 126 | 127 | # load some options from the checkpoint 128 | checkpoint_dir = os.path.join(a.dataset, nameStr, 'aer') 129 | options = {"which_direction", "ngf", "ndf", "lab_colorization"} 130 | with open(os.path.join(checkpoint_dir, "options.json")) as f: 131 | for key, val in json.loads(f.read()).items(): 132 | if key in options: 133 | print("loaded", key, "=", val) 134 | setattr(a, key, val) 135 | # disable these features in test mode 136 | a.scale_size = CROP_SIZE 137 | a.flip = False 138 | 139 | for k, v in a._get_kwargs(): 140 | print(k, "=", v) 141 | 142 | with open(os.path.join(output_dir, "options.json"), "w") as f: 143 | f.write(json.dumps(vars(a), sort_keys=True, indent=4)) 144 | 145 | if a.dataset=='CVUSA': 146 | from load_data.load_data_cvusa import load_examples 147 | elif a.dataset=='CVACT': 148 | from load_data.load_data_cvact import load_examples 149 | elif a.dataset=='CVACThalf': 150 | from load_data.load_data_cvact_half import load_examples 151 | elif a.dataset=='OP': 152 | from load_data.load_data_op import load_examples 153 | 154 | examples = load_examples(a.mode, a.batch_size) 155 | print("examples count = %d" % examples.count) 156 | 157 | inputs = examples.aer 158 | targets = examples.pano 159 | ref_images = examples.tanpolar 160 | 161 | # inputs and targets are [batch_size, height, width, channels] 162 | model = create_model(inputs, targets, ref_images, a) 163 | 164 | inputs = deprocess(inputs) 165 | targets = deprocess(targets) 166 | outputs = deprocess(model.outputs) 167 | converted_generator_inputs = deprocess(model.generator_inputs) 168 | 169 | def convert(image): 170 | if a.aspect_ratio != 1.0: 171 | # upscale to correct 
aspect ratio 172 | size = [CROP_SIZE, int(round(CROP_SIZE * a.aspect_ratio))] 173 | image = tf.image.resize_images(image, size=size, method=tf.image.ResizeMethod.BICUBIC) 174 | 175 | return tf.image.convert_image_dtype(image, dtype=tf.uint8, saturate=True) 176 | 177 | # reverse any processing on images so they can be written to disk or displayed to user 178 | with tf.name_scope("convert_inputs"): 179 | converted_inputs = convert(inputs) 180 | 181 | with tf.name_scope("convert_targets"): 182 | converted_targets = convert(targets) 183 | 184 | with tf.name_scope("convert_outputs"): 185 | converted_outputs = convert(outputs) 186 | 187 | with tf.name_scope("encode_images"): 188 | display_fetches = { 189 | "paths": examples.paths, 190 | "inputs": tf.map_fn(tf.image.encode_png, converted_inputs, dtype=tf.string, name="input_pngs"), 191 | "targets": tf.map_fn(tf.image.encode_png, converted_targets, dtype=tf.string, name="target_pngs"), 192 | "outputs": tf.map_fn(tf.image.encode_png, converted_outputs, dtype=tf.string, name="output_pngs"), 193 | 194 | } 195 | 196 | # summaries 197 | with tf.name_scope("inputs_summary"): 198 | tf.summary.image("inputs", converted_inputs) 199 | 200 | with tf.name_scope("targets_summary"): 201 | tf.summary.image("targets", converted_targets) 202 | 203 | with tf.name_scope("outputs_summary"): 204 | tf.summary.image("outputs", converted_outputs) 205 | 206 | tf.summary.scalar("discriminator_loss", model.discrim_loss) 207 | tf.summary.scalar("generator_loss_GAN", model.gen_loss_GAN) 208 | tf.summary.scalar("generator_loss_L1", model.gen_loss_L1) 209 | tf.summary.scalar("gen_loss_perceptual", model.gen_loss_perceptual) 210 | 211 | for var in tf.trainable_variables(): 212 | tf.summary.histogram(var.op.name + "/values", var) 213 | 214 | for grad, var in model.discrim_grads_and_vars + model.gen_grads_and_vars: 215 | tf.summary.histogram(var.op.name + "/gradients", grad) 216 | 217 | with tf.name_scope("parameter_count"): 218 | parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()]) 219 | 220 | saver = tf.train.Saver(max_to_keep=1) 221 | 222 | logdir = output_dir if (a.trace_freq > 0 or a.summary_freq > 0) else None 223 | sv = tf.train.Supervisor(logdir=logdir, save_summaries_secs=0, saver=None) 224 | with sv.managed_session() as sess: 225 | print("parameter_count =", sess.run(parameter_count)) 226 | 227 | if a.checkpoint is not None: 228 | print("loading model from checkpoint") 229 | checkpoint_dir = os.path.join(a.dataset, nameStr, 'aer') 230 | checkpoint = tf.train.latest_checkpoint(checkpoint_dir) 231 | saver.restore(sess, checkpoint) 232 | 233 | max_steps = 2**32 234 | if a.max_epochs is not None: 235 | max_steps = examples.steps_per_epoch * a.max_epochs 236 | if a.max_steps is not None: 237 | max_steps = a.max_steps 238 | 239 | if a.mode == "test": 240 | # testing 241 | # at most, process the test data once 242 | start = time.time() 243 | max_steps = min(examples.steps_per_epoch, max_steps) 244 | for step in range(max_steps): 245 | results = sess.run(display_fetches) 246 | filesets = save_images(results) 247 | for i, f in enumerate(filesets): 248 | print("evaluated image", f["name"]) 249 | # index_path = append_index(filesets) 250 | # print("wrote index at", index_path) 251 | print("rate", (time.time() - start) / max_steps) 252 | else: 253 | # training 254 | start = time.time() 255 | 256 | for step in range(max_steps): 257 | def should(freq): 258 | return freq > 0 and ((step + 1) % freq == 0 or step == max_steps - 1) 259 | 260 | 
options = None 261 | run_metadata = None 262 | if should(a.trace_freq): 263 | options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) 264 | run_metadata = tf.RunMetadata() 265 | 266 | fetches = { 267 | "train": model.train, 268 | "global_step": sv.global_step, 269 | } 270 | 271 | if should(a.progress_freq): 272 | fetches["discrim_loss"] = model.discrim_loss 273 | fetches["gen_loss_GAN"] = model.gen_loss_GAN 274 | fetches["gen_loss_L1"] = model.gen_loss_L1 275 | fetches["gen_loss_perceptual"] = model.gen_loss_perceptual 276 | 277 | if should(a.summary_freq): 278 | fetches["summary"] = sv.summary_op 279 | 280 | if should(a.display_freq): 281 | fetches["display"] = display_fetches 282 | 283 | results = sess.run(fetches, options=options, run_metadata=run_metadata) 284 | # height = sess.run(model.estimated_height, options=options, run_metadata=run_metadata) 285 | # for b in range(0, a.batch_size): 286 | # img = cmap[height[b].squeeze()] 287 | # img = Image.fromarray(img) 288 | # img.save(str(b)+'height.png') 289 | 290 | 291 | if should(a.summary_freq): 292 | print("recording summary") 293 | sv.summary_writer.add_summary(results["summary"], results["global_step"]) 294 | 295 | if should(a.trace_freq): 296 | print("recording trace") 297 | sv.summary_writer.add_run_metadata(run_metadata, "step_%d" % results["global_step"]) 298 | 299 | if should(a.progress_freq): 300 | # global_step will have the correct step count if we resume from a checkpoint 301 | train_epoch = math.ceil(results["global_step"] / examples.steps_per_epoch) 302 | train_step = (results["global_step"] - 1) % examples.steps_per_epoch + 1 303 | rate = (step + 1) * a.batch_size / (time.time() - start) 304 | remaining = (max_steps - step) * a.batch_size / rate 305 | print("progress epoch %d step %d image/sec %0.1f remaining %dm" % (train_epoch, train_step, rate, remaining / 60)) 306 | print("discrim_loss", results["discrim_loss"]) 307 | print("gen_loss_GAN", results["gen_loss_GAN"]) 308 | print("gen_loss_L1", results["gen_loss_L1"]) 309 | print("gen_loss_perceptual", results["gen_loss_perceptual"]) 310 | 311 | if should(examples.steps_per_epoch): 312 | # if should(50): 313 | print("saving model") 314 | saver.save(sess, os.path.join(output_dir, "model"), global_step=sv.global_step) 315 | 316 | if sv.should_stop(): 317 | break 318 | 319 | 320 | main() 321 | -------------------------------------------------------------------------------- /geometry/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | import numpy as np 4 | from tensorflow.python.ops import math_ops 5 | 6 | 7 | def softargmax(x, beta=100): 8 | x_range = tf.range(x.shape.as_list()[-1], dtype=tf.float32) 9 | return tf.reduce_sum(tf.nn.softmax(x*beta) * x_range, axis=-1, keep_dims=True) 10 | 11 | 12 | def tf_shape(x, rank): 13 | static_shape = x.get_shape().with_rank(rank).as_list() 14 | dynamic_shape = tf.unstack(tf.shape(x), rank) 15 | return [s if s is not None else d for s,d in zip(static_shape, dynamic_shape)] 16 | 17 | 18 | def safe_divide(numerator, denominator, name='safe_divide'): 19 | return tf.where(math_ops.greater(denominator, 0), math_ops.divide(numerator, denominator), tf.zeros_like(numerator) 20 | , name=name) 21 | 22 | 23 | def preprocess(image): 24 | with tf.name_scope("preprocess"): 25 | # [0, 1] => [-1, 1] 26 | return image * 2 - 1 27 | 28 | 29 | def deprocess(image): 30 | with tf.name_scope("deprocess"): 31 | # [-1, 1] => [0, 1] 32 | return 
(image + 1) / 2 33 | 34 | 35 | def deprocess_label(label_logits): 36 | ''' 37 | :param label_logits: label.shape = [batch, height, width, 4] --> 4 is label number, value from 0 to 1 38 | :return: label: shape =[batch, height, width, 3] value in {0, 255}, for the purpose of show. 39 | ''' 40 | label_onehot = tf.one_hot(tf.argmax(label_logits, axis=-1), depth=4) 41 | label = label_onehot[..., 1:]*255 42 | return label 43 | 44 | 45 | 46 | def warp_pad_columns(x, n=1): 47 | 48 | out = tf.concat([x[:, :, -n:, :], x, x[:, :, :n, :]], axis=2) 49 | return tf.pad(out, [[0, 0], [n, n], [0, 0], [0, 0]]) 50 | 51 | 52 | def conv_layer_cir(x, kernel_dim, strides, output_dim, trainable, activated, bn, 53 | name='layer_conv', activation_function=tf.nn.relu): 54 | n = int((kernel_dim - 1) / 2) 55 | x = warp_pad_columns(x, n) 56 | 57 | input_dim = x.get_shape().as_list()[-1] 58 | with tf.variable_scope(name): # reuse=tf.AUTO_REUSE 59 | weight = tf.get_variable(name='weights', shape=[kernel_dim, kernel_dim, input_dim, output_dim], 60 | trainable=trainable, initializer=tf.contrib.layers.xavier_initializer()) 61 | bias = tf.get_variable(name='biases', shape=[output_dim], 62 | trainable=trainable, initializer=tf.contrib.layers.xavier_initializer()) 63 | 64 | out = tf.nn.conv2d(x, weight, strides, padding='VALID') + bias 65 | 66 | if bn: 67 | out = batchnorm(out) 68 | 69 | if activated: 70 | out = activation_function(out) 71 | 72 | return out 73 | 74 | 75 | 76 | def discrim_conv(batch_input, out_channels, stride): 77 | padded_input = tf.pad(batch_input, [[0, 0], [1, 1], [1, 1], [0, 0]], mode="CONSTANT") 78 | return tf.layers.conv2d(padded_input, out_channels, kernel_size=4, strides=(stride, stride), padding="valid", kernel_initializer=tf.random_normal_initializer(0, 0.02)) 79 | 80 | 81 | 82 | def gen_conv(batch_input, out_channels, separable_conv=False): 83 | # [batch, in_height, in_width, in_channels] => [batch, out_height, out_width, out_channels] 84 | initializer = tf.random_normal_initializer(0, 0.02) 85 | if separable_conv: 86 | return tf.layers.separable_conv2d(batch_input, out_channels, kernel_size=4, strides=(2, 2), padding="same", depthwise_initializer=initializer, pointwise_initializer=initializer) 87 | else: 88 | return tf.layers.conv2d(batch_input, out_channels, kernel_size=4, strides=(2, 2), padding="same", kernel_initializer=initializer) 89 | 90 | 91 | def gen_deconv(batch_input, out_channels, separable_conv=False): 92 | # [batch, in_height, in_width, in_channels] => [batch, out_height, out_width, out_channels] 93 | initializer = tf.random_normal_initializer(0, 0.02) 94 | if separable_conv: 95 | _b, h, w, _c = batch_input.shape 96 | resized_input = tf.image.resize_images(batch_input, [h * 2, w * 2], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) 97 | return tf.layers.separable_conv2d(resized_input, out_channels, kernel_size=4, strides=(1, 1), padding="same", depthwise_initializer=initializer, pointwise_initializer=initializer) 98 | else: 99 | return tf.layers.conv2d_transpose(batch_input, out_channels, kernel_size=4, strides=(2, 2), padding="same", kernel_initializer=initializer) 100 | 101 | 102 | def lrelu(x, a=0.2): 103 | with tf.name_scope("lrelu"): 104 | # adding these together creates the leak part and linear part 105 | # then cancels them out by subtracting/adding an absolute value term 106 | # leak: a*x/2 - a*abs(x)/2 107 | # linear: x/2 + abs(x)/2 108 | 109 | # this block looks like it has 2 inputs on the graph unless we do this 110 | x = tf.identity(x) 111 | return (0.5 * (1 + a)) * x + 
(0.5 * (1 - a)) * tf.abs(x) 112 | 113 | 114 | def batchnorm(inputs): 115 | return tf.layers.batch_normalization(inputs, axis=3, epsilon=1e-5, momentum=0.1, training=True, gamma_initializer=tf.random_normal_initializer(1.0, 0.02)) 116 | 117 | 118 | def check_image(image): 119 | assertion = tf.assert_equal(tf.shape(image)[-1], 3, message="image must have 3 color channels") 120 | with tf.control_dependencies([assertion]): 121 | image = tf.identity(image) 122 | 123 | if image.get_shape().ndims not in (3, 4): 124 | raise ValueError("image must be either 3 or 4 dimensions") 125 | 126 | # make the last dimension 3 so that you can unstack the colors 127 | shape = list(image.get_shape()) 128 | shape[-1] = 3 129 | image.set_shape(shape) 130 | return image 131 | 132 | 133 | def corr_distance_orien_unknow(grd_matrix, sat_matrix): 134 | ''' 135 | correlation distance for localizing ground panoramas with unknown orientation 136 | :param grd_matrix: shape = [batch_grd, height, grd_width, channel] 137 | :param sat_matrix: shape = [batch_sat, height, sat_width, channel] 138 | :return: 139 | ''' 140 | try: 141 | grd_batch, grd_height, grd_width, grd_channel = grd_matrix.get_shape().as_list() 142 | sat_batch, sat_height, sat_width, sat_channel = sat_matrix.get_shape().as_list() 143 | except: 144 | grd_batch, grd_height, grd_width, grd_channel = grd_matrix.shape 145 | sat_batch, sat_height, sat_width, sat_channel = sat_matrix.shape 146 | 147 | assert grd_height == sat_height and grd_channel == sat_channel 148 | 149 | def warp_pad_columns(x, n): 150 | out = tf.concat([x, x[:, :, :n, :]], axis=2) 151 | return out 152 | 153 | n = grd_width - 1 154 | x = warp_pad_columns(sat_matrix, n) 155 | 156 | weight = tf.transpose(grd_matrix, [1, 2, 3, 0]) 157 | 158 | out = tf.nn.conv2d(x, weight, strides=[1, 1, 1, 1], padding='VALID') 159 | 160 | assert out.get_shape().as_list() == [sat_batch, 1, sat_width, grd_batch] 161 | 162 | out = tf.squeeze(out) # shape = [sat_batch, sat_width, grd_batch] 163 | 164 | ############################ ground truth orientation corresponding distance ############################### 165 | 166 | 167 | max_dis = 2 - 2 * tf.transpose(tf.reduce_max(out, axis=1)) # shape = [grd_batch, sat_batch] 168 | 169 | pred_orien = tf.diag_part(tf.argmax(out, axis=1)) # argmax over sat_width gives shape [sat_batch, grd_batch]; diag_part gives [batch] 170 | 171 | return max_dis, pred_orien 172 | 173 | 174 | def triplet_loss(grd_matrix, sat_matrix, batch_size): 175 | ''' 176 | :param grd_matrix: shape = [grd_batch, grd_height, grd_width, grd_channel] 177 | :param sat_matrix: shape = [sat_batch, sat_height, sat_width, sat_channel] 178 | grd_batch==sat_batch grd_height==sat_height grd_channel==sat_channel grd_width<=sat_width 179 | :param grd_orien: shape = [grd_batch] the north direction (value within 0~sat_width) of each grd image 180 | :param train_grd_noise: 181 | :param batch_hard_count: the number of top hard pairs within a batch.
If 0, no in-batch hard negative mining 182 | :param train_method: 0: triplet(max_dis) + regularize * (max_dis - orien_dis) 183 | 1: triplet(orien_dis) + regularize * (max_dis - orien_dis) 184 | :param regularize: 185 | :return: 186 | ''' 187 | 188 | with tf.name_scope('weighted_soft_margin_triplet_loss'): 189 | 190 | dist_array, pred_orien = corr_distance_orien_unknow(grd_matrix, sat_matrix) 191 | 192 | pos_dist = tf.diag_part(dist_array) 193 | 194 | pair_n = batch_size * (batch_size - 1.0) 195 | 196 | # ground to satellite 197 | triplet_dist_g2s = pos_dist - dist_array 198 | loss_g2s = tf.reduce_sum(tf.log(1 + tf.exp(triplet_dist_g2s * 10))) / pair_n 199 | 200 | # satellite to ground 201 | triplet_dist_s2g = tf.expand_dims(pos_dist, 1) - dist_array 202 | loss_s2g = tf.reduce_sum(tf.log(1 + tf.exp(triplet_dist_s2g * 10))) / pair_n 203 | 204 | loss = (loss_g2s + loss_s2g) / 2.0 205 | 206 | return loss 207 | 208 | 209 | def encoder_decoder(generator_inputs, generator_outputs_channels, ngf=4, activational_layer=tf.nn.softmax): 210 | layers = [] 211 | 212 | # encoder_1: [batch, 512, 512, in_channels] => [batch, 256, 256, ngf] 213 | with tf.variable_scope("encoder_1"): 214 | output = gen_conv(generator_inputs, ngf) 215 | layers.append(output) 216 | 217 | layer_specs = [ 218 | ngf * 2, # encoder_2: [batch, 256, 256, ngf] => [batch, 128, 128, ngf * 2] 219 | ngf * 4, # encoder_3: [batch, 128, 128, ngf * 2] => [batch, 64, 64, ngf * 4] 220 | ngf * 8, # encoder_4: [batch, 64, 64, ngf * 4] => [batch, 32, 32, ngf * 8] 221 | ngf * 8, # encoder_5: [batch, 32, 32, ngf * 8] => [batch, 16, 16, ngf * 8] 222 | ngf * 8, # encoder_6: [batch, 16, 16, ngf * 8] => [batch, 8, 8, ngf * 8] 223 | ngf * 8, # encoder_7: [batch, 8, 8, ngf * 8] => [batch, 4, 4, ngf * 8] 224 | # ngf * 8, # encoder_8: [batch, 4, 4, ngf * 8] => [batch, 2, 2, ngf * 8] 225 | ] 226 | 227 | for out_channels in layer_specs: 228 | with tf.variable_scope("encoder_%d" % (len(layers) + 1)): 229 | rectified = lrelu(layers[-1], 0.2) 230 | # [batch, in_height, in_width, in_channels] => [batch, in_height/2, in_width/2, out_channels] 231 | convolved = gen_conv(rectified, out_channels) 232 | output = batchnorm(convolved) 233 | layers.append(output) 234 | 235 | layer_specs = [ 236 | # (ngf * 8, 0.5), # decoder_8: [batch, 1, 4, ngf * 8] => [batch, 2, 8, ngf * 8 * 2] 237 | (ngf * 8, 0.0), # decoder_7: [batch, 2, 8, ngf * 8 * 2] => [batch, 4, 16, ngf * 8 * 2] 238 | (ngf * 8, 0.0), # decoder_6: [batch, 4, 16, ngf * 8 * 2] => [batch, 8, 32, ngf * 8 * 2] 239 | (ngf * 8, 0.0), # decoder_5: [batch, 8, 32, ngf * 8 * 2] => [batch, 16, 64, ngf * 8 * 2] 240 | (ngf * 4, 0.0), # decoder_4: [batch, 16, 64, ngf * 8 * 2] => [batch, 32, 128, ngf * 4 * 2] 241 | (ngf * 2, 0.0), # decoder_3: [batch, 32, 128, ngf * 4 * 2] => [batch, 64, 256, ngf * 2 * 2] 242 | (ngf, 0.0), # decoder_2: [batch, 64, 256, ngf * 2 * 2] => [batch, 128, 512, ngf * 2 * 2] 243 | ] 244 | 245 | num_encoder_layers = len(layers) 246 | for decoder_layer, (out_channels, dropout) in enumerate(layer_specs): 247 | skip_layer = num_encoder_layers - decoder_layer - 1 248 | with tf.variable_scope("decoder_%d" % (skip_layer + 1)): 249 | if decoder_layer == 0: 250 | # first decoder layer doesn't have skip connections 251 | # since it is directly connected to the skip_layer 252 | input = layers[-1] 253 | else: 254 | input = tf.concat([layers[-1], layers[skip_layer]], axis=3) 255 | 256 | rectified = tf.nn.relu(input) 257 | # [batch, in_height, in_width, in_channels] => [batch, in_height*2, in_width*2, out_channels] 258 
| output = gen_deconv(rectified, out_channels) 259 | output = batchnorm(output) 260 | 261 | if dropout > 0.0: 262 | output = tf.nn.dropout(output, keep_prob=1 - dropout) 263 | 264 | layers.append(output) 265 | 266 | # decoder_1: [batch, 128, 512, ngf * 2] => [batch, 256, 1024, generator_outputs_channels] 267 | with tf.variable_scope("decoder_1"): 268 | # input = tf.concat([layers[-1], layers[0]], axis=3) tf.random_normal_initializer(0, 0.02) 269 | rectified = tf.nn.relu(layers[-1]) 270 | # output = gen_deconv(rectified, generator_outputs_channels) 271 | output = tf.layers.conv2d_transpose(rectified, generator_outputs_channels, kernel_size=4, strides=(2, 2), 272 | padding="same", 273 | kernel_initializer=tf.zeros_initializer(), 274 | bias_initializer=tf.constant_initializer( 275 | np.concatenate( 276 | [np.zeros(generator_outputs_channels - 1, dtype=np.float32), 277 | np.ones(1, dtype=np.float32)], axis=0))) 278 | # output = tf.tanh(output) 279 | output = activational_layer(output) 280 | layers.append(output) 281 | 282 | return layers[-1] 283 | 284 | 285 | 286 | 287 | 288 | # def sample_within_bounds_xyz(signal, batch_index, x, y, z, channel_index): 289 | # ''' 290 | # :param signal: tf variable, shape = [batch, height, width, PlaneNum, channel] 291 | # :param x: numpy 292 | # :param y: numpy 293 | # :return: 294 | # ''' 295 | # 296 | # index = tf.stack([tf.reshape(batch_index, [-1]), tf.reshape(x, [-1]), tf.reshape(y, [-1]), 297 | # tf.reshape(z, [-1]), tf.reshape(channel_index, [-1])], axis=1) 298 | # 299 | # result = tf.gather_nd(signal, index) 300 | # 301 | # batch, height, width, channel = tf_shape(x, rank=4) 302 | # 303 | # sample = tf.reshape(result, [batch, height, width, channel]) 304 | # 305 | # return sample 306 | # 307 | # 308 | # def sample_bilinear_xyz(signal, batch_index, rx, ry, rz, channel_index): 309 | # ''' 310 | # :param signal: tensor_shape = [batch, sat_height, sat_width, heightPlaneNum, channel] 311 | # :param rx: tensor_shape = [batch, grd_height, grd_width, channel] 312 | # :param ry: tensor_shape = [batch, grd_height, grd_width, channel] 313 | # :param batch_index: tensor_shape = [batch, grd_height, grd_width, channel] 314 | # :param channel_index: tensor_shape = [batch, grd_height, grd_width, channel] 315 | # :return: 316 | # ''' 317 | # 318 | # signal_dim_x, signal_dim_y, signal_dim_z = signal.get_shape().as_list()[1:-1] 319 | # 320 | # # obtain four sample coordinates 321 | # ix0 = tf.maximum(tf.cast(rx, tf.int32), 0) 322 | # iy0 = tf.maximum(tf.cast(ry, tf.int32), 0) 323 | # iz0 = tf.maximum(tf.cast(rz, tf.int32), 0) 324 | # 325 | # ix1 = tf.minimum(ix0 + 1, signal_dim_x-1) 326 | # iy1 = tf.minimum(iy0 + 1, signal_dim_y-1) 327 | # iz1 = tf.minimum(iz0 + 1, signal_dim_z-1) 328 | # 329 | # # sample signal at each four positions 330 | # signal_000 = sample_within_bounds_xyz(signal, batch_index, ix0, iy0, iz0, channel_index) 331 | # signal_100 = sample_within_bounds_xyz(signal, batch_index, ix0, iy1, iz0, channel_index) 332 | # signal_010 = sample_within_bounds_xyz(signal, batch_index, ix1, iy0, iz0, channel_index) 333 | # signal_110 = sample_within_bounds_xyz(signal, batch_index, ix1, iy1, iz0, channel_index) 334 | # 335 | # signal_001 = sample_within_bounds_xyz(signal, batch_index, ix0, iy0, iz1, channel_index) 336 | # signal_101 = sample_within_bounds_xyz(signal, batch_index, ix0, iy1, iz1, channel_index) 337 | # signal_011 = sample_within_bounds_xyz(signal, batch_index, ix1, iy0, iz1, channel_index) 338 | # signal_111 = sample_within_bounds_xyz(signal, 
batch_index, ix1, iy1, iz1, channel_index) 339 | # 340 | # ix1 = tf.cast(ix1, tf.float32) 341 | # iy1 = tf.cast(iy1, tf.float32) 342 | # iz1 = tf.cast(iz1, tf.float32) 343 | # 344 | # fx00 = (ix1 - rx) * signal_100 + (rx - ix0) * signal_000 345 | # fx10 = (ix1 - rx) * signal_110 + (rx - ix0) * signal_010 346 | # fy0 = (iy1 - ry) * fx10 + (ry - iy0) * fx00 347 | # 348 | # fx01 = (ix1 - rx) * signal_101 + (rx - ix0) * signal_001 349 | # fx11 = (ix1 - rx) * signal_111 + (rx - ix0) * signal_011 350 | # fy1 = (iy1 - ry) * fx11 + (ry - iy0) * fx01 351 | # 352 | # fz = (iz1 - rz) * fy1 + (rz - iz0) * fy0 353 | # 354 | # return fz 355 | # 356 | # 357 | # 358 | # def MultiPlaneImagesAer2Grd_radius(signal, estimated_height, target_height, target_width, grd_height, max_height): 359 | # ''' 360 | # :param x: tf variable, x.shape=[batch, S, S, channel] 361 | # :param height: output height 362 | # :param width: output width 363 | # :param radius: shape = [batch, height, width, channel] its value is within the range of [0, S/2). 364 | # :return: 365 | # ''' 366 | # batch, S, _, channel = tf_shape(signal, 4) 367 | # PlaneNum = estimated_height.get_shape().as_list()[-1] # shape = [batch, S, S, PlaneNum] 368 | # 369 | # Voxel = tf.transpose(tf.stack([signal]*PlaneNum, axis=-1), [0, 1, 2, 4, 3]) # shape = [batch, S, S, PlaneNum, channel] 370 | # Voxel = tf.expand_dims(estimated_height, axis=-1) * Voxel # shape = [batch, S, S, PlaneNum, channel] 371 | # 372 | # f = 144/S 373 | # 374 | # b = tf.range(0, batch) 375 | # h = tf.range(0, target_height*2) 376 | # w = tf.range(0, target_width) 377 | # c = tf.range(0, channel) 378 | # 379 | # bb, hh, ww, cc = tf.meshgrid(b, h, w, c, indexing='ij') 380 | # 381 | # sinTheta = tf.sin(ww / target_width * np.pi * 2) 382 | # cosTheta = tf.cos(ww / target_width * np.pi * 2) 383 | # tanPhi = tf.tan(hh / (target_height * 2) * np.pi) 384 | # 385 | # ww = tf.cast(ww, tf.float32) 386 | # RadiusNum = int(signal.get_shape().as_list()[1] / 2) 387 | # 388 | # target_volume = [] 389 | # for r in range(1, RadiusNum): 390 | # # r = RadiusNum - i 391 | # x = S/2 + r * cosTheta 392 | # y = S/2 + r * sinTheta 393 | # z = safe_divide(r * f, tanPhi) 394 | # z = (z - grd_height)/(max_height - grd_height) * PlaneNum 395 | # 396 | # sample = sample_bilinear_xyz(Voxel, bb, x, y, z, cc) 397 | # target_volume.append(sample) 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | # def warp_pad_columns(x, m1=1, m2=1, n1=1, n2=1): 407 | # out = tf.concat([x[:, :, -n1:, :], x, x[:, :, :n2, :]], axis=2) 408 | # return tf.pad(out, [[0, 0], [m1, m2], [0, 0], [0, 0]]) 409 | # 410 | # 411 | # def discrim_conv_cir(batch_input, out_channels, stride): 412 | # padded_input = warp_pad_columns(batch_input, m1=1, m2=1, n1=1, n2=1) 413 | # return tf.layers.conv2d(padded_input, out_channels, kernel_size=4, strides=(stride, stride), padding="valid", 414 | # kernel_initializer=tf.random_normal_initializer(0, 0.02)) 415 | # 416 | # 417 | # def gen_conv_cir(batch_input, out_channels): 418 | # initializer = tf.random_normal_initializer(0, 0.02) 419 | # x = warp_pad_columns(batch_input, m1=1, m2=1, n1=1, n2=1) 420 | # return tf.layers.conv2d(x, out_channels, kernel_size=4, strides=(2, 2), padding="valid", kernel_initializer=initializer) 421 | # 422 | # 423 | # def gen_deconv_cir(batch_input, out_channels): 424 | # initializer = tf.random_normal_initializer(0, 0.02) 425 | # _, height, width, channel = batch_input.get_shape().as_list() 426 | # x = tf.image.resize_nearest_neighbor(batch_input, (2*height, 2*width)) 427 | # x 
= warp_pad_columns(x, m1=1, m2=1, n1=1, n2=1) 428 | # return tf.layers.conv2d(x, out_channels, kernel_size=3, strides=(1,1), padding="valid", kernel_initializer=initializer) 429 | -------------------------------------------------------------------------------- /geometry/Geometry.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | from utils import * 4 | import numpy as np 5 | import tensorflow_addons as tfa 6 | 7 | def encoder_decoder(generator_inputs, generator_outputs_channels, ngf=4, activational_layer=tf.nn.softmax): 8 | layers = [] 9 | 10 | # encoder_1: [batch, 512, 512, in_channels] => [batch, 256, 256, ngf] 11 | with tf.variable_scope("encoder_1"): 12 | output = gen_conv(generator_inputs, ngf) 13 | layers.append(output) 14 | 15 | layer_specs = [ 16 | ngf * 2, # encoder_2: [batch, 256, 256, ngf] => [batch, 128, 128, ngf * 2] 17 | ngf * 4, # encoder_3: [batch, 128, 128, ngf * 2] => [batch, 64, 64, ngf * 4] 18 | ngf * 8, # encoder_4: [batch, 64, 64, ngf * 4] => [batch, 32, 32, ngf * 8] 19 | ngf * 8, # encoder_5: [batch, 32, 32, ngf * 8] => [batch, 16, 16, ngf * 8] 20 | ngf * 8, # encoder_6: [batch, 16, 16, ngf * 8] => [batch, 8, 8, ngf * 8] 21 | ngf * 8, # encoder_7: [batch, 8, 8, ngf * 8] => [batch, 4, 4, ngf * 8] 22 | # ngf * 8, # encoder_8: [batch, 4, 4, ngf * 8] => [batch, 2, 2, ngf * 8] 23 | ] 24 | 25 | for out_channels in layer_specs: 26 | with tf.variable_scope("encoder_%d" % (len(layers) + 1)): 27 | rectified = lrelu(layers[-1], 0.2) 28 | # [batch, in_height, in_width, in_channels] => [batch, in_height/2, in_width/2, out_channels] 29 | convolved = gen_conv(rectified, out_channels) 30 | output = batchnorm(convolved) 31 | layers.append(output) 32 | 33 | layer_specs = [ 34 | # (ngf * 8, 0.5), # decoder_8: [batch, 1, 4, ngf * 8] => [batch, 2, 8, ngf * 8 * 2] 35 | (ngf * 8, 0.0), # decoder_7: [batch, 2, 8, ngf * 8 * 2] => [batch, 4, 16, ngf * 8 * 2] 36 | (ngf * 8, 0.0), # decoder_6: [batch, 4, 16, ngf * 8 * 2] => [batch, 8, 32, ngf * 8 * 2] 37 | (ngf * 8, 0.0), # decoder_5: [batch, 8, 32, ngf * 8 * 2] => [batch, 16, 64, ngf * 8 * 2] 38 | (ngf * 4, 0.0), # decoder_4: [batch, 16, 64, ngf * 8 * 2] => [batch, 32, 128, ngf * 4 * 2] 39 | (ngf * 2, 0.0), # decoder_3: [batch, 32, 128, ngf * 4 * 2] => [batch, 64, 256, ngf * 2 * 2] 40 | (ngf, 0.0), # decoder_2: [batch, 64, 256, ngf * 2 * 2] => [batch, 128, 512, ngf * 2 * 2] 41 | ] 42 | 43 | num_encoder_layers = len(layers) 44 | for decoder_layer, (out_channels, dropout) in enumerate(layer_specs): 45 | skip_layer = num_encoder_layers - decoder_layer - 1 46 | with tf.variable_scope("decoder_%d" % (skip_layer + 1)): 47 | # if decoder_layer == 0: 48 | # # first decoder layer doesn't have skip connections 49 | # # since it is directly connected to the skip_layer 50 | # input = layers[-1] 51 | # else: 52 | # input = tf.concat([layers[-1], layers[skip_layer]], axis=3) 53 | input = layers[-1] 54 | 55 | rectified = tf.nn.relu(input) 56 | # [batch, in_height, in_width, in_channels] => [batch, in_height*2, in_width*2, out_channels] 57 | output = gen_deconv(rectified, out_channels) 58 | output = batchnorm(output) 59 | 60 | if dropout > 0.0: 61 | output = tf.nn.dropout(output, keep_prob=1 - dropout) 62 | layers.append(output) 63 | 64 | # decoder_1: [batch, 128, 512, ngf * 2] => [batch, 256, 1024, generator_outputs_channels] 65 | with tf.variable_scope("decoder_1"): 66 | # input = tf.concat([layers[-1], layers[0]], axis=3) 67 | rectified = 
tf.nn.relu(layers[-1]) 68 | # output = gen_deconv(rectified, generator_outputs_channels) 69 | output = tf.layers.conv2d_transpose(rectified, generator_outputs_channels, kernel_size=4, strides=(2, 2), padding="same", 70 | kernel_initializer=tf.zeros_initializer(), 71 | bias_initializer=tf.constant_initializer( 72 | np.concatenate([np.zeros(generator_outputs_channels - 1, dtype=np.float32), 73 | np.ones(1, dtype=np.float32)], axis=0))) 74 | 75 | # output = tf.tanh(output) 76 | output = activational_layer(output*100) 77 | layers.append(output) 78 | 79 | return layers[-1] 80 | 81 | 82 | def geometry_transform(aer_imgs, estimated_height, target_height, target_width, mode, grd_height, max_height, 83 | method='column', geoout_type='image', dataset='CVUSA'): 84 | ''' 85 | :param aer_imgs: 86 | :param estimated_height: 87 | :param mode: if estimated_height.channel ==1, type belongs to {'hole', 'column'}; 88 | otherwise if estimated_height.channel>1, type belongs to {'radiusPlaneMethod', 'heightPlaneMethod'} 89 | The following two parameters are only needed if mode is 'radiusPlaneMethod'. 90 | :param method: select from {'column', 'point'}. 91 | 'column' means: for each point in overhead view, we poject it and the points under it to the grd view 92 | we use cusum to mimic this process 93 | 'point' means we only project the points in the overhead view image to the grd view. 94 | :param geoout_type: select from {'volume', 'image'}. 95 | :return: 96 | ''' 97 | # PlaneNum = estimated_height.get_shape().as_list()[-1] 98 | # if height_channel==1: 99 | if mode=='heightPlaneMethod': 100 | output = MultiPlaneImagesAer2Grd_height(aer_imgs, estimated_height, target_height, target_width, grd_height, 101 | max_height, method, geoout_type, dataset) 102 | elif mode=='radiusPlaneMethod': 103 | output = MultiPlaneImagesAer2Grd_radius(aer_imgs, estimated_height, target_height, target_width, 104 | grd_height, max_height, method, geoout_type, dataset) 105 | return output 106 | 107 | 108 | def MultiPlaneImagesAer2Grd_height(signal, estimated_height, target_height, target_width, grd_height=-2, max_height=30, 109 | method='column', geoout_type='image', dataset='CVUSA'): 110 | PlaneNum = estimated_height.get_shape().as_list()[-1] 111 | 112 | if method == 'column': 113 | estimated_height = tf.cumsum(estimated_height, axis=-1) 114 | # the maximum plane corresponds to grd plane 115 | batch, S, _, channel = tf_shape(signal, 4) 116 | H, W, C = signal.get_shape().as_list()[1:] 117 | assert (H==W) 118 | 119 | i = np.arange(0, (target_height*2)) 120 | j = np.arange(0, target_width) 121 | jj, ii = np.meshgrid(j, i) 122 | 123 | if dataset=='CVUSA': 124 | f = H/55 125 | elif dataset=='CVACT' or dataset=='CVACThalf': 126 | f = H/(50*206/256) 127 | elif dataset=='CVACTunaligned': 128 | f = H/50 129 | elif dataset=='OP': 130 | f = H/100 131 | 132 | # f = H/144 133 | 134 | tanii = np.tan(ii * np.pi / (target_height*2)) 135 | 136 | images_list = [] 137 | alphas_list = [] 138 | 139 | # images_list_volume = [] 140 | 141 | for i in range(PlaneNum): 142 | z = grd_height + (max_height-grd_height) * i/PlaneNum 143 | 144 | u_dup = -1 * np.ones([(target_height*2), target_width]) 145 | v_dup = -1 * np.ones([(target_height*2), target_width]) 146 | m = target_height 147 | 148 | v = S / 2. - f * z * tanii * np.sin(jj * 2 * np.pi / target_width) 149 | u = S / 2. 
+ f * z * tanii * np.cos(jj * 2 * np.pi / target_width) 150 | 151 | if z < 0: 152 | u_dup[-m:, :] = u[-m:, :] 153 | v_dup[-m:, :] = v[-m:, :] 154 | else: 155 | u_dup[0:m, :] = u[0:m, :] 156 | v_dup[0:m, :] = v[0:m, :] 157 | 158 | n = int(target_height/2) 159 | 160 | uv = np.stack([v_dup[n:-n,...], u_dup[n:-n,...]], axis=-1) 161 | uv = uv.astype(np.float32) 162 | warp = tf.stack([uv]*batch, axis=0) 163 | 164 | # images_prob = tf.contrib.resampler.resampler(signal*estimated_height[..., i:i+1], warp) 165 | # images = tf.contrib.resampler.resampler(signal, warp) 166 | # alphas = tf.contrib.resampler.resampler(estimated_height[..., i:i + 1], warp) 167 | images = tfa.image.resampler(signal, warp) 168 | alphas = tfa.image.resampler(estimated_height[..., i:i + 1], warp) 169 | images_list.append(images) 170 | alphas_list.append(alphas) 171 | 172 | # images_list_volume.append(images_prob) 173 | 174 | if geoout_type == 'volume': 175 | 176 | return tf.concat([images_list[i]*alphas_list[i] for i in range(PlaneNum)], axis=-1) 177 | 178 | # return tf.concat(images_list, axis=-1) * tf.concat(alphas_list, axis=-1) # shape = [batch, target_height, target_width, channel*PlaneNum] 179 | 180 | elif geoout_type == 'image': 181 | for i in range(PlaneNum): 182 | rgb = images_list[i] 183 | a = alphas_list[i] 184 | if i == 0: 185 | output = rgb * a 186 | else: 187 | rgb_by_alpha = rgb * a 188 | output = rgb_by_alpha + output * (1 - a) 189 | 190 | return output # shape = [batch, target_height, target_width, channel] 191 | 192 | # batch_image = tf.stack(images_list, axis=-1) 193 | # 194 | # batch_mulplanes = tf.reshape(batch_image, [-1, target_height, target_width, C*PlaneNum]) 195 | # 196 | # return batch_mulplanes 197 | 198 | 199 | def MultiPlaneImagesAer2Grd_radius(signal, estimated_height, target_height, target_width, grd_height, max_height, 200 | method='column', geoout_type='image', dataset='CVUSA'): 201 | ''' 202 | This function first converts the overhead-view uv coordinates to polar coordinates, i.e., from overhead planes to a cylindrical coordinate system, 203 | and then from cylindrical coordinates to spherical coordinates. 204 | :param signal: [batch, height, width, channel] image 205 | :param estimated_height: [batch, height, width, PlaneNum] 206 | :param target_height: height/phi direction 207 | :param target_width: azimuth direction 208 | :param grd_height: 209 | :param max_height: 210 | :param method: select from {'column', 'point'}. 211 | 'column' means: for each point in the overhead view, we project it and the points below it to the grd view; 212 | tf.cumsum is used to mimic this process. 213 | 'point' means we only project the points in the overhead-view image to the grd view. 214 | :param geoout_type: select from {'volume', 'image'}.
215 | :return: 216 | ''' 217 | PlaneNum = estimated_height.get_shape().as_list()[-1] 218 | batch, height, width, channel = tf_shape(signal, rank=4) 219 | 220 | if method=='column': 221 | # estimated_height = tf.cumsum(estimated_height, axis=-1, reverse=True) 222 | # # the 0th plane corresponds to grd plane 223 | estimated_height = tf.cumsum(estimated_height, axis=-1) 224 | # the maximum plane corresponds to grd plane 225 | 226 | voxel = tf.transpose(tf.stack([signal]*PlaneNum, axis=-1), [0, 1, 2, 4, 3]) 227 | # * tf.expand_dims(estimated_height, axis=-1) 228 | voxel = tf.reshape(voxel, [batch, height, width, PlaneNum*channel]) 229 | 230 | ################### from overhead view uvz coordinate to cylinder pthetaz coordinate ######################### 231 | S = signal.get_shape().as_list()[1] 232 | radius = int(S//4) 233 | azimuth = target_width 234 | 235 | i = np.arange(0, radius) 236 | j = np.arange(0, azimuth) 237 | jj, ii = np.meshgrid(j, i) 238 | 239 | # if train_mode: 240 | # sx = np.random.uniform(-10, 10) 241 | # sy = np.random.uniform(-10, 10) 242 | # rx = np.minimum(S/2.-sx, S/2.+sx) 243 | # ry = np.minimum(S/2.-sy, S/2.+sy) 244 | # 245 | # y = (S / 2. + sx) - rx / radius * (radius - 1 - ii) * np.sin(2 * np.pi * jj / azimuth) 246 | # x = (S / 2. + sy) + ry / radius * (radius - 1 - ii) * np.cos(2 * np.pi * jj / azimuth) 247 | # 248 | # else: 249 | 250 | y = S / 2. - S / 2. / radius * (radius - 1 - ii) * np.sin(2 * np.pi * jj / azimuth) 251 | x = S / 2. + S / 2. / radius * (radius - 1 - ii) * np.cos(2 * np.pi * jj / azimuth) 252 | 253 | uv = np.stack([y, x], axis=-1) 254 | uv = uv.astype(np.float32) 255 | warp = tf.stack([uv] * batch, axis=0) 256 | 257 | # imgs = tf.contrib.resampler.resampler(voxel, warp) 258 | imgs = tfa.image.resampler(voxel, warp) 259 | imgs = tf.reshape(imgs, [batch, radius, azimuth, PlaneNum, channel]) # batch, radius, azimuth, PlaneNum, channel] 260 | # imgs = tf.transpose(imgs, [0, 3, 2, 1, 4])[:, ::-1, ...] 
261 | # # shape = [batch, PlaneNum, azimuth, radius, channel] 262 | # # the maximum PlaneNum corresponds to ground plane 263 | # alpha = tf.contrib.resampler.resampler(estimated_height, warp)[..., ::-1] # batch, radius, azimuth, PlaneNum 264 | # # the maximum PlaneNum corresponds to ground plane 265 | # alpha = tf.transpose(alpha, [0, 3, 2, 1]) # shape = [batch, PlaneNum, azimuth, radius] 266 | imgs = tf.transpose(imgs, [0, 3, 2, 1, 4]) 267 | # shape = [batch, PlaneNum, azimuth, radius, channel] 268 | # the maximum PlaneNum corresponds to ground plane 269 | # alpha = tf.contrib.resampler.resampler(estimated_height, warp) # batch, radius, azimuth, PlaneNum 270 | alpha = tfa.image.resampler(estimated_height, warp) 271 | # the maximum PlaneNum corresponds to ground plane 272 | alpha = tf.transpose(alpha, [0, 3, 2, 1]) # shape = [batch, PlaneNum, azimuth, radius] 273 | 274 | if dataset == 'CVUSA': 275 | meters = 55 276 | elif dataset == 'CVACT' or dataset=='CVACThalf': 277 | meters = (50 * 206 / 256) 278 | elif dataset == 'CVACTunaligned': 279 | meters = 50 280 | elif dataset == 'OP': 281 | meters = 100 282 | 283 | ################### from cylinder pthetaz coordinate to grd phithetar coordinate ######################### 284 | if dataset=='CVUSA' or dataset=='CVACThalf': 285 | i = np.arange(0, target_height*2) 286 | j = np.arange(0, target_width) 287 | jj, ii = np.meshgrid(j, i) 288 | tanPhi = np.tan(ii / target_height / 2 * np.pi) 289 | tanPhi[np.where(tanPhi==0)] = 1e-16 290 | 291 | n = int(target_height//2) 292 | 293 | MetersPerRadius = meters / 2 / radius 294 | rgb_layers = [] 295 | a_layers = [] 296 | for r in range(0, radius): 297 | # from far to near 298 | z = (radius-r-1)*MetersPerRadius/tanPhi[n:-n] 299 | z = (PlaneNum-1) - (z - grd_height)/(max_height - grd_height) * (PlaneNum-1) 300 | theta = jj[n:-n] 301 | uv = np.stack([theta, z], axis=-1) 302 | uv = uv.astype(np.float32) 303 | warp = tf.stack([uv] * batch, axis=0) 304 | # rgb = tf.contrib.resampler.resampler(imgs[..., r, :], warp) 305 | rgb = tfa.image.resampler(imgs[..., r, :], warp) 306 | # a = tf.contrib.resampler.resampler(alpha[..., r:r + 1], warp) 307 | a = tfa.image.resampler(alpha[..., r:r+1], warp) 308 | 309 | rgb_layers.append(rgb) 310 | a_layers.append(a) 311 | 312 | else: 313 | i = np.arange(0, target_height) 314 | j = np.arange(0, target_width) 315 | jj, ii = np.meshgrid(j, i) 316 | tanPhi = np.tan(ii / target_height * np.pi) 317 | tanPhi[np.where(tanPhi == 0)] = 1e-16 318 | 319 | # n = int(target_height // 2) 320 | 321 | MetersPerRadius = meters / 2 / radius 322 | rgb_layers = [] 323 | a_layers = [] 324 | for r in range(0, radius): 325 | # from far to near 326 | z = (radius - r - 1) * MetersPerRadius / tanPhi 327 | z = (PlaneNum - 1) - (z - grd_height) / (max_height - grd_height) * (PlaneNum - 1) 328 | theta = jj 329 | uv = np.stack([theta, z], axis=-1) 330 | uv = uv.astype(np.float32) 331 | warp = tf.stack([uv] * batch, axis=0) 332 | # rgb = tf.contrib.resampler.resampler(imgs[..., r, :], warp) 333 | # a = tf.contrib.resampler.resampler(alpha[..., r:r + 1], warp) 334 | rgb = tfa.image.resampler(imgs[..., r, :], warp) 335 | a = tfa.image.resampler(alpha[..., r:r + 1], warp) 336 | 337 | rgb_layers.append(rgb) 338 | a_layers.append(a) 339 | 340 | if geoout_type=='volume': 341 | 342 | return tf.concat([rgb_layers[i]*a_layers[i] for i in range(radius)], axis=-1) 343 | 344 | # return tf.concat(rgb_layers[::-1], axis=-1) * tf.concat(a_layers[::-1], axis=-1) # shape = [batch, target_height, target_width, channel*PlaneNum] 
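        # The 'image' branch below composites the radius planes back to front with the
        # standard "over" operator: the planes were appended from far to near, and each
        # nearer plane is alpha-blended on top, i.e. out = rgb_i * a_i + out * (1 - a_i).
        # A minimal NumPy-style sketch of the same recurrence (illustrative only; the
        # names rgbs/alphas are hypothetical and not part of this repository):
        #     out = rgbs[0] * alphas[0]
        #     for rgb, a in zip(rgbs[1:], alphas[1:]):  # ordered far -> near
        #         out = rgb * a + out * (1 - a)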
345 | 346 | elif geoout_type=='image': 347 | for i in range(radius): 348 | rgb = rgb_layers[i] 349 | a = a_layers[i] 350 | if i==0: 351 | output = rgb * a 352 | else: 353 | rgb_by_alpha = rgb * a 354 | output = rgb_by_alpha + output * (1 - a) 355 | 356 | return output # shape = [batch, target_height, target_width, channel] 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | # def geometry_transform_hole(aer_imgs, estimated_height, target_height, target_width, grd_height=-2.5, max_height=47.5): 378 | # _, aer_size, _, heightPlaneNum = estimated_height.get_shape().as_list() 379 | # batch, _, _, channel = tf_shape(aer_imgs, 4) 380 | # 381 | # f = 144/aer_size 382 | # 383 | # assert heightPlaneNum==1 384 | # estimated_height = tf.squeeze(estimated_height) # shape = [batch, aer_size, aer_size] 385 | # 386 | # estimated_height = grd_height + (max_height - grd_height) * estimated_height 387 | # 388 | # i = np.arange(0, aer_size) 389 | # j = np.arange(0, aer_size) 390 | # jj, ii = np.meshgrid(j, i) 391 | # 392 | # radius = np.sqrt((ii - (aer_size / 2 - 0.5)) ** 2 + (jj - (aer_size / 2 - 0.5)) ** 2) 393 | # 394 | # Theta1 = tf.atan( 395 | # (ii[:, 0:int(aer_size / 2)] - (aer_size / 2 - 0.5)) / (jj[:, 0:int(aer_size / 2)] - (aer_size / 2 - 0.5))) + 0.5 * np.pi 396 | # Theta2 = tf.atan( 397 | # (ii[:, int(aer_size / 2):] - (aer_size / 2 - 0.5)) / (jj[:, int(aer_size / 2):] - (aer_size / 2 - 0.5))) + 1.5 * np.pi 398 | # Theta = tf.concat([Theta1, Theta2], axis=-1) 399 | # 400 | # Phimax = tf.atan2(radius, estimated_height*f) 401 | # Phimin = tf.atan2(radius, grd_height*f) 402 | # 403 | # Theta = Theta / 2 / np.pi * target_width # shape = [aer_size, aer_size] 404 | # Phimax = Phimax / np.pi * (target_height * 2) # shape = [aer_size, aer_size] 405 | # Phimin = Phimin / np.pi * (target_height * 2) # shape = [aer_size, aer_size] 406 | # 407 | # target = tf.zeros([batch, target_height*2, target_width, channel]) 408 | # 409 | # for rr in range(aer_size//2): 410 | # 411 | # r = aer_size//2 - rr 412 | # 413 | # indices = tf.where((radius > (r-1)) & (radius <= r)) # shape = [batch*num, 3] 3--> batch, height, width 414 | # 415 | # selected_Theta = tf.gather_nd(Theta, indices) # shape = [batch*num] 416 | # selected_Phimax = tf.gather_nd(Phimax, indices) # shape = [batch*num] 417 | # selected_Phimin = tf.gather_nd(Phimin, indices) # shape = [batch*num] 418 | # 419 | # rgb = tf.gather_nd(aer_imgs, indices) # shape = [batch*num, channel] channel = aer_imgs.shape[-1] 420 | # 421 | # iy = tf.minimum(tf.cast(tf.round(selected_Theta), tf.int64), target_width-1) # shape = [batch*num] 422 | # ix1 = tf.minimum(tf.cast(tf.round(selected_Phimax), tf.int64), target_height*2-1) # shape = [batch*num] 423 | # ix0 = tf.minimum(tf.cast(tf.round(selected_Phimin), tf.int64), target_height*2 - 1) # shape = [batch*num] 424 | # 425 | # num = tf_shape(iy, 1)//batch 426 | # bi = tf.reshape(tf.range(batch), [1, batch]) 427 | # bi = tf.tile(bi, [num, 1]) 428 | # bi = tf.reshape(bi, [-1]) # shape = [batch*num] 429 | # index = tf.stack([bi, ix1, iy], axis=-1) # shape = [batch*num, 3] 3-->batch, phi, theta 430 | # 431 | # assign = tf.assign(tf.gather_nd(target, index), rgb) 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | -------------------------------------------------------------------------------- /script3/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import 
absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | # os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' 7 | # os.environ['CUDA_VISIBLE_DEVICES'] = '0' 8 | 9 | import sys 10 | sys.path.append('../') 11 | 12 | import tensorflow.compat.v1 as tf 13 | tf.disable_v2_behavior() 14 | import numpy as np 15 | import argparse 16 | import os 17 | import json 18 | 19 | import random 20 | import collections 21 | import math 22 | import time 23 | import PIL.Image as Image 24 | import scipy.io as scio 25 | 26 | from model import * 27 | 28 | parser = argparse.ArgumentParser() 29 | # parser.add_argument("--input_dir", help="path to folder containing images", default='facades/train') 30 | parser.add_argument("--dataset", help="dataset", default='CVACT') 31 | parser.add_argument("--mode", choices=["train", "test", "export"], default="train") 32 | parser.add_argument("--output_dir", help="where to put output files", default='pix2pix_perceploss') 33 | parser.add_argument("--seed", type=int) 34 | parser.add_argument("--checkpoint", help="directory with checkpoint to resume training from or use for testing") 35 | 36 | # parser.add_argument("--max_steps", type=int, help="number of training steps (0 to disable)") 37 | parser.add_argument("--start_epochs", type=int, default=0, help="number of training epochs") 38 | parser.add_argument("--max_epochs", type=int, default=35, help="number of training epochs") 39 | parser.add_argument("--summary_freq", type=int, default=100, help="update summaries every summary_freq steps") 40 | parser.add_argument("--progress_freq", type=int, default=50, help="display progress every progress_freq steps") 41 | parser.add_argument("--trace_freq", type=int, default=0, help="trace execution every trace_freq steps") 42 | parser.add_argument("--display_freq", type=int, default=0, help="write current training images every display_freq steps") 43 | parser.add_argument("--save_freq", type=int, default=5000, help="save model every save_freq steps, 0 to disable") 44 | 45 | parser.add_argument("--separable_conv", action="store_true", help="use separable convolutions in the generator") 46 | parser.add_argument("--aspect_ratio", type=float, default=1.0, help="aspect ratio of output images (width/height)") 47 | parser.add_argument("--lab_colorization", action="store_true", help="split input image into brightness (A) and color (B)") 48 | parser.add_argument("--batch_size", type=int, default=4, help="number of images in batch") 49 | parser.add_argument("--which_direction", type=str, default="AtoG", choices=["AtoG", "GtoA"]) 50 | parser.add_argument("--ngf", type=int, default=64, help="number of generator filters in first conv layer") 51 | parser.add_argument("--ndf", type=int, default=64, help="number of discriminator filters in first conv layer") 52 | parser.add_argument("--scale_size", type=int, default=286, help="scale images to this size before cropping to 256x256") 53 | parser.add_argument("--flip", dest="flip", action="store_true", help="flip images horizontally") 54 | parser.add_argument("--no_flip", dest="flip", action="store_false", help="don't flip images horizontally") 55 | parser.set_defaults(flip=True) 56 | parser.add_argument("--lr", type=float, default=0.0002, help="initial learning rate for adam") 57 | parser.add_argument("--beta1", type=float, default=0.5, help="momentum term of adam") 58 | 59 | parser.add_argument("--inputs_type", choices=["original", "geometry", "polar", "tanpolar"], default="geometry") 60 | 61 | 
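# Loss weights and geometry options follow. A hypothetical example invocation using only
# flags defined in this file (the values are illustrative, not recommended settings):
#   python main.py --dataset CVUSA --inputs_type geometry --height_mode radiusPlaneMethod \
#       --method column --geoout_type image --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0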
parser.add_argument("--gan_weight", type=float, default=1.0, help="weight on GAN term for generator gradient") 62 | parser.add_argument("--l1_weight_grd", type=float, default=100.0, help="weight on GAN term for generator gradient") 63 | parser.add_argument("--l1_weight_aer", type=float, default=0.0, help="weight on L1 term for generator gradient") 64 | parser.add_argument("--perceptual_weight_grd", type=float, default=0.0, help="weight on GAN term for generator gradient") 65 | parser.add_argument("--perceptual_weight_aer", type=float, default=0.0, help="weight on GAN term for generator gradient") 66 | 67 | parser.add_argument("--heightPlaneNum", type=int, default=1, help="weight on GAN term for generator gradient") 68 | parser.add_argument("--radiusPlaneNum", type=int, default=32, help="weight on GAN term for generator gradient") 69 | parser.add_argument("--height_mode", choices=['radiusPlaneMethod', 'heightPlaneMethod'], default='radiusPlaneMethod') 70 | # Only when 'height_mode' is 'radiusPlaneMethod', the following two parameters are required. Otherwise not. 71 | parser.add_argument("--method", choices=['column', 'point'], default='column') 72 | parser.add_argument("--geoout_type", choices=['volume', 'image'], default='image') 73 | 74 | parser.add_argument("--finalout_type", choices=['image', 'rgba', 'fgbg'], default='image') 75 | 76 | parser.add_argument("--skip", type=int, default=0, help="use skip connection or not") 77 | 78 | 79 | # export options 80 | parser.add_argument("--output_filetype", default="png", choices=["png", "jpeg"]) 81 | a = parser.parse_args() 82 | 83 | EPS = 1e-12 84 | CROP_SIZE = 256 85 | 86 | 87 | if a.inputs_type != 'geometry': 88 | if a.finalout_type == 'image': 89 | nameStr = a.inputs_type + '_' + a.finalout_type + \ 90 | '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) + \ 91 | '_skip_' + str(a.skip) 92 | else: 93 | nameStr = a.inputs_type + '_' + a.finalout_type + \ 94 | '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) + \ 95 | '_L1Aer_' + str(a.l1_weight_aer) + '_PerAer_' + str(a.perceptual_weight_aer) + \ 96 | '_skip_' + str(a.skip) 97 | else: 98 | 99 | if a.height_mode == 'heightPlaneMethod': 100 | if a.finalout_type == 'image': 101 | nameStr = a.inputs_type + '_' + a.height_mode + '_' + str(a.heightPlaneNum) + '_' + \ 102 | a.method + '_' + a.geoout_type + '_' + \ 103 | a.finalout_type + \ 104 | '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) + \ 105 | '_skip_' + str(a.skip) 106 | else: 107 | nameStr = a.inputs_type + '_' + a.height_mode + '_' + str(a.heightPlaneNum) + '_' + \ 108 | a.method + '_' + a.geoout_type + '_' + \ 109 | a.finalout_type + \ 110 | '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) + \ 111 | '_L1Aer_' + str(a.l1_weight_aer) + '_PerAer_' + str(a.perceptual_weight_aer) + \ 112 | '_skip_' + str(a.skip) 113 | elif a.height_mode == 'radiusPlaneMethod': 114 | if a.finalout_type == 'image': 115 | nameStr = a.inputs_type + '_' + a.height_mode + '_' + str(a.heightPlaneNum) + '_' + \ 116 | a.method + '_' + a.geoout_type + '_' + \ 117 | a.finalout_type + \ 118 | '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) + \ 119 | '_skip_' + str(a.skip) 120 | else: 121 | nameStr = a.inputs_type + '_' + a.height_mode + '_' + str(a.heightPlaneNum) + '_' + \ 122 | a.method + '_' + a.geoout_type + '_' + \ 123 | a.finalout_type + \ 124 | '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) + \ 125 
| '_L1Aer_' + str(a.l1_weight_aer) + '_PerAer_' + str(a.perceptual_weight_aer) + \ 126 | '_skip_' + str(a.skip) 127 | 128 | 129 | def save_images(fetches, step=None): 130 | image_dir = os.path.join('./GeneratedData/', a.dataset, nameStr, 'image') 131 | if not os.path.exists(image_dir): 132 | os.makedirs(image_dir) 133 | height_dir = os.path.join('./GeneratedData/', a.dataset, nameStr, 'height_distribution') 134 | if not os.path.exists(height_dir): 135 | os.makedirs(height_dir) 136 | geotrans_dir = os.path.join('./GeneratedData/', a.dataset, nameStr, 'geotrans') 137 | if not os.path.exists(geotrans_dir): 138 | os.makedirs(geotrans_dir) 139 | 140 | filesets = [] 141 | for i, in_path in enumerate(fetches["paths"]): 142 | name, _ = os.path.splitext(os.path.basename(in_path.decode("utf8"))) 143 | fileset = {"name": name, "step": step} 144 | for kind in ["outputs"]: 145 | filename = name + ".png" 146 | if step is not None: 147 | filename = "%08d-%s" % (step, filename) 148 | fileset[kind] = filename 149 | out_path = os.path.join(image_dir, filename) 150 | contents = fetches[kind][i] 151 | with open(out_path, "wb") as f: 152 | f.write(contents) 153 | for kind in ["generator_inputs"]: 154 | filename = name + ".png" 155 | if step is not None: 156 | filename = "%08d-%s" % (step, filename) 157 | fileset[kind] = filename 158 | out_path = os.path.join(geotrans_dir, filename) 159 | contents = fetches[kind][i] 160 | with open(out_path, "wb") as f: 161 | f.write(contents) 162 | for kind in ["estimated_height"]: 163 | filename = name + ".png" 164 | if step is not None: 165 | filename = "%08d-%s" % (step, filename) 166 | fileset[kind] = filename 167 | out_path = os.path.join(height_dir, filename) 168 | # contents = cmap[fetches[kind][i]] 169 | # # contents = (fetches[kind][i]/2.*255.).astype(np.uint8) 170 | # contents = Image.fromarray(contents) 171 | # contents.save(out_path) 172 | scio.savemat(out_path.replace('png','mat'), {'height': fetches[kind][i]}) 173 | 174 | # with open(out_path, "wb") as f: 175 | # f.write(contents) 176 | filesets.append(fileset) 177 | return filesets 178 | 179 | 180 | def main(): 181 | if a.seed is None: 182 | a.seed = random.randint(0, 2**31 - 1) 183 | 184 | with tf.Graph().as_default(): 185 | 186 | tf.set_random_seed(a.seed) 187 | # tf.random.set_seed(a.seed) 188 | np.random.seed(a.seed) 189 | random.seed(a.seed) 190 | 191 | cmap = np.load('../cmap.npy') 192 | print(cmap.shape) 193 | 194 | output_dir = os.path.join(a.dataset, nameStr, 'aer') 195 | 196 | if not os.path.exists(output_dir): 197 | os.makedirs(output_dir) 198 | 199 | if a.mode == "test" or a.mode == "export": 200 | # if a.checkpoint is None: 201 | # raise Exception("checkpoint required for test mode") 202 | 203 | # load some options from the checkpoint 204 | checkpoint_dir = os.path.join(a.dataset, nameStr, 'aer') 205 | options = {"which_direction", "ngf", "ndf", "lab_colorization"} 206 | with open(os.path.join(checkpoint_dir, "options.json")) as f: 207 | for key, val in json.loads(f.read()).items(): 208 | if key in options: 209 | print("loaded", key, "=", val) 210 | setattr(a, key, val) 211 | # disable these features in test mode 212 | a.scale_size = CROP_SIZE 213 | a.flip = False 214 | 215 | for k, v in a._get_kwargs(): 216 | print(k, "=", v) 217 | 218 | with open(os.path.join(output_dir, "options.json"), "w") as f: 219 | f.write(json.dumps(vars(a), sort_keys=True, indent=4)) 220 | 221 | if a.dataset=='CVUSA': 222 | from load_data.load_data_cvusa import load_examples 223 | elif a.dataset=='CVACT': 224 | from 
load_data.load_data_cvact import load_examples 225 | elif a.dataset=='CVACThalf': 226 | from load_data.load_data_cvact_half import load_examples 227 | elif a.dataset=='CVACTunaligned': 228 | from load_data.load_data_cvact_unaligned import load_examples 229 | elif a.dataset=='OP': 230 | from load_data.load_data_op import load_examples 231 | 232 | examples = load_examples(a.mode, a.batch_size) 233 | print("examples count = %d" % examples.count) 234 | 235 | if a.inputs_type == 'original': 236 | inputs = examples.aer 237 | elif a.inputs_type == 'polar': 238 | inputs = examples.polar 239 | elif a.inputs_type == 'tanpolar': 240 | inputs = examples.tanpolar 241 | else: 242 | inputs = examples.aer 243 | 244 | targets = examples.pano 245 | ref_images = examples.tanpolar 246 | 247 | # inputs and targets are [batch_size, height, width, channels] 248 | model = create_model(inputs, targets, ref_images, a) 249 | 250 | inputs = deprocess(inputs) 251 | targets = deprocess(targets) 252 | outputs = deprocess(model.outputs) 253 | converted_generator_inputs = deprocess(model.generator_inputs) 254 | 255 | def convert(image): 256 | if a.aspect_ratio != 1.0: 257 | # upscale to correct aspect ratio 258 | size = [CROP_SIZE, int(round(CROP_SIZE * a.aspect_ratio))] 259 | image = tf.image.resize_images(image, size=size, method=tf.image.ResizeMethod.BICUBIC) 260 | 261 | return tf.image.convert_image_dtype(image, dtype=tf.uint8, saturate=True) 262 | 263 | # reverse any processing on images so they can be written to disk or displayed to user 264 | with tf.name_scope("convert_inputs"): 265 | converted_inputs = convert(inputs) 266 | 267 | with tf.name_scope("convert_targets"): 268 | converted_targets = convert(targets) 269 | 270 | with tf.name_scope("convert_outputs"): 271 | converted_outputs = convert(outputs) 272 | 273 | with tf.name_scope("convert_generator_inputs"): 274 | converted_generator_inputs = convert(converted_generator_inputs) 275 | 276 | # with tf.name_scope("convert_estimated_height"): 277 | # converted_estimated_height = convert(model.estimated_height) 278 | 279 | with tf.name_scope("encode_images"): 280 | display_fetches = { 281 | "paths": examples.paths, 282 | "inputs": tf.map_fn(tf.image.encode_png, converted_inputs, dtype=tf.string, name="input_pngs"), 283 | "targets": tf.map_fn(tf.image.encode_png, converted_targets, dtype=tf.string, name="target_pngs"), 284 | "outputs": tf.map_fn(tf.image.encode_png, converted_outputs, dtype=tf.string, name="output_pngs"), 285 | "generator_inputs": tf.map_fn(tf.image.encode_png, converted_generator_inputs, dtype=tf.string, name="geometry_transfer_pngs"), 286 | # "generator_inputs": converted_generator_inputs, 287 | "estimated_height": model.estimated_height, 288 | # "height": tf.map_fn(tf.image.encode_png, converted_estimated_height, dtype=tf.string, 289 | # name="height_maps"), 290 | 291 | } 292 | 293 | # summaries 294 | with tf.name_scope("inputs_summary"): 295 | tf.summary.image("inputs", converted_inputs) 296 | 297 | with tf.name_scope("targets_summary"): 298 | tf.summary.image("targets", converted_targets) 299 | 300 | with tf.name_scope("outputs_summary"): 301 | tf.summary.image("outputs", converted_outputs) 302 | 303 | with tf.name_scope("generator_inputs_summary"): 304 | tf.summary.image("generator_inputs", converted_generator_inputs) 305 | 306 | with tf.name_scope("estimated_height_summary"): 307 | tf.summary.image('estimated_height', tf.argmax(model.estimated_height, axis=-1)[..., None]/64) 308 | # tf.summary.image("predict_fake", 
tf.image.convert_image_dtype(tf.expand_dims(model.estimated_height/32, axis=-1), dtype=tf.uint8)) 309 | 310 | tf.summary.scalar("discriminator_loss", model.discrim_loss) 311 | tf.summary.scalar("generator_loss_GAN", model.gen_loss_GAN) 312 | tf.summary.scalar("generator_loss_L1", model.gen_loss_L1) 313 | tf.summary.scalar("gen_loss_perceptual", model.gen_loss_perceptual) 314 | 315 | for var in tf.trainable_variables(): 316 | tf.summary.histogram(var.op.name + "/values", var) 317 | 318 | for grad, var in model.discrim_grads_and_vars + model.gen_grads_and_vars: 319 | tf.summary.histogram(var.op.name + "/gradients", grad) 320 | 321 | with tf.name_scope("parameter_count"): 322 | parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()]) 323 | 324 | saver = tf.train.Saver(max_to_keep=1) 325 | 326 | # t_vars = tf.trainable_variables() 327 | # h_vars = [] 328 | # for var in t_vars: 329 | # if 'height_estimation' in var.op.name: 330 | # h_vars.append(var) 331 | # print(len(h_vars)) 332 | # print(h_vars[-1]) 333 | # print(h_vars[-2].op.name) 334 | # print(h_vars[-1].op.name) 335 | 336 | 337 | logdir = output_dir if (a.trace_freq > 0 or a.summary_freq > 0) else None 338 | sv = tf.train.Supervisor(logdir=logdir, save_summaries_secs=0, saver=None) 339 | with sv.managed_session() as sess: 340 | print("parameter_count =", sess.run(parameter_count)) 341 | 342 | # t_vars = tf.trainable_variables() 343 | # v_1, v_2 = sess.run([h_vars[-1], h_vars[-2]]) 344 | # print(v_1) 345 | # print(v_2) 346 | 347 | if a.checkpoint is not None or a.mode == 'test': 348 | # if a.mode == "test": 349 | print("loading model from checkpoint") 350 | checkpoint_dir = os.path.join(a.dataset, nameStr, 'aer') 351 | checkpoint = tf.train.latest_checkpoint(checkpoint_dir) 352 | saver.restore(sess, checkpoint) 353 | 354 | global_step_from_restore = sess.run(sv.global_step) 355 | start_epoch = int(global_step_from_restore / examples.steps_per_epoch) 356 | print('====================') 357 | print(global_step_from_restore, start_epoch) 358 | print('====================') 359 | 360 | else: 361 | start_epoch = 0 362 | 363 | # max_steps = 2**32 364 | # if a.max_epochs is not None: 365 | max_steps = examples.steps_per_epoch * a.max_epochs 366 | start_steps = examples.steps_per_epoch * start_epoch 367 | # if a.max_steps is not None: 368 | # max_steps = a.max_steps 369 | 370 | if a.mode == "test": 371 | # testing 372 | # at most, process the test data once 373 | start = time.time() 374 | max_steps = min(examples.steps_per_epoch, max_steps) 375 | for step in range(max_steps): 376 | results = sess.run(display_fetches) 377 | filesets = save_images(results) 378 | for i, f in enumerate(filesets): 379 | print("evaluated image", f["name"]) 380 | # index_path = append_index(filesets) 381 | # print("wrote index at", index_path) 382 | print("rate", (time.time() - start) / max_steps) 383 | else: 384 | # training 385 | start = time.time() 386 | 387 | for step in range(start_steps, max_steps): 388 | def should(freq): 389 | return freq > 0 and ((step + 1) % freq == 0 or step == max_steps - 1) 390 | 391 | options = None 392 | run_metadata = None 393 | if should(a.trace_freq): 394 | options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) 395 | run_metadata = tf.RunMetadata() 396 | 397 | fetches = { 398 | "train": model.train, 399 | "global_step": sv.global_step, 400 | } 401 | 402 | if should(a.progress_freq): 403 | fetches["discrim_loss"] = model.discrim_loss 404 | fetches["gen_loss_GAN"] = model.gen_loss_GAN 405 | 
fetches["gen_loss_L1"] = model.gen_loss_L1 406 | fetches["gen_loss_perceptual"] = model.gen_loss_perceptual 407 | 408 | if should(a.summary_freq): 409 | fetches["summary"] = sv.summary_op 410 | 411 | if should(a.display_freq): 412 | fetches["display"] = display_fetches 413 | 414 | results = sess.run(fetches, options=options, run_metadata=run_metadata) 415 | # geo_trans = sess.run(converted_generator_inputs, options=options, run_metadata=run_metadata) 416 | # print(geo_trans.shape) 417 | # for i in range(0, a.batch_size): 418 | # img = Image.fromarray(geo_trans[i]) 419 | # img.save('geotrans_' + str(i) + '.png') 420 | 421 | if should(a.summary_freq): 422 | print("recording summary") 423 | sv.summary_writer.add_summary(results["summary"], results["global_step"]) 424 | 425 | height = sess.run(model.estimated_height, options=options, run_metadata=run_metadata) 426 | height = np.argmax(height, axis=-1) 427 | # scio.savemat('height.mat', {'height': height}) 428 | for b in range(0, a.batch_size): 429 | img = cmap[height[b].squeeze()] 430 | img = Image.fromarray(img) 431 | img.save(str(b)+'height.png') 432 | 433 | if should(a.trace_freq): 434 | print("recording trace") 435 | sv.summary_writer.add_run_metadata(run_metadata, "step_%d" % results["global_step"]) 436 | 437 | if should(a.progress_freq): 438 | # global_step will have the correct step count if we resume from a checkpoint 439 | train_epoch = math.ceil(results["global_step"] / examples.steps_per_epoch) 440 | train_step = (results["global_step"] - 1) % examples.steps_per_epoch + 1 441 | rate = (step - start_steps + 1) * a.batch_size / (time.time() - start) 442 | remaining = (max_steps - step) * a.batch_size / rate 443 | print("progress epoch %d step %d image/sec %0.1f remaining %dm" % (train_epoch, train_step, rate, remaining / 60)) 444 | print("discrim_loss", results["discrim_loss"]) 445 | print("gen_loss_GAN", results["gen_loss_GAN"]) 446 | print("gen_loss_L1", results["gen_loss_L1"]) 447 | print("gen_loss_perceptual", results["gen_loss_perceptual"]) 448 | 449 | if should(examples.steps_per_epoch): 450 | # if should(50): 451 | print("saving model") 452 | saver.save(sess, os.path.join(output_dir, "model"), global_step=sv.global_step) 453 | 454 | if sv.should_stop(): 455 | break 456 | 457 | 458 | main() 459 | 460 | --------------------------------------------------------------------------------