├── framework.png
├── VGG
│   ├── 0000011.jpg
│   ├── 0000041.jpg
│   ├── perceptual_loss_test.py
│   ├── perceptual_loss.py
│   └── vgg.py
├── script3
│   ├── run1.sh
│   ├── run.sh
│   ├── model22.py
│   ├── model23.py
│   ├── model.py
│   ├── baseline22.py
│   ├── baseline23.py
│   └── main.py
├── evaluation_metrics
│   ├── run.sh
│   ├── metrics_tf.py
│   └── metrics_tf_cvact.py
├── README.md
├── geometry
│   ├── projector.py
│   ├── test_geometry.py
│   ├── utils.py
│   └── Geometry.py
└── load_data
    ├── load_data_op.py
    ├── load_data_cvusa.py
    ├── load_data_cvact.py
    ├── load_data_cvact_unaligned.py
    └── load_data_cvact_half.py
/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YujiaoShi/Sat2StrPanoramaSynthesis/HEAD/framework.png -------------------------------------------------------------------------------- /VGG/0000011.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YujiaoShi/Sat2StrPanoramaSynthesis/HEAD/VGG/0000011.jpg -------------------------------------------------------------------------------- /VGG/0000041.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YujiaoShi/Sat2StrPanoramaSynthesis/HEAD/VGG/0000041.jpg -------------------------------------------------------------------------------- /VGG/perceptual_loss_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' 3 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' 4 | 5 | 6 | import tensorflow as tf 7 | from VGG.vgg import build_vgg19 8 | import cv2 9 | import numpy as np 10 | 11 | x = np.arange(0, 224*224*3).reshape((1, 224, 224, 3))/(224*224*3)*255 12 | 13 | x1 = tf.constant(x) 14 | 15 | vgg_model_file = './imagenet-vgg-verydeep-19.mat' 16 | 17 | net = build_vgg19(x1, vgg_model_file) 18 | 19 | a = 1 20 | 21 | # 22 | # img1 = cv2.resize(cv2.imread('0000011.jpg'), (256, 256)).astype(np.float32)[np.newaxis,...] 23 | # img2 = cv2.resize(cv2.imread('0000011.jpg'), (256, 256)).astype(np.float32)[np.newaxis,...]
24 | # 25 | # real_img = tf.constant(img1) 26 | # fake_img = tf.constant(img2) 27 | # 28 | # def compute_error(real, fake): 29 | # return tf.reduce_mean(tf.abs(fake - real)) 30 | # vgg_model_file = './imagenet-vgg-verydeep-19.mat' 31 | # 32 | # vgg_real = build_vgg19(real_img, vgg_model_file) 33 | # vgg_fake = build_vgg19(fake_img, vgg_model_file) 34 | # 35 | # p0 = compute_error(vgg_real['input'], vgg_fake['input']) 36 | # p1 = compute_error(vgg_real['conv1_2'], vgg_fake['conv1_2']) / 2.6 37 | # p2 = compute_error(vgg_real['conv2_2'], vgg_fake['conv2_2']) / 4.8 38 | # p3 = compute_error(vgg_real['conv3_2'], vgg_fake['conv3_2']) / 3.7 39 | # p4 = compute_error(vgg_real['conv4_2'], vgg_fake['conv4_2']) / 5.6 40 | # p5 = compute_error(vgg_real['conv5_2'], vgg_fake['conv5_2']) * 10 / 1.5 41 | # total_loss = p0 + p1 + p2 + p3 + p4 + p5 42 | # 43 | # sess = tf.Session() 44 | # 45 | # loss = sess.run(total_loss) -------------------------------------------------------------------------------- /script3/run1.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 1 #--mode test --checkpoint tt 3 | CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVUSA --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 #--mode test --checkpoint tt 4 | 5 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0 --heightPlaneNum 32 --mode test --checkpoint tt 6 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0 --heightPlaneNum 64 --mode test --checkpoint tt 7 | # 8 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 1 --heightPlaneNum 32 --mode test --checkpoint tt 9 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 1 --heightPlaneNum 64 --mode test --checkpoint tt 10 | # 11 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 1 --heightPlaneNum 32 --mode test --checkpoint tt 12 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACT --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 1 --heightPlaneNum 64 --mode test --checkpoint tt 13 | 14 | 15 | 16 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACTunaligned --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 1 --heightPlaneNum 1 # --mode test --checkpoint tt 17 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACTunaligned --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 1 --heightPlaneNum 64 #--mode test --checkpoint tt 18 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACTunaligned --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 1 --heightPlaneNum 1 #--mode test --checkpoint tt 19 | #CUDA_VISIBLE_DEVICES=1 python main.py --dataset CVACTunaligned --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 1 --heightPlaneNum 64 #--mode test --checkpoint tt 20 | -------------------------------------------------------------------------------- /script3/run.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACT --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 --mode test --checkpoint tt 3 | CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 0 
--perceptual_weight_grd 1 --skip 0 --heightPlaneNum 1 #--mode test --checkpoint tt 4 | 5 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0 --heightPlaneNum 32 --mode test --checkpoint tt 6 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0 --heightPlaneNum 64 --mode test --checkpoint tt 7 | # 8 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 1 --heightPlaneNum 32 --mode test --checkpoint tt 9 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 1 --heightPlaneNum 64 --mode test --checkpoint tt 10 | # 11 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 1 --heightPlaneNum 32 --mode test --checkpoint tt 12 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 1 --heightPlaneNum 64 --mode test --checkpoint tt 13 | 14 | 15 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACTunaligned --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 1 # --mode test --checkpoint tt 16 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACTunaligned --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 #--mode test --checkpoint tt 17 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACTunaligned --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0 --heightPlaneNum 1 #--mode test --checkpoint tt 18 | #CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACTunaligned --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0 --heightPlaneNum 64 #--mode test --checkpoint tt 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /VGG/perceptual_loss.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | from .vgg import build_vgg19 4 | 5 | 6 | 7 | def compute_error(real, fake): 8 | return tf.reduce_mean(tf.abs(fake - real)) 9 | 10 | 11 | def perceptual_loss(real_img, fake_img): 12 | real_img = (real_img+1.)/2. * 255. 13 | fake_img = (fake_img+1.)/2. * 255. 14 | vgg_model_file = '../VGG/imagenet-vgg-verydeep-19.mat' 15 | 16 | vgg_real = build_vgg19(real_img, vgg_model_file) 17 | vgg_fake = build_vgg19(fake_img, vgg_model_file) 18 | 19 | p0 = compute_error(vgg_real['input'], vgg_fake['input']) 20 | p1 = compute_error(vgg_real['conv1_2'], vgg_fake['conv1_2']) / 2.6 21 | p2 = compute_error(vgg_real['conv2_2'], vgg_fake['conv2_2']) / 4.8 22 | p3 = compute_error(vgg_real['conv3_2'], vgg_fake['conv3_2']) / 3.7 23 | p4 = compute_error(vgg_real['conv4_2'], vgg_fake['conv4_2']) / 5.6 24 | p5 = compute_error(vgg_real['conv5_2'], vgg_fake['conv5_2']) * 10 / 1.5 25 | total_loss = p0 + p1 + p2 + p3 + p4 + p5 26 | 27 | return total_loss 28 | 29 | 30 | def perceptual_loss_n(real_img, fake_imgs): 31 | 32 | vgg_model_file = '../VGG/imagenet-vgg-verydeep-19.mat' 33 | 34 | real_img = (real_img + 1.) / 2. * 255. 35 | vgg_real = build_vgg19(real_img, vgg_model_file) 36 | 37 | loss = [] 38 | 39 | for fake in fake_imgs: 40 | fake = (fake + 1.) / 2. * 255. 
41 | 42 | vgg_fake = build_vgg19(fake, vgg_model_file) 43 | 44 | p0 = compute_error(vgg_real['input'], vgg_fake['input']) 45 | p1 = compute_error(vgg_real['conv1_2'], vgg_fake['conv1_2']) / 2.6 46 | p2 = compute_error(vgg_real['conv2_2'], vgg_fake['conv2_2']) / 4.8 47 | p3 = compute_error(vgg_real['conv3_2'], vgg_fake['conv3_2']) / 3.7 48 | p4 = compute_error(vgg_real['conv4_2'], vgg_fake['conv4_2']) / 5.6 49 | p5 = compute_error(vgg_real['conv5_2'], vgg_fake['conv5_2']) * 10 / 1.5 50 | 51 | loss.append(p0 + p1 + p2 + p3 + p4 + p5) 52 | 53 | total_loss = tf.stack(loss) 54 | min_loss = tf.reduce_min(total_loss) 55 | 56 | return min_loss 57 | 58 | -------------------------------------------------------------------------------- /evaluation_metrics/run.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../stoa/GeneratedData/CVUSA/pix2pix1_aer_L1Grd_100.0_PerGrd_0.0/ 3 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../stoa/GeneratedData/CVUSA/pix2pix1_aer_L1Grd_0.0_PerGrd_1.0/ 4 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../stoa/GeneratedData/CVUSA/pix2pix_aer_L1Grd_100.0_PerGrd_0.0/ 5 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../stoa/GeneratedData/CVUSA/pix2pix_aer_L1Grd_0.0_PerGrd_1.0/ 6 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_1/image/ 7 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_1/image/ 8 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_1/image/ 9 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_1/image/ 10 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_0/image/ 11 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_0/image/ 12 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_0/image/ 13 | CUDA_VISIBLE_DEVICES=1 python metrics_tf.py --dir ../script3/GeneratedData/CVUSA/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_0/image/ 14 | 15 | 16 | 17 | 18 | #CUDA_VISIBLE_DEVICES=1 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_1/image/ 19 | #CUDA_VISIBLE_DEVICES=1 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_1/image/ 20 | ##CUDA_VISIBLE_DEVICES=0 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_1/image/ 21 | ##CUDA_VISIBLE_DEVICES=0 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_1/image/ 22 | #CUDA_VISIBLE_DEVICES=1 python metrics_tf_cvact.py --dir 
../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_0/image/ 23 | #CUDA_VISIBLE_DEVICES=1 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_100.0_PerGrd_0.0_skip_0/image/ 24 | #CUDA_VISIBLE_DEVICES=1 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_1_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_0/image/ 25 | #CUDA_VISIBLE_DEVICES=1 python metrics_tf_cvact.py --dir ../script3/GeneratedData/CVACT/geometry_radiusPlaneMethod_64_column_image_image_L1Grd_0.0_PerGrd_1.0_skip_0/image/ 26 | 27 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sat2StrPanoramaSynthesis 2 | Geometry-Guided Street-View Panorama Synthesis from Satellite Imagery, TPAMI 2022 3 | ![alt text](./framework.png) 4 | 5 | # Abstract 6 | This paper presents a new approach for synthesizing a novel street-view panorama given a satellite image, as if captured from the geographical location at the center of the satellite image. Existing works approach this as an image generation problem, adopting generative adversarial networks to implicitly learn the cross-view transformations while ignoring the geometric constraints. 7 | In this paper, we make the geometric correspondences between the satellite and street-view images explicit so as to facilitate the transfer of information between domains. 8 | Specifically, we observe that when a 3D point is visible in both views, and the height of the point relative to the camera is known, there is a deterministic mapping between the projected points in the images. 9 | Motivated by this, we develop a novel satellite-to-street-view projection (S2SP) module which learns the height map and projects the satellite image to the ground-level viewpoint, explicitly connecting corresponding pixels. 10 | With these projected satellite images as input, we next employ a generator to synthesize realistic street-view panoramas that are geometrically consistent with the satellite images. 11 | Our S2SP module is differentiable and the whole framework is trained in an end-to-end manner. 12 | Extensive experimental results on two cross-view benchmark datasets demonstrate that our method generates more accurate and consistent images than existing approaches. 13 | 14 | ### Experiment Dataset 15 | 16 | Our experiments are conducted on the CVUSA and CVACT datasets. Our processed data can be downloaded [here](https://anu365-my.sharepoint.com/:f:/g/personal/u6293587_anu_edu_au/EuOBUDUQNClJvCpQ8bD1hnoBjdRBWxsHOVp946YVahiMGg?e=F4yRAC). 17 | 18 | (The link may expire from time to time. If it does not work, please drop me an email: yujiao.shi@anu.edu.au.) 19 | 20 | ### Preparation 21 | 22 | Please download the VGG model pretrained on ImageNet from [here](https://anu365-my.sharepoint.com/:u:/g/personal/u6293587_anu_edu_au/EVueknEGIBpKolDJ3JrqEjsBey5P12JFuR36xpO-inhXHg?e=kkmD4r). It is used for the VGG perceptual loss.
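
### S2SP geometry (illustrative)

The abstract notes that, once the height of a 3D point relative to the camera is assumed, the mapping between its projection in the satellite image and its projection in the street-view panorama is deterministic. The snippet below is a minimal, self-contained sketch of that mapping for a single panorama pixel, assuming an equirectangular panorama, a north-aligned satellite image centred on the camera, and a fixed ground resolution; all function and parameter names here are illustrative only and are not part of this repository's interface. The full, differentiable projection used for training is implemented in `geometry/Geometry.py`.

    import numpy as np

    def pano_pixel_to_sat_pixel(u, v, height, pano_w=512, pano_h=128,
                                sat_size=256, meters_per_pixel=0.5, vfov=np.pi):
        # Azimuth grows with the panorama column, measured clockwise from north.
        theta = 2.0 * np.pi * u / pano_w
        # Elevation: +vfov/2 at the top row, -vfov/2 at the bottom row.
        phi = vfov * (0.5 - v / pano_h)
        # A point at `height` metres relative to the camera, seen at elevation `phi`,
        # lies at horizontal distance r = height / tan(phi); the horizon (phi = 0)
        # has no finite intersection and is not handled in this sketch.
        r = height / np.tan(phi)
        # Convert the polar offset (r, theta) to satellite pixel coordinates.
        cx = cy = sat_size / 2.0
        x = cx + (r / meters_per_pixel) * np.sin(theta)   # satellite column
        y = cy - (r / meters_per_pixel) * np.cos(theta)   # satellite row
        return x, y

Sampling the satellite image at the mapped coordinates, for a set of candidate heights (controlled by `--heightPlaneNum` below), is roughly how the projected satellite images fed to the generator are formed.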
23 | 24 | ### Codes 25 | 26 | #### Training 27 | 28 | cd script3 29 | 30 | CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 --mode train 31 | 32 | CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACT --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 --mode train 33 | 34 | 35 | #### Testing 36 | 37 | cd script3 38 | 39 | CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVUSA --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 --mode test 40 | 41 | CUDA_VISIBLE_DEVICES=0 python main.py --dataset CVACT --l1_weight_grd 0 --perceptual_weight_grd 1 --skip 0 --heightPlaneNum 64 --mode test 42 | 43 | 44 | ### Publications 45 | This work is published in TPAMI 2022: 46 | [Geometry-Guided Street-View Panorama Synthesis from Satellite Imagery] 47 | 48 | If you find our work or code useful, please cite the following publication: 49 | *Yujiao Shi, Dylan Campbell, Xin Yu, and Hongdong Li. Geometry-Guided Street-View Panorama Synthesis from Satellite Imagery. TPAMI 2022.* 50 | 51 | @article{shi2020where, 52 | title={Geometry-Guided Street-View Panorama Synthesis from Satellite Imagery}, 53 | author={Shi, Yujiao and Campbell, Dylan and Yu, Xin and Li, Hongdong}, 54 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)}, 55 | year={2022} 56 | } 57 | 58 | 59 | 60 |
-------------------------------------------------------------------------------- /geometry/projector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow.compat.v1 as tf 3 | tf.disable_v2_behavior() 4 | from MIP import tf_shape 5 | 6 | 7 | def over_composite(rgbas): 8 | """Combines a list of RGBA images using the over operation. 9 | 10 | Combines RGBA images from back to front with the over operation. 11 | The alpha image of the first image is ignored and assumed to be 1.0. 12 | 13 | Args: 14 | rgbas: A list of [batch, H, W, 4] RGBA images, combined from back to front. 15 | Returns: 16 | Composited RGB image. 17 | """ 18 | for i in range(len(rgbas)): 19 | rgb = rgbas[i][:, :, :, 0:3] 20 | alpha = rgbas[i][:, :, :, 3:] 21 | if i == 0: 22 | output = rgb 23 | else: 24 | rgb_by_alpha = rgb * alpha 25 | output = rgb_by_alpha + output * (1.0 - alpha) 26 | return output 27 | 28 | 29 | def mpi_render_grd_view(batch_rgbas, share_alpha=True): 30 | 31 | batch, height, width, channel = batch_rgbas.get_shape().as_list() 32 | 33 | if share_alpha: 34 | num_mpi_planes = int(channel/4) 35 | rgba_layers = tf.reshape(batch_rgbas, [-1, height, width, num_mpi_planes, 4]) 36 | rgb = rgba_layers[..., :3] 37 | alpha = tf.expand_dims(rgba_layers[..., -1], axis=-1) 38 | else: 39 | num_mpi_planes = int(channel / 5) 40 | rgba_layers = tf.reshape(batch_rgbas, [-1, height, width, num_mpi_planes, 5]) 41 | rgb = rgba_layers[..., :3] 42 | alpha = tf.expand_dims(rgba_layers[..., 4], axis=-1) 43 | 44 | alpha = (alpha + 1.)/2.
45 | rgba_layers = tf.transpose(tf.concat([rgb, alpha], axis=-1), [3, 0, 1, 2, 4]) 46 | 47 | rgba_list = [] 48 | for i in range(int(num_mpi_planes)): 49 | rgba_list.append(rgba_layers[i]) 50 | 51 | synthesis_image = over_composite(rgba_list) 52 | # shape = [batch, height, width, 3] 53 | 54 | return synthesis_image 55 | 56 | 57 | def mpi_render_aer_view(batch_rgbas, share_alpha=True): 58 | batch, height, width, channel = batch_rgbas.get_shape().as_list() 59 | 60 | if share_alpha: 61 | num_mpi_planes = int(channel / 4) 62 | rgba_layers = tf.reshape(batch_rgbas, [-1, height, width, num_mpi_planes, 4]) 63 | rgb = rgba_layers[..., :3] 64 | alpha = tf.expand_dims(rgba_layers[..., -1], axis=-1) 65 | else: 66 | num_mpi_planes = int(channel / 5) 67 | rgba_layers = tf.reshape(batch_rgbas, [-1, height, width, num_mpi_planes, 5]) 68 | rgb = rgba_layers[..., :3] 69 | alpha = tf.expand_dims(rgba_layers[..., -1], axis=-1) 70 | alpha = (alpha + 1.) / 2. 71 | rgba_layers = tf.transpose(tf.concat([rgb, alpha], axis=-1), [1, 0, 2, 3, 4]) 72 | # shape = [height, batch, width, num_mpi_planes, 4] 73 | 74 | rgba_list = [] 75 | for i in range(int(height)): 76 | rgba_list.append(rgba_layers[i]) 77 | 78 | rgba_list = rgba_list[::-1][:int(height//2)] 79 | 80 | synthesis_image = over_composite(rgba_list) 81 | # shape = [batch, width, num_mpi_planes, 3] 82 | 83 | return synthesis_image 84 | 85 | 86 | def rtheta2uv(athetaimage, aer_size): 87 | ''' 88 | :param athetaimage: shape = [batch, width, PlaneNum, 3] width-->theta PlaneNum-->radius 89 | :param aer_size: 90 | :return: 91 | ''' 92 | batch, width, PlaneNum, channel = tf_shape(athetaimage, 4) 93 | i = np.arange(aer_size) 94 | j = np.arange(aer_size) 95 | jj, ii = np.meshgrid(j, i) 96 | 97 | center = aer_size / 2 - 0.5 98 | theta = np.arctan(-(jj - center) / (ii - center)) 99 | theta[np.where(ii < center)] += np.pi 100 | theta[np.where((ii >= center) & (jj >= center))] += 2 * np.pi 101 | theta = theta/(2 * np.pi)*width 102 | 103 | RadiusByPixel = np.sqrt((ii - center) ** 2 + (jj - center) ** 2) 104 | RadiusByPixel = (1-RadiusByPixel/aer_size*2)*PlaneNum 105 | 106 | uv = np.stack([RadiusByPixel, theta], axis=-1) 107 | uv = uv.astype(np.float32) 108 | warp = tf.stack([uv] * batch, axis=0) 109 | 110 | sampler_output = tf.contrib.resampler.resampler(athetaimage, warp) 111 | # shape = [batch, aer_size, aer_size, 3] 112 | 113 | return sampler_output 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /VGG/vgg.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # VGG network definition from: 3 | # 4 | # https://github.com/CQFIO/PhotographicImageSynthesis/blob/master/demo_1024p.py 5 | # 6 | # Released under an MIT License. 7 | """VGG network definition. 
8 | """ 9 | 10 | from __future__ import division 11 | import numpy as np 12 | import tensorflow as tf 13 | 14 | 15 | def build_net(ntype, nin, nwb=None, name=None): 16 | if ntype == 'conv': 17 | return tf.nn.relu( 18 | tf.nn.conv2d( 19 | nin, nwb[0], strides=[1, 1, 1, 1], padding='SAME', name=name) + 20 | nwb[1]) 21 | elif ntype == 'pool': 22 | return tf.nn.avg_pool( 23 | nin, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 24 | 25 | 26 | def get_weight_bias(vgg_layers, i): 27 | weights = vgg_layers[i][0][0][2][0][0] 28 | weights = tf.constant(weights) 29 | bias = vgg_layers[i][0][0][2][0][1] 30 | bias = tf.constant(np.reshape(bias, (bias.size))) 31 | return weights, bias 32 | 33 | 34 | def build_vgg19(input, model_filepath, reuse=False): 35 | with tf.variable_scope('vgg', reuse=reuse): 36 | net = {} 37 | input = tf.cast(input, tf.float32) 38 | import scipy.io as sio 39 | # with open(model_filepath, 'r') as f: 40 | vgg_rawnet = sio.loadmat(model_filepath) 41 | vgg_layers = vgg_rawnet['layers'][0] 42 | imagenet_mean = tf.constant( 43 | [123.6800, 116.7790, 103.9390], shape=[1, 1, 1, 3]) 44 | net['input'] = input - imagenet_mean 45 | net['conv1_1'] = build_net( 46 | 'conv', 47 | net['input'], 48 | get_weight_bias(vgg_layers, 0), 49 | name='vgg_conv1_1') 50 | net['conv1_2'] = build_net( 51 | 'conv', 52 | net['conv1_1'], 53 | get_weight_bias(vgg_layers, 2), 54 | name='vgg_conv1_2') 55 | net['pool1'] = build_net('pool', net['conv1_2']) 56 | net['conv2_1'] = build_net( 57 | 'conv', 58 | net['pool1'], 59 | get_weight_bias(vgg_layers, 5), 60 | name='vgg_conv2_1') 61 | net['conv2_2'] = build_net( 62 | 'conv', 63 | net['conv2_1'], 64 | get_weight_bias(vgg_layers, 7), 65 | name='vgg_conv2_2') 66 | net['pool2'] = build_net('pool', net['conv2_2']) 67 | net['conv3_1'] = build_net( 68 | 'conv', 69 | net['pool2'], 70 | get_weight_bias(vgg_layers, 10), 71 | name='vgg_conv3_1') 72 | net['conv3_2'] = build_net( 73 | 'conv', 74 | net['conv3_1'], 75 | get_weight_bias(vgg_layers, 12), 76 | name='vgg_conv3_2') 77 | net['conv3_3'] = build_net( 78 | 'conv', 79 | net['conv3_2'], 80 | get_weight_bias(vgg_layers, 14), 81 | name='vgg_conv3_3') 82 | net['conv3_4'] = build_net( 83 | 'conv', 84 | net['conv3_3'], 85 | get_weight_bias(vgg_layers, 16), 86 | name='vgg_conv3_4') 87 | net['pool3'] = build_net('pool', net['conv3_4']) 88 | net['conv4_1'] = build_net( 89 | 'conv', 90 | net['pool3'], 91 | get_weight_bias(vgg_layers, 19), 92 | name='vgg_conv4_1') 93 | net['conv4_2'] = build_net( 94 | 'conv', 95 | net['conv4_1'], 96 | get_weight_bias(vgg_layers, 21), 97 | name='vgg_conv4_2') 98 | net['conv4_3'] = build_net( 99 | 'conv', 100 | net['conv4_2'], 101 | get_weight_bias(vgg_layers, 23), 102 | name='vgg_conv4_3') 103 | net['conv4_4'] = build_net( 104 | 'conv', 105 | net['conv4_3'], 106 | get_weight_bias(vgg_layers, 25), 107 | name='vgg_conv4_4') 108 | net['pool4'] = build_net('pool', net['conv4_4']) 109 | net['conv5_1'] = build_net( 110 | 'conv', 111 | net['pool4'], 112 | get_weight_bias(vgg_layers, 28), 113 | name='vgg_conv5_1') 114 | net['conv5_2'] = build_net( 115 | 'conv', 116 | net['conv5_1'], 117 | get_weight_bias(vgg_layers, 30), 118 | name='vgg_conv5_2') 119 | return net 120 | 121 | # ****************************************************************************** 122 | -------------------------------------------------------------------------------- /load_data/load_data_op.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import 
tensorflow.compat.v1 as tf 3 | tf.disable_v2_behavior() 4 | import math 5 | 6 | # Examples = collections.namedtuple("Examples", "paths, aer, pano, mask, count, steps_per_epoch") 7 | Examples = collections.namedtuple("Examples", "paths, aer, pano, tanpolar, count, steps_per_epoch") 8 | 9 | 10 | def preprocess(image): 11 | with tf.name_scope("preprocess"): 12 | # [0, 1] => [-1, 1] 13 | return image * 2 - 1 14 | 15 | 16 | def load_examples(mode='train', batch_size=2): 17 | 18 | if mode=='train': 19 | file_list = '../../../Data/OP/splits/train_split.txt' 20 | else: 21 | file_list = '../../../Data/OP/splits/test_split.txt' 22 | img_root = '../../../Data/OP/' 23 | 24 | data_list = [] 25 | 26 | with open(file_list, 'r') as f: 27 | lines = f.readlines() 28 | for line in lines: 29 | items = line.split(',') 30 | if mode == 'train' and items[0].replace('_nadir', '') != items[2]: 31 | continue 32 | else: 33 | data_list.append([img_root + 'aerial/' + items[0], 34 | img_root + 'panorama/' + items[1].replace('\n', ''), 35 | # img_root + 'refinenetSeman/aerial/' + items[0], 36 | # img_root + 'refinenetSeman/panorama_visualize/' + items[1].replace('\n', ''), 37 | img_root + 'tanpolar/' + items[0], 38 | items[1].replace('\n', '')]) 39 | 40 | aer_list = [item[0] for item in data_list] 41 | pano_list = [item[1] for item in data_list] 42 | # mask_list = [item[2] for item in data_list] 43 | tanpolar_list = [item[2] for item in data_list] 44 | # polar_list = [item[4] for item in data_list] 45 | 46 | aer_queue = tf.train.string_input_producer(aer_list, shuffle=mode=='train', seed=2020) 47 | pano_queue = tf.train.string_input_producer(pano_list, shuffle=mode=='train', seed=2020) 48 | # mask_queue = tf.train.string_input_producer(mask_list, shuffle=mode=='train', seed=2020) 49 | tanpolar_queue = tf.train.string_input_producer(tanpolar_list, shuffle=mode == 'train', seed=2020) 50 | # polar_queue = tf.train.string_input_producer(polar_list, shuffle=mode == 'train', seed=2020) 51 | 52 | reader = tf.WholeFileReader() 53 | aer_paths, aer_contents = reader.read(aer_queue) 54 | pano_paths, pano_contents = reader.read(pano_queue) 55 | # mask_paths, mask_contents = reader.read(mask_queue) 56 | tanpolar_paths, tanpolar_contents = reader.read(tanpolar_queue) 57 | # polar_paths, polar_contents = reader.read(polar_queue) 58 | 59 | aer = tf.image.decode_jpeg(aer_contents) 60 | panos = tf.image.decode_jpeg(pano_contents) 61 | # mask = tf.image.decode_png(mask_contents) 62 | tanpolar = tf.image.decode_png(tanpolar_contents) 63 | # polar = tf.image.decode_png(polar_contents) 64 | 65 | aer = tf.image.convert_image_dtype(aer, tf.float32) 66 | panos = tf.image.convert_image_dtype(panos, tf.float32) 67 | tanpolar = tf.image.convert_image_dtype(tanpolar, tf.float32) 68 | # mask = tf.image.convert_image_dtype(mask, tf.float32) 69 | # polar = tf.image.convert_image_dtype(polar, tf.float32) 70 | 71 | aer = preprocess(aer) 72 | panos = preprocess(panos) 73 | tanpolar = preprocess(tanpolar) 74 | # mask = preprocess(mask) 75 | # polar = preprocess(polar) 76 | 77 | aer.set_shape([None, None, 3]) 78 | panos.set_shape([None, None, 3]) 79 | # mask.set_shape([None, None, 3]) 80 | tanpolar.set_shape([None, None, 3]) 81 | # polar.set_shape([None, None, 3]) 82 | 83 | aer = tf.image.resize_images(aer, [256, 256], method=tf.image.ResizeMethod.AREA) 84 | panos = tf.image.resize_images(panos, [128, 512], method=tf.image.ResizeMethod.AREA) 85 | # mask = tf.image.resize_images(mask, [128, 512], method=tf.image.ResizeMethod.AREA) 86 | # mask = 
tf.cast(tf.image.resize_images(mask, [128, 512], method=tf.image.ResizeMethod.AREA), tf.int32) 87 | # mask = 0.9 * tf.one_hot(tf.squeeze(mask, axis=-1), depth=4) 88 | tanpolar = tf.image.resize_images(tanpolar, [128, 512], method=tf.image.ResizeMethod.AREA) 89 | # polar = tf.image.resize_images(polar, [128, 512], method=tf.image.ResizeMethod.AREA) 90 | 91 | aer_batch, panos_batch, pano_paths_batch, tanpolar_batch = \ 92 | tf.train.batch([aer, panos, pano_paths, tanpolar], batch_size=batch_size) 93 | # aer_batch, panos_batch, mask_batch, aer_paths_batch = \ 94 | # tf.train.batch([aer, panos, mask, aer_paths], batch_size=batch_size) 95 | 96 | steps_per_epoch = int(math.ceil(len(data_list) / batch_size)) 97 | 98 | return Examples( 99 | paths=pano_paths_batch, 100 | aer=aer_batch, 101 | pano=panos_batch, 102 | # mask=mask_batch, 103 | tanpolar=tanpolar_batch, 104 | # polar=polar_batch, 105 | count=len(data_list), 106 | steps_per_epoch=steps_per_epoch, 107 | ) 108 | 109 | 110 | -------------------------------------------------------------------------------- /load_data/load_data_cvusa.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import os.path 3 | 4 | import tensorflow.compat.v1 as tf 5 | tf.disable_v2_behavior() 6 | import math 7 | 8 | # Examples = collections.namedtuple("Examples", "paths, aer, pano, mask, count, steps_per_epoch") 9 | Examples = collections.namedtuple("Examples", "paths, aer, pano, tanpolar, polar, count, steps_per_epoch") 10 | 11 | 12 | def preprocess(image): 13 | with tf.name_scope("preprocess"): 14 | # [0, 1] => [-1, 1] 15 | return image * 2 - 1 16 | 17 | 18 | def load_examples(mode='train', batch_size=2): 19 | 20 | img_root = '../../../Data/CVUSA/' 21 | 22 | if mode=='train': 23 | file_list = os.path.join(img_root, 'splits/train-19zl.csv') 24 | elif mode=='test': 25 | file_list = os.path.join(img_root, 'splits/val-19zl.csv') 26 | 27 | data_list = [] 28 | 29 | with open(file_list, 'r') as f: 30 | for line in f: 31 | data = line.split(',') 32 | # data_list.append([img_root + data[0], img_root + data[1], img_root + data[2][:-1]]) 33 | data_list.append([img_root + data[0], img_root + data[1], 34 | # img_root + data[2][:-1].replace('annotations', 'annotations_visualize'), 35 | img_root + data[0].replace('bingmap/19', 'a2g').replace('jpg', 'png'), 36 | img_root + data[0].replace('bing', 'polar').replace('jpg', 'png')]) 37 | 38 | aer_list = [item[0] for item in data_list] 39 | pano_list = [item[1] for item in data_list] 40 | # mask_list = [item[2] for item in data_list] 41 | tanpolar_list = [item[2] for item in data_list] 42 | polar_list = [item[3] for item in data_list] 43 | 44 | aer_queue = tf.train.string_input_producer(aer_list, shuffle=mode=='train', seed=2020) 45 | pano_queue = tf.train.string_input_producer(pano_list, shuffle=mode=='train', seed=2020) 46 | tanpolar_queue = tf.train.string_input_producer(tanpolar_list, shuffle=mode == 'train', seed=2020) 47 | polar_queue = tf.train.string_input_producer(polar_list, shuffle=mode == 'train', seed=2020) 48 | 49 | # aer_queue = tf.data.Dataset.from_tensor_slices(aer_list) 50 | # pano_queue = tf.data.Dataset.from_tensor_slices(pano_list) 51 | # tanpolar_queue = tf.data.Dataset.from_tensor_slices(tanpolar_list) 52 | # polar_queue = tf.data.Dataset.from_tensor_slices(polar_list) 53 | # if mode=='train': 54 | # buffer_size = len(data_list) 55 | # aer_queue = aer_queue.shuffle(buffer_size, seed=2020) 56 | # pano_queue = pano_queue.shuffle(buffer_size, seed=2020) 57 | 
# tanpolar_queue = tanpolar_queue.shuffle(buffer_size, seed=2020) 58 | # polar_queue = polar_queue.shuffle(buffer_size, seed=2020) 59 | 60 | reader = tf.WholeFileReader() 61 | aer_paths, aer_contents = reader.read(aer_queue) 62 | pano_paths, pano_contents = reader.read(pano_queue) 63 | # mask_paths, mask_contents = reader.read(mask_queue) 64 | tanpolar_paths, tanpolar_contents = reader.read(tanpolar_queue) 65 | polar_paths, polar_contents = reader.read(polar_queue) 66 | 67 | aer = tf.image.decode_jpeg(aer_contents) 68 | panos = tf.image.decode_jpeg(pano_contents) 69 | # mask = tf.image.decode_png(mask_contents) 70 | tanpolar = tf.image.decode_png(tanpolar_contents) 71 | polar = tf.image.decode_png(polar_contents) 72 | 73 | aer = tf.image.convert_image_dtype(aer, tf.float32) 74 | panos = tf.image.convert_image_dtype(panos, tf.float32) 75 | # mask = tf.image.convert_image_dtype(mask, tf.float32) 76 | tanpolar = tf.image.convert_image_dtype(tanpolar, tf.float32) 77 | polar = tf.image.convert_image_dtype(polar, tf.float32) 78 | 79 | aer = preprocess(aer) 80 | panos = preprocess(panos) 81 | # mask = preprocess(mask) 82 | tanpolar = preprocess(tanpolar) 83 | polar = preprocess(polar) 84 | 85 | aer.set_shape([None, None, 3]) 86 | panos.set_shape([None, None, 3]) 87 | # mask.set_shape([None, None, 3]) 88 | tanpolar.set_shape([None, None, 3]) 89 | polar.set_shape([None, None, 3]) 90 | 91 | aer = tf.image.resize_images(aer, [256, 256], method=tf.image.ResizeMethod.AREA) 92 | panos = tf.image.resize_images(panos, [128, 512], method=tf.image.ResizeMethod.AREA) 93 | # mask = tf.image.resize_images(mask, [128, 512], method=tf.image.ResizeMethod.AREA) 94 | # mask = tf.cast(tf.image.resize_images(mask, [128, 512], method=tf.image.ResizeMethod.AREA), tf.int32) 95 | # mask = 0.9 * tf.one_hot(tf.squeeze(mask, axis=-1), depth=4) 96 | tanpolar = tf.image.resize_images(tanpolar, [128, 512], method=tf.image.ResizeMethod.AREA) 97 | polar = tf.image.resize_images(polar, [128, 512], method=tf.image.ResizeMethod.AREA) 98 | 99 | # aer_batch, panos_batch, mask_batch, aer_paths_batch, tanpolar_batch, polar_batch = \ 100 | # tf.train.batch([aer, panos, mask, aer_paths, tanpolar, polar], batch_size=batch_size) 101 | aer_batch, panos_batch, aer_paths_batch, tanpolar_batch, polar_batch = \ 102 | tf.train.batch([aer, panos, aer_paths, tanpolar, polar], batch_size=batch_size) 103 | 104 | steps_per_epoch = int(math.ceil(len(data_list) / batch_size)) 105 | 106 | return Examples( 107 | paths=aer_paths_batch, 108 | aer=aer_batch, 109 | pano=panos_batch, 110 | # mask=mask_batch, 111 | tanpolar=tanpolar_batch, 112 | polar=polar_batch, 113 | count=len(data_list), 114 | steps_per_epoch=steps_per_epoch, 115 | ) 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /evaluation_metrics/metrics_tf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # from skimage.measure import compare_ssim, compare_psnr, compare_mse, compare_nrmse 3 | import cv2 4 | 5 | import os 6 | # os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' 7 | # os.environ['CUDA_VISIBLE_DEVICES'] = '1' 8 | import tensorflow.compat.v1 as tf 9 | tf.disable_v2_behavior() 10 | from tensorflow.python.ops import math_ops 11 | 12 | import argparse 13 | 14 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | parser.add_argument('--dir', type=str, default='../scri/GeneratedData/CVUSA/pix2pix_tanpolar_L1Grd_0.0_PerGrd_1.0/') 16 | 17 | 
opt = parser.parse_args() 18 | 19 | input_dir = opt.dir 20 | 21 | def safe_divide(numerator, denominator, name='safe_divide'): 22 | 23 | return tf.where(math_ops.greater(denominator, 0), 24 | math_ops.divide(numerator, denominator), 25 | tf.zeros_like(numerator), name=name) 26 | 27 | 28 | def RMSE(input, target): 29 | return tf.sqrt(tf.reduce_mean((input - target)**2, axis=(1, 2, 3))) 30 | 31 | 32 | def SharpDiff(inputs, targets): 33 | ''' 34 | :param inputs: shape = [batch, height, width, channel] 35 | :param target: shape = [batch, height, width, channel] 36 | :param eps: 37 | :return: 38 | ''' 39 | s = inputs.get_shape().as_list() 40 | gradx_in, grady_in = tf.image.image_gradients(inputs) 41 | gradx_ta, grady_ta = tf.image.image_gradients(targets) 42 | diff_gradients = tf.abs(gradx_in - gradx_ta)[:, 1: s[1]-1, 1: s[2]-1, :] + tf.abs(grady_in - grady_ta)[:, 1: s[1]-1, 1: s[2]-1, :] 43 | prediction_error = 64* tf.reduce_mean(diff_gradients, axis=[1, 2, 3]) 44 | 45 | sharpdiff = 10 * tf.log(255.*255./prediction_error)/tf.log(10.) 46 | 47 | return sharpdiff 48 | 49 | 50 | def get_val_id_list(): 51 | val_file = '../../../Data/CVUSA/splits/val-19zl.csv' 52 | 53 | id_list = [] 54 | with open(val_file, 'r') as f: 55 | for line in f: 56 | data = line.split(',') 57 | pano_id = (data[0].split('/')[-1]).split('.')[0] 58 | id_list.append(pano_id) 59 | 60 | return id_list 61 | 62 | 63 | def input_data_generator(input_dir, target_dir='../../../Data/CVUSA/streetview/targets/1/', batch_size=1): 64 | id_list = get_val_id_list() 65 | 66 | num_batches = len(id_list)//batch_size 67 | 68 | for i in range(num_batches + 1): 69 | 70 | input_list = [] 71 | target_list = [] 72 | 73 | img_num_per_batch = batch_size if i [-1, 1] 15 | return image * 2 - 1 16 | 17 | 18 | def load_examples(mode='train', batch_size=2): 19 | 20 | # allDataList = '../OriNet_CVACT/CVACT_orientations/ACT_data.mat' 21 | img_root = '../../../Data/CVACT/' 22 | allDataList = os.path.join(img_root, 'ACT_data.mat') 23 | 24 | exist_aer_list = os.listdir(img_root + 'satview_correct') 25 | exist_grd_list = os.listdir(img_root + 'streetview') 26 | 27 | __cur_allid = 0 # for training 28 | 29 | # load the mat 30 | anuData = sio.loadmat(allDataList) 31 | 32 | data_list = [] 33 | for i in range(0, len(anuData['panoIds'])): 34 | # grd_id_align = img_root + 'streetview/' + anuData['panoIds'][i] + '_grdView.png' 35 | # sat_id_ori = img_root + 'satview_polish/' + anuData['panoIds'][i] + '_satView_polish.png' 36 | grd_id_align = anuData['panoIds'][i] + '_grdView.png' 37 | sat_id_ori = anuData['panoIds'][i] + '_satView_polish.png' 38 | data_list.append([grd_id_align, sat_id_ori]) 39 | 40 | if mode=='train': 41 | training_inds = anuData['trainSet']['trainInd'][0][0] - 1 42 | trainNum = len(training_inds) 43 | trainList = [] 44 | for k in range(trainNum): 45 | trainList.append(data_list[training_inds[k][0]]) 46 | pano_list = [img_root + 'streetview/' + item[0] for item in trainList if item[0] in exist_grd_list and item[1] in exist_aer_list] 47 | aer_list = [img_root + 'satview_correct/' + item[1] for item in trainList if item[0] in exist_grd_list and item[1] in exist_aer_list] 48 | # pano_seman_list = [img_root + 'streetseman_visualize/' + item[0] for item in trainList if 49 | # item[0] in exist_grd_list and item[1] in exist_aer_list] 50 | tanpolar_list = [img_root + 'a2g_correct/' + item[1] for item in trainList if 51 | item[0] in exist_grd_list and item[1] in exist_aer_list] 52 | polar_list = [img_root + 'polarmap/' + item[1] for item in trainList if 53 | 
item[0] in exist_grd_list and item[1] in exist_aer_list] 54 | 55 | 56 | else: 57 | 58 | val_inds = anuData['valSet']['valInd'][0][0] - 1 59 | valNum = len(val_inds) 60 | valList = [] 61 | for k in range(valNum): 62 | valList.append(data_list[val_inds[k][0]]) 63 | pano_list = [img_root + 'streetview/' + item[0] for item in valList if item[0] in exist_grd_list and item[1] in exist_aer_list] 64 | aer_list = [img_root + 'satview_correct/' + item[1] for item in valList if item[0] in exist_grd_list and item[1] in exist_aer_list] 65 | # pano_seman_list = [img_root + 'streetseman_visualize/' + item[0] for item in valList if 66 | # item[0] in exist_grd_list and item[1] in exist_aer_list] 67 | # aer_seman_list = [img_root + 'satseman/' + item[1] for item in valList if 68 | # item[0] in exist_grd_list and item[1] in exist_aer_list] 69 | tanpolar_list = [img_root + 'a2g_correct/' + item[1] for item in valList if 70 | item[0] in exist_grd_list and item[1] in exist_aer_list] 71 | polar_list = [img_root + 'polarmap/' + item[1] for item in valList if 72 | item[0] in exist_grd_list and item[1] in exist_aer_list] 73 | 74 | aer_queue = tf.train.string_input_producer(aer_list, shuffle=mode == 'train', seed=2020) 75 | pano_queue = tf.train.string_input_producer(pano_list, shuffle=mode == 'train', seed=2020) 76 | # pano_seman_queue = tf.train.string_input_producer(pano_seman_list, shuffle=mode == 'train', seed=2020) 77 | tanpolar_queue = tf.train.string_input_producer(tanpolar_list, shuffle=mode == 'train', seed=2020) 78 | polar_queue = tf.train.string_input_producer(polar_list, shuffle=mode == 'train', seed=2020) 79 | 80 | reader = tf.WholeFileReader() 81 | aer_paths, aer_contents = reader.read(aer_queue) 82 | pano_paths, pano_contents = reader.read(pano_queue) 83 | # pano_seman_paths, pano_seman_contents = reader.read(pano_seman_queue) 84 | tanpolar_paths, tanpolar_contents = reader.read(tanpolar_queue) 85 | polar_paths, polar_contents = reader.read(polar_queue) 86 | 87 | aer = tf.image.decode_png(aer_contents) 88 | panos = tf.image.decode_png(pano_contents) 89 | # panos_seman = tf.image.decode_png(pano_seman_contents) 90 | tanpolar = tf.image.decode_png(tanpolar_contents) 91 | polar = tf.image.decode_png(polar_contents) 92 | 93 | aer = tf.image.convert_image_dtype(aer, tf.float32) 94 | panos = tf.image.convert_image_dtype(panos, tf.float32) 95 | # panos_seman = tf.image.convert_image_dtype(panos_seman, tf.float32) 96 | tanpolar = tf.image.convert_image_dtype(tanpolar, tf.float32) 97 | polar = tf.image.convert_image_dtype(polar, tf.float32) 98 | 99 | aer = preprocess(aer) 100 | panos = preprocess(panos) 101 | # panos_seman = preprocess(panos_seman) 102 | tanpolar = preprocess(tanpolar) 103 | polar = preprocess(polar) 104 | 105 | aer.set_shape([None, None, 3]) 106 | panos.set_shape([None, None, 3]) 107 | # panos_seman.set_shape([None, None, 3]) 108 | tanpolar.set_shape([None, None, 3]) 109 | polar.set_shape([None, None, 3]) 110 | 111 | aer = tf.image.resize_images(aer, [256, 256], method=tf.image.ResizeMethod.AREA) 112 | panos = tf.image.resize_images(panos, [128, 512], method=tf.image.ResizeMethod.AREA) 113 | # panos_seman = tf.image.resize_images(panos_seman, [128, 512], method=tf.image.ResizeMethod.AREA) 114 | # panos_seman = tf.cast(tf.image.resize_images(panos_seman, [128, 512], method=tf.image.ResizeMethod.AREA), tf.int32) 115 | # panos_seman = tf.one_hot(tf.squeeze(panos_seman, axis=-1), depth=4) 116 | tanpolar = tf.image.resize_images(tanpolar, [128, 512], method=tf.image.ResizeMethod.AREA) 117 | polar 
= tf.image.resize_images(polar, [128, 512], method=tf.image.ResizeMethod.AREA) 118 | 119 | aer_batch, panos_batch, grd_paths_batch, tanpolar_batch, polar_batch = \ 120 | tf.train.batch([aer, panos, pano_paths, tanpolar, polar], batch_size=batch_size) 121 | 122 | steps_per_epoch = int(math.ceil(len(pano_list) / batch_size)) 123 | 124 | return Examples( 125 | paths=grd_paths_batch, 126 | aer=aer_batch, 127 | pano=panos_batch, 128 | # mask=panos_seman_batch, 129 | tanpolar=tanpolar_batch, 130 | polar = polar_batch, 131 | count=len(pano_list), 132 | steps_per_epoch=steps_per_epoch, 133 | ) 134 | -------------------------------------------------------------------------------- /load_data/load_data_cvact_unaligned.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import tensorflow.compat.v1 as tf 3 | tf.disable_v2_behavior() 4 | import math 5 | import scipy.io as sio 6 | import os 7 | 8 | # Examples = collections.namedtuple("Examples", "paths, aer, pano, mask, count, steps_per_epoch, tanpolar, polar") 9 | Examples = collections.namedtuple("Examples", "paths, aer, pano, count, steps_per_epoch, tanpolar, polar") 10 | 11 | 12 | def preprocess(image): 13 | with tf.name_scope("preprocess"): 14 | # [0, 1] => [-1, 1] 15 | return image * 2 - 1 16 | 17 | 18 | def load_examples(mode='train', batch_size=2): 19 | 20 | # allDataList = '../OriNet_CVACT/CVACT_orientations/ACT_data.mat' 21 | img_root = '../../../Data/CVACT/' 22 | allDataList = os.path.join(img_root, 'ACT_data.mat') 23 | 24 | exist_aer_list = os.listdir(img_root + 'satview_correct') 25 | exist_grd_list = os.listdir(img_root + 'streetview') 26 | 27 | __cur_allid = 0 # for training 28 | 29 | # load the mat 30 | anuData = sio.loadmat(allDataList) 31 | 32 | data_list = [] 33 | for i in range(0, len(anuData['panoIds'])): 34 | # grd_id_align = img_root + 'streetview/' + anuData['panoIds'][i] + '_grdView.png' 35 | # sat_id_ori = img_root + 'satview_polish/' + anuData['panoIds'][i] + '_satView_polish.png' 36 | grd_id_align = anuData['panoIds'][i] + '_grdView.png' 37 | sat_id_ori = anuData['panoIds'][i] + '_satView_polish.png' 38 | data_list.append([grd_id_align, sat_id_ori]) 39 | 40 | if mode=='train': 41 | training_inds = anuData['trainSet']['trainInd'][0][0] - 1 42 | trainNum = len(training_inds) 43 | trainList = [] 44 | for k in range(trainNum): 45 | trainList.append(data_list[training_inds[k][0]]) 46 | pano_list = [img_root + 'streetview/' + item[0] for item in trainList if item[0] in exist_grd_list and item[1] in exist_aer_list] 47 | aer_list = [img_root + 'satview_correct/' + item[1] for item in trainList if item[0] in exist_grd_list and item[1] in exist_aer_list] 48 | # pano_seman_list = [img_root + 'streetseman_visualize/' + item[0] for item in trainList if 49 | # item[0] in exist_grd_list and item[1] in exist_aer_list] 50 | tanpolar_list = [img_root + 'a2g_correct/' + item[1] for item in trainList if 51 | item[0] in exist_grd_list and item[1] in exist_aer_list] 52 | polar_list = [img_root + 'polarmap/' + item[1] for item in trainList if 53 | item[0] in exist_grd_list and item[1] in exist_aer_list] 54 | 55 | 56 | else: 57 | 58 | val_inds = anuData['valSet']['valInd'][0][0] - 1 59 | valNum = len(val_inds) 60 | valList = [] 61 | for k in range(valNum): 62 | valList.append(data_list[val_inds[k][0]]) 63 | pano_list = [img_root + 'streetview/' + item[0] for item in valList if item[0] in exist_grd_list and item[1] in exist_aer_list] 64 | aer_list = [img_root + 'satview_polish/' + item[1] for 
item in valList if item[0] in exist_grd_list and item[1] in exist_aer_list] 65 | # pano_seman_list = [img_root + 'streetseman_visualize/' + item[0] for item in valList if 66 | # item[0] in exist_grd_list and item[1] in exist_aer_list] 67 | # aer_seman_list = [img_root + 'satseman/' + item[1] for item in valList if 68 | # item[0] in exist_grd_list and item[1] in exist_aer_list] 69 | tanpolar_list = [img_root + 'a2g_origin/' + item[1] for item in valList if 70 | item[0] in exist_grd_list and item[1] in exist_aer_list] 71 | polar_list = [img_root + 'polarmap/' + item[1] for item in valList if 72 | item[0] in exist_grd_list and item[1] in exist_aer_list] 73 | 74 | aer_queue = tf.train.string_input_producer(aer_list, shuffle=mode == 'train', seed=2020) 75 | pano_queue = tf.train.string_input_producer(pano_list, shuffle=mode == 'train', seed=2020) 76 | # pano_seman_queue = tf.train.string_input_producer(pano_seman_list, shuffle=mode == 'train', seed=2020) 77 | tanpolar_queue = tf.train.string_input_producer(tanpolar_list, shuffle=mode == 'train', seed=2020) 78 | polar_queue = tf.train.string_input_producer(polar_list, shuffle=mode == 'train', seed=2020) 79 | 80 | reader = tf.WholeFileReader() 81 | aer_paths, aer_contents = reader.read(aer_queue) 82 | pano_paths, pano_contents = reader.read(pano_queue) 83 | # pano_seman_paths, pano_seman_contents = reader.read(pano_seman_queue) 84 | tanpolar_paths, tanpolar_contents = reader.read(tanpolar_queue) 85 | polar_paths, polar_contents = reader.read(polar_queue) 86 | 87 | aer = tf.image.decode_png(aer_contents) 88 | panos = tf.image.decode_png(pano_contents) 89 | # panos_seman = tf.image.decode_png(pano_seman_contents) 90 | tanpolar = tf.image.decode_png(tanpolar_contents) 91 | polar = tf.image.decode_png(polar_contents) 92 | 93 | aer = tf.image.convert_image_dtype(aer, tf.float32) 94 | panos = tf.image.convert_image_dtype(panos, tf.float32) 95 | # panos_seman = tf.image.convert_image_dtype(panos_seman, tf.float32) 96 | tanpolar = tf.image.convert_image_dtype(tanpolar, tf.float32) 97 | polar = tf.image.convert_image_dtype(polar, tf.float32) 98 | 99 | aer = preprocess(aer) 100 | panos = preprocess(panos) 101 | # panos_seman = preprocess(panos_seman) 102 | tanpolar = preprocess(tanpolar) 103 | polar = preprocess(polar) 104 | 105 | aer.set_shape([None, None, 3]) 106 | panos.set_shape([None, None, 3]) 107 | # panos_seman.set_shape([None, None, 3]) 108 | tanpolar.set_shape([None, None, 3]) 109 | polar.set_shape([None, None, 3]) 110 | 111 | aer = tf.image.resize_images(aer, [256, 256], method=tf.image.ResizeMethod.AREA) 112 | panos = tf.image.resize_images(panos, [128, 512], method=tf.image.ResizeMethod.AREA) 113 | # panos_seman = tf.image.resize_images(panos_seman, [128, 512], method=tf.image.ResizeMethod.AREA) 114 | # panos_seman = tf.cast(tf.image.resize_images(panos_seman, [128, 512], method=tf.image.ResizeMethod.AREA), tf.int32) 115 | # panos_seman = tf.one_hot(tf.squeeze(panos_seman, axis=-1), depth=4) 116 | tanpolar = tf.image.resize_images(tanpolar, [128, 512], method=tf.image.ResizeMethod.AREA) 117 | polar = tf.image.resize_images(polar, [128, 512], method=tf.image.ResizeMethod.AREA) 118 | 119 | aer_batch, panos_batch, grd_paths_batch, tanpolar_batch, polar_batch = \ 120 | tf.train.batch([aer, panos, pano_paths, tanpolar, polar], batch_size=batch_size) 121 | 122 | steps_per_epoch = int(math.ceil(len(pano_list) / batch_size)) 123 | 124 | return Examples( 125 | paths=grd_paths_batch, 126 | aer=aer_batch, 127 | pano=panos_batch, 128 | # 
mask=panos_seman_batch, 129 | tanpolar=tanpolar_batch, 130 | polar = polar_batch, 131 | count=len(pano_list), 132 | steps_per_epoch=steps_per_epoch, 133 | ) 134 | -------------------------------------------------------------------------------- /load_data/load_data_cvact_half.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import tensorflow.compat.v1 as tf 3 | tf.disable_v2_behavior() 4 | import math 5 | import scipy.io as sio 6 | import os 7 | 8 | Examples = collections.namedtuple("Examples", "paths, aer, pano, mask, count, steps_per_epoch, tanpolar, polar") 9 | 10 | 11 | def preprocess(image): 12 | with tf.name_scope("preprocess"): 13 | # [0, 1] => [-1, 1] 14 | return image * 2 - 1 15 | 16 | 17 | def load_examples(mode='train', batch_size=2): 18 | 19 | allDataList = '../OriNet_CVACT/CVACT_orientations/ACT_data.mat' 20 | img_root = '../../Data/ANU_data_small/' 21 | 22 | exist_aer_list = os.listdir(img_root + 'satview_polish') 23 | exist_grd_list = os.listdir(img_root + 'streetview') 24 | 25 | __cur_allid = 0 # for training 26 | 27 | # load the mat 28 | anuData = sio.loadmat(allDataList) 29 | 30 | data_list = [] 31 | for i in range(0, len(anuData['panoIds'])): 32 | # grd_id_align = img_root + 'streetview/' + anuData['panoIds'][i] + '_grdView.png' 33 | # sat_id_ori = img_root + 'satview_polish/' + anuData['panoIds'][i] + '_satView_polish.png' 34 | grd_id_align = anuData['panoIds'][i] + '_grdView.png' 35 | sat_id_ori = anuData['panoIds'][i] + '_satView_polish.png' 36 | data_list.append([grd_id_align, sat_id_ori]) 37 | 38 | if mode=='train': 39 | training_inds = anuData['trainSet']['trainInd'][0][0] - 1 40 | trainNum = len(training_inds) 41 | trainList = [] 42 | for k in range(trainNum): 43 | trainList.append(data_list[training_inds[k][0]]) 44 | pano_list = [img_root + 'streetview/' + item[0] for item in trainList if item[0] in exist_grd_list and item[1] in exist_aer_list] 45 | aer_list = [img_root + 'satview_polish/' + item[1] for item in trainList if item[0] in exist_grd_list and item[1] in exist_aer_list] 46 | pano_seman_list = [img_root + 'streetseman_visualize/' + item[0] for item in trainList if 47 | item[0] in exist_grd_list and item[1] in exist_aer_list] 48 | aer_seman_list = [img_root + 'satseman/' + item[1] for item in trainList if 49 | item[0] in exist_grd_list and item[1] in exist_aer_list] 50 | tanpolar_list = [img_root + 'tanpolarmap/' + item[1] for item in trainList if 51 | item[0] in exist_grd_list and item[1] in exist_aer_list] 52 | polar_list = [img_root + 'polarmap/' + item[1] for item in trainList if 53 | item[0] in exist_grd_list and item[1] in exist_aer_list] 54 | 55 | 56 | else: 57 | 58 | val_inds = anuData['valSet']['valInd'][0][0] - 1 59 | valNum = len(val_inds) 60 | valList = [] 61 | for k in range(valNum): 62 | valList.append(data_list[val_inds[k][0]]) 63 | pano_list = [img_root + 'streetview/' + item[0] for item in valList if item[0] in exist_grd_list and item[1] in exist_aer_list] 64 | aer_list = [img_root + 'satview_polish/' + item[1] for item in valList if item[0] in exist_grd_list and item[1] in exist_aer_list] 65 | pano_seman_list = [img_root + 'streetseman_visualize/' + item[0] for item in valList if 66 | item[0] in exist_grd_list and item[1] in exist_aer_list] 67 | aer_seman_list = [img_root + 'satseman/' + item[1] for item in valList if 68 | item[0] in exist_grd_list and item[1] in exist_aer_list] 69 | tanpolar_list = [img_root + 'tanpolarmap/' + item[1] for item in valList if 70 | item[0] in 
exist_grd_list and item[1] in exist_aer_list] 71 | polar_list = [img_root + 'polarmap/' + item[1] for item in valList if 72 | item[0] in exist_grd_list and item[1] in exist_aer_list] 73 | 74 | aer_queue = tf.train.string_input_producer(aer_list, shuffle=mode == 'train', seed=2020) 75 | pano_queue = tf.train.string_input_producer(pano_list, shuffle=mode == 'train', seed=2020) 76 | pano_seman_queue = tf.train.string_input_producer(pano_seman_list, shuffle=mode == 'train', seed=2020) 77 | tanpolar_queue = tf.train.string_input_producer(tanpolar_list, shuffle=mode == 'train', seed=2020) 78 | polar_queue = tf.train.string_input_producer(polar_list, shuffle=mode == 'train', seed=2020) 79 | 80 | reader = tf.WholeFileReader() 81 | aer_paths, aer_contents = reader.read(aer_queue) 82 | pano_paths, pano_contents = reader.read(pano_queue) 83 | pano_seman_paths, pano_seman_contents = reader.read(pano_seman_queue) 84 | tanpolar_paths, tanpolar_contents = reader.read(tanpolar_queue) 85 | polar_paths, polar_contents = reader.read(polar_queue) 86 | 87 | aer = tf.image.decode_png(aer_contents) 88 | panos = tf.image.decode_png(pano_contents) 89 | panos_seman = tf.image.decode_png(pano_seman_contents) 90 | tanpolar = tf.image.decode_png(tanpolar_contents) 91 | polar = tf.image.decode_png(polar_contents) 92 | 93 | aer = tf.image.convert_image_dtype(aer, tf.float32) 94 | panos = tf.image.convert_image_dtype(panos, tf.float32) 95 | panos_seman = tf.image.convert_image_dtype(panos_seman, tf.float32) 96 | tanpolar = tf.image.convert_image_dtype(tanpolar, tf.float32) 97 | polar = tf.image.convert_image_dtype(polar, tf.float32) 98 | 99 | aer = preprocess(aer) 100 | panos = preprocess(panos) 101 | panos_seman = preprocess(panos_seman) 102 | tanpolar = preprocess(tanpolar) 103 | polar = preprocess(polar) 104 | 105 | aer.set_shape([None, None, 3]) 106 | panos.set_shape([None, None, 3]) 107 | panos_seman.set_shape([None, None, 3]) 108 | tanpolar.set_shape([None, None, 3]) 109 | polar.set_shape([None, None, 3]) 110 | 111 | aer = tf.image.resize_images(aer, [256, 256], method=tf.image.ResizeMethod.AREA) 112 | panos = tf.image.resize_images(panos, [128, 512], method=tf.image.ResizeMethod.AREA) 113 | panos_seman = tf.image.resize_images(panos_seman, [128, 512], method=tf.image.ResizeMethod.AREA) 114 | # panos_seman = tf.cast(tf.image.resize_images(panos_seman, [128, 512], method=tf.image.ResizeMethod.AREA), tf.int32) 115 | # panos_seman = tf.one_hot(tf.squeeze(panos_seman, axis=-1), depth=4) 116 | tanpolar = tf.image.resize_images(tanpolar, [128, 512], method=tf.image.ResizeMethod.AREA) 117 | polar = tf.image.resize_images(polar, [128, 512], method=tf.image.ResizeMethod.AREA) 118 | 119 | aer_batch, panos_batch, panos_seman_batch, aer_paths_batch, tanpolar_batch, polar_batch = \ 120 | tf.train.batch([aer, panos, panos_seman, aer_paths, tanpolar, polar], batch_size=batch_size) 121 | 122 | steps_per_epoch = int(math.ceil(len(pano_list) / batch_size)) 123 | 124 | return Examples( 125 | paths=aer_paths_batch, 126 | aer=aer_batch, 127 | pano=panos_batch, 128 | mask=panos_seman_batch, 129 | tanpolar=tanpolar_batch, 130 | polar = polar_batch, 131 | count=len(pano_list), 132 | steps_per_epoch=steps_per_epoch, 133 | ) 134 | 135 | -------------------------------------------------------------------------------- /script3/model22.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | 3 | import collections 4 | from VGG.perceptual_loss import perceptual_loss 5 | from 
geometry.Geometry import * 6 | from geometry.projector import * 7 | from geometry.utils import * 8 | 9 | 10 | EPS = 1e-7 11 | 12 | target_height = 128 13 | target_width = 512 14 | aer_size = 256 15 | grd_height = -2 16 | max_height = 30 17 | 18 | Model = collections.namedtuple("Model", "outputs, predict_real, predict_fake, discrim_loss, discrim_grads_and_vars, " 19 | "estimated_height, generator_inputs," 20 | "gen_loss_GAN, gen_loss_L1, gen_loss_perceptual, gen_grads_and_vars, train") 21 | 22 | 23 | def create_generator(generator_inputs, ref_images, a): 24 | 25 | generator_outputs_channels = 3 26 | 27 | ngf = a.ngf 28 | layers = [] 29 | 30 | # encoder_1: [batch, 512, 512, in_channels] => [batch, 256, 256, ngf] 31 | with tf.variable_scope("encoder_1"): 32 | output = gen_conv(generator_inputs, ngf) 33 | layers.append(output) 34 | 35 | layer_specs = [ 36 | ngf * 2, # encoder_2: [batch, 256, 256, ngf] => [batch, 128, 128, ngf * 2] 37 | ngf * 4, # encoder_3: [batch, 128, 128, ngf * 2] => [batch, 64, 64, ngf * 4] 38 | ngf * 8, # encoder_4: [batch, 64, 64, ngf * 4] => [batch, 32, 32, ngf * 8] 39 | ngf * 8, # encoder_5: [batch, 32, 32, ngf * 8] => [batch, 16, 16, ngf * 8] 40 | ngf * 8, # encoder_6: [batch, 16, 16, ngf * 8] => [batch, 8, 8, ngf * 8] 41 | ngf * 8, # encoder_7: [batch, 8, 8, ngf * 8] => [batch, 4, 4, ngf * 8] 42 | # ngf * 8, # encoder_8: [batch, 4, 4, ngf * 8] => [batch, 2, 2, ngf * 8] 43 | ] 44 | 45 | for out_channels in layer_specs: 46 | with tf.variable_scope("encoder_%d" % (len(layers) + 1)): 47 | rectified = lrelu(layers[-1], 0.2) 48 | # [batch, in_height, in_width, in_channels] => [batch, in_height/2, in_width/2, out_channels] 49 | convolved = gen_conv(rectified, out_channels) 50 | output = batchnorm(convolved) 51 | layers.append(output) 52 | 53 | bottleneck = tf.reshape(output, [-1, 1, 4, output.get_shape().as_list()[-1]]) 54 | layers.append(bottleneck) 55 | 56 | layer_specs = [ 57 | # (ngf * 8, 0.5), # decoder_8: [batch, 1, 4, ngf * 8] => [batch, 2, 8, ngf * 8 * 2] 58 | (ngf * 8, 0.5), # decoder_7: [batch, 2, 8, ngf * 8 * 2] => [batch, 4, 16, ngf * 8 * 2] 59 | (ngf * 8, 0.5), # decoder_6: [batch, 4, 16, ngf * 8 * 2] => [batch, 8, 32, ngf * 8 * 2] 60 | (ngf * 8, 0.0), # decoder_5: [batch, 8, 32, ngf * 8 * 2] => [batch, 16, 64, ngf * 8 * 2] 61 | (ngf * 4, 0.0), # decoder_4: [batch, 16, 64, ngf * 8 * 2] => [batch, 32, 128, ngf * 4 * 2] 62 | (ngf * 2, 0.0), # decoder_3: [batch, 32, 128, ngf * 4 * 2] => [batch, 64, 256, ngf * 2 * 2] 63 | (ngf, 0.0), # decoder_2: [batch, 64, 256, ngf * 2 * 2] => [batch, 128, 512, ngf * 2 * 2] 64 | ] 65 | 66 | num_encoder_layers = len(layers) 67 | for decoder_layer, (out_channels, dropout) in enumerate(layer_specs): 68 | skip_layer = num_encoder_layers - decoder_layer - 1 69 | with tf.variable_scope("decoder_%d" % (skip_layer + 1)): 70 | # if decoder_layer == 0: 71 | # # first decoder layer doesn't have skip connections 72 | # # since it is directly connected to the skip_layer 73 | # input = layers[-1] 74 | # else: 75 | # input = tf.concat([layers[-1], layers[skip_layer]], axis=3) 76 | 77 | rectified = tf.nn.relu(layers[-1]) 78 | # [batch, in_height, in_width, in_channels] => [batch, in_height*2, in_width*2, out_channels] 79 | output = gen_deconv(rectified, out_channels) 80 | output = batchnorm(output) 81 | 82 | if dropout > 0.0: 83 | output = tf.nn.dropout(output, keep_prob=1 - dropout) 84 | 85 | layers.append(output) 86 | 87 | # decoder_1: [batch, 128, 512, ngf * 2] => [batch, 256, 1024, generator_outputs_channels] 88 | with 
tf.variable_scope("decoder_1"): 89 | # input = tf.concat([layers[-1], layers[0]], axis=3) 90 | rectified = tf.nn.relu(layers[-1]) 91 | output = gen_deconv(rectified, generator_outputs_channels) 92 | output = tf.tanh(output) 93 | layers.append(output) 94 | 95 | outputs_grd = layers[-1] 96 | 97 | return outputs_grd 98 | 99 | 100 | 101 | def create_discriminator(discrim_inputs, ndf=64): 102 | n_layers = 3 103 | layers = [] 104 | 105 | # layer_1: [batch, 256, 256, in_channels * 2] => [batch, 128, 128, ndf] 106 | with tf.variable_scope("layer_1"): 107 | convolved = discrim_conv(discrim_inputs, ndf, stride=2) 108 | rectified = lrelu(convolved, 0.2) 109 | layers.append(rectified) 110 | 111 | # layer_2: [batch, 128, 128, ndf] => [batch, 64, 64, ndf * 2] 112 | # layer_3: [batch, 64, 64, ndf * 2] => [batch, 32, 32, ndf * 4] 113 | # layer_4: [batch, 32, 32, ndf * 4] => [batch, 31, 31, ndf * 8] 114 | for i in range(n_layers): 115 | with tf.variable_scope("layer_%d" % (len(layers) + 1)): 116 | out_channels = ndf * min(2**(i+1), 8) 117 | stride = 1 if i == n_layers - 1 else 2 # last layer here has stride 1 118 | convolved = discrim_conv(layers[-1], out_channels, stride=stride) 119 | normalized = batchnorm(convolved) 120 | rectified = lrelu(normalized, 0.2) 121 | layers.append(rectified) 122 | 123 | # layer_5: [batch, 31, 31, ndf * 8] => [batch, 30, 30, 1] 124 | with tf.variable_scope("layer_%d" % (len(layers) + 1)): 125 | convolved = discrim_conv(rectified, out_channels=1, stride=1) 126 | output = tf.sigmoid(convolved) 127 | layers.append(output) 128 | 129 | return layers[-1] 130 | 131 | 132 | def create_model(inputs, targets, ref_images, a): 133 | 134 | with tf.variable_scope("generator"): 135 | 136 | batch, height, width, channel = tf_shape(inputs, rank=4) 137 | estimated_height = tf.ones([batch, height, width, a.heightPlaneNum])/a.heightPlaneNum 138 | 139 | generator_inputs = geometry_transform(inputs, estimated_height, target_height, target_width, 140 | a.height_mode, grd_height, max_height, a.method, a.geoout_type, a.dataset) 141 | 142 | outputs_grd = create_generator(generator_inputs, ref_images, a) 143 | 144 | with tf.name_scope("real_discriminator_grd"): 145 | with tf.variable_scope("discriminator_grd"): 146 | # 2x [batch, height, width, channels] => [batch, 30, 30, 1] 147 | predict_real_grd = create_discriminator(targets) 148 | 149 | with tf.name_scope("fake_discriminator_grd"): 150 | with tf.variable_scope("discriminator_grd", reuse=True): 151 | # 2x [batch, height, width, channels] => [batch, 30, 30, 1] 152 | predict_fake_grd = create_discriminator(outputs_grd) 153 | 154 | 155 | with tf.name_scope("discriminator_loss"): 156 | # minimizing -tf.log will try to get inputs to 1 157 | # predict_real => 1 158 | # predict_fake => 0 159 | discrim_loss = 0.5 * (tf.reduce_mean(-(tf.log(predict_real_grd + EPS) + tf.log(1 - predict_fake_grd + EPS)))) 160 | 161 | with tf.name_scope("generator_loss"): 162 | # predict_fake => 1 163 | # abs(targets - outputs) => 0 164 | gen_loss_GAN_grd = tf.reduce_mean(-tf.log(predict_fake_grd + EPS)) 165 | gen_loss_L1_grd = tf.reduce_mean(tf.abs(targets - outputs_grd)) 166 | gen_loss_perceptual_grd = perceptual_loss(targets, outputs_grd) 167 | 168 | gen_loss = gen_loss_GAN_grd * a.gan_weight + \ 169 | gen_loss_perceptual_grd * a.perceptual_weight_grd + \ 170 | gen_loss_L1_grd * a.l1_weight_grd 171 | 172 | with tf.name_scope("discriminator_train"): 173 | discrim_tvars = [var for var in tf.trainable_variables() if var.name.startswith("discriminator")] 174 | discrim_optim = 
tf.train.AdamOptimizer(a.lr, a.beta1) 175 | discrim_grads_and_vars = discrim_optim.compute_gradients(discrim_loss, var_list=discrim_tvars) 176 | discrim_train = discrim_optim.apply_gradients(discrim_grads_and_vars) 177 | 178 | with tf.name_scope("generator_train"): 179 | with tf.control_dependencies([discrim_train]): 180 | gen_tvars = [var for var in tf.trainable_variables() if var.name.startswith("generator")] 181 | gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1) 182 | gen_grads_and_vars = gen_optim.compute_gradients(gen_loss, var_list=gen_tvars) 183 | gen_train = gen_optim.apply_gradients(gen_grads_and_vars) 184 | 185 | ema = tf.train.ExponentialMovingAverage(decay=0.99) 186 | update_losses = ema.apply([discrim_loss, gen_loss_GAN_grd, gen_loss_L1_grd, gen_loss_perceptual_grd]) 187 | 188 | global_step = tf.train.get_or_create_global_step() 189 | incr_global_step = tf.assign(global_step, global_step+1) 190 | 191 | return Model( 192 | predict_real=predict_real_grd, 193 | predict_fake=predict_fake_grd, 194 | discrim_loss=ema.average(discrim_loss), 195 | discrim_grads_and_vars=discrim_grads_and_vars, 196 | gen_loss_GAN=ema.average(gen_loss_GAN_grd), 197 | gen_loss_L1=ema.average(gen_loss_L1_grd), 198 | gen_loss_perceptual=ema.average(gen_loss_perceptual_grd), 199 | gen_grads_and_vars=gen_grads_and_vars, 200 | estimated_height=tf.argmax(estimated_height, axis=-1), 201 | generator_inputs=generator_inputs, 202 | outputs=outputs_grd, 203 | train=tf.group(update_losses, incr_global_step, gen_train), 204 | ) 205 | 206 | -------------------------------------------------------------------------------- /script3/model23.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | 3 | import collections 4 | from VGG.perceptual_loss import perceptual_loss 5 | from geometry.Geometry import * 6 | from geometry.projector import * 7 | from geometry.utils import * 8 | 9 | 10 | EPS = 1e-7 11 | 12 | target_height = 128 13 | target_width = 512 14 | aer_size = 256 15 | grd_height = -2 16 | max_height = 30 17 | 18 | Model = collections.namedtuple("Model", "outputs, predict_real, predict_fake, discrim_loss, discrim_grads_and_vars, " 19 | "estimated_height, generator_inputs," 20 | "gen_loss_GAN, gen_loss_L1, gen_loss_perceptual, gen_grads_and_vars, train") 21 | 22 | def create_generator(generator_inputs, ref_images, a): 23 | 24 | generator_outputs_channels = 3 25 | 26 | ngf = a.ngf 27 | layers = [] 28 | 29 | # encoder_1: [batch, 512, 512, in_channels] => [batch, 256, 256, ngf] 30 | with tf.variable_scope("encoder_1"): 31 | output = gen_conv(generator_inputs, ngf) 32 | layers.append(output) 33 | 34 | layer_specs = [ 35 | ngf * 2, # encoder_2: [batch, 256, 256, ngf] => [batch, 128, 128, ngf * 2] 36 | ngf * 4, # encoder_3: [batch, 128, 128, ngf * 2] => [batch, 64, 64, ngf * 4] 37 | ngf * 8, # encoder_4: [batch, 64, 64, ngf * 4] => [batch, 32, 32, ngf * 8] 38 | ngf * 8, # encoder_5: [batch, 32, 32, ngf * 8] => [batch, 16, 16, ngf * 8] 39 | ngf * 8, # encoder_6: [batch, 16, 16, ngf * 8] => [batch, 8, 8, ngf * 8] 40 | ngf * 8, # encoder_7: [batch, 8, 8, ngf * 8] => [batch, 4, 4, ngf * 8] 41 | # ngf * 8, # encoder_8: [batch, 4, 4, ngf * 8] => [batch, 2, 2, ngf * 8] 42 | ] 43 | 44 | for out_channels in layer_specs: 45 | with tf.variable_scope("encoder_%d" % (len(layers) + 1)): 46 | rectified = lrelu(layers[-1], 0.2) 47 | # [batch, in_height, in_width, in_channels] => [batch, in_height/2, in_width/2, out_channels] 48 | convolved = gen_conv(rectified, 
out_channels) 49 | output = batchnorm(convolved) 50 | layers.append(output) 51 | 52 | bottleneck = tf.reshape(output, [-1, 1, 4, output.get_shape().as_list()[-1]]) 53 | layers.append(bottleneck) 54 | 55 | layer_specs = [ 56 | # (ngf * 8, 0.5), # decoder_8: [batch, 1, 4, ngf * 8] => [batch, 2, 8, ngf * 8 * 2] 57 | (ngf * 8, 0.5), # decoder_7: [batch, 2, 8, ngf * 8 * 2] => [batch, 4, 16, ngf * 8 * 2] 58 | (ngf * 8, 0.5), # decoder_6: [batch, 4, 16, ngf * 8 * 2] => [batch, 8, 32, ngf * 8 * 2] 59 | (ngf * 8, 0.0), # decoder_5: [batch, 8, 32, ngf * 8 * 2] => [batch, 16, 64, ngf * 8 * 2] 60 | (ngf * 4, 0.0), # decoder_4: [batch, 16, 64, ngf * 8 * 2] => [batch, 32, 128, ngf * 4 * 2] 61 | (ngf * 2, 0.0), # decoder_3: [batch, 32, 128, ngf * 4 * 2] => [batch, 64, 256, ngf * 2 * 2] 62 | (ngf, 0.0), # decoder_2: [batch, 64, 256, ngf * 2 * 2] => [batch, 128, 512, ngf * 2 * 2] 63 | ] 64 | 65 | num_encoder_layers = len(layers) 66 | for decoder_layer, (out_channels, dropout) in enumerate(layer_specs): 67 | skip_layer = num_encoder_layers - decoder_layer - 1 68 | with tf.variable_scope("decoder_%d" % (skip_layer + 1)): 69 | if decoder_layer == 0: 70 | # first decoder layer doesn't have skip connections 71 | # since it is directly connected to the skip_layer 72 | input = layers[-1] 73 | else: 74 | batch, height, width, channel = tf_shape(layers[-1], rank=4) 75 | 76 | input = tf.concat([layers[-1], tf.reshape(layers[skip_layer - 1], [batch, height, width, channel])], 77 | axis=3) 78 | 79 | rectified = tf.nn.relu(input) 80 | # [batch, in_height, in_width, in_channels] => [batch, in_height*2, in_width*2, out_channels] 81 | output = gen_deconv(rectified, out_channels) 82 | output = batchnorm(output) 83 | 84 | if dropout > 0.0: 85 | output = tf.nn.dropout(output, keep_prob=1 - dropout) 86 | 87 | layers.append(output) 88 | 89 | # decoder_1: [batch, 128, 512, ngf * 2] => [batch, 256, 1024, generator_outputs_channels] 90 | with tf.variable_scope("decoder_1"): 91 | # input = tf.concat([layers[-1], layers[0]], axis=3) 92 | rectified = tf.nn.relu(layers[-1]) 93 | output = gen_deconv(rectified, generator_outputs_channels) 94 | output = tf.tanh(output) 95 | layers.append(output) 96 | 97 | outputs_grd = layers[-1] 98 | 99 | return outputs_grd 100 | 101 | 102 | def create_discriminator(discrim_inputs, ndf=64): 103 | n_layers = 3 104 | layers = [] 105 | 106 | # layer_1: [batch, 256, 256, in_channels * 2] => [batch, 128, 128, ndf] 107 | with tf.variable_scope("layer_1"): 108 | convolved = discrim_conv(discrim_inputs, ndf, stride=2) 109 | rectified = lrelu(convolved, 0.2) 110 | layers.append(rectified) 111 | 112 | # layer_2: [batch, 128, 128, ndf] => [batch, 64, 64, ndf * 2] 113 | # layer_3: [batch, 64, 64, ndf * 2] => [batch, 32, 32, ndf * 4] 114 | # layer_4: [batch, 32, 32, ndf * 4] => [batch, 31, 31, ndf * 8] 115 | for i in range(n_layers): 116 | with tf.variable_scope("layer_%d" % (len(layers) + 1)): 117 | out_channels = ndf * min(2**(i+1), 8) 118 | stride = 1 if i == n_layers - 1 else 2 # last layer here has stride 1 119 | convolved = discrim_conv(layers[-1], out_channels, stride=stride) 120 | normalized = batchnorm(convolved) 121 | rectified = lrelu(normalized, 0.2) 122 | layers.append(rectified) 123 | 124 | # layer_5: [batch, 31, 31, ndf * 8] => [batch, 30, 30, 1] 125 | with tf.variable_scope("layer_%d" % (len(layers) + 1)): 126 | convolved = discrim_conv(rectified, out_channels=1, stride=1) 127 | output = tf.sigmoid(convolved) 128 | layers.append(output) 129 | 130 | return layers[-1] 131 | 132 | 133 | def 
create_model(inputs, targets, ref_images, a): 134 | 135 | with tf.variable_scope("generator"): 136 | 137 | with tf.variable_scope('height_estimation'): 138 | estimated_height = encoder_decoder(inputs, generator_outputs_channels=a.heightPlaneNum, ngf=4, 139 | activational_layer=tf.nn.softmax) 140 | estimated_height = softargmax(estimated_height) 141 | # print("*******************************",estimated_height.get_shape().as_list()) 142 | 143 | generator_inputs = tf.concat([inputs, estimated_height], axis=-1) 144 | 145 | outputs_grd = create_generator(generator_inputs, ref_images, a) 146 | 147 | with tf.name_scope("real_discriminator_grd"): 148 | with tf.variable_scope("discriminator_grd"): 149 | # 2x [batch, height, width, channels] => [batch, 30, 30, 1] 150 | predict_real_grd = create_discriminator(targets) 151 | 152 | with tf.name_scope("fake_discriminator_grd"): 153 | with tf.variable_scope("discriminator_grd", reuse=True): 154 | # 2x [batch, height, width, channels] => [batch, 30, 30, 1] 155 | predict_fake_grd = create_discriminator(outputs_grd) 156 | 157 | 158 | with tf.name_scope("discriminator_loss"): 159 | # minimizing -tf.log will try to get inputs to 1 160 | # predict_real => 1 161 | # predict_fake => 0 162 | discrim_loss = 0.5 * (tf.reduce_mean(-(tf.log(predict_real_grd + EPS) + tf.log(1 - predict_fake_grd + EPS)))) 163 | 164 | with tf.name_scope("generator_loss"): 165 | # predict_fake => 1 166 | # abs(targets - outputs) => 0 167 | gen_loss_GAN_grd = tf.reduce_mean(-tf.log(predict_fake_grd + EPS)) 168 | gen_loss_L1_grd = tf.reduce_mean(tf.abs(targets - outputs_grd)) 169 | gen_loss_perceptual_grd = perceptual_loss(targets, outputs_grd) 170 | 171 | gen_loss = gen_loss_GAN_grd * a.gan_weight + \ 172 | gen_loss_perceptual_grd * a.perceptual_weight_grd + \ 173 | gen_loss_L1_grd * a.l1_weight_grd 174 | 175 | with tf.name_scope("discriminator_train"): 176 | discrim_tvars = [var for var in tf.trainable_variables() if var.name.startswith("discriminator")] 177 | discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1) 178 | discrim_grads_and_vars = discrim_optim.compute_gradients(discrim_loss, var_list=discrim_tvars) 179 | discrim_train = discrim_optim.apply_gradients(discrim_grads_and_vars) 180 | 181 | with tf.name_scope("generator_train"): 182 | with tf.control_dependencies([discrim_train]): 183 | gen_tvars = [var for var in tf.trainable_variables() if var.name.startswith("generator")] 184 | gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1) 185 | gen_grads_and_vars = gen_optim.compute_gradients(gen_loss, var_list=gen_tvars) 186 | gen_train = gen_optim.apply_gradients(gen_grads_and_vars) 187 | 188 | ema = tf.train.ExponentialMovingAverage(decay=0.99) 189 | update_losses = ema.apply([discrim_loss, gen_loss_GAN_grd, gen_loss_L1_grd, gen_loss_perceptual_grd]) 190 | 191 | global_step = tf.train.get_or_create_global_step() 192 | incr_global_step = tf.assign(global_step, global_step+1) 193 | 194 | return Model( 195 | predict_real=predict_real_grd, 196 | predict_fake=predict_fake_grd, 197 | discrim_loss=ema.average(discrim_loss), 198 | discrim_grads_and_vars=discrim_grads_and_vars, 199 | gen_loss_GAN=ema.average(gen_loss_GAN_grd), 200 | gen_loss_L1=ema.average(gen_loss_L1_grd), 201 | gen_loss_perceptual=ema.average(gen_loss_perceptual_grd), 202 | gen_grads_and_vars=gen_grads_and_vars, 203 | estimated_height=tf.argmax(estimated_height, axis=-1), 204 | generator_inputs=generator_inputs, 205 | outputs=outputs_grd, 206 | train=tf.group(update_losses, incr_global_step, gen_train), 207 | ) 208 | 
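A minimal NumPy sketch (an editorial illustration, not a file in this repository) of the soft-argmax step used in model23.py above: the per-pixel softmax scores over the heightPlaneNum planes are collapsed into an expected plane index, which stays differentiable, and that single-channel map is concatenated with the aerial image to form the generator input. Unlike geometry.utils.softargmax, this version subtracts the per-pixel maximum before exponentiating, purely for numerical stability.

import numpy as np

def soft_argmax_np(x, beta=100.0):
    # x: [..., num_planes] scores; returns the expected plane index along the last axis
    # (keepdims), mirroring softargmax() in geometry/utils.py, with a max-shift added here.
    z = beta * (x - x.max(axis=-1, keepdims=True))
    p = np.exp(z) / np.exp(z).sum(axis=-1, keepdims=True)  # softmax over the height planes
    idx = np.arange(x.shape[-1], dtype=np.float64)
    return (p * idx).sum(axis=-1, keepdims=True)           # expected index, smooth in x

scores = np.array([[0.05, 0.80, 0.15]])   # one pixel, three height planes
print(soft_argmax_np(scores))             # ~[[1.0]]: close to argmax, but differentiable

With a large beta (the TF version defaults to 100) the result approaches a hard argmax while still providing gradients with respect to the plane scores.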
-------------------------------------------------------------------------------- /script3/model.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | 4 | import collections 5 | from VGG.perceptual_loss import perceptual_loss 6 | from geometry.Geometry import * 7 | from geometry.projector import * 8 | from geometry.utils import * 9 | 10 | 11 | EPS = 1e-7 12 | 13 | target_height = 128 14 | target_width = 512 15 | aer_size = 256 16 | grd_height = -2 17 | max_height = 6 18 | 19 | Model = collections.namedtuple("Model", "outputs, predict_real, predict_fake, discrim_loss, discrim_grads_and_vars, " 20 | "estimated_height, generator_inputs," 21 | "gen_loss_GAN, gen_loss_L1, gen_loss_perceptual, gen_grads_and_vars, train") 22 | 23 | def create_generator(generator_inputs, ref_images, a): 24 | 25 | if a.finalout_type == 'image': # ['image', 'rgba', 'fgbg'] 26 | generator_outputs_channels = 3 27 | elif a.finalout_type == 'rgba': 28 | generator_outputs_channels = a.radiusPlaneNum * 4 29 | elif a.finalout_type == 'fgbg': 30 | generator_outputs_channels = a.radiusPlaneNum * 2 + 3 31 | 32 | ngf = a.ngf 33 | layers = [] 34 | 35 | # encoder_1: [batch, 512, 512, in_channels] => [batch, 256, 256, ngf] 36 | with tf.variable_scope("encoder_1"): 37 | output = gen_conv(generator_inputs, ngf) 38 | layers.append(output) 39 | 40 | layer_specs = [ 41 | ngf * 2, # encoder_2: [batch, 256, 256, ngf] => [batch, 128, 128, ngf * 2] 42 | ngf * 4, # encoder_3: [batch, 128, 128, ngf * 2] => [batch, 64, 64, ngf * 4] 43 | ngf * 8, # encoder_4: [batch, 64, 64, ngf * 4] => [batch, 32, 32, ngf * 8] 44 | ngf * 8, # encoder_5: [batch, 32, 32, ngf * 8] => [batch, 16, 16, ngf * 8] 45 | ngf * 8, # encoder_6: [batch, 16, 16, ngf * 8] => [batch, 8, 8, ngf * 8] 46 | ngf * 8, # encoder_7: [batch, 8, 8, ngf * 8] => [batch, 4, 4, ngf * 8] 47 | # ngf * 8, # encoder_8: [batch, 4, 4, ngf * 8] => [batch, 2, 2, ngf * 8] 48 | ] 49 | 50 | for out_channels in layer_specs: 51 | with tf.variable_scope("encoder_%d" % (len(layers) + 1)): 52 | rectified = lrelu(layers[-1], 0.2) 53 | # [batch, in_height, in_width, in_channels] => [batch, in_height/2, in_width/2, out_channels] 54 | convolved = gen_conv(rectified, out_channels) 55 | output = batchnorm(convolved) 56 | layers.append(output) 57 | 58 | bottleneck = tf.reshape(output, [-1, 1, 4, output.get_shape().as_list()[-1]]) 59 | layers.append(bottleneck) 60 | 61 | layer_specs = [ 62 | # (ngf * 8, 0.5), # decoder_8: [batch, 1, 4, ngf * 8] => [batch, 2, 8, ngf * 8 * 2] 63 | (ngf * 8, 0.5), # decoder_7: [batch, 2, 8, ngf * 8 * 2] => [batch, 4, 16, ngf * 8 * 2] 64 | (ngf * 8, 0.5), # decoder_6: [batch, 4, 16, ngf * 8 * 2] => [batch, 8, 32, ngf * 8 * 2] 65 | (ngf * 8, 0.5), # decoder_5: [batch, 8, 32, ngf * 8 * 2] => [batch, 16, 64, ngf * 8 * 2] 66 | (ngf * 4, 0.0), # decoder_4: [batch, 16, 64, ngf * 8 * 2] => [batch, 32, 128, ngf * 4 * 2] 67 | (ngf * 2, 0.0), # decoder_3: [batch, 32, 128, ngf * 4 * 2] => [batch, 64, 256, ngf * 2 * 2] 68 | (ngf, 0.0), # decoder_2: [batch, 64, 256, ngf * 2 * 2] => [batch, 128, 512, ngf * 2 * 2] 69 | ] 70 | 71 | num_encoder_layers = len(layers) 72 | for decoder_layer, (out_channels, dropout) in enumerate(layer_specs): 73 | skip_layer = num_encoder_layers - decoder_layer - 1 74 | with tf.variable_scope("decoder_%d" % (skip_layer + 1)): 75 | 76 | if a.skip: 77 | 78 | if decoder_layer == 0: 79 | # first decoder layer doesn't have skip connections 80 | # since it is directly 
connected to the skip_layer 81 | input = layers[-1] 82 | else: 83 | input = tf.concat([layers[-1], layers[skip_layer-1]], axis=3) 84 | else: 85 | 86 | input = layers[-1] 87 | 88 | rectified = tf.nn.relu(input) 89 | # [batch, in_height, in_width, in_channels] => [batch, in_height*2, in_width*2, out_channels] 90 | output = gen_deconv(rectified, out_channels) 91 | output = batchnorm(output) 92 | 93 | if dropout > 0.0: 94 | output = tf.nn.dropout(output, keep_prob=1 - dropout) 95 | 96 | layers.append(output) 97 | 98 | # decoder_1: [batch, 128, 512, ngf * 2] => [batch, 256, 1024, generator_outputs_channels] 99 | with tf.variable_scope("decoder_1"): 100 | # input = tf.concat([layers[-1], layers[0]], axis=3) 101 | rectified = tf.nn.relu(layers[-1]) 102 | output = gen_deconv(rectified, generator_outputs_channels) 103 | output = tf.tanh(output) 104 | layers.append(output) 105 | 106 | if a.finalout_type == 'image': # ['image', 'rgba', 'fgbg'] 107 | outputs_grd = layers[-1] 108 | 109 | return outputs_grd 110 | 111 | elif a.finalout_type == 'rgba': 112 | outputs_grd = mpi_render_grd_view(layers[-1], share_alpha=True) 113 | outputs_grd = tf.tanh(outputs_grd) 114 | render_aer = mpi_render_aer_view(layers[-1], share_alpha=True) 115 | # render_aer = rtheta2uv(render_aer, a.radiusPlaneNum * 2) 116 | # outputs_aer = refine(render_aer) 117 | render_aer = rtheta2uv(render_aer, 256) 118 | with tf.variable_scope('refine_aer'): 119 | outputs_aer = encoder_decoder(render_aer, 3, ngf=16, activational_layer=tf.nn.tanh) 120 | 121 | return outputs_grd, outputs_aer 122 | 123 | elif a.finalout_type == 'fgbg': 124 | blend_weights = (layers[-1][:, :, :, :a.radiusPlaneNum] + 1.) / 2. 125 | alphas = (layers[-1][:, :, :, a.radiusPlaneNum: 2 * a.radiusPlaneNum] + 1.) / 2. 126 | bg_rgb = layers[-1][..., -3:] 127 | fg_rgb = ref_images 128 | 129 | for i in range(a.radiusPlaneNum): 130 | curr_alpha = tf.expand_dims(alphas[:, :, :, i], -1) 131 | w = tf.expand_dims(blend_weights[:, :, :, i], -1) 132 | curr_rgb = w * fg_rgb + (1 - w) * bg_rgb 133 | curr_rgba = tf.concat([curr_rgb, curr_alpha], axis=3) 134 | if i == 0: 135 | rgba_layers = curr_rgba 136 | else: 137 | rgba_layers = tf.concat([rgba_layers, curr_rgba], axis=3) 138 | 139 | outputs_grd = mpi_render_grd_view(rgba_layers, share_alpha=True) 140 | render_aer = mpi_render_aer_view(rgba_layers, share_alpha=True) 141 | # render_aer = rtheta2uv(render_aer, a.radiusPlaneNum * 2) 142 | # outputs_aer = refine(render_aer) 143 | render_aer = rtheta2uv(render_aer, 256) 144 | with tf.variable_scope('refine_aer'): 145 | outputs_aer = encoder_decoder(render_aer, 3, ngf=4, activational_layer=tf.nn.tanh) 146 | 147 | return outputs_grd, outputs_aer 148 | 149 | 150 | def create_discriminator(discrim_inputs, ndf=64): 151 | n_layers = 3 152 | layers = [] 153 | 154 | # layer_1: [batch, 256, 256, in_channels * 2] => [batch, 128, 128, ndf] 155 | with tf.variable_scope("layer_1"): 156 | convolved = discrim_conv(discrim_inputs, ndf, stride=2) 157 | rectified = lrelu(convolved, 0.2) 158 | layers.append(rectified) 159 | 160 | # layer_2: [batch, 128, 128, ndf] => [batch, 64, 64, ndf * 2] 161 | # layer_3: [batch, 64, 64, ndf * 2] => [batch, 32, 32, ndf * 4] 162 | # layer_4: [batch, 32, 32, ndf * 4] => [batch, 31, 31, ndf * 8] 163 | for i in range(n_layers): 164 | with tf.variable_scope("layer_%d" % (len(layers) + 1)): 165 | out_channels = ndf * min(2**(i+1), 8) 166 | stride = 1 if i == n_layers - 1 else 2 # last layer here has stride 1 167 | convolved = discrim_conv(layers[-1], out_channels, 
stride=stride) 168 | normalized = batchnorm(convolved) 169 | rectified = lrelu(normalized, 0.2) 170 | layers.append(rectified) 171 | 172 | # layer_5: [batch, 31, 31, ndf * 8] => [batch, 30, 30, 1] 173 | with tf.variable_scope("layer_%d" % (len(layers) + 1)): 174 | convolved = discrim_conv(rectified, out_channels=1, stride=1) 175 | output = tf.sigmoid(convolved) 176 | layers.append(output) 177 | 178 | return layers[-1] 179 | 180 | 181 | def create_model(inputs, targets, ref_images, a): 182 | 183 | with tf.variable_scope("generator"): 184 | 185 | with tf.variable_scope('height_estimation'): 186 | 187 | if a.heightPlaneNum > 1: 188 | 189 | estimated_height = encoder_decoder(inputs, generator_outputs_channels=a.heightPlaneNum, ngf=4, 190 | activational_layer=tf.nn.softmax) 191 | else: 192 | estimated_height = tf.concat([tf.zeros(inputs.get_shape().as_list()[:-1] + [63]), 193 | tf.ones(inputs.get_shape().as_list()[:-1] + [1])], axis=-1) 194 | 195 | generator_inputs = geometry_transform(inputs, estimated_height, target_height, target_width, 196 | a.height_mode, grd_height, max_height, a.method, a.geoout_type, a.dataset) 197 | 198 | # height, width = targets.get_shape().as_list()[1:-1] 199 | # concat_inputs = tf.concat([generator_inputs[:, : int(height/2), :, :], ref_images[:, int(height/2):, :, :]], axis=1) 200 | 201 | outputs = create_generator(generator_inputs, ref_images, a) 202 | 203 | if a.finalout_type == 'image': # ['image', 'rgba', 'fgbg'] 204 | outputs_grd = outputs 205 | 206 | else: 207 | outputs_grd, outputs_aer = outputs 208 | 209 | with tf.name_scope("real_discriminator_grd"): 210 | with tf.variable_scope("discriminator_grd"): 211 | # 2x [batch, height, width, channels] => [batch, 30, 30, 1] 212 | predict_real_grd = create_discriminator(targets) 213 | 214 | with tf.name_scope("fake_discriminator_grd"): 215 | with tf.variable_scope("discriminator_grd", reuse=True): 216 | # 2x [batch, height, width, channels] => [batch, 30, 30, 1] 217 | predict_fake_grd = create_discriminator(outputs_grd) 218 | 219 | 220 | with tf.name_scope("discriminator_loss"): 221 | # minimizing -tf.log will try to get inputs to 1 222 | # predict_real => 1 223 | # predict_fake => 0 224 | discrim_loss = 0.5 * (tf.reduce_mean(-(tf.log(predict_real_grd + EPS) + tf.log(1 - predict_fake_grd + EPS)))) 225 | 226 | with tf.name_scope("generator_loss"): 227 | # predict_fake => 1 228 | # abs(targets - outputs) => 0 229 | gen_loss_GAN_grd = tf.reduce_mean(-tf.log(predict_fake_grd + EPS)) 230 | gen_loss_L1_grd = tf.reduce_mean(tf.abs(targets - outputs_grd)) 231 | gen_loss_perceptual_grd = perceptual_loss(targets, outputs_grd) 232 | if a.finalout_type != 'image': 233 | gen_loss_L1_aer = tf.reduce_mean(tf.abs(inputs - outputs_aer)) 234 | gen_loss_perceptual_aer = perceptual_loss(inputs, outputs_aer) 235 | gen_loss = gen_loss_GAN_grd * a.gan_weight + \ 236 | gen_loss_perceptual_grd * a.perceptual_weight_grd + \ 237 | gen_loss_perceptual_aer * a.perceptual_weight_aer + \ 238 | gen_loss_L1_grd * a.l1_weight_grd + \ 239 | gen_loss_L1_aer * a.l1_weight_aer 240 | else: 241 | 242 | gen_loss = gen_loss_GAN_grd * a.gan_weight + \ 243 | gen_loss_perceptual_grd * a.perceptual_weight_grd + \ 244 | gen_loss_L1_grd * a.l1_weight_grd 245 | 246 | with tf.name_scope("discriminator_train"): 247 | discrim_tvars = [var for var in tf.trainable_variables() if var.name.startswith("discriminator")] 248 | discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1) 249 | discrim_grads_and_vars = discrim_optim.compute_gradients(discrim_loss, 
var_list=discrim_tvars) 250 | discrim_train = discrim_optim.apply_gradients(discrim_grads_and_vars) 251 | 252 | with tf.name_scope("generator_train"): 253 | with tf.control_dependencies([discrim_train]): 254 | gen_tvars = [var for var in tf.trainable_variables() if var.name.startswith("generator")] 255 | gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1) 256 | gen_grads_and_vars = gen_optim.compute_gradients(gen_loss, var_list=gen_tvars) 257 | gen_train = gen_optim.apply_gradients(gen_grads_and_vars) 258 | 259 | ema = tf.train.ExponentialMovingAverage(decay=0.99) 260 | update_losses = ema.apply([discrim_loss, gen_loss_GAN_grd, gen_loss_L1_grd, gen_loss_perceptual_grd]) 261 | 262 | global_step = tf.train.get_or_create_global_step() 263 | incr_global_step = tf.assign(global_step, global_step+1) 264 | 265 | return Model( 266 | predict_real=predict_real_grd, 267 | predict_fake=predict_fake_grd, 268 | discrim_loss=ema.average(discrim_loss), 269 | discrim_grads_and_vars=discrim_grads_and_vars, 270 | gen_loss_GAN=ema.average(gen_loss_GAN_grd), 271 | gen_loss_L1=ema.average(gen_loss_L1_grd), 272 | gen_loss_perceptual=ema.average(gen_loss_perceptual_grd), 273 | gen_grads_and_vars=gen_grads_and_vars, 274 | # estimated_height=tf.argmax(estimated_height, axis=-1), 275 | estimated_height=estimated_height, 276 | generator_inputs=generator_inputs, 277 | outputs=outputs_grd, 278 | train=tf.group(update_losses, incr_global_step, gen_train), 279 | ) 280 | 281 | -------------------------------------------------------------------------------- /script3/baseline22.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | 6 | import os 7 | 8 | # os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' 9 | # os.environ['CUDA_VISIBLE_DEVICES'] = '1' 10 | 11 | import sys 12 | sys.path.append('../') 13 | 14 | import tensorflow as tf 15 | import numpy as np 16 | import argparse 17 | import os 18 | import json 19 | 20 | import random 21 | import collections 22 | import math 23 | import time 24 | import PIL.Image as Image 25 | 26 | from model22 import * 27 | 28 | parser = argparse.ArgumentParser() 29 | # parser.add_argument("--input_dir", help="path to folder containing images", default='facades/train') 30 | parser.add_argument("--dataset", help="dataset", default='CVUSA') 31 | parser.add_argument("--mode", choices=["train", "test", "export"], default="train") 32 | parser.add_argument("--output_dir", help="where to put output files", default='pix2pix_perceploss') 33 | parser.add_argument("--seed", type=int) 34 | parser.add_argument("--checkpoint", help="directory with checkpoint to resume training from or use for testing") 35 | 36 | parser.add_argument("--max_steps", type=int, help="number of training steps (0 to disable)") 37 | parser.add_argument("--max_epochs", type=int, default=35, help="number of training epochs") 38 | parser.add_argument("--summary_freq", type=int, default=100, help="update summaries every summary_freq steps") 39 | parser.add_argument("--progress_freq", type=int, default=50, help="display progress every progress_freq steps") 40 | parser.add_argument("--trace_freq", type=int, default=0, help="trace execution every trace_freq steps") 41 | parser.add_argument("--display_freq", type=int, default=0, help="write current training images every display_freq steps") 42 | parser.add_argument("--save_freq", type=int, default=5000, help="save model every save_freq 
steps, 0 to disable") 43 | 44 | parser.add_argument("--separable_conv", action="store_true", help="use separable convolutions in the generator") 45 | parser.add_argument("--aspect_ratio", type=float, default=1.0, help="aspect ratio of output images (width/height)") 46 | parser.add_argument("--lab_colorization", action="store_true", help="split input image into brightness (A) and color (B)") 47 | parser.add_argument("--batch_size", type=int, default=4, help="number of images in batch") 48 | parser.add_argument("--which_direction", type=str, default="AtoG", choices=["AtoG", "GtoA"]) 49 | parser.add_argument("--ngf", type=int, default=64, help="number of generator filters in first conv layer") 50 | parser.add_argument("--ndf", type=int, default=64, help="number of discriminator filters in first conv layer") 51 | parser.add_argument("--scale_size", type=int, default=286, help="scale images to this size before cropping to 256x256") 52 | parser.add_argument("--flip", dest="flip", action="store_true", help="flip images horizontally") 53 | parser.add_argument("--no_flip", dest="flip", action="store_false", help="don't flip images horizontally") 54 | parser.set_defaults(flip=True) 55 | parser.add_argument("--lr", type=float, default=0.0002, help="initial learning rate for adam") 56 | parser.add_argument("--beta1", type=float, default=0.5, help="momentum term of adam") 57 | 58 | parser.add_argument("--inputs_type", choices=["original", "geometry"], default="geometry") 59 | 60 | parser.add_argument("--gan_weight", type=float, default=1.0, help="weight on GAN term for generator gradient") 61 | parser.add_argument("--l1_weight_grd", type=float, default=0.0, help="weight on L1 term for the ground-view generator loss") 62 | parser.add_argument("--l1_weight_aer", type=float, default=10.0, help="weight on L1 term for the aerial-view generator loss") 63 | parser.add_argument("--perceptual_weight_grd", type=float, default=1.0, help="weight on perceptual term for the ground-view generator loss") 64 | parser.add_argument("--perceptual_weight_aer", type=float, default=0.0, help="weight on perceptual term for the aerial-view generator loss") 65 | 66 | parser.add_argument("--heightPlaneNum", type=int, default=32, help="number of height planes used by the geometry transform") 67 | parser.add_argument("--radiusPlaneNum", type=int, default=32, help="number of radius planes used by the geometry transform") 68 | parser.add_argument("--height_mode", choices=['radiusPlaneMethod', 'heightPlaneMethod'], default='radiusPlaneMethod') 69 | # The following two parameters (--method, --geoout_type) are only used when 'height_mode' is 'radiusPlaneMethod'.
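# Example invocation (hypothetical, not taken from the provided run scripts); these three flags
# are forwarded to geometry_transform through model22.create_model:
#   python baseline22.py --dataset CVUSA --height_mode radiusPlaneMethod --method column --geoout_type image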
70 | parser.add_argument("--method", choices=['column', 'point'], default='column') 71 | parser.add_argument("--geoout_type", choices=['volume', 'image'], default='image') 72 | 73 | parser.add_argument("--finalout_type", choices=['image', 'rgba', 'fgbg'], default='image') 74 | 75 | # export options 76 | parser.add_argument("--output_filetype", default="png", choices=["png", "jpeg"]) 77 | a = parser.parse_args() 78 | 79 | EPS = 1e-12 80 | CROP_SIZE = 256 81 | 82 | nameStr = 'baseline22' 83 | 84 | def save_images(fetches, step=None): 85 | 86 | image_dir = os.path.join('./GeneratedData/', a.dataset, nameStr, 'image') 87 | if not os.path.exists(image_dir): 88 | os.makedirs(image_dir) 89 | 90 | filesets = [] 91 | for i, in_path in enumerate(fetches["paths"]): 92 | name, _ = os.path.splitext(os.path.basename(in_path.decode("utf8"))) 93 | fileset = {"name": name, "step": step} 94 | for kind in ["outputs"]: 95 | filename = name + ".png" 96 | if step is not None: 97 | filename = "%08d-%s" % (step, filename) 98 | fileset[kind] = filename 99 | out_path = os.path.join(image_dir, filename) 100 | contents = fetches[kind][i] 101 | with open(out_path, "wb") as f: 102 | f.write(contents) 103 | filesets.append(fileset) 104 | return filesets 105 | 106 | 107 | def main(): 108 | if a.seed is None: 109 | a.seed = random.randint(0, 2**31 - 1) 110 | 111 | tf.set_random_seed(a.seed) 112 | np.random.seed(a.seed) 113 | random.seed(a.seed) 114 | 115 | output_dir = os.path.join(a.dataset, nameStr, 'aer') 116 | 117 | if not os.path.exists(output_dir): 118 | os.makedirs(output_dir) 119 | 120 | if a.mode == "test" or a.mode == "export": 121 | if a.checkpoint is None: 122 | raise Exception("checkpoint required for test mode") 123 | 124 | # load some options from the checkpoint 125 | checkpoint_dir = os.path.join(a.dataset, nameStr, 'aer') 126 | options = {"which_direction", "ngf", "ndf", "lab_colorization"} 127 | with open(os.path.join(checkpoint_dir, "options.json")) as f: 128 | for key, val in json.loads(f.read()).items(): 129 | if key in options: 130 | print("loaded", key, "=", val) 131 | setattr(a, key, val) 132 | # disable these features in test mode 133 | a.scale_size = CROP_SIZE 134 | a.flip = False 135 | 136 | for k, v in a._get_kwargs(): 137 | print(k, "=", v) 138 | 139 | with open(os.path.join(output_dir, "options.json"), "w") as f: 140 | f.write(json.dumps(vars(a), sort_keys=True, indent=4)) 141 | 142 | if a.dataset=='CVUSA': 143 | from load_data.load_data_cvusa import load_examples 144 | elif a.dataset=='CVACT': 145 | from load_data.load_data_cvact import load_examples 146 | elif a.dataset=='CVACThalf': 147 | from load_data.load_data_cvact_half import load_examples 148 | elif a.dataset=='OP': 149 | from load_data.load_data_op import load_examples 150 | 151 | examples = load_examples(a.mode, a.batch_size) 152 | print("examples count = %d" % examples.count) 153 | 154 | inputs = examples.aer 155 | targets = examples.pano 156 | ref_images = examples.tanpolar 157 | 158 | # inputs and targets are [batch_size, height, width, channels] 159 | model = create_model(inputs, targets, ref_images, a) 160 | 161 | inputs = deprocess(inputs) 162 | targets = deprocess(targets) 163 | outputs = deprocess(model.outputs) 164 | 165 | def convert(image): 166 | if a.aspect_ratio != 1.0: 167 | # upscale to correct aspect ratio 168 | size = [CROP_SIZE, int(round(CROP_SIZE * a.aspect_ratio))] 169 | image = tf.image.resize_images(image, size=size, method=tf.image.ResizeMethod.BICUBIC) 170 | 171 | return tf.image.convert_image_dtype(image, 
dtype=tf.uint8, saturate=True) 172 | 173 | # reverse any processing on images so they can be written to disk or displayed to user 174 | with tf.name_scope("convert_inputs"): 175 | converted_inputs = convert(inputs) 176 | 177 | with tf.name_scope("convert_targets"): 178 | converted_targets = convert(targets) 179 | 180 | with tf.name_scope("convert_outputs"): 181 | converted_outputs = convert(outputs) 182 | 183 | with tf.name_scope("encode_images"): 184 | display_fetches = { 185 | "paths": examples.paths, 186 | "inputs": tf.map_fn(tf.image.encode_png, converted_inputs, dtype=tf.string, name="input_pngs"), 187 | "targets": tf.map_fn(tf.image.encode_png, converted_targets, dtype=tf.string, name="target_pngs"), 188 | "outputs": tf.map_fn(tf.image.encode_png, converted_outputs, dtype=tf.string, name="output_pngs"), 189 | } 190 | 191 | # summaries 192 | with tf.name_scope("inputs_summary"): 193 | tf.summary.image("inputs", converted_inputs) 194 | 195 | with tf.name_scope("targets_summary"): 196 | tf.summary.image("targets", converted_targets) 197 | 198 | with tf.name_scope("outputs_summary"): 199 | tf.summary.image("outputs", converted_outputs) 200 | 201 | tf.summary.scalar("discriminator_loss", model.discrim_loss) 202 | tf.summary.scalar("generator_loss_GAN", model.gen_loss_GAN) 203 | tf.summary.scalar("generator_loss_L1", model.gen_loss_L1) 204 | tf.summary.scalar("gen_loss_perceptual", model.gen_loss_perceptual) 205 | 206 | for var in tf.trainable_variables(): 207 | tf.summary.histogram(var.op.name + "/values", var) 208 | 209 | for grad, var in model.discrim_grads_and_vars + model.gen_grads_and_vars: 210 | tf.summary.histogram(var.op.name + "/gradients", grad) 211 | 212 | with tf.name_scope("parameter_count"): 213 | parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()]) 214 | 215 | saver = tf.train.Saver(max_to_keep=1) 216 | 217 | logdir = output_dir if (a.trace_freq > 0 or a.summary_freq > 0) else None 218 | sv = tf.train.Supervisor(logdir=logdir, save_summaries_secs=0, saver=None) 219 | with sv.managed_session() as sess: 220 | print("parameter_count =", sess.run(parameter_count)) 221 | 222 | if a.checkpoint is not None: 223 | print("loading model from checkpoint") 224 | checkpoint_dir = os.path.join(a.dataset, nameStr, 'aer') 225 | checkpoint = tf.train.latest_checkpoint(checkpoint_dir) 226 | saver.restore(sess, checkpoint) 227 | 228 | max_steps = 2**32 229 | if a.max_epochs is not None: 230 | max_steps = examples.steps_per_epoch * a.max_epochs 231 | if a.max_steps is not None: 232 | max_steps = a.max_steps 233 | 234 | if a.mode == "test": 235 | # testing 236 | # at most, process the test data once 237 | start = time.time() 238 | max_steps = min(examples.steps_per_epoch, max_steps) 239 | for step in range(max_steps): 240 | results = sess.run(display_fetches) 241 | filesets = save_images(results) 242 | for i, f in enumerate(filesets): 243 | print("evaluated image", f["name"]) 244 | # index_path = append_index(filesets) 245 | # print("wrote index at", index_path) 246 | print("rate", (time.time() - start) / max_steps) 247 | else: 248 | # training 249 | start = time.time() 250 | 251 | for step in range(max_steps): 252 | def should(freq): 253 | return freq > 0 and ((step + 1) % freq == 0 or step == max_steps - 1) 254 | 255 | options = None 256 | run_metadata = None 257 | if should(a.trace_freq): 258 | options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) 259 | run_metadata = tf.RunMetadata() 260 | 261 | fetches = { 262 | "train": model.train, 263 
| "global_step": sv.global_step, 264 | } 265 | 266 | if should(a.progress_freq): 267 | fetches["discrim_loss"] = model.discrim_loss 268 | fetches["gen_loss_GAN"] = model.gen_loss_GAN 269 | fetches["gen_loss_L1"] = model.gen_loss_L1 270 | fetches["gen_loss_perceptual"] = model.gen_loss_perceptual 271 | 272 | if should(a.summary_freq): 273 | fetches["summary"] = sv.summary_op 274 | 275 | if should(a.display_freq): 276 | fetches["display"] = display_fetches 277 | 278 | results = sess.run(fetches, options=options, run_metadata=run_metadata) 279 | 280 | if should(a.summary_freq): 281 | print("recording summary") 282 | sv.summary_writer.add_summary(results["summary"], results["global_step"]) 283 | 284 | if should(a.trace_freq): 285 | print("recording trace") 286 | sv.summary_writer.add_run_metadata(run_metadata, "step_%d" % results["global_step"]) 287 | 288 | if should(a.progress_freq): 289 | # global_step will have the correct step count if we resume from a checkpoint 290 | train_epoch = math.ceil(results["global_step"] / examples.steps_per_epoch) 291 | train_step = (results["global_step"] - 1) % examples.steps_per_epoch + 1 292 | rate = (step + 1) * a.batch_size / (time.time() - start) 293 | remaining = (max_steps - step) * a.batch_size / rate 294 | print("progress epoch %d step %d image/sec %0.1f remaining %dm" % (train_epoch, train_step, rate, remaining / 60)) 295 | print("discrim_loss", results["discrim_loss"]) 296 | print("gen_loss_GAN", results["gen_loss_GAN"]) 297 | print("gen_loss_L1", results["gen_loss_L1"]) 298 | print("gen_loss_perceptual", results["gen_loss_perceptual"]) 299 | 300 | if should(examples.steps_per_epoch): 301 | print("saving model") 302 | saver.save(sess, os.path.join(output_dir, "model"), global_step=sv.global_step) 303 | 304 | if sv.should_stop(): 305 | break 306 | 307 | 308 | main() 309 | -------------------------------------------------------------------------------- /script3/baseline23.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | 6 | import os 7 | 8 | # os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' 9 | # os.environ['CUDA_VISIBLE_DEVICES'] = '1' 10 | 11 | import sys 12 | sys.path.append('../') 13 | 14 | import tensorflow as tf 15 | import numpy as np 16 | import argparse 17 | import os 18 | import json 19 | 20 | import random 21 | import collections 22 | import math 23 | import time 24 | import PIL.Image as Image 25 | 26 | from model23 import * 27 | 28 | parser = argparse.ArgumentParser() 29 | # parser.add_argument("--input_dir", help="path to folder containing images", default='facades/train') 30 | parser.add_argument("--dataset", help="dataset", default='CVUSA') 31 | parser.add_argument("--mode", choices=["train", "test", "export"], default="train") 32 | parser.add_argument("--output_dir", help="where to put output files", default='pix2pix_perceploss') 33 | parser.add_argument("--seed", type=int) 34 | parser.add_argument("--checkpoint", help="directory with checkpoint to resume training from or use for testing") 35 | 36 | parser.add_argument("--max_steps", type=int, help="number of training steps (0 to disable)") 37 | parser.add_argument("--max_epochs", type=int, default=35, help="number of training epochs") 38 | parser.add_argument("--summary_freq", type=int, default=100, help="update summaries every summary_freq steps") 39 | parser.add_argument("--progress_freq", type=int, default=50, 
help="display progress every progress_freq steps") 40 | parser.add_argument("--trace_freq", type=int, default=0, help="trace execution every trace_freq steps") 41 | parser.add_argument("--display_freq", type=int, default=0, help="write current training images every display_freq steps") 42 | parser.add_argument("--save_freq", type=int, default=5000, help="save model every save_freq steps, 0 to disable") 43 | 44 | parser.add_argument("--separable_conv", action="store_true", help="use separable convolutions in the generator") 45 | parser.add_argument("--aspect_ratio", type=float, default=1.0, help="aspect ratio of output images (width/height)") 46 | parser.add_argument("--lab_colorization", action="store_true", help="split input image into brightness (A) and color (B)") 47 | parser.add_argument("--batch_size", type=int, default=4, help="number of images in batch") 48 | parser.add_argument("--which_direction", type=str, default="AtoG", choices=["AtoG", "GtoA"]) 49 | parser.add_argument("--ngf", type=int, default=64, help="number of generator filters in first conv layer") 50 | parser.add_argument("--ndf", type=int, default=64, help="number of discriminator filters in first conv layer") 51 | parser.add_argument("--scale_size", type=int, default=286, help="scale images to this size before cropping to 256x256") 52 | parser.add_argument("--flip", dest="flip", action="store_true", help="flip images horizontally") 53 | parser.add_argument("--no_flip", dest="flip", action="store_false", help="don't flip images horizontally") 54 | parser.set_defaults(flip=True) 55 | parser.add_argument("--lr", type=float, default=0.0002, help="initial learning rate for adam") 56 | parser.add_argument("--beta1", type=float, default=0.5, help="momentum term of adam") 57 | 58 | parser.add_argument("--inputs_type", choices=["original", "geometry"], default="geometry") 59 | 60 | parser.add_argument("--gan_weight", type=float, default=1.0, help="weight on GAN term for generator gradient") 61 | parser.add_argument("--l1_weight_grd", type=float, default=0.0, help="weight on GAN term for generator gradient") 62 | parser.add_argument("--l1_weight_aer", type=float, default=10.0, help="weight on L1 term for generator gradient") 63 | parser.add_argument("--perceptual_weight_grd", type=float, default=1.0, help="weight on GAN term for generator gradient") 64 | parser.add_argument("--perceptual_weight_aer", type=float, default=0.0, help="weight on GAN term for generator gradient") 65 | 66 | parser.add_argument("--heightPlaneNum", type=int, default=32, help="weight on GAN term for generator gradient") 67 | parser.add_argument("--radiusPlaneNum", type=int, default=32, help="weight on GAN term for generator gradient") 68 | parser.add_argument("--height_mode", choices=['radiusPlaneMethod', 'heightPlaneMethod'], default='radiusPlaneMethod') 69 | # Only when 'height_mode' is 'radiusPlaneMethod', the following two parameters are required. Otherwise not. 
70 | parser.add_argument("--method", choices=['column', 'point'], default='column') 71 | parser.add_argument("--geoout_type", choices=['volume', 'image'], default='image') 72 | 73 | parser.add_argument("--finalout_type", choices=['image', 'rgba', 'fgbg'], default='image') 74 | 75 | # export options 76 | parser.add_argument("--output_filetype", default="png", choices=["png", "jpeg"]) 77 | a = parser.parse_args() 78 | 79 | EPS = 1e-12 80 | CROP_SIZE = 256 81 | 82 | nameStr = 'baseline23' + '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) 83 | 84 | def save_images(fetches, step=None): 85 | cmap = np.load('../cmap.npy') 86 | image_dir = os.path.join('./GeneratedData/', a.dataset, nameStr, 'image') 87 | if not os.path.exists(image_dir): 88 | os.makedirs(image_dir) 89 | 90 | filesets = [] 91 | for i, in_path in enumerate(fetches["paths"]): 92 | name, _ = os.path.splitext(os.path.basename(in_path.decode("utf8"))) 93 | fileset = {"name": name, "step": step} 94 | for kind in ["outputs"]: 95 | filename = name + ".png" 96 | if step is not None: 97 | filename = "%08d-%s" % (step, filename) 98 | fileset[kind] = filename 99 | out_path = os.path.join(image_dir, filename) 100 | contents = fetches[kind][i] 101 | with open(out_path, "wb") as f: 102 | f.write(contents) 103 | 104 | filesets.append(fileset) 105 | return filesets 106 | 107 | 108 | def main(): 109 | if a.seed is None: 110 | a.seed = random.randint(0, 2**31 - 1) 111 | 112 | tf.set_random_seed(a.seed) 113 | np.random.seed(a.seed) 114 | random.seed(a.seed) 115 | 116 | cmap = np.load('../cmap.npy') 117 | 118 | output_dir = os.path.join(a.dataset, nameStr, 'aer') 119 | 120 | if not os.path.exists(output_dir): 121 | os.makedirs(output_dir) 122 | 123 | if a.mode == "test" or a.mode == "export": 124 | if a.checkpoint is None: 125 | raise Exception("checkpoint required for test mode") 126 | 127 | # load some options from the checkpoint 128 | checkpoint_dir = os.path.join(a.dataset, nameStr, 'aer') 129 | options = {"which_direction", "ngf", "ndf", "lab_colorization"} 130 | with open(os.path.join(checkpoint_dir, "options.json")) as f: 131 | for key, val in json.loads(f.read()).items(): 132 | if key in options: 133 | print("loaded", key, "=", val) 134 | setattr(a, key, val) 135 | # disable these features in test mode 136 | a.scale_size = CROP_SIZE 137 | a.flip = False 138 | 139 | for k, v in a._get_kwargs(): 140 | print(k, "=", v) 141 | 142 | with open(os.path.join(output_dir, "options.json"), "w") as f: 143 | f.write(json.dumps(vars(a), sort_keys=True, indent=4)) 144 | 145 | if a.dataset=='CVUSA': 146 | from load_data.load_data_cvusa import load_examples 147 | elif a.dataset=='CVACT': 148 | from load_data.load_data_cvact import load_examples 149 | elif a.dataset=='CVACThalf': 150 | from load_data.load_data_cvact_half import load_examples 151 | elif a.dataset=='OP': 152 | from load_data.load_data_op import load_examples 153 | 154 | examples = load_examples(a.mode, a.batch_size) 155 | print("examples count = %d" % examples.count) 156 | 157 | inputs = examples.aer 158 | targets = examples.pano 159 | ref_images = examples.tanpolar 160 | 161 | # inputs and targets are [batch_size, height, width, channels] 162 | model = create_model(inputs, targets, ref_images, a) 163 | 164 | inputs = deprocess(inputs) 165 | targets = deprocess(targets) 166 | outputs = deprocess(model.outputs) 167 | converted_generator_inputs = deprocess(model.generator_inputs) 168 | 169 | def convert(image): 170 | if a.aspect_ratio != 1.0: 171 | # upscale to correct 
aspect ratio 172 | size = [CROP_SIZE, int(round(CROP_SIZE * a.aspect_ratio))] 173 | image = tf.image.resize_images(image, size=size, method=tf.image.ResizeMethod.BICUBIC) 174 | 175 | return tf.image.convert_image_dtype(image, dtype=tf.uint8, saturate=True) 176 | 177 | # reverse any processing on images so they can be written to disk or displayed to user 178 | with tf.name_scope("convert_inputs"): 179 | converted_inputs = convert(inputs) 180 | 181 | with tf.name_scope("convert_targets"): 182 | converted_targets = convert(targets) 183 | 184 | with tf.name_scope("convert_outputs"): 185 | converted_outputs = convert(outputs) 186 | 187 | with tf.name_scope("encode_images"): 188 | display_fetches = { 189 | "paths": examples.paths, 190 | "inputs": tf.map_fn(tf.image.encode_png, converted_inputs, dtype=tf.string, name="input_pngs"), 191 | "targets": tf.map_fn(tf.image.encode_png, converted_targets, dtype=tf.string, name="target_pngs"), 192 | "outputs": tf.map_fn(tf.image.encode_png, converted_outputs, dtype=tf.string, name="output_pngs"), 193 | 194 | } 195 | 196 | # summaries 197 | with tf.name_scope("inputs_summary"): 198 | tf.summary.image("inputs", converted_inputs) 199 | 200 | with tf.name_scope("targets_summary"): 201 | tf.summary.image("targets", converted_targets) 202 | 203 | with tf.name_scope("outputs_summary"): 204 | tf.summary.image("outputs", converted_outputs) 205 | 206 | tf.summary.scalar("discriminator_loss", model.discrim_loss) 207 | tf.summary.scalar("generator_loss_GAN", model.gen_loss_GAN) 208 | tf.summary.scalar("generator_loss_L1", model.gen_loss_L1) 209 | tf.summary.scalar("gen_loss_perceptual", model.gen_loss_perceptual) 210 | 211 | for var in tf.trainable_variables(): 212 | tf.summary.histogram(var.op.name + "/values", var) 213 | 214 | for grad, var in model.discrim_grads_and_vars + model.gen_grads_and_vars: 215 | tf.summary.histogram(var.op.name + "/gradients", grad) 216 | 217 | with tf.name_scope("parameter_count"): 218 | parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()]) 219 | 220 | saver = tf.train.Saver(max_to_keep=1) 221 | 222 | logdir = output_dir if (a.trace_freq > 0 or a.summary_freq > 0) else None 223 | sv = tf.train.Supervisor(logdir=logdir, save_summaries_secs=0, saver=None) 224 | with sv.managed_session() as sess: 225 | print("parameter_count =", sess.run(parameter_count)) 226 | 227 | if a.checkpoint is not None: 228 | print("loading model from checkpoint") 229 | checkpoint_dir = os.path.join(a.dataset, nameStr, 'aer') 230 | checkpoint = tf.train.latest_checkpoint(checkpoint_dir) 231 | saver.restore(sess, checkpoint) 232 | 233 | max_steps = 2**32 234 | if a.max_epochs is not None: 235 | max_steps = examples.steps_per_epoch * a.max_epochs 236 | if a.max_steps is not None: 237 | max_steps = a.max_steps 238 | 239 | if a.mode == "test": 240 | # testing 241 | # at most, process the test data once 242 | start = time.time() 243 | max_steps = min(examples.steps_per_epoch, max_steps) 244 | for step in range(max_steps): 245 | results = sess.run(display_fetches) 246 | filesets = save_images(results) 247 | for i, f in enumerate(filesets): 248 | print("evaluated image", f["name"]) 249 | # index_path = append_index(filesets) 250 | # print("wrote index at", index_path) 251 | print("rate", (time.time() - start) / max_steps) 252 | else: 253 | # training 254 | start = time.time() 255 | 256 | for step in range(max_steps): 257 | def should(freq): 258 | return freq > 0 and ((step + 1) % freq == 0 or step == max_steps - 1) 259 | 260 | 
options = None 261 | run_metadata = None 262 | if should(a.trace_freq): 263 | options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) 264 | run_metadata = tf.RunMetadata() 265 | 266 | fetches = { 267 | "train": model.train, 268 | "global_step": sv.global_step, 269 | } 270 | 271 | if should(a.progress_freq): 272 | fetches["discrim_loss"] = model.discrim_loss 273 | fetches["gen_loss_GAN"] = model.gen_loss_GAN 274 | fetches["gen_loss_L1"] = model.gen_loss_L1 275 | fetches["gen_loss_perceptual"] = model.gen_loss_perceptual 276 | 277 | if should(a.summary_freq): 278 | fetches["summary"] = sv.summary_op 279 | 280 | if should(a.display_freq): 281 | fetches["display"] = display_fetches 282 | 283 | results = sess.run(fetches, options=options, run_metadata=run_metadata) 284 | # height = sess.run(model.estimated_height, options=options, run_metadata=run_metadata) 285 | # for b in range(0, a.batch_size): 286 | # img = cmap[height[b].squeeze()] 287 | # img = Image.fromarray(img) 288 | # img.save(str(b)+'height.png') 289 | 290 | 291 | if should(a.summary_freq): 292 | print("recording summary") 293 | sv.summary_writer.add_summary(results["summary"], results["global_step"]) 294 | 295 | if should(a.trace_freq): 296 | print("recording trace") 297 | sv.summary_writer.add_run_metadata(run_metadata, "step_%d" % results["global_step"]) 298 | 299 | if should(a.progress_freq): 300 | # global_step will have the correct step count if we resume from a checkpoint 301 | train_epoch = math.ceil(results["global_step"] / examples.steps_per_epoch) 302 | train_step = (results["global_step"] - 1) % examples.steps_per_epoch + 1 303 | rate = (step + 1) * a.batch_size / (time.time() - start) 304 | remaining = (max_steps - step) * a.batch_size / rate 305 | print("progress epoch %d step %d image/sec %0.1f remaining %dm" % (train_epoch, train_step, rate, remaining / 60)) 306 | print("discrim_loss", results["discrim_loss"]) 307 | print("gen_loss_GAN", results["gen_loss_GAN"]) 308 | print("gen_loss_L1", results["gen_loss_L1"]) 309 | print("gen_loss_perceptual", results["gen_loss_perceptual"]) 310 | 311 | if should(examples.steps_per_epoch): 312 | # if should(50): 313 | print("saving model") 314 | saver.save(sess, os.path.join(output_dir, "model"), global_step=sv.global_step) 315 | 316 | if sv.should_stop(): 317 | break 318 | 319 | 320 | main() 321 | -------------------------------------------------------------------------------- /geometry/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | import numpy as np 4 | from tensorflow.python.ops import math_ops 5 | 6 | 7 | def softargmax(x, beta=100): 8 | x_range = tf.range(x.shape.as_list()[-1], dtype=tf.float32) 9 | return tf.reduce_sum(tf.nn.softmax(x*beta) * x_range, axis=-1, keep_dims=True) 10 | 11 | 12 | def tf_shape(x, rank): 13 | static_shape = x.get_shape().with_rank(rank).as_list() 14 | dynamic_shape = tf.unstack(tf.shape(x), rank) 15 | return [s if s is not None else d for s,d in zip(static_shape, dynamic_shape)] 16 | 17 | 18 | def safe_divide(numerator, denominator, name='safe_divide'): 19 | return tf.where(math_ops.greater(denominator, 0), math_ops.divide(numerator, denominator), tf.zeros_like(numerator) 20 | , name=name) 21 | 22 | 23 | def preprocess(image): 24 | with tf.name_scope("preprocess"): 25 | # [0, 1] => [-1, 1] 26 | return image * 2 - 1 27 | 28 | 29 | def deprocess(image): 30 | with tf.name_scope("deprocess"): 31 | # [-1, 1] => [0, 1] 32 | return 
(image + 1) / 2 33 | 34 | 35 | def deprocess_label(label_logits): 36 | ''' 37 | :param label_logits: label.shape = [batch, height, width, 4] --> 4 is label number, value from 0 to 1 38 | :return: label: shape =[batch, height, width, 3] value in {0, 255}, for the purpose of show. 39 | ''' 40 | label_onehot = tf.one_hot(tf.argmax(label_logits, axis=-1), depth=4) 41 | label = label_onehot[..., 1:]*255 42 | return label 43 | 44 | 45 | 46 | def warp_pad_columns(x, n=1): 47 | 48 | out = tf.concat([x[:, :, -n:, :], x, x[:, :, :n, :]], axis=2) 49 | return tf.pad(out, [[0, 0], [n, n], [0, 0], [0, 0]]) 50 | 51 | 52 | def conv_layer_cir(x, kernel_dim, strides, output_dim, trainable, activated, bn, 53 | name='layer_conv', activation_function=tf.nn.relu): 54 | n = int((kernel_dim - 1) / 2) 55 | x = warp_pad_columns(x, n) 56 | 57 | input_dim = x.get_shape().as_list()[-1] 58 | with tf.variable_scope(name): # reuse=tf.AUTO_REUSE 59 | weight = tf.get_variable(name='weights', shape=[kernel_dim, kernel_dim, input_dim, output_dim], 60 | trainable=trainable, initializer=tf.contrib.layers.xavier_initializer()) 61 | bias = tf.get_variable(name='biases', shape=[output_dim], 62 | trainable=trainable, initializer=tf.contrib.layers.xavier_initializer()) 63 | 64 | out = tf.nn.conv2d(x, weight, strides, padding='VALID') + bias 65 | 66 | if bn: 67 | out = batchnorm(out) 68 | 69 | if activated: 70 | out = activation_function(out) 71 | 72 | return out 73 | 74 | 75 | 76 | def discrim_conv(batch_input, out_channels, stride): 77 | padded_input = tf.pad(batch_input, [[0, 0], [1, 1], [1, 1], [0, 0]], mode="CONSTANT") 78 | return tf.layers.conv2d(padded_input, out_channels, kernel_size=4, strides=(stride, stride), padding="valid", kernel_initializer=tf.random_normal_initializer(0, 0.02)) 79 | 80 | 81 | 82 | def gen_conv(batch_input, out_channels, separable_conv=False): 83 | # [batch, in_height, in_width, in_channels] => [batch, out_height, out_width, out_channels] 84 | initializer = tf.random_normal_initializer(0, 0.02) 85 | if separable_conv: 86 | return tf.layers.separable_conv2d(batch_input, out_channels, kernel_size=4, strides=(2, 2), padding="same", depthwise_initializer=initializer, pointwise_initializer=initializer) 87 | else: 88 | return tf.layers.conv2d(batch_input, out_channels, kernel_size=4, strides=(2, 2), padding="same", kernel_initializer=initializer) 89 | 90 | 91 | def gen_deconv(batch_input, out_channels, separable_conv=False): 92 | # [batch, in_height, in_width, in_channels] => [batch, out_height, out_width, out_channels] 93 | initializer = tf.random_normal_initializer(0, 0.02) 94 | if separable_conv: 95 | _b, h, w, _c = batch_input.shape 96 | resized_input = tf.image.resize_images(batch_input, [h * 2, w * 2], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) 97 | return tf.layers.separable_conv2d(resized_input, out_channels, kernel_size=4, strides=(1, 1), padding="same", depthwise_initializer=initializer, pointwise_initializer=initializer) 98 | else: 99 | return tf.layers.conv2d_transpose(batch_input, out_channels, kernel_size=4, strides=(2, 2), padding="same", kernel_initializer=initializer) 100 | 101 | 102 | def lrelu(x, a=0.2): 103 | with tf.name_scope("lrelu"): 104 | # adding these together creates the leak part and linear part 105 | # then cancels them out by subtracting/adding an absolute value term 106 | # leak: a*x/2 - a*abs(x)/2 107 | # linear: x/2 + abs(x)/2 108 | 109 | # this block looks like it has 2 inputs on the graph unless we do this 110 | x = tf.identity(x) 111 | return (0.5 * (1 + a)) * x + 
(0.5 * (1 - a)) * tf.abs(x) 112 | 113 | 114 | def batchnorm(inputs): 115 | return tf.layers.batch_normalization(inputs, axis=3, epsilon=1e-5, momentum=0.1, training=True, gamma_initializer=tf.random_normal_initializer(1.0, 0.02)) 116 | 117 | 118 | def check_image(image): 119 | assertion = tf.assert_equal(tf.shape(image)[-1], 3, message="image must have 3 color channels") 120 | with tf.control_dependencies([assertion]): 121 | image = tf.identity(image) 122 | 123 | if image.get_shape().ndims not in (3, 4): 124 | raise ValueError("image must be either 3 or 4 dimensions") 125 | 126 | # make the last dimension 3 so that you can unstack the colors 127 | shape = list(image.get_shape()) 128 | shape[-1] = 3 129 | image.set_shape(shape) 130 | return image 131 | 132 | 133 | def corr_distance_orien_unknow(grd_matrix, sat_matrix): 134 | ''' 135 | correlation distance for localizing ground panoramas with unknown orientation 136 | :param grd_matrix: shape = [batch_grd, height, grd_width, channel] 137 | :param sat_matrix: shape = [batch_sat, height, sat_width, channel] 138 | :return: 139 | ''' 140 | try: 141 | grd_batch, grd_height, grd_width, grd_channel = grd_matrix.get_shape().as_list() 142 | sat_batch, sat_height, sat_width, sat_channel = sat_matrix.get_shape().as_list() 143 | except: 144 | grd_batch, grd_height, grd_width, grd_channel = grd_matrix.shape 145 | sat_batch, sat_height, sat_width, sat_channel = sat_matrix.shape 146 | 147 | assert grd_height == sat_height and grd_channel == sat_channel 148 | 149 | def warp_pad_columns(x, n): 150 | out = tf.concat([x, x[:, :, :n, :]], axis=2) 151 | return out 152 | 153 | n = grd_width - 1 154 | x = warp_pad_columns(sat_matrix, n) 155 | 156 | weight = tf.transpose(grd_matrix, [1, 2, 3, 0]) 157 | 158 | out = tf.nn.conv2d(x, weight, strides=[1, 1, 1, 1], padding='VALID') 159 | 160 | assert out.get_shape().as_list() == [sat_batch, 1, sat_width, grd_batch] 161 | 162 | out = tf.squeeze(out) # shape = [sat_batch, sat_width, grd_batch] 163 | 164 | ############################ ground truth orientation corresponding distance ############################### 165 | 166 | 167 | max_dis = 2 - 2 * tf.transpose(tf.reduce_max(out, axis=1)) # shape = [grd_batch, sat_batch] 168 | 169 | pred_orien = tf.diag_part(tf.argmax(out, axis=1)) # argmax over sat_width gives shape [sat_batch, grd_batch]; diag_part gives [batch] 170 | 171 | return max_dis, pred_orien 172 | 173 | 174 | def triplet_loss(grd_matrix, sat_matrix, batch_size): 175 | ''' 176 | :param grd_matrix: shape = [grd_batch, grd_height, grd_width, grd_channel] 177 | :param sat_matrix: shape = [sat_batch, sat_height, sat_width, sat_channel] 178 | grd_batch==sat_batch grd_height==sat_height grd_channel==sat_channel grd_width<=sat_width 179 | :param grd_orien: shape = [grd_batch] the north direction (value within 0~sat_width) of each grd image 180 | :param train_grd_noise: 181 | :param batch_hard_count: the number of top hard pairs within a batch.
If 0, no in-batch hard negative mining 182 | :param train_method: 0: triplet(max_dis) + regularize * (max_dis - orien_dis) 183 | 1: triplet(orien_dis) + regularize * (max_dis - orien_dis) 184 | :param regularize: 185 | :return: 186 | ''' 187 | 188 | with tf.name_scope('weighted_soft_margin_triplet_loss'): 189 | 190 | dist_array, pred_orien = corr_distance_orien_unknow(grd_matrix, sat_matrix) 191 | 192 | pos_dist = tf.diag_part(dist_array) 193 | 194 | pair_n = batch_size * (batch_size - 1.0) 195 | 196 | # ground to satellite 197 | triplet_dist_g2s = pos_dist - dist_array 198 | loss_g2s = tf.reduce_sum(tf.log(1 + tf.exp(triplet_dist_g2s * 10))) / pair_n 199 | 200 | # satellite to ground 201 | triplet_dist_s2g = tf.expand_dims(pos_dist, 1) - dist_array 202 | loss_s2g = tf.reduce_sum(tf.log(1 + tf.exp(triplet_dist_s2g * 10))) / pair_n 203 | 204 | loss = (loss_g2s + loss_s2g) / 2.0 205 | 206 | return loss 207 | 208 | 209 | def encoder_decoder(generator_inputs, generator_outputs_channels, ngf=4, activational_layer=tf.nn.softmax): 210 | layers = [] 211 | 212 | # encoder_1: [batch, 512, 512, in_channels] => [batch, 256, 256, ngf] 213 | with tf.variable_scope("encoder_1"): 214 | output = gen_conv(generator_inputs, ngf) 215 | layers.append(output) 216 | 217 | layer_specs = [ 218 | ngf * 2, # encoder_2: [batch, 256, 256, ngf] => [batch, 128, 128, ngf * 2] 219 | ngf * 4, # encoder_3: [batch, 128, 128, ngf * 2] => [batch, 64, 64, ngf * 4] 220 | ngf * 8, # encoder_4: [batch, 64, 64, ngf * 4] => [batch, 32, 32, ngf * 8] 221 | ngf * 8, # encoder_5: [batch, 32, 32, ngf * 8] => [batch, 16, 16, ngf * 8] 222 | ngf * 8, # encoder_6: [batch, 16, 16, ngf * 8] => [batch, 8, 8, ngf * 8] 223 | ngf * 8, # encoder_7: [batch, 8, 8, ngf * 8] => [batch, 4, 4, ngf * 8] 224 | # ngf * 8, # encoder_8: [batch, 4, 4, ngf * 8] => [batch, 2, 2, ngf * 8] 225 | ] 226 | 227 | for out_channels in layer_specs: 228 | with tf.variable_scope("encoder_%d" % (len(layers) + 1)): 229 | rectified = lrelu(layers[-1], 0.2) 230 | # [batch, in_height, in_width, in_channels] => [batch, in_height/2, in_width/2, out_channels] 231 | convolved = gen_conv(rectified, out_channels) 232 | output = batchnorm(convolved) 233 | layers.append(output) 234 | 235 | layer_specs = [ 236 | # (ngf * 8, 0.5), # decoder_8: [batch, 1, 4, ngf * 8] => [batch, 2, 8, ngf * 8 * 2] 237 | (ngf * 8, 0.0), # decoder_7: [batch, 2, 8, ngf * 8 * 2] => [batch, 4, 16, ngf * 8 * 2] 238 | (ngf * 8, 0.0), # decoder_6: [batch, 4, 16, ngf * 8 * 2] => [batch, 8, 32, ngf * 8 * 2] 239 | (ngf * 8, 0.0), # decoder_5: [batch, 8, 32, ngf * 8 * 2] => [batch, 16, 64, ngf * 8 * 2] 240 | (ngf * 4, 0.0), # decoder_4: [batch, 16, 64, ngf * 8 * 2] => [batch, 32, 128, ngf * 4 * 2] 241 | (ngf * 2, 0.0), # decoder_3: [batch, 32, 128, ngf * 4 * 2] => [batch, 64, 256, ngf * 2 * 2] 242 | (ngf, 0.0), # decoder_2: [batch, 64, 256, ngf * 2 * 2] => [batch, 128, 512, ngf * 2 * 2] 243 | ] 244 | 245 | num_encoder_layers = len(layers) 246 | for decoder_layer, (out_channels, dropout) in enumerate(layer_specs): 247 | skip_layer = num_encoder_layers - decoder_layer - 1 248 | with tf.variable_scope("decoder_%d" % (skip_layer + 1)): 249 | if decoder_layer == 0: 250 | # first decoder layer doesn't have skip connections 251 | # since it is directly connected to the skip_layer 252 | input = layers[-1] 253 | else: 254 | input = tf.concat([layers[-1], layers[skip_layer]], axis=3) 255 | 256 | rectified = tf.nn.relu(input) 257 | # [batch, in_height, in_width, in_channels] => [batch, in_height*2, in_width*2, out_channels] 258 
| output = gen_deconv(rectified, out_channels) 259 | output = batchnorm(output) 260 | 261 | if dropout > 0.0: 262 | output = tf.nn.dropout(output, keep_prob=1 - dropout) 263 | 264 | layers.append(output) 265 | 266 | # decoder_1: [batch, 128, 512, ngf * 2] => [batch, 256, 1024, generator_outputs_channels] 267 | with tf.variable_scope("decoder_1"): 268 | # input = tf.concat([layers[-1], layers[0]], axis=3) tf.random_normal_initializer(0, 0.02) 269 | rectified = tf.nn.relu(layers[-1]) 270 | # output = gen_deconv(rectified, generator_outputs_channels) 271 | output = tf.layers.conv2d_transpose(rectified, generator_outputs_channels, kernel_size=4, strides=(2, 2), 272 | padding="same", 273 | kernel_initializer=tf.zeros_initializer(), 274 | bias_initializer=tf.constant_initializer( 275 | np.concatenate( 276 | [np.zeros(generator_outputs_channels - 1, dtype=np.float32), 277 | np.ones(1, dtype=np.float32)], axis=0))) 278 | # output = tf.tanh(output) 279 | output = activational_layer(output) 280 | layers.append(output) 281 | 282 | return layers[-1] 283 | 284 | 285 | 286 | 287 | 288 | # def sample_within_bounds_xyz(signal, batch_index, x, y, z, channel_index): 289 | # ''' 290 | # :param signal: tf variable, shape = [batch, height, width, PlaneNum, channel] 291 | # :param x: numpy 292 | # :param y: numpy 293 | # :return: 294 | # ''' 295 | # 296 | # index = tf.stack([tf.reshape(batch_index, [-1]), tf.reshape(x, [-1]), tf.reshape(y, [-1]), 297 | # tf.reshape(z, [-1]), tf.reshape(channel_index, [-1])], axis=1) 298 | # 299 | # result = tf.gather_nd(signal, index) 300 | # 301 | # batch, height, width, channel = tf_shape(x, rank=4) 302 | # 303 | # sample = tf.reshape(result, [batch, height, width, channel]) 304 | # 305 | # return sample 306 | # 307 | # 308 | # def sample_bilinear_xyz(signal, batch_index, rx, ry, rz, channel_index): 309 | # ''' 310 | # :param signal: tensor_shape = [batch, sat_height, sat_width, heightPlaneNum, channel] 311 | # :param rx: tensor_shape = [batch, grd_height, grd_width, channel] 312 | # :param ry: tensor_shape = [batch, grd_height, grd_width, channel] 313 | # :param batch_index: tensor_shape = [batch, grd_height, grd_width, channel] 314 | # :param channel_index: tensor_shape = [batch, grd_height, grd_width, channel] 315 | # :return: 316 | # ''' 317 | # 318 | # signal_dim_x, signal_dim_y, signal_dim_z = signal.get_shape().as_list()[1:-1] 319 | # 320 | # # obtain four sample coordinates 321 | # ix0 = tf.maximum(tf.cast(rx, tf.int32), 0) 322 | # iy0 = tf.maximum(tf.cast(ry, tf.int32), 0) 323 | # iz0 = tf.maximum(tf.cast(rz, tf.int32), 0) 324 | # 325 | # ix1 = tf.minimum(ix0 + 1, signal_dim_x-1) 326 | # iy1 = tf.minimum(iy0 + 1, signal_dim_y-1) 327 | # iz1 = tf.minimum(iz0 + 1, signal_dim_z-1) 328 | # 329 | # # sample signal at each four positions 330 | # signal_000 = sample_within_bounds_xyz(signal, batch_index, ix0, iy0, iz0, channel_index) 331 | # signal_100 = sample_within_bounds_xyz(signal, batch_index, ix0, iy1, iz0, channel_index) 332 | # signal_010 = sample_within_bounds_xyz(signal, batch_index, ix1, iy0, iz0, channel_index) 333 | # signal_110 = sample_within_bounds_xyz(signal, batch_index, ix1, iy1, iz0, channel_index) 334 | # 335 | # signal_001 = sample_within_bounds_xyz(signal, batch_index, ix0, iy0, iz1, channel_index) 336 | # signal_101 = sample_within_bounds_xyz(signal, batch_index, ix0, iy1, iz1, channel_index) 337 | # signal_011 = sample_within_bounds_xyz(signal, batch_index, ix1, iy0, iz1, channel_index) 338 | # signal_111 = sample_within_bounds_xyz(signal, 
batch_index, ix1, iy1, iz1, channel_index) 339 | # 340 | # ix1 = tf.cast(ix1, tf.float32) 341 | # iy1 = tf.cast(iy1, tf.float32) 342 | # iz1 = tf.cast(iz1, tf.float32) 343 | # 344 | # fx00 = (ix1 - rx) * signal_100 + (rx - ix0) * signal_000 345 | # fx10 = (ix1 - rx) * signal_110 + (rx - ix0) * signal_010 346 | # fy0 = (iy1 - ry) * fx10 + (ry - iy0) * fx00 347 | # 348 | # fx01 = (ix1 - rx) * signal_101 + (rx - ix0) * signal_001 349 | # fx11 = (ix1 - rx) * signal_111 + (rx - ix0) * signal_011 350 | # fy1 = (iy1 - ry) * fx11 + (ry - iy0) * fx01 351 | # 352 | # fz = (iz1 - rz) * fy1 + (rz - iz0) * fy0 353 | # 354 | # return fz 355 | # 356 | # 357 | # 358 | # def MultiPlaneImagesAer2Grd_radius(signal, estimated_height, target_height, target_width, grd_height, max_height): 359 | # ''' 360 | # :param x: tf variable, x.shape=[batch, S, S, channel] 361 | # :param height: output height 362 | # :param width: output width 363 | # :param radius: shape = [batch, height, width, channel] its value is within the range of [0, S/2). 364 | # :return: 365 | # ''' 366 | # batch, S, _, channel = tf_shape(signal, 4) 367 | # PlaneNum = estimated_height.get_shape().as_list()[-1] # shape = [batch, S, S, PlaneNum] 368 | # 369 | # Voxel = tf.transpose(tf.stack([signal]*PlaneNum, axis=-1), [0, 1, 2, 4, 3]) # shape = [batch, S, S, PlaneNum, channel] 370 | # Voxel = tf.expand_dims(estimated_height, axis=-1) * Voxel # shape = [batch, S, S, PlaneNum, channel] 371 | # 372 | # f = 144/S 373 | # 374 | # b = tf.range(0, batch) 375 | # h = tf.range(0, target_height*2) 376 | # w = tf.range(0, target_width) 377 | # c = tf.range(0, channel) 378 | # 379 | # bb, hh, ww, cc = tf.meshgrid(b, h, w, c, indexing='ij') 380 | # 381 | # sinTheta = tf.sin(ww / target_width * np.pi * 2) 382 | # cosTheta = tf.cos(ww / target_width * np.pi * 2) 383 | # tanPhi = tf.tan(hh / (target_height * 2) * np.pi) 384 | # 385 | # ww = tf.cast(ww, tf.float32) 386 | # RadiusNum = int(signal.get_shape().as_list()[1] / 2) 387 | # 388 | # target_volume = [] 389 | # for r in range(1, RadiusNum): 390 | # # r = RadiusNum - i 391 | # x = S/2 + r * cosTheta 392 | # y = S/2 + r * sinTheta 393 | # z = safe_divide(r * f, tanPhi) 394 | # z = (z - grd_height)/(max_height - grd_height) * PlaneNum 395 | # 396 | # sample = sample_bilinear_xyz(Voxel, bb, x, y, z, cc) 397 | # target_volume.append(sample) 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | # def warp_pad_columns(x, m1=1, m2=1, n1=1, n2=1): 407 | # out = tf.concat([x[:, :, -n1:, :], x, x[:, :, :n2, :]], axis=2) 408 | # return tf.pad(out, [[0, 0], [m1, m2], [0, 0], [0, 0]]) 409 | # 410 | # 411 | # def discrim_conv_cir(batch_input, out_channels, stride): 412 | # padded_input = warp_pad_columns(batch_input, m1=1, m2=1, n1=1, n2=1) 413 | # return tf.layers.conv2d(padded_input, out_channels, kernel_size=4, strides=(stride, stride), padding="valid", 414 | # kernel_initializer=tf.random_normal_initializer(0, 0.02)) 415 | # 416 | # 417 | # def gen_conv_cir(batch_input, out_channels): 418 | # initializer = tf.random_normal_initializer(0, 0.02) 419 | # x = warp_pad_columns(batch_input, m1=1, m2=1, n1=1, n2=1) 420 | # return tf.layers.conv2d(x, out_channels, kernel_size=4, strides=(2, 2), padding="valid", kernel_initializer=initializer) 421 | # 422 | # 423 | # def gen_deconv_cir(batch_input, out_channels): 424 | # initializer = tf.random_normal_initializer(0, 0.02) 425 | # _, height, width, channel = batch_input.get_shape().as_list() 426 | # x = tf.image.resize_nearest_neighbor(batch_input, (2*height, 2*width)) 427 | # x 
= warp_pad_columns(x, m1=1, m2=1, n1=1, n2=1) 428 | # return tf.layers.conv2d(x, out_channels, kernel_size=3, strides=(1,1), padding="valid", kernel_initializer=initializer) 429 | -------------------------------------------------------------------------------- /geometry/Geometry.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | from utils import * 4 | import numpy as np 5 | import tensorflow_addons as tfa 6 | 7 | def encoder_decoder(generator_inputs, generator_outputs_channels, ngf=4, activational_layer=tf.nn.softmax): 8 | layers = [] 9 | 10 | # encoder_1: [batch, 512, 512, in_channels] => [batch, 256, 256, ngf] 11 | with tf.variable_scope("encoder_1"): 12 | output = gen_conv(generator_inputs, ngf) 13 | layers.append(output) 14 | 15 | layer_specs = [ 16 | ngf * 2, # encoder_2: [batch, 256, 256, ngf] => [batch, 128, 128, ngf * 2] 17 | ngf * 4, # encoder_3: [batch, 128, 128, ngf * 2] => [batch, 64, 64, ngf * 4] 18 | ngf * 8, # encoder_4: [batch, 64, 64, ngf * 4] => [batch, 32, 32, ngf * 8] 19 | ngf * 8, # encoder_5: [batch, 32, 32, ngf * 8] => [batch, 16, 16, ngf * 8] 20 | ngf * 8, # encoder_6: [batch, 16, 16, ngf * 8] => [batch, 8, 8, ngf * 8] 21 | ngf * 8, # encoder_7: [batch, 8, 8, ngf * 8] => [batch, 4, 4, ngf * 8] 22 | # ngf * 8, # encoder_8: [batch, 4, 4, ngf * 8] => [batch, 2, 2, ngf * 8] 23 | ] 24 | 25 | for out_channels in layer_specs: 26 | with tf.variable_scope("encoder_%d" % (len(layers) + 1)): 27 | rectified = lrelu(layers[-1], 0.2) 28 | # [batch, in_height, in_width, in_channels] => [batch, in_height/2, in_width/2, out_channels] 29 | convolved = gen_conv(rectified, out_channels) 30 | output = batchnorm(convolved) 31 | layers.append(output) 32 | 33 | layer_specs = [ 34 | # (ngf * 8, 0.5), # decoder_8: [batch, 1, 4, ngf * 8] => [batch, 2, 8, ngf * 8 * 2] 35 | (ngf * 8, 0.0), # decoder_7: [batch, 2, 8, ngf * 8 * 2] => [batch, 4, 16, ngf * 8 * 2] 36 | (ngf * 8, 0.0), # decoder_6: [batch, 4, 16, ngf * 8 * 2] => [batch, 8, 32, ngf * 8 * 2] 37 | (ngf * 8, 0.0), # decoder_5: [batch, 8, 32, ngf * 8 * 2] => [batch, 16, 64, ngf * 8 * 2] 38 | (ngf * 4, 0.0), # decoder_4: [batch, 16, 64, ngf * 8 * 2] => [batch, 32, 128, ngf * 4 * 2] 39 | (ngf * 2, 0.0), # decoder_3: [batch, 32, 128, ngf * 4 * 2] => [batch, 64, 256, ngf * 2 * 2] 40 | (ngf, 0.0), # decoder_2: [batch, 64, 256, ngf * 2 * 2] => [batch, 128, 512, ngf * 2 * 2] 41 | ] 42 | 43 | num_encoder_layers = len(layers) 44 | for decoder_layer, (out_channels, dropout) in enumerate(layer_specs): 45 | skip_layer = num_encoder_layers - decoder_layer - 1 46 | with tf.variable_scope("decoder_%d" % (skip_layer + 1)): 47 | # if decoder_layer == 0: 48 | # # first decoder layer doesn't have skip connections 49 | # # since it is directly connected to the skip_layer 50 | # input = layers[-1] 51 | # else: 52 | # input = tf.concat([layers[-1], layers[skip_layer]], axis=3) 53 | input = layers[-1] 54 | 55 | rectified = tf.nn.relu(input) 56 | # [batch, in_height, in_width, in_channels] => [batch, in_height*2, in_width*2, out_channels] 57 | output = gen_deconv(rectified, out_channels) 58 | output = batchnorm(output) 59 | 60 | if dropout > 0.0: 61 | output = tf.nn.dropout(output, keep_prob=1 - dropout) 62 | layers.append(output) 63 | 64 | # decoder_1: [batch, 128, 512, ngf * 2] => [batch, 256, 1024, generator_outputs_channels] 65 | with tf.variable_scope("decoder_1"): 66 | # input = tf.concat([layers[-1], layers[0]], axis=3) 67 | rectified = 
tf.nn.relu(layers[-1]) 68 | # output = gen_deconv(rectified, generator_outputs_channels) 69 | output = tf.layers.conv2d_transpose(rectified, generator_outputs_channels, kernel_size=4, strides=(2, 2), padding="same", 70 | kernel_initializer=tf.zeros_initializer(), 71 | bias_initializer=tf.constant_initializer( 72 | np.concatenate([np.zeros(generator_outputs_channels - 1, dtype=np.float32), 73 | np.ones(1, dtype=np.float32)], axis=0))) 74 | 75 | # output = tf.tanh(output) 76 | output = activational_layer(output*100) 77 | layers.append(output) 78 | 79 | return layers[-1] 80 | 81 | 82 | def geometry_transform(aer_imgs, estimated_height, target_height, target_width, mode, grd_height, max_height, 83 | method='column', geoout_type='image', dataset='CVUSA'): 84 | ''' 85 | :param aer_imgs: 86 | :param estimated_height: 87 | :param mode: if estimated_height.channel ==1, type belongs to {'hole', 'column'}; 88 | otherwise if estimated_height.channel>1, type belongs to {'radiusPlaneMethod', 'heightPlaneMethod'} 89 | The following two parameters are only needed if mode is 'radiusPlaneMethod'. 90 | :param method: select from {'column', 'point'}. 91 | 'column' means: for each point in overhead view, we poject it and the points under it to the grd view 92 | we use cusum to mimic this process 93 | 'point' means we only project the points in the overhead view image to the grd view. 94 | :param geoout_type: select from {'volume', 'image'}. 95 | :return: 96 | ''' 97 | # PlaneNum = estimated_height.get_shape().as_list()[-1] 98 | # if height_channel==1: 99 | if mode=='heightPlaneMethod': 100 | output = MultiPlaneImagesAer2Grd_height(aer_imgs, estimated_height, target_height, target_width, grd_height, 101 | max_height, method, geoout_type, dataset) 102 | elif mode=='radiusPlaneMethod': 103 | output = MultiPlaneImagesAer2Grd_radius(aer_imgs, estimated_height, target_height, target_width, 104 | grd_height, max_height, method, geoout_type, dataset) 105 | return output 106 | 107 | 108 | def MultiPlaneImagesAer2Grd_height(signal, estimated_height, target_height, target_width, grd_height=-2, max_height=30, 109 | method='column', geoout_type='image', dataset='CVUSA'): 110 | PlaneNum = estimated_height.get_shape().as_list()[-1] 111 | 112 | if method == 'column': 113 | estimated_height = tf.cumsum(estimated_height, axis=-1) 114 | # the maximum plane corresponds to grd plane 115 | batch, S, _, channel = tf_shape(signal, 4) 116 | H, W, C = signal.get_shape().as_list()[1:] 117 | assert (H==W) 118 | 119 | i = np.arange(0, (target_height*2)) 120 | j = np.arange(0, target_width) 121 | jj, ii = np.meshgrid(j, i) 122 | 123 | if dataset=='CVUSA': 124 | f = H/55 125 | elif dataset=='CVACT' or dataset=='CVACThalf': 126 | f = H/(50*206/256) 127 | elif dataset=='CVACTunaligned': 128 | f = H/50 129 | elif dataset=='OP': 130 | f = H/100 131 | 132 | # f = H/144 133 | 134 | tanii = np.tan(ii * np.pi / (target_height*2)) 135 | 136 | images_list = [] 137 | alphas_list = [] 138 | 139 | # images_list_volume = [] 140 | 141 | for i in range(PlaneNum): 142 | z = grd_height + (max_height-grd_height) * i/PlaneNum 143 | 144 | u_dup = -1 * np.ones([(target_height*2), target_width]) 145 | v_dup = -1 * np.ones([(target_height*2), target_width]) 146 | m = target_height 147 | 148 | v = S / 2. - f * z * tanii * np.sin(jj * 2 * np.pi / target_width) 149 | u = S / 2. 
+ f * z * tanii * np.cos(jj * 2 * np.pi / target_width) 150 | 151 | if z < 0: 152 | u_dup[-m:, :] = u[-m:, :] 153 | v_dup[-m:, :] = v[-m:, :] 154 | else: 155 | u_dup[0:m, :] = u[0:m, :] 156 | v_dup[0:m, :] = v[0:m, :] 157 | 158 | n = int(target_height/2) 159 | 160 | uv = np.stack([v_dup[n:-n,...], u_dup[n:-n,...]], axis=-1) 161 | uv = uv.astype(np.float32) 162 | warp = tf.stack([uv]*batch, axis=0) 163 | 164 | # images_prob = tf.contrib.resampler.resampler(signal*estimated_height[..., i:i+1], warp) 165 | # images = tf.contrib.resampler.resampler(signal, warp) 166 | # alphas = tf.contrib.resampler.resampler(estimated_height[..., i:i + 1], warp) 167 | images = tfa.image.resampler(signal, warp) 168 | alphas = tfa.image.resampler(estimated_height[..., i:i + 1], warp) 169 | images_list.append(images) 170 | alphas_list.append(alphas) 171 | 172 | # images_list_volume.append(images_prob) 173 | 174 | if geoout_type == 'volume': 175 | 176 | return tf.concat([images_list[i]*alphas_list[i] for i in range(PlaneNum)], axis=-1) 177 | 178 | # return tf.concat(images_list, axis=-1) * tf.concat(alphas_list, axis=-1) # shape = [batch, target_height, target_width, channel*PlaneNum] 179 | 180 | elif geoout_type == 'image': 181 | for i in range(PlaneNum): 182 | rgb = images_list[i] 183 | a = alphas_list[i] 184 | if i == 0: 185 | output = rgb * a 186 | else: 187 | rgb_by_alpha = rgb * a 188 | output = rgb_by_alpha + output * (1 - a) 189 | 190 | return output # shape = [batch, target_height, target_width, channel] 191 | 192 | # batch_image = tf.stack(images_list, axis=-1) 193 | # 194 | # batch_mulplanes = tf.reshape(batch_image, [-1, target_height, target_width, C*PlaneNum]) 195 | # 196 | # return batch_mulplanes 197 | 198 | 199 | def MultiPlaneImagesAer2Grd_radius(signal, estimated_height, target_height, target_width, grd_height, max_height, 200 | method='column', geoout_type='image', dataset='CVUSA'): 201 | ''' 202 | This function first converts the overhead-view uv coordinates to polar coordinates, i.e., from overhead planes to a cylindrical coordinate system, 203 | and then from cylindrical coordinates to spherical coordinates. 204 | :param signal: [batch, height, width, channel] image 205 | :param estimated_height: [batch, height, width, PlaneNum] 206 | :param target_height: height/phi direction 207 | :param target_width: azimuth direction 208 | :param grd_height: 209 | :param max_height: 210 | :param method: select from {'column', 'point'}. 211 | 'column' means: for each point in the overhead view, we project it and the points below it to the grd view; 212 | tf.cumsum is used to mimic this process. 213 | 'point' means we only project the points in the overhead-view image to the grd view. 214 | :param geoout_type: select from {'volume', 'image'}.
215 | :return: 216 | ''' 217 | PlaneNum = estimated_height.get_shape().as_list()[-1] 218 | batch, height, width, channel = tf_shape(signal, rank=4) 219 | 220 | if method=='column': 221 | # estimated_height = tf.cumsum(estimated_height, axis=-1, reverse=True) 222 | # # the 0th plane corresponds to grd plane 223 | estimated_height = tf.cumsum(estimated_height, axis=-1) 224 | # the maximum plane corresponds to grd plane 225 | 226 | voxel = tf.transpose(tf.stack([signal]*PlaneNum, axis=-1), [0, 1, 2, 4, 3]) 227 | # * tf.expand_dims(estimated_height, axis=-1) 228 | voxel = tf.reshape(voxel, [batch, height, width, PlaneNum*channel]) 229 | 230 | ################### from overhead view uvz coordinate to cylinder pthetaz coordinate ######################### 231 | S = signal.get_shape().as_list()[1] 232 | radius = int(S//4) 233 | azimuth = target_width 234 | 235 | i = np.arange(0, radius) 236 | j = np.arange(0, azimuth) 237 | jj, ii = np.meshgrid(j, i) 238 | 239 | # if train_mode: 240 | # sx = np.random.uniform(-10, 10) 241 | # sy = np.random.uniform(-10, 10) 242 | # rx = np.minimum(S/2.-sx, S/2.+sx) 243 | # ry = np.minimum(S/2.-sy, S/2.+sy) 244 | # 245 | # y = (S / 2. + sx) - rx / radius * (radius - 1 - ii) * np.sin(2 * np.pi * jj / azimuth) 246 | # x = (S / 2. + sy) + ry / radius * (radius - 1 - ii) * np.cos(2 * np.pi * jj / azimuth) 247 | # 248 | # else: 249 | 250 | y = S / 2. - S / 2. / radius * (radius - 1 - ii) * np.sin(2 * np.pi * jj / azimuth) 251 | x = S / 2. + S / 2. / radius * (radius - 1 - ii) * np.cos(2 * np.pi * jj / azimuth) 252 | 253 | uv = np.stack([y, x], axis=-1) 254 | uv = uv.astype(np.float32) 255 | warp = tf.stack([uv] * batch, axis=0) 256 | 257 | # imgs = tf.contrib.resampler.resampler(voxel, warp) 258 | imgs = tfa.image.resampler(voxel, warp) 259 | imgs = tf.reshape(imgs, [batch, radius, azimuth, PlaneNum, channel]) # batch, radius, azimuth, PlaneNum, channel] 260 | # imgs = tf.transpose(imgs, [0, 3, 2, 1, 4])[:, ::-1, ...] 
261 | # # shape = [batch, PlaneNum, azimuth, radius, channel] 262 | # # the maximum PlaneNum corresponds to ground plane 263 | # alpha = tf.contrib.resampler.resampler(estimated_height, warp)[..., ::-1] # batch, radius, azimuth, PlaneNum 264 | # # the maximum PlaneNum corresponds to ground plane 265 | # alpha = tf.transpose(alpha, [0, 3, 2, 1]) # shape = [batch, PlaneNum, azimuth, radius] 266 | imgs = tf.transpose(imgs, [0, 3, 2, 1, 4]) 267 | # shape = [batch, PlaneNum, azimuth, radius, channel] 268 | # the maximum PlaneNum corresponds to ground plane 269 | # alpha = tf.contrib.resampler.resampler(estimated_height, warp) # batch, radius, azimuth, PlaneNum 270 | alpha = tfa.image.resampler(estimated_height, warp) 271 | # the maximum PlaneNum corresponds to ground plane 272 | alpha = tf.transpose(alpha, [0, 3, 2, 1]) # shape = [batch, PlaneNum, azimuth, radius] 273 | 274 | if dataset == 'CVUSA': 275 | meters = 55 276 | elif dataset == 'CVACT' or dataset=='CVACThalf': 277 | meters = (50 * 206 / 256) 278 | elif dataset == 'CVACTunaligned': 279 | meters = 50 280 | elif dataset == 'OP': 281 | meters = 100 282 | 283 | ################### from cylinder pthetaz coordinate to grd phithetar coordinate ######################### 284 | if dataset=='CVUSA' or dataset=='CVACThalf': 285 | i = np.arange(0, target_height*2) 286 | j = np.arange(0, target_width) 287 | jj, ii = np.meshgrid(j, i) 288 | tanPhi = np.tan(ii / target_height / 2 * np.pi) 289 | tanPhi[np.where(tanPhi==0)] = 1e-16 290 | 291 | n = int(target_height//2) 292 | 293 | MetersPerRadius = meters / 2 / radius 294 | rgb_layers = [] 295 | a_layers = [] 296 | for r in range(0, radius): 297 | # from far to near 298 | z = (radius-r-1)*MetersPerRadius/tanPhi[n:-n] 299 | z = (PlaneNum-1) - (z - grd_height)/(max_height - grd_height) * (PlaneNum-1) 300 | theta = jj[n:-n] 301 | uv = np.stack([theta, z], axis=-1) 302 | uv = uv.astype(np.float32) 303 | warp = tf.stack([uv] * batch, axis=0) 304 | # rgb = tf.contrib.resampler.resampler(imgs[..., r, :], warp) 305 | rgb = tfa.image.resampler(imgs[..., r, :], warp) 306 | # a = tf.contrib.resampler.resampler(alpha[..., r:r + 1], warp) 307 | a = tfa.image.resampler(alpha[..., r:r+1], warp) 308 | 309 | rgb_layers.append(rgb) 310 | a_layers.append(a) 311 | 312 | else: 313 | i = np.arange(0, target_height) 314 | j = np.arange(0, target_width) 315 | jj, ii = np.meshgrid(j, i) 316 | tanPhi = np.tan(ii / target_height * np.pi) 317 | tanPhi[np.where(tanPhi == 0)] = 1e-16 318 | 319 | # n = int(target_height // 2) 320 | 321 | MetersPerRadius = meters / 2 / radius 322 | rgb_layers = [] 323 | a_layers = [] 324 | for r in range(0, radius): 325 | # from far to near 326 | z = (radius - r - 1) * MetersPerRadius / tanPhi 327 | z = (PlaneNum - 1) - (z - grd_height) / (max_height - grd_height) * (PlaneNum - 1) 328 | theta = jj 329 | uv = np.stack([theta, z], axis=-1) 330 | uv = uv.astype(np.float32) 331 | warp = tf.stack([uv] * batch, axis=0) 332 | # rgb = tf.contrib.resampler.resampler(imgs[..., r, :], warp) 333 | # a = tf.contrib.resampler.resampler(alpha[..., r:r + 1], warp) 334 | rgb = tfa.image.resampler(imgs[..., r, :], warp) 335 | a = tfa.image.resampler(alpha[..., r:r + 1], warp) 336 | 337 | rgb_layers.append(rgb) 338 | a_layers.append(a) 339 | 340 | if geoout_type=='volume': 341 | 342 | return tf.concat([rgb_layers[i]*a_layers[i] for i in range(radius)], axis=-1) 343 | 344 | # return tf.concat(rgb_layers[::-1], axis=-1) * tf.concat(a_layers[::-1], axis=-1) # shape = [batch, target_height, target_width, channel*PlaneNum] 
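        # The 'image' branch below composites the radius planes back to front with the
        # standard "over" operator: the planes were appended from far to near, and each
        # nearer plane is alpha-blended on top, i.e. out = rgb_i * a_i + out * (1 - a_i).
        # A minimal NumPy-style sketch of the same recurrence (illustrative only; the
        # names rgbs/alphas are hypothetical and not part of this repository):
        #     out = rgbs[0] * alphas[0]
        #     for rgb, a in zip(rgbs[1:], alphas[1:]):  # ordered far -> near
        #         out = rgb * a + out * (1 - a)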
345 | 346 | elif geoout_type=='image': 347 | for i in range(radius): 348 | rgb = rgb_layers[i] 349 | a = a_layers[i] 350 | if i==0: 351 | output = rgb * a 352 | else: 353 | rgb_by_alpha = rgb * a 354 | output = rgb_by_alpha + output * (1 - a) 355 | 356 | return output # shape = [batch, target_height, target_width, channel] 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | # def geometry_transform_hole(aer_imgs, estimated_height, target_height, target_width, grd_height=-2.5, max_height=47.5): 378 | # _, aer_size, _, heightPlaneNum = estimated_height.get_shape().as_list() 379 | # batch, _, _, channel = tf_shape(aer_imgs, 4) 380 | # 381 | # f = 144/aer_size 382 | # 383 | # assert heightPlaneNum==1 384 | # estimated_height = tf.squeeze(estimated_height) # shape = [batch, aer_size, aer_size] 385 | # 386 | # estimated_height = grd_height + (max_height - grd_height) * estimated_height 387 | # 388 | # i = np.arange(0, aer_size) 389 | # j = np.arange(0, aer_size) 390 | # jj, ii = np.meshgrid(j, i) 391 | # 392 | # radius = np.sqrt((ii - (aer_size / 2 - 0.5)) ** 2 + (jj - (aer_size / 2 - 0.5)) ** 2) 393 | # 394 | # Theta1 = tf.atan( 395 | # (ii[:, 0:int(aer_size / 2)] - (aer_size / 2 - 0.5)) / (jj[:, 0:int(aer_size / 2)] - (aer_size / 2 - 0.5))) + 0.5 * np.pi 396 | # Theta2 = tf.atan( 397 | # (ii[:, int(aer_size / 2):] - (aer_size / 2 - 0.5)) / (jj[:, int(aer_size / 2):] - (aer_size / 2 - 0.5))) + 1.5 * np.pi 398 | # Theta = tf.concat([Theta1, Theta2], axis=-1) 399 | # 400 | # Phimax = tf.atan2(radius, estimated_height*f) 401 | # Phimin = tf.atan2(radius, grd_height*f) 402 | # 403 | # Theta = Theta / 2 / np.pi * target_width # shape = [aer_size, aer_size] 404 | # Phimax = Phimax / np.pi * (target_height * 2) # shape = [aer_size, aer_size] 405 | # Phimin = Phimin / np.pi * (target_height * 2) # shape = [aer_size, aer_size] 406 | # 407 | # target = tf.zeros([batch, target_height*2, target_width, channel]) 408 | # 409 | # for rr in range(aer_size//2): 410 | # 411 | # r = aer_size//2 - rr 412 | # 413 | # indices = tf.where((radius > (r-1)) & (radius <= r)) # shape = [batch*num, 3] 3--> batch, height, width 414 | # 415 | # selected_Theta = tf.gather_nd(Theta, indices) # shape = [batch*num] 416 | # selected_Phimax = tf.gather_nd(Phimax, indices) # shape = [batch*num] 417 | # selected_Phimin = tf.gather_nd(Phimin, indices) # shape = [batch*num] 418 | # 419 | # rgb = tf.gather_nd(aer_imgs, indices) # shape = [batch*num, channel] channel = aer_imgs.shape[-1] 420 | # 421 | # iy = tf.minimum(tf.cast(tf.round(selected_Theta), tf.int64), target_width-1) # shape = [batch*num] 422 | # ix1 = tf.minimum(tf.cast(tf.round(selected_Phimax), tf.int64), target_height*2-1) # shape = [batch*num] 423 | # ix0 = tf.minimum(tf.cast(tf.round(selected_Phimin), tf.int64), target_height*2 - 1) # shape = [batch*num] 424 | # 425 | # num = tf_shape(iy, 1)//batch 426 | # bi = tf.reshape(tf.range(batch), [1, batch]) 427 | # bi = tf.tile(bi, [num, 1]) 428 | # bi = tf.reshape(bi, [-1]) # shape = [batch*num] 429 | # index = tf.stack([bi, ix1, iy], axis=-1) # shape = [batch*num, 3] 3-->batch, phi, theta 430 | # 431 | # assign = tf.assign(tf.gather_nd(target, index), rgb) 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | -------------------------------------------------------------------------------- /script3/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import 
absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | # os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' 7 | # os.environ['CUDA_VISIBLE_DEVICES'] = '0' 8 | 9 | import sys 10 | sys.path.append('../') 11 | 12 | import tensorflow.compat.v1 as tf 13 | tf.disable_v2_behavior() 14 | import numpy as np 15 | import argparse 16 | import os 17 | import json 18 | 19 | import random 20 | import collections 21 | import math 22 | import time 23 | import PIL.Image as Image 24 | import scipy.io as scio 25 | 26 | from model import * 27 | 28 | parser = argparse.ArgumentParser() 29 | # parser.add_argument("--input_dir", help="path to folder containing images", default='facades/train') 30 | parser.add_argument("--dataset", help="dataset", default='CVACT') 31 | parser.add_argument("--mode", choices=["train", "test", "export"], default="train") 32 | parser.add_argument("--output_dir", help="where to put output files", default='pix2pix_perceploss') 33 | parser.add_argument("--seed", type=int) 34 | parser.add_argument("--checkpoint", help="directory with checkpoint to resume training from or use for testing") 35 | 36 | # parser.add_argument("--max_steps", type=int, help="number of training steps (0 to disable)") 37 | parser.add_argument("--start_epochs", type=int, default=0, help="number of training epochs") 38 | parser.add_argument("--max_epochs", type=int, default=35, help="number of training epochs") 39 | parser.add_argument("--summary_freq", type=int, default=100, help="update summaries every summary_freq steps") 40 | parser.add_argument("--progress_freq", type=int, default=50, help="display progress every progress_freq steps") 41 | parser.add_argument("--trace_freq", type=int, default=0, help="trace execution every trace_freq steps") 42 | parser.add_argument("--display_freq", type=int, default=0, help="write current training images every display_freq steps") 43 | parser.add_argument("--save_freq", type=int, default=5000, help="save model every save_freq steps, 0 to disable") 44 | 45 | parser.add_argument("--separable_conv", action="store_true", help="use separable convolutions in the generator") 46 | parser.add_argument("--aspect_ratio", type=float, default=1.0, help="aspect ratio of output images (width/height)") 47 | parser.add_argument("--lab_colorization", action="store_true", help="split input image into brightness (A) and color (B)") 48 | parser.add_argument("--batch_size", type=int, default=4, help="number of images in batch") 49 | parser.add_argument("--which_direction", type=str, default="AtoG", choices=["AtoG", "GtoA"]) 50 | parser.add_argument("--ngf", type=int, default=64, help="number of generator filters in first conv layer") 51 | parser.add_argument("--ndf", type=int, default=64, help="number of discriminator filters in first conv layer") 52 | parser.add_argument("--scale_size", type=int, default=286, help="scale images to this size before cropping to 256x256") 53 | parser.add_argument("--flip", dest="flip", action="store_true", help="flip images horizontally") 54 | parser.add_argument("--no_flip", dest="flip", action="store_false", help="don't flip images horizontally") 55 | parser.set_defaults(flip=True) 56 | parser.add_argument("--lr", type=float, default=0.0002, help="initial learning rate for adam") 57 | parser.add_argument("--beta1", type=float, default=0.5, help="momentum term of adam") 58 | 59 | parser.add_argument("--inputs_type", choices=["original", "geometry", "polar", "tanpolar"], default="geometry") 60 | 61 | 
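# Loss weights and geometry options follow. A hypothetical example invocation using only
# flags defined in this file (the values are illustrative, not recommended settings):
#   python main.py --dataset CVUSA --inputs_type geometry --height_mode radiusPlaneMethod \
#       --method column --geoout_type image --l1_weight_grd 100 --perceptual_weight_grd 0 --skip 0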
parser.add_argument("--gan_weight", type=float, default=1.0, help="weight on GAN term for generator gradient") 62 | parser.add_argument("--l1_weight_grd", type=float, default=100.0, help="weight on GAN term for generator gradient") 63 | parser.add_argument("--l1_weight_aer", type=float, default=0.0, help="weight on L1 term for generator gradient") 64 | parser.add_argument("--perceptual_weight_grd", type=float, default=0.0, help="weight on GAN term for generator gradient") 65 | parser.add_argument("--perceptual_weight_aer", type=float, default=0.0, help="weight on GAN term for generator gradient") 66 | 67 | parser.add_argument("--heightPlaneNum", type=int, default=1, help="weight on GAN term for generator gradient") 68 | parser.add_argument("--radiusPlaneNum", type=int, default=32, help="weight on GAN term for generator gradient") 69 | parser.add_argument("--height_mode", choices=['radiusPlaneMethod', 'heightPlaneMethod'], default='radiusPlaneMethod') 70 | # Only when 'height_mode' is 'radiusPlaneMethod', the following two parameters are required. Otherwise not. 71 | parser.add_argument("--method", choices=['column', 'point'], default='column') 72 | parser.add_argument("--geoout_type", choices=['volume', 'image'], default='image') 73 | 74 | parser.add_argument("--finalout_type", choices=['image', 'rgba', 'fgbg'], default='image') 75 | 76 | parser.add_argument("--skip", type=int, default=0, help="use skip connection or not") 77 | 78 | 79 | # export options 80 | parser.add_argument("--output_filetype", default="png", choices=["png", "jpeg"]) 81 | a = parser.parse_args() 82 | 83 | EPS = 1e-12 84 | CROP_SIZE = 256 85 | 86 | 87 | if a.inputs_type != 'geometry': 88 | if a.finalout_type == 'image': 89 | nameStr = a.inputs_type + '_' + a.finalout_type + \ 90 | '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) + \ 91 | '_skip_' + str(a.skip) 92 | else: 93 | nameStr = a.inputs_type + '_' + a.finalout_type + \ 94 | '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) + \ 95 | '_L1Aer_' + str(a.l1_weight_aer) + '_PerAer_' + str(a.perceptual_weight_aer) + \ 96 | '_skip_' + str(a.skip) 97 | else: 98 | 99 | if a.height_mode == 'heightPlaneMethod': 100 | if a.finalout_type == 'image': 101 | nameStr = a.inputs_type + '_' + a.height_mode + '_' + str(a.heightPlaneNum) + '_' + \ 102 | a.method + '_' + a.geoout_type + '_' + \ 103 | a.finalout_type + \ 104 | '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) + \ 105 | '_skip_' + str(a.skip) 106 | else: 107 | nameStr = a.inputs_type + '_' + a.height_mode + '_' + str(a.heightPlaneNum) + '_' + \ 108 | a.method + '_' + a.geoout_type + '_' + \ 109 | a.finalout_type + \ 110 | '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) + \ 111 | '_L1Aer_' + str(a.l1_weight_aer) + '_PerAer_' + str(a.perceptual_weight_aer) + \ 112 | '_skip_' + str(a.skip) 113 | elif a.height_mode == 'radiusPlaneMethod': 114 | if a.finalout_type == 'image': 115 | nameStr = a.inputs_type + '_' + a.height_mode + '_' + str(a.heightPlaneNum) + '_' + \ 116 | a.method + '_' + a.geoout_type + '_' + \ 117 | a.finalout_type + \ 118 | '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) + \ 119 | '_skip_' + str(a.skip) 120 | else: 121 | nameStr = a.inputs_type + '_' + a.height_mode + '_' + str(a.heightPlaneNum) + '_' + \ 122 | a.method + '_' + a.geoout_type + '_' + \ 123 | a.finalout_type + \ 124 | '_L1Grd_' + str(a.l1_weight_grd) + '_PerGrd_' + str(a.perceptual_weight_grd) + \ 125 
| '_L1Aer_' + str(a.l1_weight_aer) + '_PerAer_' + str(a.perceptual_weight_aer) + \ 126 | '_skip_' + str(a.skip) 127 | 128 | 129 | def save_images(fetches, step=None): 130 | image_dir = os.path.join('./GeneratedData/', a.dataset, nameStr, 'image') 131 | if not os.path.exists(image_dir): 132 | os.makedirs(image_dir) 133 | height_dir = os.path.join('./GeneratedData/', a.dataset, nameStr, 'height_distribution') 134 | if not os.path.exists(height_dir): 135 | os.makedirs(height_dir) 136 | geotrans_dir = os.path.join('./GeneratedData/', a.dataset, nameStr, 'geotrans') 137 | if not os.path.exists(geotrans_dir): 138 | os.makedirs(geotrans_dir) 139 | 140 | filesets = [] 141 | for i, in_path in enumerate(fetches["paths"]): 142 | name, _ = os.path.splitext(os.path.basename(in_path.decode("utf8"))) 143 | fileset = {"name": name, "step": step} 144 | for kind in ["outputs"]: 145 | filename = name + ".png" 146 | if step is not None: 147 | filename = "%08d-%s" % (step, filename) 148 | fileset[kind] = filename 149 | out_path = os.path.join(image_dir, filename) 150 | contents = fetches[kind][i] 151 | with open(out_path, "wb") as f: 152 | f.write(contents) 153 | for kind in ["generator_inputs"]: 154 | filename = name + ".png" 155 | if step is not None: 156 | filename = "%08d-%s" % (step, filename) 157 | fileset[kind] = filename 158 | out_path = os.path.join(geotrans_dir, filename) 159 | contents = fetches[kind][i] 160 | with open(out_path, "wb") as f: 161 | f.write(contents) 162 | for kind in ["estimated_height"]: 163 | filename = name + ".png" 164 | if step is not None: 165 | filename = "%08d-%s" % (step, filename) 166 | fileset[kind] = filename 167 | out_path = os.path.join(height_dir, filename) 168 | # contents = cmap[fetches[kind][i]] 169 | # # contents = (fetches[kind][i]/2.*255.).astype(np.uint8) 170 | # contents = Image.fromarray(contents) 171 | # contents.save(out_path) 172 | scio.savemat(out_path.replace('png','mat'), {'height': fetches[kind][i]}) 173 | 174 | # with open(out_path, "wb") as f: 175 | # f.write(contents) 176 | filesets.append(fileset) 177 | return filesets 178 | 179 | 180 | def main(): 181 | if a.seed is None: 182 | a.seed = random.randint(0, 2**31 - 1) 183 | 184 | with tf.Graph().as_default(): 185 | 186 | tf.set_random_seed(a.seed) 187 | # tf.random.set_seed(a.seed) 188 | np.random.seed(a.seed) 189 | random.seed(a.seed) 190 | 191 | cmap = np.load('../cmap.npy') 192 | print(cmap.shape) 193 | 194 | output_dir = os.path.join(a.dataset, nameStr, 'aer') 195 | 196 | if not os.path.exists(output_dir): 197 | os.makedirs(output_dir) 198 | 199 | if a.mode == "test" or a.mode == "export": 200 | # if a.checkpoint is None: 201 | # raise Exception("checkpoint required for test mode") 202 | 203 | # load some options from the checkpoint 204 | checkpoint_dir = os.path.join(a.dataset, nameStr, 'aer') 205 | options = {"which_direction", "ngf", "ndf", "lab_colorization"} 206 | with open(os.path.join(checkpoint_dir, "options.json")) as f: 207 | for key, val in json.loads(f.read()).items(): 208 | if key in options: 209 | print("loaded", key, "=", val) 210 | setattr(a, key, val) 211 | # disable these features in test mode 212 | a.scale_size = CROP_SIZE 213 | a.flip = False 214 | 215 | for k, v in a._get_kwargs(): 216 | print(k, "=", v) 217 | 218 | with open(os.path.join(output_dir, "options.json"), "w") as f: 219 | f.write(json.dumps(vars(a), sort_keys=True, indent=4)) 220 | 221 | if a.dataset=='CVUSA': 222 | from load_data.load_data_cvusa import load_examples 223 | elif a.dataset=='CVACT': 224 | from 
load_data.load_data_cvact import load_examples 225 | elif a.dataset=='CVACThalf': 226 | from load_data.load_data_cvact_half import load_examples 227 | elif a.dataset=='CVACTunaligned': 228 | from load_data.load_data_cvact_unaligned import load_examples 229 | elif a.dataset=='OP': 230 | from load_data.load_data_op import load_examples 231 | 232 | examples = load_examples(a.mode, a.batch_size) 233 | print("examples count = %d" % examples.count) 234 | 235 | if a.inputs_type == 'original': 236 | inputs = examples.aer 237 | elif a.inputs_type == 'polar': 238 | inputs = examples.polar 239 | elif a.inputs_type == 'tanpolar': 240 | inputs = examples.tanpolar 241 | else: 242 | inputs = examples.aer 243 | 244 | targets = examples.pano 245 | ref_images = examples.tanpolar 246 | 247 | # inputs and targets are [batch_size, height, width, channels] 248 | model = create_model(inputs, targets, ref_images, a) 249 | 250 | inputs = deprocess(inputs) 251 | targets = deprocess(targets) 252 | outputs = deprocess(model.outputs) 253 | converted_generator_inputs = deprocess(model.generator_inputs) 254 | 255 | def convert(image): 256 | if a.aspect_ratio != 1.0: 257 | # upscale to correct aspect ratio 258 | size = [CROP_SIZE, int(round(CROP_SIZE * a.aspect_ratio))] 259 | image = tf.image.resize_images(image, size=size, method=tf.image.ResizeMethod.BICUBIC) 260 | 261 | return tf.image.convert_image_dtype(image, dtype=tf.uint8, saturate=True) 262 | 263 | # reverse any processing on images so they can be written to disk or displayed to user 264 | with tf.name_scope("convert_inputs"): 265 | converted_inputs = convert(inputs) 266 | 267 | with tf.name_scope("convert_targets"): 268 | converted_targets = convert(targets) 269 | 270 | with tf.name_scope("convert_outputs"): 271 | converted_outputs = convert(outputs) 272 | 273 | with tf.name_scope("convert_generator_inputs"): 274 | converted_generator_inputs = convert(converted_generator_inputs) 275 | 276 | # with tf.name_scope("convert_estimated_height"): 277 | # converted_estimated_height = convert(model.estimated_height) 278 | 279 | with tf.name_scope("encode_images"): 280 | display_fetches = { 281 | "paths": examples.paths, 282 | "inputs": tf.map_fn(tf.image.encode_png, converted_inputs, dtype=tf.string, name="input_pngs"), 283 | "targets": tf.map_fn(tf.image.encode_png, converted_targets, dtype=tf.string, name="target_pngs"), 284 | "outputs": tf.map_fn(tf.image.encode_png, converted_outputs, dtype=tf.string, name="output_pngs"), 285 | "generator_inputs": tf.map_fn(tf.image.encode_png, converted_generator_inputs, dtype=tf.string, name="geometry_transfer_pngs"), 286 | # "generator_inputs": converted_generator_inputs, 287 | "estimated_height": model.estimated_height, 288 | # "height": tf.map_fn(tf.image.encode_png, converted_estimated_height, dtype=tf.string, 289 | # name="height_maps"), 290 | 291 | } 292 | 293 | # summaries 294 | with tf.name_scope("inputs_summary"): 295 | tf.summary.image("inputs", converted_inputs) 296 | 297 | with tf.name_scope("targets_summary"): 298 | tf.summary.image("targets", converted_targets) 299 | 300 | with tf.name_scope("outputs_summary"): 301 | tf.summary.image("outputs", converted_outputs) 302 | 303 | with tf.name_scope("generator_inputs_summary"): 304 | tf.summary.image("generator_inputs", converted_generator_inputs) 305 | 306 | with tf.name_scope("estimated_height_summary"): 307 | tf.summary.image('estimated_height', tf.argmax(model.estimated_height, axis=-1)[..., None]/64) 308 | # tf.summary.image("predict_fake", 
tf.image.convert_image_dtype(tf.expand_dims(model.estimated_height/32, axis=-1), dtype=tf.uint8)) 309 | 310 | tf.summary.scalar("discriminator_loss", model.discrim_loss) 311 | tf.summary.scalar("generator_loss_GAN", model.gen_loss_GAN) 312 | tf.summary.scalar("generator_loss_L1", model.gen_loss_L1) 313 | tf.summary.scalar("gen_loss_perceptual", model.gen_loss_perceptual) 314 | 315 | for var in tf.trainable_variables(): 316 | tf.summary.histogram(var.op.name + "/values", var) 317 | 318 | for grad, var in model.discrim_grads_and_vars + model.gen_grads_and_vars: 319 | tf.summary.histogram(var.op.name + "/gradients", grad) 320 | 321 | with tf.name_scope("parameter_count"): 322 | parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()]) 323 | 324 | saver = tf.train.Saver(max_to_keep=1) 325 | 326 | # t_vars = tf.trainable_variables() 327 | # h_vars = [] 328 | # for var in t_vars: 329 | # if 'height_estimation' in var.op.name: 330 | # h_vars.append(var) 331 | # print(len(h_vars)) 332 | # print(h_vars[-1]) 333 | # print(h_vars[-2].op.name) 334 | # print(h_vars[-1].op.name) 335 | 336 | 337 | logdir = output_dir if (a.trace_freq > 0 or a.summary_freq > 0) else None 338 | sv = tf.train.Supervisor(logdir=logdir, save_summaries_secs=0, saver=None) 339 | with sv.managed_session() as sess: 340 | print("parameter_count =", sess.run(parameter_count)) 341 | 342 | # t_vars = tf.trainable_variables() 343 | # v_1, v_2 = sess.run([h_vars[-1], h_vars[-2]]) 344 | # print(v_1) 345 | # print(v_2) 346 | 347 | if a.checkpoint is not None or a.mode == 'test': 348 | # if a.mode == "test": 349 | print("loading model from checkpoint") 350 | checkpoint_dir = os.path.join(a.dataset, nameStr, 'aer') 351 | checkpoint = tf.train.latest_checkpoint(checkpoint_dir) 352 | saver.restore(sess, checkpoint) 353 | 354 | global_step_from_restore = sess.run(sv.global_step) 355 | start_epoch = int(global_step_from_restore / examples.steps_per_epoch) 356 | print('====================') 357 | print(global_step_from_restore, start_epoch) 358 | print('====================') 359 | 360 | else: 361 | start_epoch = 0 362 | 363 | # max_steps = 2**32 364 | # if a.max_epochs is not None: 365 | max_steps = examples.steps_per_epoch * a.max_epochs 366 | start_steps = examples.steps_per_epoch * start_epoch 367 | # if a.max_steps is not None: 368 | # max_steps = a.max_steps 369 | 370 | if a.mode == "test": 371 | # testing 372 | # at most, process the test data once 373 | start = time.time() 374 | max_steps = min(examples.steps_per_epoch, max_steps) 375 | for step in range(max_steps): 376 | results = sess.run(display_fetches) 377 | filesets = save_images(results) 378 | for i, f in enumerate(filesets): 379 | print("evaluated image", f["name"]) 380 | # index_path = append_index(filesets) 381 | # print("wrote index at", index_path) 382 | print("rate", (time.time() - start) / max_steps) 383 | else: 384 | # training 385 | start = time.time() 386 | 387 | for step in range(start_steps, max_steps): 388 | def should(freq): 389 | return freq > 0 and ((step + 1) % freq == 0 or step == max_steps - 1) 390 | 391 | options = None 392 | run_metadata = None 393 | if should(a.trace_freq): 394 | options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) 395 | run_metadata = tf.RunMetadata() 396 | 397 | fetches = { 398 | "train": model.train, 399 | "global_step": sv.global_step, 400 | } 401 | 402 | if should(a.progress_freq): 403 | fetches["discrim_loss"] = model.discrim_loss 404 | fetches["gen_loss_GAN"] = model.gen_loss_GAN 405 | 
fetches["gen_loss_L1"] = model.gen_loss_L1 406 | fetches["gen_loss_perceptual"] = model.gen_loss_perceptual 407 | 408 | if should(a.summary_freq): 409 | fetches["summary"] = sv.summary_op 410 | 411 | if should(a.display_freq): 412 | fetches["display"] = display_fetches 413 | 414 | results = sess.run(fetches, options=options, run_metadata=run_metadata) 415 | # geo_trans = sess.run(converted_generator_inputs, options=options, run_metadata=run_metadata) 416 | # print(geo_trans.shape) 417 | # for i in range(0, a.batch_size): 418 | # img = Image.fromarray(geo_trans[i]) 419 | # img.save('geotrans_' + str(i) + '.png') 420 | 421 | if should(a.summary_freq): 422 | print("recording summary") 423 | sv.summary_writer.add_summary(results["summary"], results["global_step"]) 424 | 425 | height = sess.run(model.estimated_height, options=options, run_metadata=run_metadata) 426 | height = np.argmax(height, axis=-1) 427 | # scio.savemat('height.mat', {'height': height}) 428 | for b in range(0, a.batch_size): 429 | img = cmap[height[b].squeeze()] 430 | img = Image.fromarray(img) 431 | img.save(str(b)+'height.png') 432 | 433 | if should(a.trace_freq): 434 | print("recording trace") 435 | sv.summary_writer.add_run_metadata(run_metadata, "step_%d" % results["global_step"]) 436 | 437 | if should(a.progress_freq): 438 | # global_step will have the correct step count if we resume from a checkpoint 439 | train_epoch = math.ceil(results["global_step"] / examples.steps_per_epoch) 440 | train_step = (results["global_step"] - 1) % examples.steps_per_epoch + 1 441 | rate = (step - start_steps + 1) * a.batch_size / (time.time() - start) 442 | remaining = (max_steps - step) * a.batch_size / rate 443 | print("progress epoch %d step %d image/sec %0.1f remaining %dm" % (train_epoch, train_step, rate, remaining / 60)) 444 | print("discrim_loss", results["discrim_loss"]) 445 | print("gen_loss_GAN", results["gen_loss_GAN"]) 446 | print("gen_loss_L1", results["gen_loss_L1"]) 447 | print("gen_loss_perceptual", results["gen_loss_perceptual"]) 448 | 449 | if should(examples.steps_per_epoch): 450 | # if should(50): 451 | print("saving model") 452 | saver.save(sess, os.path.join(output_dir, "model"), global_step=sv.global_step) 453 | 454 | if sv.should_stop(): 455 | break 456 | 457 | 458 | main() 459 | 460 | --------------------------------------------------------------------------------