├── requirements.txt
├── examples.jpg
├── eval
├── gt_0017.png
└── test_0017.png
├── label_to_facades.png
├── .gitignore
├── download_dataset.sh
├── tools
├── download-dataset.py
├── split.py
├── test.py
├── dockrun.py
├── tfimage.py
└── process.py
├── README.md
├── README_pix2pix.md
├── eval.py
├── train_val_test_split.py
├── main.py
├── utils.py
├── ops.py
└── model.py
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow-gpu
2 | numpy
3 | scipy
4 | pillow
5 |
--------------------------------------------------------------------------------
/examples.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuantingchen04/Light-Field-Depth-Estimation/HEAD/examples.jpg
--------------------------------------------------------------------------------
/eval/gt_0017.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuantingchen04/Light-Field-Depth-Estimation/HEAD/eval/gt_0017.png
--------------------------------------------------------------------------------
/eval/test_0017.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuantingchen04/Light-Field-Depth-Estimation/HEAD/eval/test_0017.png
--------------------------------------------------------------------------------
/label_to_facades.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuantingchen04/Light-Field-Depth-Estimation/HEAD/label_to_facades.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | sample/*
2 | logs/*
3 | test/*
4 | datasets/*
5 | checkpoint/*
6 | val/*
7 | results/
8 | depth_images_v2_400_gt/
9 |
10 | *.pyc
11 | .idea/
12 | .ipynb_checkpoints/
13 | *.ipynb
14 |
--------------------------------------------------------------------------------
/download_dataset.sh:
--------------------------------------------------------------------------------
# Download one of the pix2pix datasets and unpack it under ./datasets/.
# Usage: bash ./download_dataset.sh <dataset_name>   (e.g. facades)
FILE=$1
if [ -z "$FILE" ]; then
    echo "usage: bash $0 <dataset_name>" >&2
    exit 1
fi

URL="https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/$FILE.tar.gz"
TAR_FILE="./datasets/$FILE.tar.gz"
TARGET_DIR="./datasets/$FILE/"

# -p: do not fail when the directory is left over from an earlier run.
mkdir -p ./datasets

# -N (timestamping) is dropped on purpose: it has no effect together with -O.
if ! wget "$URL" -O "$TAR_FILE"; then
    # wget -O leaves an empty or partial file behind when the download fails.
    rm -f "$TAR_FILE"
    exit 1
fi

mkdir -p "$TARGET_DIR"
tar -zxvf "$TAR_FILE" -C ./datasets/
rm "$TAR_FILE"
10 |
--------------------------------------------------------------------------------
/tools/download-dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | try:
6 | from urllib.request import urlopen # python 3
7 | except ImportError:
8 | from urllib2 import urlopen # python 2
9 | import sys
10 | import tarfile
11 | import tempfile
12 | import shutil
13 |
# Download <dataset>.tar.gz from the pix2pix dataset server and unpack it
# into the current working directory.
if len(sys.argv) < 2:
    sys.exit("usage: python download-dataset.py <dataset_name>")

dataset = sys.argv[1]
url = "https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/%s.tar.gz" % dataset
with tempfile.TemporaryFile() as tmp:
    print("downloading", url)
    response = urlopen(url)
    try:
        # Spool the archive to a temporary file so it can be re-read by tarfile.
        shutil.copyfileobj(response, tmp)
    finally:
        # Explicit close: the Python 2 response object is not a context manager.
        response.close()

    print("extracting")
    tmp.seek(0)
    tar = tarfile.open(fileobj=tmp)
    try:
        # NOTE(review): extractall() trusts member paths inside the archive;
        # only use this with archives from a trusted source.
        tar.extractall()
    finally:
        tar.close()
    print("done")
25 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### Light-Field-Depth-Estimation
2 | #### *Light Field Depth Estimation using cGAN*
3 |
4 | #### Data
5 | * 600 Light Field Images from [DDFF 12-scene](http://hazirbas.com/datasets/ddff12scene/)
6 |
7 | #### Method
8 | * Conditional GAN using pix2pix (Tensorflow)
9 | * Fed 5 focal image stacks + LSTM embeddings
10 |
11 | #### Run
12 | * Download any checkpoint from [here](https://drive.google.com/open?id=1zV6wRKh1gkEIZg687LAFQbOlwnzK-YIH)
13 | * Download manually cropped data from [here](https://drive.google.com/open?id=1js-jLasmGDigc0pNgbc4INcmUn6Mp7Fu)
14 | * `python main.py --phase train --dataset_name scene12_v3_400`
15 |
16 | #### Acknowledgments
17 | Code borrows heavily from [pix2pix-tensorflow](https://github.com/yenchenlin/pix2pix-tensorflow). Thanks to Yen-Chen!
18 |
--------------------------------------------------------------------------------
/tools/split.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import random
6 | import argparse
7 | import glob
8 | import os
9 |
10 |
# Command-line options; the parsed result is kept in the module-level `a`,
# which main() reads.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--dir",
    required=True,
    type=str,
    help="path to folder containing images",
)
parser.add_argument(
    "--train_frac",
    default=0.8,
    type=float,
    help="percentage of images to use for training set",
)
parser.add_argument(
    "--test_frac",
    default=0.0,
    type=float,
    help="percentage of images to use for test set",
)
parser.add_argument(
    "--sort",
    action="store_true",
    help="if set, sort the images instead of shuffling them",
)
a = parser.parse_args()
17 |
18 |
def main():
    """Move the PNG files found in ``a.dir`` into train/val/test sub-folders.

    ``a.train_frac`` and ``a.test_frac`` of the files go to "train" and
    "test"; every remaining file goes to "val". Unless ``a.sort`` is set, the
    assignment order is shuffled with a fixed seed of 0.
    """
    random.seed(0)

    pngs = sorted(glob.glob(os.path.join(a.dir, "*.png")))
    total = len(pngs)

    n_train = int(a.train_frac * total)
    n_test = int(a.test_frac * total)
    assignments = ["train"] * n_train + ["test"] * n_test
    # Whatever is left after the train/test shares becomes validation data.
    assignments += ["val"] * int(total - len(assignments))

    if not a.sort:
        random.shuffle(assignments)

    # Create only the sub-folders that will actually receive files.
    used = set(assignments)
    for split in ("train", "val", "test"):
        if split not in used:
            continue
        target = os.path.join(a.dir, split)
        if not os.path.exists(target):
            os.makedirs(target)

    print(total, len(assignments))
    for src, split in zip(pngs, assignments):
        dst = os.path.join(a.dir, split, os.path.basename(src))
        print(src, "->", dst)
        os.rename(src, dst)


main()
46 |
--------------------------------------------------------------------------------
/README_pix2pix.md:
--------------------------------------------------------------------------------
1 | # pix2pix-tensorflow
2 |
3 | TensorFlow implementation of [Image-to-Image Translation Using Conditional Adversarial Networks](https://arxiv.org/pdf/1611.07004v1.pdf) that learns a mapping from input images to output images.
4 |
5 | Here are some results generated by the authors of the paper:
6 |
7 |
8 |
9 | ## Setup
10 |
11 | ### Prerequisites
12 | - Linux
13 | - Python with numpy
14 | - NVIDIA GPU + CUDA 8.0 + CuDNNv5.1
15 | - TensorFlow 0.11
16 |
17 | ### Getting Started
18 | - Clone this repo:
19 | ```bash
20 | git clone git@github.com:yenchenlin/pix2pix-tensorflow.git
21 | cd pix2pix-tensorflow
22 | ```
23 | - Download the dataset (script borrowed from [torch code](https://github.com/phillipi/pix2pix/blob/master/datasets/download_dataset.sh)):
24 | ```bash
25 | bash ./download_dataset.sh facades
26 | ```
27 | - Train the model
28 | ```bash
29 | python main.py --phase train
30 | ```
31 | - Test the model:
32 | ```bash
33 | python main.py --phase test
34 | ```
35 |
36 | ## Results
37 | Here are the results generated from this implementation:
38 |
39 | - Facades:
40 |
41 |
42 |
43 | More results on other datasets coming soon!
44 |
45 | **Note**: To avoid the fast convergence of the D (discriminator) network, the G (generator) network is updated twice for each D network update. This differs from the original paper but is the same as in [DCGAN-tensorflow](https://github.com/carpedm20/DCGAN-tensorflow), which this project is based on.
46 |
47 | ## Train
48 | The code currently supports the [CMP Facades](http://cmp.felk.cvut.cz/~tylecr1/facade/) dataset. Reproducing the results presented above takes 200 epochs of training. The exact computing time depends on your hardware.
49 |
50 | ## Test
51 | Test the model on the validation set of the [CMP Facades](http://cmp.felk.cvut.cz/~tylecr1/facade/) dataset. Given the corresponding labels, it generates synthesized images under the `./test` directory.
52 |
53 |
54 | ## Acknowledgments
55 | Code borrows heavily from [pix2pix](https://github.com/phillipi/pix2pix) and [DCGAN-tensorflow](https://github.com/carpedm20/DCGAN-tensorflow/blob/master/model.py). Thanks for their excellent work!
56 |
--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import sys
4 |
5 | # revise from
6 | # https://github.com/mrharicot/monodepth/blob/master/utils/evaluate_kitti.py
def compute_errors(gt, pred):
    """Compute depth-estimation error metrics between ``gt`` and ``pred``.

    Returns the tuple ``(abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3)`` where
    ``a1``..``a3`` are the fractions of elements whose ratio
    ``max(gt / pred, pred / gt)`` is below ``1.25``, ``1.25**2`` and
    ``1.25**3``.
    """
    diff = gt - pred
    ratio = np.maximum(gt / pred, pred / gt)

    # Threshold accuracies for increasing powers of 1.25.
    a1, a2, a3 = [(ratio < 1.25 ** power).mean() for power in (1, 2, 3)]

    rmse = np.sqrt((diff ** 2).mean())

    log_diff = np.log(gt) - np.log(pred)
    rmse_log = np.sqrt((log_diff ** 2).mean())

    abs_rel = np.mean(np.abs(diff) / gt)
    sq_rel = np.mean((diff ** 2) / gt)

    return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3
24 |
25 |
# Depth range (after the /1000 scaling applied below) that is evaluated.
min_depth = 1e-3
max_depth = 80

# Ground-truth depth maps and the predictions to compare against them.
#gt_npy_dir = 'datasets/scene12_v2_400/val/A' ## Correct GT
gt_npy_dir = 'datasets/scene12_v3_400/val/A' ## Correct GT
test_npy_dir = 'test/npy'

# Files are paired purely by sorted position in the two directories.
data_npy = sorted(os.path.join(gt_npy_dir, name) for name in os.listdir(gt_npy_dir))
test_npy = sorted(os.path.join(test_npy_dir, name) for name in os.listdir(test_npy_dir))
num_samples = len(data_npy)
# Parenthesised so the line is valid under both Python 2 and Python 3.
print(num_samples)
if len(data_npy) != len(test_npy):
    sys.exit("Check files")


rms = np.zeros(num_samples, np.float32)
log_rms = np.zeros(num_samples, np.float32)
abs_rel = np.zeros(num_samples, np.float32)
sq_rel = np.zeros(num_samples, np.float32)
d1_all = np.zeros(num_samples, np.float32)
a1 = np.zeros(num_samples, np.float32)
a2 = np.zeros(num_samples, np.float32)
a3 = np.zeros(num_samples, np.float32)

for i in range(num_samples):
    # Divide by a float: under Python 2, an integer-typed array divided by the
    # int 1000 would be floor-divided and lose all sub-unit precision.
    gt_depth = np.load(data_npy[i]) / 1000.0
    pred_depth = np.load(test_npy[i]) / 1000.0

    # Clamp predictions into the evaluated range so ratios and logs are defined.
    pred_depth[pred_depth < min_depth] = min_depth
    pred_depth[pred_depth > max_depth] = max_depth

    # Only score pixels whose ground truth lies strictly inside the range.
    mask = np.logical_and(gt_depth > min_depth, gt_depth < max_depth)

    abs_rel[i], sq_rel[i], rms[i], log_rms[i], a1[i], a2[i], a3[i] = compute_errors(gt_depth[mask], pred_depth[mask])

print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format('abs_rel', 'sq_rel', 'rms', 'log_rms', 'a1', 'a2', 'a3'))
print("{:10.4f}, {:10.4f}, {:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}".format(abs_rel.mean(), sq_rel.mean(), rms.mean(), log_rms.mean(), a1.mean(), a2.mean(), a3.mean()))
64 |
65 |
--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import subprocess
6 | import os
7 | import sys
8 | import time
9 | import argparse
10 |
11 |
# --long selects the full training runs in main(); parsed options live in `a`.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--long",
    action="store_true",
)
a = parser.parse_args()
15 |
16 |
def run(cmd, image="affinelayer/pix2pix-tensorflow"):
    """Run ``cmd`` inside a docker container created from ``image``.

    The current directory is mounted at /prj (and used as the working
    directory), ``../data`` at /data, and /tmp/cuda-cache at /cuda-cache.
    ``cmd`` is split on single spaces. The full command line is printed
    before it is executed; a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    # nvidia-docker is used on Linux, plain docker everywhere else.
    docker = "nvidia-docker" if sys.platform.startswith("linux") else "docker"
    datapath = os.path.abspath("../data")

    args = [docker, "run", "--rm"]
    args += ["--volume", os.getcwd() + ":/prj"]
    args += ["--volume", datapath + ":/data"]
    args += ["--workdir", "/prj"]
    args += ["--env", "PYTHONUNBUFFERED=x"]
    args += ["--volume", "/tmp/cuda-cache:/cuda-cache"]
    args += ["--env", "CUDA_CACHE_PATH=/cuda-cache"]
    args.append(image)
    args.extend(cmd.split(" "))

    print(" ".join(args))
    subprocess.check_call(args)
27 |
28 |
def main():
    """Run the pix2pix.py train/test smoke tests inside docker.

    With --long, runs the full facades (200 epochs) and color-lab (10 epochs)
    trainings followed by their test passes. Otherwise runs one-step
    train/test cycles, plus a one-step cycle on a Python 3 image. The elapsed
    time in seconds is printed at the end.
    """
    start = time.time()

    if a.long:
        run("python pix2pix.py --mode train --output_dir test/facades_BtoA_train --max_epochs 200 --input_dir /data/official/facades/train --which_direction BtoA --seed 0")
        run("python pix2pix.py --mode test --output_dir test/facades_BtoA_test --input_dir /data/official/facades/val --seed 0 --checkpoint test/facades_BtoA_train")

        run("python pix2pix.py --mode train --output_dir test/color-lab_AtoB_train --max_epochs 10 --input_dir /data/color-lab/train --which_direction AtoB --seed 0 --lab_colorization")
        run("python pix2pix.py --mode test --output_dir test/color-lab_AtoB_test --input_dir /data/color-lab/val --seed 0 --checkpoint test/color-lab_AtoB_train")
    else:
        # training
        for direction in ["AtoB", "BtoA"]:
            for dataset in ["facades"]:
                name = dataset + "_" + direction
                run("python pix2pix.py --mode train --output_dir test/%s_train --max_steps 1 --input_dir /data/official/%s/train --which_direction %s --seed 0" % (name, dataset, direction))
                run("python pix2pix.py --mode test --output_dir test/%s_test --max_steps 1 --input_dir /data/official/%s/val --seed 0 --checkpoint test/%s_train" % (name, dataset, name))

        # test lab colorization
        # NOTE(review): the nesting of this section could not be recovered from
        # the flattened source; it is placed after the loop, where `direction`
        # still holds the last loop value. Confirm against the original file.
        dataset = "color-lab"
        name = dataset + "_" + direction
        run("python pix2pix.py --mode train --output_dir test/%s_train --max_steps 1 --input_dir /data/%s/train --which_direction %s --seed 0 --lab_colorization" % (name, dataset, direction))
        run("python pix2pix.py --mode test --output_dir test/%s_test --max_steps 1 --input_dir /data/%s/val --seed 0 --checkpoint test/%s_train" % (name, dataset, name))

        # using pretrained model (can't use pretrained models from tensorflow 0.12, so disabled for now)
        # for dataset, direction in [("facades", "BtoA")]:
        #     name = dataset + "_" + direction
        #     run("python pix2pix.py --mode test --output_dir test/%s_pretrained_test --input_dir /data/official/%s/val --max_steps 100 --which_direction %s --seed 0 --checkpoint /data/pretrained/%s" % (name, dataset, direction, name))
        #     run("python pix2pix.py --mode export --output_dir test/%s_pretrained_export --checkpoint /data/pretrained/%s" % (name, name))

        # test python3
        run("python pix2pix.py --mode train --output_dir test/py3_facades_AtoB_train --max_steps 1 --input_dir /data/official/facades/train --which_direction AtoB --seed 0", image="tensorflow/tensorflow:1.0.0-gpu-py3")
        run("python pix2pix.py --mode test --output_dir test/py3_facades_AtoB_test --max_steps 1 --input_dir /data/official/facades/val --seed 0 --checkpoint test/py3_facades_AtoB_train", image="tensorflow/tensorflow:1.0.0-gpu-py3")

    print("elapsed", int(time.time() - start))
    # long: about 9 hours (linux)


main()
67 |
--------------------------------------------------------------------------------
/train_val_test_split.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # In[103]:
5 |
6 | import os
7 | import numpy as np
8 | import sys
9 | import collections as coll
def train_val_test_split(trainNum=400,valNum=100,testNum=100,numScenes=6):
    """Randomly split the arrays under ./data/A and ./data/B into train/val/test.

    The file list of ./data/A is cut into ``numScenes`` equally sized,
    consecutive ranges and the same number of files is sampled (without
    replacement) from every range. The selected files are copied from both
    ./data/A and ./data/B into ./datasets/scene12_<trainNum>/<split>/<A|B>.

    Args:
        trainNum: total number of training samples.
        valNum: total number of validation samples.
        testNum: total number of test samples.
        numScenes: number of consecutive ranges the file list is divided into.
    """
    datasetName = 'scene12_' + str(trainNum)
    savePath = os.path.join(os.getcwd(),'datasets',datasetName)
    loadAPath = os.path.join(os.getcwd(),'data','A') # containing all 3600 images
    loadBPath = os.path.join(os.getcwd(),'data','B') # containing all 3600 images

    if not os.path.exists(savePath):
        os.makedirs(savePath)
    print('trainNum={} valNum={} testNum={}'.format(trainNum,valNum,testNum))

    # os.listdir returns names in arbitrary order; sort them so the consecutive
    # index ranges below are stable across runs and file systems.
    # NOTE(review): presumably the file names sort scene by scene - confirm.
    fileNames = sorted(os.listdir(loadAPath))

    # indices chosen for the training, validation and test sets
    trainIdx = []
    valIdx = []
    testIdx = []

    delta = len(fileNames)//numScenes
    size_per_scene = (trainNum + valNum + testNum)//numScenes
    trainPerScene = trainNum//numScenes
    trainValPerScene = (trainNum+valNum)//numScenes
    print(size_per_scene)
    for i in range(numScenes):
        beg = i*delta
        end = beg + delta
        print('beg:{} end: {}'.format(beg,end))
        allidx = np.random.choice(range(beg,end),size_per_scene,replace=False)
        if i < numScenes - 1:
            trainIdx.extend(allidx[:trainPerScene])
            valIdx.extend(allidx[trainPerScene:trainValPerScene])
            testIdx.extend(allidx[trainValPerScene:])
        else:
            # The last range absorbs the rounding remainder so that the train
            # and val totals match trainNum and valNum exactly.
            trainLeft = trainNum - len(trainIdx)
            valLeft = valNum - len(valIdx)
            trainIdx.extend(allidx[:trainLeft])
            valIdx.extend(allidx[trainLeft:trainLeft + valLeft])
            testIdx.extend(allidx[trainLeft + valLeft:])

    print('size of train: {}'.format(len(trainIdx)))
    print('size of val: {}'.format(len(valIdx)))
    print('size of test: {}'.format(len(testIdx)))

    # shuffle training idx once more
    np.random.shuffle(trainIdx)

    # Copy every split from both the A and the B folder. A fresh list is built
    # per call because saveData shuffles its argument in place.
    for splitName, indices in (('train', trainIdx), ('val', valIdx), ('test', testIdx)):
        for loadPath in (loadAPath, loadBPath):
            saveData([fileNames[i] for i in indices],splitName,savePath,loadPath)
69 |
70 |
71 |
def saveData(data,name,savePath,loadPath):
    """Copy the arrays named in ``data`` from ``loadPath`` into the split folder.

    Each file is loaded with ``np.load`` and written with ``np.save`` to
    ``savePath/name/<last character of loadPath>/``. ``data`` is shuffled in
    place before copying.
    """
    # The final character of the source folder (e.g. 'A' or 'B') names the target.
    subDir = loadPath[-1]
    np.random.shuffle(data)
    for fileName in data:
        array = np.load(os.path.join(loadPath,fileName))
        targetDir = os.path.join(savePath,name,subDir)
        if not os.path.exists(targetDir):
            os.makedirs(targetDir)
        np.save(os.path.join(targetDir,fileName),array)
81 |
82 |
83 |
84 |
85 |
if __name__ == '__main__':
    # Three positional arguments override the split sizes:
    #   python train_val_test_split.py <trainNum> <valNum> <testNum>
    # Any other argument count falls back to the 2400/600/600 split.
    if len(sys.argv) == 4:
        trainNum, valNum, testNum = [int(value) for value in sys.argv[1:4]]
    else:
        trainNum, valNum, testNum = 2400, 600, 600

    train_val_test_split(trainNum,valNum,testNum)
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import scipy.misc
4 | import numpy as np
5 |
6 | from model import pix2pix
7 | import tensorflow as tf
8 |
def _str2bool(value):
    """Convert a command-line string such as "True", "false", "1" or "0" to a bool.

    argparse's ``type=bool`` simply calls ``bool()`` on the raw string, so
    every non-empty value (including "False" and "0") would become True.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y'):
        return True
    if text in ('0', 'false', 'f', 'no', 'n', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % value)


# Command-line options; the parsed result is kept in the module-level `args`.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--dataset_name', dest='dataset_name', default='facades', help='name of the dataset')
parser.add_argument('--epoch', dest='epoch', type=int, default=200, help='# of epoch')
parser.add_argument('--batch_size', dest='batch_size', type=int, default=1, help='# images in batch')
# The default is written as an int so it matches the declared type (1e8 is a float).
parser.add_argument('--train_size', dest='train_size', type=int, default=int(1e8), help='# images used to train')
parser.add_argument('--load_size', dest='load_size', type=int, default=286, help='scale images to this size')
parser.add_argument('--fine_size', dest='fine_size', type=int, default=256, help='then crop to this size')
parser.add_argument('--ngf', dest='ngf', type=int, default=64, help='# of gen filters in first conv layer')
parser.add_argument('--ndf', dest='ndf', type=int, default=64, help='# of discri filters in first conv layer')
parser.add_argument('--input_nc', dest='input_nc', type=int, default=3, help='# of input image channels')
parser.add_argument('--output_nc', dest='output_nc', type=int, default=3, help='# of output image channels')
parser.add_argument('--niter', dest='niter', type=int, default=200, help='# of iter at starting learning rate')
parser.add_argument('--lr', dest='lr', type=float, default=0.0002, help='initial learning rate for adam')
parser.add_argument('--beta1', dest='beta1', type=float, default=0.5, help='momentum term of adam')
parser.add_argument('--flip', dest='flip', type=_str2bool, default=True, help='if flip the images for data argumentation')
parser.add_argument('--which_direction', dest='which_direction', default='AtoB', help='AtoB or BtoA')
parser.add_argument('--phase', dest='phase', default='train', help='train, test')
parser.add_argument('--save_epoch_freq', dest='save_epoch_freq', type=int, default=50, help='save a model every save_epoch_freq epochs (does not overwrite previously saved models)')
parser.add_argument('--save_latest_freq', dest='save_latest_freq', type=int, default=5000, help='save the latest model every latest_freq sgd iterations (overwrites the previous latest model)')
parser.add_argument('--print_freq', dest='print_freq', type=int, default=50, help='print the debug information every print_freq iterations')
parser.add_argument('--continue_train', dest='continue_train', type=_str2bool, default=False, help='if continue training, load the latest model: 1: true, 0: false')
parser.add_argument('--serial_batches', dest='serial_batches', type=_str2bool, default=False, help='f 1, takes images in order to make batches, otherwise takes them randomly')
parser.add_argument('--serial_batch_iter', dest='serial_batch_iter', type=_str2bool, default=True, help='iter into serial image list')
parser.add_argument('--checkpoint_dir', dest='checkpoint_dir', default='./checkpoint', help='models are saved here')
parser.add_argument('--sample_dir', dest='sample_dir', default='./sample', help='sample are saved here')
parser.add_argument('--test_dir', dest='test_dir', default='./test', help='test sample are saved here')
parser.add_argument('--L1_lambda', dest='L1_lambda', type=float, default=100.0, help='weight on L1 term in objective')

args = parser.parse_args()
38 |
39 | # python tools/dockrun.py python main.py --phase train
40 |
def main(_):
    """Create the output folders, build the pix2pix model and train or test it.

    The single positional argument is supplied by ``tf.app.run`` and ignored;
    all settings come from the module-level ``args``.
    """
    for directory in (args.checkpoint_dir, args.sample_dir, args.test_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    with tf.Session() as sess:
        model = pix2pix(
            sess,
            image_size=args.fine_size,
            batch_size=args.batch_size,
            output_size=args.fine_size,
            dataset_name=args.dataset_name,
            checkpoint_dir=args.checkpoint_dir,
            sample_dir=args.sample_dir,
        )

        # Any phase other than 'train' runs the test pass.
        action = model.train if args.phase == 'train' else model.test
        action(args)


if __name__ == '__main__':
    tf.app.run()
61 |
--------------------------------------------------------------------------------
/tools/dockrun.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import os
6 | import sys
7 | import argparse
8 |
# Options understood by this wrapper itself (everything else is the command).
parser = argparse.ArgumentParser()
parser.add_argument(
    "--port",
    help="port to publish from the container",
    type=int,
)
11 |
12 | # from python 3.3 source
13 | # https://github.com/python/cpython/blob/master/Lib/shutil.py
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Locate ``cmd`` on a search path and return its full path, or None.

    ``mode`` is the permission mask handed to ``os.access`` (default: exists
    and is executable). ``path`` is an ``os.pathsep``-separated search string;
    when None, the PATH environment variable (or ``os.defpath``) is used.
    A ``cmd`` that contains a directory part is checked directly instead of
    being searched for.
    """
    def _access_check(fn, mode):
        # Directories are rejected explicitly because on Windows they pass
        # the os.access check.
        if not os.path.exists(fn):
            return False
        if not os.access(fn, mode):
            return False
        return not os.path.isdir(fn)

    # A command with a directory component (e.g. ./script) bypasses PATH.
    if os.path.dirname(cmd):
        return cmd if _access_check(cmd, mode) else None

    search = os.environ.get("PATH", os.defpath) if path is None else path
    if not search:
        return None
    directories = search.split(os.pathsep)

    if sys.platform == "win32":
        # On Windows the current directory is searched first.
        if os.curdir not in directories:
            directories.insert(0, os.curdir)

        # PATHEXT lists the suffixes Windows treats as executable. If cmd
        # already ends with one of them only that exact name is tried,
        # otherwise every suffix is appended in turn.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        lowered = cmd.lower()
        if any(lowered.endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # Other platforms have no executable-suffix list; use cmd as given.
        candidates = [cmd]

    visited = set()
    for directory in directories:
        key = os.path.normcase(directory)
        if key in visited:
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _access_check(full_name, mode):
                return full_name
    return None
73 |
74 |
def main():
    """Re-execute the given command inside the pix2pix-tensorflow container.

    Leading ``--option value`` pairs on the command line are parsed as this
    wrapper's own options; everything from the first token that does not
    start with ``--`` is the command to run. The current process is replaced
    via ``os.execvp`` (prefixed with ``sudo`` when the docker socket is not
    readable).
    """
    argv = sys.argv[1:]

    # Skip over "--option value" pairs, two tokens at a time.
    split = 0
    while split < len(argv) and argv[split].startswith("--"):
        split += 2

    a = parser.parse_args(argv[:split])
    cmd = argv[split:]

    # Prefer nvidia-docker and fall back to plain docker.
    for candidate in ("nvidia-docker", "docker"):
        docker_path = which(candidate)
        if docker_path is not None:
            break
    else:
        raise Exception("docker not found")

    # The whole host file system is mounted at /host, so the working
    # directory inside the container mirrors the current one.
    docker_args = ["--rm"]
    docker_args += ["--volume", "/:/host"]
    docker_args += ["--workdir", "/host" + os.getcwd()]
    docker_args += ["--env", "PYTHONUNBUFFERED=x"]
    docker_args += ["--env", "CUDA_CACHE_PATH=/host/tmp/cuda-cache"]

    if a.port is not None:
        docker_args += ["--publish", "%d:%d" % (a.port, a.port)]

    args = [docker_path, "run"]
    args += docker_args
    args += ["affinelayer/pix2pix-tensorflow:v2"]
    args += cmd

    if not os.access("/var/run/docker.sock", os.R_OK):
        args = ["sudo"] + args

    os.execvp(args[0], args)


main()
118 |
--------------------------------------------------------------------------------
/tools/tfimage.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import tensorflow as tf
6 | import os
7 |
8 |
def create_op(func, **placeholders):
    """Build ``func(**placeholders)`` once and return a function that runs it.

    The returned function takes keyword arguments named like the entries of
    ``placeholders``, feeds each value to the corresponding placeholder and
    evaluates the op in the default TensorFlow session.
    """
    op = func(**placeholders)

    def f(**kwargs):
        # Map every supplied value onto the placeholder created for its name.
        feed_dict = {placeholders[name]: value for name, value in kwargs.items()}
        session = tf.get_default_session()
        return session.run(op, feed_dict=feed_dict)

    return f
20 |
# Module-level image helpers. Each one wraps a tf.image function through
# create_op, so the op and its placeholders are created once at import time
# and the helper is called with keyword arguments named like the placeholders.

# Resize float32 images to `size` (two int32 values) using AREA resampling.
downscale = create_op(
    func=tf.image.resize_images,
    images=tf.placeholder(tf.float32, [None, None, None]),
    size=tf.placeholder(tf.int32, [2]),
    method=tf.image.ResizeMethod.AREA,
)

# Resize float32 images to `size` (two int32 values) using BICUBIC resampling.
upscale = create_op(
    func=tf.image.resize_images,
    images=tf.placeholder(tf.float32, [None, None, None]),
    size=tf.placeholder(tf.int32, [2]),
    method=tf.image.ResizeMethod.BICUBIC,
)

# Decode the string `contents` of a JPEG file.
decode_jpeg = create_op(
    func=tf.image.decode_jpeg,
    contents=tf.placeholder(tf.string),
)

# Decode the string `contents` of a PNG file.
decode_png = create_op(
    func=tf.image.decode_png,
    contents=tf.placeholder(tf.string),
)

# Colour-space conversions on float32 images.
rgb_to_grayscale = create_op(
    func=tf.image.rgb_to_grayscale,
    images=tf.placeholder(tf.float32),
)

grayscale_to_rgb = create_op(
    func=tf.image.grayscale_to_rgb,
    images=tf.placeholder(tf.float32),
)

# Encode a uint8 image as JPEG / PNG.
encode_jpeg = create_op(
    func=tf.image.encode_jpeg,
    image=tf.placeholder(tf.uint8),
)

encode_png = create_op(
    func=tf.image.encode_png,
    image=tf.placeholder(tf.uint8),
)

# Crop a float32 image to the box given by scalar int32 offsets and target size.
crop = create_op(
    func=tf.image.crop_to_bounding_box,
    image=tf.placeholder(tf.float32),
    offset_height=tf.placeholder(tf.int32, []),
    offset_width=tf.placeholder(tf.int32, []),
    target_height=tf.placeholder(tf.int32, []),
    target_width=tf.placeholder(tf.int32, []),
)

# Pad a float32 image to the box given by scalar int32 offsets and target size.
pad = create_op(
    func=tf.image.pad_to_bounding_box,
    image=tf.placeholder(tf.float32),
    offset_height=tf.placeholder(tf.int32, []),
    offset_width=tf.placeholder(tf.int32, []),
    target_height=tf.placeholder(tf.int32, []),
    target_width=tf.placeholder(tf.int32, []),
)

# float32 -> uint8 conversion with saturate=True.
to_uint8 = create_op(
    func=tf.image.convert_image_dtype,
    image=tf.placeholder(tf.float32),
    dtype=tf.uint8,
    saturate=True,
)

# uint8 -> float32 conversion.
to_float32 = create_op(
    func=tf.image.convert_image_dtype,
    image=tf.placeholder(tf.uint8),
    dtype=tf.float32,
)
95 |
96 |
def load(path):
    """Read the image file at ``path`` and return it converted by ``to_float32``.

    Only the (case-insensitive) suffixes ".jpg" and ".png" are accepted; any
    other suffix raises ``Exception("invalid image suffix")``.
    """
    with open(path, "rb") as f:
        contents = f.read()

    # Pick the decoder from the lower-cased file suffix.
    decoders = {".jpg": decode_jpeg, ".png": decode_png}
    suffix = os.path.splitext(path.lower())[1]
    if suffix not in decoders:
        raise Exception("invalid image suffix")

    decoded = decoders[suffix](contents=contents)
    return to_float32(image=decoded)
111 |
112 |
def find(d):
    """Return the sorted paths of all .jpg and .png files directly inside ``d``.

    The suffix comparison is case-insensitive; returned paths keep the
    original file-name casing.
    """
    wanted = (".jpg", ".png")
    matches = [
        os.path.join(d, entry)
        for entry in os.listdir(d)
        if os.path.splitext(entry.lower())[1] in wanted
    ]
    return sorted(matches)
121 |
122 |
def save(image, path, replace=False):
    """Encode ``image`` as JPEG or PNG (chosen by the suffix of ``path``) and write it.

    The image is first converted with ``to_uint8``. Missing parent
    directories are created. An existing file is removed first when
    ``replace`` is true; otherwise an ``Exception`` is raised. A suffix other
    than ".jpg" / ".png" raises ``Exception("invalid image suffix")``.
    """
    suffix = os.path.splitext(path.lower())[1]
    pixels = to_uint8(image=image)

    encoders = {".jpg": encode_jpeg, ".png": encode_png}
    if suffix not in encoders:
        raise Exception("invalid image suffix")
    encoded = encoders[suffix](image=pixels)

    parent = os.path.dirname(path)
    if parent != "" and not os.path.exists(parent):
        os.makedirs(parent)

    if os.path.exists(path):
        if not replace:
            raise Exception("file already exists at " + path)
        os.remove(path)

    with open(path, "wb") as f:
        f.write(encoded)
145 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Some codes from https://github.com/Newmu/dcgan_code
3 | """
4 | from __future__ import division
5 | import math
6 | import json
7 | import random
8 | import pprint
9 | import scipy.misc
10 | import numpy as np
11 | from time import gmtime, strftime
12 | import os
13 |
pp = pprint.PrettyPrinter()


def get_stddev(x, k_h, k_w):
    """Return 1 / sqrt(k_w * k_h * last dimension of ``x.get_shape()``)."""
    fan_in = k_w * k_h * x.get_shape()[-1]
    return 1 / math.sqrt(fan_in)
17 |
18 | # -----------------------------
19 | # new added functions for pix2pix
20 |
def load_data(image_path, flip=True, is_test=False):
    """Load an A|B image pair and return both halves stacked along axis 2.

    The file is split by ``load_image``, passed through
    ``preprocess_A_and_B`` and each half is rescaled with ``x / 127.5 - 1``.
    """
    pair = load_image(image_path)
    halves = preprocess_A_and_B(pair[0], pair[1], flip=flip, is_test=is_test)

    scaled = [half / 127.5 - 1. for half in halves]

    # result shape: (fine_size, fine_size, input_c_dim + output_c_dim)
    return np.concatenate(scaled, axis=2)
31 |
def load_data2(image_path, idx):
    """Load the A/B arrays for ``idx`` and return them stacked along axis 2.

    The arrays come from ``load_image2``; A is rescaled with
    ``x / 32767.5 - 1`` and B with ``x / 127.5 - 1`` before concatenation.
    """
    depth_scale = 32767.5
    rgb_scale = 127.5

    raw_A, raw_B = load_image2(image_path,idx)
    scaled_A = raw_A/depth_scale - 1.
    scaled_B = raw_B/rgb_scale - 1.

    # result shape: (fine_size, fine_size, input_c_dim + output_c_dim)
    return np.concatenate((scaled_A, scaled_B), axis=2)
43 |
def load_image(image_path):
    """Read an image and split it vertically into a left (A) and right (B) half.

    The split column is ``int(width / 2)``; for an odd width the right half is
    one column wider.
    """
    combined = imread(image_path)
    full_width = int(combined.shape[1])
    half_width = int(full_width / 2)

    left_half = combined[:, :half_width]
    right_half = combined[:, half_width:full_width]
    return left_half, right_half
52 |
def load_image2(image_path,idx,isLSTM=True):
    """Load the depth (A) and RGB (B) arrays of one sample from sibling folders.

    Args:
        image_path: Path of either the ``A`` or the ``B`` folder of a split,
            e.g. ``"./train/A"``. Only its parent directory is used; a
            trailing path separator is tolerated.
        idx: File name of the sample, present in both ``A/`` and ``B/``.
        isLSTM: When true, every view stored in the B file is concatenated
            along the channel axis (e.g. (5, H, W, 3) -> (H, W, 15)).
            Otherwise only the first view is returned.

    Returns:
        (img_A, img_B): img_A is the A array reshaped to (H, W, 1); img_B is
        the RGB array described above.
    """
    # A is depth, B is rgb. Resolve the split directory properly instead of
    # chopping one character off the path, which broke on a trailing "/".
    split_dir = os.path.dirname(os.path.normpath(image_path))
    A_path = os.path.join(split_dir, 'A', idx)
    B_path = os.path.join(split_dir, 'B', idx)

    views = np.load(B_path)
    if isLSTM:
        # One concatenate call instead of re-copying the result per view.
        img_B = np.concatenate(list(views), axis=2)
    else:
        img_B = views[0]

    img_A = np.load(A_path)
    img_A = img_A.reshape(img_A.shape[0], img_A.shape[1], 1)

    return img_A, img_B
72 |
73 |
def preprocess_A_and_B(img_A, img_B, load_size=286, fine_size=256, flip=True, is_test=False):
    """Resize an A/B image pair and, outside test mode, jitter both identically.

    Args:
        img_A, img_B: The two images of a pair.
        load_size: Side length both images are resized to before cropping
            (training path only).
        fine_size: Side length of the returned images.
        flip: Allow a random left-right flip (training path only).
        is_test: When true, only resize straight to ``fine_size``.

    Returns:
        (img_A, img_B) after identical geometric transforms.
    """
    if is_test:
        img_A = scipy.misc.imresize(img_A, [fine_size, fine_size])
        img_B = scipy.misc.imresize(img_B, [fine_size, fine_size])
    else:
        img_A = scipy.misc.imresize(img_A, [load_size, load_size])
        img_B = scipy.misc.imresize(img_B, [load_size, load_size])

        # ceil of a uniform draw from [1e-2, load_size - fine_size): the crop
        # offset is always >= 1, so offset 0 is never selected.
        h1 = int(np.ceil(np.random.uniform(1e-2, load_size-fine_size)))
        w1 = int(np.ceil(np.random.uniform(1e-2, load_size-fine_size)))
        # Same window for both images so they stay pixel-aligned.
        img_A = img_A[h1:h1+fine_size, w1:w1+fine_size]
        img_B = img_B[h1:h1+fine_size, w1:w1+fine_size]

        # One coin toss decides the flip for both images.
        if flip and np.random.random() > 0.5:
            img_A = np.fliplr(img_A)
            img_B = np.fliplr(img_B)

    return img_A, img_B
92 |
93 | # -----------------------------
94 |
def get_image(image_path, image_size, is_crop=True, resize_w=64, is_grayscale = False):
    """Read an image from disk and pass it through transform()."""
    raw_pixels = imread(image_path, is_grayscale)
    return transform(raw_pixels, image_size, is_crop, resize_w)
97 |
def save_images(images, size, image_path):
    """Undo the [-1, 1] scaling of a batch and write it as one tiled image."""
    rescaled = inverse_transform(images)
    return imsave(rescaled, size, image_path)
100 |
def imread(path, is_grayscale = False):
    """Read an image file into a float64 array.

    Args:
        path: Image file to read.
        is_grayscale: When true, the reader is called with ``flatten=True``.

    Returns:
        The pixel data converted with ``astype(float)``.
    """
    if is_grayscale:
        pixels = scipy.misc.imread(path, flatten = True)
    else:
        pixels = scipy.misc.imread(path)
    # Builtin float is the same float64 dtype as the removed np.float alias.
    return pixels.astype(float)
106 |
def merge_images(images, size):
    """Return `images` mapped back to pixel range; `size` is accepted but unused."""
    restored = inverse_transform(images)
    return restored
109 |
def merge(images, size):
    """Tile a batch of equally sized images into one 3-channel grid.

    Args:
        images: Array indexed as (image, row, col[, channel]).
        size: ``[n_rows, n_cols]`` of the grid; images are placed row by row.

    Returns:
        A float array of shape (n_rows * h, n_cols * w, 3). Cells that
        receive no image stay zero.
    """
    tile_h, tile_w = images.shape[1], images.shape[2]
    n_rows, n_cols = size[0], size[1]
    canvas = np.zeros((tile_h * n_rows, tile_w * n_cols, 3))

    for position, tile in enumerate(images):
        grid_row, grid_col = divmod(position, n_cols)
        top = grid_row * tile_h
        left = grid_col * tile_w
        canvas[top:top + tile_h, left:left + tile_w, :] = tile

    return canvas
119 |
def imsave(images, size, path):
    """Tile `images` with merge() and write the grid to `path`."""
    grid = merge(images, size)
    # Warning: will rescale to [0,255]
    # (a toimage(..., cmin=0, cmax=255) variant avoids the rescale but is hard to see)
    return scipy.misc.imsave(path, grid)
123 |
def transform(image, npx=64, is_crop=True, resize_w=64):
    """Optionally center-crop `image`, then map values with ``x / 127.5 - 1``.

    Args:
        image: Pixel data (anything ``np.array`` accepts).
        npx: Number of pixels (width/height) passed to the crop.
        is_crop: Crop via center_crop before scaling.
        resize_w: Forwarded to center_crop.
    """
    # NOTE(review): center_crop is not defined in this module, so the
    # is_crop=True path depends on it being supplied elsewhere - confirm.
    prepared = center_crop(image, npx, resize_w=resize_w) if is_crop else image
    return np.array(prepared)/127.5 - 1.
131 |
def inverse_transform(images):
    """Map values from [-1, 1] back to [0, 255] (-1 -> 0, 1 -> 255)."""
    unit_range = (images+1.)/2.
    return unit_range*255
134 |
135 |
136 |
--------------------------------------------------------------------------------
/ops.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | import tensorflow as tf
4 |
5 | from tensorflow.python.framework import ops
6 |
7 | from utils import *
8 |
class batch_norm(object):
    """Callable wrapper that applies tf.contrib.layers.batch_norm under a fixed scope name."""
    # h1 = lrelu(tf.contrib.layers.batch_norm(conv2d(h0, self.df_dim*2, name='d_h1_conv'),decay=0.9,updates_collections=None,epsilon=0.00001,scale=True,scope="d_h1_conv"))
    def __init__(self, epsilon=1e-5, momentum = 0.9, name="batch_norm"):
        """Store the layer's hyper-parameters.

        Args:
            epsilon: Forwarded as ``epsilon`` to the batch-norm layer.
            momentum: Forwarded as ``decay`` to the batch-norm layer.
            name: Scope name passed as ``scope`` on every call.
        """
        # Only attribute assignments happen inside this scope; no variables
        # are created here.
        with tf.variable_scope(name):
            self.epsilon = epsilon
            self.momentum = momentum
            self.name = name

    def __call__(self, x, train=True):
        """Apply batch normalization to `x`.

        NOTE(review): `train` is accepted but never forwarded, so the layer
        always runs with its default training behaviour - confirm intended.
        """
        return tf.contrib.layers.batch_norm(x, decay=self.momentum, updates_collections=None, epsilon=self.epsilon, scale=True, scope=self.name)
19 |
def binary_cross_entropy(preds, targets, name=None):
    """Computes the mean binary cross entropy of `preds` against `targets`.

    For brevity, let `x = preds`, `z = targets`. The loss is

        loss(x, z) = - mean_i (z[i] * log(x[i] + eps) + (1 - z[i]) * log(1 - x[i] + eps))

    where `eps = 1e-12` keeps the logarithms finite.

    Args:
        preds: A `Tensor` of type `float32` or `float64`.
        targets: A `Tensor` of the same type and shape as `preds`.
        name: Optional name for the operation scope (defaults to "bce_loss").

    Returns:
        A scalar `Tensor` holding the mean loss over all elements.
    """
    eps = 1e-12
    # name_scope(name, default_name, values) replaces the deprecated
    # op_scope(values, name, default_name); only the argument order differs.
    with ops.name_scope(name, "bce_loss", [preds, targets]) as name:
        preds = ops.convert_to_tensor(preds, name="preds")
        targets = ops.convert_to_tensor(targets, name="targets")
        return tf.reduce_mean(-(targets * tf.log(preds + eps) +
                                (1. - targets) * tf.log(1. - preds + eps)))
37 |
def conv_cond_concat(x, y):
    """Concatenate conditioning vector on feature map axis.

    `y` is multiplied by a ones tensor shaped like `x` in its first three
    dimensions (keeping y's own last dimension) and appended to `x` on axis 3.
    """
    x_dims = x.get_shape()
    y_dims = y.get_shape()
    tiling = tf.ones([x_dims[0], x_dims[1], x_dims[2], y_dims[3]])
    return tf.concat([x, y*tiling], 3)
43 |
def conv2d(input_, output_dim,
           k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
           name="conv2d"):
    """SAME-padded 2-D convolution plus bias, with variables 'w' and 'biases' under `name`.

    Args:
        input_: Input tensor; its last static dimension is the input channel count.
        output_dim: Number of output channels.
        k_h, k_w: Kernel height and width.
        d_h, d_w: Strides along height and width.
        stddev: Std-dev of the truncated-normal kernel initializer.
        name: Variable scope holding the layer's variables.
    """
    with tf.variable_scope(name):
        in_channels = input_.get_shape()[-1]
        kernel = tf.get_variable('w', [k_h, k_w, in_channels, output_dim],
                                 initializer=tf.truncated_normal_initializer(stddev=stddev))
        features = tf.nn.conv2d(input_, kernel, strides=[1, d_h, d_w, 1], padding='SAME')

        biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0))
        biased = tf.nn.bias_add(features, biases)
        # Reshape to the convolution's own static shape.
        return tf.reshape(biased, features.get_shape())
56 |
def deconv2d(input_, output_shape,
             k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
             name="deconv2d", with_w=False):
    """Transposed 2-D convolution plus bias, with variables 'w' and 'biases' under `name`.

    Args:
        input_: Input tensor.
        output_shape: Full output shape; its last entry is the output channel count.
        k_h, k_w: Kernel height and width.
        d_h, d_w: Strides along height and width.
        stddev: Std-dev of the normal kernel initializer.
        name: Variable scope holding the layer's variables.
        with_w: Also return the kernel and bias variables.

    Returns:
        The output tensor, or ``(output, w, biases)`` when `with_w` is true.
    """
    with tf.variable_scope(name):
        out_channels = output_shape[-1]
        strides = [1, d_h, d_w, 1]

        # filter : [height, width, output_channels, in_channels]
        kernel = tf.get_variable('w', [k_h, k_w, out_channels, input_.get_shape()[-1]],
                                 initializer=tf.random_normal_initializer(stddev=stddev))

        try:
            upsampled = tf.nn.conv2d_transpose(input_, kernel, output_shape=output_shape,
                                               strides=strides)
        except AttributeError:
            # Support for versions of TensorFlow before 0.7.0
            upsampled = tf.nn.deconv2d(input_, kernel, output_shape=output_shape,
                                       strides=strides)

        biases = tf.get_variable('biases', [out_channels], initializer=tf.constant_initializer(0.0))
        result = tf.reshape(tf.nn.bias_add(upsampled, biases), upsampled.get_shape())

        if with_w:
            return result, kernel, biases
        return result
81 |
def lrelu(x, leak=0.2, name="lrelu"):
    """Leaky ReLU: element-wise max of `x` and `leak * x`. `name` is unused."""
    leaked = leak*x
    return tf.maximum(x, leaked)
84 |
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
    """Fully connected layer ``input_ @ Matrix + bias``.

    Args:
        input_: 2-D tensor; its second static dimension is the input width.
        output_size: Number of output units.
        scope: Variable scope name (defaults to "Linear").
        stddev: Std-dev of the normal initializer for the weight matrix.
        bias_start: Constant initial value of the bias.
        with_w: Also return the weight matrix and bias variables.

    Returns:
        The projected tensor, or ``(projected, Matrix, bias)`` when `with_w` is true.
    """
    in_width = input_.get_shape().as_list()[1]

    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable("Matrix", [in_width, output_size], tf.float32,
                                 tf.random_normal_initializer(stddev=stddev))
        bias = tf.get_variable("bias", [output_size],
                               initializer=tf.constant_initializer(bias_start))
        projected = tf.matmul(input_, matrix) + bias
        if with_w:
            return projected, matrix, bias
        return projected
97 |
def lstm(input_, n_hidden, keep_prob, n_dim, name="lstm"):
    """Run `input_` through a stack of dropout-wrapped LSTM cells.

    Args:
        input_: Input sequence tensor handed to tf.nn.dynamic_rnn.
        n_hidden: Number of units in each BasicLSTMCell.
        keep_prob: Output keep probability of each DropoutWrapper.
        n_dim: Number of stacked cells (layers), despite the name.
        name: Variable scope for the recurrent network.

    Returns:
        The per-timestep outputs of the top layer.
    """
    # input (1,5,512), output (1,5,n_hidden)
    # NOTE(review): the whole body is assumed to live inside this scope - confirm.
    with tf.variable_scope(name):
        lstm_cell = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(n_hidden), output_keep_prob=keep_prob)for _ in range(n_dim)]);

        # The final states are computed but not used.
        outputs,states = tf.nn.dynamic_rnn(lstm_cell,input_,dtype=tf.float32)
        print('lstm_output shape: {}'.format(outputs.shape))
        #return outputs[:,-1,:]
        return outputs
107 |
--------------------------------------------------------------------------------
/tools/process.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 |
6 | import argparse
7 | import os
8 | import tempfile
9 | import subprocess
10 | import tensorflow as tf
11 | import numpy as np
12 | import tfimage as im
13 | import threading
14 | import time
15 | import multiprocessing
16 |
# Process pool used by the "edges" operation; created in main().
edge_pool = None


# Command-line interface. Arguments are parsed at import time into the
# module-level namespace `a`, which the functions below read directly.
parser = argparse.ArgumentParser()
parser.add_argument("--input_dir", required=True, help="path to folder containing images")
parser.add_argument("--output_dir", required=True, help="output path")
parser.add_argument("--operation", required=True, choices=["grayscale", "resize", "blank", "combine", "edges"])
parser.add_argument("--workers", type=int, default=1, help="number of workers")
# resize
parser.add_argument("--pad", action="store_true", help="pad instead of crop for resize operation")
parser.add_argument("--size", type=int, default=256, help="size to use for resize operation")
# combine
parser.add_argument("--b_dir", type=str, help="path to folder containing B images for combine operation")
a = parser.parse_args()
31 |
32 |
def resize(src):
    """Make `src` square (pad or center-crop, per --pad) and scale it to --size.

    Returns `src` unchanged when it is already square and of the target size.
    """
    rows, cols, _ = src.shape
    dst = src

    if rows != cols:
        if a.pad:
            # pad to correct ratio
            side = max(rows, cols)
            dst = im.pad(image=dst, offset_height=(side - rows) // 2, offset_width=(side - cols) // 2, target_height=side, target_width=side)
        else:
            # crop to correct ratio
            side = min(rows, cols)
            dst = im.crop(image=dst, offset_height=(rows - side) // 2, offset_width=(cols - side) // 2, target_height=side, target_width=side)

    assert(dst.shape[0] == dst.shape[1])

    side, _, _ = dst.shape
    target = [a.size, a.size]
    if side > a.size:
        return im.downscale(images=dst, size=target)
    if side < a.size:
        return im.upscale(images=dst, size=target)
    return dst
58 |
59 |
def blank(src):
    """Return a copy of a square image with its central region set to 1.

    The blanked square has side ``int(width * 0.3)`` and is centred in the
    image. The input array is left untouched and any number of channels is
    supported.

    Args:
        src: Array of shape (height, width, channels) with height == width.

    Returns:
        A new array of the same shape and dtype as `src`.

    Raises:
        Exception: If the image is not square.
    """
    height, width, _ = src.shape
    if height != width:
        raise Exception("non-square image")

    image_size = width
    size = int(image_size * 0.3)
    offset = int(image_size / 2 - size / 2)

    # Work on a copy so the caller's image is not modified in place.
    dst = np.array(src, copy=True)
    # A scalar broadcasts over every channel, unlike a fixed 3-channel patch.
    dst[offset:offset + size, offset:offset + size, :] = 1
    return dst
72 |
73 |
def combine(src, src_path):
    """Place `src` and its same-named image from --b_dir side by side.

    The sibling is looked up in ``a.b_dir`` by base name, trying ".png" then
    ".jpg". Both images are brought to three channels before being joined
    along the width axis.

    Raises:
        Exception: If --b_dir is missing, no sibling exists, or sizes differ.
    """
    if a.b_dir is None:
        raise Exception("missing b_dir")

    # find corresponding file in b_dir, could have a different extension
    stem = os.path.splitext(os.path.basename(src_path))[0]
    candidates = (os.path.join(a.b_dir, stem + suffix) for suffix in (".png", ".jpg"))
    sibling_path = next((path for path in candidates if os.path.exists(path)), None)
    if sibling_path is None:
        raise Exception("could not find sibling image for " + src_path)
    sibling = im.load(sibling_path)

    # make sure that dimensions are correct
    height, width, _ = src.shape
    if (height, width) != (sibling.shape[0], sibling.shape[1]):
        raise Exception("differing sizes")

    # convert both images to RGB if necessary
    if src.shape[2] == 1:
        src = im.grayscale_to_rgb(images=src)
    if sibling.shape[2] == 1:
        sibling = im.grayscale_to_rgb(images=sibling)

    # remove alpha channel
    if src.shape[2] == 4:
        src = src[:, :, :3]
    if sibling.shape[2] == 4:
        sibling = sibling[:, :, :3]

    return np.concatenate([src, sibling], axis=1)
108 |
109 |
def grayscale(src):
    """Convert `src` to grayscale and expand it back to RGB via the tfimage helpers."""
    single_channel = im.rgb_to_grayscale(images=src)
    return im.grayscale_to_rgb(images=single_channel)
112 |
113 |
# Lazily created caffe network, cached at module level after the first call.
net = None
def run_caffe(src):
    """Run the HED network on one preprocessed image and return its fused output.

    Args:
        src: Array written into the network's "data" blob; the blob is
            reshaped to ``(1,) + src.shape`` first.

    Returns:
        The 2-D slice ``[0][0, :, :]`` of the "sigmoid-fuse" blob.
    """
    # lazy load caffe and create net
    global net
    if net is None:
        # don't require caffe unless we are doing edge detection
        os.environ["GLOG_minloglevel"] = "2" # disable logging from caffe
        import caffe
        # using this requires using the docker image or assembling a bunch of dependencies
        # and then changing these hardcoded paths
        net = caffe.Net("/opt/caffe/examples/hed/deploy.prototxt", "/opt/caffe/hed_pretrained_bsds.caffemodel", caffe.TEST)

    net.blobs["data"].reshape(1, *src.shape)
    net.blobs["data"].data[...] = src
    net.forward()
    return net.blobs["sigmoid-fuse"].data[0][0,:,:]
130 |
131 |
def edges(src):
    """Produce an edge map for `src` using the HED network and an Octave post-process.

    The image is scaled by 255, reflect-padded, channel-reversed and
    mean-subtracted, then sent to run_caffe through `edge_pool`. The network
    output is cropped back, saved to a temporary .mat file, and post-processed
    by an Octave script that writes a temporary PNG, which is loaded and
    returned.

    Raises:
        subprocess.CalledProcessError: If the octave invocation fails (its
            return code and output are printed first).
    """
    # based on https://github.com/phillipi/pix2pix/blob/master/scripts/edges/batch_hed.py
    # and https://github.com/phillipi/pix2pix/blob/master/scripts/edges/PostprocessHED.m
    import scipy.io
    # Multiplication creates a new array, so the later in-place subtraction
    # does not touch the caller's image.
    src = src * 255
    border = 128 # put a padding around images since edge detection seems to detect edge of image
    src = src[:,:,:3] # remove alpha channel if present
    src = np.pad(src, ((border, border), (border, border), (0,0)), "reflect")
    # Reverse the channel order before subtracting the per-channel constants.
    src = src[:,:,::-1]
    src -= np.array((104.00698793,116.66876762,122.67891434))
    src = src.transpose((2, 0, 1))

    # [height, width, channels] => [batch, channel, height, width]
    fuse = edge_pool.apply(run_caffe, [src])
    # Drop the padding that was added above.
    fuse = fuse[border:-border, border:-border]

    with tempfile.NamedTemporaryFile(suffix=".png") as png_file, tempfile.NamedTemporaryFile(suffix=".mat") as mat_file:
        scipy.io.savemat(mat_file.name, {"input": fuse})

        octave_code = r"""
E = 1-load(input_path).input;
E = imresize(E, [image_width,image_width]);
E = 1 - E;
E = single(E);
[Ox, Oy] = gradient(convTri(E, 4), 1);
[Oxx, ~] = gradient(Ox, 1);
[Oxy, Oyy] = gradient(Oy, 1);
O = mod(atan(Oyy .* sign(-Oxy) ./ (Oxx + 1e-5)), pi);
E = edgesNmsMex(E, O, 1, 5, 1.01, 1);
E = double(E >= max(eps, threshold));
E = bwmorph(E, 'thin', inf);
E = bwareaopen(E, small_edge);
E = 1 - E;
E = uint8(E * 255);
imwrite(E, output_path);
"""

        # Each entry becomes an Octave variable assignment evaluated before
        # the script. NOTE(review): image_width is fixed at 256 and does not
        # follow --size - confirm intended.
        config = dict(
            input_path="'%s'" % mat_file.name,
            output_path="'%s'" % png_file.name,
            image_width=256,
            threshold=25.0/255.0,
            small_edge=5,
        )

        args = ["octave"]
        for k, v in config.items():
            args.extend(["--eval", "%s=%s;" % (k, v)])

        args.extend(["--eval", octave_code])
        try:
            subprocess.check_output(args, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            print("octave failed")
            print("returncode:", e.returncode)
            print("output:", e.output)
            raise
        # Load before leaving the with-block, while the temporary PNG still exists.
        return im.load(png_file.name)
190 |
191 |
def process(src_path, dst_path):
    """Load one image, apply the operation selected by --operation, and save the result.

    Raises:
        Exception: If ``a.operation`` is not one of the known operations.
    """
    src = im.load(src_path)

    # Deferred calls so only the selected operation runs.
    handlers = {
        "grayscale": lambda: grayscale(src),
        "resize": lambda: resize(src),
        "blank": lambda: blank(src),
        "combine": lambda: combine(src, src_path),
        "edges": lambda: edges(src),
    }
    handler = handlers.get(a.operation)
    if handler is None:
        raise Exception("invalid operation")

    dst = handler()
    im.save(dst, dst_path)
209 |
210 |
# Progress bookkeeping shared by all worker threads; guarded by complete_lock.
complete_lock = threading.Lock()
start = None  # time.time() at which processing began, set by main()
num_complete = 0
total = 0  # number of images to process, set by main()

def complete():
    """Record one finished image and print a progress line.

    Updates the module-level ``num_complete``, ``rate`` and ``last_complete``
    under ``complete_lock`` and prints throughput plus elapsed and estimated
    remaining time.
    """
    global num_complete, rate, last_complete

    with complete_lock:
        num_complete += 1
        now = time.time()
        elapsed = now - start
        # A coarse or non-monotonic clock can yield elapsed <= 0; report a
        # zero rate instead of raising ZeroDivisionError.
        rate = num_complete / elapsed if elapsed > 0 else 0
        if rate > 0:
            remaining = (total - num_complete) / rate
        else:
            remaining = 0

        print("%d/%d complete  %0.2f images/sec  %dm%ds elapsed  %dm%ds remaining" % (num_complete, total, rate, elapsed // 60, elapsed % 60, remaining // 60, remaining % 60))

        last_complete = now
232 |
233 |
def main():
    """Apply the selected operation to every image in --input_dir.

    Outputs are written to --output_dir as ``<name>.png``; inputs whose output
    already exists are skipped. With one worker the images are processed
    sequentially; otherwise the (source, destination) pairs are fed through a
    TensorFlow input queue consumed by ``a.workers`` threads.
    """
    global total, start, edge_pool

    if not os.path.exists(a.output_dir):
        os.makedirs(a.output_dir)

    src_paths = []
    dst_paths = []

    skipped = 0
    for src_path in im.find(a.input_dir):
        name, _ = os.path.splitext(os.path.basename(src_path))
        dst_path = os.path.join(a.output_dir, name + ".png")
        if os.path.exists(dst_path):
            skipped += 1
        else:
            src_paths.append(src_path)
            dst_paths.append(dst_path)

    print("skipping %d files that already exist" % skipped)

    total = len(src_paths)

    print("processing %d files" % total)

    if total == 0:
        # Nothing to do: avoid creating a pool, a session or an empty queue.
        return

    start = time.time()

    if a.operation == "edges":
        # use a multiprocessing pool for this operation so it can use multiple CPUs
        # create the pool before we launch processing threads
        edge_pool = multiprocessing.Pool(a.workers)

    if a.workers == 1:
        with tf.Session() as sess:
            for src_path, dst_path in zip(src_paths, dst_paths):
                process(src_path, dst_path)
                complete()
    else:
        # Materialise the pairs: on Python 3 zip() is a lazy iterator, which
        # cannot be converted to the tensor input_producer needs.
        path_pairs = list(zip(src_paths, dst_paths))
        queue = tf.train.input_producer(path_pairs, shuffle=False, num_epochs=1)
        dequeue_op = queue.dequeue()

        def worker(coord):
            # `sess` is resolved from the enclosing scope when the thread
            # runs, after the session below has been opened.
            with sess.as_default():
                while not coord.should_stop():
                    try:
                        # NOTE(review): on Python 3 dequeued string tensors
                        # presumably arrive as bytes - confirm the downstream
                        # path handling copes with that.
                        src_path, dst_path = sess.run(dequeue_op)
                    except tf.errors.OutOfRangeError:
                        coord.request_stop()
                        break

                    process(src_path, dst_path)
                    complete()

        # init epoch counter for the queue
        local_init_op = tf.local_variables_initializer()
        with tf.Session() as sess:
            sess.run(local_init_op)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            for i in range(a.workers):
                t = threading.Thread(target=worker, args=(coord,))
                t.start()
                threads.append(t)

            try:
                coord.join(threads)
            except KeyboardInterrupt:
                coord.request_stop()
                coord.join(threads)
305 |
306 | main()
307 |
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import os
3 | import time
4 | from glob import glob
5 | import tensorflow as tf
6 | import numpy as np
7 | from six.moves import xrange
8 | import shutil
9 |
10 | from ops import *
11 | from utils import *
12 |
13 | os.environ["CUDA_VISIBLE_DEVICES"]="-1"
14 |
15 | class pix2pix(object):
def __init__(self, sess, image_size=256,
             batch_size=1, sample_size=1, output_size=256,
             gf_dim=64, df_dim=64, L1_lambda=100,
             input_c_dim=15, output_c_dim=1, dataset_name='facades',
             checkpoint_dir=None, sample_dir=None):
    """Store the hyper-parameters, create the batch-norm layers and build the graph.

    Args:
        sess: TensorFlow session
        image_size: Height and width of the input placeholder. [256]
        batch_size: The size of batch. Should be specified before training. [1]
        sample_size: Stored on the instance. [1]
        output_size: (optional) The resolution in pixels of the images. [256]
        gf_dim: (optional) Dimension of gen filters in first conv layer. [64]
        df_dim: (optional) Dimension of discrim filters in first conv layer. [64]
        L1_lambda: Weight of the L1 term in the generator loss. [100]
        input_c_dim: (optional) Number of channels of the generator input. [15]
        output_c_dim: (optional) Number of channels of the generator output. [1]
        dataset_name: Name of the folder under ./datasets to read from. ['facades']
        checkpoint_dir: Directory used when loading checkpoints.
        sample_dir: Accepted but not used by this constructor.
    """
    self.sess = sess
    # True only when the generator input has a single channel.
    self.is_grayscale = (input_c_dim == 1)
    self.batch_size = batch_size
    self.image_size = image_size
    self.sample_size = sample_size
    self.output_size = output_size

    self.gf_dim = gf_dim
    self.df_dim = df_dim

    self.input_c_dim = input_c_dim
    self.output_c_dim = output_c_dim

    self.L1_lambda = L1_lambda

    # lstm variables
    self.n_hidden_lstm = 1024
    self.num_layer_lstm = 2
    self.keep_prob_lstm = 0.5

    # batch normalization : deals with poor initialization helps gradient flow
    self.d_bn1 = batch_norm(name='d_bn1')
    self.d_bn2 = batch_norm(name='d_bn2')
    self.d_bn3 = batch_norm(name='d_bn3')

    # Encoder batch norms (the first encoder layer has none).
    self.g_bn_e2 = batch_norm(name='g_bn_e2')
    self.g_bn_e3 = batch_norm(name='g_bn_e3')
    self.g_bn_e4 = batch_norm(name='g_bn_e4')
    self.g_bn_e5 = batch_norm(name='g_bn_e5')
    self.g_bn_e6 = batch_norm(name='g_bn_e6')
    self.g_bn_e7 = batch_norm(name='g_bn_e7')
    self.g_bn_e8 = batch_norm(name='g_bn_e8')

    # Decoder batch norms (the last decoder layer has none).
    self.g_bn_d1 = batch_norm(name='g_bn_d1')
    self.g_bn_d2 = batch_norm(name='g_bn_d2')
    self.g_bn_d3 = batch_norm(name='g_bn_d3')
    self.g_bn_d4 = batch_norm(name='g_bn_d4')
    self.g_bn_d5 = batch_norm(name='g_bn_d5')
    self.g_bn_d6 = batch_norm(name='g_bn_d6')
    self.g_bn_d7 = batch_norm(name='g_bn_d7')

    self.dataset_name = dataset_name
    self.checkpoint_dir = checkpoint_dir
    self.build_model()
76 |
def build_model(self):
    """Build the placeholder, generator, discriminator, losses, summaries and saver.

    The single placeholder carries the target and the input stacked on the
    channel axis: the first ``output_c_dim`` channels are the target
    (``real_B``), the remaining ``input_c_dim`` channels are the generator
    input (``real_A``).
    """
    self.real_data = tf.placeholder(tf.float32,
                                    [self.batch_size, self.image_size, self.image_size,
                                     self.input_c_dim + self.output_c_dim],
                                    name='real_A_and_B_images')
    #self.real_B = self.real_data[:, :, :, :self.input_c_dim]
    #self.real_A = self.real_data[:, :, :, self.input_c_dim:self.input_c_dim + self.output_c_dim]
    self.real_B = self.real_data[:, :, :, :self.output_c_dim]
    self.real_A = self.real_data[:, :, :, self.output_c_dim:self.input_c_dim + self.output_c_dim]
    # 1 where the target is strictly above -1, 0 elsewhere.
    mask = tf.cast(self.real_B>-1,tf.int32) # find all valid pixels
    #print(self.real_B)
    self.fake_B = self.generator(self.real_A)
    print(self.fake_B)
    self.real_AB = tf.concat([self.real_A, self.real_B], 3)
    print(self.real_AB)
    self.fake_AB = tf.concat([self.real_A, self.fake_B], 3)
    print(self.fake_AB)
    # The second discriminator call reuses the variables created by the first.
    self.D, self.D_logits = self.discriminator(self.real_AB, reuse=False)
    self.D_, self.D_logits_ = self.discriminator(self.fake_AB, reuse=True)
    # Partition 0 collects errors at masked-out pixels, partition 1 at valid ones.
    bad_diff, good_diff = tf.dynamic_partition(self.real_B - self.fake_B,mask,2)
    self.fake_B_sample = self.sampler(self.real_A)

    self.d_sum = tf.summary.histogram("d", self.D)
    self.d__sum = tf.summary.histogram("d_", self.D_)
    self.fake_B_sum = tf.summary.image("fake_B", self.fake_B)

    # Real samples are labelled 0.9 instead of 1 (one-sided label smoothing).
    self.d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_logits, labels=tf.scalar_mul(0.9,tf.ones_like(self.D))))
    self.d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_logits_, labels=tf.zeros_like(self.D_)))
    # Adversarial term plus L1 over valid pixels only.
    # NOTE(review): if a batch has no valid pixel, good_diff is empty and its
    # mean is presumably NaN - confirm the data never triggers this.
    self.g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_logits_, labels=tf.ones_like(self.D_))) \
                    + self.L1_lambda * tf.reduce_mean(tf.abs(good_diff))

    self.d_loss_real_sum = tf.summary.scalar("d_loss_real", self.d_loss_real)
    self.d_loss_fake_sum = tf.summary.scalar("d_loss_fake", self.d_loss_fake)

    self.d_loss = self.d_loss_real + self.d_loss_fake

    self.g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
    self.d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)

    t_vars = tf.trainable_variables()

    # Variables are assigned to a network by substring match on their names.
    self.d_vars = [var for var in t_vars if 'd_' in var.name]
    self.g_vars = [var for var in t_vars if 'g_' in var.name]

    self.saver = tf.train.Saver(max_to_keep=10)
122 |
123 |
def load_random_samples(self):
    """Load `batch_size` randomly chosen validation samples as a float32 batch.

    File names are drawn (with replacement) from
    ``<cwd>/datasets/<dataset_name>/val/A`` and loaded with load_data2. A
    trailing axis is appended when the model is in grayscale mode.
    """
    #data = np.random.choice(glob('./datasets/{}/val/*.jpg'.format(self.dataset_name)), self.batch_size)
    val_dir = os.path.join(os.getcwd(), 'datasets', str(self.dataset_name), 'val', 'A')
    chosen = np.random.choice(os.listdir(val_dir), self.batch_size)

    loaded = np.array([load_data2(val_dir, file_name) for file_name in chosen]).astype(np.float32)
    if self.is_grayscale:
        return loaded[:, :, :, None]
    return loaded
136 |
def sample_model(self, sample_dir, epoch, idx):
    """Run the sampler on random validation data, save the images and print both losses."""
    validation_batch = self.load_random_samples()
    fetches = [self.fake_B_sample, self.d_loss, self.g_loss]
    samples, d_loss, g_loss = self.sess.run(
        fetches,
        feed_dict={self.real_data: validation_batch}
    )

    out_path = './{}/train_{:02d}_{:04d}.png'.format(sample_dir, epoch, idx)
    save_images(samples, [self.batch_size, 1], out_path)
    print("[Sample] d_loss: {:.8f}, g_loss: {:.8f}".format(d_loss, g_loss))
146 |
def train(self, args):
    """Train pix2pix

    Args:
        args: Namespace providing ``lr``, ``beta1``, ``epoch``,
            ``train_size``, ``sample_dir`` and ``checkpoint_dir``.

    Every batch performs one discriminator update followed by two generator
    updates. Samples are written every 100 steps and a checkpoint every
    5000 steps.
    """
    d_optim = tf.train.AdamOptimizer(args.lr, beta1=args.beta1) \
                      .minimize(self.d_loss, var_list=self.d_vars)
    g_optim = tf.train.AdamOptimizer(args.lr, beta1=args.beta1) \
                      .minimize(self.g_loss, var_list=self.g_vars)

    init_op = tf.global_variables_initializer()
    self.sess.run(init_op)

    self.g_sum = tf.summary.merge([self.d__sum,
        self.fake_B_sum, self.d_loss_fake_sum, self.g_loss_sum])
    # self.d_sum is rebound here from the histogram summary to the merged op.
    self.d_sum = tf.summary.merge([self.d_sum, self.d_loss_real_sum, self.d_loss_sum])
    self.writer = tf.summary.FileWriter("./logs", self.sess.graph)

    counter = 1
    start_time = time.time()

    # Restores from the directory given to the constructor; saving below uses
    # args.checkpoint_dir instead.
    if self.load(self.checkpoint_dir):
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    for epoch in xrange(args.epoch):
        #data = glob('./datasets/{}/train/*.jpg'.format(self.dataset_name))
        trainPath = os.path.join(os.getcwd(),'datasets',str(self.dataset_name),'train','A') # choose either A or B is fine
        data = [name for name in os.listdir(trainPath)]
        #np.random.shuffle(data)
        # Only full batches are used; a trailing partial batch is dropped.
        batch_idxs = min(len(data), args.train_size) // self.batch_size

        for idx in xrange(0, batch_idxs):
            batch_files = data[idx*self.batch_size:(idx+1)*self.batch_size]
            batch = [load_data2(trainPath,batch_file) for batch_file in batch_files]
            if (self.is_grayscale):
                batch_images = np.array(batch).astype(np.float32)[:, :, :, None]
            else:
                batch_images = np.array(batch).astype(np.float32)

            # Update D network
            _, summary_str = self.sess.run([d_optim, self.d_sum],
                                           feed_dict={ self.real_data: batch_images })
            self.writer.add_summary(summary_str, counter)

            # Update G network
            _, summary_str = self.sess.run([g_optim, self.g_sum],
                                           feed_dict={ self.real_data: batch_images })
            self.writer.add_summary(summary_str, counter)

            # Run g_optim twice to make sure that d_loss does not go to zero (different from paper)
            _, summary_str = self.sess.run([g_optim, self.g_sum],
                                           feed_dict={ self.real_data: batch_images })
            self.writer.add_summary(summary_str, counter)

            # Three extra forward passes, used only for the log line below.
            # NOTE(review): .eval() without a session argument presumably
            # relies on a default session being active - confirm in the caller.
            errD_fake = self.d_loss_fake.eval({self.real_data: batch_images})
            errD_real = self.d_loss_real.eval({self.real_data: batch_images})
            errG = self.g_loss.eval({self.real_data: batch_images})

            counter += 1
            print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" \
                % (epoch, idx, batch_idxs,
                    time.time() - start_time, errD_fake+errD_real, errG))

            if np.mod(counter, 100) == 1:
                self.sample_model(args.sample_dir, epoch, idx)

            if np.mod(counter, 5000) == 2:
                self.save(args.checkpoint_dir, counter)
214 |
def discriminator(self, image, y=None, reuse=False):
    """Build the discriminator and return ``(sigmoid(logits), logits)``.

    Args:
        image: Input and target images concatenated on the channel axis.
        y: Unused.
        reuse: Reuse the variables of a previous call instead of creating them.
    """

    with tf.variable_scope("discriminator") as scope:

        # image is 256 x 256 x (input_c_dim + output_c_dim)
        if reuse:
            tf.get_variable_scope().reuse_variables()
        else:
            assert tf.get_variable_scope().reuse == False

        h0 = lrelu(conv2d(image, self.df_dim, name='d_h0_conv'))
        # h0 is (128 x 128 x self.df_dim)
        h1 = lrelu(self.d_bn1(conv2d(h0, self.df_dim*2, name='d_h1_conv')))
        # h1 is (64 x 64 x self.df_dim*2)
        h2 = lrelu(self.d_bn2(conv2d(h1, self.df_dim*4, name='d_h2_conv')))
        # h2 is (32x 32 x self.df_dim*4)
        h3 = lrelu(self.d_bn3(conv2d(h2, self.df_dim*8, d_h=1, d_w=1, name='d_h3_conv')))
        # h3 is (32 x 32 x self.df_dim*8): stride 1 with SAME padding keeps h2's spatial size
        # Flatten per sample and project to a single logit.
        h4 = linear(tf.reshape(h3, [self.batch_size, -1]), 1, 'd_h3_lin')

        return tf.nn.sigmoid(h4), h4
236 |
def generator(self, image, y=None):
    """Build the generator: shared per-view encoder, LSTM bottleneck, U-Net style decoder.

    The input channels are split into groups of three (one per view). Every
    view goes through the same convolutional encoder, and the bottleneck
    features of the views form the LSTM input sequence. The summed LSTM
    outputs are projected and decoded, with skip connections taken from the
    encoder activations of view 0 only.

    Args:
        image: Generator input whose channel count is a multiple of 3.
        y: Unused.

    Returns:
        tanh of the final deconvolution, with ``output_c_dim`` channels.
    """
    with tf.variable_scope("generator") as scope:

        s = self.output_size
        s2, s4, s8, s16, s32, s64, s128 = int(s/2), int(s/4), int(s/8), int(s/16), int(s/32), int(s/64), int(s/128)

        # LSTM encoder
        reuse = False
        lstm_input = []
        # Number of 3-channel views packed into the input.
        N = image.shape[-1]//3
        with tf.variable_scope('LSTM_scope'):
            for i in range(N):
                img = image[:,:,:,i*3:(i+1)*3]
                # From the second view on, share the encoder weights.
                if reuse:
                    tf.get_variable_scope().reuse_variables()

                # Prints the shape of the full stacked input, not of the slice `img`.
                print('img_unstacked shape: {}'.format(image.shape))
                # image is (256 x 256 x input_c_dim)
                e1 = conv2d(img, self.gf_dim, name='g_e1_conv')
                # e1 is (128 x 128 x self.gf_dim)
                e2 = self.g_bn_e2(conv2d(lrelu(e1), self.gf_dim*2, name='g_e2_conv'))
                # e2 is (64 x 64 x self.gf_dim*2)
                e3 = self.g_bn_e3(conv2d(lrelu(e2), self.gf_dim*4, name='g_e3_conv'))
                # e3 is (32 x 32 x self.gf_dim*4)
                e4 = self.g_bn_e4(conv2d(lrelu(e3), self.gf_dim*8, name='g_e4_conv'))
                # e4 is (16 x 16 x self.gf_dim*8)
                e5 = self.g_bn_e5(conv2d(lrelu(e4), self.gf_dim*8, name='g_e5_conv'))
                # e5 is (8 x 8 x self.gf_dim*8)
                e6 = self.g_bn_e6(conv2d(lrelu(e5), self.gf_dim*8, name='g_e6_conv'))
                # e6 is (4 x 4 x self.gf_dim*8)
                e7 = self.g_bn_e7(conv2d(lrelu(e6), self.gf_dim*8, name='g_e7_conv'))
                # e7 is (2 x 2 x self.gf_dim*8)
                e8 = self.g_bn_e8(conv2d(lrelu(e7), self.gf_dim*8, name='g_e8_conv'))
                # e8 is (1 x 1 x self.gf_dim*8)

                # Keep the first view's activations for the decoder skip connections.
                if i==0 :
                    e1_0 = tf.identity(e1)
                    e2_0 = tf.identity(e2)
                    e3_0 = tf.identity(e3)
                    e4_0 = tf.identity(e4)
                    e5_0 = tf.identity(e5)
                    e6_0 = tf.identity(e6)
                    e7_0 = tf.identity(e7)
                    e8_0 = tf.identity(e8)
                lstm_input.append(e8)
                reuse = True # reuse variable after first iteration

        # input to lstm cell
        # todo: change shape of lstm_input to (1,5,512)?
        # initial: [(1,1,1,512)]*5 -> concate->(1,5,512)
        # NOTE(review): the sequence is seeded with lstm_input[-1] and then
        # extended with indices 1..N-1, so view 0 never enters the LSTM and the
        # last view appears twice - looks unintended, confirm before changing
        # (any other graph builder sharing these weights must stay in sync).
        # The fixed [1,1,512] shape also assumes batch_size == 1 and gf_dim*8 == 512.
        lstm_input_final = tf.reshape(lstm_input[-1],[1,1,512])
        print('lstm_input_final before: {}'.format(lstm_input_final.shape))
        for i in range(1,N):
            lstm_input_final = tf.concat((lstm_input_final,tf.reshape(lstm_input[i],[1,1,512])),axis =1)
        print('lstm_input_final after: {}'.format(lstm_input_final.shape))

        output_lstm = lstm(lstm_input_final,self.n_hidden_lstm,self.keep_prob_lstm, self.num_layer_lstm, name='g_lstm')
        # Sum over timesteps; hard-coded for exactly 5 steps (N == 5).
        output = output_lstm[:, 0,:] + output_lstm[:, 1, :] + output_lstm[:, 2, :] + output_lstm[:, 3, :] + output_lstm[:, 4, :]
        print('lstm_output sum up: {}'.format(output.shape))

        # Linear projection from the LSTM width back to gf_dim*8 features.
        # NOTE(review): tf.Variable does not take part in variable_scope reuse,
        # so a second graph builder repeating this code would presumably get
        # its own copy of g_w / g_b - confirm.
        w_output = tf.Variable(tf.truncated_normal([self.n_hidden_lstm,self.gf_dim*8]),name="g_w")
        b_output = tf.Variable(tf.zeros(self.gf_dim*8),name="g_b")
        output = tf.matmul(output,w_output) + b_output
        print('output after matmul: {}'.format(output))

        output = tf.reshape(output,[-1,1,1,512])
        print('output after reshape: {}'.format(output))

        # The first three decoder stages apply dropout with keep probability 0.5.
        self.d1, self.d1_w, self.d1_b = deconv2d(tf.nn.relu(output),
            [self.batch_size, s128, s128, self.gf_dim*8], name='g_d1', with_w=True)
        d1 = tf.nn.dropout(self.g_bn_d1(self.d1), 0.5)
        d1 = tf.concat([d1, e7_0], 3)
        # d1 is (2 x 2 x self.gf_dim*8*2)

        self.d2, self.d2_w, self.d2_b = deconv2d(tf.nn.relu(d1),
            [self.batch_size, s64, s64, self.gf_dim*8], name='g_d2', with_w=True)
        d2 = tf.nn.dropout(self.g_bn_d2(self.d2), 0.5)
        d2 = tf.concat([d2, e6_0], 3)
        # d2 is (4 x 4 x self.gf_dim*8*2)

        self.d3, self.d3_w, self.d3_b = deconv2d(tf.nn.relu(d2),
            [self.batch_size, s32, s32, self.gf_dim*8], name='g_d3', with_w=True)
        d3 = tf.nn.dropout(self.g_bn_d3(self.d3), 0.5)
        d3 = tf.concat([d3, e5_0], 3)
        # d3 is (8 x 8 x self.gf_dim*8*2)

        self.d4, self.d4_w, self.d4_b = deconv2d(tf.nn.relu(d3),
            [self.batch_size, s16, s16, self.gf_dim*8], name='g_d4', with_w=True)
        d4 = self.g_bn_d4(self.d4)
        d4 = tf.concat([d4, e4_0], 3)
        # d4 is (16 x 16 x self.gf_dim*8*2)

        self.d5, self.d5_w, self.d5_b = deconv2d(tf.nn.relu(d4),
            [self.batch_size, s8, s8, self.gf_dim*4], name='g_d5', with_w=True)
        d5 = self.g_bn_d5(self.d5)
        d5 = tf.concat([d5, e3_0], 3)
        # d5 is (32 x 32 x self.gf_dim*4*2)

        self.d6, self.d6_w, self.d6_b = deconv2d(tf.nn.relu(d5),
            [self.batch_size, s4, s4, self.gf_dim*2], name='g_d6', with_w=True)
        d6 = self.g_bn_d6(self.d6)
        d6 = tf.concat([d6, e2_0], 3)
        # d6 is (64 x 64 x self.gf_dim*2*2)

        self.d7, self.d7_w, self.d7_b = deconv2d(tf.nn.relu(d6),
            [self.batch_size, s2, s2, self.gf_dim], name='g_d7', with_w=True)
        d7 = self.g_bn_d7(self.d7)
        d7 = tf.concat([d7, e1_0], 3)
        # d7 is (128 x 128 x self.gf_dim*1*2)

        self.d8, self.d8_w, self.d8_b = deconv2d(tf.nn.relu(d7),
            [self.batch_size, s, s, self.output_c_dim], name='g_d8', with_w=True)
        # d8 is (256 x 256 x output_c_dim)

        return tf.nn.tanh(self.d8)
352 |
def sampler(self, image, y=None):
    """Build the sampling (inference) copy of the multi-view LSTM generator.

    The "generator" variable scope is re-entered with reuse enabled, each
    3-channel slice of ``image`` is pushed through the same eight-stage
    encoder, the per-view bottlenecks are fed to ``lstm`` as one sequence,
    and the summed LSTM outputs are projected and decoded with skip
    connections taken from the first view only.

    Args:
        image: 4-D tensor whose last axis stacks the views, three channels
            per view (``image.shape[-1] // 3`` views).  The code below is
            hard-wired to five views (LSTM steps 0-4 are summed), a
            512-wide bottleneck and a batch of one (reshapes to
            ``[1, 1, 512]``).
        y: unused.

    Returns:
        ``tanh`` of the last decoder output ``self.d8``.  ``self.d1`` ..
        ``self.d8`` and their ``_w`` / ``_b`` companions are overwritten
        as a side effect.
    """
    with tf.variable_scope("generator") as scope:
        scope.reuse_variables()

        s = self.output_size
        s2, s4, s8, s16, s32, s64, s128 = [
            int(s / factor) for factor in (2, 4, 8, 16, 32, 64, 128)]

        def upsample(tensor, side, depth, name):
            # ReLU, then deconv2d asked for a (batch, side, side, depth) output.
            return deconv2d(tf.nn.relu(tensor),
                            [self.batch_size, side, side, depth],
                            name=name, with_w=True)

        # (batch-norm layer, multiple of gf_dim, conv name) for stages e2..e8.
        stages = (
            (self.g_bn_e2, 2, 'g_e2_conv'),
            (self.g_bn_e3, 4, 'g_e3_conv'),
            (self.g_bn_e4, 8, 'g_e4_conv'),
            (self.g_bn_e5, 8, 'g_e5_conv'),
            (self.g_bn_e6, 8, 'g_e6_conv'),
            (self.g_bn_e7, 8, 'g_e7_conv'),
            (self.g_bn_e8, 8, 'g_e8_conv'),
        )

        # LSTM encoder: one pass of the shared encoder per view.
        share = False
        bottlenecks = []
        skips = None
        view_count = image.shape[-1]//3

        with tf.variable_scope('LSTM_scope'):
            for view in range(view_count):
                rgb = image[:, :, :, view*3:(view+1)*3]
                if share:
                    tf.get_variable_scope().reuse_variables()

                # Shape comments in the original assume a 256 x 256 input:
                # e1 is 128 x 128 x gf_dim, halving down to e8 at 1 x 1 x gf_dim*8.
                acts = [conv2d(rgb, self.gf_dim, name='g_e1_conv')]
                for bn, mult, conv_name in stages:
                    acts.append(bn(conv2d(lrelu(acts[-1]), self.gf_dim*mult, name=conv_name)))

                if view == 0:
                    # Skip connections come from the first view only.
                    skips = [tf.identity(act) for act in acts]
                bottlenecks.append(acts[-1])
                share = True  # reuse variable after first iteration

        # NOTE(review): everything from here on is assumed to sit outside
        # 'LSTM_scope' (source indentation was not available) — confirm.

        # Input to the LSTM: [(1,1,1,512)] * N -> concat -> (1, N, 512).
        # NOTE(review): the sequence starts with the LAST view and then appends
        # views 1..N-1, so view 0 is never included and the last view appears
        # twice — confirm this matches generator() and is intended.
        sequence = tf.reshape(bottlenecks[-1], [1, 1, 512])
        for view in range(1, view_count):
            sequence = tf.concat((sequence, tf.reshape(bottlenecks[view], [1, 1, 512])), axis=1)

        output_lstm = lstm(sequence, self.n_hidden_lstm, self.keep_prob_lstm,
                           self.num_layer_lstm, name='g_lstm')
        # Sum the outputs of time steps 0..4 (five views are assumed).
        output = output_lstm[:, 0, :]
        for step in range(1, 5):
            output = output + output_lstm[:, step, :]
        print('lstm_output sum up: {}'.format(output.shape))
        print('creating cell LSTM in sampler')

        # NOTE(review): tf.Variable does not take part in variable-scope reuse,
        # so these look like fresh variables rather than the "g_w"/"g_b" made
        # in generator() — verify that sampling uses the trained projection.
        w_output = tf.Variable(tf.truncated_normal([self.n_hidden_lstm, self.gf_dim * 8]), name="g_w")
        b_output = tf.Variable(tf.zeros(self.gf_dim * 8), name="g_b")
        output = tf.matmul(output, w_output) + b_output
        print('output after lstm matmul: {}'.format(output))

        output = tf.reshape(output, [-1, 1, 1, 512])
        print('output after reshape: {}'.format(output))

        e1_0, e2_0, e3_0, e4_0, e5_0, e6_0, e7_0, _ = skips

        # Decoder; sizes in the comments assume output_size == 256.
        self.d1, self.d1_w, self.d1_b = upsample(output, s128, self.gf_dim*8, 'g_d1')
        d1 = tf.concat([tf.nn.dropout(self.g_bn_d1(self.d1), 0.5), e7_0], 3)
        # d1 is (2 x 2 x self.gf_dim*8*2)

        self.d2, self.d2_w, self.d2_b = upsample(d1, s64, self.gf_dim*8, 'g_d2')
        d2 = tf.concat([tf.nn.dropout(self.g_bn_d2(self.d2), 0.5), e6_0], 3)
        # d2 is (4 x 4 x self.gf_dim*8*2)

        self.d3, self.d3_w, self.d3_b = upsample(d2, s32, self.gf_dim*8, 'g_d3')
        d3 = tf.concat([tf.nn.dropout(self.g_bn_d3(self.d3), 0.5), e5_0], 3)
        # d3 is (8 x 8 x self.gf_dim*8*2)

        self.d4, self.d4_w, self.d4_b = upsample(d3, s16, self.gf_dim*8, 'g_d4')
        d4 = tf.concat([self.g_bn_d4(self.d4), e4_0], 3)
        # d4 is (16 x 16 x self.gf_dim*8*2)

        self.d5, self.d5_w, self.d5_b = upsample(d4, s8, self.gf_dim*4, 'g_d5')
        d5 = tf.concat([self.g_bn_d5(self.d5), e3_0], 3)
        # d5 is (32 x 32 x self.gf_dim*4*2)

        self.d6, self.d6_w, self.d6_b = upsample(d5, s4, self.gf_dim*2, 'g_d6')
        d6 = tf.concat([self.g_bn_d6(self.d6), e2_0], 3)
        # d6 is (64 x 64 x self.gf_dim*2*2)

        self.d7, self.d7_w, self.d7_b = upsample(d6, s2, self.gf_dim, 'g_d7')
        d7 = tf.concat([self.g_bn_d7(self.d7), e1_0], 3)
        # d7 is (128 x 128 x self.gf_dim*1*2)

        self.d8, self.d8_w, self.d8_b = upsample(d7, s, self.output_c_dim, 'g_d8')
        # d8 is (256 x 256 x output_c_dim)

        return tf.nn.tanh(self.d8)
471 |
def generator_ori(self, image, y=None):
    """Build the single-view encoder-decoder generator graph.

    Only channels 0-2 of ``image`` are used.  The slice goes through eight
    ``conv2d`` stages (e1..e8) and is decoded by eight ``deconv2d`` stages
    (d1..d8); each decoder stage except the last is concatenated along
    axis 3 with the encoder activation of matching size.  The first three
    decoder stages apply ``tf.nn.dropout`` with a second argument of 0.5.

    Args:
        image: 4-D tensor; indexed as ``image[:, :, :, 0:3]``.
        y: unused.

    Returns:
        ``tanh`` of the last decoder output ``self.d8``.  ``self.d1`` ..
        ``self.d8`` and their ``_w`` / ``_b`` companions are set as a side
        effect.
    """
    with tf.variable_scope("generator"):
        s = self.output_size
        s2, s4, s8, s16, s32, s64, s128 = [
            int(s / factor) for factor in (2, 4, 8, 16, 32, 64, 128)]

        def upsample(tensor, side, depth, name):
            # ReLU, then deconv2d asked for a (batch, side, side, depth) output.
            return deconv2d(tf.nn.relu(tensor),
                            [self.batch_size, side, side, depth],
                            name=name, with_w=True)

        # Encoder.  Shape comments from the original assume a 256 x 256 input:
        # e1 is 128 x 128 x gf_dim, each stage halves the side, e8 is
        # 1 x 1 x gf_dim*8.
        rgb = image[:, :, :, 0:3]
        acts = [conv2d(rgb, self.gf_dim, name='g_e1_conv')]
        stages = (
            (self.g_bn_e2, 2, 'g_e2_conv'),
            (self.g_bn_e3, 4, 'g_e3_conv'),
            (self.g_bn_e4, 8, 'g_e4_conv'),
            (self.g_bn_e5, 8, 'g_e5_conv'),
            (self.g_bn_e6, 8, 'g_e6_conv'),
            (self.g_bn_e7, 8, 'g_e7_conv'),
            (self.g_bn_e8, 8, 'g_e8_conv'),
        )
        for bn, mult, conv_name in stages:
            acts.append(bn(conv2d(lrelu(acts[-1]), self.gf_dim*mult, name=conv_name)))
        e1, e2, e3, e4, e5, e6, e7, e8 = acts

        # Decoder with skip connections.
        self.d1, self.d1_w, self.d1_b = upsample(e8, s128, self.gf_dim*8, 'g_d1')
        d1 = tf.concat([tf.nn.dropout(self.g_bn_d1(self.d1), 0.5), e7], 3)
        # d1 is (2 x 2 x self.gf_dim*8*2)

        self.d2, self.d2_w, self.d2_b = upsample(d1, s64, self.gf_dim*8, 'g_d2')
        d2 = tf.concat([tf.nn.dropout(self.g_bn_d2(self.d2), 0.5), e6], 3)
        # d2 is (4 x 4 x self.gf_dim*8*2)

        self.d3, self.d3_w, self.d3_b = upsample(d2, s32, self.gf_dim*8, 'g_d3')
        d3 = tf.concat([tf.nn.dropout(self.g_bn_d3(self.d3), 0.5), e5], 3)
        # d3 is (8 x 8 x self.gf_dim*8*2)

        self.d4, self.d4_w, self.d4_b = upsample(d3, s16, self.gf_dim*8, 'g_d4')
        d4 = tf.concat([self.g_bn_d4(self.d4), e4], 3)
        # d4 is (16 x 16 x self.gf_dim*8*2)

        self.d5, self.d5_w, self.d5_b = upsample(d4, s8, self.gf_dim*4, 'g_d5')
        d5 = tf.concat([self.g_bn_d5(self.d5), e3], 3)
        # d5 is (32 x 32 x self.gf_dim*4*2)

        self.d6, self.d6_w, self.d6_b = upsample(d5, s4, self.gf_dim*2, 'g_d6')
        d6 = tf.concat([self.g_bn_d6(self.d6), e2], 3)
        # d6 is (64 x 64 x self.gf_dim*2*2)

        self.d7, self.d7_w, self.d7_b = upsample(d6, s2, self.gf_dim, 'g_d7')
        d7 = tf.concat([self.g_bn_d7(self.d7), e1], 3)
        # d7 is (128 x 128 x self.gf_dim*1*2)

        self.d8, self.d8_w, self.d8_b = upsample(d7, s, self.output_c_dim, 'g_d8')
        # d8 is (256 x 256 x output_c_dim)

        return tf.nn.tanh(self.d8)
544 |
545 |
def sampler_ori(self, image, y=None):
    """Build the sampling copy of the single-view encoder-decoder generator.

    Re-enters the "generator" variable scope and calls
    ``scope.reuse_variables()`` before building the same graph as the
    single-view generator: channels 0-2 of ``image`` go through eight
    ``conv2d`` stages and eight ``deconv2d`` stages, with each decoder
    stage except the last concatenated along axis 3 with the matching
    encoder activation.

    Args:
        image: 4-D tensor; indexed as ``image[:, :, :, 0:3]``.
        y: unused.

    Returns:
        ``tanh`` of the last decoder output ``self.d8``.  ``self.d1`` ..
        ``self.d8`` and their ``_w`` / ``_b`` companions are overwritten
        as a side effect.
    """
    with tf.variable_scope("generator") as scope:
        scope.reuse_variables()

        s = self.output_size
        s2, s4, s8, s16, s32, s64, s128 = [
            int(s / factor) for factor in (2, 4, 8, 16, 32, 64, 128)]

        def upsample(tensor, side, depth, name):
            # ReLU, then deconv2d asked for a (batch, side, side, depth) output.
            return deconv2d(tf.nn.relu(tensor),
                            [self.batch_size, side, side, depth],
                            name=name, with_w=True)

        # Encoder.  Shape comments from the original assume a 256 x 256 input:
        # e1 is 128 x 128 x gf_dim, each stage halves the side, e8 is
        # 1 x 1 x gf_dim*8.
        rgb = image[:, :, :, 0:3]
        acts = [conv2d(rgb, self.gf_dim, name='g_e1_conv')]
        stages = (
            (self.g_bn_e2, 2, 'g_e2_conv'),
            (self.g_bn_e3, 4, 'g_e3_conv'),
            (self.g_bn_e4, 8, 'g_e4_conv'),
            (self.g_bn_e5, 8, 'g_e5_conv'),
            (self.g_bn_e6, 8, 'g_e6_conv'),
            (self.g_bn_e7, 8, 'g_e7_conv'),
            (self.g_bn_e8, 8, 'g_e8_conv'),
        )
        for bn, mult, conv_name in stages:
            acts.append(bn(conv2d(lrelu(acts[-1]), self.gf_dim*mult, name=conv_name)))
        e1, e2, e3, e4, e5, e6, e7, e8 = acts

        # Decoder with skip connections.
        self.d1, self.d1_w, self.d1_b = upsample(e8, s128, self.gf_dim*8, 'g_d1')
        d1 = tf.concat([tf.nn.dropout(self.g_bn_d1(self.d1), 0.5), e7], 3)
        # d1 is (2 x 2 x self.gf_dim*8*2)

        self.d2, self.d2_w, self.d2_b = upsample(d1, s64, self.gf_dim*8, 'g_d2')
        d2 = tf.concat([tf.nn.dropout(self.g_bn_d2(self.d2), 0.5), e6], 3)
        # d2 is (4 x 4 x self.gf_dim*8*2)

        self.d3, self.d3_w, self.d3_b = upsample(d2, s32, self.gf_dim*8, 'g_d3')
        d3 = tf.concat([tf.nn.dropout(self.g_bn_d3(self.d3), 0.5), e5], 3)
        # d3 is (8 x 8 x self.gf_dim*8*2)

        self.d4, self.d4_w, self.d4_b = upsample(d3, s16, self.gf_dim*8, 'g_d4')
        d4 = tf.concat([self.g_bn_d4(self.d4), e4], 3)
        # d4 is (16 x 16 x self.gf_dim*8*2)

        self.d5, self.d5_w, self.d5_b = upsample(d4, s8, self.gf_dim*4, 'g_d5')
        d5 = tf.concat([self.g_bn_d5(self.d5), e3], 3)
        # d5 is (32 x 32 x self.gf_dim*4*2)

        self.d6, self.d6_w, self.d6_b = upsample(d5, s4, self.gf_dim*2, 'g_d6')
        d6 = tf.concat([self.g_bn_d6(self.d6), e2], 3)
        # d6 is (64 x 64 x self.gf_dim*2*2)

        self.d7, self.d7_w, self.d7_b = upsample(d6, s2, self.gf_dim, 'g_d7')
        d7 = tf.concat([self.g_bn_d7(self.d7), e1], 3)
        # d7 is (128 x 128 x self.gf_dim*1*2)

        self.d8, self.d8_w, self.d8_b = upsample(d7, s, self.output_c_dim, 'g_d8')
        # d8 is (256 x 256 x output_c_dim)

        return tf.nn.tanh(self.d8)
620 |
def save(self, checkpoint_dir, step):
    """Checkpoint the session into this run's sub-directory.

    Args:
        checkpoint_dir: root directory; the checkpoint goes into
            ``<checkpoint_dir>/<dataset_name>_<batch_size>_<output_size>``,
            which is created if it does not exist yet.
        step: passed to the saver as ``global_step``.
    """
    run_dir = os.path.join(
        checkpoint_dir,
        "%s_%s_%s" % (self.dataset_name, self.batch_size, self.output_size))

    if not os.path.exists(run_dir):
        os.makedirs(run_dir)

    target = os.path.join(run_dir, "pix2pix.model")
    self.saver.save(self.sess, target, global_step=step)
632 |
def load(self, checkpoint_dir, ckpt_name=None):
    """Restore the session's variables from this run's checkpoint directory.

    The run directory is
    ``<checkpoint_dir>/<dataset_name>_<batch_size>_<output_size>``.

    Previously the checkpoint name read from the checkpoint state was
    immediately overwritten with the literal "pix2pix.model-35002", so the
    most recent checkpoint was never the one restored.  The latest
    checkpoint is now the default; a specific one can still be pinned
    through ``ckpt_name``.

    Args:
        checkpoint_dir: root directory holding the per-run folders.
        ckpt_name: optional checkpoint file name inside the run directory
            (e.g. "pix2pix.model-35002").  When None, the checkpoint
            recorded in the checkpoint state is used.

    Returns:
        True if a restore was issued, False when the run directory has no
        usable checkpoint state.
    """
    print(" [*] Reading checkpoint...")

    model_dir = "%s_%s_%s" % (self.dataset_name, self.batch_size, self.output_size)
    checkpoint_dir = os.path.join(checkpoint_dir, model_dir)

    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
        return False

    if ckpt_name is None:
        # Only the file name is kept; it is re-joined with the run directory.
        ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
    self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
    return True
647 |
def test(self, args):
    """Run the sampler on every validation input and write the predictions.

    Inputs are the ``.npy`` files in
    ``<cwd>/datasets/<dataset_name>/val/A``, processed in ascending order
    of the integer encoded in their file names.  For each input,
    ``self.fake_B_sample`` is evaluated and written twice under
    ``args.test_dir``: as ``npy/test_<stem>.npy`` (values rescaled by
    ``(x + 1) * 32767.5``) and as ``test_<stem>.png`` via ``save_images``.

    Fixes relative to the previous version:
      * the per-image runtime divided by the first dimension of the last
        sample instead of the number of samples, never formatted its
        message, and raised NameError when there were no samples;
      * ``rstrip('.npy')`` strips a set of characters, not a suffix; the
        stem is now taken with ``os.path.splitext``;
      * output paths are built with ``os.path.join`` so an absolute
        ``args.test_dir`` is honoured;
      * files not ending in ``.npy`` are skipped, and an unused ``glob``
        result was dropped.

    Args:
        args: object exposing ``test_dir``, the output directory.  An
            existing directory at that path is deleted before writing.
    """
    init_op = tf.global_variables_initializer()
    self.sess.run(init_op)

    input_dir = os.path.join(os.getcwd(), 'datasets', str(self.dataset_name), 'val', 'A')

    def numeric_key(file_name):
        # Sort on the integer in the name; leading zeros are ignored and an
        # all-zero (or empty) stem counts as 0.  Ties fall back to the name.
        digits = os.path.splitext(file_name)[0].lstrip('0')
        return (int(digits) if digits else 0, file_name)

    sample_files = sorted(
        (name for name in os.listdir(input_dir) if name.endswith('.npy')),
        key=numeric_key)
    print(sample_files)

    # load testing input
    print("Loading testing images ...")
    sample = [load_data2(input_dir, sample_file) for sample_file in sample_files]
    sample_images = np.array(sample)
    print(sample_images.shape)

    npy_dir = os.path.join(args.test_dir, 'npy')

    # Start from an empty output directory.
    if os.path.exists(args.test_dir):
        shutil.rmtree(args.test_dir)

    os.makedirs(args.test_dir)
    os.makedirs(npy_dir)

    start_time = time.time()
    if self.load(self.checkpoint_dir):
        print(" [*] Load SUCCESS")
    else:
        # Sampling still proceeds, using whatever the initializer produced.
        print(" [!] Load failed...")

    for idx, (sample_file, sample_image) in enumerate(zip(sample_files, sample_images), 1):
        stem = os.path.splitext(sample_file)[0]
        print("sampling image ", idx)
        samples = self.sess.run(
            self.fake_B_sample,
            # Add a leading batch axis of size 1.
            feed_dict={self.real_data: np.expand_dims(sample_image, axis=0)}
        )
        # Save as metric scale: (x + 1) * 32767.5 maps [-1, 1] onto [0, 65535]
        # (the sampler output is presumably tanh-bounded — confirm).
        np.save(os.path.join(npy_dir, 'test_{}.npy'.format(stem)),
                (samples.squeeze() + 1) * 32767.5)
        save_images(samples, [self.batch_size, 1],
                    os.path.join(args.test_dir, 'test_{}.png'.format(stem)))

    elapsed = time.time() - start_time
    if len(sample_images):
        # Average wall-clock time per input, checkpoint loading included.
        print("Test Runtime:{}".format(elapsed / len(sample_images)))
703 |
--------------------------------------------------------------------------------