├── requirements.txt
├── examples.jpg
├── eval
│   ├── gt_0017.png
│   └── test_0017.png
├── label_to_facades.png
├── .gitignore
├── download_dataset.sh
├── tools
│   ├── download-dataset.py
│   ├── split.py
│   ├── test.py
│   ├── dockrun.py
│   ├── tfimage.py
│   └── process.py
├── README.md
├── README_pix2pix.md
├── eval.py
├── train_val_test_split.py
├── main.py
├── utils.py
├── ops.py
└── model.py

/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow-gpu
2 | numpy
3 | scipy
4 | pillow
5 |
--------------------------------------------------------------------------------
/examples.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuantingchen04/Light-Field-Depth-Estimation/HEAD/examples.jpg
--------------------------------------------------------------------------------
/eval/gt_0017.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuantingchen04/Light-Field-Depth-Estimation/HEAD/eval/gt_0017.png
--------------------------------------------------------------------------------
/eval/test_0017.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuantingchen04/Light-Field-Depth-Estimation/HEAD/eval/test_0017.png
--------------------------------------------------------------------------------
/label_to_facades.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuantingchen04/Light-Field-Depth-Estimation/HEAD/label_to_facades.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | sample/*
2 | logs/*
3 | test/*
4 | datasets/*
5 | checkpoint/*
6 | val/*
7 | results/
8 | depth_images_v2_400_gt/
9 |
10 | *.pyc
11 | .idea/
12 | .ipynb_checkpoints/
13 | *.ipynb
14 |
--------------------------------------------------------------------------------
/download_dataset.sh:
--------------------------------------------------------------------------------
1 | mkdir -p datasets
2 | FILE=$1
3 | URL=https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/$FILE.tar.gz
4 | TAR_FILE=./datasets/$FILE.tar.gz
5 | TARGET_DIR=./datasets/$FILE/
6 | wget -N $URL -O $TAR_FILE
7 | mkdir -p $TARGET_DIR
8 | tar -zxvf $TAR_FILE -C ./datasets/
9 | rm $TAR_FILE
10 |
--------------------------------------------------------------------------------
/tools/download-dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | try:
6 |     from urllib.request import urlopen  # python 3
7 | except ImportError:
8 |     from urllib2 import urlopen  # python 2
9 | import sys
10 | import tarfile
11 | import tempfile
12 | import shutil
13 |
14 | dataset = sys.argv[1]
15 | url = "https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/%s.tar.gz" % dataset
16 | with tempfile.TemporaryFile() as tmp:
17 |     print("downloading", url)
18 |     shutil.copyfileobj(urlopen(url), tmp)
19 |     print("extracting")
20 |     tmp.seek(0)
21 |     tar = tarfile.open(fileobj=tmp)
22 |     tar.extractall()
23 |     tar.close()
24 |     print("done")
25 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### Light-Field-Depth-Estimation
2 | #### *Light Field Depth Estimation using cGAN*
3 |
4 | #### Data
5 | * 600 Light Field Images from [DDFF 12-scene](http://hazirbas.com/datasets/ddff12scene/)
6 |
7 | #### Method
8 | * Conditional GAN using pix2pix (TensorFlow)
9 | * Feeds a 5-image focal stack; per-image encoder embeddings are fused with an LSTM
10 |
11 | #### Run
12 | * Download any checkpoint from [here](https://drive.google.com/open?id=1zV6wRKh1gkEIZg687LAFQbOlwnzK-YIH)
13 | * Download the manually cropped data from [here](https://drive.google.com/open?id=1js-jLasmGDigc0pNgbc4INcmUn6Mp7Fu)
14 | * `python main.py --phase train --dataset_name scene12_v3_400`
15 |
16 | #### Acknowledgments
17 | Code borrows heavily from [pix2pix-tensorflow](https://github.com/yenchenlin/pix2pix-tensorflow). Thanks to Yen-Chen!
18 |
--------------------------------------------------------------------------------
/tools/split.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import random
6 | import argparse
7 | import glob
8 | import os
9 |
10 |
11 | parser = argparse.ArgumentParser()
12 | parser.add_argument("--dir", type=str, required=True, help="path to folder containing images")
13 | parser.add_argument("--train_frac", type=float, default=0.8, help="fraction of images to use for the training set")
14 | parser.add_argument("--test_frac", type=float, default=0.0, help="fraction of images to use for the test set")
15 | parser.add_argument("--sort", action="store_true", help="if set, sort the images instead of shuffling them")
16 | a = parser.parse_args()
17 |
18 |
19 | def main():
20 |     random.seed(0)
21 |
22 |     files = glob.glob(os.path.join(a.dir, "*.png"))
23 |     files.sort()
24 |
25 |     assignments = []
26 |     assignments.extend(["train"] * int(a.train_frac * len(files)))
27 |     assignments.extend(["test"] * int(a.test_frac * len(files)))
28 |     assignments.extend(["val"] * int(len(files) - len(assignments)))
29 |
30 |     if not a.sort:
31 |         random.shuffle(assignments)
32 |
33 |     for name in ["train", "val", "test"]:
34 |         if name in assignments:
35 |             d = os.path.join(a.dir, name)
36 |             if not os.path.exists(d):
37 |                 os.makedirs(d)
38 |
39 |     print(len(files), len(assignments))
40 |     for inpath, assignment in zip(files, assignments):
41 |         outpath = os.path.join(a.dir, assignment, os.path.basename(inpath))
42 |         print(inpath, "->", outpath)
43 |         os.rename(inpath, outpath)
44 |
45 | main()
46 |
--------------------------------------------------------------------------------
/README_pix2pix.md:
--------------------------------------------------------------------------------
1 | # pix2pix-tensorflow
2 |
3 | TensorFlow implementation of [Image-to-Image Translation Using Conditional Adversarial Networks](https://arxiv.org/pdf/1611.07004v1.pdf) that learns a mapping from input images to output images.
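The mapping is trained with a conditional GAN objective: the generator minimizes an adversarial loss (the discriminator should classify its outputs as real) plus an L1 reconstruction term weighted by `--L1_lambda` (100 by default). A minimal NumPy sketch of the generator side, where `d_logits_fake`, `real_B`, and `fake_B` are hypothetical arrays (this repository's `model.py` additionally masks out invalid depth pixels, which is omitted here):

```python
import numpy as np

def generator_loss(d_logits_fake, real_B, fake_B, l1_lambda=100.0):
    """Sketch of the pix2pix generator objective: GAN term + weighted L1."""
    # sigmoid cross-entropy against all-ones labels, in the numerically
    # stable form max(x, 0) - x*z + log(1 + exp(-|x|)) with labels z = 1
    gan_loss = np.mean(np.maximum(d_logits_fake, 0) - d_logits_fake
                       + np.log1p(np.exp(-np.abs(d_logits_fake))))
    l1_loss = np.mean(np.abs(real_B - fake_B))  # pixel-wise reconstruction
    return gan_loss + l1_lambda * l1_loss
```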
4 |
5 | Here are some results generated by the authors of the paper:
6 |
7 |
8 |
9 | ## Setup
10 |
11 | ### Prerequisites
12 | - Linux
13 | - Python with numpy
14 | - NVIDIA GPU + CUDA 8.0 + cuDNN v5.1
15 | - TensorFlow 0.11
16 |
17 | ### Getting Started
18 | - Clone this repo:
19 | ```bash
20 | git clone git@github.com:yenchenlin/pix2pix-tensorflow.git
21 | cd pix2pix-tensorflow
22 | ```
23 | - Download the dataset (script borrowed from [torch code](https://github.com/phillipi/pix2pix/blob/master/datasets/download_dataset.sh)):
24 | ```bash
25 | bash ./download_dataset.sh facades
26 | ```
27 | - Train the model:
28 | ```bash
29 | python main.py --phase train
30 | ```
31 | - Test the model:
32 | ```bash
33 | python main.py --phase test
34 | ```
35 |
36 | ## Results
37 | Here are the results generated by this implementation:
38 |
39 | - Facades:
40 |
41 |
42 |
43 | More results on other datasets coming soon!
44 |
45 | **Note**: To avoid fast convergence of the D (discriminator) network, the G (generator) network is updated twice for each D update (see the sketch at the end of this file). This differs from the original paper but matches [DCGAN-tensorflow](https://github.com/carpedm20/DCGAN-tensorflow), on which this project is based.
46 |
47 | ## Train
48 | The code currently supports the [CMP Facades](http://cmp.felk.cvut.cz/~tylecr1/facade/) dataset. Reproducing the results shown above takes 200 epochs of training; the exact training time depends on your hardware.
49 |
50 | ## Test
51 | Tests the model on the validation set of the [CMP Facades](http://cmp.felk.cvut.cz/~tylecr1/facade/) dataset, generating synthesized images from the corresponding labels under the `./test` directory.
52 |
53 |
54 | ## Acknowledgments
55 | Code borrows heavily from [pix2pix](https://github.com/phillipi/pix2pix) and [DCGAN-tensorflow](https://github.com/carpedm20/DCGAN-tensorflow/blob/master/model.py). Thanks for their excellent work!
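As a concrete sketch of the note above, the training loop in `model.train` (`model.py`) interleaves the two optimizers roughly as follows; `sess`, `batches`, and `feed` stand in for the session, the batch iterator, and the feed dict built there:

```python
# One step per batch: update D once, then G twice, so that d_loss
# does not collapse to zero early in training.
for epoch in range(args.epoch):
    for batch_images in batches:
        feed = {model.real_data: batch_images}
        sess.run(d_optim, feed_dict=feed)  # discriminator update
        sess.run(g_optim, feed_dict=feed)  # first generator update
        sess.run(g_optim, feed_dict=feed)  # second generator update
```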
56 |
--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import sys
4 |
5 | # revised from
6 | # https://github.com/mrharicot/monodepth/blob/master/utils/evaluate_kitti.py
7 | def compute_errors(gt, pred):
8 |     thresh = np.maximum((gt / pred), (pred / gt))
9 |     a1 = (thresh < 1.25).mean()
10 |     a2 = (thresh < 1.25 ** 2).mean()
11 |     a3 = (thresh < 1.25 ** 3).mean()
12 |
13 |     rmse = (gt - pred) ** 2
14 |     rmse = np.sqrt(rmse.mean())
15 |
16 |     rmse_log = (np.log(gt) - np.log(pred)) ** 2
17 |     rmse_log = np.sqrt(rmse_log.mean())
18 |
19 |     abs_rel = np.mean(np.abs(gt - pred) / gt)
20 |
21 |     sq_rel = np.mean(((gt - pred) ** 2) / gt)
22 |
23 |     return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3
24 |
25 |
26 | min_depth = 1e-3
27 | max_depth = 80
28 | # Get GT
29 | #gt_npy_dir = 'datasets/scene12_v2_400/val/A' ## Correct GT
30 | gt_npy_dir = 'datasets/scene12_v3_400/val/A' ## Correct GT
31 |
32 | test_npy_dir = 'test/npy'
33 | data_npy = [os.path.join(gt_npy_dir, name) for name in os.listdir(gt_npy_dir)]
34 | data_npy.sort()
35 | test_npy = [os.path.join(test_npy_dir, name) for name in os.listdir(test_npy_dir)]
36 | test_npy.sort()
37 | num_samples = len(data_npy)
38 | print(num_samples)
39 | if len(data_npy) != len(test_npy):
40 |     sys.exit("Check files")
41 |
42 |
43 | rms = np.zeros(num_samples, np.float32)
44 | log_rms = np.zeros(num_samples, np.float32)
45 | abs_rel = np.zeros(num_samples, np.float32)
46 | sq_rel = np.zeros(num_samples, np.float32)
47 | d1_all = np.zeros(num_samples, np.float32)
48 | a1 = np.zeros(num_samples, np.float32)
49 | a2 = np.zeros(num_samples, np.float32)
50 | a3 = np.zeros(num_samples, np.float32)
51 |
52 | for i in range(num_samples):
53 |     gt_depth = np.load(data_npy[i]) / 1000
54 |     pred_depth = np.load(test_npy[i]) / 1000
55 |
56 |     pred_depth[pred_depth < min_depth] = min_depth
57 |     pred_depth[pred_depth > max_depth] = max_depth
58 |
59 |     mask = np.logical_and(gt_depth > min_depth, gt_depth < max_depth)
60 |
61 |     abs_rel[i], sq_rel[i], rms[i], log_rms[i], a1[i], a2[i], a3[i] = compute_errors(gt_depth[mask], pred_depth[mask])
62 | print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format('abs_rel', 'sq_rel', 'rms', 'log_rms', 'a1', 'a2', 'a3'))
63 | print("{:10.4f}, {:10.4f}, {:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}".format(abs_rel.mean(), sq_rel.mean(), rms.mean(), log_rms.mean(), a1.mean(), a2.mean(), a3.mean()))
64 |
65 |
--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import subprocess
6 | import os
7 | import sys
8 | import time
9 | import argparse
10 |
11 |
12 | parser = argparse.ArgumentParser()
13 | parser.add_argument("--long", action="store_true")
14 | a = parser.parse_args()
15 |
16 |
17 | def run(cmd, image="affinelayer/pix2pix-tensorflow"):
18 |     docker = "docker"
19 |     if sys.platform.startswith("linux"):
20 |         docker = "nvidia-docker"
21 |
22 |     datapath = os.path.abspath("../data")
23 |     prefix = [docker, "run", "--rm", "--volume", os.getcwd() + ":/prj", "--volume", datapath + ":/data", "--workdir", "/prj", "--env", "PYTHONUNBUFFERED=x", "--volume", "/tmp/cuda-cache:/cuda-cache", "--env", "CUDA_CACHE_PATH=/cuda-cache", image]
24
| args = prefix + cmd.split(" ") 25 | print(" ".join(args)) 26 | subprocess.check_call(args) 27 | 28 | 29 | def main(): 30 | start = time.time() 31 | 32 | if a.long: 33 | run("python pix2pix.py --mode train --output_dir test/facades_BtoA_train --max_epochs 200 --input_dir /data/official/facades/train --which_direction BtoA --seed 0") 34 | run("python pix2pix.py --mode test --output_dir test/facades_BtoA_test --input_dir /data/official/facades/val --seed 0 --checkpoint test/facades_BtoA_train") 35 | 36 | run("python pix2pix.py --mode train --output_dir test/color-lab_AtoB_train --max_epochs 10 --input_dir /data/color-lab/train --which_direction AtoB --seed 0 --lab_colorization") 37 | run("python pix2pix.py --mode test --output_dir test/color-lab_AtoB_test --input_dir /data/color-lab/val --seed 0 --checkpoint test/color-lab_AtoB_train") 38 | else: 39 | # training 40 | for direction in ["AtoB", "BtoA"]: 41 | for dataset in ["facades"]: 42 | name = dataset + "_" + direction 43 | run("python pix2pix.py --mode train --output_dir test/%s_train --max_steps 1 --input_dir /data/official/%s/train --which_direction %s --seed 0" % (name, dataset, direction)) 44 | run("python pix2pix.py --mode test --output_dir test/%s_test --max_steps 1 --input_dir /data/official/%s/val --seed 0 --checkpoint test/%s_train" % (name, dataset, name)) 45 | 46 | # test lab colorization 47 | dataset = "color-lab" 48 | name = dataset + "_" + direction 49 | run("python pix2pix.py --mode train --output_dir test/%s_train --max_steps 1 --input_dir /data/%s/train --which_direction %s --seed 0 --lab_colorization" % (name, dataset, direction)) 50 | run("python pix2pix.py --mode test --output_dir test/%s_test --max_steps 1 --input_dir /data/%s/val --seed 0 --checkpoint test/%s_train" % (name, dataset, name)) 51 | 52 | # using pretrained model (can't use pretrained models from tensorflow 0.12, so disabled for now) 53 | # for dataset, direction in [("facades", "BtoA")]: 54 | # name = dataset + "_" + direction 55 | # run("python pix2pix.py --mode test --output_dir test/%s_pretrained_test --input_dir /data/official/%s/val --max_steps 100 --which_direction %s --seed 0 --checkpoint /data/pretrained/%s" % (name, dataset, direction, name)) 56 | # run("python pix2pix.py --mode export --output_dir test/%s_pretrained_export --checkpoint /data/pretrained/%s" % (name, name)) 57 | 58 | # test python3 59 | run("python pix2pix.py --mode train --output_dir test/py3_facades_AtoB_train --max_steps 1 --input_dir /data/official/facades/train --which_direction AtoB --seed 0", image="tensorflow/tensorflow:1.0.0-gpu-py3") 60 | run("python pix2pix.py --mode test --output_dir test/py3_facades_AtoB_test --max_steps 1 --input_dir /data/official/facades/val --seed 0 --checkpoint test/py3_facades_AtoB_train", image="tensorflow/tensorflow:1.0.0-gpu-py3") 61 | 62 | print("elapsed", int(time.time() - start)) 63 | # long: about 9 hours (linux) 64 | 65 | 66 | main() 67 | -------------------------------------------------------------------------------- /train_val_test_split.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[103]: 5 | 6 | import os 7 | import numpy as np 8 | import sys 9 | import collections as coll 10 | def train_val_test_split(trainNum=400,valNum=100,testNum=100): 11 | datasetName = 'scene12_' + str(trainNum) 12 | savePath = os.path.join(os.getcwd(),'datasets',datasetName) 13 | loadAPath = os.path.join(os.getcwd(),'data','A') # containing all 3600 images 14 | loadBPath = 
os.path.join(os.getcwd(),'data','B') # containing all 3600 images 15 | 16 | if not os.path.exists(savePath): 17 | os.makedirs(savePath) 18 | print('trainNum={} valNum={} testNum={}'.format(trainNum,valNum,testNum)) 19 | # load all numpy array names 20 | fileNames = [name for name in os.listdir(loadAPath)] 21 | 22 | # save random indices for training, validation and testing set 23 | trainIdx = [] 24 | valIdx = [] 25 | testIdx = [] 26 | 27 | beg = 0 28 | end = len(fileNames)//6 29 | delta = end 30 | size_per_scene = (trainNum + valNum + testNum)//6 31 | print(size_per_scene) 32 | for i in range(6): 33 | print('beg:{} end: {}'.format(beg,end)) 34 | allidx = np.random.choice(range(beg,end),size_per_scene,replace=False) 35 | if i < 5: 36 | trainIdx.extend(allidx[0:trainNum//6]) 37 | valIdx.extend(allidx[trainNum//6:(trainNum+valNum)//6]) 38 | testIdx.extend(allidx[(trainNum+valNum)//6:]) 39 | else: 40 | trainCurrentSize = len(trainIdx) 41 | valCurrentSize = len(valIdx) 42 | testCurrentSize = len(testIdx) 43 | 44 | trainIdx.extend(allidx[0:trainNum - trainCurrentSize]) 45 | valIdx.extend(allidx[trainNum -trainCurrentSize:(trainNum+valNum) - trainCurrentSize - valCurrentSize]) 46 | testIdx.extend(allidx[(trainNum+valNum) - trainCurrentSize - valCurrentSize:]) 47 | beg += delta 48 | end += delta 49 | #end = min(len(fileNames),end) 50 | 51 | print('size of train: {}'.format(len(trainIdx))) 52 | print('size of val: {}'.format(len(valIdx))) 53 | print('size of test: {}'.format(len(testIdx))) 54 | 55 | # shuffle training idx once more 56 | np.random.shuffle(trainIdx) 57 | # print(trainIdx) 58 | # save training set 59 | saveData([fileNames[i] for i in trainIdx],'train',savePath,loadAPath) 60 | saveData([fileNames[i] for i in trainIdx],'train',savePath,loadBPath) 61 | 62 | # save val set 63 | saveData([fileNames[i] for i in valIdx],'val',savePath,loadAPath) 64 | saveData([fileNames[i] for i in valIdx],'val',savePath,loadBPath) 65 | 66 | # save test set 67 | saveData([fileNames[i] for i in testIdx],'test',savePath,loadAPath) 68 | saveData([fileNames[i] for i in testIdx],'test',savePath,loadBPath) 69 | 70 | 71 | 72 | def saveData(data,name,savePath,loadPath): 73 | dataType = loadPath[-1] 74 | np.random.shuffle(data) 75 | for d in data: 76 | img = np.load(os.path.join(loadPath,d)) 77 | path = os.path.join(savePath,name,dataType) 78 | if not os.path.exists(path): 79 | os.makedirs(path) 80 | np.save(os.path.join(path,d),img) 81 | 82 | 83 | 84 | 85 | 86 | if __name__ == '__main__': 87 | if len(sys.argv) == 4: 88 | trainNum = int(sys.argv[1]) 89 | valNum = int(sys.argv[2]) 90 | testNum = int(sys.argv[3]) 91 | else: 92 | trainNum = 2400 93 | valNum = 600 94 | testNum = 600 95 | 96 | train_val_test_split(trainNum,valNum,testNum) 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import scipy.misc 4 | import numpy as np 5 | 6 | from model import pix2pix 7 | import tensorflow as tf 8 | 9 | parser = argparse.ArgumentParser(description='') 10 | parser.add_argument('--dataset_name', dest='dataset_name', default='facades', help='name of the dataset') 11 | parser.add_argument('--epoch', dest='epoch', type=int, default=200, help='# of epoch') 12 | parser.add_argument('--batch_size', dest='batch_size', type=int, default=1, help='# images in batch') 13 | parser.add_argument('--train_size', dest='train_size', 
type=int, default=1e8, help='# images used to train')
14 | parser.add_argument('--load_size', dest='load_size', type=int, default=286, help='scale images to this size')
15 | parser.add_argument('--fine_size', dest='fine_size', type=int, default=256, help='then crop to this size')
16 | parser.add_argument('--ngf', dest='ngf', type=int, default=64, help='# of generator filters in first conv layer')
17 | parser.add_argument('--ndf', dest='ndf', type=int, default=64, help='# of discriminator filters in first conv layer')
18 | parser.add_argument('--input_nc', dest='input_nc', type=int, default=3, help='# of input image channels')
19 | parser.add_argument('--output_nc', dest='output_nc', type=int, default=3, help='# of output image channels')
20 | parser.add_argument('--niter', dest='niter', type=int, default=200, help='# of iterations at starting learning rate')
21 | parser.add_argument('--lr', dest='lr', type=float, default=0.0002, help='initial learning rate for adam')
22 | parser.add_argument('--beta1', dest='beta1', type=float, default=0.5, help='momentum term of adam')
23 | parser.add_argument('--flip', dest='flip', type=bool, default=True, help='whether to flip the images for data augmentation')
24 | parser.add_argument('--which_direction', dest='which_direction', default='AtoB', help='AtoB or BtoA')
25 | parser.add_argument('--phase', dest='phase', default='train', help='train, test')
26 | parser.add_argument('--save_epoch_freq', dest='save_epoch_freq', type=int, default=50, help='save a model every save_epoch_freq epochs (does not overwrite previously saved models)')
27 | parser.add_argument('--save_latest_freq', dest='save_latest_freq', type=int, default=5000, help='save the latest model every save_latest_freq sgd iterations (overwrites the previous latest model)')
28 | parser.add_argument('--print_freq', dest='print_freq', type=int, default=50, help='print the debug information every print_freq iterations')
29 | parser.add_argument('--continue_train', dest='continue_train', type=bool, default=False, help='if continuing training, load the latest model: 1: true, 0: false')
30 | parser.add_argument('--serial_batches', dest='serial_batches', type=bool, default=False, help='if 1, takes images in order to make batches; otherwise takes them randomly')
31 | parser.add_argument('--serial_batch_iter', dest='serial_batch_iter', type=bool, default=True, help='iter into serial image list')
32 | parser.add_argument('--checkpoint_dir', dest='checkpoint_dir', default='./checkpoint', help='models are saved here')
33 | parser.add_argument('--sample_dir', dest='sample_dir', default='./sample', help='samples are saved here')
34 | parser.add_argument('--test_dir', dest='test_dir', default='./test', help='test samples are saved here')
35 | parser.add_argument('--L1_lambda', dest='L1_lambda', type=float, default=100.0, help='weight on L1 term in objective')
36 |
37 | args = parser.parse_args()
38 |
39 | # python tools/dockrun.py python main.py --phase train
40 |
41 | def main(_):
42 |     if not os.path.exists(args.checkpoint_dir):
43 |         os.makedirs(args.checkpoint_dir)
44 |     if not os.path.exists(args.sample_dir):
45 |         os.makedirs(args.sample_dir)
46 |     if not os.path.exists(args.test_dir):
47 |         os.makedirs(args.test_dir)
48 |
49 |     with tf.Session() as sess:
50 |         model = pix2pix(sess, image_size=args.fine_size, batch_size=args.batch_size,
51 |                         output_size=args.fine_size, dataset_name=args.dataset_name,
52 |                         checkpoint_dir=args.checkpoint_dir, sample_dir=args.sample_dir)
53 |
54 |         if args.phase == 'train':
55 |             model.train(args)
56 |         else:
57 |
model.test(args) 58 | 59 | if __name__ == '__main__': 60 | tf.app.run() 61 | -------------------------------------------------------------------------------- /tools/dockrun.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import sys 7 | import argparse 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--port", type=int, help="port to publish from the container") 11 | 12 | # from python 3.3 source 13 | # https://github.com/python/cpython/blob/master/Lib/shutil.py 14 | def which(cmd, mode=os.F_OK | os.X_OK, path=None): 15 | """Given a command, mode, and a PATH string, return the path which 16 | conforms to the given mode on the PATH, or None if there is no such 17 | file. 18 | `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result 19 | of os.environ.get("PATH"), or can be overridden with a custom search 20 | path. 21 | """ 22 | # Check that a given file can be accessed with the correct mode. 23 | # Additionally check that `file` is not a directory, as on Windows 24 | # directories pass the os.access check. 25 | def _access_check(fn, mode): 26 | return (os.path.exists(fn) and os.access(fn, mode) 27 | and not os.path.isdir(fn)) 28 | 29 | # If we're given a path with a directory part, look it up directly rather 30 | # than referring to PATH directories. This includes checking relative to the 31 | # current directory, e.g. ./script 32 | if os.path.dirname(cmd): 33 | if _access_check(cmd, mode): 34 | return cmd 35 | return None 36 | 37 | if path is None: 38 | path = os.environ.get("PATH", os.defpath) 39 | if not path: 40 | return None 41 | path = path.split(os.pathsep) 42 | 43 | if sys.platform == "win32": 44 | # The current directory takes precedence on Windows. 45 | if not os.curdir in path: 46 | path.insert(0, os.curdir) 47 | 48 | # PATHEXT is necessary to check on Windows. 49 | pathext = os.environ.get("PATHEXT", "").split(os.pathsep) 50 | # See if the given file matches any of the expected path extensions. 51 | # This will allow us to short circuit when given "python.exe". 52 | # If it does match, only test that one, otherwise we have to try 53 | # others. 54 | if any(cmd.lower().endswith(ext.lower()) for ext in pathext): 55 | files = [cmd] 56 | else: 57 | files = [cmd + ext for ext in pathext] 58 | else: 59 | # On other platforms you don't have things like PATHEXT to tell you 60 | # what file suffixes are executable, so just pass on cmd as-is. 
61 | files = [cmd] 62 | 63 | seen = set() 64 | for dir in path: 65 | normdir = os.path.normcase(dir) 66 | if not normdir in seen: 67 | seen.add(normdir) 68 | for thefile in files: 69 | name = os.path.join(dir, thefile) 70 | if _access_check(name, mode): 71 | return name 72 | return None 73 | 74 | 75 | def main(): 76 | args = sys.argv[1:] 77 | i = 0 78 | while i < len(args): 79 | if not args[i].startswith("--"): 80 | break 81 | i += 2 82 | 83 | a = parser.parse_args(args[:i]) 84 | cmd = args[i:] 85 | 86 | # check if nvidia-docker or docker are on path 87 | docker_path = which("nvidia-docker") 88 | if docker_path is None: 89 | docker_path = which("docker") 90 | 91 | if docker_path is None: 92 | raise Exception("docker not found") 93 | 94 | docker_args = [ 95 | "--rm", 96 | "--volume", 97 | "/:/host", 98 | "--workdir", 99 | "/host" + os.getcwd(), 100 | "--env", 101 | "PYTHONUNBUFFERED=x", 102 | "--env", 103 | "CUDA_CACHE_PATH=/host/tmp/cuda-cache", 104 | ] 105 | 106 | if a.port is not None: 107 | docker_args += ["--publish", "%d:%d" % (a.port, a.port)] 108 | 109 | args = [docker_path, "run"] + docker_args + ["affinelayer/pix2pix-tensorflow:v2"] + cmd 110 | 111 | if not os.access("/var/run/docker.sock", os.R_OK): 112 | args = ["sudo"] + args 113 | 114 | os.execvp(args[0], args) 115 | 116 | 117 | main() 118 | -------------------------------------------------------------------------------- /tools/tfimage.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import os 7 | 8 | 9 | def create_op(func, **placeholders): 10 | op = func(**placeholders) 11 | 12 | def f(**kwargs): 13 | feed_dict = {} 14 | for argname, argvalue in kwargs.items(): 15 | placeholder = placeholders[argname] 16 | feed_dict[placeholder] = argvalue 17 | return tf.get_default_session().run(op, feed_dict=feed_dict) 18 | 19 | return f 20 | 21 | downscale = create_op( 22 | func=tf.image.resize_images, 23 | images=tf.placeholder(tf.float32, [None, None, None]), 24 | size=tf.placeholder(tf.int32, [2]), 25 | method=tf.image.ResizeMethod.AREA, 26 | ) 27 | 28 | upscale = create_op( 29 | func=tf.image.resize_images, 30 | images=tf.placeholder(tf.float32, [None, None, None]), 31 | size=tf.placeholder(tf.int32, [2]), 32 | method=tf.image.ResizeMethod.BICUBIC, 33 | ) 34 | 35 | decode_jpeg = create_op( 36 | func=tf.image.decode_jpeg, 37 | contents=tf.placeholder(tf.string), 38 | ) 39 | 40 | decode_png = create_op( 41 | func=tf.image.decode_png, 42 | contents=tf.placeholder(tf.string), 43 | ) 44 | 45 | rgb_to_grayscale = create_op( 46 | func=tf.image.rgb_to_grayscale, 47 | images=tf.placeholder(tf.float32), 48 | ) 49 | 50 | grayscale_to_rgb = create_op( 51 | func=tf.image.grayscale_to_rgb, 52 | images=tf.placeholder(tf.float32), 53 | ) 54 | 55 | encode_jpeg = create_op( 56 | func=tf.image.encode_jpeg, 57 | image=tf.placeholder(tf.uint8), 58 | ) 59 | 60 | encode_png = create_op( 61 | func=tf.image.encode_png, 62 | image=tf.placeholder(tf.uint8), 63 | ) 64 | 65 | crop = create_op( 66 | func=tf.image.crop_to_bounding_box, 67 | image=tf.placeholder(tf.float32), 68 | offset_height=tf.placeholder(tf.int32, []), 69 | offset_width=tf.placeholder(tf.int32, []), 70 | target_height=tf.placeholder(tf.int32, []), 71 | target_width=tf.placeholder(tf.int32, []), 72 | ) 73 | 74 | pad = create_op( 75 | func=tf.image.pad_to_bounding_box, 76 | image=tf.placeholder(tf.float32), 
77 | offset_height=tf.placeholder(tf.int32, []), 78 | offset_width=tf.placeholder(tf.int32, []), 79 | target_height=tf.placeholder(tf.int32, []), 80 | target_width=tf.placeholder(tf.int32, []), 81 | ) 82 | 83 | to_uint8 = create_op( 84 | func=tf.image.convert_image_dtype, 85 | image=tf.placeholder(tf.float32), 86 | dtype=tf.uint8, 87 | saturate=True, 88 | ) 89 | 90 | to_float32 = create_op( 91 | func=tf.image.convert_image_dtype, 92 | image=tf.placeholder(tf.uint8), 93 | dtype=tf.float32, 94 | ) 95 | 96 | 97 | def load(path): 98 | with open(path, "rb") as f: 99 | contents = f.read() 100 | 101 | _, ext = os.path.splitext(path.lower()) 102 | 103 | if ext == ".jpg": 104 | image = decode_jpeg(contents=contents) 105 | elif ext == ".png": 106 | image = decode_png(contents=contents) 107 | else: 108 | raise Exception("invalid image suffix") 109 | 110 | return to_float32(image=image) 111 | 112 | 113 | def find(d): 114 | result = [] 115 | for filename in os.listdir(d): 116 | _, ext = os.path.splitext(filename.lower()) 117 | if ext == ".jpg" or ext == ".png": 118 | result.append(os.path.join(d, filename)) 119 | result.sort() 120 | return result 121 | 122 | 123 | def save(image, path, replace=False): 124 | _, ext = os.path.splitext(path.lower()) 125 | image = to_uint8(image=image) 126 | if ext == ".jpg": 127 | encoded = encode_jpeg(image=image) 128 | elif ext == ".png": 129 | encoded = encode_png(image=image) 130 | else: 131 | raise Exception("invalid image suffix") 132 | 133 | dirname = os.path.dirname(path) 134 | if dirname != "" and not os.path.exists(dirname): 135 | os.makedirs(dirname) 136 | 137 | if os.path.exists(path): 138 | if replace: 139 | os.remove(path) 140 | else: 141 | raise Exception("file already exists at " + path) 142 | 143 | with open(path, "wb") as f: 144 | f.write(encoded) 145 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some codes from https://github.com/Newmu/dcgan_code 3 | """ 4 | from __future__ import division 5 | import math 6 | import json 7 | import random 8 | import pprint 9 | import scipy.misc 10 | import numpy as np 11 | from time import gmtime, strftime 12 | import os 13 | 14 | pp = pprint.PrettyPrinter() 15 | 16 | get_stddev = lambda x, k_h, k_w: 1/math.sqrt(k_w*k_h*x.get_shape()[-1]) 17 | 18 | # ----------------------------- 19 | # new added functions for pix2pix 20 | 21 | def load_data(image_path, flip=True, is_test=False): 22 | img_A, img_B = load_image(image_path) 23 | img_A, img_B = preprocess_A_and_B(img_A, img_B, flip=flip, is_test=is_test) 24 | 25 | img_A = img_A/127.5 - 1. 26 | img_B = img_B/127.5 - 1. 27 | 28 | img_AB = np.concatenate((img_A, img_B), axis=2) 29 | # img_AB shape: (fine_size, fine_size, input_c_dim + output_c_dim) 30 | return img_AB 31 | 32 | def load_data2(image_path, idx): 33 | img_A, img_B = load_image2(image_path,idx) 34 | rgb_scale = 127.5 35 | depth_scale = 32767.5 36 | img_A = img_A/depth_scale - 1. 37 | img_B = img_B/rgb_scale - 1. 
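# assumption: A stores 16-bit-range depth values (0..65535) and B stores 8-bit RGB
# (0..255), so dividing by depth_scale / rgb_scale and subtracting 1 maps both to [-1, 1]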
38 | 39 | img_AB = np.concatenate((img_A, img_B), axis=2) 40 | # img_AB shape: (fine_size, fine_size, input_c_dim + output_c_dim) 41 | #print('img_AB shape: {}'.format(img_AB.shape)) 42 | return img_AB 43 | 44 | def load_image(image_path): 45 | input_img = imread(image_path) 46 | w = int(input_img.shape[1]) 47 | w2 = int(w/2) 48 | img_A = input_img[:, 0:w2] 49 | img_B = input_img[:, w2:w] 50 | 51 | return img_A, img_B 52 | 53 | def load_image2(image_path,idx,isLSTM=True): 54 | # assume image_path = "./train/A", and have to read from 'A' and 'B' 55 | # A is depth, B is rgb 56 | A_path = os.path.join(image_path[:-1],'A',idx) 57 | B_path = os.path.join(image_path[:-1],'B',idx) 58 | if isLSTM: 59 | # input dimension: (5,256,256,3) 60 | # output dimension: (256,256,15) 61 | img_B_original = np.load(B_path) 62 | img_B = img_B_original[0] 63 | for i in range(1,img_B_original.shape[0]): 64 | img_B = np.concatenate((img_B,img_B_original[i]),axis=2) 65 | else: 66 | img_B = np.load(B_path)[0] 67 | 68 | img_A = np.load(A_path) 69 | img_A = img_A.reshape(img_A.shape[0],img_A.shape[1],1) 70 | 71 | return img_A, img_B 72 | 73 | 74 | def preprocess_A_and_B(img_A, img_B, load_size=286, fine_size=256, flip=True, is_test=False): 75 | if is_test: 76 | img_A = scipy.misc.imresize(img_A, [fine_size, fine_size]) 77 | img_B = scipy.misc.imresize(img_B, [fine_size, fine_size]) 78 | else: 79 | img_A = scipy.misc.imresize(img_A, [load_size, load_size]) 80 | img_B = scipy.misc.imresize(img_B, [load_size, load_size]) 81 | 82 | h1 = int(np.ceil(np.random.uniform(1e-2, load_size-fine_size))) 83 | w1 = int(np.ceil(np.random.uniform(1e-2, load_size-fine_size))) 84 | img_A = img_A[h1:h1+fine_size, w1:w1+fine_size] 85 | img_B = img_B[h1:h1+fine_size, w1:w1+fine_size] 86 | 87 | if flip and np.random.random() > 0.5: 88 | img_A = np.fliplr(img_A) 89 | img_B = np.fliplr(img_B) 90 | 91 | return img_A, img_B 92 | 93 | # ----------------------------- 94 | 95 | def get_image(image_path, image_size, is_crop=True, resize_w=64, is_grayscale = False): 96 | return transform(imread(image_path, is_grayscale), image_size, is_crop, resize_w) 97 | 98 | def save_images(images, size, image_path): 99 | return imsave(inverse_transform(images), size, image_path) 100 | 101 | def imread(path, is_grayscale = False): 102 | if (is_grayscale): 103 | return scipy.misc.imread(path, flatten = True).astype(np.float) 104 | else: 105 | return scipy.misc.imread(path).astype(np.float) 106 | 107 | def merge_images(images, size): 108 | return inverse_transform(images) 109 | 110 | def merge(images, size): 111 | h, w = images.shape[1], images.shape[2] 112 | img = np.zeros((h * size[0], w * size[1], 3)) 113 | for idx, image in enumerate(images): 114 | i = idx % size[1] 115 | j = idx // size[1] 116 | img[j*h:j*h+h, i*w:i*w+w, :] = image 117 | 118 | return img 119 | 120 | def imsave(images, size, path): 121 | return scipy.misc.imsave(path, merge(images, size)) # Warning: will rescale to [0,255] 122 | # return scipy.misc.toimage(merge(images, size),cmin=0,cmax=255).save(path) # no rescale but hard to see 123 | 124 | def transform(image, npx=64, is_crop=True, resize_w=64): 125 | # npx : # of pixels width/height of image 126 | if is_crop: 127 | cropped_image = center_crop(image, npx, resize_w=resize_w) 128 | else: 129 | cropped_image = image 130 | return np.array(cropped_image)/127.5 - 1. 
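A quick numeric check of the value ranges used by `transform` above and `inverse_transform` below (a standalone sketch, not part of the original file):

```python
import numpy as np

pixels = np.array([0.0, 127.5, 255.0])
normalized = pixels / 127.5 - 1.0            # transform: [0, 255] -> [-1, 1]
assert np.allclose(normalized, [-1.0, 0.0, 1.0])

restored = (normalized + 1.0) / 2.0 * 255    # inverse_transform: back to [0, 255]
assert np.allclose(restored, pixels)
```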
131 |
132 | def inverse_transform(images):
133 |     return (images+1.)/2.*255
134 |
135 |
136 |
--------------------------------------------------------------------------------
/ops.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | import tensorflow as tf
4 |
5 | from tensorflow.python.framework import ops
6 |
7 | from utils import *
8 |
9 | class batch_norm(object):
10 |     # h1 = lrelu(tf.contrib.layers.batch_norm(conv2d(h0, self.df_dim*2, name='d_h1_conv'),decay=0.9,updates_collections=None,epsilon=0.00001,scale=True,scope="d_h1_conv"))
11 |     def __init__(self, epsilon=1e-5, momentum=0.9, name="batch_norm"):
12 |         with tf.variable_scope(name):
13 |             self.epsilon = epsilon
14 |             self.momentum = momentum
15 |             self.name = name
16 |
17 |     def __call__(self, x, train=True):
18 |         return tf.contrib.layers.batch_norm(x, decay=self.momentum, updates_collections=None, epsilon=self.epsilon, scale=True, scope=self.name)
19 |
20 | def binary_cross_entropy(preds, targets, name=None):
21 |     """Computes binary cross entropy given `preds`.
22 |
23 |     For brevity, let `x = preds`, `z = targets`. The logistic loss is
24 |
25 |         loss(x, z) = - sum_i (z[i] * log(x[i]) + (1 - z[i]) * log(1 - x[i]))
26 |
27 |     Args:
28 |         preds: A `Tensor` of type `float32` or `float64`.
29 |         targets: A `Tensor` of the same type and shape as `preds`.
30 |     """
31 |     eps = 1e-12
32 |     with ops.op_scope([preds, targets], name, "bce_loss") as name:
33 |         preds = ops.convert_to_tensor(preds, name="preds")
34 |         targets = ops.convert_to_tensor(targets, name="targets")
35 |         return tf.reduce_mean(-(targets * tf.log(preds + eps) +
36 |                                 (1. - targets) * tf.log(1. - preds + eps)))
37 |
38 | def conv_cond_concat(x, y):
39 |     """Concatenate conditioning vector on feature map axis."""
40 |     x_shapes = x.get_shape()
41 |     y_shapes = y.get_shape()
42 |     return tf.concat([x, y*tf.ones([x_shapes[0], x_shapes[1], x_shapes[2], y_shapes[3]])], 3)
43 |
44 | def conv2d(input_, output_dim,
45 |            k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
46 |            name="conv2d"):
47 |     with tf.variable_scope(name):
48 |         w = tf.get_variable('w', [k_h, k_w, input_.get_shape()[-1], output_dim],
49 |                             initializer=tf.truncated_normal_initializer(stddev=stddev))
50 |         conv = tf.nn.conv2d(input_, w, strides=[1, d_h, d_w, 1], padding='SAME')
51 |
52 |         biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0))
53 |         conv = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape())
54 |
55 |         return conv
56 |
57 | def deconv2d(input_, output_shape,
58 |              k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
59 |              name="deconv2d", with_w=False):
60 |     with tf.variable_scope(name):
61 |         # filter : [height, width, output_channels, in_channels]
62 |         w = tf.get_variable('w', [k_h, k_w, output_shape[-1], input_.get_shape()[-1]],
63 |                             initializer=tf.random_normal_initializer(stddev=stddev))
64 |
65 |         try:
66 |             deconv = tf.nn.conv2d_transpose(input_, w, output_shape=output_shape,
67 |                                             strides=[1, d_h, d_w, 1])
68 |
69 |         # Support for versions of TensorFlow before 0.7.0
70 |         except AttributeError:
71 |             deconv = tf.nn.deconv2d(input_, w, output_shape=output_shape,
72 |                                     strides=[1, d_h, d_w, 1])
73 |
74 |         biases = tf.get_variable('biases', [output_shape[-1]], initializer=tf.constant_initializer(0.0))
75 |         deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape())
76 |
77 |         if with_w:
78 |             return deconv, w, biases
79 |         else:
80 |             return deconv
81 |
82 | def lrelu(x, leak=0.2, name="lrelu"):
83 |     return tf.maximum(x, 
leak*x) 84 | 85 | def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False): 86 | shape = input_.get_shape().as_list() 87 | 88 | with tf.variable_scope(scope or "Linear"): 89 | matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32, 90 | tf.random_normal_initializer(stddev=stddev)) 91 | bias = tf.get_variable("bias", [output_size], 92 | initializer=tf.constant_initializer(bias_start)) 93 | if with_w: 94 | return tf.matmul(input_, matrix) + bias, matrix, bias 95 | else: 96 | return tf.matmul(input_, matrix) + bias 97 | 98 | def lstm(input_, n_hidden, keep_prob, n_dim, name="lstm"): 99 | # input (1,5,512), output (1,5,n_hidden) 100 | with tf.variable_scope(name): 101 | lstm_cell = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(n_hidden), output_keep_prob=keep_prob)for _ in range(n_dim)]); 102 | 103 | outputs,states = tf.nn.dynamic_rnn(lstm_cell,input_,dtype=tf.float32) 104 | print('lstm_output shape: {}'.format(outputs.shape)) 105 | #return outputs[:,-1,:] 106 | return outputs 107 | -------------------------------------------------------------------------------- /tools/process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | 6 | import argparse 7 | import os 8 | import tempfile 9 | import subprocess 10 | import tensorflow as tf 11 | import numpy as np 12 | import tfimage as im 13 | import threading 14 | import time 15 | import multiprocessing 16 | 17 | edge_pool = None 18 | 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("--input_dir", required=True, help="path to folder containing images") 22 | parser.add_argument("--output_dir", required=True, help="output path") 23 | parser.add_argument("--operation", required=True, choices=["grayscale", "resize", "blank", "combine", "edges"]) 24 | parser.add_argument("--workers", type=int, default=1, help="number of workers") 25 | # resize 26 | parser.add_argument("--pad", action="store_true", help="pad instead of crop for resize operation") 27 | parser.add_argument("--size", type=int, default=256, help="size to use for resize operation") 28 | # combine 29 | parser.add_argument("--b_dir", type=str, help="path to folder containing B images for combine operation") 30 | a = parser.parse_args() 31 | 32 | 33 | def resize(src): 34 | height, width, _ = src.shape 35 | dst = src 36 | if height != width: 37 | if a.pad: 38 | size = max(height, width) 39 | # pad to correct ratio 40 | oh = (size - height) // 2 41 | ow = (size - width) // 2 42 | dst = im.pad(image=dst, offset_height=oh, offset_width=ow, target_height=size, target_width=size) 43 | else: 44 | # crop to correct ratio 45 | size = min(height, width) 46 | oh = (height - size) // 2 47 | ow = (width - size) // 2 48 | dst = im.crop(image=dst, offset_height=oh, offset_width=ow, target_height=size, target_width=size) 49 | 50 | assert(dst.shape[0] == dst.shape[1]) 51 | 52 | size, _, _ = dst.shape 53 | if size > a.size: 54 | dst = im.downscale(images=dst, size=[a.size, a.size]) 55 | elif size < a.size: 56 | dst = im.upscale(images=dst, size=[a.size, a.size]) 57 | return dst 58 | 59 | 60 | def blank(src): 61 | height, width, _ = src.shape 62 | if height != width: 63 | raise Exception("non-square image") 64 | 65 | image_size = width 66 | size = int(image_size * 0.3) 67 | offset = int(image_size / 2 - size / 2) 68 | 69 | dst = src 70 | 
dst[offset:offset + size,offset:offset + size,:] = np.ones([size, size, 3]) 71 | return dst 72 | 73 | 74 | def combine(src, src_path): 75 | if a.b_dir is None: 76 | raise Exception("missing b_dir") 77 | 78 | # find corresponding file in b_dir, could have a different extension 79 | basename, _ = os.path.splitext(os.path.basename(src_path)) 80 | for ext in [".png", ".jpg"]: 81 | sibling_path = os.path.join(a.b_dir, basename + ext) 82 | if os.path.exists(sibling_path): 83 | sibling = im.load(sibling_path) 84 | break 85 | else: 86 | raise Exception("could not find sibling image for " + src_path) 87 | 88 | # make sure that dimensions are correct 89 | height, width, _ = src.shape 90 | if height != sibling.shape[0] or width != sibling.shape[1]: 91 | raise Exception("differing sizes") 92 | 93 | # convert both images to RGB if necessary 94 | if src.shape[2] == 1: 95 | src = im.grayscale_to_rgb(images=src) 96 | 97 | if sibling.shape[2] == 1: 98 | sibling = im.grayscale_to_rgb(images=sibling) 99 | 100 | # remove alpha channel 101 | if src.shape[2] == 4: 102 | src = src[:,:,:3] 103 | 104 | if sibling.shape[2] == 4: 105 | sibling = sibling[:,:,:3] 106 | 107 | return np.concatenate([src, sibling], axis=1) 108 | 109 | 110 | def grayscale(src): 111 | return im.grayscale_to_rgb(images=im.rgb_to_grayscale(images=src)) 112 | 113 | 114 | net = None 115 | def run_caffe(src): 116 | # lazy load caffe and create net 117 | global net 118 | if net is None: 119 | # don't require caffe unless we are doing edge detection 120 | os.environ["GLOG_minloglevel"] = "2" # disable logging from caffe 121 | import caffe 122 | # using this requires using the docker image or assembling a bunch of dependencies 123 | # and then changing these hardcoded paths 124 | net = caffe.Net("/opt/caffe/examples/hed/deploy.prototxt", "/opt/caffe/hed_pretrained_bsds.caffemodel", caffe.TEST) 125 | 126 | net.blobs["data"].reshape(1, *src.shape) 127 | net.blobs["data"].data[...] 
= src 128 | net.forward() 129 | return net.blobs["sigmoid-fuse"].data[0][0,:,:] 130 | 131 | 132 | def edges(src): 133 | # based on https://github.com/phillipi/pix2pix/blob/master/scripts/edges/batch_hed.py 134 | # and https://github.com/phillipi/pix2pix/blob/master/scripts/edges/PostprocessHED.m 135 | import scipy.io 136 | src = src * 255 137 | border = 128 # put a padding around images since edge detection seems to detect edge of image 138 | src = src[:,:,:3] # remove alpha channel if present 139 | src = np.pad(src, ((border, border), (border, border), (0,0)), "reflect") 140 | src = src[:,:,::-1] 141 | src -= np.array((104.00698793,116.66876762,122.67891434)) 142 | src = src.transpose((2, 0, 1)) 143 | 144 | # [height, width, channels] => [batch, channel, height, width] 145 | fuse = edge_pool.apply(run_caffe, [src]) 146 | fuse = fuse[border:-border, border:-border] 147 | 148 | with tempfile.NamedTemporaryFile(suffix=".png") as png_file, tempfile.NamedTemporaryFile(suffix=".mat") as mat_file: 149 | scipy.io.savemat(mat_file.name, {"input": fuse}) 150 | 151 | octave_code = r""" 152 | E = 1-load(input_path).input; 153 | E = imresize(E, [image_width,image_width]); 154 | E = 1 - E; 155 | E = single(E); 156 | [Ox, Oy] = gradient(convTri(E, 4), 1); 157 | [Oxx, ~] = gradient(Ox, 1); 158 | [Oxy, Oyy] = gradient(Oy, 1); 159 | O = mod(atan(Oyy .* sign(-Oxy) ./ (Oxx + 1e-5)), pi); 160 | E = edgesNmsMex(E, O, 1, 5, 1.01, 1); 161 | E = double(E >= max(eps, threshold)); 162 | E = bwmorph(E, 'thin', inf); 163 | E = bwareaopen(E, small_edge); 164 | E = 1 - E; 165 | E = uint8(E * 255); 166 | imwrite(E, output_path); 167 | """ 168 | 169 | config = dict( 170 | input_path="'%s'" % mat_file.name, 171 | output_path="'%s'" % png_file.name, 172 | image_width=256, 173 | threshold=25.0/255.0, 174 | small_edge=5, 175 | ) 176 | 177 | args = ["octave"] 178 | for k, v in config.items(): 179 | args.extend(["--eval", "%s=%s;" % (k, v)]) 180 | 181 | args.extend(["--eval", octave_code]) 182 | try: 183 | subprocess.check_output(args, stderr=subprocess.STDOUT) 184 | except subprocess.CalledProcessError as e: 185 | print("octave failed") 186 | print("returncode:", e.returncode) 187 | print("output:", e.output) 188 | raise 189 | return im.load(png_file.name) 190 | 191 | 192 | def process(src_path, dst_path): 193 | src = im.load(src_path) 194 | 195 | if a.operation == "grayscale": 196 | dst = grayscale(src) 197 | elif a.operation == "resize": 198 | dst = resize(src) 199 | elif a.operation == "blank": 200 | dst = blank(src) 201 | elif a.operation == "combine": 202 | dst = combine(src, src_path) 203 | elif a.operation == "edges": 204 | dst = edges(src) 205 | else: 206 | raise Exception("invalid operation") 207 | 208 | im.save(dst, dst_path) 209 | 210 | 211 | complete_lock = threading.Lock() 212 | start = None 213 | num_complete = 0 214 | total = 0 215 | 216 | def complete(): 217 | global num_complete, rate, last_complete 218 | 219 | with complete_lock: 220 | num_complete += 1 221 | now = time.time() 222 | elapsed = now - start 223 | rate = num_complete / elapsed 224 | if rate > 0: 225 | remaining = (total - num_complete) / rate 226 | else: 227 | remaining = 0 228 | 229 | print("%d/%d complete %0.2f images/sec %dm%ds elapsed %dm%ds remaining" % (num_complete, total, rate, elapsed // 60, elapsed % 60, remaining // 60, remaining % 60)) 230 | 231 | last_complete = now 232 | 233 | 234 | def main(): 235 | if not os.path.exists(a.output_dir): 236 | os.makedirs(a.output_dir) 237 | 238 | src_paths = [] 239 | dst_paths = [] 240 | 241 | 
skipped = 0 242 | for src_path in im.find(a.input_dir): 243 | name, _ = os.path.splitext(os.path.basename(src_path)) 244 | dst_path = os.path.join(a.output_dir, name + ".png") 245 | if os.path.exists(dst_path): 246 | skipped += 1 247 | else: 248 | src_paths.append(src_path) 249 | dst_paths.append(dst_path) 250 | 251 | print("skipping %d files that already exist" % skipped) 252 | 253 | global total 254 | total = len(src_paths) 255 | 256 | print("processing %d files" % total) 257 | 258 | global start 259 | start = time.time() 260 | 261 | if a.operation == "edges": 262 | # use a multiprocessing pool for this operation so it can use multiple CPUs 263 | # create the pool before we launch processing threads 264 | global edge_pool 265 | edge_pool = multiprocessing.Pool(a.workers) 266 | 267 | if a.workers == 1: 268 | with tf.Session() as sess: 269 | for src_path, dst_path in zip(src_paths, dst_paths): 270 | process(src_path, dst_path) 271 | complete() 272 | else: 273 | queue = tf.train.input_producer(zip(src_paths, dst_paths), shuffle=False, num_epochs=1) 274 | dequeue_op = queue.dequeue() 275 | 276 | def worker(coord): 277 | with sess.as_default(): 278 | while not coord.should_stop(): 279 | try: 280 | src_path, dst_path = sess.run(dequeue_op) 281 | except tf.errors.OutOfRangeError: 282 | coord.request_stop() 283 | break 284 | 285 | process(src_path, dst_path) 286 | complete() 287 | 288 | # init epoch counter for the queue 289 | local_init_op = tf.local_variables_initializer() 290 | with tf.Session() as sess: 291 | sess.run(local_init_op) 292 | 293 | coord = tf.train.Coordinator() 294 | threads = tf.train.start_queue_runners(coord=coord) 295 | for i in range(a.workers): 296 | t = threading.Thread(target=worker, args=(coord,)) 297 | t.start() 298 | threads.append(t) 299 | 300 | try: 301 | coord.join(threads) 302 | except KeyboardInterrupt: 303 | coord.request_stop() 304 | coord.join(threads) 305 | 306 | main() 307 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import time 4 | from glob import glob 5 | import tensorflow as tf 6 | import numpy as np 7 | from six.moves import xrange 8 | import shutil 9 | 10 | from ops import * 11 | from utils import * 12 | 13 | os.environ["CUDA_VISIBLE_DEVICES"]="-1" 14 | 15 | class pix2pix(object): 16 | def __init__(self, sess, image_size=256, 17 | batch_size=1, sample_size=1, output_size=256, 18 | gf_dim=64, df_dim=64, L1_lambda=100, 19 | input_c_dim=15, output_c_dim=1, dataset_name='facades', 20 | checkpoint_dir=None, sample_dir=None): 21 | """ 22 | 23 | Args: 24 | sess: TensorFlow session 25 | batch_size: The size of batch. Should be specified before training. 26 | output_size: (optional) The resolution in pixels of the images. [256] 27 | gf_dim: (optional) Dimension of gen filters in first conv layer. [64] 28 | df_dim: (optional) Dimension of discrim filters in first conv layer. [64] 29 | input_c_dim: (optional) Dimension of input image color. For grayscale input, set to 1. [3] 30 | output_c_dim: (optional) Dimension of output image color. For grayscale input, set to 1. 
[3] 31 | """ 32 | self.sess = sess 33 | self.is_grayscale = (input_c_dim == 1) 34 | self.batch_size = batch_size 35 | self.image_size = image_size 36 | self.sample_size = sample_size 37 | self.output_size = output_size 38 | 39 | self.gf_dim = gf_dim 40 | self.df_dim = df_dim 41 | 42 | self.input_c_dim = input_c_dim 43 | self.output_c_dim = output_c_dim 44 | 45 | self.L1_lambda = L1_lambda 46 | 47 | # lstm variables 48 | self.n_hidden_lstm = 1024 49 | self.num_layer_lstm = 2 50 | self.keep_prob_lstm = 0.5 51 | 52 | # batch normalization : deals with poor initialization helps gradient flow 53 | self.d_bn1 = batch_norm(name='d_bn1') 54 | self.d_bn2 = batch_norm(name='d_bn2') 55 | self.d_bn3 = batch_norm(name='d_bn3') 56 | 57 | self.g_bn_e2 = batch_norm(name='g_bn_e2') 58 | self.g_bn_e3 = batch_norm(name='g_bn_e3') 59 | self.g_bn_e4 = batch_norm(name='g_bn_e4') 60 | self.g_bn_e5 = batch_norm(name='g_bn_e5') 61 | self.g_bn_e6 = batch_norm(name='g_bn_e6') 62 | self.g_bn_e7 = batch_norm(name='g_bn_e7') 63 | self.g_bn_e8 = batch_norm(name='g_bn_e8') 64 | 65 | self.g_bn_d1 = batch_norm(name='g_bn_d1') 66 | self.g_bn_d2 = batch_norm(name='g_bn_d2') 67 | self.g_bn_d3 = batch_norm(name='g_bn_d3') 68 | self.g_bn_d4 = batch_norm(name='g_bn_d4') 69 | self.g_bn_d5 = batch_norm(name='g_bn_d5') 70 | self.g_bn_d6 = batch_norm(name='g_bn_d6') 71 | self.g_bn_d7 = batch_norm(name='g_bn_d7') 72 | 73 | self.dataset_name = dataset_name 74 | self.checkpoint_dir = checkpoint_dir 75 | self.build_model() 76 | 77 | def build_model(self): 78 | self.real_data = tf.placeholder(tf.float32, 79 | [self.batch_size, self.image_size, self.image_size, 80 | self.input_c_dim + self.output_c_dim], 81 | name='real_A_and_B_images') 82 | #self.real_B = self.real_data[:, :, :, :self.input_c_dim] 83 | #self.real_A = self.real_data[:, :, :, self.input_c_dim:self.input_c_dim + self.output_c_dim] 84 | self.real_B = self.real_data[:, :, :, :self.output_c_dim] 85 | self.real_A = self.real_data[:, :, :, self.output_c_dim:self.input_c_dim + self.output_c_dim] 86 | mask = tf.cast(self.real_B>-1,tf.int32) # find all valid pixels 87 | #print(self.real_B) 88 | self.fake_B = self.generator(self.real_A) 89 | print(self.fake_B) 90 | self.real_AB = tf.concat([self.real_A, self.real_B], 3) 91 | print(self.real_AB) 92 | self.fake_AB = tf.concat([self.real_A, self.fake_B], 3) 93 | print(self.fake_AB) 94 | self.D, self.D_logits = self.discriminator(self.real_AB, reuse=False) 95 | self.D_, self.D_logits_ = self.discriminator(self.fake_AB, reuse=True) 96 | bad_diff, good_diff = tf.dynamic_partition(self.real_B - self.fake_B,mask,2) 97 | self.fake_B_sample = self.sampler(self.real_A) 98 | 99 | self.d_sum = tf.summary.histogram("d", self.D) 100 | self.d__sum = tf.summary.histogram("d_", self.D_) 101 | self.fake_B_sum = tf.summary.image("fake_B", self.fake_B) 102 | 103 | self.d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_logits, labels=tf.scalar_mul(0.9,tf.ones_like(self.D)))) 104 | self.d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_logits_, labels=tf.zeros_like(self.D_))) 105 | self.g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_logits_, labels=tf.ones_like(self.D_))) \ 106 | + self.L1_lambda * tf.reduce_mean(tf.abs(good_diff)) 107 | 108 | self.d_loss_real_sum = tf.summary.scalar("d_loss_real", self.d_loss_real) 109 | self.d_loss_fake_sum = tf.summary.scalar("d_loss_fake", self.d_loss_fake) 110 | 111 | self.d_loss = self.d_loss_real + self.d_loss_fake 112 
| 113 | self.g_loss_sum = tf.summary.scalar("g_loss", self.g_loss) 114 | self.d_loss_sum = tf.summary.scalar("d_loss", self.d_loss) 115 | 116 | t_vars = tf.trainable_variables() 117 | 118 | self.d_vars = [var for var in t_vars if 'd_' in var.name] 119 | self.g_vars = [var for var in t_vars if 'g_' in var.name] 120 | 121 | self.saver = tf.train.Saver(max_to_keep=10) 122 | 123 | 124 | def load_random_samples(self): 125 | #data = np.random.choice(glob('./datasets/{}/val/*.jpg'.format(self.dataset_name)), self.batch_size) 126 | valPath = os.path.join(os.getcwd(),'datasets',str(self.dataset_name),'val','A') 127 | data = np.random.choice([name for name in os.listdir(valPath)], self.batch_size) 128 | 129 | sample = [load_data2(valPath,sample_file) for sample_file in data] 130 | 131 | if (self.is_grayscale): 132 | sample_images = np.array(sample).astype(np.float32)[:, :, :, None] 133 | else: 134 | sample_images = np.array(sample).astype(np.float32) 135 | return sample_images 136 | 137 | def sample_model(self, sample_dir, epoch, idx): 138 | sample_images = self.load_random_samples() 139 | samples, d_loss, g_loss = self.sess.run( 140 | [self.fake_B_sample, self.d_loss, self.g_loss], 141 | feed_dict={self.real_data: sample_images} 142 | ) 143 | save_images(samples, [self.batch_size, 1], 144 | './{}/train_{:02d}_{:04d}.png'.format(sample_dir, epoch, idx)) 145 | print("[Sample] d_loss: {:.8f}, g_loss: {:.8f}".format(d_loss, g_loss)) 146 | 147 | def train(self, args): 148 | """Train pix2pix""" 149 | d_optim = tf.train.AdamOptimizer(args.lr, beta1=args.beta1) \ 150 | .minimize(self.d_loss, var_list=self.d_vars) 151 | g_optim = tf.train.AdamOptimizer(args.lr, beta1=args.beta1) \ 152 | .minimize(self.g_loss, var_list=self.g_vars) 153 | 154 | init_op = tf.global_variables_initializer() 155 | self.sess.run(init_op) 156 | 157 | self.g_sum = tf.summary.merge([self.d__sum, 158 | self.fake_B_sum, self.d_loss_fake_sum, self.g_loss_sum]) 159 | self.d_sum = tf.summary.merge([self.d_sum, self.d_loss_real_sum, self.d_loss_sum]) 160 | self.writer = tf.summary.FileWriter("./logs", self.sess.graph) 161 | 162 | counter = 1 163 | start_time = time.time() 164 | 165 | if self.load(self.checkpoint_dir): 166 | print(" [*] Load SUCCESS") 167 | else: 168 | print(" [!] 
Load failed...")
169 | 
170 | for epoch in xrange(args.epoch):
171 | #data = glob('./datasets/{}/train/*.jpg'.format(self.dataset_name))
172 | trainPath = os.path.join(os.getcwd(), 'datasets', str(self.dataset_name), 'train', 'A')  # A and B hold paired files, so listing either folder gives the same names
173 | data = sorted(os.listdir(trainPath))  # sort so batch order is deterministic across runs
174 | #np.random.shuffle(data)
175 | batch_idxs = min(len(data), args.train_size) // self.batch_size
176 | 
177 | for idx in xrange(0, batch_idxs):
178 | batch_files = data[idx*self.batch_size:(idx+1)*self.batch_size]
179 | batch = [load_data2(trainPath, batch_file) for batch_file in batch_files]
180 | if (self.is_grayscale):
181 | batch_images = np.array(batch).astype(np.float32)[:, :, :, None]
182 | else:
183 | batch_images = np.array(batch).astype(np.float32)
184 | 
185 | # Update D network
186 | _, summary_str = self.sess.run([d_optim, self.d_sum],
187 | feed_dict={ self.real_data: batch_images })
188 | self.writer.add_summary(summary_str, counter)
189 | 
190 | # Update G network
191 | _, summary_str = self.sess.run([g_optim, self.g_sum],
192 | feed_dict={ self.real_data: batch_images })
193 | self.writer.add_summary(summary_str, counter)
194 | 
195 | # Run g_optim twice to make sure that d_loss does not go to zero (different from the paper)
196 | _, summary_str = self.sess.run([g_optim, self.g_sum],
197 | feed_dict={ self.real_data: batch_images })
198 | self.writer.add_summary(summary_str, counter)
199 | 
200 | errD_fake = self.d_loss_fake.eval({self.real_data: batch_images})
201 | errD_real = self.d_loss_real.eval({self.real_data: batch_images})
202 | errG = self.g_loss.eval({self.real_data: batch_images})
203 | 
204 | counter += 1
205 | print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" \
206 | % (epoch, idx, batch_idxs,
207 | time.time() - start_time, errD_fake+errD_real, errG))
208 | 
209 | if np.mod(counter, 100) == 1:
210 | self.sample_model(args.sample_dir, epoch, idx)
211 | 
212 | if np.mod(counter, 5000) == 2:
213 | self.save(args.checkpoint_dir, counter)
214 | 
215 | def discriminator(self, image, y=None, reuse=False):
216 | 
217 | with tf.variable_scope("discriminator") as scope:
218 | 
219 | # image is 256 x 256 x (input_c_dim + output_c_dim)
220 | if reuse:
221 | tf.get_variable_scope().reuse_variables()
222 | else:
223 | assert not tf.get_variable_scope().reuse
224 | 
225 | h0 = lrelu(conv2d(image, self.df_dim, name='d_h0_conv'))
226 | # h0 is (128 x 128 x self.df_dim)
227 | h1 = lrelu(self.d_bn1(conv2d(h0, self.df_dim*2, name='d_h1_conv')))
228 | # h1 is (64 x 64 x self.df_dim*2)
229 | h2 = lrelu(self.d_bn2(conv2d(h1, self.df_dim*4, name='d_h2_conv')))
230 | # h2 is (32 x 32 x self.df_dim*4)
231 | h3 = lrelu(self.d_bn3(conv2d(h2, self.df_dim*8, d_h=1, d_w=1, name='d_h3_conv')))
232 | # h3 is (32 x 32 x self.df_dim*8): stride 1 keeps the spatial size
233 | h4 = linear(tf.reshape(h3, [self.batch_size, -1]), 1, 'd_h3_lin')
234 | 
235 | return tf.nn.sigmoid(h4), h4
236 | 
237 | def generator(self, image, y=None):
238 | with tf.variable_scope("generator") as scope:
239 | 
240 | s = self.output_size
241 | s2, s4, s8, s16, s32, s64, s128 = int(s/2), int(s/4), int(s/8), int(s/16), int(s/32), int(s/64), int(s/128)
242 | 
243 | # LSTM encoder
244 | reuse = False
245 | lstm_input = []
246 | N = image.shape[-1]//3  # number of focal slices; each slice is a 3-channel image
247 | with tf.variable_scope('LSTM_scope'):
248 | for i in range(N):
249 | img = image[:,:,:,i*3:(i+1)*3]
250 | if reuse:
251 | tf.get_variable_scope().reuse_variables()
252 | 
253 | print('img slice shape: {}'.format(img.shape))
254 | # image is (256 x 256 x input_c_dim)
255 | e1 =
conv2d(img, self.gf_dim, name='g_e1_conv')
256 | # e1 is (128 x 128 x self.gf_dim)
257 | e2 = self.g_bn_e2(conv2d(lrelu(e1), self.gf_dim*2, name='g_e2_conv'))
258 | # e2 is (64 x 64 x self.gf_dim*2)
259 | e3 = self.g_bn_e3(conv2d(lrelu(e2), self.gf_dim*4, name='g_e3_conv'))
260 | # e3 is (32 x 32 x self.gf_dim*4)
261 | e4 = self.g_bn_e4(conv2d(lrelu(e3), self.gf_dim*8, name='g_e4_conv'))
262 | # e4 is (16 x 16 x self.gf_dim*8)
263 | e5 = self.g_bn_e5(conv2d(lrelu(e4), self.gf_dim*8, name='g_e5_conv'))
264 | # e5 is (8 x 8 x self.gf_dim*8)
265 | e6 = self.g_bn_e6(conv2d(lrelu(e5), self.gf_dim*8, name='g_e6_conv'))
266 | # e6 is (4 x 4 x self.gf_dim*8)
267 | e7 = self.g_bn_e7(conv2d(lrelu(e6), self.gf_dim*8, name='g_e7_conv'))
268 | # e7 is (2 x 2 x self.gf_dim*8)
269 | e8 = self.g_bn_e8(conv2d(lrelu(e7), self.gf_dim*8, name='g_e8_conv'))
270 | # e8 is (1 x 1 x self.gf_dim*8)
271 | 
272 | if i == 0:
273 | e1_0 = tf.identity(e1)  # keep the first slice's encoder activations as the decoder's skip connections
274 | e2_0 = tf.identity(e2)
275 | e3_0 = tf.identity(e3)
276 | e4_0 = tf.identity(e4)
277 | e5_0 = tf.identity(e5)
278 | e6_0 = tf.identity(e6)
279 | e7_0 = tf.identity(e7)
280 | e8_0 = tf.identity(e8)
281 | lstm_input.append(e8)
282 | reuse = True # reuse encoder variables after the first iteration
283 | 
284 | # input to lstm cell
285 | # stack the per-slice bottleneck codes for the LSTM:
286 | # [(1,1,1,512)] * N -> concatenate -> (1, N, 512)
287 | lstm_input_final = tf.reshape(lstm_input[0], [1, 1, self.gf_dim*8])  # start from slice 0 so every code appears exactly once
288 | print('lstm_input_final before: {}'.format(lstm_input_final.shape))
289 | for i in range(1, N):
290 | lstm_input_final = tf.concat((lstm_input_final, tf.reshape(lstm_input[i], [1, 1, self.gf_dim*8])), axis=1)
291 | print('lstm_input_final after: {}'.format(lstm_input_final.shape))
292 | 
293 | output_lstm = lstm(lstm_input_final, self.n_hidden_lstm, self.keep_prob_lstm, self.num_layer_lstm, name='g_lstm')
294 | output = tf.reduce_sum(output_lstm, axis=1)  # sum the LSTM outputs over all N time steps
295 | print('lstm_output sum up: {}'.format(output.shape))
296 | 
297 | w_output = tf.get_variable("g_w", initializer=tf.truncated_normal([self.n_hidden_lstm, self.gf_dim*8]))  # get_variable so the sampler can reuse the trained weights
298 | b_output = tf.get_variable("g_b", initializer=tf.zeros(self.gf_dim*8))
299 | output = tf.matmul(output, w_output) + b_output
300 | print('output after matmul: {}'.format(output))
301 | 
302 | output = tf.reshape(output, [-1, 1, 1, self.gf_dim*8])
303 | print('output after reshape: {}'.format(output))
304 | 
305 | self.d1, self.d1_w, self.d1_b = deconv2d(tf.nn.relu(output),
306 | [self.batch_size, s128, s128, self.gf_dim*8], name='g_d1', with_w=True)
307 | d1 = tf.nn.dropout(self.g_bn_d1(self.d1), 0.5)
308 | d1 = tf.concat([d1, e7_0], 3)
309 | # d1 is (2 x 2 x self.gf_dim*8*2)
310 | 
311 | self.d2, self.d2_w, self.d2_b = deconv2d(tf.nn.relu(d1),
312 | [self.batch_size, s64, s64, self.gf_dim*8], name='g_d2', with_w=True)
313 | d2 = tf.nn.dropout(self.g_bn_d2(self.d2), 0.5)
314 | d2 = tf.concat([d2, e6_0], 3)
315 | # d2 is (4 x 4 x self.gf_dim*8*2)
316 | 
317 | self.d3, self.d3_w, self.d3_b = deconv2d(tf.nn.relu(d2),
318 | [self.batch_size, s32, s32, self.gf_dim*8], name='g_d3', with_w=True)
319 | d3 = tf.nn.dropout(self.g_bn_d3(self.d3), 0.5)
320 | d3 = tf.concat([d3, e5_0], 3)
321 | # d3 is (8 x 8 x self.gf_dim*8*2)
322 | 
323 | self.d4, self.d4_w, self.d4_b = deconv2d(tf.nn.relu(d3),
324 | [self.batch_size, s16, s16, self.gf_dim*8], name='g_d4', with_w=True)
325 | d4 = self.g_bn_d4(self.d4)
326 | d4 = tf.concat([d4, e4_0], 3)
327 | # d4 is (16 x 16 x self.gf_dim*8*2)
328 | 
329 | self.d5, self.d5_w, self.d5_b = deconv2d(tf.nn.relu(d4),
330
| [self.batch_size, s8, s8, self.gf_dim*4], name='g_d5', with_w=True) 331 | d5 = self.g_bn_d5(self.d5) 332 | d5 = tf.concat([d5, e3_0], 3) 333 | # d5 is (32 x 32 x self.gf_dim*4*2) 334 | 335 | self.d6, self.d6_w, self.d6_b = deconv2d(tf.nn.relu(d5), 336 | [self.batch_size, s4, s4, self.gf_dim*2], name='g_d6', with_w=True) 337 | d6 = self.g_bn_d6(self.d6) 338 | d6 = tf.concat([d6, e2_0], 3) 339 | # d6 is (64 x 64 x self.gf_dim*2*2) 340 | 341 | self.d7, self.d7_w, self.d7_b = deconv2d(tf.nn.relu(d6), 342 | [self.batch_size, s2, s2, self.gf_dim], name='g_d7', with_w=True) 343 | d7 = self.g_bn_d7(self.d7) 344 | d7 = tf.concat([d7, e1_0], 3) 345 | # d7 is (128 x 128 x self.gf_dim*1*2) 346 | 347 | self.d8, self.d8_w, self.d8_b = deconv2d(tf.nn.relu(d7), 348 | [self.batch_size, s, s, self.output_c_dim], name='g_d8', with_w=True) 349 | # d8 is (256 x 256 x output_c_dim) 350 | 351 | return tf.nn.tanh(self.d8) 352 | 353 | def sampler(self, image, y=None): 354 | 355 | with tf.variable_scope("generator") as scope: 356 | scope.reuse_variables() 357 | 358 | s = self.output_size 359 | s2, s4, s8, s16, s32, s64, s128 = int(s/2), int(s/4), int(s/8), int(s/16), int(s/32), int(s/64), int(s/128) 360 | 361 | # LSTM encoder 362 | reuse = False 363 | lstm_input = [] 364 | N = image.shape[-1]//3 365 | 366 | with tf.variable_scope('LSTM_scope'): 367 | for i in range(N): 368 | img = image[:,:,:,i*3:(i+1)*3] 369 | if reuse: 370 | tf.get_variable_scope().reuse_variables() 371 | 372 | #print('img_unstacked shape: {}'.format(image.shape)) 373 | # image is (256 x 256 x input_c_dim) 374 | e1 = conv2d(img, self.gf_dim, name='g_e1_conv') 375 | # e1 is (128 x 128 x self.gf_dim) 376 | e2 = self.g_bn_e2(conv2d(lrelu(e1), self.gf_dim*2, name='g_e2_conv')) 377 | # e2 is (64 x 64 x self.gf_dim*2) 378 | e3 = self.g_bn_e3(conv2d(lrelu(e2), self.gf_dim*4, name='g_e3_conv')) 379 | # e3 is (32 x 32 x self.gf_dim*4) 380 | e4 = self.g_bn_e4(conv2d(lrelu(e3), self.gf_dim*8, name='g_e4_conv')) 381 | # e4 is (16 x 16 x self.gf_dim*8) 382 | e5 = self.g_bn_e5(conv2d(lrelu(e4), self.gf_dim*8, name='g_e5_conv')) 383 | # e5 is (8 x 8 x self.gf_dim*8) 384 | e6 = self.g_bn_e6(conv2d(lrelu(e5), self.gf_dim*8, name='g_e6_conv')) 385 | # e6 is (4 x 4 x self.gf_dim*8) 386 | e7 = self.g_bn_e7(conv2d(lrelu(e6), self.gf_dim*8, name='g_e7_conv')) 387 | # e7 is (2 x 2 x self.gf_dim*8) 388 | e8 = self.g_bn_e8(conv2d(lrelu(e7), self.gf_dim*8, name='g_e8_conv')) 389 | # e8 is (1 x 1 x self.gf_dim*8) 390 | if i==0 : 391 | e1_0 = tf.identity(e1) 392 | e2_0 = tf.identity(e2) 393 | e3_0 = tf.identity(e3) 394 | e4_0 = tf.identity(e4) 395 | e5_0 = tf.identity(e5) 396 | e6_0 = tf.identity(e6) 397 | e7_0 = tf.identity(e7) 398 | e8_0 = tf.identity(e8) 399 | lstm_input.append(e8) 400 | reuse = True # reuse variable after first iteration 401 | 402 | # input to lstm cell 403 | # todo: change shape of lstm_input to (1,5,512)? 
404 | # stack the per-slice bottleneck codes, as in generator(): [(1,1,1,512)] * N -> concatenate -> (1, N, 512)
405 | lstm_input_final = tf.reshape(lstm_input[0], [1, 1, self.gf_dim*8])  # start from slice 0 so every code appears exactly once
406 | #print('lstm_input_final before: {}'.format(lstm_input_final.shape))
407 | for i in range(1, N):
408 | lstm_input_final = tf.concat((lstm_input_final, tf.reshape(lstm_input[i], [1, 1, self.gf_dim*8])), axis=1)
409 | #print('lstm_input_final after: {}'.format(lstm_input_final.shape))
410 | 
411 | output_lstm = lstm(lstm_input_final, self.n_hidden_lstm, self.keep_prob_lstm, self.num_layer_lstm, name='g_lstm')
412 | output = tf.reduce_sum(output_lstm, axis=1)  # sum the LSTM outputs over all N time steps
413 | print('lstm_output sum up: {}'.format(output.shape))
414 | print('creating cell LSTM in sampler')
415 | 
416 | w_output = tf.get_variable("g_w", initializer=tf.truncated_normal([self.n_hidden_lstm, self.gf_dim * 8]))  # get_variable reuses the generator's trained weights via the enclosing reuse scope
417 | b_output = tf.get_variable("g_b", initializer=tf.zeros(self.gf_dim * 8))
418 | output = tf.matmul(output, w_output) + b_output
419 | print('output after lstm matmul: {}'.format(output))
420 | 
421 | output = tf.reshape(output, [-1, 1, 1, self.gf_dim*8])
422 | print('output after reshape: {}'.format(output))
423 | 
424 | self.d1, self.d1_w, self.d1_b = deconv2d(tf.nn.relu(output),
425 | [self.batch_size, s128, s128, self.gf_dim*8], name='g_d1', with_w=True)
426 | d1 = tf.nn.dropout(self.g_bn_d1(self.d1), 0.5)  # dropout is kept at test time, as in pix2pix
427 | d1 = tf.concat([d1, e7_0], 3)
428 | # d1 is (2 x 2 x self.gf_dim*8*2)
429 | 
430 | self.d2, self.d2_w, self.d2_b = deconv2d(tf.nn.relu(d1),
431 | [self.batch_size, s64, s64, self.gf_dim*8], name='g_d2', with_w=True)
432 | d2 = tf.nn.dropout(self.g_bn_d2(self.d2), 0.5)
433 | d2 = tf.concat([d2, e6_0], 3)
434 | # d2 is (4 x 4 x self.gf_dim*8*2)
435 | 
436 | self.d3, self.d3_w, self.d3_b = deconv2d(tf.nn.relu(d2),
437 | [self.batch_size, s32, s32, self.gf_dim*8], name='g_d3', with_w=True)
438 | d3 = tf.nn.dropout(self.g_bn_d3(self.d3), 0.5)
439 | d3 = tf.concat([d3, e5_0], 3)
440 | # d3 is (8 x 8 x self.gf_dim*8*2)
441 | 
442 | self.d4, self.d4_w, self.d4_b = deconv2d(tf.nn.relu(d3),
443 | [self.batch_size, s16, s16, self.gf_dim*8], name='g_d4', with_w=True)
444 | d4 = self.g_bn_d4(self.d4)
445 | d4 = tf.concat([d4, e4_0], 3)
446 | # d4 is (16 x 16 x self.gf_dim*8*2)
447 | 
448 | self.d5, self.d5_w, self.d5_b = deconv2d(tf.nn.relu(d4),
449 | [self.batch_size, s8, s8, self.gf_dim*4], name='g_d5', with_w=True)
450 | d5 = self.g_bn_d5(self.d5)
451 | d5 = tf.concat([d5, e3_0], 3)
452 | # d5 is (32 x 32 x self.gf_dim*4*2)
453 | 
454 | self.d6, self.d6_w, self.d6_b = deconv2d(tf.nn.relu(d5),
455 | [self.batch_size, s4, s4, self.gf_dim*2], name='g_d6', with_w=True)
456 | d6 = self.g_bn_d6(self.d6)
457 | d6 = tf.concat([d6, e2_0], 3)
458 | # d6 is (64 x 64 x self.gf_dim*2*2)
459 | 
460 | self.d7, self.d7_w, self.d7_b = deconv2d(tf.nn.relu(d6),
461 | [self.batch_size, s2, s2, self.gf_dim], name='g_d7', with_w=True)
462 | d7 = self.g_bn_d7(self.d7)
463 | d7 = tf.concat([d7, e1_0], 3)
464 | # d7 is (128 x 128 x self.gf_dim*1*2)
465 | 
466 | self.d8, self.d8_w, self.d8_b = deconv2d(tf.nn.relu(d7),
467 | [self.batch_size, s, s, self.output_c_dim], name='g_d8', with_w=True)
468 | # d8 is (256 x 256 x output_c_dim)
469 | 
470 | return tf.nn.tanh(self.d8)
471 | 
472 | def generator_ori(self, image, y=None):
473 | with tf.variable_scope("generator") as scope:
474 | 
475 | s = self.output_size
476 | s2, s4, s8, s16, s32, s64, s128 = int(s/2), int(s/4), int(s/8), int(s/16), int(s/32), int(s/64), int(s/128)
477 | 
478 | img = image[:, :, :, 0:3]
479 | # image is (256 x 256 x input_c_dim)
480
| e1 = conv2d(img, self.gf_dim, name='g_e1_conv') 481 | # e1 is (128 x 128 x self.gf_dim) 482 | e2 = self.g_bn_e2(conv2d(lrelu(e1), self.gf_dim*2, name='g_e2_conv')) 483 | # e2 is (64 x 64 x self.gf_dim*2) 484 | e3 = self.g_bn_e3(conv2d(lrelu(e2), self.gf_dim*4, name='g_e3_conv')) 485 | # e3 is (32 x 32 x self.gf_dim*4) 486 | e4 = self.g_bn_e4(conv2d(lrelu(e3), self.gf_dim*8, name='g_e4_conv')) 487 | # e4 is (16 x 16 x self.gf_dim*8) 488 | e5 = self.g_bn_e5(conv2d(lrelu(e4), self.gf_dim*8, name='g_e5_conv')) 489 | # e5 is (8 x 8 x self.gf_dim*8) 490 | e6 = self.g_bn_e6(conv2d(lrelu(e5), self.gf_dim*8, name='g_e6_conv')) 491 | # e6 is (4 x 4 x self.gf_dim*8) 492 | e7 = self.g_bn_e7(conv2d(lrelu(e6), self.gf_dim*8, name='g_e7_conv')) 493 | # e7 is (2 x 2 x self.gf_dim*8) 494 | e8 = self.g_bn_e8(conv2d(lrelu(e7), self.gf_dim*8, name='g_e8_conv')) 495 | # e8 is (1 x 1 x self.gf_dim*8) 496 | 497 | self.d1, self.d1_w, self.d1_b = deconv2d(tf.nn.relu(e8), 498 | [self.batch_size, s128, s128, self.gf_dim*8], name='g_d1', with_w=True) 499 | d1 = tf.nn.dropout(self.g_bn_d1(self.d1), 0.5) 500 | d1 = tf.concat([d1, e7], 3) 501 | # d1 is (2 x 2 x self.gf_dim*8*2) 502 | 503 | self.d2, self.d2_w, self.d2_b = deconv2d(tf.nn.relu(d1), 504 | [self.batch_size, s64, s64, self.gf_dim*8], name='g_d2', with_w=True) 505 | d2 = tf.nn.dropout(self.g_bn_d2(self.d2), 0.5) 506 | d2 = tf.concat([d2, e6], 3) 507 | # d2 is (4 x 4 x self.gf_dim*8*2) 508 | 509 | self.d3, self.d3_w, self.d3_b = deconv2d(tf.nn.relu(d2), 510 | [self.batch_size, s32, s32, self.gf_dim*8], name='g_d3', with_w=True) 511 | d3 = tf.nn.dropout(self.g_bn_d3(self.d3), 0.5) 512 | d3 = tf.concat([d3, e5], 3) 513 | # d3 is (8 x 8 x self.gf_dim*8*2) 514 | 515 | self.d4, self.d4_w, self.d4_b = deconv2d(tf.nn.relu(d3), 516 | [self.batch_size, s16, s16, self.gf_dim*8], name='g_d4', with_w=True) 517 | d4 = self.g_bn_d4(self.d4) 518 | d4 = tf.concat([d4, e4], 3) 519 | # d4 is (16 x 16 x self.gf_dim*8*2) 520 | 521 | self.d5, self.d5_w, self.d5_b = deconv2d(tf.nn.relu(d4), 522 | [self.batch_size, s8, s8, self.gf_dim*4], name='g_d5', with_w=True) 523 | d5 = self.g_bn_d5(self.d5) 524 | d5 = tf.concat([d5, e3], 3) 525 | # d5 is (32 x 32 x self.gf_dim*4*2) 526 | 527 | self.d6, self.d6_w, self.d6_b = deconv2d(tf.nn.relu(d5), 528 | [self.batch_size, s4, s4, self.gf_dim*2], name='g_d6', with_w=True) 529 | d6 = self.g_bn_d6(self.d6) 530 | d6 = tf.concat([d6, e2], 3) 531 | # d6 is (64 x 64 x self.gf_dim*2*2) 532 | 533 | self.d7, self.d7_w, self.d7_b = deconv2d(tf.nn.relu(d6), 534 | [self.batch_size, s2, s2, self.gf_dim], name='g_d7', with_w=True) 535 | d7 = self.g_bn_d7(self.d7) 536 | d7 = tf.concat([d7, e1], 3) 537 | # d7 is (128 x 128 x self.gf_dim*1*2) 538 | 539 | self.d8, self.d8_w, self.d8_b = deconv2d(tf.nn.relu(d7), 540 | [self.batch_size, s, s, self.output_c_dim], name='g_d8', with_w=True) 541 | # d8 is (256 x 256 x output_c_dim) 542 | 543 | return tf.nn.tanh(self.d8) 544 | 545 | 546 | def sampler_ori(self, image, y=None): 547 | 548 | with tf.variable_scope("generator") as scope: 549 | scope.reuse_variables() 550 | 551 | s = self.output_size 552 | s2, s4, s8, s16, s32, s64, s128 = int(s/2), int(s/4), int(s/8), int(s/16), int(s/32), int(s/64), int(s/128) 553 | 554 | img = image[:, :, :, 0:3] 555 | # image is (256 x 256 x input_c_dim) 556 | e1 = conv2d(img, self.gf_dim, name='g_e1_conv') 557 | # e1 is (128 x 128 x self.gf_dim) 558 | e2 = self.g_bn_e2(conv2d(lrelu(e1), self.gf_dim*2, name='g_e2_conv')) 559 | # e2 is (64 x 64 x self.gf_dim*2) 560 | e3 = 
self.g_bn_e3(conv2d(lrelu(e2), self.gf_dim*4, name='g_e3_conv')) 561 | # e3 is (32 x 32 x self.gf_dim*4) 562 | e4 = self.g_bn_e4(conv2d(lrelu(e3), self.gf_dim*8, name='g_e4_conv')) 563 | # e4 is (16 x 16 x self.gf_dim*8) 564 | e5 = self.g_bn_e5(conv2d(lrelu(e4), self.gf_dim*8, name='g_e5_conv')) 565 | # e5 is (8 x 8 x self.gf_dim*8) 566 | e6 = self.g_bn_e6(conv2d(lrelu(e5), self.gf_dim*8, name='g_e6_conv')) 567 | # e6 is (4 x 4 x self.gf_dim*8) 568 | e7 = self.g_bn_e7(conv2d(lrelu(e6), self.gf_dim*8, name='g_e7_conv')) 569 | # e7 is (2 x 2 x self.gf_dim*8) 570 | e8 = self.g_bn_e8(conv2d(lrelu(e7), self.gf_dim*8, name='g_e8_conv')) 571 | # e8 is (1 x 1 x self.gf_dim*8) 572 | 573 | self.d1, self.d1_w, self.d1_b = deconv2d(tf.nn.relu(e8), 574 | [self.batch_size, s128, s128, self.gf_dim*8], name='g_d1', with_w=True) 575 | d1 = tf.nn.dropout(self.g_bn_d1(self.d1), 0.5) 576 | d1 = tf.concat([d1, e7], 3) 577 | # d1 is (2 x 2 x self.gf_dim*8*2) 578 | 579 | self.d2, self.d2_w, self.d2_b = deconv2d(tf.nn.relu(d1), 580 | [self.batch_size, s64, s64, self.gf_dim*8], name='g_d2', with_w=True) 581 | d2 = tf.nn.dropout(self.g_bn_d2(self.d2), 0.5) 582 | d2 = tf.concat([d2, e6], 3) 583 | # d2 is (4 x 4 x self.gf_dim*8*2) 584 | 585 | self.d3, self.d3_w, self.d3_b = deconv2d(tf.nn.relu(d2), 586 | [self.batch_size, s32, s32, self.gf_dim*8], name='g_d3', with_w=True) 587 | d3 = tf.nn.dropout(self.g_bn_d3(self.d3), 0.5) 588 | d3 = tf.concat([d3, e5], 3) 589 | # d3 is (8 x 8 x self.gf_dim*8*2) 590 | 591 | self.d4, self.d4_w, self.d4_b = deconv2d(tf.nn.relu(d3), 592 | [self.batch_size, s16, s16, self.gf_dim*8], name='g_d4', with_w=True) 593 | d4 = self.g_bn_d4(self.d4) 594 | d4 = tf.concat([d4, e4], 3) 595 | # d4 is (16 x 16 x self.gf_dim*8*2) 596 | 597 | self.d5, self.d5_w, self.d5_b = deconv2d(tf.nn.relu(d4), 598 | [self.batch_size, s8, s8, self.gf_dim*4], name='g_d5', with_w=True) 599 | d5 = self.g_bn_d5(self.d5) 600 | d5 = tf.concat([d5, e3], 3) 601 | # d5 is (32 x 32 x self.gf_dim*4*2) 602 | 603 | self.d6, self.d6_w, self.d6_b = deconv2d(tf.nn.relu(d5), 604 | [self.batch_size, s4, s4, self.gf_dim*2], name='g_d6', with_w=True) 605 | d6 = self.g_bn_d6(self.d6) 606 | d6 = tf.concat([d6, e2], 3) 607 | # d6 is (64 x 64 x self.gf_dim*2*2) 608 | 609 | self.d7, self.d7_w, self.d7_b = deconv2d(tf.nn.relu(d6), 610 | [self.batch_size, s2, s2, self.gf_dim], name='g_d7', with_w=True) 611 | d7 = self.g_bn_d7(self.d7) 612 | d7 = tf.concat([d7, e1], 3) 613 | # d7 is (128 x 128 x self.gf_dim*1*2) 614 | 615 | self.d8, self.d8_w, self.d8_b = deconv2d(tf.nn.relu(d7), 616 | [self.batch_size, s, s, self.output_c_dim], name='g_d8', with_w=True) 617 | # d8 is (256 x 256 x output_c_dim) 618 | 619 | return tf.nn.tanh(self.d8) 620 | 621 | def save(self, checkpoint_dir, step): 622 | model_name = "pix2pix.model" 623 | model_dir = "%s_%s_%s" % (self.dataset_name, self.batch_size, self.output_size) 624 | checkpoint_dir = os.path.join(checkpoint_dir, model_dir) 625 | 626 | if not os.path.exists(checkpoint_dir): 627 | os.makedirs(checkpoint_dir) 628 | 629 | self.saver.save(self.sess, 630 | os.path.join(checkpoint_dir, model_name), 631 | global_step=step) 632 | 633 | def load(self, checkpoint_dir): 634 | print(" [*] Reading checkpoint...") 635 | 636 | model_dir = "%s_%s_%s" % (self.dataset_name, self.batch_size, self.output_size) 637 | checkpoint_dir = os.path.join(checkpoint_dir, model_dir) 638 | 639 | ckpt = tf.train.get_checkpoint_state(checkpoint_dir) 640 | if ckpt and ckpt.model_checkpoint_path: 641 | ckpt_name = 
os.path.basename(ckpt.model_checkpoint_path)
642 | #ckpt_name = "pix2pix.model-35002"  # uncomment to pin a specific checkpoint instead of the latest one
643 | self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
644 | return True
645 | else:
646 | return False
647 | 
648 | def test(self, args):
649 | """Test pix2pix"""
650 | init_op = tf.global_variables_initializer()
651 | self.sess.run(init_op)
652 | 
653 | #sample_files = glob('./datasets/{}/val/*.jpg'.format(self.dataset_name))  # superseded by the .npy listing below
654 | valPath = os.path.join(os.getcwd(), 'datasets', str(self.dataset_name), 'val', 'A')
655 | sample_files = [name for name in os.listdir(valPath)]
656 | # sort testing input numerically (file names are zero-padded, e.g. '0017.npy')
657 | #n = [int(i) for i in map(lambda x: x.split('/')[-1].split('.jpg')[0], sample_files)]
658 | n = [int(i.lstrip('0')) if i.lstrip('0') else 0 for i in map(lambda x: x.split('.npy')[0], sample_files)]
659 | #print(n[0].lstrip('0'))
660 | sample_files = [x for (y, x) in sorted(zip(n, sample_files))]
661 | print(sample_files)
662 | # load testing input
663 | print("Loading testing images ...")
664 | #sample = [load_data(sample_file, is_test=True) for sample_file in sample_files]
665 | sample = [load_data2(valPath, sample_file) for sample_file in sample_files]
666 | 
667 | #if (self.is_grayscale):
668 | # sample_images = np.array(sample).astype(np.float32)[:, :, :, None]
669 | #else:
670 | # sample_images = np.array(sample).astype(np.float32)
671 | 
672 | #sample_images = [sample_images[i:i+self.batch_size]
673 | # for i in xrange(0, len(sample_images), self.batch_size)]
674 | #sample_images = np.array(sample_images)
675 | sample_images = np.array(sample)
676 | print(sample_images.shape)
677 | 
678 | npy_dir = os.path.join(args.test_dir, 'npy')
679 | 
680 | if os.path.exists(args.test_dir):
681 | shutil.rmtree(args.test_dir)
682 | 
683 | os.makedirs(args.test_dir)
684 | os.makedirs(npy_dir)
685 | 
686 | start_time = time.time()
687 | if self.load(self.checkpoint_dir):
688 | print(" [*] Load SUCCESS")
689 | else:
690 | print(" [!] Load failed...")
691 | 
692 | for i, sample_image in enumerate(sample_images):
693 | idx = i+1
694 | print("sampling image ", idx)
695 | samples = self.sess.run(
696 | self.fake_B_sample,
697 | feed_dict={self.real_data: sample_image.reshape(1, sample_image.shape[0], sample_image.shape[1], sample_image.shape[2])}
698 | )
699 | np.save('./{}/test_{}.npy'.format(npy_dir, os.path.splitext(sample_files[i])[0]), (samples.squeeze()+1)*32767.5)  # map the tanh output [-1, 1] onto the 16-bit depth range [0, 65535]
700 | save_images(samples, [self.batch_size, 1], './{}/test_{}.png'.format(args.test_dir, os.path.splitext(sample_files[i])[0]))
701 | 
702 | print("Test Runtime: {}".format((time.time() - start_time) / len(sample_images)))  # average seconds per image
703 | 
--------------------------------------------------------------------------------
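The `.npy` predictions written by `test()` store `(tanh_output + 1) * 32767.5`, i.e. the generator's `[-1, 1]` output mapped onto the 16-bit range `[0, 65535]`. Below is a minimal sketch of how one might invert that encoding and score a prediction, assuming a ground-truth array saved with the same encoding; the file names (`test_0001.npy`, `gt_0001.npy`) are illustrative, not outputs of the code above.

```python
import numpy as np

pred = np.load('./test/npy/test_0001.npy')   # hypothetical prediction from test()
pred_norm = pred / 32767.5 - 1.0             # invert (x + 1) * 32767.5: back to [-1, 1]

gt = np.load('./gt_0001.npy')                # assumed ground truth, same 16-bit encoding
gt_norm = gt / 32767.5 - 1.0
valid = gt_norm > -1                         # same validity convention as build_model()
mae = np.abs(gt_norm[valid] - pred_norm[valid]).mean()
print('MAE over valid pixels: {:.4f}'.format(mae))
```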