├── .gitignore
├── README.md
├── data
│   └── filenames
│       ├── kitti_stereo_2015_test_files.txt
│       ├── kitti_test_files.txt
│       ├── kitti_train_files.txt
│       └── kitti_val_files.txt
├── data_loader
│   ├── __init__.py
│   └── kitti_depth_loader.py
├── models
│   ├── __init__.py
│   └── base_model.py
├── monodepth.py
├── paint_x2_unet
│   ├── .gitignore
│   ├── __init__.py
│   ├── cgi_exe.py
│   ├── img2imgDataset.py
│   ├── lnet.py
│   ├── train_128.py
│   ├── train_x2.py
│   └── unet.py
└── util
    ├── __init__.py
    └── bilinear_sampler.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | #data/
2 | paint_x2_unet/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Unsupervised Monocular Depth Estimation
2 | 
3 | ## Requirements
4 | - Python 2
5 | - Chainer v2.0 (or later)
6 | - OpenCV 2
7 | 
8 | ## Completed
9 | - KITTI data loader for Chainer
10 | - Basic model
11 | 
12 | ## TODO
13 | - prepare requirements.txt
14 | - loss function (in development)
15 | - bilinear sampler for Chainer
--------------------------------------------------------------------------------
/data/filenames/kitti_stereo_2015_test_files.txt:
--------------------------------------------------------------------------------
1 | training/image_2/000000_10.jpg training/image_3/000000_10.jpg
2 | training/image_2/000001_10.jpg training/image_3/000001_10.jpg
3 | training/image_2/000002_10.jpg training/image_3/000002_10.jpg
4 | training/image_2/000003_10.jpg training/image_3/000003_10.jpg
5 | training/image_2/000004_10.jpg training/image_3/000004_10.jpg
6 | training/image_2/000005_10.jpg training/image_3/000005_10.jpg
7 | training/image_2/000006_10.jpg training/image_3/000006_10.jpg
8 | training/image_2/000007_10.jpg training/image_3/000007_10.jpg
9 | training/image_2/000008_10.jpg training/image_3/000008_10.jpg
10 | training/image_2/000009_10.jpg training/image_3/000009_10.jpg
11 | training/image_2/000010_10.jpg training/image_3/000010_10.jpg
12 | training/image_2/000011_10.jpg training/image_3/000011_10.jpg
13 | training/image_2/000012_10.jpg training/image_3/000012_10.jpg
14 | training/image_2/000013_10.jpg training/image_3/000013_10.jpg
15 | training/image_2/000014_10.jpg training/image_3/000014_10.jpg
16 | training/image_2/000015_10.jpg training/image_3/000015_10.jpg
17 | training/image_2/000016_10.jpg training/image_3/000016_10.jpg
18 | training/image_2/000017_10.jpg training/image_3/000017_10.jpg
19 | training/image_2/000018_10.jpg training/image_3/000018_10.jpg
20 | training/image_2/000019_10.jpg training/image_3/000019_10.jpg
21 | training/image_2/000020_10.jpg training/image_3/000020_10.jpg
22 | training/image_2/000021_10.jpg training/image_3/000021_10.jpg
23 | training/image_2/000022_10.jpg training/image_3/000022_10.jpg
24 | training/image_2/000023_10.jpg training/image_3/000023_10.jpg
25 | training/image_2/000024_10.jpg training/image_3/000024_10.jpg
26 | training/image_2/000025_10.jpg training/image_3/000025_10.jpg
27 | training/image_2/000026_10.jpg training/image_3/000026_10.jpg
28 | training/image_2/000027_10.jpg training/image_3/000027_10.jpg
29 | training/image_2/000028_10.jpg training/image_3/000028_10.jpg
30 | training/image_2/000029_10.jpg training/image_3/000029_10.jpg
31 | training/image_2/000030_10.jpg training/image_3/000030_10.jpg
32 | training/image_2/000031_10.jpg training/image_3/000031_10.jpg
33 | training/image_2/000032_10.jpg training/image_3/000032_10.jpg
34 | 
training/image_2/000033_10.jpg training/image_3/000033_10.jpg 35 | training/image_2/000034_10.jpg training/image_3/000034_10.jpg 36 | training/image_2/000035_10.jpg training/image_3/000035_10.jpg 37 | training/image_2/000036_10.jpg training/image_3/000036_10.jpg 38 | training/image_2/000037_10.jpg training/image_3/000037_10.jpg 39 | training/image_2/000038_10.jpg training/image_3/000038_10.jpg 40 | training/image_2/000039_10.jpg training/image_3/000039_10.jpg 41 | training/image_2/000040_10.jpg training/image_3/000040_10.jpg 42 | training/image_2/000041_10.jpg training/image_3/000041_10.jpg 43 | training/image_2/000042_10.jpg training/image_3/000042_10.jpg 44 | training/image_2/000043_10.jpg training/image_3/000043_10.jpg 45 | training/image_2/000044_10.jpg training/image_3/000044_10.jpg 46 | training/image_2/000045_10.jpg training/image_3/000045_10.jpg 47 | training/image_2/000046_10.jpg training/image_3/000046_10.jpg 48 | training/image_2/000047_10.jpg training/image_3/000047_10.jpg 49 | training/image_2/000048_10.jpg training/image_3/000048_10.jpg 50 | training/image_2/000049_10.jpg training/image_3/000049_10.jpg 51 | training/image_2/000050_10.jpg training/image_3/000050_10.jpg 52 | training/image_2/000051_10.jpg training/image_3/000051_10.jpg 53 | training/image_2/000052_10.jpg training/image_3/000052_10.jpg 54 | training/image_2/000053_10.jpg training/image_3/000053_10.jpg 55 | training/image_2/000054_10.jpg training/image_3/000054_10.jpg 56 | training/image_2/000055_10.jpg training/image_3/000055_10.jpg 57 | training/image_2/000056_10.jpg training/image_3/000056_10.jpg 58 | training/image_2/000057_10.jpg training/image_3/000057_10.jpg 59 | training/image_2/000058_10.jpg training/image_3/000058_10.jpg 60 | training/image_2/000059_10.jpg training/image_3/000059_10.jpg 61 | training/image_2/000060_10.jpg training/image_3/000060_10.jpg 62 | training/image_2/000061_10.jpg training/image_3/000061_10.jpg 63 | training/image_2/000062_10.jpg training/image_3/000062_10.jpg 64 | training/image_2/000063_10.jpg training/image_3/000063_10.jpg 65 | training/image_2/000064_10.jpg training/image_3/000064_10.jpg 66 | training/image_2/000065_10.jpg training/image_3/000065_10.jpg 67 | training/image_2/000066_10.jpg training/image_3/000066_10.jpg 68 | training/image_2/000067_10.jpg training/image_3/000067_10.jpg 69 | training/image_2/000068_10.jpg training/image_3/000068_10.jpg 70 | training/image_2/000069_10.jpg training/image_3/000069_10.jpg 71 | training/image_2/000070_10.jpg training/image_3/000070_10.jpg 72 | training/image_2/000071_10.jpg training/image_3/000071_10.jpg 73 | training/image_2/000072_10.jpg training/image_3/000072_10.jpg 74 | training/image_2/000073_10.jpg training/image_3/000073_10.jpg 75 | training/image_2/000074_10.jpg training/image_3/000074_10.jpg 76 | training/image_2/000075_10.jpg training/image_3/000075_10.jpg 77 | training/image_2/000076_10.jpg training/image_3/000076_10.jpg 78 | training/image_2/000077_10.jpg training/image_3/000077_10.jpg 79 | training/image_2/000078_10.jpg training/image_3/000078_10.jpg 80 | training/image_2/000079_10.jpg training/image_3/000079_10.jpg 81 | training/image_2/000080_10.jpg training/image_3/000080_10.jpg 82 | training/image_2/000081_10.jpg training/image_3/000081_10.jpg 83 | training/image_2/000082_10.jpg training/image_3/000082_10.jpg 84 | training/image_2/000083_10.jpg training/image_3/000083_10.jpg 85 | training/image_2/000084_10.jpg training/image_3/000084_10.jpg 86 | training/image_2/000085_10.jpg training/image_3/000085_10.jpg 87 | 
training/image_2/000086_10.jpg training/image_3/000086_10.jpg 88 | training/image_2/000087_10.jpg training/image_3/000087_10.jpg 89 | training/image_2/000088_10.jpg training/image_3/000088_10.jpg 90 | training/image_2/000089_10.jpg training/image_3/000089_10.jpg 91 | training/image_2/000090_10.jpg training/image_3/000090_10.jpg 92 | training/image_2/000091_10.jpg training/image_3/000091_10.jpg 93 | training/image_2/000092_10.jpg training/image_3/000092_10.jpg 94 | training/image_2/000093_10.jpg training/image_3/000093_10.jpg 95 | training/image_2/000094_10.jpg training/image_3/000094_10.jpg 96 | training/image_2/000095_10.jpg training/image_3/000095_10.jpg 97 | training/image_2/000096_10.jpg training/image_3/000096_10.jpg 98 | training/image_2/000097_10.jpg training/image_3/000097_10.jpg 99 | training/image_2/000098_10.jpg training/image_3/000098_10.jpg 100 | training/image_2/000099_10.jpg training/image_3/000099_10.jpg 101 | training/image_2/000100_10.jpg training/image_3/000100_10.jpg 102 | training/image_2/000101_10.jpg training/image_3/000101_10.jpg 103 | training/image_2/000102_10.jpg training/image_3/000102_10.jpg 104 | training/image_2/000103_10.jpg training/image_3/000103_10.jpg 105 | training/image_2/000104_10.jpg training/image_3/000104_10.jpg 106 | training/image_2/000105_10.jpg training/image_3/000105_10.jpg 107 | training/image_2/000106_10.jpg training/image_3/000106_10.jpg 108 | training/image_2/000107_10.jpg training/image_3/000107_10.jpg 109 | training/image_2/000108_10.jpg training/image_3/000108_10.jpg 110 | training/image_2/000109_10.jpg training/image_3/000109_10.jpg 111 | training/image_2/000110_10.jpg training/image_3/000110_10.jpg 112 | training/image_2/000111_10.jpg training/image_3/000111_10.jpg 113 | training/image_2/000112_10.jpg training/image_3/000112_10.jpg 114 | training/image_2/000113_10.jpg training/image_3/000113_10.jpg 115 | training/image_2/000114_10.jpg training/image_3/000114_10.jpg 116 | training/image_2/000115_10.jpg training/image_3/000115_10.jpg 117 | training/image_2/000116_10.jpg training/image_3/000116_10.jpg 118 | training/image_2/000117_10.jpg training/image_3/000117_10.jpg 119 | training/image_2/000118_10.jpg training/image_3/000118_10.jpg 120 | training/image_2/000119_10.jpg training/image_3/000119_10.jpg 121 | training/image_2/000120_10.jpg training/image_3/000120_10.jpg 122 | training/image_2/000121_10.jpg training/image_3/000121_10.jpg 123 | training/image_2/000122_10.jpg training/image_3/000122_10.jpg 124 | training/image_2/000123_10.jpg training/image_3/000123_10.jpg 125 | training/image_2/000124_10.jpg training/image_3/000124_10.jpg 126 | training/image_2/000125_10.jpg training/image_3/000125_10.jpg 127 | training/image_2/000126_10.jpg training/image_3/000126_10.jpg 128 | training/image_2/000127_10.jpg training/image_3/000127_10.jpg 129 | training/image_2/000128_10.jpg training/image_3/000128_10.jpg 130 | training/image_2/000129_10.jpg training/image_3/000129_10.jpg 131 | training/image_2/000130_10.jpg training/image_3/000130_10.jpg 132 | training/image_2/000131_10.jpg training/image_3/000131_10.jpg 133 | training/image_2/000132_10.jpg training/image_3/000132_10.jpg 134 | training/image_2/000133_10.jpg training/image_3/000133_10.jpg 135 | training/image_2/000134_10.jpg training/image_3/000134_10.jpg 136 | training/image_2/000135_10.jpg training/image_3/000135_10.jpg 137 | training/image_2/000136_10.jpg training/image_3/000136_10.jpg 138 | training/image_2/000137_10.jpg training/image_3/000137_10.jpg 139 | training/image_2/000138_10.jpg 
training/image_3/000138_10.jpg 140 | training/image_2/000139_10.jpg training/image_3/000139_10.jpg 141 | training/image_2/000140_10.jpg training/image_3/000140_10.jpg 142 | training/image_2/000141_10.jpg training/image_3/000141_10.jpg 143 | training/image_2/000142_10.jpg training/image_3/000142_10.jpg 144 | training/image_2/000143_10.jpg training/image_3/000143_10.jpg 145 | training/image_2/000144_10.jpg training/image_3/000144_10.jpg 146 | training/image_2/000145_10.jpg training/image_3/000145_10.jpg 147 | training/image_2/000146_10.jpg training/image_3/000146_10.jpg 148 | training/image_2/000147_10.jpg training/image_3/000147_10.jpg 149 | training/image_2/000148_10.jpg training/image_3/000148_10.jpg 150 | training/image_2/000149_10.jpg training/image_3/000149_10.jpg 151 | training/image_2/000150_10.jpg training/image_3/000150_10.jpg 152 | training/image_2/000151_10.jpg training/image_3/000151_10.jpg 153 | training/image_2/000152_10.jpg training/image_3/000152_10.jpg 154 | training/image_2/000153_10.jpg training/image_3/000153_10.jpg 155 | training/image_2/000154_10.jpg training/image_3/000154_10.jpg 156 | training/image_2/000155_10.jpg training/image_3/000155_10.jpg 157 | training/image_2/000156_10.jpg training/image_3/000156_10.jpg 158 | training/image_2/000157_10.jpg training/image_3/000157_10.jpg 159 | training/image_2/000158_10.jpg training/image_3/000158_10.jpg 160 | training/image_2/000159_10.jpg training/image_3/000159_10.jpg 161 | training/image_2/000160_10.jpg training/image_3/000160_10.jpg 162 | training/image_2/000161_10.jpg training/image_3/000161_10.jpg 163 | training/image_2/000162_10.jpg training/image_3/000162_10.jpg 164 | training/image_2/000163_10.jpg training/image_3/000163_10.jpg 165 | training/image_2/000164_10.jpg training/image_3/000164_10.jpg 166 | training/image_2/000165_10.jpg training/image_3/000165_10.jpg 167 | training/image_2/000166_10.jpg training/image_3/000166_10.jpg 168 | training/image_2/000167_10.jpg training/image_3/000167_10.jpg 169 | training/image_2/000168_10.jpg training/image_3/000168_10.jpg 170 | training/image_2/000169_10.jpg training/image_3/000169_10.jpg 171 | training/image_2/000170_10.jpg training/image_3/000170_10.jpg 172 | training/image_2/000171_10.jpg training/image_3/000171_10.jpg 173 | training/image_2/000172_10.jpg training/image_3/000172_10.jpg 174 | training/image_2/000173_10.jpg training/image_3/000173_10.jpg 175 | training/image_2/000174_10.jpg training/image_3/000174_10.jpg 176 | training/image_2/000175_10.jpg training/image_3/000175_10.jpg 177 | training/image_2/000176_10.jpg training/image_3/000176_10.jpg 178 | training/image_2/000177_10.jpg training/image_3/000177_10.jpg 179 | training/image_2/000178_10.jpg training/image_3/000178_10.jpg 180 | training/image_2/000179_10.jpg training/image_3/000179_10.jpg 181 | training/image_2/000180_10.jpg training/image_3/000180_10.jpg 182 | training/image_2/000181_10.jpg training/image_3/000181_10.jpg 183 | training/image_2/000182_10.jpg training/image_3/000182_10.jpg 184 | training/image_2/000183_10.jpg training/image_3/000183_10.jpg 185 | training/image_2/000184_10.jpg training/image_3/000184_10.jpg 186 | training/image_2/000185_10.jpg training/image_3/000185_10.jpg 187 | training/image_2/000186_10.jpg training/image_3/000186_10.jpg 188 | training/image_2/000187_10.jpg training/image_3/000187_10.jpg 189 | training/image_2/000188_10.jpg training/image_3/000188_10.jpg 190 | training/image_2/000189_10.jpg training/image_3/000189_10.jpg 191 | training/image_2/000190_10.jpg 
training/image_3/000190_10.jpg
192 | training/image_2/000191_10.jpg training/image_3/000191_10.jpg
193 | training/image_2/000192_10.jpg training/image_3/000192_10.jpg
194 | training/image_2/000193_10.jpg training/image_3/000193_10.jpg
195 | training/image_2/000194_10.jpg training/image_3/000194_10.jpg
196 | training/image_2/000195_10.jpg training/image_3/000195_10.jpg
197 | training/image_2/000196_10.jpg training/image_3/000196_10.jpg
198 | training/image_2/000197_10.jpg training/image_3/000197_10.jpg
199 | training/image_2/000198_10.jpg training/image_3/000198_10.jpg
200 | training/image_2/000199_10.jpg training/image_3/000199_10.jpg
201 | 
--------------------------------------------------------------------------------
/data_loader/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yukitsuji/monodepth_chainer/93e94097336a433af1486aa7956069b2fba0f22a/data_loader/__init__.py
--------------------------------------------------------------------------------
/data_loader/kitti_depth_loader.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import numpy as np
4 | import chainer
5 | import six
6 | import os
7 | 
8 | from chainer import cuda, optimizers, Variable
9 | import cv2
10 | 
11 | class KittiDataset(chainer.dataset.DatasetMixin):
12 | 
13 |     def __init__(self, dataset_paths, root='./input', dtype=np.float32, train=False):
14 |         if isinstance(dataset_paths, six.string_types):
15 |             with open(dataset_paths) as paths_file:
16 |                 # a file iterator can only be consumed once, so split each
17 |                 # line into its left/right pair in a single pass
18 |                 pairs = [line.strip().split() for line in paths_file]
19 |         self._left_paths = [pair[0] for pair in pairs]
20 |         self._right_paths = [pair[1] for pair in pairs]
21 |         self._root = root
22 |         self._dtype = dtype
23 |         self._train = train
24 | 
25 |     def __len__(self):
26 |         return len(self._left_paths)
27 | 
28 |     def augment_image_pair(self, left_image, right_image):
29 |         # random horizontal flip: flipping a stereo pair also swaps the
30 |         # views, so each output comes from the flipped *other* side
31 |         if np.random.rand() > 0.5:
32 |             left_image, right_image = (cv2.flip(right_image, 1),
33 |                                        cv2.flip(left_image, 1))
34 | 
35 |         # randomly shift gamma
36 |         random_gamma = np.random.uniform(low=0.8, high=1.2)
37 |         left_image = left_image ** random_gamma
38 |         right_image = right_image ** random_gamma
39 | 
40 |         # randomly shift brightness
41 |         random_brightness = np.random.uniform(low=0.5, high=2.0)
42 |         left_image *= random_brightness
43 |         right_image *= random_brightness
44 | 
45 |         # randomly shift color: draw one gain per channel (size=3)
46 |         random_colors = np.random.uniform(low=0.8, high=1.2, size=3)
47 |         white = np.ones((left_image.shape[0], left_image.shape[1]))
48 |         color_image = np.stack([white * random_colors[i] for i in range(3)], axis=2)
49 |         left_image *= color_image
50 |         right_image *= color_image
51 | 
52 |         return np.clip(left_image, 0.0, 1.0), np.clip(right_image, 0.0, 1.0)
53 | 
54 |     def get_example(self, i, minimize=False, log=False, bin_r=0):
55 |         if self._train:
56 |             left_path = os.path.join(self._root, self._left_paths[i])
57 |             right_path = os.path.join(self._root, self._right_paths[i])
58 |             left_image = cv2.imread(left_path, cv2.IMREAD_COLOR).astype(self._dtype)
59 |             right_image = cv2.imread(right_path, cv2.IMREAD_COLOR).astype(self._dtype)
60 |             left_image /= 255.
61 |             right_image /= 255.
62 |             # augment_image_pair returns the augmented pair, so the
63 |             # result has to be reassigned
64 |             left_image, right_image = self.augment_image_pair(left_image,
65 |                                                               right_image)
66 |             left_image = left_image.transpose(2, 0, 1)
67 |             right_image = right_image.transpose(2, 0, 1)
68 |             return left_image, right_image
69 |         else:
70 |             left_path = os.path.join(self._root, self._left_paths[i])
71 |             left_image = cv2.imread(left_path, cv2.IMREAD_COLOR).astype(self._dtype)
72 |             left_image /= 255.
73 |             left_image = left_image.transpose(2, 0, 1)
74 |             return left_image
75 | 
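For reference, a minimal usage sketch of this dataset class (the KITTI root path is an assumption; the list file is one of those shipped under data/filenames/):

import numpy as np
from data_loader.kitti_depth_loader import KittiDataset

# '/path/to/kitti' is a placeholder for wherever the KITTI images live
dataset = KittiDataset('./data/filenames/kitti_train_files.txt',
                       root='/path/to/kitti', dtype=np.float32, train=True)
left, right = dataset.get_example(0)
print(left.shape, left.dtype)  # (3, H, W), float32, values in [0, 1]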
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yukitsuji/monodepth_chainer/93e94097336a433af1486aa7956069b2fba0f22a/models/__init__.py
--------------------------------------------------------------------------------
/models/base_model.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import chainer
4 | from chainer import Variable
5 | import chainer.links as L
6 | import chainer.functions as F
7 | 
8 | 
9 | class MonoDepth(chainer.Chain):
10 |     def __init__(self, mode=False, use_deconv=False):
11 |         self.train = mode
12 | 
13 |         super(MonoDepth, self).__init__(
14 |             conv1_1=L.Convolution2D(3, 32, 7, stride=1, pad=3),
15 |             conv1_2=L.Convolution2D(32, 32, 7, stride=2, pad=3),
16 | 
17 |             conv2_1=L.Convolution2D(32, 64, 5, stride=1, pad=2),
18 |             conv2_2=L.Convolution2D(64, 64, 5, stride=2, pad=2),
19 | 
20 |             conv3_1=L.Convolution2D(64, 128, 3, stride=1, pad=1),
21 |             conv3_2=L.Convolution2D(128, 128, 3, stride=2, pad=1),
22 | 
23 |             conv4_1=L.Convolution2D(128, 256, 3, stride=1, pad=1),
24 |             conv4_2=L.Convolution2D(256, 256, 3, stride=2, pad=1),
25 | 
26 |             conv5_1=L.Convolution2D(256, 512, 3, stride=1, pad=1),
27 |             conv5_2=L.Convolution2D(512, 512, 3, stride=2, pad=1),
28 | 
29 |             conv6_1=L.Convolution2D(512, 512, 3, stride=1, pad=1),
30 |             conv6_2=L.Convolution2D(512, 512, 3, stride=2, pad=1),
31 | 
32 |             conv7_1=L.Convolution2D(512, 512, 3, stride=1, pad=1),
33 |             conv7_2=L.Convolution2D(512, 512, 3, stride=2, pad=1),
34 | 
35 |             upconv7=L.Convolution2D(512, 512, 3, stride=1, pad=1),
36 |             iconv7=L.Convolution2D(1024, 512, 3, stride=1, pad=1),
37 | 
38 |             upconv6=L.Convolution2D(512, 512, 3, stride=1, pad=1),
39 |             iconv6=L.Convolution2D(1024, 512, 3, stride=1, pad=1),
40 | 
41 |             upconv5=L.Convolution2D(512, 256, 3, stride=1, pad=1),
42 |             iconv5=L.Convolution2D(512, 256, 3, stride=1, pad=1),
43 | 
44 |             upconv4=L.Convolution2D(256, 128, 3, stride=1, pad=1),
45 |             iconv4=L.Convolution2D(256, 128, 3, stride=1, pad=1),
46 |             disp4_l=L.Convolution2D(128, 2, 3, stride=1, pad=1),
47 | 
48 |             upconv3=L.Convolution2D(128, 64, 3, stride=1, pad=1),
49 |             iconv3=L.Convolution2D(130, 64, 3, stride=1, pad=1),
50 |             disp3_l=L.Convolution2D(64, 2, 3, stride=1, pad=1),
51 | 
52 |             upconv2=L.Convolution2D(64, 32, 3, stride=1, pad=1),
53 |             iconv2=L.Convolution2D(66, 32, 3, stride=1, pad=1),
54 |             disp2_l=L.Convolution2D(32, 2, 3, stride=1, pad=1),
55 | 
56 |             upconv1=L.Convolution2D(32, 16, 3, stride=1, pad=1),
57 |             iconv1=L.Convolution2D(18, 16, 3, stride=1, pad=1),
58 |             disp1_l=L.Convolution2D(16, 2, 3, stride=1, pad=1),
59 |         )
60 | 
61 |     def calc(self, left_images):
62 |         """Encoder-decoder pass; stores disp1..disp4 on the instance,
63 |         each with two channels (left and right disparity)."""
64 |         h = F.relu(self.conv1_1(left_images))
65 |         conv1 = F.relu(self.conv1_2(h))
66 | 
67 |         h = F.relu(self.conv2_1(conv1))
68 |         conv2 = F.relu(self.conv2_2(h))
69 | 
70 |         h = F.relu(self.conv3_1(conv2))
71 |         conv3 = F.relu(self.conv3_2(h))
72 | 
73 |         h = F.relu(self.conv4_1(conv3))
74 |         conv4 = F.relu(self.conv4_2(h))
75 | 
76 |         h = F.relu(self.conv5_1(conv4))
77 |         conv5 = F.relu(self.conv5_2(h))
78 | 
79 |         h = F.relu(self.conv6_1(conv5))
80 |         conv6 = F.relu(self.conv6_2(h))
81 | 
82 |         h = F.relu(self.conv7_1(conv6))
83 |         conv7 = F.relu(self.conv7_2(h))
84 | 
85 |         # feature maps are NCHW, so skip connections concatenate on axis 1
86 |         upsample7 = self.upsample_nn(conv7, 2)
87 |         upconv7 = self.upconv7(upsample7)
88 |         concat7 = F.concat((upconv7, conv6), axis=1)
89 |         iconv7 = F.relu(self.iconv7(concat7))
90 | 
91 |         upsample6 = self.upsample_nn(iconv7, 2)
92 |         upconv6 = self.upconv6(upsample6)
93 |         concat6 = F.concat((upconv6, conv5), axis=1)
94 |         iconv6 = F.relu(self.iconv6(concat6))
95 | 
96 |         upsample5 = self.upsample_nn(iconv6, 2)
97 |         upconv5 = self.upconv5(upsample5)
98 |         concat5 = F.concat((upconv5, conv4), axis=1)
99 |         iconv5 = F.relu(self.iconv5(concat5))
100 | 
101 |         upsample4 = self.upsample_nn(iconv5, 2)
102 |         upconv4 = self.upconv4(upsample4)
103 |         concat4 = F.concat((upconv4, conv3), axis=1)
104 |         iconv4 = F.relu(self.iconv4(concat4))
105 |         self.disp4 = 0.3 * F.sigmoid(self.disp4_l(iconv4))
106 |         udisp4 = self.upsample_nn(self.disp4, 2)
107 | 
108 |         upsample3 = self.upsample_nn(iconv4, 2)
109 |         upconv3 = self.upconv3(upsample3)
110 |         concat3 = F.concat([upconv3, conv2, udisp4], axis=1)
111 |         iconv3 = F.relu(self.iconv3(concat3))
112 |         self.disp3 = 0.3 * F.sigmoid(self.disp3_l(iconv3))
113 |         udisp3 = self.upsample_nn(self.disp3, 2)
114 | 
115 |         upsample2 = self.upsample_nn(iconv3, 2)
116 |         upconv2 = self.upconv2(upsample2)
117 |         concat2 = F.concat([upconv2, conv1, udisp3], axis=1)
118 |         iconv2 = F.relu(self.iconv2(concat2))
119 |         self.disp2 = 0.3 * F.sigmoid(self.disp2_l(iconv2))
120 |         udisp2 = self.upsample_nn(self.disp2, 2)
121 | 
122 |         upsample1 = self.upsample_nn(iconv2, 2)
123 |         upconv1 = self.upconv1(upsample1)
124 |         concat1 = F.concat([upconv1, udisp2], axis=1)
125 |         iconv1 = F.relu(self.iconv1(concat1))
126 |         self.disp1 = 0.3 * F.sigmoid(self.disp1_l(iconv1))
127 | 
128 |     def upsample_nn(self, x, ratio):
129 |         h, w = x.shape[2:4]
130 |         return F.resize_images(x, (h * ratio, w * ratio))
131 | 
132 |     def __call__(self, x):
133 |         self.calc(x)
134 |         return self.disp1
135 | 
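The encoder halves the resolution seven times, so input sizes should be divisible by 128. A quick CPU shape check of the four disparity outputs (Chainer links accept raw NumPy arrays, so this sketch runs without a GPU):

import numpy as np
from models.base_model import MonoDepth

md = MonoDepth()
x = np.zeros((1, 3, 256, 512), dtype=np.float32)
md.calc(x)
print(md.disp1.shape)  # (1, 2, 256, 512) - channels: left, right disparity
print(md.disp2.shape)  # (1, 2, 128, 256)
print(md.disp3.shape)  # (1, 2, 64, 128)
print(md.disp4.shape)  # (1, 2, 32, 64)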
--------------------------------------------------------------------------------
/monodepth.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import numpy as np
4 | import chainer
5 | import chainer.functions as F
6 | import chainer.links as L
7 | import chainer.datasets.image_dataset as ImageDataset
8 | import six
9 | import os
10 | from PIL import Image
11 | 
12 | from chainer import cuda, optimizers, serializers, Variable
13 | from chainer import training
14 | from chainer.training import extensions
15 | 
16 | import argparse
17 | 
18 | from data_loader.kitti_depth_loader import KittiDataset
19 | from models.base_model import MonoDepth
20 | # util/bilinear_sampler.py is listed in the repository tree; the symbol name
21 | # below is assumed to match the function used by generate_image_left/right
22 | from util.bilinear_sampler import bilinear_sampler_1d
23 | 
24 | chainer.cuda.set_max_workspace_size(1024 * 1024 * 1024)
25 | os.environ["CHAINER_TYPE_CHECK"] = "0"  # note: chainer reads this at import time
26 | 
27 | 
28 | class monodepthUpdater(chainer.training.StandardUpdater):
29 |     def __init__(self, *args, **kwargs):
30 |         self.md = kwargs.pop('models')
31 |         self._iter = 0
32 |         # loss weights; the values are assumed defaults following
33 |         # Godard et al., "Unsupervised Monocular Depth Estimation"
34 |         self.alpha_image_loss = 0.85
35 |         self.alpha_smoothness = 0.1
36 |         self.alpha_lr = 1.0
37 |         super(monodepthUpdater, self).__init__(*args, **kwargs)
38 | 
39 |     def gradient_x(self, img):
40 |         return img[:, :, :, :-1] - img[:, :, :, 1:]
41 | 
42 |     def gradient_y(self, img):
43 |         return img[:, :, :-1, :] - img[:, :, 1:, :]
44 | 
45 |     def get_disparity_smoothness(self, disp, img):
46 |         # edge-aware smoothness: the weights come from the *image* gradients;
47 |         # the x- and y-terms have different spatial shapes, so each is
48 |         # reduced to a scalar mean and the two scalars are summed
49 |         disp_gradients_x = self.gradient_x(disp)
50 |         disp_gradients_y = self.gradient_y(disp)
51 | 
52 |         img_gradients_x = self.gradient_x(img)
53 |         img_gradients_y = self.gradient_y(img)
54 | 
55 |         weight_x = F.exp(-F.mean(F.absolute(img_gradients_x), axis=1, keepdims=True))
56 |         weight_y = F.exp(-F.mean(F.absolute(img_gradients_y), axis=1, keepdims=True))
57 | 
58 |         smoothness_x = disp_gradients_x * F.broadcast_to(weight_x, disp_gradients_x.shape)
59 |         smoothness_y = disp_gradients_y * F.broadcast_to(weight_y, disp_gradients_y.shape)
60 |         return F.mean(F.absolute(smoothness_x)) + F.mean(F.absolute(smoothness_y))
61 | 
62 |     def scale_pyramid(self, images, scale_h, scale_w):
63 |         return F.resize_images(images, (scale_h, scale_w))
64 | 
65 |     def ssim(self, pred, orig):
66 |         c1 = 0.01 ** 2
67 |         c2 = 0.03 ** 2
68 | 
69 |         # 3x3 window, stride 1, pad 0 (chainer takes an int pad, not TF's "VALID")
70 |         mu_pred = F.average_pooling_2d(pred, 3, 1, 0)
71 |         mu_orig = F.average_pooling_2d(orig, 3, 1, 0)
72 | 
73 |         sigma_pred = F.average_pooling_2d(pred ** 2, 3, 1, 0) - mu_pred ** 2
74 |         sigma_orig = F.average_pooling_2d(orig ** 2, 3, 1, 0) - mu_orig ** 2
75 |         sigma_both = F.average_pooling_2d(pred * orig, 3, 1, 0) - mu_pred * mu_orig
76 | 
77 |         ssim_n = (2 * mu_pred * mu_orig + c1) * (2 * sigma_both + c2)
78 |         ssim_d = (mu_pred ** 2 + mu_orig ** 2 + c1) * (sigma_pred + sigma_orig + c2)
79 |         ssim = ssim_n / ssim_d
80 |         return F.clip((1 - ssim) / 2, 0.0, 1.0)
81 | 
82 |     def generate_image_left(self, img, disp):
83 |         return bilinear_sampler_1d(img, -disp)
84 | 
85 |     def generate_image_right(self, img, disp):
86 |         return bilinear_sampler_1d(img, disp)
87 | 
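util/bilinear_sampler.py appears in the tree but its contents are not included here, and the README still lists the Chainer bilinear sampler as TODO. A minimal sketch of what bilinear_sampler_1d could look like, built on F.spatial_transformer_sampler (assumptions: that function is available in the installed Chainer version, and disparities are expressed as a fraction of image width; the real module may differ):

import numpy as np
import chainer
import chainer.functions as F

def bilinear_sampler_1d(img, disp):
    """Horizontally warp img by disp (a sketch, not the repo's actual code).

    img:  (N, C, H, W) Variable; disp: (N, 1, H, W) Variable,
    disparity given as a fraction of image width.
    """
    xp = chainer.cuda.get_array_module(disp.data)
    n, _, h, w = img.shape
    # base sampling grid in normalized [-1, 1] coordinates
    xs = xp.linspace(-1.0, 1.0, w, dtype=np.float32)
    ys = xp.linspace(-1.0, 1.0, h, dtype=np.float32)
    grid_x = xp.tile(xs, (n, 1, h, 1))                      # (N, 1, H, W)
    grid_y = xp.tile(ys.reshape(1, 1, h, 1), (n, 1, 1, w))  # (N, 1, H, W)
    # normalized coordinates span 2 units, hence the factor 2 on disp
    shifted_x = grid_x + 2.0 * disp
    grid = F.concat((shifted_x, grid_y), axis=1)  # channel 0: x, channel 1: y
    return F.spatial_transformer_sampler(img, grid)

generate_image_left passes -disp, so the warp direction falls out of the sign; out-of-range coordinates are clamped by the sampler.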
88 |     def loss_md(self, left_images, right_images):
89 |         h, w = left_images.shape[2:]
90 |         # the pyramids and estimates are kept on self so every loss term
91 |         # below can refer to them
92 |         self.left_img1 = left_images
93 |         self.left_img2 = self.scale_pyramid(left_images, h // 2, w // 2)
94 |         self.left_img3 = self.scale_pyramid(left_images, h // 4, w // 4)
95 |         self.left_img4 = self.scale_pyramid(left_images, h // 8, w // 8)
96 | 
97 |         self.right_img1 = right_images
98 |         self.right_img2 = self.scale_pyramid(right_images, h // 2, w // 2)
99 |         self.right_img3 = self.scale_pyramid(right_images, h // 4, w // 4)
100 |         self.right_img4 = self.scale_pyramid(right_images, h // 8, w // 8)
101 | 
102 |         # slice with 0:1 / 1:2 so the channel axis survives
103 |         self.disp1_left_est = self.md.disp1[:, 0:1]
104 |         self.disp2_left_est = self.md.disp2[:, 0:1]
105 |         self.disp3_left_est = self.md.disp3[:, 0:1]
106 |         self.disp4_left_est = self.md.disp4[:, 0:1]
107 | 
108 |         self.disp1_right_est = self.md.disp1[:, 1:2]
109 |         self.disp2_right_est = self.md.disp2[:, 1:2]
110 |         self.disp3_right_est = self.md.disp3[:, 1:2]
111 |         self.disp4_right_est = self.md.disp4[:, 1:2]
112 | 
113 |         # reconstruct each view by warping the opposite view with its disparity
114 |         self.left_est1 = self.generate_image_left(self.right_img1, self.disp1_left_est)
115 |         self.left_est2 = self.generate_image_left(self.right_img2, self.disp2_left_est)
116 |         self.left_est3 = self.generate_image_left(self.right_img3, self.disp3_left_est)
117 |         self.left_est4 = self.generate_image_left(self.right_img4, self.disp4_left_est)
118 | 
119 |         self.right_est1 = self.generate_image_right(self.left_img1, self.disp1_right_est)
120 |         self.right_est2 = self.generate_image_right(self.left_img2, self.disp2_right_est)
121 |         self.right_est3 = self.generate_image_right(self.left_img3, self.disp3_right_est)
122 |         self.right_est4 = self.generate_image_right(self.left_img4, self.disp4_right_est)
123 | 
124 |         # left-right consistency: project each disparity into the other view
125 |         self.right_to_left_disp1 = self.generate_image_left(self.disp1_right_est, self.disp1_left_est)
126 |         self.right_to_left_disp2 = self.generate_image_left(self.disp2_right_est, self.disp2_left_est)
127 |         self.right_to_left_disp3 = self.generate_image_left(self.disp3_right_est, self.disp3_left_est)
128 |         self.right_to_left_disp4 = self.generate_image_left(self.disp4_right_est, self.disp4_left_est)
129 | 
130 |         self.left_to_right_disp1 = self.generate_image_right(self.disp1_left_est, self.disp1_right_est)
131 |         self.left_to_right_disp2 = self.generate_image_right(self.disp2_left_est, self.disp2_right_est)
132 |         self.left_to_right_disp3 = self.generate_image_right(self.disp3_left_est, self.disp3_right_est)
133 |         self.left_to_right_disp4 = self.generate_image_right(self.disp4_left_est, self.disp4_right_est)
134 | 
135 |         # L1 reconstruction losses
136 |         self.l1_left1 = F.mean(F.absolute(self.left_est1 - self.left_img1))
137 |         self.l1_left2 = F.mean(F.absolute(self.left_est2 - self.left_img2))
138 |         self.l1_left3 = F.mean(F.absolute(self.left_est3 - self.left_img3))
139 |         self.l1_left4 = F.mean(F.absolute(self.left_est4 - self.left_img4))
140 | 
141 |         self.l1_right1 = F.mean(F.absolute(self.right_est1 - self.right_img1))
142 |         self.l1_right2 = F.mean(F.absolute(self.right_est2 - self.right_img2))
143 |         self.l1_right3 = F.mean(F.absolute(self.right_est3 - self.right_img3))
144 |         self.l1_right4 = F.mean(F.absolute(self.right_est4 - self.right_img4))
145 | 
146 |         # SSIM reconstruction losses
147 |         self.ssim_left1 = F.mean(self.ssim(self.left_est1, self.left_img1))
148 |         self.ssim_left2 = F.mean(self.ssim(self.left_est2, self.left_img2))
149 |         self.ssim_left3 = F.mean(self.ssim(self.left_est3, self.left_img3))
150 |         self.ssim_left4 = F.mean(self.ssim(self.left_est4, self.left_img4))
151 | 
152 |         self.ssim_right1 = F.mean(self.ssim(self.right_est1, self.right_img1))
153 |         self.ssim_right2 = F.mean(self.ssim(self.right_est2, self.right_img2))
154 |         self.ssim_right3 = F.mean(self.ssim(self.right_est3, self.right_img3))
155 |         self.ssim_right4 = F.mean(self.ssim(self.right_est4, self.right_img4))
156 | 
157 |         # weighted sum of the SSIM and L1 reconstruction terms
158 |         self.image_loss_left1 = (self.alpha_image_loss * self.ssim_left1
159 |                                  + (1 - self.alpha_image_loss) * self.l1_left1)
160 |         self.image_loss_left2 = (self.alpha_image_loss * self.ssim_left2
161 |                                  + (1 - self.alpha_image_loss) * self.l1_left2)
162 |         self.image_loss_left3 = (self.alpha_image_loss * self.ssim_left3
163 |                                  + (1 - self.alpha_image_loss) * self.l1_left3)
164 |         self.image_loss_left4 = (self.alpha_image_loss * self.ssim_left4
165 |                                  + (1 - self.alpha_image_loss) * self.l1_left4)
166 | 
167 |         self.image_loss_right1 = (self.alpha_image_loss * self.ssim_right1
168 |                                   + (1 - self.alpha_image_loss) * self.l1_right1)
169 |         self.image_loss_right2 = (self.alpha_image_loss * self.ssim_right2
170 |                                   + (1 - self.alpha_image_loss) * self.l1_right2)
171 |         self.image_loss_right3 = (self.alpha_image_loss * self.ssim_right3
172 |                                   + (1 - self.alpha_image_loss) * self.l1_right3)
173 |         self.image_loss_right4 = (self.alpha_image_loss * self.ssim_right4
174 |                                   + (1 - self.alpha_image_loss) * self.l1_right4)
175 | 
176 |         total_image_loss = (self.image_loss_left1 + self.image_loss_left2
177 |                             + self.image_loss_left3 + self.image_loss_left4
178 |                             + self.image_loss_right1 + self.image_loss_right2
179 |                             + self.image_loss_right3 + self.image_loss_right4)
180 | 
181 |         # left-right disparity consistency losses
182 |         self.lr_loss_left1 = F.mean(F.absolute(self.right_to_left_disp1 - self.disp1_left_est))
183 |         self.lr_loss_left2 = F.mean(F.absolute(self.right_to_left_disp2 - self.disp2_left_est))
184 |         self.lr_loss_left3 = F.mean(F.absolute(self.right_to_left_disp3 - self.disp3_left_est))
185 |         self.lr_loss_left4 = F.mean(F.absolute(self.right_to_left_disp4 - self.disp4_left_est))
186 | 
187 |         self.lr_loss_right1 = F.mean(F.absolute(self.left_to_right_disp1 - self.disp1_right_est))
188 |         self.lr_loss_right2 = F.mean(F.absolute(self.left_to_right_disp2 - self.disp2_right_est))
189 |         self.lr_loss_right3 = F.mean(F.absolute(self.left_to_right_disp3 - self.disp3_right_est))
190 |         self.lr_loss_right4 = F.mean(F.absolute(self.left_to_right_disp4 - self.disp4_right_est))
191 | 
192 |         total_lr_loss = (self.lr_loss_left1 + self.lr_loss_left2
193 |                          + self.lr_loss_left3 + self.lr_loss_left4
194 |                          + self.lr_loss_right1 + self.lr_loss_right2
195 |                          + self.lr_loss_right3 + self.lr_loss_right4)
196 | 
197 |         # edge-aware disparity smoothness, down-weighted at coarser scales
198 |         self.disp1_left_loss = self.get_disparity_smoothness(self.md.disp1, self.left_img1)
199 |         self.disp2_left_loss = self.get_disparity_smoothness(self.md.disp2, self.left_img2) / 2
200 |         self.disp3_left_loss = self.get_disparity_smoothness(self.md.disp3, self.left_img3) / 4
201 |         self.disp4_left_loss = self.get_disparity_smoothness(self.md.disp4, self.left_img4) / 8
202 | 
203 |         self.disp1_right_loss = self.get_disparity_smoothness(self.md.disp1, self.right_img1)
204 |         self.disp2_right_loss = self.get_disparity_smoothness(self.md.disp2, self.right_img2) / 2
205 |         self.disp3_right_loss = self.get_disparity_smoothness(self.md.disp3, self.right_img3) / 4
206 |         self.disp4_right_loss = self.get_disparity_smoothness(self.md.disp4, self.right_img4) / 8
207 | 
208 |         total_smoothness_loss = (self.disp1_left_loss + self.disp2_left_loss
209 |                                  + self.disp3_left_loss + self.disp4_left_loss
210 |                                  + self.disp1_right_loss + self.disp2_right_loss
211 |                                  + self.disp3_right_loss + self.disp4_right_loss)
212 | 
213 |         total_loss = (total_image_loss
214 |                       + self.alpha_smoothness * total_smoothness_loss
215 |                       + self.alpha_lr * total_lr_loss)
216 | 
217 |         chainer.report({'total_loss': total_loss, 'image_loss': total_image_loss,
218 |                         'smoothness_loss': total_smoothness_loss,
219 |                         'lr_loss': total_lr_loss}, self.md)
220 |         return total_loss
221 | 
222 |     def update_core(self):
223 |         xp = self.md.xp
224 |         self._iter += 1
225 | 
226 |         batch = self.get_iterator('train').next()
227 | 
228 |         # CPU to GPU: pack the list of (left, right) pairs into batched arrays
229 |         batchsize = len(batch)
230 |         h_in, w_in = batch[0][0].shape[1:]  # each example is a (3, H, W) pair
231 |         left_images = xp.zeros((batchsize, 3, h_in, w_in)).astype("f")
232 |         right_images = xp.zeros((batchsize, 3, h_in, w_in)).astype("f")
233 | 
234 |         for i in range(batchsize):
235 |             left_images[i, :] = xp.asarray(batch[i][0])
236 |             right_images[i, :] = xp.asarray(batch[i][1])
237 |         left_images = Variable(left_images)
238 |         right_images = Variable(right_images)
239 | 
240 |         self.md.calc(left_images)
241 |         md_optimizer = self.get_optimizer('md')
242 |         md_optimizer.update(self.loss_md, left_images, right_images)
243 | 
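The packing loop in update_core is the manual form of what Chainer's batch collation helper does; a sketch of the shorter equivalent (self.device is stored by StandardUpdater from its device argument):

from chainer.dataset import concat_examples

# inside update_core: turn the list of (left, right) pairs into two
# batched arrays, already moved to the configured device
left_arr, right_arr = concat_examples(batch, device=self.device)
left_images = Variable(left_arr)
right_images = Variable(right_arr)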
244 | 
245 | def train(args):
246 |     dataset = KittiDataset(
247 |         args.dataset, root=args.root, dtype=np.float32, train=True)
248 |     train_iter = chainer.iterators.SerialIterator(dataset, args.batchsize)
249 | 
250 |     md = MonoDepth()
251 | 
252 |     if args.gpu >= 0:
253 |         chainer.cuda.get_device(args.gpu).use()
254 |         md.to_gpu()
255 | 
256 |     opt = optimizers.Adam()
257 |     opt.setup(md)
258 | 
259 |     updater = monodepthUpdater(
260 |         models=md,
261 |         iterator={'train': train_iter},
262 |         optimizer={'md': opt},
263 |         device=args.gpu)
264 | 
265 |     # the parser below defines --save_dir (there is no --out flag)
266 |     trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.save_dir)
267 | 
268 |     snapshot_interval = (args.snapshot_interval, 'iteration')
269 |     trainer.extend(extensions.dump_graph('md/total_loss'))
270 |     trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
271 |     trainer.extend(extensions.snapshot_object(
272 |         md, 'md_vgg_iter_{.updater.iteration}'), trigger=snapshot_interval)
273 |     trainer.extend(extensions.snapshot_object(
274 |         opt, 'optimizer_'), trigger=snapshot_interval)
275 |     trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))
276 |     trainer.extend(extensions.PrintReport(
277 |         ['epoch', 'md/total_loss', 'md/image_loss', 'md/smoothness_loss', 'md/lr_loss']))
278 |     trainer.extend(extensions.ProgressBar(update_interval=20))
279 | 
280 |     if args.resume:
281 |         chainer.serializers.load_npz(args.resume, trainer)
282 | 
283 |     trainer.run()
284 | 
285 |     chainer.serializers.save_npz(os.path.join(args.save_dir, 'model_final'), md)
286 | 
287 | 
288 | def test(args):
289 |     dataset = KittiDataset(
290 |         args.dataset, root=args.root, dtype=np.float32, train=False)
291 |     # repeat=False / shuffle=False: one ordered pass, then the loop ends
292 |     test_iter = chainer.iterators.SerialIterator(
293 |         dataset, args.batchsize, repeat=False, shuffle=False)
294 | 
295 |     md = MonoDepth()
296 | 
297 |     if args.gpu >= 0:
298 |         chainer.cuda.get_device(args.gpu).use()
299 |         md.to_gpu()
300 | 
301 |     if args.resume:
302 |         chainer.serializers.load_npz(args.resume, md)
303 | 
304 |     xp = md.xp
305 |     for batch in test_iter:
306 |         batchsize = len(batch)
307 |         h_in, w_in = batch[0].shape[1:]  # each example is one (3, H, W) image
308 |         left_images = xp.zeros((batchsize, 3, h_in, w_in)).astype("f")
309 | 
310 |         for i in range(batchsize):
311 |             left_images[i, :] = xp.asarray(batch[i])
312 |         left_images = Variable(left_images)
313 |         disparity = md(left_images)
314 | 
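As written, the loop in test() computes `disparity` and discards it. A sketch of persisting the predictions (assumptions: `cv2` is added to this file's imports, `args.save_dir` is reused as the output directory, and a per-image max normalization is acceptable for visualization):

# inside the batch loop of test(); j indexes within the current batch only,
# so a running counter would be needed across batches
disp = chainer.cuda.to_cpu(disparity.data)[:, 0]  # channel 0: left disparity
for j, d in enumerate(disp):
    img = (d / max(float(d.max()), 1e-8) * 255.0).astype(np.uint8)
    cv2.imwrite(os.path.join(args.save_dir, 'disp_%05d.png' % j), img)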
315 | 
316 | def main():
317 |     parser = argparse.ArgumentParser(
318 |         description='Monocular Depth Estimation')
319 |     parser.add_argument("--mode", type=str, help="train or test",
320 |                         default="test")
321 |     parser.add_argument("--model_name", type=str, help="model name",
322 |                         default="monodepth")
323 |     parser.add_argument('--dataset', type=str,
324 |                         default='./data/filenames/kitti_test_files.txt',
325 |                         help='file of dataset list')
326 |     parser.add_argument('--root', type=str,
327 |                         default='./',
328 |                         help='root path of dataset')
329 |     parser.add_argument("--save_dir", type=str, default="./",
330 |                         help="directory for saving model parameter")
331 | 
332 |     parser.add_argument('--batchsize', '-b', type=int, default=16,
333 |                         help='Number of images in each mini-batch')
334 |     parser.add_argument('--epoch', '-e', type=int, default=20,
335 |                         help='Number of sweeps over the dataset to train')
336 |     parser.add_argument('--gpu', '-g', type=int, default=-1,
337 |                         help='GPU ID (negative value indicates CPU)')
338 | 
339 |     parser.add_argument('--resume', '-r', default='',
340 |                         help='Resume the training from snapshot')
341 |     parser.add_argument('--snapshot_interval', type=int, default=10000,
342 |                         help='Interval of snapshot')
343 |     parser.add_argument('--display_interval', type=int, default=100,
344 |                         help='Interval of displaying log to console')
345 |     parser.add_argument('--seed', type=int, default=0,
346 |                         help='Random seed')
347 |     args = parser.parse_args()
348 | 
349 |     if args.gpu != -1:
350 |         print('Use GPU: id {}'.format(args.gpu))
351 | 
352 |     print('# Minibatch-size: {}'.format(args.batchsize))
353 |     print('# epoch: {}'.format(args.epoch))
354 |     print('')
355 | 
356 |     if args.mode == "train":
357 |         train(args)
358 |     else:
359 |         test(args)
360 | 
361 | 
362 | if __name__ == '__main__':
363 |     main()
364 | 
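Given the flags defined in main(), typical invocations look like this (the KITTI root path is an assumption):

python monodepth.py --mode train --dataset ./data/filenames/kitti_train_files.txt --root /path/to/kitti --save_dir ./result --gpu 0
python monodepth.py --mode test --dataset ./data/filenames/kitti_test_files.txt --root /path/to/kitti --resume ./result/model_final --gpu 0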
--------------------------------------------------------------------------------
/paint_x2_unet/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | cg.dot
3 | models
4 | temp
--------------------------------------------------------------------------------
/paint_x2_unet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yukitsuji/monodepth_chainer/93e94097336a433af1486aa7956069b2fba0f22a/paint_x2_unet/__init__.py
--------------------------------------------------------------------------------
/paint_x2_unet/cgi_exe.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | 
4 | import numpy as np
5 | import chainer
6 | import cv2
7 | 
8 | #import chainer.functions as F
9 | #import chainer.links as L
10 | #import six
11 | #import os
12 | 
13 | from chainer import cuda, serializers, Variable  # , optimizers, training
14 | #from chainer.training import extensions
15 | #from train import Image2ImageDataset
16 | from img2imgDataset import ImageAndRefDataset
17 | 
18 | import unet
19 | import lnet
20 | 
21 | 
22 | class Painter:
23 | 
24 |     def __init__(self, gpu=0):
25 | 
26 |         print("start")
27 |         self.root = "./images/"
28 |         self.batchsize = 1
29 |         self.outdir = self.root + "out/"
30 |         self.outdir_min = self.root + "out_min/"
31 |         self.gpu = gpu
32 |         self._dtype = np.float32
33 | 
34 |         print("load model")
35 |         if self.gpu >= 0:
36 |             cuda.get_device(self.gpu).use()
37 |             cuda.set_max_workspace_size(64 * 1024 * 1024)  # 64MB
38 |             chainer.Function.type_check_enable = False
39 |         self.cnn_128 = unet.UNET()
40 |         self.cnn_512 = unet.UNET()
41 |         if self.gpu >= 0:
42 |             self.cnn_128.to_gpu()
43 |             self.cnn_512.to_gpu()
44 |         #lnn = lnet.LNET()
45 |         #serializers.load_npz("./cgi-bin/wnet/models/model_cnn_128_df_4", cnn_128)
46 |         #serializers.load_npz("./cgi-bin/paint_x2_unet/models/model_cnn_128_f3_2", cnn_128)
47 |         serializers.load_npz(
48 |             "./cgi-bin/paint_x2_unet/models/unet_128_standard", self.cnn_128)
49 |         #serializers.load_npz("./cgi-bin/paint_x2_unet/models/model_cnn_128_ua_1", self.cnn_128)
50 |         #serializers.load_npz("./cgi-bin/paint_x2_unet/models/model_m_1.6", self.cnn)
51 |         serializers.load_npz(
52 |             "./cgi-bin/paint_x2_unet/models/unet_512_standard", self.cnn_512)
53 |         #serializers.load_npz("./cgi-bin/paint_x2_unet/models/model_p2_1", self.cnn)
54 |         #serializers.load_npz("./cgi-bin/paint_x2_unet/models/model_10000", self.cnn)
55 |         #serializers.load_npz("./cgi-bin/paint_x2_unet/models/liner_f", lnn)
56 | 
57 |     def save_as_img(self, array, name):
58 |         array = array.transpose(1, 2, 0)
59 |         array = array.clip(0, 255).astype(np.uint8)
60 |         array = cuda.to_cpu(array)
61 |         (major, minor, _) = cv2.__version__.split(".")
62 |         if major == '3':
63 |             img = cv2.cvtColor(array, cv2.COLOR_YUV2RGB)
64 |         else:
65 |             img = cv2.cvtColor(array, cv2.COLOR_YUV2BGR)
66 |         cv2.imwrite(name, img)
67 | 
68 |     def liner(self, id_str):
69 |         if self.gpu >= 0:
70 |             cuda.get_device(self.gpu).use()
71 |         path1 = self.root + "original/" + id_str + ".png"  # input location assumed; path1 had no definition in this snippet
72 |         image1 = cv2.imread(path1, cv2.IMREAD_GRAYSCALE)
73 |         image1 = np.asarray(image1, self._dtype)
74 |         if image1.ndim == 2:
75 |             image1 = image1[:, :, np.newaxis]
76 |         img = image1.transpose(2, 0, 1)
77 |         x = np.zeros((1, 3, img.shape[1], img.shape[2]), dtype='f')
78 |         x[0, :] = img  # copy the grayscale input into all three channels
79 |         if self.gpu >= 0:
80 |             x = cuda.to_gpu(x)
81 | 
82 |         lnn = lnet.LNET()  # note: no weights are loaded here (see the commented-out "liner_f" load above)
83 |         with chainer.no_backprop_mode():
84 |             with chainer.using_config('train', False):
85 |                 y = lnn.calc(Variable(x))
86 | 
87 |         self.save_as_img(y.data[0], self.root + "line/" + id_str + ".jpg")
88 | 
89 |     def colorize(self, id_str, step='C', blur=0, s_size=128, colorize_format="jpg"):
90 |         if self.gpu >= 0:
91 |             cuda.get_device(self.gpu).use()
92 | 
93 |         # per-step lookup tables: 'S' small pass, 'L' large pass, 'C' combined
94 |         _ = {'S': "ref/", 'L': "out_min/", 'C': "ref/"}
95 |         dataset = ImageAndRefDataset(
96 |             [id_str + ".png"], self.root + "line/", self.root + _[step])
97 | 
98 |         _ = {'S': True, 'L': False, 'C': True}
99 |         sample = dataset.get_example(0, minimize=_[step], blur=blur, s_size=s_size)
100 | 
101 |         _ = {'S': 0, 'L': 1, 'C': 0}[step]
102 |         sample_container = np.zeros(
103 |             (1, 4, sample[_].shape[1], sample[_].shape[2]), dtype='f')
104 |         sample_container[0, :] = sample[_]
105 | 
106 |         if self.gpu >= 0:
107 |             sample_container = cuda.to_gpu(sample_container)
108 | 
109 |         cnn = {'S': self.cnn_128, 'L': self.cnn_512, 'C': self.cnn_128}
110 |         with chainer.no_backprop_mode():
111 |             with chainer.using_config('train', False):
112 |                 image_conv2d_layer = cnn[step].calc(Variable(sample_container))
113 |         del sample_container
114 | 
115 |         if step == 'C':
116 |             input_bat = np.zeros((1, 4, sample[1].shape[1], sample[1].shape[2]), dtype='f')
117 |             print(input_bat.shape)
118 |             input_bat[0, 0, :] = sample[1]
119 | 
120 |             output = cuda.to_cpu(image_conv2d_layer.data[0])
121 |             del image_conv2d_layer  # release memory
122 | 
123 |             for channel in range(3):
124 |                 input_bat[0, 1 + channel, :] = cv2.resize(
125 |                     output[channel, :],
126 |                     (sample[1].shape[2], sample[1].shape[1]),
127 |                     interpolation=cv2.INTER_CUBIC)
128 | 
129 |             if self.gpu >= 0:
130 |                 link = cuda.to_gpu(input_bat, None)
131 |             else:
132 |                 link = input_bat
133 |             with chainer.no_backprop_mode():
134 |                 with chainer.using_config('train', False):
135 |                     image_conv2d_layer = self.cnn_512.calc(Variable(link))
136 |             del link  # release memory
137 | 
138 |         image_out_path = {
139 |             'S': self.outdir_min + id_str + ".png",
140 |             'L': self.outdir + id_str + ".jpg",
141 |             'C': self.outdir + id_str + "_0." + colorize_format}
142 |         self.save_as_img(image_conv2d_layer.data[0], image_out_path[step])
143 |         del image_conv2d_layer
144 | 
145 | 
146 | 
147 | if __name__ == '__main__':
148 |     for n in range(1):
149 |         p = Painter()
150 |         print(n)
151 |         p.colorize(str(n * p.batchsize))  # colorize expects an id string, not an int
--------------------------------------------------------------------------------
/paint_x2_unet/img2imgDataset.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import numpy as np
4 | import chainer
5 | '''
6 | import chainer.functions as F
7 | import chainer.links as L
8 | import chainer.datasets.image_dataset as ImageDataset
9 | '''
10 | import six
11 | import os
12 | 
13 | from chainer import cuda, optimizers, serializers, Variable
14 | import cv2
15 | 
16 | def cvt2YUV(img):
17 |     (major, minor, _) = cv2.__version__.split(".")
18 |     if major == '3':
19 |         img = cv2.cvtColor(img, cv2.COLOR_RGB2YUV)
20 |     else:
21 |         img = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
22 |     return img
23 | 
24 | class ImageAndRefDataset(chainer.dataset.DatasetMixin):
25 | 
26 |     def __init__(self, paths, root1='./input', root2='./ref', dtype=np.float32):
27 |         self._paths = paths
28 |         self._root1 = root1
29 |         self._root2 = root2
30 |         self._dtype = dtype
31 | 
32 |     def __len__(self):
33 |         return len(self._paths)
34 | 
35 |     def get_name(self, i):
36 |         return self._paths[i]
37 | 
38 |     def get_example(self, i, minimize=False, blur=0, s_size=128):
39 |         path1 = os.path.join(self._root1, self._paths[i])
40 |         #image1 = ImageDataset._read_image_as_array(path1, self._dtype)
41 | 
42 |         image1 = cv2.imread(path1, cv2.IMREAD_GRAYSCALE)
43 |         print("load:" + path1, os.path.isfile(path1), image1 is None)
44 |         image1 = np.asarray(image1, self._dtype)
45 | 
46 |         _image1 = image1.copy()
47 |         if minimize:
48 |             if image1.shape[0] < image1.shape[1]:
49 |                 s0 = s_size
50 |                 s1 = int(image1.shape[1] * (s_size / image1.shape[0]))
51 |                 s1 = s1 - s1 % 16
52 |                 _s0 = 4 * s0
53 |                 _s1 = int(image1.shape[1] * (_s0 / image1.shape[0]))
54 |                 _s1 = (_s1 + 8) - (_s1 + 8) % 16
55 |             else:
56 |                 s1 = s_size
57 |                 s0 = int(image1.shape[0] * (s_size / image1.shape[1]))
58 |                 s0 = s0 - s0 % 16
59 |                 _s1 = 4 * s1
60 |                 _s0 = int(image1.shape[0] * (_s1 / image1.shape[1]))
61 |                 _s0 = (_s0 + 8) - (_s0 + 8) % 16
62 | 
63 |             _image1 = image1.copy()
64 |             _image1 = cv2.resize(_image1, (_s1, _s0),
65 |                                  interpolation=cv2.INTER_AREA)
66 |             #noise = np.random.normal(0,5*np.random.rand(),_image1.shape).astype(self._dtype)
67 | 
68 |             if blur > 0:
69 |                 blurred = cv2.blur(_image1, ksize=(blur, blur))
70 |                 image1 = _image1 + blurred - 255
71 | 
72 |             image1 = cv2.resize(image1, (s1, s0), interpolation=cv2.INTER_AREA)
73 | 
74 |         # image is grayscale
75 |         if image1.ndim == 2:
76 |             image1 = image1[:, :, np.newaxis]
77 |         if _image1.ndim == 2:
78 |             _image1 = _image1[:, :, np.newaxis]
79 | 
80 |         image1 = np.insert(image1, 1, -512, axis=2)
81 |         image1 = np.insert(image1, 2, 128, axis=2)
82 |         image1 = np.insert(image1, 3, 128, axis=2)
83 | 
84 |         # add color ref image
85 |         path_ref = os.path.join(self._root2, self._paths[i])
86 | 
87 |         if minimize:
88 |             image_ref = cv2.imread(path_ref, cv2.IMREAD_UNCHANGED)
89 |             image_ref = cv2.resize(image_ref, (image1.shape[1], image1.shape[0]),
90 |                                    interpolation=cv2.INTER_NEAREST)
91 |             b, g, r, a = cv2.split(image_ref)
92 |             image_ref = cvt2YUV(cv2.merge((b, g, r)))
93 | 
94 |             for x in range(image1.shape[0]):  # per-pixel copy; vectorized form: image1[a != 0, 1:4] = image_ref[a != 0]
95 |                 for y in range(image1.shape[1]):
96 |                     if a[x][y] != 0:
97 |                         for ch in range(3):
98 |                             image1[x][y][ch + 1] = image_ref[x][y][ch]
99 | 
100 | else: 101 | image_ref = cv2.imread(path_ref, cv2.IMREAD_COLOR) 102 | image_ref = cvt2YUV(image_ref) 103 | image1 = cv2.resize( 104 | image1, (4 * image_ref.shape[1], 4 * image_ref.shape[0]), interpolation=cv2.INTER_AREA) 105 | image_ref = cv2.resize(image_ref, (image1.shape[1], image1.shape[ 106 | 0]), interpolation=cv2.INTER_AREA) 107 | 108 | image1[:, :, 1:] = image_ref 109 | 110 | return image1.transpose(2, 0, 1), _image1.transpose(2, 0, 1) 111 | 112 | 113 | class Image2ImageDataset(chainer.dataset.DatasetMixin): 114 | 115 | def __init__(self, paths, root1='./input', root2='./terget', dtype=np.float32, leak=(0, 0), root_ref = None, train=False): 116 | if isinstance(paths, six.string_types): 117 | with open(paths) as paths_file: 118 | paths = [path.strip() for path in paths_file] 119 | self._paths = paths 120 | self._root1 = root1 121 | self._root2 = root2 122 | self._root_ref = root_ref 123 | self._dtype = dtype 124 | self._leak = leak 125 | self._img_dict = {} 126 | self._train = train 127 | 128 | def set_img_dict(self, img_dict): 129 | self._img_dict = img_dict 130 | 131 | def get_vec(self, name): 132 | tag_size = 1539 133 | v = np.zeros(tag_size).astype(np.int32) 134 | if name in self._img_dict.keys(): 135 | for i in self._img_dict[name][3]: 136 | v[i] = 1 137 | return v 138 | 139 | def __len__(self): 140 | return len(self._paths) 141 | 142 | def get_name(self, i): 143 | return self._paths[i] 144 | 145 | def get_example(self, i, minimize=False, log=False, bin_r=0): 146 | if self._train: 147 | bin_r = 0.9 148 | 149 | readed = False 150 | if np.random.rand() < bin_r: 151 | if np.random.rand() < 0.3: 152 | path1 = os.path.join(self._root1 + "_b2r/", self._paths[i]) 153 | else: 154 | path1 = os.path.join(self._root1 + "_cnn/", self._paths[i]) 155 | path2 = os.path.join(self._root2 + "_b2r/", self._paths[i]) 156 | image1 = cv2.imread(path1, cv2.IMREAD_GRAYSCALE) 157 | image2 = cv2.imread(path2, cv2.IMREAD_COLOR) 158 | if image1 is not None and image2 is not None: 159 | if image1.shape[0] > 0 and image1.shape[1] and image2.shape[0] > 0 and image2.shape[1]: 160 | readed = True 161 | if not readed: 162 | path1 = os.path.join(self._root1, self._paths[i]) 163 | path2 = os.path.join(self._root2, self._paths[i]) 164 | image1 = cv2.imread(path1, cv2.IMREAD_GRAYSCALE) 165 | image2 = cv2.imread(path2, cv2.IMREAD_COLOR) 166 | 167 | image2 = cvt2YUV( image2 ) 168 | name1 = os.path.basename(self._paths[i]) 169 | 170 | if self._train and np.random.rand() < 0.2: 171 | ret, image1 = cv2.threshold( 172 | image1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) 173 | 174 | # add flip and noise 175 | if self._train: 176 | if np.random.rand() > 0.5: 177 | image1 = cv2.flip(image1, 1) 178 | image2 = cv2.flip(image2, 1) 179 | if np.random.rand() > 0.9: 180 | image1 = cv2.flip(image1, 0) 181 | image2 = cv2.flip(image2, 0) 182 | 183 | image1 = np.asarray(image1, self._dtype) 184 | image2 = np.asarray(image2, self._dtype) 185 | 186 | if self._train: 187 | noise = np.random.normal( 188 | 0, 5 * np.random.rand(), image1.shape).astype(self._dtype) 189 | image1 += noise 190 | noise = np.random.normal( 191 | 0, 5 * np.random.rand(), image2.shape).astype(self._dtype) 192 | image2 += noise 193 | noise = np.random.normal(0, 16) 194 | image1 += noise 195 | image1[image1 < 0] = 0 196 | 197 | # image is grayscale 198 | if image1.ndim == 2: 199 | image1 = image1[:, :, np.newaxis] 200 | if image2.ndim == 2: 201 | image2 = image2[:, :, np.newaxis] 202 | 203 | image1 = np.insert(image1, 1, -512, axis=2) 204 | image1 = 
np.insert(image1, 2, 128, axis=2) 205 | image1 = np.insert(image1, 3, 128, axis=2) 206 | 207 | # randomly add terget image px 208 | if self._leak[1] > 0: 209 | image0 = image1 210 | n = np.random.randint(16, self._leak[1]) 211 | if self._train: 212 | r = np.random.rand() 213 | if r < 0.4: 214 | n = 0 215 | elif r < 0.7: 216 | n = np.random.randint(2, 16) 217 | 218 | x = np.random.randint(1, image1.shape[0] - 1, n) 219 | y = np.random.randint(1, image1.shape[1] - 1, n) 220 | for i in range(n): 221 | for ch in range(3): 222 | d = 20 223 | v = image2[x[i]][y[i]][ch] + np.random.normal(0, 5) 224 | v = np.floor(v / d + 0.5) * d 225 | image1[x[i]][y[i]][ch + 1] = v 226 | if np.random.rand() > 0.5: 227 | for ch in range(3): 228 | image1[x[i]][y[i] + 1][ch + 229 | 1] = image1[x[i]][y[i]][ch + 1] 230 | image1[x[i]][y[i] - 1][ch + 231 | 1] = image1[x[i]][y[i]][ch + 1] 232 | if np.random.rand() > 0.5: 233 | for ch in range(3): 234 | image1[x[i] + 1][y[i]][ch + 235 | 1] = image1[x[i]][y[i]][ch + 1] 236 | image1[x[i] - 1][y[i]][ch + 237 | 1] = image1[x[i]][y[i]][ch + 1] 238 | 239 | image1 = (image1.transpose(2, 0, 1)) 240 | image2 = (image2.transpose(2, 0, 1)) 241 | #image1 = (image1.transpose(2, 0, 1) -128) /128 242 | #image2 = (image2.transpose(2, 0, 1) -128) /128 243 | 244 | return image1, image2 # ,vec 245 | 246 | 247 | class Image2ImageDatasetX2(Image2ImageDataset): 248 | 249 | def get_example(self, i, minimize=False, log=False, bin_r=0): 250 | path1 = os.path.join(self._root1, self._paths[i]) 251 | path2 = os.path.join(self._root2, self._paths[i]) 252 | #image1 = ImageDataset._read_image_as_array(path1, self._dtype) 253 | image1 = cv2.imread(path1, cv2.IMREAD_GRAYSCALE) 254 | image2 = cv2.imread(path2, cv2.IMREAD_COLOR) 255 | image2 = cvt2YUV(image2) 256 | image2 = np.asarray(image2, self._dtype) 257 | name1 = os.path.basename(self._paths[i]) 258 | vec = self.get_vec(name1) 259 | 260 | # add flip and noise 261 | if self._train: 262 | if np.random.rand() > 0.5: 263 | image1 = cv2.flip(image1, 1) 264 | image2 = cv2.flip(image2, 1) 265 | if np.random.rand() > 0.8: 266 | image1 = cv2.flip(image1, 0) 267 | image2 = cv2.flip(image2, 0) 268 | 269 | if self._train: 270 | bin_r = 0.3 271 | if np.random.rand() < bin_r: 272 | ret, image1 = cv2.threshold( 273 | image1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) 274 | 275 | _image1 = image1.copy() 276 | _image2 = image2.copy() 277 | image1 = cv2.resize(image1, (128, 128), interpolation=cv2.INTER_AREA) 278 | image2 = cv2.resize(image2, (128, 128), interpolation=cv2.INTER_AREA) 279 | 280 | image1 = np.asarray(image1, self._dtype) 281 | _image1 = np.asarray(_image1, self._dtype) 282 | 283 | if self._train: 284 | noise = np.random.normal(0, 5, image1.shape).astype(self._dtype) 285 | image1 = image1 + noise 286 | noise = np.random.normal(0, 5, image2.shape).astype(self._dtype) 287 | image2 = image2 + noise 288 | noise = np.random.normal( 289 | 0, 4 * np.random.rand(), _image1.shape).astype(self._dtype) 290 | noise += np.random.normal(0, 24) 291 | _image1 = _image1 + noise 292 | _image1[_image1 < 0] = 0 293 | _image1[_image1 > 255] = 255 294 | 295 | # image is grayscale 296 | if image1.ndim == 2: 297 | image1 = image1[:, :, np.newaxis] 298 | if image2.ndim == 2: 299 | image2 = image2[:, :, np.newaxis] 300 | if _image1.ndim == 2: 301 | _image1 = _image1[:, :, np.newaxis] 302 | if _image2.ndim == 2: 303 | _image2 = _image2[:, :, np.newaxis] 304 | 305 | image1 = np.insert(image1, 1, -512, axis=2) 306 | image1 = np.insert(image1, 2, 128, axis=2) 307 | image1 = 
np.insert(image1, 3, 128, axis=2) 308 | 309 | # randomly add terget image px 310 | if self._leak[1] > 0: 311 | image0 = image1 312 | n = np.random.randint(self._leak[0], self._leak[1]) 313 | x = np.random.randint(1, image1.shape[0] - 1, n) 314 | y = np.random.randint(1, image1.shape[1] - 1, n) 315 | for i in range(n): 316 | for ch in range(3): 317 | d = 20 318 | v = image2[x[i]][y[i]][ch] + np.random.normal(0, 5) 319 | #v = np.random.normal(128,40) 320 | v = np.floor(v / d + 0.5) * d 321 | image1[x[i]][y[i]][ch + 1] = v 322 | if np.random.rand() > 0.5: 323 | image1[x[i]][y[i] + 1][ch + 1] = v 324 | image1[x[i]][y[i] - 1][ch + 1] = v 325 | if np.random.rand() > 0.5: 326 | image1[x[i] + 1][y[i]][ch + 1] = v 327 | image1[x[i] - 1][y[i]][ch + 1] = v 328 | 329 | return image1.transpose(2, 0, 1), image2.transpose(2, 0, 1), _image1.transpose(2, 0, 1), _image2.transpose(2, 0, 1) 330 | -------------------------------------------------------------------------------- /paint_x2_unet/lnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import numpy 5 | import chainer 6 | import chainer.functions as F 7 | import chainer.links as L 8 | from chainer import cuda, optimizers, serializers, Variable 9 | 10 | 11 | from chainer import function 12 | from chainer.utils import type_check 13 | 14 | 15 | class MeanAbsoluteError(function.Function): 16 | 17 | """Mean absolute error function.""" 18 | 19 | def check_type_forward(self, in_types): 20 | type_check.expect(in_types.size() == 2) 21 | type_check.expect( 22 | in_types[0].dtype == numpy.float32, 23 | in_types[1].dtype == numpy.float32, 24 | in_types[0].shape == in_types[1].shape 25 | ) 26 | 27 | def forward_cpu(self, inputs): 28 | x0, x1 = inputs 29 | self.diff = x0 - x1 30 | diff = self.diff.ravel() 31 | return numpy.array(abs(diff).sum() / diff.size, dtype=diff.dtype), 32 | 33 | def forward_gpu(self, inputs): 34 | x0, x1 = inputs 35 | self.diff = x0 - x1 36 | diff = self.diff.ravel() 37 | return abs(diff).sum() / diff.dtype.type(diff.size), 38 | 39 | def backward(self, inputs, gy): 40 | xp = cuda.get_array_module(*inputs) 41 | coeff = gy[0] * gy[0].dtype.type(1. / self.diff.size) 42 | gx0 = coeff * xp.sign(self.diff) 43 | return gx0, -gx0 44 | 45 | 46 | def mean_absolute_error(x0, x1): 47 | """Mean absolute error function. 48 | This function computes mean absolute error between two variables. The mean 49 | is taken over the minibatch. 
--------------------------------------------------------------------------------
/paint_x2_unet/lnet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import numpy as np
4 | import numpy
5 | import chainer
6 | import chainer.functions as F
7 | import chainer.links as L
8 | from chainer import cuda, optimizers, serializers, Variable
9 | 
10 | 
11 | from chainer import function
12 | from chainer.utils import type_check
13 | 
14 | 
15 | class MeanAbsoluteError(function.Function):
16 | 
17 |     """Mean absolute error function."""
18 | 
19 |     def check_type_forward(self, in_types):
20 |         type_check.expect(in_types.size() == 2)
21 |         type_check.expect(
22 |             in_types[0].dtype == numpy.float32,
23 |             in_types[1].dtype == numpy.float32,
24 |             in_types[0].shape == in_types[1].shape
25 |         )
26 | 
27 |     def forward_cpu(self, inputs):
28 |         x0, x1 = inputs
29 |         self.diff = x0 - x1
30 |         diff = self.diff.ravel()
31 |         return numpy.array(abs(diff).sum() / diff.size, dtype=diff.dtype),
32 | 
33 |     def forward_gpu(self, inputs):
34 |         x0, x1 = inputs
35 |         self.diff = x0 - x1
36 |         diff = self.diff.ravel()
37 |         return abs(diff).sum() / diff.dtype.type(diff.size),
38 | 
39 |     def backward(self, inputs, gy):
40 |         xp = cuda.get_array_module(*inputs)
41 |         coeff = gy[0] * gy[0].dtype.type(1. / self.diff.size)
42 |         gx0 = coeff * xp.sign(self.diff)
43 |         return gx0, -gx0
44 | 
45 | 
46 | def mean_absolute_error(x0, x1):
47 |     """Mean absolute error function.
48 |     This function computes the mean absolute error between two variables.
49 |     The mean is taken over the minibatch.
50 |     """
51 |     return MeanAbsoluteError()(x0, x1)
52 | 
53 | 
54 | class LNET(chainer.Chain):
55 | 
56 |     def __init__(self):
57 |         super(LNET, self).__init__(
58 |             c0=L.Convolution2D(3, 32, 3, 1, 1),
59 |             c1=L.Convolution2D(32, 64, 4, 2, 1),
60 |             c2=L.Convolution2D(64, 64, 3, 1, 1),
61 |             c3=L.Convolution2D(64, 128, 4, 2, 1),
62 |             c4=L.Convolution2D(128, 128, 3, 1, 1),
63 |             c5=L.Convolution2D(128, 256, 4, 2, 1),
64 |             c6=L.Convolution2D(256, 256, 3, 1, 1),
65 |             c7=L.Convolution2D(256, 512, 4, 2, 1),
66 |             c8=L.Convolution2D(512, 512, 3, 1, 1),
67 | 
68 |             dc8=L.Deconvolution2D(1024, 512, 4, 2, 1),
69 |             dc7=L.Convolution2D(512, 256, 3, 1, 1),
70 |             dc6=L.Deconvolution2D(512, 256, 4, 2, 1),
71 |             dc5=L.Convolution2D(256, 128, 3, 1, 1),
72 |             dc4=L.Deconvolution2D(256, 128, 4, 2, 1),
73 |             dc3=L.Convolution2D(128, 64, 3, 1, 1),
74 |             dc2=L.Deconvolution2D(128, 64, 4, 2, 1),
75 |             dc1=L.Convolution2D(64, 32, 3, 1, 1),
76 |             dc0=L.Convolution2D(64, 1, 3, 1, 1),
77 | 
78 |             bnc0=L.BatchNormalization(32),
79 |             bnc1=L.BatchNormalization(64),
80 |             bnc2=L.BatchNormalization(64),
81 |             bnc3=L.BatchNormalization(128),
82 |             bnc4=L.BatchNormalization(128),
83 |             bnc5=L.BatchNormalization(256),
84 |             bnc6=L.BatchNormalization(256),
85 |             bnc7=L.BatchNormalization(512),
86 |             bnc8=L.BatchNormalization(512),
87 | 
88 |             bnd8=L.BatchNormalization(512),
89 |             bnd7=L.BatchNormalization(256),
90 |             bnd6=L.BatchNormalization(256),
91 |             bnd5=L.BatchNormalization(128),
92 |             bnd4=L.BatchNormalization(128),
93 |             bnd3=L.BatchNormalization(64),
94 |             bnd2=L.BatchNormalization(64),
95 |             bnd1=L.BatchNormalization(32)
96 |             # l = L.Linear(3*3*256, 2)
97 |         )
98 | 
99 |     def enc(self, x):
100 |         e0 = F.relu(self.bnc0(self.c0(x)))
101 |         e1 = F.relu(self.bnc1(self.c1(e0)))
102 |         e2 = F.relu(self.bnc2(self.c2(e1)))
103 |         e3 = F.relu(self.bnc3(self.c3(e2)))
104 |         e4 = F.relu(self.bnc4(self.c4(e3)))
105 |         e5 = F.relu(self.bnc5(self.c5(e4)))
106 |         e6 = F.relu(self.bnc6(self.c6(e5)))
107 |         e7 = F.relu(self.bnc7(self.c7(e6)))
108 |         e8 = F.relu(self.bnc8(self.c8(e7)))
109 |         return [e0, e2, e4, e6, e7, e8]
110 | 
111 |     def calc(self, x):
112 |         e0, e2, e4, e6, e7, e8 = self.enc(x)
113 | 
114 |         d8 = F.relu(self.bnd8(self.dc8(F.concat([e7, e8]))))
115 |         del e7, e8
116 |         d7 = F.relu(self.bnd7(self.dc7(d8)))
117 |         del d8
118 |         d6 = F.relu(self.bnd6(self.dc6(F.concat([e6, d7]))))
119 |         del d7, e6
120 |         d5 = F.relu(self.bnd5(self.dc5(d6)))
121 |         del d6
122 |         d4 = F.relu(self.bnd4(self.dc4(F.concat([e4, d5]))))
123 |         del d5, e4
124 |         d3 = F.relu(self.bnd3(self.dc3(d4)))
125 |         del d4
126 |         d2 = F.relu(self.bnd2(self.dc2(F.concat([e2, d3]))))
127 |         del d3, e2
128 |         d1 = F.relu(self.bnd1(self.dc1(d2)))
129 |         del d2
130 |         d0 = self.dc0(F.concat([e0, d1]))
131 | 
132 |         return d0
133 | 
134 |     def __call__(self, x, t):
135 |         h = self.calc(x)
136 |         loss_c = mean_absolute_error(h, t)
137 |         loss = loss_c
138 |         chainer.report({'loss': loss, 'loss_c': loss_c}, self)
139 |         return loss
140 | 
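# --- Editor's note: sanity check for the custom MAE above -------------------
# Forward and backward of the hand-rolled MeanAbsoluteError are intended to
# match F.mean_absolute_error; a quick numeric check (assuming Chainer v2,
# as stated in the README):
import numpy as np
import chainer.functions as F
from lnet import mean_absolute_error

x0 = np.random.rand(2, 3, 8, 8).astype(np.float32)
x1 = np.random.rand(2, 3, 8, 8).astype(np.float32)
print(mean_absolute_error(x0, x1).data)    # custom Function
print(F.mean_absolute_error(x0, x1).data)  # built-in; values should agree
# ---------------------------------------------------------------------------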
--------------------------------------------------------------------------------
/paint_x2_unet/train_128.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import numpy as np
4 | import chainer
5 | import chainer.functions as F
6 | import chainer.links as L
7 | import chainer.datasets.image_dataset as ImageDataset
8 | import six
9 | import os
10 | from PIL import Image
11 | 
12 | from chainer import cuda, optimizers, serializers, Variable
13 | from chainer import training
14 | from chainer.training import extensions
15 | 
16 | import argparse
17 | 
18 | import unet
19 | import lnet
20 | 
21 | #from images_dict import img_dict
22 | from img2imgDataset import Image2ImageDataset
23 | 
24 | chainer.cuda.set_max_workspace_size(1024 * 1024 * 1024)
25 | os.environ["CHAINER_TYPE_CHECK"] = "0"
26 | 
27 | 
28 | def main():
29 |     parser = argparse.ArgumentParser(
30 |         description='chainer line drawing colorization')
31 |     parser.add_argument('--batchsize', '-b', type=int, default=16,
32 |                         help='Number of images in each mini-batch')
33 |     parser.add_argument('--epoch', '-e', type=int, default=20,
34 |                         help='Number of sweeps over the dataset to train')
35 |     parser.add_argument('--gpu', '-g', type=int, default=-1,
36 |                         help='GPU ID (negative value indicates CPU)')
37 |     parser.add_argument('--dataset', '-i', default='./images/',
38 |                         help='Directory of image files.')
39 |     parser.add_argument('--out', '-o', default='result',
40 |                         help='Directory to output the result')
41 |     parser.add_argument('--resume', '-r', default='',
42 |                         help='Resume the training from snapshot')
43 |     parser.add_argument('--seed', type=int, default=0,
44 |                         help='Random seed')
45 |     parser.add_argument('--snapshot_interval', type=int, default=10000,
46 |                         help='Interval of snapshot')
47 |     parser.add_argument('--display_interval', type=int, default=100,
48 |                         help='Interval of displaying log to console')
49 |     args = parser.parse_args()
50 | 
51 |     print('GPU: {}'.format(args.gpu))
52 |     print('# Minibatch-size: {}'.format(args.batchsize))
53 |     print('# epoch: {}'.format(args.epoch))
54 |     print('')
55 | 
56 |     root = args.dataset
57 |     #model = "./model_paint"
58 | 
59 |     cnn = unet.UNET()
60 |     #serializers.load_npz("result/model_iter_10000", cnn)
61 | 
62 |     dis = unet.DIS()
63 |     #serializers.load_npz("result/model_dis_iter_20000", dis)
64 | 
65 |     l = lnet.LNET()
66 |     serializers.load_npz("models/liner_f", l)
67 | 
68 |     dataset = Image2ImageDataset(
69 |         "dat/images_color_train.dat", root + "line/", root + "color/", train=True)
70 |     # dataset.set_img_dict(img_dict)
71 |     train_iter = chainer.iterators.SerialIterator(dataset, args.batchsize)
72 | 
73 |     if args.gpu >= 0:
74 |         chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
75 |         cnn.to_gpu()  # Copy the model to the GPU
76 |         dis.to_gpu()  # Copy the model to the GPU
77 |         l.to_gpu()
78 | 
79 |     # Setup optimizer parameters.
80 |     opt = optimizers.Adam(alpha=0.0001)
81 |     opt.setup(cnn)
82 |     opt.add_hook(chainer.optimizer.WeightDecay(1e-5), 'hook_cnn')
83 | 
84 |     opt_d = chainer.optimizers.Adam(alpha=0.0001)
85 |     opt_d.setup(dis)
86 |     opt_d.add_hook(chainer.optimizer.WeightDecay(1e-5), 'hook_dec')
87 | 
88 |     # Set up a trainer
89 |     updater = ganUpdater(
90 |         models=(cnn, dis, l),
91 |         iterator={
92 |             'main': train_iter,
93 |             #'test': test_iter
94 |         },
95 |         optimizer={
96 |             'cnn': opt,
97 |             'dis': opt_d},
98 |         device=args.gpu)
99 | 
100 |     trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
101 | 
102 |     snapshot_interval = (args.snapshot_interval, 'iteration')
103 |     snapshot_interval2 = (args.snapshot_interval * 2, 'iteration')
104 |     trainer.extend(extensions.dump_graph('cnn/loss'))
105 |     trainer.extend(extensions.snapshot(), trigger=snapshot_interval2)
106 |     trainer.extend(extensions.snapshot_object(
107 |         cnn, 'cnn_128_iter_{.updater.iteration}'), trigger=snapshot_interval)
108 |     trainer.extend(extensions.snapshot_object(
109 |         dis, 'cnn_128_dis_iter_{.updater.iteration}'), trigger=snapshot_interval)
110 |     trainer.extend(extensions.snapshot_object(
111 |         opt, 'optimizer_'), trigger=snapshot_interval)
112 |     trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))
113 |     trainer.extend(extensions.PrintReport(
114 |         ['epoch', 'cnn/loss', 'cnn/loss_rec', 'cnn/loss_adv', 'cnn/loss_tag', 'cnn/loss_l', 'dis/loss']))
115 |     trainer.extend(extensions.ProgressBar(update_interval=20))
116 | 
117 |     if args.resume:
118 |         # Resume from a snapshot (must happen before training starts)
119 |         chainer.serializers.load_npz(args.resume, trainer)
120 | 
121 |     trainer.run()
122 | 
123 |     # Save the trained model
124 |     chainer.serializers.save_npz(os.path.join(args.out, 'model_final'), cnn)
125 |     chainer.serializers.save_npz(os.path.join(args.out, 'optimizer_final'), opt)
126 | 
127 | 
128 | class ganUpdater(chainer.training.StandardUpdater):
129 | 
130 |     def __init__(self, *args, **kwargs):
131 |         self.cnn, self.dis, self.l = kwargs.pop('models')
132 |         self._iter = 0
133 |         super(ganUpdater, self).__init__(*args, **kwargs)
134 | 
135 |     def loss_cnn(self, cnn, x_out, t_out, y_out, lam1=1, lam2=1, lam3=10):
136 |         loss_rec = lam1 * (F.mean_absolute_error(x_out, t_out))
137 |         loss_adv = lam2 * y_out
138 |         l_t = self.l.calc((t_out - 128) / 128)
139 |         l_x = self.l.calc((x_out - 128) / 128)
140 |         loss_l = lam3 * (F.mean_absolute_error(l_x, l_t))
141 |         loss = loss_rec + loss_adv + loss_l
142 |         chainer.report({'loss': loss, "loss_rec": loss_rec,
143 |                         'loss_adv': loss_adv, "loss_l": loss_l}, cnn)
144 | 
145 |         return loss
146 | 
147 |     def loss_dis(self, dis, y_in, y_out):
148 |         L1 = y_in
149 |         L2 = y_out
150 |         loss = L1 + L2
151 |         chainer.report({'loss': loss}, dis)
152 |         return loss
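    # Editor's note on the label convention in update_core() below: DIS
    # returns a softmax cross-entropy loss over two classes, with label 0
    # playing "real" and label 1 "fake".  The generator step feeds
    # dis(x_out, 0), so its adversarial term rewards fooling the
    # discriminator into the "real" class; the discriminator step then sums
    # dis(t_out, 0) (real as real) and dis(x_out, 1) (generated as fake) on
    # the unchained x_out.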
153 | 
154 |     def update_core(self):
155 |         xp = self.cnn.xp
156 |         self._iter += 1
157 | 
158 |         batch = self.get_iterator('main').next()
159 |         batchsize = len(batch)
160 | 
161 |         w_in = 128
162 |         w_out = 128
163 | 
164 |         x_in = xp.zeros((batchsize, 4, w_in, w_in)).astype("f")
165 |         t_out = xp.zeros((batchsize, 3, w_out, w_out)).astype("f")
166 | 
167 |         for i in range(batchsize):
168 |             x_in[i, :] = xp.asarray(batch[i][0])
169 |             t_out[i, :] = xp.asarray(batch[i][1])
170 |         x_in = Variable(x_in)
171 |         t_out = Variable(t_out)
172 | 
173 |         x_out = self.cnn.calc(x_in)
174 | 
175 |         cnn_optimizer = self.get_optimizer('cnn')
176 |         dis_optimizer = self.get_optimizer('dis')
177 | 
178 |         y_target = self.dis(x_out, Variable(
179 |             xp.zeros(batchsize, dtype=np.int32)))
180 | 
181 |         cnn_optimizer.update(self.loss_cnn, self.cnn, x_out, t_out, y_target)
182 | 
183 |         x_out.unchain_backward()
184 |         y_fake = self.dis(x_out, Variable(
185 |             xp.ones(batchsize, dtype=np.int32)))
186 |         y_real = self.dis(t_out, Variable(
187 |             xp.zeros(batchsize, dtype=np.int32)))
188 |         dis_optimizer.update(self.loss_dis, self.dis, y_real, y_fake)
189 | 
190 | if __name__ == '__main__':
191 |     main()
192 | 
--------------------------------------------------------------------------------
/paint_x2_unet/train_x2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import numpy as np
4 | import chainer
5 | import chainer.functions as F
6 | import chainer.links as L
7 | import chainer.datasets.image_dataset as ImageDataset
8 | import six
9 | import os
10 | import cv2
11 | 
12 | from chainer import cuda, optimizers, serializers, Variable
13 | from chainer import training
14 | from chainer.training import extensions
15 | 
16 | import argparse
17 | 
18 | import unet
19 | import lnet
20 | 
21 | #from images_dict import img_dict
22 | from img2imgDataset import Image2ImageDatasetX2
23 | 
24 | 
25 | # chainer.cuda.set_max_workspace_size(1024*1024*1024)
26 | #os.environ["CHAINER_TYPE_CHECK"] = "0"
27 | 
28 | 
29 | def main():
30 |     parser = argparse.ArgumentParser(
31 |         description='chainer line drawing colorization')
32 |     parser.add_argument('--batchsize', '-b', type=int, default=4,
33 |                         help='Number of images in each mini-batch')
34 |     parser.add_argument('--epoch', '-e', type=int, default=20,
35 |                         help='Number of sweeps over the dataset to train')
36 |     parser.add_argument('--gpu', '-g', type=int, default=-1,
37 |                         help='GPU ID (negative value indicates CPU)')
38 |     parser.add_argument('--dataset', '-i', default='./images/',
39 |                         help='Directory of image files.')
40 |     parser.add_argument('--out', '-o', default='result',
41 |                         help='Directory to output the result')
42 |     parser.add_argument('--resume', '-r', default='',
43 |                         help='Resume the training from snapshot')
44 |     parser.add_argument('--seed', type=int, default=0,
45 |                         help='Random seed')
46 |     parser.add_argument('--snapshot_interval', type=int, default=10000,
47 |                         help='Interval of snapshot')
48 |     parser.add_argument('--display_interval', type=int, default=100,
49 |                         help='Interval of displaying log to console')
50 |     args = parser.parse_args()
51 | 
52 |     print('GPU: {}'.format(args.gpu))
53 |     print('# Minibatch-size: {}'.format(args.batchsize))
54 |     print('# epoch: {}'.format(args.epoch))
55 |     print('')
56 | 
57 |     root = args.dataset
58 |     #model = "./model_paint"
59 | 
60 |     cnn = unet.UNET()
61 |     #serializers.load_npz("result/model_iter_10000", cnn)
62 |     cnn_128 = unet.UNET()
63 |     serializers.load_npz("models/model_cnn_128_dfl2_9", cnn_128)
64 | 
65 |     dataset = Image2ImageDatasetX2(
66 |         "dat/images_color_train.dat", root + "linex2/", root + "colorx2/", train=True)
67 |     # dataset.set_img_dict(img_dict)
68 |     train_iter = chainer.iterators.SerialIterator(dataset, args.batchsize)
69 | 
70 |     if args.gpu >= 0:
71 |         chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
72 |         cnn.to_gpu()  # Copy the model to the GPU
73 |         cnn_128.to_gpu()  # Copy the model to the GPU
74 | 
75 |     # Setup optimizer parameters.
76 |     opt = optimizers.Adam(alpha=0.0001)
77 |     opt.setup(cnn)
78 |     opt.add_hook(chainer.optimizer.WeightDecay(1e-5), 'hook_cnn')
79 | 
80 |     # Set up a trainer
81 |     updater = ganUpdater(
82 |         models=(cnn, cnn_128),
83 |         iterator={'main': train_iter, },
84 |         optimizer={'cnn': opt},
85 |         device=args.gpu)
86 | 
87 |     trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
88 | 
89 |     snapshot_interval = (args.snapshot_interval, 'iteration')
90 |     snapshot_interval2 = (args.snapshot_interval * 2, 'iteration')
91 |     trainer.extend(extensions.dump_graph('cnn/loss'))
92 |     trainer.extend(extensions.snapshot(), trigger=snapshot_interval2)
93 |     trainer.extend(extensions.snapshot_object(
94 |         cnn, 'cnn_x2_iter_{.updater.iteration}'), trigger=snapshot_interval)
95 |     trainer.extend(extensions.snapshot_object(
96 |         opt, 'optimizer_'), trigger=snapshot_interval)
97 |     trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))
98 |     trainer.extend(extensions.PrintReport(
99 |         ['epoch', 'cnn/loss', 'cnn/loss_rec']))
100 |     trainer.extend(extensions.ProgressBar(update_interval=20))
101 | 
102 |     if args.resume:
103 |         # Resume from a snapshot (must happen before training starts)
104 |         chainer.serializers.load_npz(args.resume, trainer)
105 | 
106 |     trainer.run()
107 | 
108 |     # Save the trained model
109 |     chainer.serializers.save_npz(os.path.join(args.out, 'model_final'), cnn)
110 |     chainer.serializers.save_npz(os.path.join(args.out, 'optimizer_final'), opt)
111 | 
112 | 
113 | class ganUpdater(chainer.training.StandardUpdater):
114 | 
115 |     def __init__(self, *args, **kwargs):
116 |         self.cnn, self.cnn_128 = kwargs.pop('models')
117 |         self._iter = 0
118 |         super(ganUpdater, self).__init__(*args, **kwargs)
119 | 
120 |     def loss_cnn(self, cnn, x_out, t_out, lam1=1):
121 |         loss_rec = lam1 * (F.mean_absolute_error(x_out, t_out))
122 |         loss = loss_rec
123 |         chainer.report({'loss': loss, "loss_rec": loss_rec}, cnn)
124 | 
125 |         return loss
126 | 
127 |     def update_core(self):
128 |         xp = self.cnn.xp
129 |         self._iter += 1
130 | 
131 |         batch = self.get_iterator('main').next()
132 |         batchsize = len(batch)
133 | 
134 |         w_in = 128
135 |         w_in_2 = 512
136 |         w_out = 512
137 | 
138 |         x_in = xp.zeros((batchsize, 4, w_in, w_in)).astype("f")
139 |         x_in_2 = xp.zeros((batchsize, 4, w_in_2, w_in_2)).astype("f")
140 |         t_out = xp.zeros((batchsize, 3, w_out, w_out)).astype("f")
141 | 
142 |         for i in range(batchsize):
143 |             x_in[i, :] = xp.asarray(batch[i][0])
144 |             x_in_2[i, 0, :] = xp.asarray(batch[i][2])
145 |             for ch in range(3):
146 |                 color_ch = cv2.resize(
147 |                     batch[i][1][ch], (w_out, w_out), interpolation=cv2.INTER_CUBIC).astype("f")
148 |                 x_in_2[i, ch + 1, :] = xp.asarray(color_ch)
149 |             t_out[i, :] = xp.asarray(batch[i][3])
150 | 
151 |         x_in = Variable(x_in)
152 |         t_out = Variable(t_out)
153 | 
154 |         x_out = self.cnn_128.calc(x_in)
155 |         x_out = x_out.data.get()  # .get() assumes GPU (cupy) arrays
156 | 
157 |         for j in range(batchsize):
158 |             for ch in range(3):
159 |                 # randomly use src color ch
160 |                 if np.random.rand() < 0.8:
161 |                     x_in_2[j, 1 + ch, :] = xp.asarray(cv2.resize(
162 |                         x_out[j, ch, :], (w_in_2, w_in_2), interpolation=cv2.INTER_CUBIC))
163 | 
164 |         x_in_2 = Variable(x_in_2)
165 |         x_out_2 = self.cnn.calc(x_in_2)
166 | 
167 |         cnn_optimizer = self.get_optimizer('cnn')
168 | 
169 |         cnn_optimizer.update(self.loss_cnn, self.cnn, x_out_2, t_out)
170 | 
171 | 
172 | if __name__ == '__main__':
173 |     main()
174 | 
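# --- Editor's sketch of the coarse-to-fine handoff in update_core() ---------
# The 128px net's YUV prediction is upsampled to 512px and written into the
# hint channels (1-3) of the second-stage input, so the x2 net refines its
# own first-stage colorization.  Shapes only; no real models involved.
import numpy as np
import cv2

w_small, w_big = 128, 512
x_out = np.random.rand(3, w_small, w_small).astype(np.float32)  # stage-1 YUV
x_in_2 = np.zeros((4, w_big, w_big), np.float32)
x_in_2[0] = np.random.rand(w_big, w_big)                        # line art
for ch in range(3):
    x_in_2[ch + 1] = cv2.resize(x_out[ch], (w_big, w_big),
                                interpolation=cv2.INTER_CUBIC)
# x_in_2 is now the (4, 512, 512) input for the second-stage UNET
# ---------------------------------------------------------------------------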
--------------------------------------------------------------------------------
/paint_x2_unet/unet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import numpy as np
4 | import math
5 | import chainer
6 | import chainer.functions as F
7 | import chainer.links as L
8 | from chainer import cuda, optimizers, serializers, Variable
9 | 
10 | 
11 | from chainer import function
12 | from chainer.utils import type_check
13 | 
14 | 
15 | class UNET(chainer.Chain):
16 | 
17 |     def __init__(self):
18 |         super(UNET, self).__init__(
19 |             c0=L.Convolution2D(4, 32, 3, 1, 1),
20 |             c1=L.Convolution2D(32, 64, 4, 2, 1),
21 |             c2=L.Convolution2D(64, 64, 3, 1, 1),
22 |             c3=L.Convolution2D(64, 128, 4, 2, 1),
23 |             c4=L.Convolution2D(128, 128, 3, 1, 1),
24 |             c5=L.Convolution2D(128, 256, 4, 2, 1),
25 |             c6=L.Convolution2D(256, 256, 3, 1, 1),
26 |             c7=L.Convolution2D(256, 512, 4, 2, 1),
27 |             c8=L.Convolution2D(512, 512, 3, 1, 1),
28 | 
29 |             dc8=L.Deconvolution2D(1024, 512, 4, 2, 1),
30 |             dc7=L.Convolution2D(512, 256, 3, 1, 1),
31 |             dc6=L.Deconvolution2D(512, 256, 4, 2, 1),
32 |             dc5=L.Convolution2D(256, 128, 3, 1, 1),
33 |             dc4=L.Deconvolution2D(256, 128, 4, 2, 1),
34 |             dc3=L.Convolution2D(128, 64, 3, 1, 1),
35 |             dc2=L.Deconvolution2D(128, 64, 4, 2, 1),
36 |             dc1=L.Convolution2D(64, 32, 3, 1, 1),
37 |             dc0=L.Convolution2D(64, 3, 3, 1, 1),
38 | 
39 |             bnc0=L.BatchNormalization(32),
40 |             bnc1=L.BatchNormalization(64),
41 |             bnc2=L.BatchNormalization(64),
42 |             bnc3=L.BatchNormalization(128),
43 |             bnc4=L.BatchNormalization(128),
44 |             bnc5=L.BatchNormalization(256),
45 |             bnc6=L.BatchNormalization(256),
46 |             bnc7=L.BatchNormalization(512),
47 |             bnc8=L.BatchNormalization(512),
48 | 
49 |             bnd8=L.BatchNormalization(512),
50 |             bnd7=L.BatchNormalization(256),
51 |             bnd6=L.BatchNormalization(256),
52 |             bnd5=L.BatchNormalization(128),
53 |             bnd4=L.BatchNormalization(128),
54 |             bnd3=L.BatchNormalization(64),
55 |             bnd2=L.BatchNormalization(64),
56 |             bnd1=L.BatchNormalization(32)
57 |             # l = L.Linear(3*3*256, 2)
58 |         )
59 | 
60 |     def calc(self, x):
61 |         e0 = F.relu(self.bnc0(self.c0(x)))
62 |         e1 = F.relu(self.bnc1(self.c1(e0)))
63 |         e2 = F.relu(self.bnc2(self.c2(e1)))
64 |         del e1
65 |         e3 = F.relu(self.bnc3(self.c3(e2)))
66 |         e4 = F.relu(self.bnc4(self.c4(e3)))
67 |         del e3
68 |         e5 = F.relu(self.bnc5(self.c5(e4)))
69 |         e6 = F.relu(self.bnc6(self.c6(e5)))
70 |         del e5
71 |         e7 = F.relu(self.bnc7(self.c7(e6)))
72 |         e8 = F.relu(self.bnc8(self.c8(e7)))
73 | 
74 |         d8 = F.relu(self.bnd8(self.dc8(F.concat([e7, e8]))))
75 |         del e7, e8
76 |         d7 = F.relu(self.bnd7(self.dc7(d8)))
77 |         del d8
78 |         d6 = F.relu(self.bnd6(self.dc6(F.concat([e6, d7]))))
79 |         del d7, e6
80 |         d5 = F.relu(self.bnd5(self.dc5(d6)))
81 |         del d6
82 |         d4 = F.relu(self.bnd4(self.dc4(F.concat([e4, d5]))))
83 |         del d5, e4
84 |         d3 = F.relu(self.bnd3(self.dc3(d4)))
85 |         del d4
86 |         d2 = F.relu(self.bnd2(self.dc2(F.concat([e2, d3]))))
87 |         del d3, e2
88 |         d1 = F.relu(self.bnd1(self.dc1(d2)))
89 |         del d2
90 |         d0 = self.dc0(F.concat([e0, d1]))
91 | 
92 |         return d0
93 | 
94 |     def __call__(self, x, t):
95 |         h = self.calc(x)
96 |         loss = F.mean_absolute_error(h, t)
97 |         chainer.report({'loss': loss}, self)
98 |         return loss
99 | 
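# --- Editor's sketch: forward shape check for UNET --------------------------
# Four stride-2 encoder stages mean the spatial size must be a multiple of
# 16; a 4-channel 128x128 input comes back as a 3-channel 128x128 map.
# (Assumes Chainer v2-style config switching, per the README.)
import numpy as np
import chainer

with chainer.using_config('train', False):
    net = UNET()
    y = net.calc(np.zeros((1, 4, 128, 128), np.float32))
print(y.shape)  # (1, 3, 128, 128)
# ---------------------------------------------------------------------------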
100 | 
101 | class DIS(chainer.Chain):
102 | 
103 |     def __init__(self):
104 |         super(DIS, self).__init__(
105 |             c1=L.Convolution2D(3, 32, 4, 2, 1),
106 |             c2=L.Convolution2D(32, 32, 3, 1, 1),
107 |             c3=L.Convolution2D(32, 64, 4, 2, 1),
108 |             c4=L.Convolution2D(64, 64, 3, 1, 1),
109 |             c5=L.Convolution2D(64, 128, 4, 2, 1),
110 |             c6=L.Convolution2D(128, 128, 3, 1, 1),
111 |             c7=L.Convolution2D(128, 256, 4, 2, 1),
112 |             l8l=L.Linear(None, 2,
113 |                          initialW=chainer.initializers.HeNormal(
114 |                              math.sqrt(0.02 * math.sqrt(8 * 8 * 256) / 2))),
115 | 
116 |             bnc1=L.BatchNormalization(32),
117 |             bnc2=L.BatchNormalization(32),
118 |             bnc3=L.BatchNormalization(64),
119 |             bnc4=L.BatchNormalization(64),
120 |             bnc5=L.BatchNormalization(128),
121 |             bnc6=L.BatchNormalization(128),
122 |             bnc7=L.BatchNormalization(256),
123 |         )
124 | 
125 |     def calc(self, x):
126 |         h = F.relu(self.bnc1(self.c1(x)))
127 |         h = F.relu(self.bnc2(self.c2(h)))
128 |         h = F.relu(self.bnc3(self.c3(h)))
129 |         h = F.relu(self.bnc4(self.c4(h)))
130 |         h = F.relu(self.bnc5(self.c5(h)))
131 |         h = F.relu(self.bnc6(self.c6(h)))
132 |         h = F.relu(self.bnc7(self.c7(h)))
133 |         return self.l8l(h)
134 | 
135 |     def __call__(self, x, t):
136 |         h = self.calc(x)
137 |         loss = F.softmax_cross_entropy(h, t)
138 |         #chainer.report({'loss': loss }, self)
139 |         return loss
140 | 
--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yukitsuji/monodepth_chainer/93e94097336a433af1486aa7956069b2fba0f22a/util/__init__.py
--------------------------------------------------------------------------------
/util/bilinear_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yukitsuji/monodepth_chainer/93e94097336a433af1486aa7956069b2fba0f22a/util/bilinear_sampler.py
--------------------------------------------------------------------------------
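# --- Editor's note on util/bilinear_sampler.py ------------------------------
# The file body is elided in this dump (only the raw URL survives above).  As
# a sketch of the idea only -- not the repository's actual implementation --
# a horizontal bilinear sampler warps one stereo view by per-pixel disparity,
# which is the reconstruction step unsupervised monocular depth training
# relies on:
import numpy as np

def bilinear_sample_1d(img, disp):
    """img: (H, W) image; disp: (H, W) horizontal offsets in pixels."""
    h, w = img.shape
    xs = np.arange(w)[None, :] + disp                  # sampling positions
    x0 = np.clip(np.floor(xs).astype(np.int64), 0, w - 2)
    frac = np.clip(xs - x0, 0.0, 1.0)                  # interpolation weight
    rows = np.arange(h)[:, None]
    return (1.0 - frac) * img[rows, x0] + frac * img[rows, x0 + 1]

right = np.random.rand(4, 8).astype(np.float32)
disp = np.full((4, 8), 1.5, np.float32)
left_reconstruction = bilinear_sample_1d(right, disp)
# ---------------------------------------------------------------------------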