├── .gitignore ├── Colorizing-with-GANs ├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── build_dataset.py ├── dataset.py ├── img │ ├── cgan.png │ ├── con_gan.png │ ├── discriminator.png │ ├── gan.png │ ├── gan_new.png │ ├── places365.jpg │ ├── places365.png │ └── unet.png ├── main.py ├── models.py ├── networks.py ├── ops.py ├── options.py ├── requirements.txt ├── setup.cfg ├── test-eval.py ├── test-turing.py ├── train.py ├── utils.py ├── video_colorize_GAN.py ├── video_colorize_GAN_1st-truth-ref.py └── video_colorize_GAN_truth-ref.py ├── Deep-Learning-Colorization ├── models │ ├── .gitignore │ ├── alexnet_deploy.prototxt │ ├── alexnet_deploy_fc.prototxt │ ├── alexnet_deploy_lab.prototxt │ ├── alexnet_deploy_lab_fc.prototxt │ ├── colorization_deploy_v1.prototxt │ ├── colorization_deploy_v2.prototxt │ ├── colorization_train_val_v2.prototxt │ ├── fetch_alexnet_model.sh │ └── fetch_release_models.sh ├── resources │ ├── batch_norm_absorb.py │ ├── caffe_traininglayers.py │ ├── caffe_traininglayers.pyc │ ├── conv_into_fc.py │ ├── magic_init │ │ ├── LICENSE │ │ ├── README.md │ │ ├── load.py │ │ ├── load.pyc │ │ ├── magic_init_mod.py │ │ └── measure_stat.py │ ├── prior_probs.npy │ ├── pts_in_hull.npy │ ├── softmax_cross_entropy_loss_layer.cpp │ ├── softmax_cross_entropy_loss_layer.cu │ └── softmax_cross_entropy_loss_layer.hpp └── video_colorize_parallel.py ├── README.md ├── Ref-GAN-Colorization ├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── build_dataset.py ├── dataset.py ├── main.py ├── models.py ├── models_base.py ├── models_no-discriminator.py ├── networks.py ├── ops.py ├── options.py ├── requirements.txt ├── setup.cfg ├── src │ ├── __init__.py │ ├── build_dataset.py │ ├── dataset.py │ ├── main.py │ ├── models.py │ ├── models_base.py │ ├── models_baseline_img.py │ ├── networks.py │ ├── ops.py │ ├── options.py │ ├── test-eval.py │ ├── test-turing.py │ ├── train.py │ ├── utils.py │ └── video_colorize_GAN.py ├── test-eval.py ├── test-turing.py ├── train.py ├── utils.py └── video_colorize_GAN.py ├── automatic-video-colorization.pdf ├── configuration.txt ├── convert_moment_dataset.sh ├── converter.py ├── cs230_poster.pdf ├── requirements.txt └── synthesize_results.py /.gitignore: -------------------------------------------------------------------------------- 1 | data/** 2 | yt8m/** 3 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # custom 104 | _TODO 105 | checkpoints 106 | plots 107 | vcs.xml 108 | .idea 109 | .vscode -------------------------------------------------------------------------------- /Colorizing-with-GANs/README.md: -------------------------------------------------------------------------------- 1 | # Image Colorization with Generative Adversarial Networks 2 | In this work, we generalize the colorization procedure using a conditional Deep Convolutional Generative Adversarial Network (DCGAN) as suggested by [Pix2Pix](https://arxiv.org/abs/1611.07004). The network is trained on the [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) and [Places365](http://places2.csail.mit.edu) datasets. Some of the results from the Places365 dataset are [shown here.](#places365-results) 3 | 4 | ## Prerequisites 5 | - Linux 6 | - Tensorflow 1.7 7 | - NVIDIA GPU (12 GB or 24 GB memory) + CUDA + cuDNN 8 | 9 | ## Getting Started 10 | ### Installation 11 | - Clone this repo: 12 | ```bash 13 | git clone https://github.com/ImagingLab/Colorizing-with-GANs.git 14 | cd Colorizing-with-GANs 15 | ``` 16 | - Install Tensorflow and dependencies from https://www.tensorflow.org/install/ 17 | - Install python requirements: 18 | ```bash 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | ### Dataset 23 | - We use the [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) and [Places365](http://places2.csail.mit.edu) datasets. To train a model on the full dataset, download the datasets from the official websites. 24 | After downloading, put them under the `datasets` folder.
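The scripts expect roughly the following layout; this is inferred from the glob patterns in `dataset.py` (`data_256/**/*.jpg` and `val_256/*.jpg` for Places365, `data_batch_1`–`data_batch_5` and `test_batch` for CIFAR-10) and the `--dataset-path` flags used in the training commands below, so adjust it to however you actually unpack the archives:

```
dataset/
├── places365/
│   ├── data_256/      # training images, searched recursively for *.jpg
│   └── val_256/       # validation images (*.jpg)
└── cifar10/
    ├── data_batch_1 … data_batch_5
    └── test_batch
```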
25 | 26 | ### Training 27 | - To train the model, run the `main.py` script: 28 | ```bash 29 | python main.py 30 | ``` 31 | - To train the model on the Places365 dataset with tuned hyperparameters: 32 | ``` 33 | python train.py \ 34 | --seed 100 \ 35 | --dataset places365 \ 36 | --dataset-path ./dataset/places365 \ 37 | --checkpoints-path ./checkpoints \ 38 | --batch-size 16 \ 39 | --epochs 10 \ 40 | --lr 3e-4 \ 41 | --label-smoothing 1 42 | 43 | ``` 44 | 45 | - To train the model on the CIFAR-10 dataset with tuned hyperparameters: 46 | ``` 47 | python train.py \ 48 | --seed 100 \ 49 | --dataset cifar10 \ 50 | --dataset-path ./dataset/cifar10 \ 51 | --checkpoints-path ./checkpoints \ 52 | --batch-size 128 \ 53 | --epochs 200 \ 54 | --lr 3e-4 \ 55 | --lr-decay-steps 1e4 \ 56 | --augment True 57 | 58 | ``` 59 | 60 | ### Evaluate 61 | - Download the pre-trained weights [from here](https://drive.google.com/open?id=1jTsAUAKrMiHO2gn7s-fFZ_zUSzgKoPyp) and copy them into the `checkpoints` folder. 62 | - To evaluate the model quantitatively on the test set, run the `test-eval.py` script: 63 | ```bash 64 | python test-eval.py 65 | ``` 66 | 67 | ### Turing Test 68 | - Download the pre-trained weights [from here](https://drive.google.com/open?id=1jTsAUAKrMiHO2gn7s-fFZ_zUSzgKoPyp) and copy them into the `checkpoints` folder. 69 | - To evaluate the model qualitatively using a visual Turing test, run `test-turing.py`: 70 | ```bash 71 | python test-turing.py 72 | ``` 73 | 74 | - To apply a time-based visual Turing test (2-second decision time), run: 75 | ```bash 76 | python test-turing.py --test-delay 2 77 | ``` 78 | 79 | 80 | ## Method 81 | 82 | ### Generative Adversarial Network 83 | Both the generator and the discriminator use CNNs. The generator is trained to minimize the probability that the discriminator makes a correct prediction on generated data, while the discriminator is trained to maximize the probability of assigning the correct label. This is presented as a single minimax game problem:
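Written out, the standard form of this objective (from the original GAN formulation) is

$$\min_G \max_D \; \mathbb{E}_{x \sim p_{data}(x)}\big[\log D(x)\big] + \mathbb{E}_{z \sim p_z(z)}\big[\log\big(1 - D(G(z))\big)\big]$$

where `G` maps a noise vector `z` to a generated sample and `D` outputs the probability that its input is real.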


87 | In our model, we have redefined the generator's cost function by maximizing the probability of the discriminator being mistaken, as opposed to minimizing the probability of the discriminator being correct. In addition, the cost function was further modified by adding an L1-based regularizer. This will theoretically preserve the structure of the original images and prevent the generator from assigning arbitrary colors to pixels just to fool the discriminator:
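One way to write the resulting generator objective is the non-saturating loss plus an L1 term weighted by a factor λ (exposed as the `--l1-weight` option, default 100.0); the exact notation in the paper may differ slightly:

$$\min_G \; -\mathbb{E}\big[\log D(G(x))\big] + \lambda \, \mathbb{E}\big[\lVert G(x) - y \rVert_1\big]$$

where `x` is the grayscale input and `y` the ground-truth color image.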


91 | 92 | ### Conditional GAN 93 | In a traditional GAN, the input of the generator is randomly generated noise data z. However, this approach is not applicable to the automatic colorization problem due to the nature of its inputs. The generator must be modified to accept grayscale images as inputs rather than noise. This problem was addressed by using a variant of GANs called [conditional generative adversarial networks](https://arxiv.org/abs/1411.1784). Since no noise is introduced, the input of the generator is treated as zero noise with the grayscale input as a prior:
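Concretely, with `x` the grayscale image and `0_z` a zero noise vector, the generator produces

$$\hat{y} = G(0_z \,|\, x)$$

instead of `G(z)`, so its output is driven entirely by the grayscale condition.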


97 | The discriminator gets colored images from both the generator and the original data, along with the grayscale input as the condition, and tries to tell which pair contains the true colored image:
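A sketch of the full conditional objective, following the conditional-GAN formulation linked above, is

$$\min_G \max_D \; \mathbb{E}_{x,y}\big[\log D(y \,|\, x)\big] + \mathbb{E}_{x}\big[\log\big(1 - D(G(0_z \,|\, x) \,|\, x)\big)\big]$$

with the discriminator always receiving the grayscale image `x` alongside either the real color image `y` or the generated one.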


101 | 102 | ### Network Architecture 103 | The architecture of the generator is inspired by [U-Net](https://arxiv.org/abs/1505.04597): the model is symmetric, with `n` encoding units and `n` decoding units. The contracting path consists of 4x4 convolution layers with stride 2 for downsampling, each followed by batch normalization and a Leaky-ReLU activation function with a slope of 0.2. The number of channels is doubled after each step. Each unit in the expansive path consists of a 4x4 transposed convolutional layer with stride 2 for upsampling, concatenation with the activation map of the mirrored layer in the contracting path, followed by batch normalization and a ReLU activation function. The last layer of the network is a 1x1 convolution, which is equivalent to a cross-channel parametric pooling layer. We use the `tanh` function for the last layer.
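As a rough illustration of how this maps onto the `Generator` class in `networks.py`: each kernel entry is a `(filters, stride, dropout)` triple, and (as implemented in `Generator.create`) the decoder list needs one entry fewer than the encoder list for the skip-connection indexing to work out. The configuration below is hypothetical, chosen only to satisfy those shape constraints; the configurations actually used for training live elsewhere in the code (presumably `models.py`):

```python
import tensorflow as tf
from networks import Generator

# Hypothetical U-Net configuration: each entry is (filters, stride, dropout).
# The first encoder unit keeps full resolution so the last skip connection
# matches the output size; the decoder has one unit fewer than the encoder.
encoder_kernels = [(64, 1, 0), (128, 2, 0), (256, 2, 0), (512, 2, 0)]
decoder_kernels = [(256, 2, 0.5), (128, 2, 0), (64, 2, 0)]

gen = Generator('generator', encoder_kernels, decoder_kernels, output_channels=3)

# One grayscale channel in, three color channels out (values in [-1, 1] via tanh).
grayscale = tf.placeholder(tf.float32, [None, 256, 256, 1])
colorized = gen.create(inputs=grayscale, kernel_size=4, seed=42)
```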


107 | 108 | For the discriminator, we use an architecture similar to the baseline's contracting path: a series of 4x4 convolutional layers with stride 2, with the number of channels doubled after each downsampling step. All convolution layers are followed by batch normalization and a leaky ReLU activation with slope 0.2. After the last layer, a convolution is applied to map to a 1-dimensional output, followed by a sigmoid function to return a probability of the input being real or fake.
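A matching sketch for the `Discriminator` class in `networks.py`, again with a hypothetical kernel list: the grayscale condition is stacked with the color image along the channel axis, and the final 4x4, stride-1 convolution in `Discriminator.create` produces one logit per spatial patch, to which a sigmoid can be applied:

```python
import tensorflow as tf
from networks import Discriminator

# Hypothetical configuration: (filters, stride, dropout) per unit.
dis_kernels = [(64, 2, 0), (128, 2, 0), (256, 2, 0), (512, 1, 0)]
dis = Discriminator('discriminator', dis_kernels)

grayscale = tf.placeholder(tf.float32, [None, 256, 256, 1])
color = tf.placeholder(tf.float32, [None, 256, 256, 3])

# Condition the discriminator on the grayscale input by channel-wise stacking.
logits = dis.create(inputs=tf.concat([grayscale, color], axis=3), kernel_size=4, seed=42)
prob_real = tf.nn.sigmoid(logits)
```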


112 | 113 | ## Places365 Results 114 | Colorization results with Places365. (a) Grayscale. (b) Original Image. (c) Colorized with GAN.


118 | 119 | ## Citation 120 | If you use this code for your research, please cite our paper Image Colorization with Generative Adversarial Networks: 121 | 122 | ``` 123 | @inproceedings{nazeri2018image, 124 | title={Image Colorization Using Generative Adversarial Networks}, 125 | author={Nazeri, Kamyar and Ng, Eric and Ebrahimi, Mehran}, 126 | booktitle={International Conference on Articulated Motion and Deformable Objects}, 127 | pages={85--94}, 128 | year={2018}, 129 | organization={Springer} 130 | } 131 | ``` 132 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/__init__.py: -------------------------------------------------------------------------------- 1 | from .options import * 2 | from .models import * 3 | from .utils import * 4 | from .dataset import * 5 | from .main import * -------------------------------------------------------------------------------- /Colorizing-with-GANs/build_dataset.py: -------------------------------------------------------------------------------- 1 | """Split the SIGNS dataset into train/dev/test and resize images to 64x64. 2 | 3 | The SIGNS dataset comes in the following format: 4 | train_signs/ 5 | 0_IMG_5864.jpg 6 | ... 7 | test_signs/ 8 | 0_IMG_5942.jpg 9 | ... 10 | 11 | Original images have size (3024, 3024). 12 | Resizing to (64, 64) reduces the dataset size from 1.16 GB to 4.7 MB, and loading smaller images 13 | makes training faster. 14 | 15 | We already have a test set created, so we only need to split "train_signs" into train and dev sets. 16 | Because we don't have a lot of images and we want that the statistics on the dev set be as 17 | representative as possible, we'll take 20% of "train_signs" as dev set. 18 | """ 19 | 20 | import argparse 21 | import random 22 | import os 23 | 24 | import numpy as np 25 | 26 | from PIL import Image 27 | from tqdm import tqdm 28 | import cv2 29 | 30 | # size of the resized frames 31 | SIZE = 256 32 | 33 | # subfolder of the "Moments_in_Time" dataset to consider 34 | SUBFOLDER = "/baking" 35 | 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--data_dir', default='../data/Moments_in_Time_Mini', help="Directory with the Moments in Time dataset") 38 | parser.add_argument('--output_dir', default='../data/momentsintime', help="Where to write the new data") 39 | parser.add_argument('--dt', type=int, default=1, help="Time between consecutives frames") 40 | 41 | 42 | def split_resize_and_save(filename, i, output_dir, dt=1, size=SIZE): 43 | """Split the video clip in pair of consecutive frames (t, t+dt), resize the frames, and save the pairs to the `output_dir`""" 44 | 45 | vidcap = cv2.VideoCapture(filename) 46 | 47 | success, frame = vidcap.read() 48 | # convert BGR to RGB convention 49 | frame = frame[:,:,::-1] 50 | # default : use bilinear interpolation 51 | frame_prev = cv2.resize(frame, (size, size)) 52 | 53 | # counter to build pairs of consecutive frames 54 | count = 1 55 | 56 | while success: 57 | count += 1 58 | 59 | success, frame = vidcap.read() 60 | 61 | if success: 62 | # convert BGR to RGB convention 63 | frame = frame[:,:,::-1] 64 | # default : use bilinear interpolation 65 | frame = cv2.resize(frame, (size, size)) 66 | else: 67 | break 68 | #print('Read a new frame: ', success) 69 | 70 | if count % (1+dt) == 0: 71 | img = np.concatenate((frame, frame_prev), 2) 72 | frame_prev = frame 73 | np.save(output_dir + "/video{}_frame{}".format(i, count), img) 74 | 75 | if __name__ == '__main__': 76 | args = parser.parse_args() 77 | # 
Define the output directory 78 | args.output_dir = args.output_dir + "_dt" + str(args.dt) 79 | 80 | assert os.path.isdir(args.data_dir), "Couldn't find the dataset at {}".format(args.data_dir) 81 | 82 | # Define the data directories 83 | train_data_dir = os.path.join(args.data_dir, 'training' + SUBFOLDER) 84 | test_data_dir = os.path.join(args.data_dir, 'validation' + SUBFOLDER) 85 | 86 | # Get the filenames in each directory (train and test) 87 | filenames = os.listdir(train_data_dir) 88 | filenames = [os.path.join(train_data_dir, f) for f in filenames if f.endswith('.mp4')] 89 | 90 | test_filenames = os.listdir(test_data_dir) 91 | test_filenames = [os.path.join(test_data_dir, f) for f in test_filenames if f.endswith('.mp4')] 92 | 93 | # Split the images in 'train_moments' into 80% train and 20% dev 94 | # Make sure to always shuffle with a fixed seed so that the split is reproducible 95 | random.seed(230) 96 | filenames.sort() 97 | random.shuffle(filenames) 98 | 99 | split = int(0.9 * len(filenames)) 100 | train_filenames = filenames[:split] 101 | dev_filenames = filenames[split:] 102 | 103 | filenames = {'train': train_filenames, 104 | 'dev': dev_filenames, 105 | 'test': test_filenames} 106 | 107 | if not os.path.exists(args.output_dir): 108 | os.mkdir(args.output_dir) 109 | else: 110 | print("Warning: output dir {} already exists".format(args.output_dir)) 111 | 112 | # Preprocess train, dev and test 113 | for split in ['train', 'dev', 'test']: 114 | output_dir_split = os.path.join(args.output_dir, '{}_moments'.format(split)) 115 | if not os.path.exists(output_dir_split): 116 | os.mkdir(output_dir_split) 117 | else: 118 | print("Warning: dir {} already exists".format(output_dir_split)) 119 | 120 | print("Processing {} data, saving preprocessed data to {}".format(split, output_dir_split)) 121 | for i, filename in enumerate(tqdm(filenames[split])): 122 | split_resize_and_save(filename, i, output_dir_split, dt=args.dt, size=SIZE) 123 | 124 | print("Done building dataset") 125 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/dataset.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import tensorflow as tf 4 | from scipy.misc import imread 5 | from abc import abstractmethod 6 | from utils import unpickle 7 | 8 | CIFAR10_DATASET = 'cifar10' 9 | PLACES365_DATASET = 'places365' 10 | MOMENTSINTIME_DATASET = 'momentsintime' 11 | 12 | class BaseDataset(): 13 | def __init__(self, name, path, training=True, augment=True): 14 | self.name = name 15 | self.augment = augment and training 16 | self.training = training 17 | self.path = path 18 | self._data = [] 19 | 20 | def __len__(self): 21 | return len(self.data) 22 | 23 | def __iter__(self): 24 | total = len(self) 25 | start = 0 26 | 27 | while start < total: 28 | item = self[start] 29 | start += 1 30 | yield item 31 | 32 | raise StopIteration 33 | 34 | def __getitem__(self, index): 35 | val = self.data[index] 36 | try: 37 | # OLD : img = imread(val) if isinstance(val, str) else val 38 | img = np.load(val) if isinstance(val, str) else val 39 | 40 | if self.augment and np.random.binomial(1, 0.5) == 1: 41 | img = img[:, ::-1, :] 42 | 43 | except: 44 | img = None 45 | 46 | return img 47 | 48 | def generator(self, batch_size, recursive=False): 49 | start = 0 50 | total = len(self) 51 | 52 | while True: 53 | while start < total: 54 | end = np.min([start + batch_size, total]) 55 | items = [] 56 | 57 | for ix in range(start, 
end): 58 | item = self[ix] 59 | if item is not None: 60 | items.append(item) 61 | 62 | start = end 63 | yield np.array(items) 64 | 65 | if recursive: 66 | start = 0 67 | 68 | else: 69 | raise StopIteration 70 | 71 | 72 | @property 73 | def data(self): 74 | if len(self._data) == 0: 75 | self._data = self.load() 76 | np.random.shuffle(self._data) 77 | 78 | return self._data 79 | 80 | @abstractmethod 81 | def load(self): 82 | return [] 83 | 84 | 85 | class Cifar10Dataset(BaseDataset): 86 | def __init__(self, path, training=True, augment=True): 87 | super(Cifar10Dataset, self).__init__(CIFAR10_DATASET, path, training, augment) 88 | 89 | def load(self): 90 | data = [] 91 | if self.training: 92 | for i in range(1, 6): 93 | filename = '{}/data_batch_{}'.format(self.path, i) 94 | batch_data = unpickle(filename) 95 | if len(data) > 0: 96 | data = np.vstack((data, batch_data[b'data'])) 97 | else: 98 | data = batch_data[b'data'] 99 | 100 | else: 101 | filename = '{}/test_batch'.format(self.path) 102 | batch_data = unpickle(filename) 103 | data = batch_data[b'data'] 104 | 105 | w = 32 106 | h = 32 107 | s = w * h 108 | data = np.array(data) 109 | data = np.dstack((data[:, :s], data[:, s:2 * s], data[:, 2 * s:])) 110 | data = data.reshape((-1, w, h, 3)) 111 | return data 112 | 113 | 114 | class Places365Dataset(BaseDataset): 115 | def __init__(self, path, training=True, augment=True): 116 | super(Places365Dataset, self).__init__(PLACES365_DATASET, path, training, augment) 117 | 118 | def load(self): 119 | if self.training: 120 | data = np.array( 121 | glob.glob(self.path + '/data_256/**/*.jpg', recursive=True)) 122 | 123 | else: 124 | data = np.array(glob.glob(self.path + '/val_256/*.jpg')) 125 | 126 | return data 127 | 128 | 129 | class MomentsInTimeDataset(BaseDataset): 130 | def __init__(self, path, training=True, augment=True): 131 | super(MomentsInTimeDataset, self).__init__(MOMENTSINTIME_DATASET, path, training, augment) 132 | 133 | def load(self): 134 | if self.training: 135 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/train_moments/*")) 136 | data = np.array(glob.glob("." + self.path + "/train_moments/*")) 137 | else: 138 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/dev_moments/*")) 139 | data = np.array(glob.glob("." 
+ self.path + "/dev_moments/*")) 140 | 141 | return data 142 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/img/cgan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/cgan.png -------------------------------------------------------------------------------- /Colorizing-with-GANs/img/con_gan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/con_gan.png -------------------------------------------------------------------------------- /Colorizing-with-GANs/img/discriminator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/discriminator.png -------------------------------------------------------------------------------- /Colorizing-with-GANs/img/gan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/gan.png -------------------------------------------------------------------------------- /Colorizing-with-GANs/img/gan_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/gan_new.png -------------------------------------------------------------------------------- /Colorizing-with-GANs/img/places365.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/places365.jpg -------------------------------------------------------------------------------- /Colorizing-with-GANs/img/places365.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/places365.png -------------------------------------------------------------------------------- /Colorizing-with-GANs/img/unet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/unet.png -------------------------------------------------------------------------------- /Colorizing-with-GANs/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | import tensorflow as tf 5 | from options import ModelOptions 6 | from models import Cifar10Model, Places365Model, MomentsInTimeModel 7 | from dataset import CIFAR10_DATASET, PLACES365_DATASET, MOMENTSINTIME_DATASET 8 | 9 | 10 | def main(options): 11 | 12 | # reset tensorflow graph 13 | tf.reset_default_graph() 14 | 15 | # initialize random seed 16 | tf.set_random_seed(options.seed) 17 | np.random.seed(options.seed) 18 | 
random.seed(options.seed) 19 | 20 | # create a session environment 21 | with tf.Session() as sess: 22 | 23 | if options.dataset == CIFAR10_DATASET: 24 | model = Cifar10Model(sess, options) 25 | 26 | elif options.dataset == PLACES365_DATASET: 27 | model = Places365Model(sess, options) 28 | 29 | elif options.dataset == MOMENTSINTIME_DATASET: 30 | model = MomentsInTimeModel(sess, options) 31 | 32 | else: 33 | model = MomentsInTimeModel(sess, options) 34 | 35 | if not os.path.exists(options.checkpoints_path): 36 | os.makedirs(options.checkpoints_path) 37 | 38 | if options.log: 39 | open(model.train_log_file, 'w').close() 40 | open(model.test_log_file, 'w').close() 41 | 42 | # build the model and initialize 43 | model.build() 44 | sess.run(tf.global_variables_initializer()) 45 | 46 | 47 | # load model only after global variables initialization 48 | model.load() 49 | 50 | 51 | if options.mode == 0: 52 | args = vars(options) 53 | print('\n------------ Options -------------') 54 | with open(os.path.join(options.checkpoints_path, 'options.dat'), 'w') as f: 55 | for k, v in sorted(args.items()): 56 | print('%s: %s' % (str(k), str(v))) 57 | f.write('%s: %s\n' % (str(k), str(v))) 58 | print('-------------- End ----------------\n') 59 | 60 | model.train() 61 | 62 | elif options.mode == 1: 63 | model.evaluate() 64 | while True: 65 | model.sample() 66 | 67 | else: 68 | model.turing_test() 69 | 70 | 71 | if __name__ == "__main__": 72 | main(ModelOptions().parse()) 73 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/networks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from ops import conv2d, conv2d_transpose, pixelwise_accuracy 4 | 5 | 6 | class Discriminator(object): 7 | def __init__(self, name, kernels): 8 | self.name = name 9 | self.kernels = kernels 10 | self.var_list = [] 11 | 12 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None): 13 | output = inputs 14 | with tf.variable_scope(self.name, reuse=reuse_variables): 15 | for index, kernel in enumerate(self.kernels): 16 | 17 | # not use batch-norm in the first layer 18 | bnorm = False if index == 0 else True 19 | name = 'conv' + str(index) 20 | output = conv2d( 21 | inputs=output, 22 | name=name, 23 | kernel_size=kernel_size, 24 | filters=kernel[0], 25 | strides=kernel[1], 26 | bnorm=bnorm, 27 | activation=tf.nn.leaky_relu, 28 | seed=seed 29 | ) 30 | 31 | if kernel[2] > 0: 32 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed) 33 | 34 | output = conv2d( 35 | inputs=output, 36 | name='conv_last', 37 | filters=1, 38 | kernel_size=4, # last layer kernel size = 4 39 | strides=1, # last layer stride = 1 40 | bnorm=False, # do not use batch-norm for the last layer 41 | seed=seed 42 | ) 43 | 44 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name) 45 | 46 | return output 47 | 48 | 49 | class Generator(object): 50 | def __init__(self, name, encoder_kernels, decoder_kernels, output_channels=3): 51 | self.name = name 52 | self.encoder_kernels = encoder_kernels 53 | self.decoder_kernels = decoder_kernels 54 | self.output_channels = output_channels 55 | self.var_list = [] 56 | 57 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None): 58 | output = inputs 59 | 60 | with tf.variable_scope(self.name, reuse=reuse_variables): 61 | 62 | layers = [] 63 | 64 | # encoder branch 65 | for index, kernel in 
enumerate(self.encoder_kernels): 66 | 67 | name = 'conv' + str(index) 68 | output = conv2d( 69 | inputs=output, 70 | name=name, 71 | kernel_size=kernel_size, 72 | filters=kernel[0], 73 | strides=kernel[1], 74 | activation=tf.nn.leaky_relu, 75 | seed=seed 76 | ) 77 | 78 | # save contracting path layers to be used for skip connections 79 | layers.append(output) 80 | 81 | if kernel[2] > 0: 82 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed) 83 | 84 | # decoder branch 85 | for index, kernel in enumerate(self.decoder_kernels): 86 | 87 | name = 'deconv' + str(index) 88 | output = conv2d_transpose( 89 | inputs=output, 90 | name=name, 91 | kernel_size=kernel_size, 92 | filters=kernel[0], 93 | strides=kernel[1], 94 | activation=tf.nn.relu, 95 | seed=seed 96 | ) 97 | 98 | if kernel[2] > 0: 99 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed) 100 | 101 | # concat the layer from the contracting path with the output of the current layer 102 | # concat only the channels (axis=3) 103 | output = tf.concat([layers[len(layers) - index - 2], output], axis=3) 104 | 105 | output = conv2d( 106 | inputs=output, 107 | name='conv_last', 108 | filters=self.output_channels, # number of output chanels 109 | kernel_size=1, # last layer kernel size = 1 110 | strides=1, # last layer stride = 1 111 | bnorm=False, # do not use batch-norm for the last layer 112 | activation=tf.nn.tanh, # tanh activation function for the output 113 | seed=seed 114 | ) 115 | 116 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name) 117 | 118 | return output 119 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/ops.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | COLORSPACE_RGB = 'RGB' 5 | COLORSPACE_LAB = 'LAB' 6 | tf.nn.softmax_cross_entropy_with_logits_v2 7 | 8 | def conv2d(inputs, filters, name, kernel_size=4, strides=2, bnorm=True, activation=None, seed=None): 9 | """ 10 | Creates a conv2D block 11 | """ 12 | initializer=tf.variance_scaling_initializer(seed=seed) 13 | res = tf.layers.conv2d( 14 | name=name, 15 | inputs=inputs, 16 | filters=filters, 17 | kernel_size=kernel_size, 18 | strides=strides, 19 | padding="same", 20 | kernel_initializer=initializer) 21 | 22 | if bnorm: 23 | res = tf.layers.batch_normalization(inputs=res, name='bn_' + name, training=True) 24 | 25 | # activation after batch-norm 26 | if activation is not None: 27 | res = activation(res) 28 | 29 | return res 30 | 31 | 32 | def conv2d_transpose(inputs, filters, name, kernel_size=4, strides=2, bnorm=True, activation=None, seed=None): 33 | """ 34 | Creates a conv2D-transpose block 35 | """ 36 | initializer=tf.variance_scaling_initializer(seed=seed) 37 | res = tf.layers.conv2d_transpose( 38 | name=name, 39 | inputs=inputs, 40 | filters=filters, 41 | kernel_size=kernel_size, 42 | strides=strides, 43 | padding="same", 44 | kernel_initializer=initializer) 45 | 46 | if bnorm: 47 | res = tf.layers.batch_normalization(inputs=res, name='bn_' + name, training=True) 48 | 49 | # activation after batch-norm 50 | if activation is not None: 51 | res = activation(res) 52 | 53 | return res 54 | 55 | 56 | def pixelwise_accuracy(img_real, img_fake, colorspace, thresh): 57 | """ 58 | Measures the accuracy of the colorization process by comparing pixels 59 | """ 60 | img_real = postprocess(img_real, colorspace, COLORSPACE_LAB) 61 
| img_fake = postprocess(img_fake, colorspace, COLORSPACE_LAB) 62 | 63 | diffL = tf.abs(tf.round(img_real[..., 0]) - tf.round(img_fake[..., 0])) 64 | diffA = tf.abs(tf.round(img_real[..., 1]) - tf.round(img_fake[..., 1])) 65 | diffB = tf.abs(tf.round(img_real[..., 2]) - tf.round(img_fake[..., 2])) 66 | 67 | # within %thresh of the original 68 | predL = tf.cast(tf.less_equal(diffL, 1 * thresh), tf.float64) # L: [0, 100] 69 | predA = tf.cast(tf.less_equal(diffA, 2.2 * thresh), tf.float64) # A: [-110, 110] 70 | predB = tf.cast(tf.less_equal(diffB, 2.2 * thresh), tf.float64) # B: [-110, 110] 71 | 72 | # all three channels are within the threshold 73 | pred = predL * predA * predB 74 | 75 | return tf.reduce_mean(pred) 76 | 77 | 78 | def preprocess(img, colorspace_in, colorspace_out): 79 | if colorspace_out.upper() == COLORSPACE_RGB: 80 | if colorspace_in == COLORSPACE_LAB: 81 | img = lab_to_rgb(img) 82 | 83 | # [0, 1] => [-1, 1] 84 | img = (img / 255.0) * 2 - 1 85 | 86 | elif colorspace_out.upper() == COLORSPACE_LAB: 87 | if colorspace_in == COLORSPACE_RGB: 88 | img = rgb_to_lab(img / 255.0) 89 | 90 | L_chan, a_chan, b_chan = tf.unstack(img, axis=3) 91 | 92 | # L: [0, 100] => [-1, 1] 93 | # A, B: [-110, 110] => [-1, 1] 94 | img = tf.stack([L_chan / 50 - 1, a_chan / 110, b_chan / 110], axis=3) 95 | 96 | return img 97 | 98 | 99 | def postprocess(img, colorspace_in, colorspace_out): 100 | if colorspace_in.upper() == COLORSPACE_RGB: 101 | # [-1, 1] => [0, 1] 102 | img = (img + 1) / 2 103 | 104 | if colorspace_out == COLORSPACE_LAB: 105 | img = rgb_to_lab(img) 106 | 107 | elif colorspace_in.upper() == COLORSPACE_LAB: 108 | L_chan, a_chan, b_chan = tf.unstack(img, axis=3) 109 | 110 | # L: [-1, 1] => [0, 100] 111 | # A, B: [-1, 1] => [-110, 110] 112 | img = tf.stack([(L_chan + 1) / 2 * 100, a_chan * 110, b_chan * 110], axis=3) 113 | 114 | if colorspace_out == COLORSPACE_RGB: 115 | img = lab_to_rgb(img) 116 | 117 | return img 118 | 119 | 120 | def rgb_to_lab(srgb): 121 | # based on https://github.com/torch/image/blob/9f65c30167b2048ecbe8b7befdc6b2d6d12baee9/generic/image.c 122 | with tf.name_scope("rgb_to_lab"): 123 | srgb_pixels = tf.reshape(srgb, [-1, 3]) 124 | 125 | with tf.name_scope("srgb_to_xyz"): 126 | linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32) 127 | exponential_mask = tf.cast(srgb_pixels > 0.04045, dtype=tf.float32) 128 | rgb_pixels = (srgb_pixels / 12.92 * linear_mask) + (((srgb_pixels + 0.055) / 1.055) ** 2.4) * exponential_mask 129 | rgb_to_xyz = tf.constant([ 130 | # X Y Z 131 | [0.412453, 0.212671, 0.019334], # R 132 | [0.357580, 0.715160, 0.119193], # G 133 | [0.180423, 0.072169, 0.950227], # B 134 | ]) 135 | xyz_pixels = tf.matmul(rgb_pixels, rgb_to_xyz) 136 | 137 | # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions 138 | with tf.name_scope("xyz_to_cielab"): 139 | 140 | # normalize for D65 white point 141 | xyz_normalized_pixels = tf.multiply(xyz_pixels, [1 / 0.950456, 1.0, 1 / 1.088754]) 142 | 143 | epsilon = 6 / 29 144 | linear_mask = tf.cast(xyz_normalized_pixels <= (epsilon**3), dtype=tf.float32) 145 | exponential_mask = tf.cast(xyz_normalized_pixels > (epsilon**3), dtype=tf.float32) 146 | fxfyfz_pixels = (xyz_normalized_pixels / (3 * epsilon**2) + 4 / 29) * linear_mask + (xyz_normalized_pixels ** (1 / 3)) * exponential_mask 147 | 148 | # convert to lab 149 | fxfyfz_to_lab = tf.constant([ 150 | # l a b 151 | [0.0, 500.0, 0.0], # fx 152 | [116.0, -500.0, 200.0], # fy 153 | [0.0, 0.0, -200.0], # fz 154 | ]) 155 | lab_pixels = 
tf.matmul(fxfyfz_pixels, fxfyfz_to_lab) + tf.constant([-16.0, 0.0, 0.0]) 156 | 157 | return tf.reshape(lab_pixels, tf.shape(srgb)) 158 | 159 | 160 | def lab_to_rgb(lab): 161 | with tf.name_scope("lab_to_rgb"): 162 | lab_pixels = tf.reshape(lab, [-1, 3]) 163 | 164 | # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions 165 | with tf.name_scope("cielab_to_xyz"): 166 | # convert to fxfyfz 167 | lab_to_fxfyfz = tf.constant([ 168 | # fx fy fz 169 | [1 / 116.0, 1 / 116.0, 1 / 116.0], # l 170 | [1 / 500.0, 0.0, 0.0], # a 171 | [0.0, 0.0, -1 / 200.0], # b 172 | ]) 173 | fxfyfz_pixels = tf.matmul(lab_pixels + tf.constant([16.0, 0.0, 0.0]), lab_to_fxfyfz) 174 | 175 | # convert to xyz 176 | epsilon = 6 / 29 177 | linear_mask = tf.cast(fxfyfz_pixels <= epsilon, dtype=tf.float32) 178 | exponential_mask = tf.cast(fxfyfz_pixels > epsilon, dtype=tf.float32) 179 | xyz_pixels = (3 * epsilon**2 * (fxfyfz_pixels - 4 / 29)) * linear_mask + (fxfyfz_pixels ** 3) * exponential_mask 180 | 181 | # denormalize for D65 white point 182 | xyz_pixels = tf.multiply(xyz_pixels, [0.950456, 1.0, 1.088754]) 183 | 184 | with tf.name_scope("xyz_to_srgb"): 185 | xyz_to_rgb = tf.constant([ 186 | # r g b 187 | [3.2404542, -0.9692660, 0.0556434], # x 188 | [-1.5371385, 1.8760108, -0.2040259], # y 189 | [-0.4985314, 0.0415560, 1.0572252], # z 190 | ]) 191 | rgb_pixels = tf.matmul(xyz_pixels, xyz_to_rgb) 192 | # avoid a slightly negative number messing up the conversion 193 | rgb_pixels = tf.clip_by_value(rgb_pixels, 0.0, 1.0) 194 | linear_mask = tf.cast(rgb_pixels <= 0.0031308, dtype=tf.float32) 195 | exponential_mask = tf.cast(rgb_pixels > 0.0031308, dtype=tf.float32) 196 | srgb_pixels = (rgb_pixels * 12.92 * linear_mask) + ((rgb_pixels ** (1 / 2.4) * 1.055) - 0.055) * exponential_mask 197 | 198 | return tf.reshape(srgb_pixels, tf.shape(lab)) 199 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/options.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import random 4 | import argparse 5 | 6 | 7 | def str2bool(v): 8 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 9 | return True 10 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 11 | return False 12 | else: 13 | raise argparse.ArgumentTypeError('Boolean value expected.') 14 | 15 | 16 | class ModelOptions: 17 | def __init__(self): 18 | parser = argparse.ArgumentParser(description='Colorization with GANs') 19 | parser.add_argument('--seed', type=int, default=0, metavar='S', help='random seed (default: 0)') 20 | parser.add_argument('--name', type=str, default='CGAN', help='arbitrary model name (default: CGAN)') 21 | parser.add_argument('--mode', default=0, help='run mode [0: train, 1: evaluate, 2: test] (default: 0)') 22 | parser.add_argument('--dataset', type=str, default='momentsintime', help='the name of dataset [places365, cifar10] (default: momentsintime)') 23 | parser.add_argument('--dataset-path', type=str, default='./data', help='dataset path (default: ./data)') 24 | parser.add_argument('--checkpoints-path', type=str, default='./checkpoints', help='models are saved here (default: ./checkpoints)') 25 | parser.add_argument('--batch-size', type=int, default=16, metavar='N', help='input batch size for training (default: 16)') 26 | parser.add_argument('--color-space', type=str, default='lab', help='model color space [lab, rgb] (default: lab)') 27 | parser.add_argument('--epochs', type=int, default=100, 
metavar='N', help='number of epochs to train (default: 30)') 28 | parser.add_argument('--lr', type=float, default=5e-3, metavar='LR', help='learning rate (default: 3e-4)') 29 | parser.add_argument('--lr-decay-rate', type=float, default=0.1, help='learning rate exponentially decay rate (default: 0.1)') 30 | parser.add_argument('--lr-decay-steps', type=float, default=25e2, help='learning rate exponentially decay steps (default: 5e5)') 31 | parser.add_argument('--beta1', type=float, default=0, help='momentum term of adam optimizer (default: 0)') 32 | parser.add_argument("--l1-weight", type=float, default=100.0, help="weight on L1 term for generator gradient (default: 100.0)") 33 | parser.add_argument('--augment', type=str2bool, default=True, help='True for augmentation (default: True)') 34 | parser.add_argument('--label-smoothing', type=str2bool, default=False, help='True for one-sided label smoothing (default: False)') 35 | parser.add_argument('--acc-thresh', type=float, default=2.0, help="accuracy threshold (default: 2.0)") 36 | parser.add_argument('--kernel-size', type=int, default=4, help="default kernel size (default: 4)") 37 | parser.add_argument('--save', type=str2bool, default=True, help='True for saving (default: True)') 38 | parser.add_argument('--save-interval', type=int, default=100, help='how many batches to wait before saving model (default: 1000)') 39 | parser.add_argument('--sample', type=str2bool, default=True, help='True for sampling (default: True)') 40 | parser.add_argument('--sample-size', type=int, default=8, help='number of images to sample (default: 8)') 41 | parser.add_argument('--sample-interval', type=int, default=100, help='how many batches to wait before sampling (default: 1000)') 42 | parser.add_argument('--validate', type=str2bool, default=True, help='True for validation (default: True)') 43 | parser.add_argument('--validate-interval', type=int, default=0, help='how many batches to wait before validating (default: 0)') 44 | parser.add_argument('--log', type=str2bool, default=True, help='True for logging (default: True)') 45 | parser.add_argument('--log-interval', type=int, default=10, help='how many iterations to wait before logging training status (default: 10)') 46 | parser.add_argument('--visualize', type=str2bool, default=False, help='True for accuracy visualization (default: False)') 47 | parser.add_argument('--visualize-window', type=int, default=100, help='the exponentially moving average window width (default: 100)') 48 | parser.add_argument('--test-size', type=int, default=100, metavar='N', help='number of Turing tests (default: 100)') 49 | parser.add_argument('--test-delay', type=int, default=0, metavar='N', help='number of seconds to wait when doing Turing test, 0 for unlimited (default: 0)') 50 | parser.add_argument('--gpu-ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. 
use -1 for CPU') 51 | # to recolorize a video clip 52 | parser.add_argument('--filename', type=str, default='*', help='Filename of input BW video') 53 | parser.add_argument('--input_dir', type=str, default='../data/examples/converted', help='Directory of input files') 54 | parser.add_argument('--output_dir', type=str, default='../data/examples/recolorized', help='Directory of output files') 55 | 56 | self._parser = parser 57 | 58 | def parse(self): 59 | opt = self._parser.parse_args() 60 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids 61 | 62 | opt.color_space = opt.color_space.upper() 63 | 64 | if opt.seed == 0: 65 | opt.seed = random.randint(0, 2**31 - 1) 66 | 67 | if (opt.dataset_path == './data') or (opt.dataset_path == './dataset'): 68 | opt.dataset_path += ('/' + opt.dataset) 69 | 70 | if opt.checkpoints_path == './checkpoints': 71 | opt.checkpoints_path += ('/' + opt.dataset) 72 | 73 | return opt 74 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy ~= 1.14.3 2 | scipy ~= 1.0.1 3 | future ~= 0.16.0 4 | matplotlib ~= 2.2.2 5 | pillow ~= 5.0.0 6 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | ignore = E303 3 | max-line-length = 200 -------------------------------------------------------------------------------- /Colorizing-with-GANs/test-eval.py: -------------------------------------------------------------------------------- 1 | from src import ModelOptions, main 2 | 3 | options = ModelOptions().parse() 4 | options.mode = 1 5 | main(options) 6 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/test-turing.py: -------------------------------------------------------------------------------- 1 | from src import ModelOptions, main 2 | 3 | options = ModelOptions().parse() 4 | options.mode = 2 5 | main(options) 6 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/train.py: -------------------------------------------------------------------------------- 1 | from src import ModelOptions, main 2 | 3 | options = ModelOptions().parse() 4 | options.mode = 0 5 | main(options) 6 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | from PIL import Image 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | def stitch_images(grayscale, original, pred): 8 | gap = 5 9 | width, height = original[0][:, :, 0].shape 10 | img_per_row = 2 if width > 200 else 4 11 | img = Image.new('RGB', (width * img_per_row * 3 + gap * (img_per_row - 1), height * int(len(original) / img_per_row))) 12 | 13 | grayscale = np.array(grayscale).squeeze() 14 | original = np.array(original) 15 | pred = np.array(pred) 16 | 17 | for ix in range(len(original)): 18 | xoffset = int(ix % img_per_row) * width * 3 + int(ix % img_per_row) * gap 19 | yoffset = int(ix / img_per_row) * height 20 | im1 = Image.fromarray(grayscale[ix]) 21 | im2 = Image.fromarray(original[ix]) 22 | im3 = Image.fromarray((pred[ix] * 255).astype(np.uint8)) 23 | img.paste(im1, (xoffset, yoffset)) 24 | img.paste(im2, (xoffset + width, yoffset)) 25 | 
img.paste(im3, (xoffset + width + width, yoffset)) 26 | 27 | return img 28 | 29 | 30 | def unpickle(file): 31 | with open(file, 'rb') as fo: 32 | dict = pickle.load(fo, encoding='bytes') 33 | return dict 34 | 35 | 36 | def moving_average(data, window_width): 37 | cumsum_vec = np.cumsum(np.insert(data, 0, 0)) 38 | ma_vec = (cumsum_vec[window_width:] - cumsum_vec[:-window_width]) / window_width 39 | return ma_vec 40 | 41 | 42 | def imshow(img, title=''): 43 | fig = plt.gcf() 44 | fig.canvas.set_window_title(title) 45 | plt.axis('off') 46 | plt.imshow(img, interpolation='none') 47 | plt.show() 48 | 49 | 50 | def turing_test(real_img, fake_img, delay=0): 51 | height, width, _ = real_img.shape 52 | imgs = np.array([real_img, (fake_img * 255).astype(np.uint8)]) 53 | real_index = np.random.binomial(1, 0.5) 54 | fake_index = (real_index + 1) % 2 55 | 56 | img = Image.new('RGB', (2 + width * 2, height)) 57 | img.paste(Image.fromarray(imgs[real_index]), (0, 0)) 58 | img.paste(Image.fromarray(imgs[fake_index]), (2 + width, 0)) 59 | 60 | img.success = 0 61 | 62 | def onclick(event): 63 | if event.xdata is not None: 64 | if event.x < width and real_index == 0: 65 | img.success = 1 66 | 67 | elif event.x > width and real_index == 1: 68 | img.success = 1 69 | 70 | plt.gcf().canvas.stop_event_loop() 71 | 72 | plt.ion() 73 | plt.gcf().canvas.mpl_connect('button_press_event', onclick) 74 | plt.title('click on the real image') 75 | plt.axis('off') 76 | plt.imshow(img, interpolation='none') 77 | plt.show() 78 | plt.draw() 79 | plt.gcf().canvas.start_event_loop(delay) 80 | 81 | return img.success 82 | 83 | 84 | def visualize(train_log_file, test_log_file, window_width, title=''): 85 | train_data = np.loadtxt(train_log_file) 86 | test_data = np.loadtxt(test_log_file) 87 | 88 | if len(train_data.shape) < 2: 89 | return 90 | 91 | if len(train_data) < window_width: 92 | window_width = len(train_data) - 1 93 | 94 | fig = plt.gcf() 95 | fig.canvas.set_window_title(title) 96 | 97 | plt.ion() 98 | plt.subplot('121') 99 | plt.cla() 100 | if len(train_data) > 1: 101 | plt.plot(moving_average(train_data[:, 8], window_width)) 102 | plt.title('train') 103 | 104 | plt.subplot('122') 105 | plt.cla() 106 | if len(test_data) > 1: 107 | plt.plot(test_data[:, 8]) 108 | plt.title('test') 109 | 110 | plt.show() 111 | plt.draw() 112 | plt.pause(.01) 113 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/video_colorize_GAN.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | import cv2 6 | import numpy as np 7 | from PIL import Image 8 | from skimage import img_as_ubyte, img_as_float 9 | import skimage.color as color 10 | import scipy.ndimage.interpolation as sni 11 | from ops import postprocess 12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB 13 | 14 | import tensorflow as tf 15 | from options import ModelOptions 16 | from models import MomentsInTimeModel 17 | 18 | 19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, options): 20 | 21 | # colorize the image based on the previous one 22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0)} 23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic) 24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB) 25 | 26 | # evalute the tensor 27 | 
img_rgb_out = fake_image.eval() 28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8) 29 | 30 | return img_rgb_out 31 | 32 | def bw2color(options, inputname, inputpath, outputpath): 33 | if inputname.endswith(".mp4"): 34 | # size of the input frames 35 | size = 256 36 | 37 | # check that the video exists 38 | path_to_video = os.path.join(inputpath, inputname) 39 | if not os.path.exists(path_to_video): 40 | print("The file :", path_to_video, "does not exist !") 41 | 42 | # store informations about the original video 43 | cap = cv2.VideoCapture(os.path.join(path_to_video)) 44 | # original dimensions 45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v'); 48 | # parameters of output file 49 | # dimensions of the output image 50 | new_width, new_height = size, size 51 | # number of frames 52 | fps = 30.0 53 | 54 | # recolorized output video 55 | color_out = cv2.VideoWriter( 56 | os.path.join(outputpath, 'color_' + inputname), 57 | fourcc, 58 | fps, 59 | (new_width, new_height), 60 | isColor=True 61 | ) 62 | 63 | # TO CHANGE to DL colorization of 1st frame 64 | # pick the first frame from the original video clip as the first reference 65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:])) 66 | 67 | ret_temp, frame_prev = cap_temp.read() 68 | # convert BGR to RGB convention 69 | frame_prev = frame_prev[:,:,::-1] 70 | frame_prev = cv2.resize(frame_prev, (size, size)) 71 | 72 | # count the number of recolorized frames 73 | frames_processed = 0 74 | 75 | with tf.Session() as sess: 76 | 77 | model = MomentsInTimeModel(sess, options) 78 | 79 | # build the model and initialize 80 | model.build() 81 | sess.run(tf.global_variables_initializer()) 82 | 83 | # load model only after global variables initialization 84 | model.load() 85 | 86 | while(cap.isOpened()): 87 | ret, frame_in = cap.read() 88 | 89 | # check if we are not at the end of the video 90 | if ret==True: 91 | # convert BGR to RGB convention 92 | frame_in = frame_in[:,:,::-1] 93 | # resize the frame to match the input size of the GAN 94 | frame_in = cv2.resize(frame_in, (size, size)) 95 | 96 | # colorize the BW frame 97 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, options) 98 | 99 | #generate sample 100 | get_image = False 101 | if get_image: 102 | img = Image.fromarray(frame_out) 103 | 104 | if not os.path.exists(model.samples_dir): 105 | os.makedirs(model.samples_dir) 106 | 107 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png" 108 | img.save(os.path.join(model.samples_dir, sample)) 109 | 110 | # save the recolorized frame 111 | frame_prev = frame_out 112 | # convert RGB to BGR convention 113 | frame_out = frame_out[:,:,::-1] 114 | # write the color frame 115 | color_out.write(frame_out) 116 | 117 | # print progress 118 | frames_processed += 1 119 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 //totalFrames), end="\r") 120 | if cv2.waitKey(1) & 0xFF == ord('q'): 121 | break 122 | # end of the video 123 | else: 124 | break 125 | 126 | # release everything if job is finished 127 | cap.release() 128 | color_out.release() 129 | 130 | def main(): 131 | 132 | # reset tensorflow graph 133 | tf.reset_default_graph() 134 | 135 | options = ModelOptions().parse() 136 | 137 | if options.filename == '*': 138 | for filename in 
os.listdir(options.input_dir): 139 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir) 140 | else: 141 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir) 142 | 143 | # cleanup 144 | cv2.destroyAllWindows() 145 | 146 | return 0 147 | 148 | if __name__ == '__main__': 149 | main() 150 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/video_colorize_GAN_1st-truth-ref.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | import cv2 6 | import numpy as np 7 | from PIL import Image 8 | from skimage import img_as_ubyte, img_as_float 9 | import skimage.color as color 10 | import scipy.ndimage.interpolation as sni 11 | from ops import postprocess 12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB 13 | 14 | import tensorflow as tf 15 | from options import ModelOptions 16 | from models import MomentsInTimeModel 17 | 18 | 19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, options): 20 | 21 | # colorize the image based on the previous one 22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0)} 23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic) 24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB) 25 | 26 | # evalute the tensor 27 | img_rgb_out = fake_image.eval() 28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8) 29 | 30 | return img_rgb_out 31 | 32 | def bw2color(options, inputname, inputpath, outputpath): 33 | if inputname.endswith(".mp4"): 34 | # size of the input frames 35 | size = 256 36 | 37 | # check that the video exists 38 | path_to_video = os.path.join(inputpath, inputname) 39 | if not os.path.exists(path_to_video): 40 | print("The file :", path_to_video, "does not exist !") 41 | 42 | # store informations about the original video 43 | cap = cv2.VideoCapture(os.path.join(path_to_video)) 44 | # original dimensions 45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v'); 48 | # parameters of output file 49 | # dimensions of the output image 50 | new_width, new_height = size, size 51 | # number of frames 52 | fps = 30.0 53 | 54 | # recolorized output video 55 | color_out = cv2.VideoWriter( 56 | os.path.join(outputpath, 'color_' + inputname), 57 | fourcc, 58 | fps, 59 | (new_width, new_height), 60 | isColor=True 61 | ) 62 | 63 | # TO CHANGE to DL colorization of 1st frame 64 | # pick the first frame from the original video clip as the first reference 65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:])) 66 | 67 | ret_temp, frame_prev = cap_temp.read() 68 | # convert BGR to RGB convention 69 | frame_prev = frame_prev[:,:,::-1] 70 | frame_prev = cv2.resize(frame_prev, (size, size)) 71 | 72 | # count the number of recolorized frames 73 | frames_processed = 0 74 | 75 | with tf.Session() as sess: 76 | 77 | model = MomentsInTimeModel(sess, options) 78 | 79 | # build the model and initialize 80 | model.build() 81 | sess.run(tf.global_variables_initializer()) 82 | 83 | # load model only after global variables initialization 84 | model.load() 85 | 
86 | while(cap.isOpened()): 87 | ret, frame_in = cap.read() 88 | 89 | # check if we are not at the end of the video 90 | if ret==True: 91 | # convert BGR to RGB convention 92 | frame_in = frame_in[:,:,::-1] 93 | # resize the frame to match the input size of the GAN 94 | frame_in = cv2.resize(frame_in, (size, size)) 95 | 96 | # colorize the BW frame 97 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, options) 98 | 99 | #generate sample 100 | get_image = False 101 | if get_image: 102 | img = Image.fromarray(frame_out) 103 | 104 | if not os.path.exists(model.samples_dir): 105 | os.makedirs(model.samples_dir) 106 | 107 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png" 108 | img.save(os.path.join(model.samples_dir, sample)) 109 | 110 | # save the recolorized frame 111 | #frame_prev = frame_out 112 | # convert RGB to BGR convention 113 | frame_out = frame_out[:,:,::-1] 114 | # write the color frame 115 | color_out.write(frame_out) 116 | #break 117 | 118 | # print progress 119 | frames_processed += 1 120 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 //totalFrames), end="\r") 121 | if cv2.waitKey(1) & 0xFF == ord('q'): 122 | break 123 | # end of the video 124 | else: 125 | break 126 | 127 | # release everything if job is finished 128 | cap.release() 129 | color_out.release() 130 | 131 | def main(): 132 | 133 | # reset tensorflow graph 134 | tf.reset_default_graph() 135 | 136 | options = ModelOptions().parse() 137 | 138 | if options.filename == '*': 139 | for filename in os.listdir(options.input_dir): 140 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir) 141 | else: 142 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir) 143 | 144 | # cleanup 145 | cv2.destroyAllWindows() 146 | 147 | return 0 148 | 149 | if __name__ == '__main__': 150 | main() 151 | -------------------------------------------------------------------------------- /Colorizing-with-GANs/video_colorize_GAN_truth-ref.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | import cv2 6 | import numpy as np 7 | from PIL import Image 8 | from skimage import img_as_ubyte, img_as_float 9 | import skimage.color as color 10 | import scipy.ndimage.interpolation as sni 11 | from ops import postprocess 12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB 13 | 14 | import tensorflow as tf 15 | from options import ModelOptions 16 | from models import MomentsInTimeModel 17 | 18 | 19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, options): 20 | 21 | # colorize the image based on the previous one 22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0)} 23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic) 24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB) 25 | 26 | # evalute the tensor 27 | img_rgb_out = fake_image.eval() 28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8) 29 | 30 | return img_rgb_out 31 | 32 | def bw2color(options, inputname, inputpath, outputpath): 33 | if inputname.endswith(".mp4"): 34 | # size of the input frames 35 | size = 256 36 | 37 | # check that the video exists 38 | 
path_to_video = os.path.join(inputpath, inputname) 39 | if not os.path.exists(path_to_video): 40 | print("The file :", path_to_video, "does not exist !") 41 | 42 | # store informations about the original video 43 | cap = cv2.VideoCapture(os.path.join(path_to_video)) 44 | # original dimensions 45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v'); 48 | # parameters of output file 49 | # dimensions of the output image 50 | new_width, new_height = size, size 51 | # number of frames 52 | fps = 30.0 53 | 54 | # recolorized output video 55 | color_out = cv2.VideoWriter( 56 | os.path.join(outputpath, 'color_' + inputname), 57 | fourcc, 58 | fps, 59 | (new_width, new_height), 60 | isColor=True 61 | ) 62 | 63 | # TO CHANGE to DL colorization of 1st frame 64 | # pick the first frame from the original video clip as the first reference 65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:])) 66 | 67 | # count the number of recolorized frames 68 | frames_processed = 0 69 | 70 | with tf.Session() as sess: 71 | 72 | model = MomentsInTimeModel(sess, options) 73 | 74 | # build the model and initialize 75 | model.build() 76 | sess.run(tf.global_variables_initializer()) 77 | 78 | # load model only after global variables initialization 79 | model.load() 80 | 81 | while(cap.isOpened()): 82 | ret, frame_in = cap.read() 83 | 84 | ret_temp, frame_prev = cap_temp.read() 85 | 86 | # check if we are not at the end of the video 87 | if ret==True: 88 | frame_prev = frame_prev[:,:,::-1] 89 | frame_prev = cv2.resize(frame_prev, (size, size)) 90 | 91 | # convert BGR to RGB convention 92 | frame_in = frame_in[:,:,::-1] 93 | # resize the frame to match the input size of the GAN 94 | frame_in = cv2.resize(frame_in, (size, size)) 95 | 96 | # colorize the BW frame 97 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, options) 98 | 99 | #generate sample 100 | get_image = False 101 | if get_image: 102 | img = Image.fromarray(frame_out) 103 | 104 | if not os.path.exists(model.samples_dir): 105 | os.makedirs(model.samples_dir) 106 | 107 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png" 108 | img.save(os.path.join(model.samples_dir, sample)) 109 | 110 | # save the recolorized frame 111 | #frame_prev = frame_out 112 | # convert RGB to BGR convention 113 | frame_out = frame_out[:,:,::-1] 114 | # write the color frame 115 | color_out.write(frame_out) 116 | #break 117 | 118 | # print progress 119 | frames_processed += 1 120 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 //totalFrames), end="\r") 121 | if cv2.waitKey(1) & 0xFF == ord('q'): 122 | break 123 | # end of the video 124 | else: 125 | break 126 | 127 | # release everything if job is finished 128 | cap.release() 129 | color_out.release() 130 | 131 | def main(): 132 | 133 | # reset tensorflow graph 134 | tf.reset_default_graph() 135 | 136 | options = ModelOptions().parse() 137 | 138 | if options.filename == '*': 139 | for filename in os.listdir(options.input_dir): 140 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir) 141 | else: 142 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir) 143 | 144 | # cleanup 145 | cv2.destroyAllWindows() 146 | 147 | return 0 148 
| 149 | if __name__ == '__main__': 150 | main() 151 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/models/.gitignore: -------------------------------------------------------------------------------- 1 | *.caffemodel -------------------------------------------------------------------------------- /Deep-Learning-Colorization/models/alexnet_deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "Colornet" 2 | layer { 3 | name: "data" 4 | top: "data" # BGR [0,255] ***non-mean centered*** 5 | type: "Input" 6 | input_param { shape { dim: 1 dim: 3 dim: 227 dim: 227 } } 7 | } 8 | # ************************** 9 | # ***** PROCESS COLORS ***** 10 | # ************************** 11 | layer { # Convert to lab 12 | name: "img_lab" 13 | type: "ColorConv" 14 | bottom: "data" 15 | top: "img_lab" 16 | propagate_down: false 17 | color_conv_param { 18 | input: 0 # BGR 19 | output: 3 # Lab 20 | } 21 | } 22 | layer { 23 | name: "img_slice" 24 | type: "Slice" 25 | bottom: "img_lab" 26 | top: "img_l" # [0,100] 27 | top: "data_ab" # [-110,110] 28 | propagate_down: false 29 | slice_param { 30 | axis: 1 31 | slice_point: 1 32 | } 33 | } 34 | layer { 35 | name: "silence_ab" 36 | type: "Silence" 37 | bottom: "data_ab" 38 | } 39 | layer { # 0-center lightness channel 40 | name: "data_l" 41 | type: "Convolution" 42 | bottom: "img_l" 43 | top: "data_l" # scaled and centered lightness value 44 | propagate_down: false 45 | param {lr_mult: 0 decay_mult: 0} 46 | param {lr_mult: 0 decay_mult: 0} 47 | convolution_param { 48 | kernel_size: 1 49 | num_output: 1 50 | } 51 | } 52 | layer { 53 | name: "conv1" 54 | type: "Convolution" 55 | bottom: "data_l" 56 | top: "conv1" 57 | param { lr_mult: 1 decay_mult: 1 } 58 | param { lr_mult: 2 decay_mult: 0 } 59 | convolution_param { 60 | num_output: 96 61 | kernel_size: 11 62 | stride: 4 63 | weight_filler { 64 | type: "gaussian" 65 | std: 0.01 66 | } 67 | bias_filler { 68 | type: "constant" 69 | value: 0 70 | } 71 | } 72 | } 73 | layer { 74 | name: "relu1" 75 | type: "ReLU" 76 | bottom: "conv1" 77 | top: "conv1" 78 | } 79 | layer { 80 | name: "pool1" 81 | type: "Pooling" 82 | bottom: "conv1" 83 | top: "pool1" 84 | pooling_param { 85 | pool: MAX 86 | kernel_size: 3 87 | stride: 2 88 | } 89 | } 90 | layer { 91 | name: "conv2" 92 | type: "Convolution" 93 | bottom: "pool1" 94 | top: "conv2" 95 | param { lr_mult: 1 decay_mult: 1 } 96 | param { lr_mult: 2 decay_mult: 0 } 97 | convolution_param { 98 | num_output: 256 99 | pad: 2 100 | kernel_size: 5 101 | group: 2 102 | weight_filler { 103 | type: "gaussian" 104 | std: 0.01 105 | } 106 | bias_filler { 107 | type: "constant" 108 | value: 1 109 | } 110 | } 111 | } 112 | layer { 113 | name: "relu2" 114 | type: "ReLU" 115 | bottom: "conv2" 116 | top: "conv2" 117 | } 118 | layer { 119 | name: "pool2" 120 | type: "Pooling" 121 | bottom: "conv2" 122 | top: "pool2" 123 | pooling_param { 124 | pool: MAX 125 | kernel_size: 3 126 | stride: 2 127 | } 128 | } 129 | layer { 130 | name: "conv3" 131 | type: "Convolution" 132 | bottom: "pool2" 133 | top: "conv3" 134 | param { lr_mult: 1 decay_mult: 1 } 135 | param { lr_mult: 2 decay_mult: 0 } 136 | convolution_param { 137 | num_output: 384 138 | pad: 1 139 | kernel_size: 3 140 | weight_filler { 141 | type: "gaussian" 142 | std: 0.01 143 | } 144 | bias_filler { 145 | type: "constant" 146 | value: 0 147 | } 148 | } 149 | } 150 | layer { 151 | name: "relu3" 152 | type: "ReLU" 153 | bottom: "conv3" 
154 | top: "conv3" 155 | } 156 | layer { 157 | name: "conv4" 158 | type: "Convolution" 159 | bottom: "conv3" 160 | top: "conv4" 161 | param { lr_mult: 1 decay_mult: 1 } 162 | param { lr_mult: 2 decay_mult: 0 } 163 | convolution_param { 164 | num_output: 384 165 | pad: 1 166 | kernel_size: 3 167 | group: 2 168 | weight_filler { 169 | type: "gaussian" 170 | std: 0.01 171 | } 172 | bias_filler { 173 | type: "constant" 174 | value: 1 175 | } 176 | } 177 | } 178 | layer { 179 | name: "relu4" 180 | type: "ReLU" 181 | bottom: "conv4" 182 | top: "conv4" 183 | } 184 | layer { 185 | name: "conv5" 186 | type: "Convolution" 187 | bottom: "conv4" 188 | top: "conv5" 189 | param { lr_mult: 1 decay_mult: 1 } 190 | param { lr_mult: 2 decay_mult: 0 } 191 | convolution_param { 192 | num_output: 256 193 | pad: 1 194 | kernel_size: 3 195 | group: 2 196 | weight_filler { 197 | type: "gaussian" 198 | std: 0.01 199 | } 200 | bias_filler { 201 | type: "constant" 202 | value: 1 203 | } 204 | } 205 | } 206 | layer { 207 | name: "relu5" 208 | type: "ReLU" 209 | bottom: "conv5" 210 | top: "conv5" 211 | } 212 | layer { 213 | name: "pool5" 214 | type: "Pooling" 215 | bottom: "conv5" 216 | top: "pool5" 217 | pooling_param { 218 | pool: MAX 219 | kernel_size: 3 220 | stride: 2 221 | } 222 | } 223 | layer { 224 | name: "fc6" 225 | type: "Convolution" 226 | bottom: "pool5" 227 | top: "fc6" 228 | param { lr_mult: 1 decay_mult: 1 } 229 | param { lr_mult: 2 decay_mult: 0 } 230 | convolution_param { 231 | kernel_size: 6 232 | stride: 1 233 | num_output: 4096 234 | } 235 | } 236 | layer { 237 | name: "relu6" 238 | type: "ReLU" 239 | bottom: "fc6" 240 | top: "fc6" 241 | } 242 | layer { 243 | name: "drop6" 244 | type: "Dropout" 245 | bottom: "fc6" 246 | top: "fc6" 247 | dropout_param { 248 | dropout_ratio: 0.5 249 | } 250 | } 251 | layer { 252 | name: "fc7" 253 | type: "Convolution" 254 | bottom: "fc6" 255 | top: "fc7" 256 | param { lr_mult: 1 decay_mult: 1 } 257 | param { lr_mult: 2 decay_mult: 0 } 258 | convolution_param { 259 | kernel_size: 1 260 | stride: 1 261 | num_output: 4096 262 | } 263 | } 264 | layer { 265 | name: "relu7" 266 | type: "ReLU" 267 | bottom: "fc7" 268 | top: "fc7" 269 | } 270 | layer { 271 | name: "drop7" 272 | type: "Dropout" 273 | bottom: "fc7" 274 | top: "fc7" 275 | dropout_param { 276 | dropout_ratio: 0.5 277 | } 278 | } 279 | 280 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/models/alexnet_deploy_fc.prototxt: -------------------------------------------------------------------------------- 1 | name: "Colornet" 2 | layer { 3 | name: "data" 4 | top: "data" # BGR [0,255] ***non-mean centered*** 5 | type: "Input" 6 | input_param { shape { dim: 1 dim: 3 dim: 227 dim: 227 } } 7 | } 8 | # ************************** 9 | # ***** PROCESS COLORS ***** 10 | # ************************** 11 | layer { # Convert to lab 12 | name: "img_lab" 13 | type: "ColorConv" 14 | bottom: "data" 15 | top: "img_lab" 16 | propagate_down: false 17 | color_conv_param { 18 | input: 0 # BGR 19 | output: 3 # Lab 20 | } 21 | } 22 | layer { 23 | name: "img_slice" 24 | type: "Slice" 25 | bottom: "img_lab" 26 | top: "img_l" # [0,100] 27 | top: "data_ab" # [-110,110] 28 | propagate_down: false 29 | slice_param { 30 | axis: 1 31 | slice_point: 1 32 | } 33 | } 34 | layer { 35 | name: "silence_ab" 36 | type: "Silence" 37 | bottom: "data_ab" 38 | } 39 | layer { # 0-center lightness channel 40 | name: "data_l" 41 | type: "Convolution" 42 | bottom: "img_l" 43 | top: "data_l" # scaled 
and centered lightness value 44 | propagate_down: false 45 | param {lr_mult: 0 decay_mult: 0} 46 | param {lr_mult: 0 decay_mult: 0} 47 | convolution_param { 48 | kernel_size: 1 49 | num_output: 1 50 | } 51 | } 52 | layer { 53 | name: "conv1" 54 | type: "Convolution" 55 | bottom: "data_l" 56 | top: "conv1" 57 | param { lr_mult: 1 decay_mult: 1 } 58 | param { lr_mult: 2 decay_mult: 0 } 59 | convolution_param { 60 | num_output: 96 61 | kernel_size: 11 62 | stride: 4 63 | weight_filler { 64 | type: "gaussian" 65 | std: 0.01 66 | } 67 | bias_filler { 68 | type: "constant" 69 | value: 0 70 | } 71 | } 72 | } 73 | layer { 74 | name: "relu1" 75 | type: "ReLU" 76 | bottom: "conv1" 77 | top: "conv1" 78 | } 79 | layer { 80 | name: "pool1" 81 | type: "Pooling" 82 | bottom: "conv1" 83 | top: "pool1" 84 | pooling_param { 85 | pool: MAX 86 | kernel_size: 3 87 | stride: 2 88 | } 89 | } 90 | layer { 91 | name: "conv2" 92 | type: "Convolution" 93 | bottom: "pool1" 94 | top: "conv2" 95 | param { lr_mult: 1 decay_mult: 1 } 96 | param { lr_mult: 2 decay_mult: 0 } 97 | convolution_param { 98 | num_output: 256 99 | pad: 2 100 | kernel_size: 5 101 | group: 2 102 | weight_filler { 103 | type: "gaussian" 104 | std: 0.01 105 | } 106 | bias_filler { 107 | type: "constant" 108 | value: 1 109 | } 110 | } 111 | } 112 | layer { 113 | name: "relu2" 114 | type: "ReLU" 115 | bottom: "conv2" 116 | top: "conv2" 117 | } 118 | layer { 119 | name: "pool2" 120 | type: "Pooling" 121 | bottom: "conv2" 122 | top: "pool2" 123 | pooling_param { 124 | pool: MAX 125 | kernel_size: 3 126 | stride: 2 127 | } 128 | } 129 | layer { 130 | name: "conv3" 131 | type: "Convolution" 132 | bottom: "pool2" 133 | top: "conv3" 134 | param { lr_mult: 1 decay_mult: 1 } 135 | param { lr_mult: 2 decay_mult: 0 } 136 | convolution_param { 137 | num_output: 384 138 | pad: 1 139 | kernel_size: 3 140 | weight_filler { 141 | type: "gaussian" 142 | std: 0.01 143 | } 144 | bias_filler { 145 | type: "constant" 146 | value: 0 147 | } 148 | } 149 | } 150 | layer { 151 | name: "relu3" 152 | type: "ReLU" 153 | bottom: "conv3" 154 | top: "conv3" 155 | } 156 | layer { 157 | name: "conv4" 158 | type: "Convolution" 159 | bottom: "conv3" 160 | top: "conv4" 161 | param { lr_mult: 1 decay_mult: 1 } 162 | param { lr_mult: 2 decay_mult: 0 } 163 | convolution_param { 164 | num_output: 384 165 | pad: 1 166 | kernel_size: 3 167 | group: 2 168 | weight_filler { 169 | type: "gaussian" 170 | std: 0.01 171 | } 172 | bias_filler { 173 | type: "constant" 174 | value: 1 175 | } 176 | } 177 | } 178 | layer { 179 | name: "relu4" 180 | type: "ReLU" 181 | bottom: "conv4" 182 | top: "conv4" 183 | } 184 | layer { 185 | name: "conv5" 186 | type: "Convolution" 187 | bottom: "conv4" 188 | top: "conv5" 189 | param { lr_mult: 1 decay_mult: 1 } 190 | param { lr_mult: 2 decay_mult: 0 } 191 | convolution_param { 192 | num_output: 256 193 | pad: 1 194 | kernel_size: 3 195 | group: 2 196 | weight_filler { 197 | type: "gaussian" 198 | std: 0.01 199 | } 200 | bias_filler { 201 | type: "constant" 202 | value: 1 203 | } 204 | } 205 | } 206 | layer { 207 | name: "relu5" 208 | type: "ReLU" 209 | bottom: "conv5" 210 | top: "conv5" 211 | } 212 | layer { 213 | name: "pool5" 214 | type: "Pooling" 215 | bottom: "conv5" 216 | top: "pool5" 217 | pooling_param { 218 | pool: MAX 219 | kernel_size: 3 220 | stride: 2 221 | } 222 | } 223 | layer { 224 | name: "fc6" 225 | type: "InnerProduct" 226 | bottom: "pool5" 227 | top: "fc6" 228 | param { lr_mult: 1 decay_mult: 1 } 229 | param { lr_mult: 2 decay_mult: 0 } 230 | 
inner_product_param { 231 | num_output: 4096 232 | } 233 | } 234 | layer { 235 | name: "relu6" 236 | type: "ReLU" 237 | bottom: "fc6" 238 | top: "fc6" 239 | } 240 | layer { 241 | name: "drop6" 242 | type: "Dropout" 243 | bottom: "fc6" 244 | top: "fc6" 245 | dropout_param { 246 | dropout_ratio: 0.5 247 | } 248 | } 249 | layer { 250 | name: "fc7" 251 | type: "InnerProduct" 252 | bottom: "fc6" 253 | top: "fc7" 254 | param { lr_mult: 1 decay_mult: 1 } 255 | param { lr_mult: 2 decay_mult: 0 } 256 | inner_product_param { 257 | num_output: 4096 258 | } 259 | } 260 | layer { 261 | name: "relu7" 262 | type: "ReLU" 263 | bottom: "fc7" 264 | top: "fc7" 265 | } 266 | layer { 267 | name: "drop7" 268 | type: "Dropout" 269 | bottom: "fc7" 270 | top: "fc7" 271 | dropout_param { 272 | dropout_ratio: 0.5 273 | } 274 | } 275 | 276 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/models/alexnet_deploy_lab.prototxt: -------------------------------------------------------------------------------- 1 | name: "Colornet" 2 | layer { 3 | name: "img_lab" 4 | top: "img_lab" # Lab color space 5 | type: "Input" 6 | input_param { shape { dim: 1 dim: 3 dim: 227 dim: 227 } } 7 | } 8 | # ************************** 9 | # ***** PROCESS COLORS ***** 10 | # ************************** 11 | # layer { # Convert to lab 12 | # name: "img_lab" 13 | # type: "ColorConv" 14 | # bottom: "data" 15 | # top: "img_lab" 16 | # propagate_down: false 17 | # color_conv_param { 18 | # input: 0 # BGR 19 | # output: 3 # Lab 20 | # } 21 | # } 22 | layer { 23 | name: "img_slice" 24 | type: "Slice" 25 | bottom: "img_lab" 26 | top: "img_l" # [0,100] 27 | top: "data_ab" # [-110,110] 28 | propagate_down: false 29 | slice_param { 30 | axis: 1 31 | slice_point: 1 32 | } 33 | } 34 | layer { 35 | name: "silence_ab" 36 | type: "Silence" 37 | bottom: "data_ab" 38 | } 39 | layer { # 0-center lightness channel 40 | name: "data_l" 41 | type: "Convolution" 42 | bottom: "img_l" 43 | top: "data_l" # scaled and centered lightness value 44 | propagate_down: false 45 | param {lr_mult: 0 decay_mult: 0} 46 | param {lr_mult: 0 decay_mult: 0} 47 | convolution_param { 48 | kernel_size: 1 49 | num_output: 1 50 | } 51 | } 52 | layer { 53 | name: "conv1" 54 | type: "Convolution" 55 | bottom: "data_l" 56 | top: "conv1" 57 | param { lr_mult: 1 decay_mult: 1 } 58 | param { lr_mult: 2 decay_mult: 0 } 59 | convolution_param { 60 | num_output: 96 61 | kernel_size: 11 62 | stride: 4 63 | weight_filler { 64 | type: "gaussian" 65 | std: 0.01 66 | } 67 | bias_filler { 68 | type: "constant" 69 | value: 0 70 | } 71 | } 72 | } 73 | layer { 74 | name: "relu1" 75 | type: "ReLU" 76 | bottom: "conv1" 77 | top: "conv1" 78 | } 79 | layer { 80 | name: "pool1" 81 | type: "Pooling" 82 | bottom: "conv1" 83 | top: "pool1" 84 | pooling_param { 85 | pool: MAX 86 | kernel_size: 3 87 | stride: 2 88 | } 89 | } 90 | layer { 91 | name: "conv2" 92 | type: "Convolution" 93 | bottom: "pool1" 94 | top: "conv2" 95 | param { lr_mult: 1 decay_mult: 1 } 96 | param { lr_mult: 2 decay_mult: 0 } 97 | convolution_param { 98 | num_output: 256 99 | pad: 2 100 | kernel_size: 5 101 | group: 2 102 | weight_filler { 103 | type: "gaussian" 104 | std: 0.01 105 | } 106 | bias_filler { 107 | type: "constant" 108 | value: 1 109 | } 110 | } 111 | } 112 | layer { 113 | name: "relu2" 114 | type: "ReLU" 115 | bottom: "conv2" 116 | top: "conv2" 117 | } 118 | layer { 119 | name: "pool2" 120 | type: "Pooling" 121 | bottom: "conv2" 122 | top: "pool2" 123 | pooling_param { 124 
| pool: MAX 125 | kernel_size: 3 126 | stride: 2 127 | } 128 | } 129 | layer { 130 | name: "conv3" 131 | type: "Convolution" 132 | bottom: "pool2" 133 | top: "conv3" 134 | param { lr_mult: 1 decay_mult: 1 } 135 | param { lr_mult: 2 decay_mult: 0 } 136 | convolution_param { 137 | num_output: 384 138 | pad: 1 139 | kernel_size: 3 140 | weight_filler { 141 | type: "gaussian" 142 | std: 0.01 143 | } 144 | bias_filler { 145 | type: "constant" 146 | value: 0 147 | } 148 | } 149 | } 150 | layer { 151 | name: "relu3" 152 | type: "ReLU" 153 | bottom: "conv3" 154 | top: "conv3" 155 | } 156 | layer { 157 | name: "conv4" 158 | type: "Convolution" 159 | bottom: "conv3" 160 | top: "conv4" 161 | param { lr_mult: 1 decay_mult: 1 } 162 | param { lr_mult: 2 decay_mult: 0 } 163 | convolution_param { 164 | num_output: 384 165 | pad: 1 166 | kernel_size: 3 167 | group: 2 168 | weight_filler { 169 | type: "gaussian" 170 | std: 0.01 171 | } 172 | bias_filler { 173 | type: "constant" 174 | value: 1 175 | } 176 | } 177 | } 178 | layer { 179 | name: "relu4" 180 | type: "ReLU" 181 | bottom: "conv4" 182 | top: "conv4" 183 | } 184 | layer { 185 | name: "conv5" 186 | type: "Convolution" 187 | bottom: "conv4" 188 | top: "conv5" 189 | param { lr_mult: 1 decay_mult: 1 } 190 | param { lr_mult: 2 decay_mult: 0 } 191 | convolution_param { 192 | num_output: 256 193 | pad: 1 194 | kernel_size: 3 195 | group: 2 196 | weight_filler { 197 | type: "gaussian" 198 | std: 0.01 199 | } 200 | bias_filler { 201 | type: "constant" 202 | value: 1 203 | } 204 | } 205 | } 206 | layer { 207 | name: "relu5" 208 | type: "ReLU" 209 | bottom: "conv5" 210 | top: "conv5" 211 | } 212 | layer { 213 | name: "pool5" 214 | type: "Pooling" 215 | bottom: "conv5" 216 | top: "pool5" 217 | pooling_param { 218 | pool: MAX 219 | kernel_size: 3 220 | stride: 2 221 | } 222 | } 223 | layer { 224 | name: "fc6" 225 | type: "Convolution" 226 | bottom: "pool5" 227 | top: "fc6" 228 | param { lr_mult: 1 decay_mult: 1 } 229 | param { lr_mult: 2 decay_mult: 0 } 230 | convolution_param { 231 | kernel_size: 6 232 | stride: 1 233 | num_output: 4096 234 | } 235 | } 236 | layer { 237 | name: "relu6" 238 | type: "ReLU" 239 | bottom: "fc6" 240 | top: "fc6" 241 | } 242 | layer { 243 | name: "drop6" 244 | type: "Dropout" 245 | bottom: "fc6" 246 | top: "fc6" 247 | dropout_param { 248 | dropout_ratio: 0.5 249 | } 250 | } 251 | layer { 252 | name: "fc7" 253 | type: "Convolution" 254 | bottom: "fc6" 255 | top: "fc7" 256 | param { lr_mult: 1 decay_mult: 1 } 257 | param { lr_mult: 2 decay_mult: 0 } 258 | convolution_param { 259 | kernel_size: 1 260 | stride: 1 261 | num_output: 4096 262 | } 263 | } 264 | layer { 265 | name: "relu7" 266 | type: "ReLU" 267 | bottom: "fc7" 268 | top: "fc7" 269 | } 270 | layer { 271 | name: "drop7" 272 | type: "Dropout" 273 | bottom: "fc7" 274 | top: "fc7" 275 | dropout_param { 276 | dropout_ratio: 0.5 277 | } 278 | } 279 | 280 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/models/alexnet_deploy_lab_fc.prototxt: -------------------------------------------------------------------------------- 1 | name: "Colornet" 2 | layer { 3 | name: "img_lab" 4 | top: "img_lab" # Lab color space 5 | type: "Input" 6 | input_param { shape { dim: 1 dim: 3 dim: 227 dim: 227 } } 7 | } 8 | # ************************** 9 | # ***** PROCESS COLORS ***** 10 | # ************************** 11 | # layer { # Convert to lab 12 | # name: "img_lab" 13 | # type: "ColorConv" 14 | # bottom: "data" 15 | # top: "img_lab" 16 | # 
propagate_down: false 17 | # color_conv_param { 18 | # input: 0 # BGR 19 | # output: 3 # Lab 20 | # } 21 | # } 22 | layer { 23 | name: "img_slice" 24 | type: "Slice" 25 | bottom: "img_lab" 26 | top: "img_l" # [0,100] 27 | top: "data_ab" # [-110,110] 28 | propagate_down: false 29 | slice_param { 30 | axis: 1 31 | slice_point: 1 32 | } 33 | } 34 | layer { 35 | name: "silence_ab" 36 | type: "Silence" 37 | bottom: "data_ab" 38 | } 39 | layer { # 0-center lightness channel 40 | name: "data_l" 41 | type: "Convolution" 42 | bottom: "img_l" 43 | top: "data_l" # scaled and centered lightness value 44 | propagate_down: false 45 | param {lr_mult: 0 decay_mult: 0} 46 | param {lr_mult: 0 decay_mult: 0} 47 | convolution_param { 48 | kernel_size: 1 49 | num_output: 1 50 | } 51 | } 52 | layer { 53 | name: "conv1" 54 | type: "Convolution" 55 | bottom: "data_l" 56 | top: "conv1" 57 | param { lr_mult: 1 decay_mult: 1 } 58 | param { lr_mult: 2 decay_mult: 0 } 59 | convolution_param { 60 | num_output: 96 61 | kernel_size: 11 62 | stride: 4 63 | weight_filler { 64 | type: "gaussian" 65 | std: 0.01 66 | } 67 | bias_filler { 68 | type: "constant" 69 | value: 0 70 | } 71 | } 72 | } 73 | layer { 74 | name: "relu1" 75 | type: "ReLU" 76 | bottom: "conv1" 77 | top: "conv1" 78 | } 79 | layer { 80 | name: "pool1" 81 | type: "Pooling" 82 | bottom: "conv1" 83 | top: "pool1" 84 | pooling_param { 85 | pool: MAX 86 | kernel_size: 3 87 | stride: 2 88 | } 89 | } 90 | layer { 91 | name: "conv2" 92 | type: "Convolution" 93 | bottom: "pool1" 94 | top: "conv2" 95 | param { lr_mult: 1 decay_mult: 1 } 96 | param { lr_mult: 2 decay_mult: 0 } 97 | convolution_param { 98 | num_output: 256 99 | pad: 2 100 | kernel_size: 5 101 | group: 2 102 | weight_filler { 103 | type: "gaussian" 104 | std: 0.01 105 | } 106 | bias_filler { 107 | type: "constant" 108 | value: 1 109 | } 110 | } 111 | } 112 | layer { 113 | name: "relu2" 114 | type: "ReLU" 115 | bottom: "conv2" 116 | top: "conv2" 117 | } 118 | layer { 119 | name: "pool2" 120 | type: "Pooling" 121 | bottom: "conv2" 122 | top: "pool2" 123 | pooling_param { 124 | pool: MAX 125 | kernel_size: 3 126 | stride: 2 127 | } 128 | } 129 | layer { 130 | name: "conv3" 131 | type: "Convolution" 132 | bottom: "pool2" 133 | top: "conv3" 134 | param { lr_mult: 1 decay_mult: 1 } 135 | param { lr_mult: 2 decay_mult: 0 } 136 | convolution_param { 137 | num_output: 384 138 | pad: 1 139 | kernel_size: 3 140 | weight_filler { 141 | type: "gaussian" 142 | std: 0.01 143 | } 144 | bias_filler { 145 | type: "constant" 146 | value: 0 147 | } 148 | } 149 | } 150 | layer { 151 | name: "relu3" 152 | type: "ReLU" 153 | bottom: "conv3" 154 | top: "conv3" 155 | } 156 | layer { 157 | name: "conv4" 158 | type: "Convolution" 159 | bottom: "conv3" 160 | top: "conv4" 161 | param { lr_mult: 1 decay_mult: 1 } 162 | param { lr_mult: 2 decay_mult: 0 } 163 | convolution_param { 164 | num_output: 384 165 | pad: 1 166 | kernel_size: 3 167 | group: 2 168 | weight_filler { 169 | type: "gaussian" 170 | std: 0.01 171 | } 172 | bias_filler { 173 | type: "constant" 174 | value: 1 175 | } 176 | } 177 | } 178 | layer { 179 | name: "relu4" 180 | type: "ReLU" 181 | bottom: "conv4" 182 | top: "conv4" 183 | } 184 | layer { 185 | name: "conv5" 186 | type: "Convolution" 187 | bottom: "conv4" 188 | top: "conv5" 189 | param { lr_mult: 1 decay_mult: 1 } 190 | param { lr_mult: 2 decay_mult: 0 } 191 | convolution_param { 192 | num_output: 256 193 | pad: 1 194 | kernel_size: 3 195 | group: 2 196 | weight_filler { 197 | type: "gaussian" 198 | std: 0.01 
199 | } 200 | bias_filler { 201 | type: "constant" 202 | value: 1 203 | } 204 | } 205 | } 206 | layer { 207 | name: "relu5" 208 | type: "ReLU" 209 | bottom: "conv5" 210 | top: "conv5" 211 | } 212 | layer { 213 | name: "pool5" 214 | type: "Pooling" 215 | bottom: "conv5" 216 | top: "pool5" 217 | pooling_param { 218 | pool: MAX 219 | kernel_size: 3 220 | stride: 2 221 | } 222 | } 223 | layer { 224 | name: "fc6" 225 | type: "InnerProduct" 226 | bottom: "pool5" 227 | top: "fc6" 228 | param { lr_mult: 1 decay_mult: 1 } 229 | param { lr_mult: 2 decay_mult: 0 } 230 | inner_product_param { 231 | num_output: 4096 232 | } 233 | } 234 | layer { 235 | name: "relu6" 236 | type: "ReLU" 237 | bottom: "fc6" 238 | top: "fc6" 239 | } 240 | layer { 241 | name: "drop6" 242 | type: "Dropout" 243 | bottom: "fc6" 244 | top: "fc6" 245 | dropout_param { 246 | dropout_ratio: 0.5 247 | } 248 | } 249 | layer { 250 | name: "fc7" 251 | type: "InnerProduct" 252 | bottom: "fc6" 253 | top: "fc7" 254 | param { lr_mult: 1 decay_mult: 1 } 255 | param { lr_mult: 2 decay_mult: 0 } 256 | inner_product_param { 257 | num_output: 4096 258 | } 259 | } 260 | layer { 261 | name: "relu7" 262 | type: "ReLU" 263 | bottom: "fc7" 264 | top: "fc7" 265 | } 266 | layer { 267 | name: "drop7" 268 | type: "Dropout" 269 | bottom: "fc7" 270 | top: "fc7" 271 | dropout_param { 272 | dropout_ratio: 0.5 273 | } 274 | } 275 | 276 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/models/fetch_alexnet_model.sh: -------------------------------------------------------------------------------- 1 | 2 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/alexnet_release_450000.caffemodel -O ./models/alexnet_release_450000.caffemodel 3 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/alexnet_release_450000_nobn.caffemodel -O ./models/alexnet_release_450000_nobn.caffemodel 4 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/alexnet_release_450000_nobn_rs.caffemodel -O ./models/alexnet_release_450000_nobn_rs.caffemodel 5 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/alexnet_release_450000_nobn_fc_rs.caffemodel -O ./models/alexnet_release_450000_nobn_fc_rs.caffemodel 6 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/models/fetch_release_models.sh: -------------------------------------------------------------------------------- 1 | 2 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/colorization_release_v2.caffemodel -O ./models/colorization_release_v2.caffemodel 3 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/colorization_release_v2_norebal.caffemodel -O ./models/colorization_release_v2_norebal.caffemodel 4 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v1/colorization_release_v1.caffemodel -O ./models/colorization_release_v1.caffemodel 5 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/resources/batch_norm_absorb.py: -------------------------------------------------------------------------------- 1 | 2 | # ************************************** 3 | # ***** Richard Zhang / 2016.06.04 ***** 4 | # ************************************** 5 | # Absorb batch norm into convolution layers 6 | # This script only supports the conv-batchnorm 
configuration 7 | # Currently unsupported: 8 | # - deconv layers 9 | # - fc layers 10 | # - batchnorm before linear layer 11 | 12 | import caffe 13 | import os 14 | import string 15 | import numpy as np 16 | import argparse 17 | import matplotlib.pyplot as plt 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser(description='BatchNorm absorption') 21 | parser.add_argument('--gpu', dest='gpu', help='gpu id', type=int, default=0) 22 | parser.add_argument('--prototxt_in',dest='prototxt_in',help='prototxt with batchnorm', type=str, default='') 23 | parser.add_argument('--prototxt_out',dest='prototxt_out',help='prototxt without batchnorm', type=str, default='') 24 | parser.add_argument('--caffemodel_in',dest='caffemodel_in',help='caffemodel with batchnorm', type=str, default='') 25 | parser.add_argument('--caffemodel_out',dest='caffemodel_out',help='caffemodel without batchnorm, to be saved', type=str, default='') 26 | 27 | args = parser.parse_args() 28 | return args 29 | 30 | if __name__ == '__main__': 31 | args = parse_args() 32 | 33 | gpu_id = args.gpu 34 | PROTOTXT1_PATH = args.prototxt_in 35 | PROTOTXT2_PATH = args.prototxt_out # no batch norm 36 | MODEL_PATH = args.caffemodel_in 37 | MODEL2_PATH = args.caffemodel_out # to be saved off 38 | 39 | caffe.set_mode_gpu() 40 | caffe.set_device(gpu_id) 41 | 42 | net1 = caffe.Net(PROTOTXT1_PATH, MODEL_PATH, caffe.TEST) 43 | net2 = caffe.Net(PROTOTXT2_PATH, MODEL_PATH, caffe.TEST) 44 | 45 | # call forward on net1, net2 46 | net1.forward() 47 | net2.forward() 48 | 49 | # identify batch norms and paired linear layers 50 | BN_INDS = np.where(np.array([layer.type for layer in net1.layers])=='BatchNorm')[0] 51 | BN_NAMES = np.zeros(BN_INDS.shape,dtype='S50') # batch norm layer names 52 | LIN_NAMES = np.zeros(BN_INDS.shape,dtype='S50') # linear layer names 53 | PRE_NAMES = np.zeros(BN_INDS.shape,dtype='S50') # blob right before 54 | POST_NAMES = np.zeros(BN_INDS.shape,dtype='S50') # blob right after 55 | 56 | PRE_POST = -1+np.zeros(BN_INDS.shape) # 0 - pre, 1 - post 57 | CONV_DECONV = -1+np.zeros(BN_INDS.shape) # 0 - conv, 1 - deconv 58 | 59 | # identify layers which are paired with batch norms (only supporting convolution) 60 | for (ll,bn_ind) in enumerate(BN_INDS): 61 | BN_NAMES[ll] = net1._layer_names[bn_ind] 62 | if(net1.layers[bn_ind-1].type=='Convolution' or net1.layers[bn_ind-1].type=='Deconvolution'): 63 | PRE_POST[ll] = 0 64 | LIN_NAMES[ll] = net1._layer_names[bn_ind-1] 65 | POST_NAMES[ll] = net1._layer_names[bn_ind+1] 66 | if(net1.layers[bn_ind-1].type=='Convolution'): 67 | CONV_DECONV[ll] = 0 68 | elif(net1.layers[bn_ind-1].type=='Deconvolution'): 69 | CONV_DECONV[ll] = 1 70 | elif(net1.layers[bn_ind+1].type=='Convolution' or net1.layers[bn_ind+1].type=='Deconvolution'): 71 | PRE_POST[ll] = 1 72 | LIN_NAMES[ll] = net1._layer_names[bn_ind+1] 73 | POST_NAMES[ll] = net1._layer_names[bn_ind+3] 74 | if(net1.layers[bn_ind+1].type=='Convolution'): 75 | CONV_DECONV[ll] = 0 76 | elif(net1.layers[bn_ind+1].type=='Deconvolution'): 77 | CONV_DECONV[ll] = 1 78 | else: 79 | PRE_POST[ll] = -1 80 | PRE_NAMES[ll] = net1.bottom_names[BN_NAMES[ll]][0] 81 | 82 | LIN_INDS = BN_INDS+PRE_POST # linear layer indices 83 | ALL_SLOPES = {} 84 | 85 | # compute batch norm parameters on net1 in first layer 86 | # absorb into weights in first layer 87 | for ll in range(BN_INDS.size): 88 | bn_ind = BN_INDS[ll] 89 | BN_NAME = BN_NAMES[ll] 90 | PRE_NAME = PRE_NAMES[ll] 91 | POST_NAME = POST_NAMES[ll] 92 | LIN_NAME = LIN_NAMES[ll] 93 | 94 | print 'LAYERS %s, 
%s'%(PRE_NAME,BN_NAME) 95 | # print net1.blobs[BN_NAME].data.shape 96 | # print net1.blobs[PRE_NAME].data.shape 97 | 98 | C = net1.blobs[BN_NAME].data.shape[1] 99 | in_blob = net1.blobs[PRE_NAME].data 100 | bn_blob = net1.blobs[BN_NAME].data 101 | 102 | scale_factor = 1./net1.params[BN_NAME][2].data[...] 103 | mean = scale_factor * net1.params[BN_NAME][0].data[...] 104 | scale = scale_factor * net1.params[BN_NAME][1].data[...] 105 | 106 | slopes = np.sqrt(1./scale) 107 | offs = -mean*slopes 108 | 109 | print ' Computing error on data...' 110 | bn_blob_rep = in_blob*slopes[np.newaxis,:,np.newaxis,np.newaxis]+offs[np.newaxis,:,np.newaxis,np.newaxis] 111 | 112 | # Visually verify that factors are correct 113 | print ' Maximum error: %.3e'%np.max(np.abs(bn_blob_rep[bn_blob>0] - bn_blob[bn_blob>0])) 114 | print ' RMS error: %.3e'%np.linalg.norm(bn_blob_rep[bn_blob>0] - bn_blob[bn_blob>0]) 115 | print ' RMS signal: %.3e'%np.linalg.norm(bn_blob_rep[bn_blob>0]) 116 | 117 | print ' Absorbing slope and offset...' 118 | # absorb slope and offset into appropriate parameter 119 | if(PRE_POST[ll]==0): # linear layer is before 120 | if(CONV_DECONV[ll]==0): # convolution 121 | net2.params[LIN_NAME][0].data[...] = net1.params[LIN_NAME][0].data[...]*slopes[:,np.newaxis,np.newaxis,np.newaxis] 122 | net2.params[LIN_NAME][1].data[...] = offs + (slopes*net1.params[LIN_NAME][1].data) 123 | elif(CONV_DECONV[ll]==1): # deconvolution 124 | print '*** Deconvolution not implemented ***' 125 | elif(PRE_POST[ll]==1): # batchnorm is BEFORE linear layer 126 | print '*** Not implemented ***' 127 | 128 | net2.save(MODEL2_PATH) 129 | 130 | for arg in vars(args): 131 | print('[%s] =' % arg, getattr(args, arg)) 132 | print 'Saving model into: %s'%MODEL2_PATH 133 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/resources/caffe_traininglayers.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Deep-Learning-Colorization/resources/caffe_traininglayers.pyc -------------------------------------------------------------------------------- /Deep-Learning-Colorization/resources/conv_into_fc.py: -------------------------------------------------------------------------------- 1 | 2 | import caffe 3 | import os 4 | import string 5 | import numpy as np 6 | import argparse 7 | import matplotlib.pyplot as plt 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='Convert conv layers into FC layers') 11 | parser.add_argument('--gpu', dest='gpu', help='gpu id', type=int, default=0) 12 | parser.add_argument('--prototxt_in',dest='prototxt_in',help='prototxt with conv layers', type=str, default='') 13 | parser.add_argument('--prototxt_out',dest='prototxt_out',help='prototxt with fc layers', type=str, default='') 14 | parser.add_argument('--caffemodel_in',dest='caffemodel_in',help='caffemodel with conv layers', type=str, default='') 15 | parser.add_argument('--caffemodel_out',dest='caffemodel_out',help='caffemodel with fc layers, to be saved', type=str, default='') 16 | parser.add_argument('--dummymodel',dest='dummymodel',help='blank caffemodel',type=str,default='./models/dummy.caffemodel') 17 | 18 | args = parser.parse_args() 19 | return args 20 | 21 | if __name__ == '__main__': 22 | args = parse_args() 23 | 24 | gpu_id = args.gpu 25 | PROTOTXT1_PATH = args.prototxt_in 26 | PROTOTXT2_PATH = args.prototxt_out # no 
batch norm 27 | MODEL_PATH = args.caffemodel_in 28 | DUMMYMODEL_PATH = args.dummymodel 29 | MODEL2_PATH = args.caffemodel_out # to be saved off 30 | 31 | caffe.set_mode_gpu() 32 | caffe.set_device(gpu_id) 33 | 34 | net1 = caffe.Net(PROTOTXT1_PATH, MODEL_PATH, caffe.TEST) 35 | net2 = caffe.Net(PROTOTXT2_PATH, DUMMYMODEL_PATH, caffe.TEST) 36 | 37 | import rz_fcns as rz 38 | rz.caffe_param_shapes(net1,to_print=True) 39 | rz.caffe_param_shapes(net2,to_print=True) 40 | rz.caffe_shapes(net2,to_print=True) 41 | 42 | # CONV_INDS = np.where(np.array([layer.type for layer in net1.layers])=='Convolution')[0] 43 | print net1.params.keys() 44 | print net2.params.keys() 45 | 46 | for (ll,layer) in enumerate(net2.params.keys()): 47 | P = len(net2.params[layer]) # number of blobs 48 | if(P>0): 49 | for pp in range(P): 50 | ndim1 = net1.params[layer][pp].data.ndim 51 | ndim2 = net2.params[layer][pp].data.ndim 52 | 53 | print('Copying layer %s, param blob %i (%i-dim => %i-dim)'%(layer,pp,ndim1,ndim2)) 54 | if(ndim1==ndim2): 55 | print(' Same dimensionality...') 56 | net2.params[layer][pp].data[...] = net1.params[layer][pp].data[...] 57 | else: 58 | print(' Different dimensionality...') 59 | net2.params[layer][pp].data[...] = net1.params[layer][pp].data[...].reshape(net2.params[layer][pp].data[...].shape) 60 | 61 | net2.save(MODEL2_PATH) 62 | 63 | for arg in vars(args): 64 | print('[%s] =' % arg, getattr(args, arg)) 65 | print 'Saving model into: %s'%MODEL2_PATH 66 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/resources/magic_init/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Philipp Krähenbühl 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 
-------------------------------------------------------------------------------- /Deep-Learning-Colorization/resources/magic_init/README.md: -------------------------------------------------------------------------------- 1 | # Data-dependent initialization of convolutional neural networks 2 | 3 | Created by Philipp Krähenbühl. 4 | 5 | ### Introduction 6 | 7 | This code implements the initialization presented in our [arXiv tech report](http://arxiv.org/abs/1511.06856), which is under submission at ICLR 2016. 8 | 9 | *This is a reimplementation and currently work in progress. Use at your own risk.* 10 | 11 | ### License 12 | 13 | This code is released under the BSD License (refer to the LICENSE file for details). 14 | 15 | ### Citing 16 | 17 | If you find our initialization useful in your research, please consider citing: 18 | 19 | @article{krahenbuhl2015data, 20 | title={Data-dependent Initializations of Convolutional Neural Networks}, 21 | author={Kr{\"a}henb{\"u}hl, Philipp and Doersch, Carl and Donahue, Jeff and Darrell, Trevor}, 22 | journal={arXiv preprint arXiv:1511.06856}, 23 | year={2015} 24 | } 25 | 26 | ### Setup 27 | 28 | Check out the project and create a symlink to caffe in the `magic_init` directory: 29 | ```Shell 30 | ln -s path/to/caffe/python/caffe caffe 31 | ``` 32 | 33 | ### Examples 34 | 35 | Here is a quick example of how to initialize AlexNet: 36 | ```bash 37 | python magic_init.py path/to/alexnet/deploy.prototxt path/to/output.caffemodel -d "path/to/some/images/*.png" -q -nit 10 -cs 38 | ``` 39 | Here the ```-d``` flag allows you to initialize the network using your own images. Feel free to use ImageNet, Pascal, COCO or whatever you have at hand; it shouldn't make a big difference. The ```-q``` (quiet) flag suppresses all the caffe logging, ```-nit``` controls the number of batches used (while ```-bs``` controls the batch size). Finally, ```-cs``` rescales the gradients across layers. This rescaling currently works best for feed-forward networks, and might not work too well for DAG-structured networks (we are working on that). 40 | 41 | To run the k-means initialization use: 42 | ```bash 43 | python magic_init.py path/to/alexnet/deploy.prototxt path/to/output.caffemodel -d "path/to/some/images/*.png" -q -nit 10 -cs -t kmeans 44 | ``` 45 | 46 | Finally, ```python magic_init.py -h``` should provide you with more help. 47 | 48 | 49 | ### Pro tips 50 | If your numpy implementation is based on OpenBLAS, try disabling threading with ```export OPENBLAS_NUM_THREADS=1```; it can improve the runtime performance a bit.
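To make the idea concrete, the following is a minimal numpy sketch of the data-dependent scaling controlled by the flags above: initialize a layer randomly, push a batch of real data through it, then rescale weights and biases so each unit's response has zero mean and unit variance on that batch. The two-layer toy network and the function name `data_dependent_init` are assumptions made for illustration; this is not the actual `magic_init.py` implementation, which operates on caffe blobs layer by layer.

```python
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(128, 64))            # a batch of real data (128 samples, 64 features)

def data_dependent_init(inputs, n_out, rng):
    # start from a small random Gaussian initialization
    W = rng.normal(scale=0.01, size=(inputs.shape[1], n_out))
    b = np.zeros(n_out)
    pre = inputs @ W + b
    # rescale so every output unit has zero mean and unit variance on this batch
    mu, sigma = pre.mean(axis=0), pre.std(axis=0) + 1e-8
    return W / sigma, (b - mu) / sigma

W1, b1 = data_dependent_init(X, 256, rng)
h1 = np.maximum(X @ W1 + b1, 0.0)         # ReLU activations of the first layer
W2, b2 = data_dependent_init(h1, 128, rng)

print((X @ W1 + b1).std())                # ~1.0 by construction
print((h1 @ W2 + b2).std())               # ~1.0 for the second layer as well
```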
51 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/resources/magic_init/load.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | 3 | def parseProtoString(s): 4 | from google.protobuf import text_format 5 | from caffe.proto import caffe_pb2 as pb 6 | proto_net = pb.NetParameter() 7 | text_format.Merge(s, proto_net) 8 | return proto_net 9 | 10 | 11 | def get_param(l, exclude=set(['top', 'bottom', 'name', 'type'])): 12 | if not hasattr(l,'ListFields'): 13 | if hasattr(l,'__delitem__'): 14 | return list(l) 15 | return l 16 | r = dict() 17 | for f, v in l.ListFields(): 18 | if f.name not in exclude: 19 | r[f.name] = get_param(v, []) 20 | return r 21 | 22 | class ProtoDesc: 23 | def __init__(self, prototxt): 24 | from os import path 25 | self.prototxt = prototxt 26 | self.parsed_proto = parseProtoString(open(self.prototxt, 'r').read()) 27 | # Guess the input dimension 28 | self.input_dim = (3, 227, 227) 29 | net = self.parsed_proto 30 | if len(net.input_dim) > 0: 31 | self.input_dim = net.input_dim[1:] 32 | else: 33 | lrs = net.layer 34 | cs = [l.transform_param.crop_size for l in lrs 35 | if l.HasField('transform_param')] 36 | if len(cs): 37 | self.input_dim = (3, cs[0], cs[0]) 38 | 39 | def __call__(self, clip=None, **inputs): 40 | from caffe import layers as L 41 | from collections import OrderedDict 42 | net = self.parsed_proto 43 | blobs = OrderedDict(inputs) 44 | for l in net.layer: 45 | if l.name not in inputs: 46 | in_place = l.top == l.bottom 47 | param = get_param(l) 48 | assert all([b in blobs for b in l.bottom]), "Some bottoms not founds: " + ', '.join([b for b in l.bottom if not b in blobs]) 49 | tops = getattr(L, l.type)(*[blobs[b] for b in l.bottom], 50 | ntop=len(l.top), in_place=in_place, 51 | name=l.name, 52 | **param) 53 | if len(l.top) <= 1: 54 | tops = [tops] 55 | for i, t in enumerate(l.top): 56 | blobs[t] = tops[i] 57 | if l.name == clip: 58 | break 59 | return list(blobs.values())[-1] 60 | 61 | 62 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/resources/magic_init/load.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Deep-Learning-Colorization/resources/magic_init/load.pyc -------------------------------------------------------------------------------- /Deep-Learning-Colorization/resources/magic_init/measure_stat.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from magic_init import * 3 | 4 | class BCOLORS: 5 | HEADER = '\033[95m' 6 | OKBLUE = '\033[94m' 7 | OKGREEN = '\033[92m' 8 | WARNING = '\033[93m' 9 | FAIL = '\033[91m' 10 | ENDC = '\033[0m' 11 | BOLD = '\033[1m' 12 | UNDERLINE = '\033[4m' 13 | 14 | class NOCOLORS: 15 | HEADER = '' 16 | OKBLUE = '' 17 | OKGREEN = '' 18 | WARNING = '' 19 | FAIL = '' 20 | ENDC = '' 21 | BOLD = '' 22 | UNDERLINE = '' 23 | 24 | def coloredNumbers(v, color=None, fmt='%6.2f', max_display=300, bcolors=BCOLORS): 25 | import numpy as np 26 | # Display a numpy array and highlight the min and max values [required a nice linux 27 | # terminal supporting colors] 28 | r = "" 29 | mn, mx = np.min(v), np.max(v) 30 | for k,i in enumerate(v): 31 | if len(v) > max_display and k > max_display/2 and k < len(v) - max_display/2: 32 | if r[-1] != 
'.': 33 | r += '...' 34 | continue 35 | if i <= mn + 1e-3: 36 | r += bcolors.BOLD+bcolors.FAIL 37 | elif i + 1e-3 >= mx: 38 | r += bcolors.BOLD+bcolors.FAIL 39 | elif color is not None: 40 | r += color 41 | r += (fmt+' ')%i 42 | r += bcolors.ENDC 43 | r += bcolors.ENDC 44 | return r 45 | 46 | def computeGradientRatio(net, NIT=1): 47 | import numpy as np 48 | last_layer = 0 49 | for i, (n, l) in enumerate(zip(net._layer_names, net.layers)): 50 | if l.type not in STRIP_LAYER: 51 | last_layer = i 52 | last_tops = net.top_names[net._layer_names[last_layer]] 53 | 54 | var = {} 55 | for it in range(NIT): 56 | net._forward(0, last_layer) 57 | # Reset the diffs 58 | for l in net.layers: 59 | for b in l.blobs: 60 | b.diff[...] = 0 61 | # Set the top diffs 62 | for t in last_tops: 63 | net.blobs[t].diff[...] = np.random.normal(0, 1, net.blobs[t].shape) 64 | net._backward(last_layer, 0) 65 | for i, (n, l) in enumerate(zip(net._layer_names, net.layers)): 66 | if len(l.blobs) > 0: 67 | assert l.type in PARAMETER_LAYERS, "Parameter layer '%s' currently not supported"%l.type 68 | b = l.blobs[0] 69 | r = np.mean(b.diff.swapaxes(0,1).reshape((b.diff.shape[1],-1))**2, axis=1) / np.mean(b.data**2) 70 | if n in var: var[n] += r / NIT 71 | else: var[n] = r / NIT 72 | std = {n: np.sqrt(var[n]) for n in var} 73 | return {n: np.std(s) / np.mean(s) for n,s in std.items()}, {n: np.mean(s) for n,s in std.items()} 74 | 75 | def printMeanStddev(net, NIT=10, show_all=False, show_color=True, quiet=False): 76 | import numpy as np 77 | bcolors = NOCOLORS 78 | if show_color: bcolors = BCOLORS 79 | 80 | layer_names = list(net._layer_names) 81 | if not show_all: 82 | layer_names = [n for n, l in zip(net._layer_names, net.layers) if len(l.blobs)>0] 83 | if 'data' in net._layer_names: 84 | layer_names.append('data') 85 | 86 | # When was a blob last used 87 | last_used = {} 88 | # Make sure all layers are supported, and compute the range each blob is used in 89 | for i, (n, l) in enumerate(zip(net._layer_names, net.layers)): 90 | for b in net.bottom_names[n]: 91 | last_used[b] = i 92 | 93 | active_data, cvar = {}, {} 94 | for i, (n, l) in enumerate(zip(net._layer_names, net.layers)): 95 | # Run the network forward 96 | new_data = forward(net, i, NIT, {b: active_data[b] for b in net.bottom_names[n]}, net.top_names[n]) 97 | active_data.update(new_data) 98 | 99 | if len(net.top_names[n]) > 0 and n in layer_names: 100 | m = net.top_names[n][0] 101 | D = flattenData(new_data[m]) 102 | mean = np.mean(D, axis=0) 103 | stddev = np.std(D, axis=0) 104 | if not quiet: 105 | print( bcolors.BOLD, ' '*5, n, ':', m, ' '*5, bcolors.ENDC ) 106 | print( 'mean ', coloredNumbers(mean, bcolors.OKGREEN, bcolors=bcolors) ) 107 | print( 'stddev', coloredNumbers(stddev, bcolors.OKBLUE, bcolors=bcolors) ) 108 | print( 'coef of variation ', bcolors.OKGREEN, stddev.std() / stddev.mean(), bcolors.ENDC ) 109 | print() 110 | cvar[n] = stddev.std() / stddev.mean() 111 | # Delete all unused data 112 | for k in list(active_data): 113 | if k not in last_used or last_used[k] == i: 114 | del active_data[k] 115 | return cvar 116 | 117 | def main(): 118 | from argparse import ArgumentParser 119 | from os import path 120 | 121 | parser = ArgumentParser() 122 | parser.add_argument('prototxt') 123 | parser.add_argument('-l', '--load', help='Load a caffemodel') 124 | parser.add_argument('-d', '--data', default=None, help='Image list to use [default prototxt data]') 125 | #parser.add_argument('-q', action='store_true', help='Quiet execution') 126 | 
parser.add_argument('-sm', action='store_true', help='Summary only') 127 | parser.add_argument('-q', action='store_true', help='Quiet execution') 128 | parser.add_argument('-a', '--all', action='store_true', help='Show the statistic for all layers') 129 | parser.add_argument('-nc', action='store_true', help='Do not use color') 130 | parser.add_argument('-s', type=float, default=1.0, help='Scale the input [only custom data "-d"]') 131 | parser.add_argument('-bs', type=int, default=16, help='Batch size [only custom data "-d"]') 132 | parser.add_argument('-nit', type=int, default=10, help='Number of iterations') 133 | parser.add_argument('--gpu', type=int, default=0, help='What gpu to run it on?') 134 | args = parser.parse_args() 135 | 136 | if args.q: 137 | from os import environ 138 | environ['GLOG_minloglevel'] = '2' 139 | import caffe, load 140 | from caffe import NetSpec, layers as L 141 | 142 | caffe.set_mode_gpu() 143 | if args.gpu is not None: 144 | caffe.set_device(args.gpu) 145 | 146 | if args.data is not None: 147 | model = load.ProtoDesc(args.prototxt) 148 | net = NetSpec() 149 | fl = getFileList(args.data) 150 | if len(fl) == 0: 151 | print("Unknown data type for '%s'"%args.data) 152 | exit(1) 153 | from tempfile import NamedTemporaryFile 154 | f = NamedTemporaryFile('w') 155 | f.write('\n'.join([path.abspath(i)+' 0' for i in fl])) 156 | f.flush() 157 | net.data, net.label = L.ImageData(source=f.name, batch_size=args.bs, new_width=model.input_dim[-1], new_height=model.input_dim[-1], transform_param=dict(mean_value=[104,117,123], scale=args.s),ntop=2) 158 | net.out = model(data=net.data, label=net.label) 159 | n = netFromString('force_backward:true\n'+str(net.to_proto()), caffe.TRAIN ) 160 | else: 161 | n = caffe.Net(args.prototxt, caffe.TRAIN) 162 | 163 | if args.load is not None: 164 | n.copy_from(args.load) 165 | 166 | cvar = printMeanStddev(n, NIT=args.nit, show_all=args.all, show_color=not args.nc, quiet=args.sm) 167 | cv, gr = computeGradientRatio(n, NIT=args.nit) 168 | print() 169 | print(' Summary ') 170 | print('-----------') 171 | print() 172 | print('layer name out cvar rate cvar rate mean') 173 | for l in n._layer_names: 174 | if l in cvar and l in cv and l in gr: 175 | print('%-30s %10.2f %10.2f %10.2f'%(l, cvar[l], cv[l], gr[l]) ) 176 | 177 | if __name__ == "__main__": 178 | main() 179 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/resources/prior_probs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Deep-Learning-Colorization/resources/prior_probs.npy -------------------------------------------------------------------------------- /Deep-Learning-Colorization/resources/pts_in_hull.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Deep-Learning-Colorization/resources/pts_in_hull.npy -------------------------------------------------------------------------------- /Deep-Learning-Colorization/resources/softmax_cross_entropy_loss_layer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | // #include "caffe/layer.hpp" 6 | // #include "caffe/util/math_functions.hpp" 7 | // #include "caffe/vision_layer.hpp" 8 | #include 
"caffe/layers/softmax_cross_entropy_loss_layer.hpp" 9 | #include "caffe/util/math_functions.hpp" 10 | 11 | namespace caffe { 12 | 13 | template 14 | void SoftmaxCrossEntropyLossLayer::LayerSetUp( 15 | const vector*>& bottom, const vector*>& top) { 16 | LossLayer::LayerSetUp(bottom, top); 17 | softmax_bottom_vec_.clear(); 18 | softmax_bottom_vec_.push_back(bottom[0]); 19 | softmax_top_vec_.clear(); 20 | softmax_top_vec_.push_back(softmax_output_.get()); 21 | softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_); 22 | } 23 | 24 | template 25 | void SoftmaxCrossEntropyLossLayer::Reshape( 26 | const vector*>& bottom, const vector*>& top) { 27 | LossLayer::Reshape(bottom, top); 28 | CHECK_EQ(bottom[0]->count(), bottom[1]->count()) << 29 | "SOFTMAX_CROSS_ENTROPY_LOSS layer inputs must have the same count."; 30 | softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_); 31 | } 32 | 33 | template 34 | void SoftmaxCrossEntropyLossLayer::Forward_cpu( 35 | const vector*>& bottom, const vector*>& top) { 36 | // The forward pass computes the softmax outputs. 37 | softmax_bottom_vec_[0] = bottom[0]; 38 | softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_); 39 | // Compute the loss (negative log likelihood) 40 | const int count = bottom[0]->count(); 41 | const int num = bottom[0]->num(); 42 | // Stable version of loss computation from input data 43 | // const Dtype* input_data = bottom[0]->cpu_data(); 44 | const Dtype* target = bottom[1]->cpu_data(); 45 | Dtype loss = 0; 46 | const Dtype* softmax_output_data = softmax_top_vec_[0]->cpu_data(); 47 | 48 | // First compute max of input data 49 | for (int i = 0; i < count; ++i) { 50 | //loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) - 51 | // log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0))); 52 | if (target[i] > 0 ) { 53 | loss -= target[i] * (log(softmax_output_data[i]) - log(target[i])); 54 | } 55 | } 56 | 57 | top[0]->mutable_cpu_data()[0] = loss / num; 58 | } 59 | 60 | template 61 | void SoftmaxCrossEntropyLossLayer::Backward_cpu( 62 | const vector*>& top, const vector& propagate_down, 63 | const vector*>& bottom) { 64 | if (propagate_down[1]) { 65 | LOG(FATAL) << this->type() 66 | << " Layer cannot backpropagate to label inputs."; 67 | } 68 | if (propagate_down[0]) { 69 | // First, compute the diff 70 | const int count = bottom[0]->count(); 71 | const int num = bottom[0]->num(); 72 | const Dtype* softmax_output_data = softmax_output_->cpu_data(); 73 | const Dtype* target = bottom[1]->cpu_data(); 74 | Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); 75 | // Gradient is: target[i] - softmax_output_data[i] 76 | caffe_sub(count, softmax_output_data, target, bottom_diff); 77 | // Scale down gradient 78 | const Dtype loss_weight = top[0]->cpu_diff()[0]; 79 | caffe_scal(count, loss_weight / num, bottom_diff); 80 | } 81 | } 82 | 83 | #ifdef CPU_ONLY 84 | STUB_GPU(SoftmaxCrossEntropyLossLayer); 85 | #endif 86 | 87 | INSTANTIATE_CLASS(SoftmaxCrossEntropyLossLayer); 88 | REGISTER_LAYER_CLASS(SoftmaxCrossEntropyLoss); 89 | 90 | } // namespace caffe 91 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/resources/softmax_cross_entropy_loss_layer.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | // #include "caffe/layer.hpp" 6 | // #include "caffe/util/math_functions.hpp" 7 | // #include "caffe/vision_layer.hpp" 8 | #include 
"caffe/layers/softmax_cross_entropy_loss_layer.hpp" 9 | #include "caffe/util/math_functions.hpp" 10 | 11 | namespace caffe { 12 | 13 | template 14 | void SoftmaxCrossEntropyLossLayer::Forward_gpu( 15 | const vector*>& bottom, const vector*>& top) { 16 | // The forward pass computes the softmax outputs. 17 | softmax_bottom_vec_[0] = bottom[0]; 18 | softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_); 19 | // Compute the loss (negative log likelihood) 20 | const int count = bottom[0]->count(); 21 | const int num = bottom[0]->num(); 22 | // Stable version of loss computation from input data 23 | const Dtype* input_data = bottom[0]->cpu_data(); 24 | const Dtype* target = bottom[1]->cpu_data(); 25 | // Output of softmax forward pass 26 | const Dtype* softmax_output = softmax_top_vec_[0]->cpu_data(); 27 | Dtype loss = 0; 28 | for (int i = 0; i < count; ++i) { 29 | //loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) - 30 | // log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0))); 31 | if ( target[i] > 0 ) { 32 | // loss -= target[i] * ( log(softmax_output[i]) - log(target[i]) ); 33 | // if(softmax_output[i] > 0) { 34 | loss -= target[i] * ( log(softmax_output[i]+1.0e-35) - log(target[i]) ); 35 | // } 36 | // else { 37 | // // LOG(INFO) << "Prediction was 0"; 38 | // loss -= target[i] * ( log(1.0e-35) - log(target[i]) ); // adding epsilon 39 | // } 40 | } 41 | } 42 | top[0]->mutable_cpu_data()[0] = loss / num; 43 | } 44 | 45 | template 46 | void SoftmaxCrossEntropyLossLayer::Backward_gpu( 47 | const vector*>& top, const vector& propagate_down, 48 | const vector*>& bottom) { 49 | if (propagate_down[1]) { 50 | LOG(FATAL) << this->type() 51 | << " Layer cannot backpropagate to label inputs."; 52 | } 53 | if (propagate_down[0]) { 54 | // First, compute the diff 55 | const int count = bottom[0]->count(); 56 | const int num = bottom[0]->num(); 57 | const Dtype* softmax_output_data = softmax_output_->gpu_data(); 58 | const Dtype* target = bottom[1]->gpu_data(); 59 | Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); 60 | caffe_copy(count, softmax_output_data, bottom_diff); 61 | caffe_gpu_axpy(count, Dtype(-1), target, bottom_diff); 62 | // Scale down gradient 63 | const Dtype loss_weight = top[0]->cpu_diff()[0]; 64 | caffe_gpu_scal(count, loss_weight / num, bottom_diff); 65 | } 66 | } 67 | 68 | INSTANTIATE_LAYER_GPU_FUNCS(SoftmaxCrossEntropyLossLayer); 69 | 70 | 71 | } // namespace caffe 72 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/resources/softmax_cross_entropy_loss_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CAFFE_SOFTMAX_CROSS_ENTROPY_LOSS_LAYER_HPP_ 2 | #define CAFFE_SOFTMAX_CROSS_ENTROPY_LOSS_LAYER_HPP_ 3 | 4 | #include 5 | 6 | #include "caffe/blob.hpp" 7 | #include "caffe/layer.hpp" 8 | #include "caffe/proto/caffe.pb.h" 9 | 10 | #include "caffe/layers/loss_layer.hpp" 11 | // #include "caffe/layers/sigmoid_layer.hpp" 12 | #include "caffe/layers/softmax_layer.hpp" 13 | 14 | namespace caffe { 15 | 16 | // Forward declare SoftmaxLayer for use in SoftmaxWithLossLayer. 
17 | template class SoftmaxLayer; 18 | 19 | template 20 | class SoftmaxCrossEntropyLossLayer : public LossLayer { 21 | public: 22 | explicit SoftmaxCrossEntropyLossLayer(const LayerParameter& param) 23 | : LossLayer(param), 24 | softmax_layer_(new SoftmaxLayer(param)), 25 | softmax_output_(new Blob()) {} 26 | virtual void LayerSetUp(const vector*>& bottom, 27 | const vector*>& top); 28 | virtual void Reshape(const vector*>& bottom, 29 | const vector*>& top); 30 | 31 | virtual inline const char* type() const { return "SoftmaxCrossEntropyLoss"; } 32 | 33 | protected: 34 | /// @copydoc SigmoidCrossEntropyLossLayer 35 | virtual void Forward_cpu(const vector*>& bottom, 36 | const vector*>& top); 37 | virtual void Forward_gpu(const vector*>& bottom, 38 | const vector*>& top); 39 | 40 | /** 41 | * @brief Computes the softmax cross-entropy loss error gradient w.r.t. the 42 | * predictions. 43 | * 44 | * Gradients cannot be computed with respect to the target inputs (bottom[1]), 45 | * so this method ignores bottom[1] and requires !propagate_down[1], crashing 46 | * if propagate_down[1] is set. 47 | * 48 | * @param top output Blob vector (length 1), providing the error gradient with 49 | * respect to the outputs 50 | * -# @f$ (1 \times 1 \times 1 \times 1) @f$ 51 | * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, 52 | * as @f$ \lambda @f$ is the coefficient of this layer's output 53 | * @f$\ell_i@f$ in the overall Net loss 54 | * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence 55 | * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. 56 | * (*Assuming that this top Blob is not used as a bottom (input) by any 57 | * other layer of the Net.) 58 | * @param propagate_down see Layer::Backward. 59 | * propagate_down[1] must be false as gradient computation with respect 60 | * to the targets is not implemented. 61 | * @param bottom input Blob vector (length 2) 62 | * -# @f$ (N \times C \times H \times W) @f$ 63 | * the predictions @f$x@f$; Backward computes diff 64 | * @f$ \frac{\partial E}{\partial x} = 65 | * \frac{1}{n} \sum\limits_{n=1}^N (\hat{p}_n - p_n) 66 | * @f$ 67 | * -# @f$ (N \times 1 \times 1 \times 1) @f$ 68 | * the labels -- ignored as we can't compute their error gradients 69 | */ 70 | virtual void Backward_cpu(const vector*>& top, 71 | const vector& propagate_down, const vector*>& bottom); 72 | virtual void Backward_gpu(const vector*>& top, 73 | const vector& propagate_down, const vector*>& bottom); 74 | 75 | /// The internal SoftmaxLayer used to map predictions to probabilities. 76 | shared_ptr > softmax_layer_; 77 | /// sigmoid_output stores the output of the SigmoidLayer. 
78 | shared_ptr > softmax_output_; 79 | /// bottom vector holder to call the underlying SigmoidLayer::Forward 80 | vector*> softmax_bottom_vec_; 81 | /// top vector holder to call the underlying SigmoidLayer::Forward 82 | vector*> softmax_top_vec_; 83 | }; 84 | 85 | } // namespace caffe 86 | 87 | #endif // CAFFE_SOFTMAX_CROSS_ENTROPY_LOSS_LAYER_HPP_ 88 | -------------------------------------------------------------------------------- /Deep-Learning-Colorization/video_colorize_parallel.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | import cv2 6 | import numpy as np 7 | from skimage import img_as_float 8 | import skimage.color as color 9 | import scipy.ndimage.interpolation as sni 10 | import caffe 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--filename', type=str, default='*', help='Filename of input BW video') 15 | parser.add_argument('--input_dir', type=str, default='/home/ubuntu/Automatic-Video-Colorization/data/examples/converted/', help='Directory of input files') 16 | parser.add_argument('--output_dir', type=str, default='/home/ubuntu/Automatic-Video-Colorization/data/examples/recolorized/', help='Directory of output files') 17 | parser.add_argument('--gpu', dest='gpu', help='gpu id', type=int, default=0) 18 | parser.add_argument('--prototxt',dest='prototxt',help='prototxt filepath', type=str, default='./models/colorization_deploy_v2.prototxt') 19 | parser.add_argument('--caffemodel',dest='caffemodel',help='caffemodel filepath', type=str, default='./models/colorization_release_v2.caffemodel') 20 | 21 | args = parser.parse_args() 22 | return args 23 | 24 | def image_colorization(frame, args): 25 | 26 | caffe.set_mode_gpu() 27 | caffe.set_device(args.gpu) 28 | 29 | # Select desired model 30 | net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST) 31 | 32 | (H_in,W_in) = net.blobs['data_l'].data.shape[2:] # get input shape 33 | (H_out,W_out) = net.blobs['class8_ab'].data.shape[2:] # get output shape 34 | 35 | pts_in_hull = np.load('./resources/pts_in_hull.npy') # load cluster centers 36 | net.params['class8_ab'][0].data[:,:,0,0] = pts_in_hull.transpose((1,0)) # populate cluster centers as 1x1 convolution kernel 37 | # print 'Annealed-Mean Parameters populated' 38 | 39 | # load the original image 40 | img_rgb = img_as_float(frame).astype(np.float32) 41 | 42 | img_lab = color.rgb2lab(img_rgb) # convert image to lab color space 43 | img_l = img_lab[:,:,0] # pull out L channel 44 | (H_orig,W_orig) = img_rgb.shape[:2] # original image size 45 | 46 | # create grayscale version of image (just for displaying) 47 | img_lab_bw = img_lab.copy() 48 | img_lab_bw[:,:,1:] = 0 49 | img_rgb_bw = color.lab2rgb(img_lab_bw) 50 | 51 | # resize image to network input size 52 | img_rs = caffe.io.resize_image(img_rgb,(H_in,W_in)) # resize image to network input size 53 | img_lab_rs = color.rgb2lab(img_rs) 54 | img_l_rs = img_lab_rs[:,:,0] 55 | 56 | net.blobs['data_l'].data[0,0,:,:] = img_l_rs-50 # subtract 50 for mean-centering 57 | net.forward() # run network 58 | 59 | ab_dec = net.blobs['class8_ab'].data[0,:,:,:].transpose((1,2,0)) # this is our result 60 | ab_dec_us = sni.zoom(ab_dec,(1.*H_orig/H_out,1.*W_orig/W_out,1)) # upsample to match size of original image L 61 | img_lab_out = np.concatenate((img_l[:,:,np.newaxis],ab_dec_us),axis=2) # concatenate with original image L 62 | img_rgb_out = (255*np.clip(color.lab2rgb(img_lab_out),0,1)).astype('uint8') # 
convert back to rgb 63 | 64 | return img_rgb_out 65 | 66 | def bw2color(args, inputname, inputpath, outputpath): 67 | if inputname.endswith(".mp4"): 68 | 69 | # store informations about the original video 70 | cap = cv2.VideoCapture(inputpath + inputname) 71 | # original dimensions 72 | width, height = int(cap.get(3)), int(cap.get(4)) 73 | 74 | fourcc = cv2.VideoWriter_fourcc(*'mp4v'); 75 | 76 | # parameters of output file 77 | # dimensions of the output image 78 | new_width, new_height = width, height 79 | # number of frames 80 | fps = 30.0 81 | 82 | # recolorized output video 83 | color_out = cv2.VideoWriter( 84 | outputpath + 'color_' + inputname, 85 | fourcc, 86 | fps, 87 | (new_width, new_height), 88 | isColor=True 89 | ) 90 | 91 | while(cap.isOpened()): 92 | ret, frame_in = cap.read() 93 | # check if we are not at the end of the video 94 | if ret==True: 95 | # convert BGR to RGB convention 96 | frame_in = frame_in[:,:,::-1] 97 | # colorize the BW frame 98 | frame_out = image_colorization(frame_in, args) 99 | # convert RGB to BGR convention 100 | frame_out = frame_out[:,:,::-1] 101 | # write the color frame 102 | color_out.write(frame_out) 103 | 104 | if cv2.waitKey(1) & 0xFF == ord('q'): 105 | break 106 | # end of the video 107 | else: 108 | break 109 | 110 | # release everything if job is finished 111 | cap.release() 112 | color_out.release() 113 | 114 | def main(): 115 | args = parse_args() 116 | 117 | if args.filename == '*': 118 | for filename in os.listdir(args.input_dir): 119 | bw2color(args, inputname = filename, inputpath = args.input_dir, outputpath = args.output_dir) 120 | else: 121 | bw2color(args, inputname = args.filename, inputpath = args.input_dir, outputpath = args.output_dir) 122 | 123 | # cleanup 124 | cv2.destroyAllWindows() 125 | 126 | return 0 127 | 128 | if __name__ == '__main__': 129 | main() 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CS230-Final-Project 2 | 3 | ### Converting videos 4 | 5 | 1. Create the data directories 6 | ``` 7 | mkdir data; mkdir data/raw; mkdir data/converted; 8 | ``` 9 | 2. Place videos inside 'data/raw' directory 10 | 3. Run the conversion script 11 | 12 | For all videos inside 'data/raw' directory 13 | ``` 14 | python3 converter.py 15 | ``` 16 | 17 | For one specific video 'filename' 18 | ``` 19 | python3 converter.py --inputname filename 20 | ``` 21 | 22 | To convert all videos in the data/raw folder to a consistent fps and resolution: 23 | ``` 24 | python3 converter.py --fps 30 --out_dim 640 360 25 | ``` 26 | 27 | #### Moments in Time (Mini) Dataset 28 | Download and unzip the dataset 29 | ``` 30 | wget http://data.csail.mit.edu/soundnet/actions3/split1/Moments_in_Time_Mini.zip 31 | unzip Moments_in_Time_Mini.zip -d data/. 32 | ``` 33 | Pre-process the dataset 34 | ``` 35 | ./convert_moment_dataset.sh 36 | ``` 37 | 38 | ## Running the baseline on a specific video 39 | Go into the folder "Deep-Learning-Colorization" 40 | 41 | Run ```./models/fetch_release_models.sh``` to download the model. 
42 | 43 | Then run the following command to colorize your video : 44 | ``` 45 | python3 video_colorize_parallel.py --filename --input_dir --output_dir 46 | ``` 47 | 48 | ## Requirements 49 | 50 | ### Dependencies 51 | 52 | You can install Python dependencies using `pip install -r requirements.txt` 53 | 54 | 55 | ### Issues with CUDA 56 | 57 | When running `import tensorflow as tf`, if you encounter the following error: 58 | ``` 59 | ImportError: libcublas.so.9.0: cannot open shared object file: No such file or directory 60 | ``` 61 | 62 | Run the following to create links: 63 | ``` 64 | sudo ln -s /usr/lib/x86_64-linux-gnu/libcublas.so.9.1.85 /usr/lib/x86_64-linux-gnu/libcublas.so.9.0 65 | sudo ln -s /usr/lib/x86_64-linux-gnu/libcusolver.so.9.1.85 /usr/lib/x86_64-linux-gnu/libcusolver.so.9.0 66 | ``` 67 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # custom 104 | _TODO 105 | checkpoints 106 | plots 107 | vcs.xml 108 | .idea 109 | .vscode -------------------------------------------------------------------------------- /Ref-GAN-Colorization/README.md: -------------------------------------------------------------------------------- 1 | # Image Colorization with Generative Adversarial Networks 2 | In this work, we generalize the colorization procedure using a conditional Deep Convolutional Generative Adversarial Network (DCGAN) as as suggested by [Pix2Pix]. The network is trained on the datasets [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) and [Places365](http://places2.csail.mit.edu). 
Some of the results from Places365 dataset are [shown here.](#places365-results) 3 | 4 | ## Prerequisites 5 | - Linux 6 | - Tensorflow 1.7 7 | - NVIDIA GPU (12G or 24G memory) + CUDA cuDNN 8 | 9 | ## Getting Started 10 | ### Installation 11 | - Clone this repo: 12 | ```bash 13 | git clone https://github.com/ImagingLab/Colorizing-with-GANs.git 14 | cd Colorizing-with-GANs 15 | ``` 16 | - Install Tensorflow and dependencies from https://www.tensorflow.org/install/ 17 | - Install python requirements: 18 | ```bash 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | ### Dataset 23 | - We use [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) and [Places365](http://places2.csail.mit.edu) datasets. To train a model on the full dataset, download datasets from official websites. 24 | After downloading, put then under the `datasets` folder. 25 | 26 | ### Training 27 | - To train the model, run `main.py` script 28 | ```bash 29 | python main.py 30 | ``` 31 | - To train the model on places365 dataset with tuned hyperparameters: 32 | ``` 33 | python train.py \ 34 | --seed 100 \ 35 | --dataset places365 \ 36 | --dataset-path ./dataset/places365 \ 37 | --checkpoints-path ./checkpoints \ 38 | --batch-size 16 \ 39 | --epochs 10 \ 40 | --lr 3e-4 \ 41 | --label-smoothing 1 42 | 43 | ``` 44 | 45 | - To train the model of cifar10 dataset with tuned hyperparameters: 46 | ``` 47 | python train.py \ 48 | --seed 100 \ 49 | --dataset cifar10 \ 50 | --dataset-path ./dataset/cifar10 \ 51 | --checkpoints-path ./checkpoints \ 52 | --batch-size 128 \ 53 | --epochs 200 \ 54 | --lr 3e-4 \ 55 | --lr-decay-steps 1e4 \ 56 | --augment True 57 | 58 | ``` 59 | 60 | ### Evaluate 61 | - Download the pre-trained weights [from here.](https://drive.google.com/open?id=1jTsAUAKrMiHO2gn7s-fFZ_zUSzgKoPyp) and copy them in the `checkpoints` folder. 62 | - To evaluate the model quantitatively on the test-set, run `test-eval.py` script: 63 | ```bash 64 | python test-eval.py 65 | ``` 66 | 67 | ### Turing Test 68 | - Download the pre-trained weights [from here.](https://drive.google.com/open?id=1jTsAUAKrMiHO2gn7s-fFZ_zUSzgKoPyp) and copy them in the `checkpoints` folder. 69 | - To evaluate the model qualitatively using visual Turing test, run `test-turing.py`: 70 | ```bash 71 | python test-turing.py 72 | ``` 73 | 74 | - To apply time-based visual Turing test run (2 seconds decision time): 75 | ```bash 76 | python test-turing.py --test-delay 2 77 | ``` 78 | 79 | 80 | ## Method 81 | 82 | ### Generative Adversarial Network 83 | Both generator and discriminator use CNNs. The generator is trained to minimize the probability that the discriminator makes a correct prediction in generated data, while discriminator is trained to maximize the probability of assigning the correct label. This is presented as a single minimax game problem: 84 |
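In the standard formulation (Goodfellow et al., 2014), this objective reads:

```latex
\min_G \max_D \; V(D, G) =
  \mathbb{E}_{x \sim p_{data}(x)}\big[\log D(x)\big]
  + \mathbb{E}_{z \sim p_z(z)}\big[\log\big(1 - D(G(z))\big)\big]
```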

85 | 86 |

87 | In our model, we have redefined the generator's cost function to maximize the probability of the discriminator being mistaken, rather than to minimize the probability of the discriminator being correct. In addition, the cost function was further modified by adding an L1-based regularizer, which in theory preserves the structure of the original images and prevents the generator from assigning arbitrary colors to pixels just to fool the discriminator: 88 |
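One way to write this modified cost is the following sketch in the Pix2Pix style, where `y` is the ground-truth color image and the L1 weight corresponds to the `--l1-weight` option (default 100) in `options.py`:

```latex
\min_{\theta_G} \; J^{(G)} =
  -\,\mathbb{E}_{z}\big[\log D(G(z))\big]
  + \lambda \, \mathbb{E}\big[\lVert y - G(z) \rVert_1\big]
```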

89 | 90 |

91 | 92 | ### Conditional GAN 93 | In a traditional GAN, the input of the generator is randomly generated noise data z. However, this approach is not applicable to the automatic colorization problem due to the nature of its inputs. The generator must be modified to accept grayscale images as inputs rather than noise. This problem was addressed by using a variant of GAN called [conditional generative adversarial networks](https://arxiv.org/abs/1411.1784). Since no noise is introduced, the input of the generator is treated as zero noise with the grayscale input as a prior: 94 |
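With the noise fixed at zero and the grayscale image `x` used as the condition, the generator cost sketched above becomes (the notation here is assumed for illustration, not taken verbatim from the repository):

```latex
\min_{\theta_G} \; J^{(G)} =
  -\,\mathbb{E}\big[\log D(G(0_z \mid x))\big]
  + \lambda \, \lVert y - G(0_z \mid x) \rVert_1
```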

95 | 96 |

97 | The discriminator receives colored images from both the generator and the original data, along with the grayscale input as the condition, and tries to tell which pair contains the true colored image: 98 |
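Putting the two together, the conditional objective can be sketched as follows, where `y` denotes the true color image paired with its grayscale version `x`:

```latex
\min_G \max_D \;
  \mathbb{E}_{x, y}\big[\log D(x, y)\big]
  + \mathbb{E}_{x}\big[\log\big(1 - D(x, G(0_z \mid x))\big)\big]
```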

99 | 100 |

101 | 102 | ### Networks Architecture 103 | The architecture of the generator is inspired by [U-Net](https://arxiv.org/abs/1505.04597): the model is symmetric, with `n` encoding units and `n` decoding units. The contracting path consists of 4x4 convolution layers with stride 2 for downsampling, each followed by batch normalization and a Leaky-ReLU activation with a slope of 0.2. The number of channels is doubled after each step. Each unit in the expansive path consists of a 4x4 transposed convolution layer with stride 2 for upsampling, concatenation with the activation map of the mirroring layer in the contracting path, followed by batch normalization and a ReLU activation. The last layer of the network is a 1x1 convolution, which is equivalent to a cross-channel parametric pooling layer, and uses a `tanh` activation. 104 |
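A minimal TensorFlow sketch of one encoder/decoder pair as described above is shown below; the channel counts, input size, and variable names are illustrative only, and the actual implementation lives in `networks.py` and `ops.py`:

```python
import tensorflow as tf

def encoder_unit(x, filters, name):
    """4x4 convolution, stride 2 (downsample), batch-norm, LeakyReLU with slope 0.2."""
    with tf.variable_scope(name):
        y = tf.layers.conv2d(x, filters, kernel_size=4, strides=2, padding='same')
        y = tf.layers.batch_normalization(y, training=True)
        return tf.nn.leaky_relu(y, alpha=0.2)

def decoder_unit(x, skip, filters, name):
    """4x4 transposed convolution, stride 2 (upsample), batch-norm, ReLU,
    then concatenation with the mirrored encoder activation (skip connection)."""
    with tf.variable_scope(name):
        y = tf.layers.conv2d_transpose(x, filters, kernel_size=4, strides=2, padding='same')
        y = tf.layers.batch_normalization(y, training=True)
        y = tf.nn.relu(y)
        return tf.concat([skip, y], axis=3)  # concat on the channel axis

# illustrative shapes: 32x32 grayscale input, two encoder steps, one decoder step
gray = tf.placeholder(tf.float32, [None, 32, 32, 1])
e1 = encoder_unit(gray, 64, 'enc1')      # -> 16x16x64
e2 = encoder_unit(e1, 128, 'enc2')       # -> 8x8x128
d1 = decoder_unit(e2, e1, 64, 'dec1')    # -> 16x16x(64+64)
d2 = tf.layers.conv2d_transpose(d1, 32, kernel_size=4, strides=2,
                                padding='same', activation=tf.nn.relu)  # -> 32x32x32
out = tf.layers.conv2d(d2, 3, kernel_size=1, strides=1,
                       activation=tf.nn.tanh)  # 1x1 conv, tanh output in [-1, 1]
```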

105 | 106 |

107 | 108 | For the discriminator, we use an architecture similar to the contracting path of the baseline: a series of 4x4 convolution layers with stride 2, with the number of channels doubled after each downsampling step. All convolution layers are followed by batch normalization and a leaky ReLU activation with slope 0.2. After the last layer, a convolution maps the features to a 1-dimensional output, followed by a sigmoid function that returns the probability of the input being real or fake. 109 |
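A minimal sketch of the adversarial losses this setup implies, written against the discriminator's pre-sigmoid outputs; the variable names and the 0.9 smoothing factor are illustrative (the repository exposes smoothing via the `--label-smoothing` option):

```python
import tensorflow as tf

# pre-sigmoid discriminator outputs for real and generated (fake) image pairs
d_logits_real = tf.placeholder(tf.float32, [None, 1])
d_logits_fake = tf.placeholder(tf.float32, [None, 1])
smooth = 0.9  # illustrative one-sided label smoothing factor (1.0 disables smoothing)

# discriminator loss: real pairs pushed toward the (smoothed) "real" label, fakes toward 0
d_loss_real = tf.nn.sigmoid_cross_entropy_with_logits(
    logits=d_logits_real, labels=tf.ones_like(d_logits_real) * smooth)
d_loss_fake = tf.nn.sigmoid_cross_entropy_with_logits(
    logits=d_logits_fake, labels=tf.zeros_like(d_logits_fake))
d_loss = tf.reduce_mean(d_loss_real) + tf.reduce_mean(d_loss_fake)

# generator adversarial loss (non-saturating): make fakes look real to the discriminator
g_loss_adv = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
    logits=d_logits_fake, labels=tf.ones_like(d_logits_fake)))
```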

110 | 111 |

112 | 113 | ## Places365 Results 114 | Colorization results with Places365. (a) Grayscale. (b) Original Image. (c) Colorized with GAN. 115 |

116 | 117 |

118 | 119 | ## Citation 120 | If you use this code for your research, please cite our paper Image Colorization with Generative Adversarial Networks: 121 | 122 | ``` 123 | @inproceedings{nazeri2018image, 124 | title={Image Colorization Using Generative Adversarial Networks}, 125 | author={Nazeri, Kamyar and Ng, Eric and Ebrahimi, Mehran}, 126 | booktitle={International Conference on Articulated Motion and Deformable Objects}, 127 | pages={85--94}, 128 | year={2018}, 129 | organization={Springer} 130 | } 131 | ``` 132 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/__init__.py: -------------------------------------------------------------------------------- 1 | from .options import * 2 | from .models import * 3 | from .utils import * 4 | from .dataset import * 5 | from .main import * -------------------------------------------------------------------------------- /Ref-GAN-Colorization/build_dataset.py: -------------------------------------------------------------------------------- 1 | """Split the SIGNS dataset into train/dev/test and resize images to 64x64. 2 | 3 | The SIGNS dataset comes in the following format: 4 | train_signs/ 5 | 0_IMG_5864.jpg 6 | ... 7 | test_signs/ 8 | 0_IMG_5942.jpg 9 | ... 10 | 11 | Original images have size (3024, 3024). 12 | Resizing to (64, 64) reduces the dataset size from 1.16 GB to 4.7 MB, and loading smaller images 13 | makes training faster. 14 | 15 | We already have a test set created, so we only need to split "train_signs" into train and dev sets. 16 | Because we don't have a lot of images and we want that the statistics on the dev set be as 17 | representative as possible, we'll take 20% of "train_signs" as dev set. 18 | """ 19 | 20 | import argparse 21 | import random 22 | import os 23 | 24 | import numpy as np 25 | 26 | from PIL import Image 27 | from tqdm import tqdm 28 | import cv2 29 | 30 | # size of the resized frames 31 | SIZE = 256 32 | 33 | # subfolder of the "Moments_in_Time" dataset to consider 34 | SUBFOLDER = "/baking" 35 | 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--data_dir', default='../data/Moments_in_Time_Mini', help="Directory with the Moments in Time dataset") 38 | parser.add_argument('--output_dir', default='../data/momentsintime_ref', help="Where to write the new data") 39 | parser.add_argument('--dt', type=int, default=1, help="Time between consecutives frames") 40 | 41 | 42 | def split_resize_and_save(filename, i, output_dir, dt=1, size=SIZE): 43 | """Split the video clip in pair of consecutive frames (t, t+dt), resize the frames, and save the pairs to the `output_dir`""" 44 | 45 | vidcap = cv2.VideoCapture(filename) 46 | 47 | success, frame = vidcap.read() 48 | # convert BGR to RGB convention 49 | frame = frame[:,:,::-1] 50 | # default : use bilinear interpolation 51 | frame_prev = cv2.resize(frame, (size, size)) 52 | # save the first frame as the "color palette" reference 53 | frame_ref = frame_prev 54 | 55 | # counter to build pairs of consecutive frames 56 | count = 1 57 | 58 | while success: 59 | count += 1 60 | 61 | success, frame = vidcap.read() 62 | 63 | if success: 64 | # convert BGR to RGB convention 65 | frame = frame[:,:,::-1] 66 | # default : use bilinear interpolation 67 | frame = cv2.resize(frame, (size, size)) 68 | else: 69 | break 70 | #print('Read a new frame: ', success) 71 | 72 | if count % (1+dt) == 0: 73 | img = np.concatenate((frame, frame_prev, frame_ref), 2) 74 | frame_prev = frame 75 | np.save(output_dir + 
"/video{}_frame{}".format(i, count), img) 76 | 77 | if __name__ == '__main__': 78 | args = parser.parse_args() 79 | # Define the output directory 80 | args.output_dir = args.output_dir + "_dt" + str(args.dt) 81 | 82 | assert os.path.isdir(args.data_dir), "Couldn't find the dataset at {}".format(args.data_dir) 83 | 84 | # Define the data directories 85 | train_data_dir = os.path.join(args.data_dir, 'training' + SUBFOLDER) 86 | test_data_dir = os.path.join(args.data_dir, 'validation' + SUBFOLDER) 87 | 88 | # Get the filenames in each directory (train and test) 89 | filenames = os.listdir(train_data_dir) 90 | filenames = [os.path.join(train_data_dir, f) for f in filenames if f.endswith('.mp4')] 91 | 92 | test_filenames = os.listdir(test_data_dir) 93 | test_filenames = [os.path.join(test_data_dir, f) for f in test_filenames if f.endswith('.mp4')] 94 | 95 | # Split the images in 'train_moments' into 80% train and 20% dev 96 | # Make sure to always shuffle with a fixed seed so that the split is reproducible 97 | random.seed(230) 98 | filenames.sort() 99 | random.shuffle(filenames) 100 | 101 | split = int(0.9 * len(filenames)) 102 | train_filenames = filenames[:split] 103 | dev_filenames = filenames[split:] 104 | 105 | filenames = {'train': train_filenames, 106 | 'dev': dev_filenames, 107 | 'test': test_filenames} 108 | 109 | if not os.path.exists(args.output_dir): 110 | os.mkdir(args.output_dir) 111 | else: 112 | print("Warning: output dir {} already exists".format(args.output_dir)) 113 | 114 | # Preprocess train, dev and test 115 | for split in ['train', 'dev', 'test']: 116 | output_dir_split = os.path.join(args.output_dir, '{}_moments'.format(split)) 117 | if not os.path.exists(output_dir_split): 118 | os.mkdir(output_dir_split) 119 | else: 120 | print("Warning: dir {} already exists".format(output_dir_split)) 121 | 122 | print("Processing {} data, saving preprocessed data to {}".format(split, output_dir_split)) 123 | for i, filename in enumerate(tqdm(filenames[split])): 124 | split_resize_and_save(filename, i, output_dir_split, dt=args.dt, size=SIZE) 125 | 126 | print("Done building dataset") 127 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/dataset.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import tensorflow as tf 4 | from scipy.misc import imread 5 | from abc import abstractmethod 6 | from utils import unpickle 7 | 8 | CIFAR10_DATASET = 'cifar10' 9 | PLACES365_DATASET = 'places365' 10 | MOMENTSINTIME_DATASET = 'momentsintime' 11 | 12 | class BaseDataset(): 13 | def __init__(self, name, path, training=True, augment=True): 14 | self.name = name 15 | self.augment = augment and training 16 | self.training = training 17 | self.path = path 18 | self._data = [] 19 | 20 | def __len__(self): 21 | return len(self.data) 22 | 23 | def __iter__(self): 24 | total = len(self) 25 | start = 0 26 | 27 | while start < total: 28 | item = self[start] 29 | start += 1 30 | yield item 31 | 32 | raise StopIteration 33 | 34 | def __getitem__(self, index): 35 | val = self.data[index] 36 | try: 37 | # OLD : img = imread(val) if isinstance(val, str) else val 38 | img = np.load(val) if isinstance(val, str) else val 39 | 40 | if self.augment and np.random.binomial(1, 0.5) == 1: 41 | img = img[:, ::-1, :] 42 | 43 | except: 44 | img = None 45 | 46 | return img 47 | 48 | def generator(self, batch_size, recursive=False): 49 | start = 0 50 | total = len(self) 51 | 52 | while True: 53 
| while start < total: 54 | end = np.min([start + batch_size, total]) 55 | items = [] 56 | 57 | for ix in range(start, end): 58 | item = self[ix] 59 | if item is not None: 60 | items.append(item) 61 | 62 | start = end 63 | yield np.array(items) 64 | 65 | if recursive: 66 | start = 0 67 | 68 | else: 69 | raise StopIteration 70 | 71 | 72 | @property 73 | def data(self): 74 | if len(self._data) == 0: 75 | self._data = self.load() 76 | np.random.shuffle(self._data) 77 | 78 | return self._data 79 | 80 | @abstractmethod 81 | def load(self): 82 | return [] 83 | 84 | 85 | class Cifar10Dataset(BaseDataset): 86 | def __init__(self, path, training=True, augment=True): 87 | super(Cifar10Dataset, self).__init__(CIFAR10_DATASET, path, training, augment) 88 | 89 | def load(self): 90 | data = [] 91 | if self.training: 92 | for i in range(1, 6): 93 | filename = '{}/data_batch_{}'.format(self.path, i) 94 | batch_data = unpickle(filename) 95 | if len(data) > 0: 96 | data = np.vstack((data, batch_data[b'data'])) 97 | else: 98 | data = batch_data[b'data'] 99 | 100 | else: 101 | filename = '{}/test_batch'.format(self.path) 102 | batch_data = unpickle(filename) 103 | data = batch_data[b'data'] 104 | 105 | w = 32 106 | h = 32 107 | s = w * h 108 | data = np.array(data) 109 | data = np.dstack((data[:, :s], data[:, s:2 * s], data[:, 2 * s:])) 110 | data = data.reshape((-1, w, h, 3)) 111 | return data 112 | 113 | 114 | class Places365Dataset(BaseDataset): 115 | def __init__(self, path, training=True, augment=True): 116 | super(Places365Dataset, self).__init__(PLACES365_DATASET, path, training, augment) 117 | 118 | def load(self): 119 | if self.training: 120 | data = np.array( 121 | glob.glob(self.path + '/data_256/**/*.jpg', recursive=True)) 122 | 123 | else: 124 | data = np.array(glob.glob(self.path + '/val_256/*.jpg')) 125 | 126 | return data 127 | 128 | 129 | class MomentsInTimeDataset(BaseDataset): 130 | def __init__(self, path, training=True, augment=True): 131 | super(MomentsInTimeDataset, self).__init__(MOMENTSINTIME_DATASET, path, training, augment) 132 | 133 | def load(self): 134 | if self.training: 135 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/train_moments/*")) 136 | data = np.array(glob.glob("." + self.path + "/train_moments/*")) 137 | else: 138 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/dev_moments/*")) 139 | data = np.array(glob.glob("." 
+ self.path + "/dev_moments/*")) 140 | 141 | return data 142 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | import tensorflow as tf 5 | from options import ModelOptions 6 | from models import Cifar10Model, Places365Model, MomentsInTimeModel 7 | from dataset import CIFAR10_DATASET, PLACES365_DATASET, MOMENTSINTIME_DATASET 8 | 9 | 10 | def main(options): 11 | 12 | # reset tensorflow graph 13 | tf.reset_default_graph() 14 | 15 | # initialize random seed 16 | tf.set_random_seed(options.seed) 17 | np.random.seed(options.seed) 18 | random.seed(options.seed) 19 | 20 | # create a session environment 21 | with tf.Session() as sess: 22 | 23 | if options.dataset == CIFAR10_DATASET: 24 | model = Cifar10Model(sess, options) 25 | 26 | elif options.dataset == PLACES365_DATASET: 27 | model = Places365Model(sess, options) 28 | 29 | elif options.dataset == MOMENTSINTIME_DATASET: 30 | model = MomentsInTimeModel(sess, options) 31 | 32 | else: 33 | model = MomentsInTimeModel(sess, options) 34 | 35 | if not os.path.exists(options.checkpoints_path): 36 | os.makedirs(options.checkpoints_path) 37 | 38 | if options.log: 39 | open(model.train_log_file, 'w').close() 40 | open(model.test_log_file, 'w').close() 41 | 42 | # build the model and initialize 43 | model.build() 44 | sess.run(tf.global_variables_initializer()) 45 | 46 | 47 | # load model only after global variables initialization 48 | model.load() 49 | 50 | 51 | if options.mode == 0: 52 | args = vars(options) 53 | print('\n------------ Options -------------') 54 | with open(os.path.join(options.checkpoints_path, 'options.dat'), 'w') as f: 55 | for k, v in sorted(args.items()): 56 | print('%s: %s' % (str(k), str(v))) 57 | f.write('%s: %s\n' % (str(k), str(v))) 58 | print('-------------- End ----------------\n') 59 | 60 | model.train() 61 | 62 | elif options.mode == 1: 63 | model.evaluate() 64 | while True: 65 | model.sample() 66 | 67 | else: 68 | model.turing_test() 69 | 70 | 71 | if __name__ == "__main__": 72 | main(ModelOptions().parse()) 73 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/networks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from ops import conv2d, conv2d_transpose, pixelwise_accuracy 4 | 5 | 6 | class Discriminator(object): 7 | def __init__(self, name, kernels): 8 | self.name = name 9 | self.kernels = kernels 10 | self.var_list = [] 11 | 12 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None): 13 | output = inputs 14 | with tf.variable_scope(self.name, reuse=reuse_variables): 15 | for index, kernel in enumerate(self.kernels): 16 | 17 | # not use batch-norm in the first layer 18 | bnorm = False if index == 0 else True 19 | name = 'conv' + str(index) 20 | output = conv2d( 21 | inputs=output, 22 | name=name, 23 | kernel_size=kernel_size, 24 | filters=kernel[0], 25 | strides=kernel[1], 26 | bnorm=bnorm, 27 | activation=tf.nn.leaky_relu, 28 | seed=seed 29 | ) 30 | 31 | if kernel[2] > 0: 32 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed) 33 | 34 | output = conv2d( 35 | inputs=output, 36 | name='conv_last', 37 | filters=1, 38 | kernel_size=4, # last layer kernel size = 4 39 | strides=1, # last layer stride = 1 40 | 
bnorm=False, # do not use batch-norm for the last layer 41 | seed=seed 42 | ) 43 | 44 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name) 45 | 46 | return output 47 | 48 | 49 | class Generator(object): 50 | def __init__(self, name, encoder_kernels, decoder_kernels, output_channels=3): 51 | self.name = name 52 | self.encoder_kernels = encoder_kernels 53 | self.decoder_kernels = decoder_kernels 54 | self.output_channels = output_channels 55 | self.var_list = [] 56 | 57 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None): 58 | output = inputs 59 | 60 | with tf.variable_scope(self.name, reuse=reuse_variables): 61 | 62 | layers = [] 63 | 64 | # encoder branch 65 | for index, kernel in enumerate(self.encoder_kernels): 66 | 67 | name = 'conv' + str(index) 68 | output = conv2d( 69 | inputs=output, 70 | name=name, 71 | kernel_size=kernel_size, 72 | filters=kernel[0], 73 | strides=kernel[1], 74 | activation=tf.nn.leaky_relu, 75 | seed=seed 76 | ) 77 | 78 | # save contracting path layers to be used for skip connections 79 | layers.append(output) 80 | 81 | if kernel[2] > 0: 82 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed) 83 | 84 | # decoder branch 85 | for index, kernel in enumerate(self.decoder_kernels): 86 | 87 | name = 'deconv' + str(index) 88 | output = conv2d_transpose( 89 | inputs=output, 90 | name=name, 91 | kernel_size=kernel_size, 92 | filters=kernel[0], 93 | strides=kernel[1], 94 | activation=tf.nn.relu, 95 | seed=seed 96 | ) 97 | 98 | if kernel[2] > 0: 99 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed) 100 | 101 | # concat the layer from the contracting path with the output of the current layer 102 | # concat only the channels (axis=3) 103 | output = tf.concat([layers[len(layers) - index - 2], output], axis=3) 104 | 105 | output = conv2d( 106 | inputs=output, 107 | name='conv_last', 108 | filters=self.output_channels, # number of output chanels 109 | kernel_size=1, # last layer kernel size = 1 110 | strides=1, # last layer stride = 1 111 | bnorm=False, # do not use batch-norm for the last layer 112 | activation=tf.nn.tanh, # tanh activation function for the output 113 | seed=seed 114 | ) 115 | 116 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name) 117 | 118 | return output 119 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/ops.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | COLORSPACE_RGB = 'RGB' 5 | COLORSPACE_LAB = 'LAB' 6 | tf.nn.softmax_cross_entropy_with_logits_v2 7 | 8 | def conv2d(inputs, filters, name, kernel_size=4, strides=2, bnorm=True, activation=None, seed=None): 9 | """ 10 | Creates a conv2D block 11 | """ 12 | initializer=tf.variance_scaling_initializer(seed=seed) 13 | res = tf.layers.conv2d( 14 | name=name, 15 | inputs=inputs, 16 | filters=filters, 17 | kernel_size=kernel_size, 18 | strides=strides, 19 | padding="same", 20 | kernel_initializer=initializer) 21 | 22 | if bnorm: 23 | res = tf.layers.batch_normalization(inputs=res, name='bn_' + name, training=True) 24 | 25 | # activation after batch-norm 26 | if activation is not None: 27 | res = activation(res) 28 | 29 | return res 30 | 31 | 32 | def conv2d_transpose(inputs, filters, name, kernel_size=4, strides=2, bnorm=True, activation=None, seed=None): 33 | """ 34 | Creates a conv2D-transpose block 
35 | """ 36 | initializer=tf.variance_scaling_initializer(seed=seed) 37 | res = tf.layers.conv2d_transpose( 38 | name=name, 39 | inputs=inputs, 40 | filters=filters, 41 | kernel_size=kernel_size, 42 | strides=strides, 43 | padding="same", 44 | kernel_initializer=initializer) 45 | 46 | if bnorm: 47 | res = tf.layers.batch_normalization(inputs=res, name='bn_' + name, training=True) 48 | 49 | # activation after batch-norm 50 | if activation is not None: 51 | res = activation(res) 52 | 53 | return res 54 | 55 | 56 | def pixelwise_accuracy(img_real, img_fake, colorspace, thresh): 57 | """ 58 | Measures the accuracy of the colorization process by comparing pixels 59 | """ 60 | img_real = postprocess(img_real, colorspace, COLORSPACE_LAB) 61 | img_fake = postprocess(img_fake, colorspace, COLORSPACE_LAB) 62 | 63 | diffL = tf.abs(tf.round(img_real[..., 0]) - tf.round(img_fake[..., 0])) 64 | diffA = tf.abs(tf.round(img_real[..., 1]) - tf.round(img_fake[..., 1])) 65 | diffB = tf.abs(tf.round(img_real[..., 2]) - tf.round(img_fake[..., 2])) 66 | 67 | # within %thresh of the original 68 | predL = tf.cast(tf.less_equal(diffL, 1 * thresh), tf.float64) # L: [0, 100] 69 | predA = tf.cast(tf.less_equal(diffA, 2.2 * thresh), tf.float64) # A: [-110, 110] 70 | predB = tf.cast(tf.less_equal(diffB, 2.2 * thresh), tf.float64) # B: [-110, 110] 71 | 72 | # all three channels are within the threshold 73 | pred = predL * predA * predB 74 | 75 | return tf.reduce_mean(pred) 76 | 77 | 78 | def preprocess(img, colorspace_in, colorspace_out): 79 | if colorspace_out.upper() == COLORSPACE_RGB: 80 | if colorspace_in == COLORSPACE_LAB: 81 | img = lab_to_rgb(img) 82 | 83 | # [0, 1] => [-1, 1] 84 | img = (img / 255.0) * 2 - 1 85 | 86 | elif colorspace_out.upper() == COLORSPACE_LAB: 87 | if colorspace_in == COLORSPACE_RGB: 88 | img = rgb_to_lab(img / 255.0) 89 | 90 | L_chan, a_chan, b_chan = tf.unstack(img, axis=3) 91 | 92 | # L: [0, 100] => [-1, 1] 93 | # A, B: [-110, 110] => [-1, 1] 94 | img = tf.stack([L_chan / 50 - 1, a_chan / 110, b_chan / 110], axis=3) 95 | 96 | return img 97 | 98 | 99 | def postprocess(img, colorspace_in, colorspace_out): 100 | if colorspace_in.upper() == COLORSPACE_RGB: 101 | # [-1, 1] => [0, 1] 102 | img = (img + 1) / 2 103 | 104 | if colorspace_out == COLORSPACE_LAB: 105 | img = rgb_to_lab(img) 106 | 107 | elif colorspace_in.upper() == COLORSPACE_LAB: 108 | L_chan, a_chan, b_chan = tf.unstack(img, axis=3) 109 | 110 | # L: [-1, 1] => [0, 100] 111 | # A, B: [-1, 1] => [-110, 110] 112 | img = tf.stack([(L_chan + 1) / 2 * 100, a_chan * 110, b_chan * 110], axis=3) 113 | 114 | if colorspace_out == COLORSPACE_RGB: 115 | img = lab_to_rgb(img) 116 | 117 | return img 118 | 119 | 120 | def rgb_to_lab(srgb): 121 | # based on https://github.com/torch/image/blob/9f65c30167b2048ecbe8b7befdc6b2d6d12baee9/generic/image.c 122 | with tf.name_scope("rgb_to_lab"): 123 | srgb_pixels = tf.reshape(srgb, [-1, 3]) 124 | 125 | with tf.name_scope("srgb_to_xyz"): 126 | linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32) 127 | exponential_mask = tf.cast(srgb_pixels > 0.04045, dtype=tf.float32) 128 | rgb_pixels = (srgb_pixels / 12.92 * linear_mask) + (((srgb_pixels + 0.055) / 1.055) ** 2.4) * exponential_mask 129 | rgb_to_xyz = tf.constant([ 130 | # X Y Z 131 | [0.412453, 0.212671, 0.019334], # R 132 | [0.357580, 0.715160, 0.119193], # G 133 | [0.180423, 0.072169, 0.950227], # B 134 | ]) 135 | xyz_pixels = tf.matmul(rgb_pixels, rgb_to_xyz) 136 | 137 | # 
https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions 138 | with tf.name_scope("xyz_to_cielab"): 139 | 140 | # normalize for D65 white point 141 | xyz_normalized_pixels = tf.multiply(xyz_pixels, [1 / 0.950456, 1.0, 1 / 1.088754]) 142 | 143 | epsilon = 6 / 29 144 | linear_mask = tf.cast(xyz_normalized_pixels <= (epsilon**3), dtype=tf.float32) 145 | exponential_mask = tf.cast(xyz_normalized_pixels > (epsilon**3), dtype=tf.float32) 146 | fxfyfz_pixels = (xyz_normalized_pixels / (3 * epsilon**2) + 4 / 29) * linear_mask + (xyz_normalized_pixels ** (1 / 3)) * exponential_mask 147 | 148 | # convert to lab 149 | fxfyfz_to_lab = tf.constant([ 150 | # l a b 151 | [0.0, 500.0, 0.0], # fx 152 | [116.0, -500.0, 200.0], # fy 153 | [0.0, 0.0, -200.0], # fz 154 | ]) 155 | lab_pixels = tf.matmul(fxfyfz_pixels, fxfyfz_to_lab) + tf.constant([-16.0, 0.0, 0.0]) 156 | 157 | return tf.reshape(lab_pixels, tf.shape(srgb)) 158 | 159 | 160 | def lab_to_rgb(lab): 161 | with tf.name_scope("lab_to_rgb"): 162 | lab_pixels = tf.reshape(lab, [-1, 3]) 163 | 164 | # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions 165 | with tf.name_scope("cielab_to_xyz"): 166 | # convert to fxfyfz 167 | lab_to_fxfyfz = tf.constant([ 168 | # fx fy fz 169 | [1 / 116.0, 1 / 116.0, 1 / 116.0], # l 170 | [1 / 500.0, 0.0, 0.0], # a 171 | [0.0, 0.0, -1 / 200.0], # b 172 | ]) 173 | fxfyfz_pixels = tf.matmul(lab_pixels + tf.constant([16.0, 0.0, 0.0]), lab_to_fxfyfz) 174 | 175 | # convert to xyz 176 | epsilon = 6 / 29 177 | linear_mask = tf.cast(fxfyfz_pixels <= epsilon, dtype=tf.float32) 178 | exponential_mask = tf.cast(fxfyfz_pixels > epsilon, dtype=tf.float32) 179 | xyz_pixels = (3 * epsilon**2 * (fxfyfz_pixels - 4 / 29)) * linear_mask + (fxfyfz_pixels ** 3) * exponential_mask 180 | 181 | # denormalize for D65 white point 182 | xyz_pixels = tf.multiply(xyz_pixels, [0.950456, 1.0, 1.088754]) 183 | 184 | with tf.name_scope("xyz_to_srgb"): 185 | xyz_to_rgb = tf.constant([ 186 | # r g b 187 | [3.2404542, -0.9692660, 0.0556434], # x 188 | [-1.5371385, 1.8760108, -0.2040259], # y 189 | [-0.4985314, 0.0415560, 1.0572252], # z 190 | ]) 191 | rgb_pixels = tf.matmul(xyz_pixels, xyz_to_rgb) 192 | # avoid a slightly negative number messing up the conversion 193 | rgb_pixels = tf.clip_by_value(rgb_pixels, 0.0, 1.0) 194 | linear_mask = tf.cast(rgb_pixels <= 0.0031308, dtype=tf.float32) 195 | exponential_mask = tf.cast(rgb_pixels > 0.0031308, dtype=tf.float32) 196 | srgb_pixels = (rgb_pixels * 12.92 * linear_mask) + ((rgb_pixels ** (1 / 2.4) * 1.055) - 0.055) * exponential_mask 197 | 198 | return tf.reshape(srgb_pixels, tf.shape(lab)) 199 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/options.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import random 4 | import argparse 5 | 6 | 7 | def str2bool(v): 8 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 9 | return True 10 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 11 | return False 12 | else: 13 | raise argparse.ArgumentTypeError('Boolean value expected.') 14 | 15 | 16 | class ModelOptions: 17 | def __init__(self): 18 | parser = argparse.ArgumentParser(description='Colorization with GANs') 19 | parser.add_argument('--seed', type=int, default=0, metavar='S', help='random seed (default: 0)') 20 | parser.add_argument('--name', type=str, default='CGAN', help='arbitrary model name (default: CGAN)') 21 | 
parser.add_argument('--mode', default=0, help='run mode [0: train, 1: evaluate, 2: test] (default: 0)') 22 | parser.add_argument('--dataset', type=str, default='momentsintime', help='the name of dataset [places365, cifar10] (default: momentsintime)') 23 | parser.add_argument('--dataset-path', type=str, default='./data', help='dataset path (default: ./data)') 24 | parser.add_argument('--checkpoints-path', type=str, default='./checkpoints', help='models are saved here (default: ./checkpoints)') 25 | parser.add_argument('--batch-size', type=int, default=16, metavar='N', help='input batch size for training (default: 16)') 26 | parser.add_argument('--color-space', type=str, default='lab', help='model color space [lab, rgb] (default: lab)') 27 | parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs to train (default: 30)') 28 | parser.add_argument('--lr', type=float, default=5e-3, metavar='LR', help='learning rate (default: 3e-4)') 29 | parser.add_argument('--lr-decay-rate', type=float, default=0.1, help='learning rate exponentially decay rate (default: 0.1)') 30 | parser.add_argument('--lr-decay-steps', type=float, default=25e2, help='learning rate exponentially decay steps (default: 5e5)') 31 | parser.add_argument('--beta1', type=float, default=0, help='momentum term of adam optimizer (default: 0)') 32 | parser.add_argument("--l1-weight", type=float, default=100.0, help="weight on L1 term for generator gradient (default: 100.0)") 33 | parser.add_argument('--augment', type=str2bool, default=True, help='True for augmentation (default: True)') 34 | parser.add_argument('--label-smoothing', type=str2bool, default=False, help='True for one-sided label smoothing (default: False)') 35 | parser.add_argument('--acc-thresh', type=float, default=2.0, help="accuracy threshold (default: 2.0)") 36 | parser.add_argument('--kernel-size', type=int, default=4, help="default kernel size (default: 4)") 37 | parser.add_argument('--save', type=str2bool, default=True, help='True for saving (default: True)') 38 | parser.add_argument('--save-interval', type=int, default=100, help='how many batches to wait before saving model (default: 1000)') 39 | parser.add_argument('--sample', type=str2bool, default=True, help='True for sampling (default: True)') 40 | parser.add_argument('--sample-size', type=int, default=8, help='number of images to sample (default: 8)') 41 | parser.add_argument('--sample-interval', type=int, default=100, help='how many batches to wait before sampling (default: 1000)') 42 | parser.add_argument('--validate', type=str2bool, default=True, help='True for validation (default: True)') 43 | parser.add_argument('--validate-interval', type=int, default=0, help='how many batches to wait before validating (default: 0)') 44 | parser.add_argument('--log', type=str2bool, default=True, help='True for logging (default: True)') 45 | parser.add_argument('--log-interval', type=int, default=10, help='how many iterations to wait before logging training status (default: 10)') 46 | parser.add_argument('--visualize', type=str2bool, default=False, help='True for accuracy visualization (default: False)') 47 | parser.add_argument('--visualize-window', type=int, default=100, help='the exponentially moving average window width (default: 100)') 48 | parser.add_argument('--test-size', type=int, default=100, metavar='N', help='number of Turing tests (default: 100)') 49 | parser.add_argument('--test-delay', type=int, default=0, metavar='N', help='number of seconds to wait when doing Turing test, 
0 for unlimited (default: 0)') 50 | parser.add_argument('--gpu-ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') 51 | # to recolorize a video clip 52 | parser.add_argument('--filename', type=str, default='*', help='Filename of input BW video') 53 | parser.add_argument('--input_dir', type=str, default='../data/examples/converted', help='Directory of input files') 54 | parser.add_argument('--output_dir', type=str, default='../data/examples/recolorized', help='Directory of output files') 55 | 56 | self._parser = parser 57 | 58 | def parse(self): 59 | opt = self._parser.parse_args() 60 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids 61 | 62 | opt.color_space = opt.color_space.upper() 63 | 64 | if opt.seed == 0: 65 | opt.seed = random.randint(0, 2**31 - 1) 66 | 67 | if (opt.dataset_path == './data') or (opt.dataset_path == './dataset'): 68 | opt.dataset_path += ('/' + opt.dataset) 69 | 70 | if opt.checkpoints_path == './checkpoints': 71 | opt.checkpoints_path += ('/' + opt.dataset) 72 | 73 | return opt 74 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy ~= 1.14.3 2 | scipy ~= 1.0.1 3 | future ~= 0.16.0 4 | matplotlib ~= 2.2.2 5 | pillow ~= 5.0.0 6 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | ignore = E303 3 | max-line-length = 200 -------------------------------------------------------------------------------- /Ref-GAN-Colorization/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .options import * 2 | from .models import * 3 | from .utils import * 4 | from .dataset import * 5 | from .main import * -------------------------------------------------------------------------------- /Ref-GAN-Colorization/src/build_dataset.py: -------------------------------------------------------------------------------- 1 | """Split the SIGNS dataset into train/dev/test and resize images to 64x64. 2 | 3 | The SIGNS dataset comes in the following format: 4 | train_signs/ 5 | 0_IMG_5864.jpg 6 | ... 7 | test_signs/ 8 | 0_IMG_5942.jpg 9 | ... 10 | 11 | Original images have size (3024, 3024). 12 | Resizing to (64, 64) reduces the dataset size from 1.16 GB to 4.7 MB, and loading smaller images 13 | makes training faster. 14 | 15 | We already have a test set created, so we only need to split "train_signs" into train and dev sets. 16 | Because we don't have a lot of images and we want that the statistics on the dev set be as 17 | representative as possible, we'll take 20% of "train_signs" as dev set. 
18 | """ 19 | 20 | import argparse 21 | import random 22 | import os 23 | 24 | import numpy as np 25 | 26 | from PIL import Image 27 | from tqdm import tqdm 28 | import cv2 29 | 30 | # size of the resized frames 31 | SIZE = 256 32 | 33 | # subfolder of the "Moments_in_Time" dataset to consider 34 | SUBFOLDER = "/baking" 35 | 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--data_dir', default='../data/Moments_in_Time_Mini', help="Directory with the Moments in Time dataset") 38 | parser.add_argument('--output_dir', default='../data/momentsintime_ref', help="Where to write the new data") 39 | parser.add_argument('--dt', type=int, default=1, help="Time between consecutives frames") 40 | 41 | 42 | def split_resize_and_save(filename, i, output_dir, dt=1, size=SIZE): 43 | """Split the video clip in pair of consecutive frames (t, t+dt), resize the frames, and save the pairs to the `output_dir`""" 44 | 45 | vidcap = cv2.VideoCapture(filename) 46 | 47 | success, frame = vidcap.read() 48 | # convert BGR to RGB convention 49 | frame = frame[:,:,::-1] 50 | # default : use bilinear interpolation 51 | frame_prev = cv2.resize(frame, (size, size)) 52 | # save the first frame as the "color palette" reference 53 | frame_ref = frame_prev 54 | 55 | # counter to build pairs of consecutive frames 56 | count = 1 57 | 58 | while success: 59 | count += 1 60 | 61 | success, frame = vidcap.read() 62 | 63 | if success: 64 | # convert BGR to RGB convention 65 | frame = frame[:,:,::-1] 66 | # default : use bilinear interpolation 67 | frame = cv2.resize(frame, (size, size)) 68 | else: 69 | break 70 | #print('Read a new frame: ', success) 71 | 72 | if count % (1+dt) == 0: 73 | img = np.concatenate((frame, frame_prev, frame_ref), 2) 74 | frame_prev = frame 75 | np.save(output_dir + "/video{}_frame{}".format(i, count), img) 76 | 77 | if __name__ == '__main__': 78 | args = parser.parse_args() 79 | # Define the output directory 80 | args.output_dir = args.output_dir + "_dt" + str(args.dt) 81 | 82 | assert os.path.isdir(args.data_dir), "Couldn't find the dataset at {}".format(args.data_dir) 83 | 84 | # Define the data directories 85 | train_data_dir = os.path.join(args.data_dir, 'training' + SUBFOLDER) 86 | test_data_dir = os.path.join(args.data_dir, 'validation' + SUBFOLDER) 87 | 88 | # Get the filenames in each directory (train and test) 89 | filenames = os.listdir(train_data_dir) 90 | filenames = [os.path.join(train_data_dir, f) for f in filenames if f.endswith('.mp4')] 91 | 92 | test_filenames = os.listdir(test_data_dir) 93 | test_filenames = [os.path.join(test_data_dir, f) for f in test_filenames if f.endswith('.mp4')] 94 | 95 | # Split the images in 'train_moments' into 80% train and 20% dev 96 | # Make sure to always shuffle with a fixed seed so that the split is reproducible 97 | random.seed(230) 98 | filenames.sort() 99 | random.shuffle(filenames) 100 | 101 | split = int(0.9 * len(filenames)) 102 | train_filenames = filenames[:split] 103 | dev_filenames = filenames[split:] 104 | 105 | filenames = {'train': train_filenames, 106 | 'dev': dev_filenames, 107 | 'test': test_filenames} 108 | 109 | if not os.path.exists(args.output_dir): 110 | os.mkdir(args.output_dir) 111 | else: 112 | print("Warning: output dir {} already exists".format(args.output_dir)) 113 | 114 | # Preprocess train, dev and test 115 | for split in ['train', 'dev', 'test']: 116 | output_dir_split = os.path.join(args.output_dir, '{}_moments'.format(split)) 117 | if not os.path.exists(output_dir_split): 118 | os.mkdir(output_dir_split) 
119 | else: 120 | print("Warning: dir {} already exists".format(output_dir_split)) 121 | 122 | print("Processing {} data, saving preprocessed data to {}".format(split, output_dir_split)) 123 | for i, filename in enumerate(tqdm(filenames[split])): 124 | split_resize_and_save(filename, i, output_dir_split, dt=args.dt, size=SIZE) 125 | 126 | print("Done building dataset") 127 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/src/dataset.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import tensorflow as tf 4 | from scipy.misc import imread 5 | from abc import abstractmethod 6 | from utils import unpickle 7 | 8 | CIFAR10_DATASET = 'cifar10' 9 | PLACES365_DATASET = 'places365' 10 | MOMENTSINTIME_DATASET = 'momentsintime' 11 | 12 | class BaseDataset(): 13 | def __init__(self, name, path, training=True, augment=True): 14 | self.name = name 15 | self.augment = augment and training 16 | self.training = training 17 | self.path = path 18 | self._data = [] 19 | 20 | def __len__(self): 21 | return len(self.data) 22 | 23 | def __iter__(self): 24 | total = len(self) 25 | start = 0 26 | 27 | while start < total: 28 | item = self[start] 29 | start += 1 30 | yield item 31 | 32 | raise StopIteration 33 | 34 | def __getitem__(self, index): 35 | val = self.data[index] 36 | try: 37 | # OLD : img = imread(val) if isinstance(val, str) else val 38 | img = np.load(val) if isinstance(val, str) else val 39 | 40 | if self.augment and np.random.binomial(1, 0.5) == 1: 41 | img = img[:, ::-1, :] 42 | 43 | except: 44 | img = None 45 | 46 | return img 47 | 48 | def generator(self, batch_size, recursive=False): 49 | start = 0 50 | total = len(self) 51 | 52 | while True: 53 | while start < total: 54 | end = np.min([start + batch_size, total]) 55 | items = [] 56 | 57 | for ix in range(start, end): 58 | item = self[ix] 59 | if item is not None: 60 | items.append(item) 61 | 62 | start = end 63 | yield np.array(items) 64 | 65 | if recursive: 66 | start = 0 67 | 68 | else: 69 | raise StopIteration 70 | 71 | 72 | @property 73 | def data(self): 74 | if len(self._data) == 0: 75 | self._data = self.load() 76 | np.random.shuffle(self._data) 77 | 78 | return self._data 79 | 80 | @abstractmethod 81 | def load(self): 82 | return [] 83 | 84 | 85 | class Cifar10Dataset(BaseDataset): 86 | def __init__(self, path, training=True, augment=True): 87 | super(Cifar10Dataset, self).__init__(CIFAR10_DATASET, path, training, augment) 88 | 89 | def load(self): 90 | data = [] 91 | if self.training: 92 | for i in range(1, 6): 93 | filename = '{}/data_batch_{}'.format(self.path, i) 94 | batch_data = unpickle(filename) 95 | if len(data) > 0: 96 | data = np.vstack((data, batch_data[b'data'])) 97 | else: 98 | data = batch_data[b'data'] 99 | 100 | else: 101 | filename = '{}/test_batch'.format(self.path) 102 | batch_data = unpickle(filename) 103 | data = batch_data[b'data'] 104 | 105 | w = 32 106 | h = 32 107 | s = w * h 108 | data = np.array(data) 109 | data = np.dstack((data[:, :s], data[:, s:2 * s], data[:, 2 * s:])) 110 | data = data.reshape((-1, w, h, 3)) 111 | return data 112 | 113 | 114 | class Places365Dataset(BaseDataset): 115 | def __init__(self, path, training=True, augment=True): 116 | super(Places365Dataset, self).__init__(PLACES365_DATASET, path, training, augment) 117 | 118 | def load(self): 119 | if self.training: 120 | data = np.array( 121 | glob.glob(self.path + '/data_256/**/*.jpg', recursive=True)) 122 | 
123 | else: 124 | data = np.array(glob.glob(self.path + '/val_256/*.jpg')) 125 | 126 | return data 127 | 128 | 129 | class MomentsInTimeDataset(BaseDataset): 130 | def __init__(self, path, training=True, augment=True): 131 | super(MomentsInTimeDataset, self).__init__(MOMENTSINTIME_DATASET, path, training, augment) 132 | 133 | def load(self): 134 | if self.training: 135 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/train_moments/*")) 136 | data = np.array(glob.glob("." + self.path + "/train_moments/*")) 137 | else: 138 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/dev_moments/*")) 139 | data = np.array(glob.glob("." + self.path + "/dev_moments/*")) 140 | 141 | return data 142 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/src/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | import tensorflow as tf 5 | from options import ModelOptions 6 | from models import Cifar10Model, Places365Model, MomentsInTimeModel 7 | from dataset import CIFAR10_DATASET, PLACES365_DATASET, MOMENTSINTIME_DATASET 8 | 9 | 10 | def main(options): 11 | 12 | # reset tensorflow graph 13 | tf.reset_default_graph() 14 | 15 | # initialize random seed 16 | tf.set_random_seed(options.seed) 17 | np.random.seed(options.seed) 18 | random.seed(options.seed) 19 | 20 | # create a session environment 21 | with tf.Session() as sess: 22 | 23 | if options.dataset == CIFAR10_DATASET: 24 | model = Cifar10Model(sess, options) 25 | 26 | elif options.dataset == PLACES365_DATASET: 27 | model = Places365Model(sess, options) 28 | 29 | elif options.dataset == MOMENTSINTIME_DATASET: 30 | model = MomentsInTimeModel(sess, options) 31 | 32 | else: 33 | model = MomentsInTimeModel(sess, options) 34 | 35 | if not os.path.exists(options.checkpoints_path): 36 | os.makedirs(options.checkpoints_path) 37 | 38 | if options.log: 39 | open(model.train_log_file, 'w').close() 40 | open(model.test_log_file, 'w').close() 41 | 42 | # build the model and initialize 43 | model.build() 44 | sess.run(tf.global_variables_initializer()) 45 | 46 | 47 | # load model only after global variables initialization 48 | model.load() 49 | 50 | 51 | if options.mode == 0: 52 | args = vars(options) 53 | print('\n------------ Options -------------') 54 | with open(os.path.join(options.checkpoints_path, 'options.dat'), 'w') as f: 55 | for k, v in sorted(args.items()): 56 | print('%s: %s' % (str(k), str(v))) 57 | f.write('%s: %s\n' % (str(k), str(v))) 58 | print('-------------- End ----------------\n') 59 | 60 | model.train() 61 | 62 | elif options.mode == 1: 63 | model.evaluate() 64 | while True: 65 | model.sample() 66 | 67 | else: 68 | model.turing_test() 69 | 70 | 71 | if __name__ == "__main__": 72 | main(ModelOptions().parse()) 73 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/src/networks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from ops import conv2d, conv2d_transpose, pixelwise_accuracy 4 | 5 | 6 | class Discriminator(object): 7 | def __init__(self, name, kernels): 8 | self.name = name 9 | self.kernels = kernels 10 | self.var_list = [] 11 | 12 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None): 13 | output = inputs 14 | with tf.variable_scope(self.name, 
reuse=reuse_variables): 15 | for index, kernel in enumerate(self.kernels): 16 | 17 | # not use batch-norm in the first layer 18 | bnorm = False if index == 0 else True 19 | name = 'conv' + str(index) 20 | output = conv2d( 21 | inputs=output, 22 | name=name, 23 | kernel_size=kernel_size, 24 | filters=kernel[0], 25 | strides=kernel[1], 26 | bnorm=bnorm, 27 | activation=tf.nn.leaky_relu, 28 | seed=seed 29 | ) 30 | 31 | if kernel[2] > 0: 32 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed) 33 | 34 | output = conv2d( 35 | inputs=output, 36 | name='conv_last', 37 | filters=1, 38 | kernel_size=4, # last layer kernel size = 4 39 | strides=1, # last layer stride = 1 40 | bnorm=False, # do not use batch-norm for the last layer 41 | seed=seed 42 | ) 43 | 44 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name) 45 | 46 | return output 47 | 48 | 49 | class Generator(object): 50 | def __init__(self, name, encoder_kernels, decoder_kernels, output_channels=3): 51 | self.name = name 52 | self.encoder_kernels = encoder_kernels 53 | self.decoder_kernels = decoder_kernels 54 | self.output_channels = output_channels 55 | self.var_list = [] 56 | 57 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None): 58 | output = inputs 59 | 60 | with tf.variable_scope(self.name, reuse=reuse_variables): 61 | 62 | layers = [] 63 | 64 | # encoder branch 65 | for index, kernel in enumerate(self.encoder_kernels): 66 | 67 | name = 'conv' + str(index) 68 | output = conv2d( 69 | inputs=output, 70 | name=name, 71 | kernel_size=kernel_size, 72 | filters=kernel[0], 73 | strides=kernel[1], 74 | activation=tf.nn.leaky_relu, 75 | seed=seed 76 | ) 77 | 78 | # save contracting path layers to be used for skip connections 79 | layers.append(output) 80 | 81 | if kernel[2] > 0: 82 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed) 83 | 84 | # decoder branch 85 | for index, kernel in enumerate(self.decoder_kernels): 86 | 87 | name = 'deconv' + str(index) 88 | output = conv2d_transpose( 89 | inputs=output, 90 | name=name, 91 | kernel_size=kernel_size, 92 | filters=kernel[0], 93 | strides=kernel[1], 94 | activation=tf.nn.relu, 95 | seed=seed 96 | ) 97 | 98 | if kernel[2] > 0: 99 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed) 100 | 101 | # concat the layer from the contracting path with the output of the current layer 102 | # concat only the channels (axis=3) 103 | output = tf.concat([layers[len(layers) - index - 2], output], axis=3) 104 | 105 | output = conv2d( 106 | inputs=output, 107 | name='conv_last', 108 | filters=self.output_channels, # number of output chanels 109 | kernel_size=1, # last layer kernel size = 1 110 | strides=1, # last layer stride = 1 111 | bnorm=False, # do not use batch-norm for the last layer 112 | activation=tf.nn.tanh, # tanh activation function for the output 113 | seed=seed 114 | ) 115 | 116 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name) 117 | 118 | return output 119 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/src/options.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import random 4 | import argparse 5 | 6 | 7 | def str2bool(v): 8 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 9 | return True 10 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 
11 | return False 12 | else: 13 | raise argparse.ArgumentTypeError('Boolean value expected.') 14 | 15 | 16 | class ModelOptions: 17 | def __init__(self): 18 | parser = argparse.ArgumentParser(description='Colorization with GANs') 19 | parser.add_argument('--seed', type=int, default=0, metavar='S', help='random seed (default: 0)') 20 | parser.add_argument('--name', type=str, default='CGAN', help='arbitrary model name (default: CGAN)') 21 | parser.add_argument('--mode', type=int, default=0, help='run mode [0: train, 1: evaluate, 2: test] (default: 0)') 22 | parser.add_argument('--dataset', type=str, default='momentsintime', help='the name of dataset [momentsintime, places365, cifar10] (default: momentsintime)') 23 | parser.add_argument('--dataset-path', type=str, default='./data', help='dataset path (default: ./data)') 24 | parser.add_argument('--checkpoints-path', type=str, default='./checkpoints', help='models are saved here (default: ./checkpoints)') 25 | parser.add_argument('--batch-size', type=int, default=16, metavar='N', help='input batch size for training (default: 16)') 26 | parser.add_argument('--color-space', type=str, default='lab', help='model color space [lab, rgb] (default: lab)') 27 | parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs to train (default: 100)') 28 | parser.add_argument('--lr', type=float, default=5e-3, metavar='LR', help='learning rate (default: 5e-3)') 29 | parser.add_argument('--lr-decay-rate', type=float, default=0.1, help='learning rate exponential decay rate (default: 0.1)') 30 | parser.add_argument('--lr-decay-steps', type=float, default=25e2, help='learning rate exponential decay steps (default: 25e2)') 31 | parser.add_argument('--beta1', type=float, default=0, help='momentum term of adam optimizer (default: 0)') 32 | parser.add_argument("--l1-weight", type=float, default=100.0, help="weight on L1 term for generator gradient (default: 100.0)") 33 | parser.add_argument('--augment', type=str2bool, default=True, help='True for augmentation (default: True)') 34 | parser.add_argument('--label-smoothing', type=str2bool, default=False, help='True for one-sided label smoothing (default: False)') 35 | parser.add_argument('--acc-thresh', type=float, default=2.0, help="accuracy threshold (default: 2.0)") 36 | parser.add_argument('--kernel-size', type=int, default=4, help="default kernel size (default: 4)") 37 | parser.add_argument('--save', type=str2bool, default=True, help='True for saving (default: True)') 38 | parser.add_argument('--save-interval', type=int, default=100, help='how many batches to wait before saving model (default: 100)') 39 | parser.add_argument('--sample', type=str2bool, default=True, help='True for sampling (default: True)') 40 | parser.add_argument('--sample-size', type=int, default=8, help='number of images to sample (default: 8)') 41 | parser.add_argument('--sample-interval', type=int, default=100, help='how many batches to wait before sampling (default: 100)') 42 | parser.add_argument('--validate', type=str2bool, default=True, help='True for validation (default: True)') 43 | parser.add_argument('--validate-interval', type=int, default=0, help='how many batches to wait before validating (default: 0)') 44 | parser.add_argument('--log', type=str2bool, default=True, help='True for logging (default: True)') 45 | parser.add_argument('--log-interval', type=int, default=10, help='how many iterations to wait before logging training status (default: 10)') 46 | parser.add_argument('--visualize', type=str2bool, default=False, 
help='True for accuracy visualization (default: False)') 47 | parser.add_argument('--visualize-window', type=int, default=100, help='the exponentially moving average window width (default: 100)') 48 | parser.add_argument('--test-size', type=int, default=100, metavar='N', help='number of Turing tests (default: 100)') 49 | parser.add_argument('--test-delay', type=int, default=0, metavar='N', help='number of seconds to wait when doing Turing test, 0 for unlimited (default: 0)') 50 | parser.add_argument('--gpu-ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') 51 | # to recolorize a video clip 52 | parser.add_argument('--filename', type=str, default='*', help='Filename of input BW video') 53 | parser.add_argument('--input_dir', type=str, default='../data/examples/converted', help='Directory of input files') 54 | parser.add_argument('--output_dir', type=str, default='../data/examples/recolorized', help='Directory of output files') 55 | 56 | self._parser = parser 57 | 58 | def parse(self): 59 | opt = self._parser.parse_args() 60 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids 61 | 62 | opt.color_space = opt.color_space.upper() 63 | 64 | if opt.seed == 0: 65 | opt.seed = random.randint(0, 2**31 - 1) 66 | 67 | if (opt.dataset_path == './data') or (opt.dataset_path == './dataset'): 68 | opt.dataset_path += ('/' + opt.dataset) 69 | 70 | if opt.checkpoints_path == './checkpoints': 71 | opt.checkpoints_path += ('/' + opt.dataset) 72 | 73 | return opt 74 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/src/test-eval.py: -------------------------------------------------------------------------------- 1 | from src import ModelOptions, main 2 | 3 | options = ModelOptions().parse() 4 | options.mode = 1 5 | main(options) 6 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/src/test-turing.py: -------------------------------------------------------------------------------- 1 | from src import ModelOptions, main 2 | 3 | options = ModelOptions().parse() 4 | options.mode = 2 5 | main(options) 6 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/src/train.py: -------------------------------------------------------------------------------- 1 | from src import ModelOptions, main 2 | 3 | options = ModelOptions().parse() 4 | options.mode = 0 5 | main(options) 6 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/src/utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | from PIL import Image 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | def stitch_images(grayscale, original, pred): 8 | gap = 5 9 | width, height = original[0][:, :, 0].shape 10 | img_per_row = 2 if width > 200 else 4 11 | img = Image.new('RGB', (width * img_per_row * 3 + gap * (img_per_row - 1), height * int(len(original) / img_per_row))) 12 | 13 | grayscale = np.array(grayscale).squeeze() 14 | original = np.array(original) 15 | pred = np.array(pred) 16 | 17 | for ix in range(len(original)): 18 | xoffset = int(ix % img_per_row) * width * 3 + int(ix % img_per_row) * gap 19 | yoffset = int(ix / img_per_row) * height 20 | im1 = Image.fromarray(grayscale[ix]) 21 | im2 = Image.fromarray(original[ix]) 22 | im3 = Image.fromarray((pred[ix] * 255).astype(np.uint8)) 23 | img.paste(im1, (xoffset, yoffset)) 24 | img.paste(im2, (xoffset + 
width, yoffset)) 25 | img.paste(im3, (xoffset + width + width, yoffset)) 26 | 27 | return img 28 | 29 | 30 | def unpickle(file): 31 | with open(file, 'rb') as fo: 32 | dict = pickle.load(fo, encoding='bytes') 33 | return dict 34 | 35 | 36 | def moving_average(data, window_width): 37 | cumsum_vec = np.cumsum(np.insert(data, 0, 0)) 38 | ma_vec = (cumsum_vec[window_width:] - cumsum_vec[:-window_width]) / window_width 39 | return ma_vec 40 | 41 | 42 | def imshow(img, title=''): 43 | fig = plt.gcf() 44 | fig.canvas.set_window_title(title) 45 | plt.axis('off') 46 | plt.imshow(img, interpolation='none') 47 | plt.show() 48 | 49 | 50 | def turing_test(real_img, fake_img, delay=0): 51 | height, width, _ = real_img.shape 52 | imgs = np.array([real_img, (fake_img * 255).astype(np.uint8)]) 53 | real_index = np.random.binomial(1, 0.5) 54 | fake_index = (real_index + 1) % 2 55 | 56 | img = Image.new('RGB', (2 + width * 2, height)) 57 | img.paste(Image.fromarray(imgs[real_index]), (0, 0)) 58 | img.paste(Image.fromarray(imgs[fake_index]), (2 + width, 0)) 59 | 60 | img.success = 0 61 | 62 | def onclick(event): 63 | if event.xdata is not None: 64 | if event.x < width and real_index == 0: 65 | img.success = 1 66 | 67 | elif event.x > width and real_index == 1: 68 | img.success = 1 69 | 70 | plt.gcf().canvas.stop_event_loop() 71 | 72 | plt.ion() 73 | plt.gcf().canvas.mpl_connect('button_press_event', onclick) 74 | plt.title('click on the real image') 75 | plt.axis('off') 76 | plt.imshow(img, interpolation='none') 77 | plt.show() 78 | plt.draw() 79 | plt.gcf().canvas.start_event_loop(delay) 80 | 81 | return img.success 82 | 83 | 84 | def visualize(train_log_file, test_log_file, window_width, title=''): 85 | train_data = np.loadtxt(train_log_file) 86 | test_data = np.loadtxt(test_log_file) 87 | 88 | if len(train_data.shape) < 2: 89 | return 90 | 91 | if len(train_data) < window_width: 92 | window_width = len(train_data) - 1 93 | 94 | fig = plt.gcf() 95 | fig.canvas.set_window_title(title) 96 | 97 | plt.ion() 98 | plt.subplot('121') 99 | plt.cla() 100 | if len(train_data) > 1: 101 | plt.plot(moving_average(train_data[:, 8], window_width)) 102 | plt.title('train') 103 | 104 | plt.subplot('122') 105 | plt.cla() 106 | if len(test_data) > 1: 107 | plt.plot(test_data[:, 8]) 108 | plt.title('test') 109 | 110 | plt.show() 111 | plt.draw() 112 | plt.pause(.01) 113 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/src/video_colorize_GAN.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | import cv2 6 | import numpy as np 7 | from PIL import Image 8 | from skimage import img_as_ubyte, img_as_float 9 | import skimage.color as color 10 | import scipy.ndimage.interpolation as sni 11 | from ops import postprocess 12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB 13 | 14 | import tensorflow as tf 15 | from options import ModelOptions 16 | from models import MomentsInTimeModel 17 | 18 | 19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, img_rgb_first, options): 20 | 21 | # colorize the image based on the previous one 22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0), model.input_rgb_first: np.expand_dims(img_rgb_first, axis=0)} 23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic) 24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), 
colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB) 25 | 26 | # evalute the tensor 27 | img_rgb_out = fake_image.eval() 28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8) 29 | 30 | return img_rgb_out 31 | 32 | def bw2color(options, inputname, inputpath, outputpath): 33 | if inputname.endswith(".mp4"): 34 | # size of the input frames 35 | size = 256 36 | 37 | # check that the video exists 38 | path_to_video = os.path.join(inputpath, inputname) 39 | if not os.path.exists(path_to_video): 40 | print("The file :", path_to_video, "does not exist !") 41 | 42 | # store informations about the original video 43 | cap = cv2.VideoCapture(os.path.join(path_to_video)) 44 | # original dimensions 45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v'); 48 | # parameters of output file 49 | # dimensions of the output image 50 | new_width, new_height = size, size 51 | # number of frames 52 | fps = 30.0 53 | 54 | # recolorized output video 55 | color_out = cv2.VideoWriter( 56 | os.path.join(outputpath, 'color_' + inputname), 57 | fourcc, 58 | fps, 59 | (new_width, new_height), 60 | isColor=True 61 | ) 62 | 63 | # TO CHANGE to DL colorization of 1st frame 64 | # pick the first frame from the original video clip as the first reference 65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:])) 66 | 67 | ret_temp, frame_prev = cap_temp.read() 68 | # convert BGR to RGB convention 69 | frame_prev = frame_prev[:,:,::-1] 70 | frame_prev = cv2.resize(frame_prev, (size, size)) 71 | # save the first frame as the reference 72 | frame_ref = frame_prev 73 | 74 | # count the number of recolorized frames 75 | frames_processed = 0 76 | 77 | with tf.Session() as sess: 78 | 79 | model = MomentsInTimeModel(sess, options) 80 | 81 | # build the model and initialize 82 | model.build() 83 | sess.run(tf.global_variables_initializer()) 84 | 85 | # load model only after global variables initialization 86 | model.load() 87 | 88 | while(cap.isOpened()): 89 | ret, frame_in = cap.read() 90 | 91 | # check if we are not at the end of the video 92 | if ret==True: 93 | # convert BGR to RGB convention 94 | frame_in = frame_in[:,:,::-1] 95 | # resize the frame to match the input size of the GAN 96 | frame_in = cv2.resize(frame_in, (size, size)) 97 | 98 | # colorize the BW frame 99 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, frame_ref, options) 100 | 101 | #generate sample 102 | get_image = False 103 | if get_image: 104 | img = Image.fromarray(frame_out) 105 | 106 | if not os.path.exists(model.samples_dir): 107 | os.makedirs(model.samples_dir) 108 | 109 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png" 110 | img.save(os.path.join(model.samples_dir, sample)) 111 | 112 | # save the recolorized frame 113 | frame_prev = frame_out 114 | # convert RGB to BGR convention 115 | frame_out = frame_out[:,:,::-1] 116 | # write the color frame 117 | color_out.write(frame_out) 118 | 119 | # print progress 120 | frames_processed += 1 121 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 //totalFrames), end="\r") 122 | if cv2.waitKey(1) & 0xFF == ord('q'): 123 | break 124 | # end of the video 125 | else: 126 | break 127 | 128 | # release everything if job is finished 129 | cap.release() 130 | color_out.release() 131 | 132 | def main(): 
133 | 134 | # reset tensorflow graph 135 | tf.reset_default_graph() 136 | 137 | options = ModelOptions().parse() 138 | 139 | if options.filename == '*': 140 | for filename in os.listdir(options.input_dir): 141 | bw2color(options, inputname = filename, inputpath = options.input_dir, outputpath = options.output_dir) 142 | else: 143 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir) 144 | 145 | # cleanup 146 | cv2.destroyAllWindows() 147 | 148 | return 0 149 | 150 | if __name__ == '__main__': 151 | main() 152 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/test-eval.py: -------------------------------------------------------------------------------- 1 | from src import ModelOptions, main 2 | 3 | options = ModelOptions().parse() 4 | options.mode = 1 5 | main(options) 6 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/test-turing.py: -------------------------------------------------------------------------------- 1 | from src import ModelOptions, main 2 | 3 | options = ModelOptions().parse() 4 | options.mode = 2 5 | main(options) 6 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/train.py: -------------------------------------------------------------------------------- 1 | from src import ModelOptions, main 2 | 3 | options = ModelOptions().parse() 4 | options.mode = 0 5 | main(options) 6 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | from PIL import Image 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | def stitch_images(grayscale, original, pred): 8 | gap = 5 9 | width, height = original[0][:, :, 0].shape 10 | img_per_row = 2 if width > 200 else 4 11 | img = Image.new('RGB', (width * img_per_row * 3 + gap * (img_per_row - 1), height * int(len(original) / img_per_row))) 12 | 13 | grayscale = np.array(grayscale).squeeze() 14 | original = np.array(original) 15 | pred = np.array(pred) 16 | 17 | for ix in range(len(original)): 18 | xoffset = int(ix % img_per_row) * width * 3 + int(ix % img_per_row) * gap 19 | yoffset = int(ix / img_per_row) * height 20 | im1 = Image.fromarray(grayscale[ix]) 21 | im2 = Image.fromarray(original[ix]) 22 | im3 = Image.fromarray((pred[ix] * 255).astype(np.uint8)) 23 | img.paste(im1, (xoffset, yoffset)) 24 | img.paste(im2, (xoffset + width, yoffset)) 25 | img.paste(im3, (xoffset + width + width, yoffset)) 26 | 27 | return img 28 | 29 | 30 | def unpickle(file): 31 | with open(file, 'rb') as fo: 32 | dict = pickle.load(fo, encoding='bytes') 33 | return dict 34 | 35 | 36 | def moving_average(data, window_width): 37 | cumsum_vec = np.cumsum(np.insert(data, 0, 0)) 38 | ma_vec = (cumsum_vec[window_width:] - cumsum_vec[:-window_width]) / window_width 39 | return ma_vec 40 | 41 | 42 | def imshow(img, title=''): 43 | fig = plt.gcf() 44 | fig.canvas.set_window_title(title) 45 | plt.axis('off') 46 | plt.imshow(img, interpolation='none') 47 | plt.show() 48 | 49 | 50 | def turing_test(real_img, fake_img, delay=0): 51 | height, width, _ = real_img.shape 52 | imgs = np.array([real_img, (fake_img * 255).astype(np.uint8)]) 53 | real_index = np.random.binomial(1, 0.5) 54 | fake_index = (real_index + 1) % 2 55 | 56 | img = Image.new('RGB', (2 + width * 2, height)) 
57 | img.paste(Image.fromarray(imgs[real_index]), (0, 0)) 58 | img.paste(Image.fromarray(imgs[fake_index]), (2 + width, 0)) 59 | 60 | img.success = 0 61 | 62 | def onclick(event): 63 | if event.xdata is not None: 64 | if event.x < width and real_index == 0: 65 | img.success = 1 66 | 67 | elif event.x > width and real_index == 1: 68 | img.success = 1 69 | 70 | plt.gcf().canvas.stop_event_loop() 71 | 72 | plt.ion() 73 | plt.gcf().canvas.mpl_connect('button_press_event', onclick) 74 | plt.title('click on the real image') 75 | plt.axis('off') 76 | plt.imshow(img, interpolation='none') 77 | plt.show() 78 | plt.draw() 79 | plt.gcf().canvas.start_event_loop(delay) 80 | 81 | return img.success 82 | 83 | 84 | def visualize(train_log_file, test_log_file, window_width, title=''): 85 | train_data = np.loadtxt(train_log_file) 86 | test_data = np.loadtxt(test_log_file) 87 | 88 | if len(train_data.shape) < 2: 89 | return 90 | 91 | if len(train_data) < window_width: 92 | window_width = len(train_data) - 1 93 | 94 | fig = plt.gcf() 95 | fig.canvas.set_window_title(title) 96 | 97 | plt.ion() 98 | plt.subplot('121') 99 | plt.cla() 100 | if len(train_data) > 1: 101 | plt.plot(moving_average(train_data[:, 8], window_width)) 102 | plt.title('train') 103 | 104 | plt.subplot('122') 105 | plt.cla() 106 | if len(test_data) > 1: 107 | plt.plot(test_data[:, 8]) 108 | plt.title('test') 109 | 110 | plt.show() 111 | plt.draw() 112 | plt.pause(.01) 113 | -------------------------------------------------------------------------------- /Ref-GAN-Colorization/video_colorize_GAN.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | import cv2 6 | import numpy as np 7 | from PIL import Image 8 | from skimage import img_as_ubyte, img_as_float 9 | import skimage.color as color 10 | import scipy.ndimage.interpolation as sni 11 | from ops import postprocess 12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB 13 | 14 | import tensorflow as tf 15 | from options import ModelOptions 16 | from models import MomentsInTimeModel 17 | 18 | 19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, img_rgb_first, options): 20 | 21 | # colorize the image based on the previous one 22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0), model.input_rgb_first: np.expand_dims(img_rgb_first, axis=0)} 23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic) 24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB) 25 | 26 | # evalute the tensor 27 | img_rgb_out = fake_image.eval() 28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8) 29 | 30 | return img_rgb_out 31 | 32 | def bw2color(options, inputname, inputpath, outputpath): 33 | if inputname.endswith(".mp4"): 34 | # size of the input frames 35 | size = 256 36 | 37 | # check that the video exists 38 | path_to_video = os.path.join(inputpath, inputname) 39 | if not os.path.exists(path_to_video): 40 | print("The file :", path_to_video, "does not exist !") 41 | 42 | # store informations about the original video 43 | cap = cv2.VideoCapture(os.path.join(path_to_video)) 44 | # original dimensions 45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v'); 48 | # 
parameters of output file 49 | # dimensions of the output image 50 | new_width, new_height = size, size 51 | # number of frames 52 | fps = 30.0 53 | 54 | # recolorized output video 55 | color_out = cv2.VideoWriter( 56 | os.path.join(outputpath, 'color_' + inputname), 57 | fourcc, 58 | fps, 59 | (new_width, new_height), 60 | isColor=True 61 | ) 62 | 63 | # TO CHANGE to DL colorization of 1st frame 64 | # pick the first frame from the original video clip as the first reference 65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:])) 66 | 67 | ret_temp, frame_prev = cap_temp.read() 68 | # convert BGR to RGB convention 69 | frame_prev = frame_prev[:,:,::-1] 70 | frame_prev = cv2.resize(frame_prev, (size, size)) 71 | # save the first frame as the reference 72 | frame_ref = frame_prev 73 | 74 | # count the number of recolorized frames 75 | frames_processed = 0 76 | 77 | with tf.Session() as sess: 78 | 79 | model = MomentsInTimeModel(sess, options) 80 | 81 | # build the model and initialize 82 | model.build() 83 | sess.run(tf.global_variables_initializer()) 84 | 85 | # load model only after global variables initialization 86 | model.load() 87 | 88 | while(cap.isOpened()): 89 | ret, frame_in = cap.read() 90 | 91 | # check if we are not at the end of the video 92 | if ret==True: 93 | # convert BGR to RGB convention 94 | frame_in = frame_in[:,:,::-1] 95 | # resize the frame to match the input size of the GAN 96 | frame_in = cv2.resize(frame_in, (size, size)) 97 | 98 | # colorize the BW frame 99 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, frame_ref, options) 100 | 101 | #generate sample 102 | get_image = False 103 | if get_image: 104 | img = Image.fromarray(frame_out) 105 | 106 | if not os.path.exists(model.samples_dir): 107 | os.makedirs(model.samples_dir) 108 | 109 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png" 110 | img.save(os.path.join(model.samples_dir, sample)) 111 | 112 | # save the recolorized frame 113 | frame_prev = frame_out 114 | # convert RGB to BGR convention 115 | frame_out = frame_out[:,:,::-1] 116 | # write the color frame 117 | color_out.write(frame_out) 118 | 119 | # print progress 120 | frames_processed += 1 121 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 //totalFrames), end="\r") 122 | if cv2.waitKey(1) & 0xFF == ord('q'): 123 | break 124 | # end of the video 125 | else: 126 | break 127 | 128 | # release everything if job is finished 129 | cap.release() 130 | color_out.release() 131 | 132 | def main(): 133 | 134 | # reset tensorflow graph 135 | tf.reset_default_graph() 136 | 137 | options = ModelOptions().parse() 138 | 139 | if options.filename == '*': 140 | for filename in os.listdir(options.input_dir): 141 | bw2color(options, inputname = filename, inputpath = options.input_dir, outputpath = options.output_dir) 142 | else: 143 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir) 144 | 145 | # cleanup 146 | cv2.destroyAllWindows() 147 | 148 | return 0 149 | 150 | if __name__ == '__main__': 151 | main() 152 | -------------------------------------------------------------------------------- /automatic-video-colorization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/automatic-video-colorization.pdf 
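The recolorization entry point above pairs with `converter.py` (further down), which splits an original color clip into a grayscale copy (`bw_*.mp4`) and a color reference (`color_*.mp4`); `video_colorize_GAN.py` then propagates color through the grayscale clip frame by frame, using the previous output frame and the reference first frame, and expects the matching `color_*.mp4` to sit in the same input directory. A minimal end-to-end sketch follows; the clip name `example.mp4` and the directory layout are only illustrative, and a trained checkpoint is assumed to exist under the default `--checkpoints-path`.

```bash
# split a color clip into a grayscale copy (bw_example.mp4) and a color reference (color_example.mp4)
python3 converter.py --filename example.mp4 --input_dir data/raw/ --output_dir data/examples/converted/

# recolorize the grayscale clip with a trained model; writes color_bw_example.mp4 to the output directory
python3 video_colorize_GAN.py --filename bw_example.mp4 --input_dir ../data/examples/converted --output_dir ../data/examples/recolorized --dataset momentsintime
```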
-------------------------------------------------------------------------------- /configuration.txt: -------------------------------------------------------------------------------- 1 | # AWS instance : ec2 p2.xlarge 2 | # ubuntu version : 18.04 (64 bits) 3 | # python version : 3.6 4 | 5 | # Conda installation 6 | # Nvidia drivers 7 | sudo apt install ubuntu-drivers-common 8 | # CHECK : ubuntu-drivers devices 9 | sudo ubuntu-drivers autoinstall 10 | # CHECK : nvidia-smi 11 | # CUDA 12 | sudo apt install nvidia-cuda-toolkit 13 | # CuDNN 14 | # register at nvidia developers https://developer.nvidia.com/cudnn 15 | # download the corresponding runtime library (DEB) 16 | sudo apt install \path\.deb 17 | 18 | # REBOOT instance 19 | 20 | # Automatic Image Colorization 21 | # GitHUb repo cloning 22 | git clone -b master --single-branch https://github.com/richzhang/colorization.git 23 | # download model 24 | ./models/fetch_release_models.sh 25 | 26 | # Caffe (DL framework used in repo) installation 27 | sudo apt install caffe-cuda 28 | 29 | # Image visualisation 30 | sudo apt install eog 31 | # EXEMPLE : eog image.jpg -------------------------------------------------------------------------------- /convert_moment_dataset.sh: -------------------------------------------------------------------------------- 1 | if [ ! -d data/Moments_in_Time_Mini ]; then 2 | echo "Moments_in_Time_Mini dataset not downloaded"; 3 | exit; 4 | fi 5 | 6 | mkdir -p data/Moments_processed; 7 | 8 | for directory in $(find data/Moments_in_Time_Mini/training -type d -mindepth 1); 9 | do 10 | echo "Converting videos in directory $directory"; 11 | python3 converter.py --input_dir "$directory/" --output_dir data/Moments_processed/; 12 | done 13 | -------------------------------------------------------------------------------- /converter.py: -------------------------------------------------------------------------------- 1 | # convert Color to BW video clips 2 | 3 | import os 4 | import argparse 5 | 6 | import numpy as np 7 | import cv2 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--filename', type=str, default='*', help='Filename of input video') 12 | parser.add_argument('--input_dir', type=str, default='data/raw/', help='Directory of input files') 13 | parser.add_argument('--output_dir', type=str, default='data/converted/', help='Directory of output files') 14 | parser.add_argument('--out_dim', type=int, nargs=2, default=None, help='Dimensions of output frames (width, height)') 15 | parser.add_argument('--fps', type=int, default=None, help='Number of fps of output files') 16 | 17 | args = parser.parse_args() 18 | return args 19 | 20 | def parse_config(args): 21 | with open('config.yml', 'r') as f: 22 | config = yaml.load(f) 23 | if not os.path.exists(args.log_dir): 24 | os.makedirs(args.log_dir) 25 | with open(os.path.join(args.log_dir, 'config.yml'), 'w') as f: 26 | yaml.dump(config, f, default_flow_style=False) 27 | return dict2namespace(config) 28 | 29 | def color2bw(inputname, inputpath, outputpath, out_dim, fps): 30 | if inputname.endswith(".mp4"): 31 | 32 | # store informations about the original video 33 | cap = cv2.VideoCapture(inputpath + inputname) 34 | # original dimensions 35 | width, height = int(cap.get(3)), int(cap.get(4)) 36 | 37 | 38 | fourcc = cv2.VideoWriter_fourcc(*'mp4v'); 39 | 40 | # parameters of output file 41 | if out_dim == None: 42 | # dimensions of the output image 43 | new_width, new_height = width, height 44 | else: 45 | new_width, new_height = out_dim 
46 | if fps == None: 47 | # number of frames 48 | fps = 30.0 49 | 50 | # grayscale output video 51 | gray_out = cv2.VideoWriter( 52 | outputpath + 'bw_' + inputname, 53 | fourcc, 54 | fps, 55 | (new_width, new_height), 56 | isColor=False 57 | ) 58 | 59 | # color output video 60 | color_out = cv2.VideoWriter( 61 | outputpath + 'color_' + inputname, 62 | fourcc, 63 | fps, 64 | (new_width, new_height), 65 | isColor=True 66 | ) 67 | 68 | 69 | while(cap.isOpened()): 70 | ret, frame = cap.read() 71 | # check if we are not at the end of the video 72 | if ret==True: 73 | 74 | #resize frame 75 | frame = cv2.resize(frame, (new_width, new_height), interpolation = cv2.INTER_LINEAR) 76 | 77 | # write the color frame 78 | color_out.write(frame) 79 | 80 | # change color to BW 81 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 82 | 83 | # write the grayscaled frame 84 | gray_out.write(frame) 85 | 86 | if cv2.waitKey(1) & 0xFF == ord('q'): 87 | break 88 | # end of the video 89 | else: 90 | break 91 | 92 | # release everything if job is finished 93 | cap.release() 94 | gray_out.release() 95 | color_out.release() 96 | 97 | def main(): 98 | args = parse_args() 99 | 100 | if args.filename == '*': 101 | for filename in os.listdir(args.input_dir): 102 | color2bw(inputname = filename, inputpath = args.input_dir, outputpath = args.output_dir, out_dim = args.out_dim, fps = args.fps) 103 | else: 104 | color2bw(inputname = args.filename, inputpath = args.input_dir, outputpath = args.output_dir, out_dim = args.out_dim, fps = args.fps) 105 | 106 | # cleanup 107 | cv2.destroyAllWindows() 108 | 109 | return 0 110 | 111 | if __name__ == '__main__': 112 | main() 113 | -------------------------------------------------------------------------------- /cs230_poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/cs230_poster.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | 4 | matplotlib 5 | pillow 6 | 7 | scikit-image # to convert color images from RGB to LAB color space 8 | opencv-python # to read video clips 9 | 10 | tqdm # to visualize progress bar -------------------------------------------------------------------------------- /synthesize_results.py: -------------------------------------------------------------------------------- 1 | """Aggregates results from the metrics_eval_best_weights.json in a parent folder""" 2 | 3 | import argparse 4 | import json 5 | import os 6 | 7 | from tabulate import tabulate 8 | 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--parent_dir', default='experiments', 12 | help='Directory containing results of experiments') 13 | 14 | 15 | def aggregate_metrics(parent_dir, metrics): 16 | """Aggregate the metrics of all experiments in folder `parent_dir`. 
17 | 18 | Assumes that `parent_dir` contains multiple experiments, with their results stored in 19 | `parent_dir/subdir/metrics_dev.json` 20 | 21 | Args: 22 | parent_dir: (string) path to directory containing experiments results 23 | metrics: (dict) subdir -> {'accuracy': ..., ...} 24 | """ 25 | # Get the metrics for the folder if it has results from an experiment 26 | metrics_file = os.path.join(parent_dir, 'metrics_eval_best_weights.json') 27 | if os.path.isfile(metrics_file): 28 | with open(metrics_file, 'r') as f: 29 | metrics[parent_dir] = json.load(f) 30 | 31 | # Check every subdirectory of parent_dir 32 | for subdir in os.listdir(parent_dir): 33 | if not os.path.isdir(os.path.join(parent_dir, subdir)): 34 | continue 35 | else: 36 | aggregate_metrics(os.path.join(parent_dir, subdir), metrics) 37 | 38 | 39 | def metrics_to_table(metrics): 40 | # Get the headers from the first subdir. Assumes everything has the same metrics 41 | headers = metrics[list(metrics.keys())[0]].keys() 42 | table = [[subdir] + [values[h] for h in headers] for subdir, values in metrics.items()] 43 | res = tabulate(table, headers, tablefmt='pipe') 44 | 45 | return res 46 | 47 | 48 | if __name__ == "__main__": 49 | args = parser.parse_args() 50 | 51 | # Aggregate metrics from args.parent_dir directory 52 | metrics = dict() 53 | aggregate_metrics(args.parent_dir, metrics) 54 | table = metrics_to_table(metrics) 55 | 56 | # Display the table to terminal 57 | print(table) 58 | 59 | # Save results in parent_dir/results.md 60 | save_file = os.path.join(args.parent_dir, "results.md") 61 | with open(save_file, 'w') as f: 62 | f.write(table) 63 | --------------------------------------------------------------------------------
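`synthesize_results.py` recursively walks `--parent_dir`, collects every `metrics_eval_best_weights.json` it finds, prints the aggregated metrics as a markdown table, and writes the same table to `<parent_dir>/results.md`. A minimal invocation is sketched below, assuming experiment results are stored under the default `experiments/` folder; note that the script depends on the `tabulate` package, which is not listed in `requirements.txt`.

```bash
# aggregate metrics from all experiments under experiments/ and write experiments/results.md
pip install tabulate
python3 synthesize_results.py --parent_dir experiments
```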