├── .gitignore
├── Colorizing-with-GANs
│   ├── .gitignore
│   ├── LICENSE
│   ├── README.md
│   ├── __init__.py
│   ├── build_dataset.py
│   ├── dataset.py
│   ├── img
│   │   ├── cgan.png
│   │   ├── con_gan.png
│   │   ├── discriminator.png
│   │   ├── gan.png
│   │   ├── gan_new.png
│   │   ├── places365.jpg
│   │   ├── places365.png
│   │   └── unet.png
│   ├── main.py
│   ├── models.py
│   ├── networks.py
│   ├── ops.py
│   ├── options.py
│   ├── requirements.txt
│   ├── setup.cfg
│   ├── test-eval.py
│   ├── test-turing.py
│   ├── train.py
│   ├── utils.py
│   ├── video_colorize_GAN.py
│   ├── video_colorize_GAN_1st-truth-ref.py
│   └── video_colorize_GAN_truth-ref.py
├── Deep-Learning-Colorization
│   ├── models
│   │   ├── .gitignore
│   │   ├── alexnet_deploy.prototxt
│   │   ├── alexnet_deploy_fc.prototxt
│   │   ├── alexnet_deploy_lab.prototxt
│   │   ├── alexnet_deploy_lab_fc.prototxt
│   │   ├── colorization_deploy_v1.prototxt
│   │   ├── colorization_deploy_v2.prototxt
│   │   ├── colorization_train_val_v2.prototxt
│   │   ├── fetch_alexnet_model.sh
│   │   └── fetch_release_models.sh
│   ├── resources
│   │   ├── batch_norm_absorb.py
│   │   ├── caffe_traininglayers.py
│   │   ├── caffe_traininglayers.pyc
│   │   ├── conv_into_fc.py
│   │   ├── magic_init
│   │   │   ├── LICENSE
│   │   │   ├── README.md
│   │   │   ├── load.py
│   │   │   ├── load.pyc
│   │   │   ├── magic_init_mod.py
│   │   │   └── measure_stat.py
│   │   ├── prior_probs.npy
│   │   ├── pts_in_hull.npy
│   │   ├── softmax_cross_entropy_loss_layer.cpp
│   │   ├── softmax_cross_entropy_loss_layer.cu
│   │   └── softmax_cross_entropy_loss_layer.hpp
│   └── video_colorize_parallel.py
├── README.md
├── Ref-GAN-Colorization
│   ├── .gitignore
│   ├── LICENSE
│   ├── README.md
│   ├── __init__.py
│   ├── build_dataset.py
│   ├── dataset.py
│   ├── main.py
│   ├── models.py
│   ├── models_base.py
│   ├── models_no-discriminator.py
│   ├── networks.py
│   ├── ops.py
│   ├── options.py
│   ├── requirements.txt
│   ├── setup.cfg
│   ├── src
│   │   ├── __init__.py
│   │   ├── build_dataset.py
│   │   ├── dataset.py
│   │   ├── main.py
│   │   ├── models.py
│   │   ├── models_base.py
│   │   ├── models_baseline_img.py
│   │   ├── networks.py
│   │   ├── ops.py
│   │   ├── options.py
│   │   ├── test-eval.py
│   │   ├── test-turing.py
│   │   ├── train.py
│   │   ├── utils.py
│   │   └── video_colorize_GAN.py
│   ├── test-eval.py
│   ├── test-turing.py
│   ├── train.py
│   ├── utils.py
│   └── video_colorize_GAN.py
├── automatic-video-colorization.pdf
├── configuration.txt
├── convert_moment_dataset.sh
├── converter.py
├── cs230_poster.pdf
├── requirements.txt
└── synthesize_results.py
/.gitignore:
--------------------------------------------------------------------------------
1 | data/**
2 | yt8m/**
3 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # custom
104 | _TODO
105 | checkpoints
106 | plots
107 | vcs.xml
108 | .idea
109 | .vscode
--------------------------------------------------------------------------------
/Colorizing-with-GANs/README.md:
--------------------------------------------------------------------------------
1 | # Image Colorization with Generative Adversarial Networks
2 | In this work, we generalize the colorization procedure using a conditional Deep Convolutional Generative Adversarial Network (DCGAN), as suggested by [Pix2Pix]. The network is trained on the [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) and [Places365](http://places2.csail.mit.edu) datasets. Some of the results on the Places365 dataset are [shown here](#places365-results).
3 |
4 | ## Prerequisites
5 | - Linux
6 | - Tensorflow 1.7
7 | - NVIDIA GPU (12G or 24G memory) + CUDA cuDNN
8 |
9 | ## Getting Started
10 | ### Installation
11 | - Clone this repo:
12 | ```bash
13 | git clone https://github.com/ImagingLab/Colorizing-with-GANs.git
14 | cd Colorizing-with-GANs
15 | ```
16 | - Install Tensorflow and dependencies from https://www.tensorflow.org/install/
17 | - Install python requirements:
18 | ```bash
19 | pip install -r requirements.txt
20 | ```
21 |
22 | ### Dataset
23 | - We use [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) and [Places365](http://places2.csail.mit.edu) datasets. To train a model on the full dataset, download datasets from official websites.
24 | After downloading, put them under the `datasets` folder.
25 |
26 | ### Training
27 | - To train the model, run the `main.py` script:
28 | ```bash
29 | python main.py
30 | ```
31 | - To train the model on the Places365 dataset with tuned hyperparameters:
32 | ```bash
33 | python train.py \
34 | --seed 100 \
35 | --dataset places365 \
36 | --dataset-path ./dataset/places365 \
37 | --checkpoints-path ./checkpoints \
38 | --batch-size 16 \
39 | --epochs 10 \
40 | --lr 3e-4 \
41 | --label-smoothing 1
42 |
43 | ```
44 |
45 | - To train the model on the CIFAR-10 dataset with tuned hyperparameters:
46 | ```bash
47 | python train.py \
48 | --seed 100 \
49 | --dataset cifar10 \
50 | --dataset-path ./dataset/cifar10 \
51 | --checkpoints-path ./checkpoints \
52 | --batch-size 128 \
53 | --epochs 200 \
54 | --lr 3e-4 \
55 | --lr-decay-steps 1e4 \
56 | --augment True
57 |
58 | ```
59 |
60 | ### Evaluate
61 | - Download the pre-trained weights [from here](https://drive.google.com/open?id=1jTsAUAKrMiHO2gn7s-fFZ_zUSzgKoPyp) and copy them into the `checkpoints` folder.
62 | - To evaluate the model quantitatively on the test-set, run `test-eval.py` script:
63 | ```bash
64 | python test-eval.py
65 | ```
66 |
67 | ### Turing Test
68 | - Download the pre-trained weights [from here](https://drive.google.com/open?id=1jTsAUAKrMiHO2gn7s-fFZ_zUSzgKoPyp) and copy them into the `checkpoints` folder.
69 | - To evaluate the model qualitatively using a visual Turing test, run `test-turing.py`:
70 | ```bash
71 | python test-turing.py
72 | ```
73 |
74 | - To run a time-based visual Turing test (2-second decision time):
75 | ```bash
76 | python test-turing.py --test-delay 2
77 | ```
78 |
79 |
80 | ## Method
81 |
82 | ### Generative Adversarial Network
83 | Both the generator and the discriminator use CNNs. The generator is trained to minimize the probability that the discriminator makes a correct prediction on generated data, while the discriminator is trained to maximize the probability of assigning the correct label. This is formulated as a single minimax game:
84 |
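In standard GAN notation (a conventional restatement, not the exact formulation from the paper, with `x` a real color image and `z` the noise vector):

```
\min_G \max_D \; V(D, G) =
    \mathbb{E}_{x \sim p_{\text{data}}(x)}\big[\log D(x)\big]
  + \mathbb{E}_{z \sim p_z(z)}\big[\log\big(1 - D(G(z))\big)\big]
```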
85 |
86 |
87 | In our model, we have redefined the generator's cost function by maximizing the probability of the discriminator being mistaken, as opposed to minimizing the probability of the discriminator being correct. In addition, the cost function was further modified by adding an L1-based regularizer. This theoretically preserves the structure of the original images and prevents the generator from assigning arbitrary colors to pixels just to fool the discriminator:
88 |
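A conventional way to write the resulting generator cost (a sketch of the idea rather than the paper's exact equation), with `y` the ground-truth color image, `x` the grayscale input, and λ the L1 weight exposed as `--l1-weight`:

```
\min_{\theta_G} J^{(G)}(\theta_D, \theta_G) =
    -\,\mathbb{E}\big[\log D(G(0_z \mid x))\big]
  + \lambda \, \big\lVert G(0_z \mid x) - y \big\rVert_1
```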
89 |
90 |
91 |
92 | ### Conditional GAN
93 | In a traditional GAN, the input of the generator is randomly generated noise data z. However, this approach is not applicable to the automatic colorization problem due to the nature of its inputs. The generator must be modified to accept grayscale images as inputs rather than noise. This problem was addressed by using a variant of GAN called [conditional generative adversarial networks](https://arxiv.org/abs/1411.1784). Since no noise is introduced, the input of the generator is treated as zero noise with the grayscale input as a prior:
94 |
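With `x` the grayscale condition and `y` the true color image, the conditional objective can be written as follows (again a conventional restatement, not the paper's exact equation):

```
\min_G \max_D \;
    \mathbb{E}_{x,y}\big[\log D(y \mid x)\big]
  + \mathbb{E}_{x}\big[\log\big(1 - D(G(0_z \mid x) \mid x)\big)\big]
```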
95 |
96 |
97 | The discriminator receives colored images from both the generator and the original data, along with the grayscale input as the condition, and tries to tell which pair contains the true colored image:
98 |
99 |
100 |
101 |
102 | ### Networks Architecture
103 | The architecture of the generator is inspired by [U-Net](https://arxiv.org/abs/1505.04597): the model is symmetric, with `n` encoding units and `n` decoding units. The contracting path consists of 4x4 convolution layers with stride 2 for downsampling, each followed by batch normalization and a Leaky-ReLU activation with a slope of 0.2. The number of channels is doubled after each step. Each unit in the expansive path consists of a 4x4 transposed convolution layer with stride 2 for upsampling, concatenation with the activation map of the mirroring layer in the contracting path, followed by batch normalization and a ReLU activation. The last layer of the network is a 1x1 convolution, which is equivalent to a cross-channel parametric pooling layer. We use the `tanh` function for the last layer.
104 |
105 |
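As a rough illustration of this architecture, the snippet below instantiates the `Generator` class from `networks.py`. The kernel configuration and input shape are illustrative assumptions, not the settings used in `models.py`; each kernel entry is `(filters, stride, dropout)`:

```python
import tensorflow as tf
from networks import Generator

# Hypothetical kernel configuration, (filters, stride, dropout) per unit.
encoder_kernels = [(64, 1, 0), (128, 2, 0), (256, 2, 0), (512, 2, 0)]   # contracting path
decoder_kernels = [(256, 2, 0.5), (128, 2, 0), (64, 2, 0)]              # expansive path

# Grayscale input; the 32x32 shape is an assumption for this sketch.
gray = tf.placeholder(tf.float32, [None, 32, 32, 1])

generator = Generator('gen', encoder_kernels, decoder_kernels, output_channels=3)
# 4x4 convolutions, skip connections to the mirrored encoder layers,
# and a final 1x1 convolution with tanh, as described above.
colorized = generator.create(gray, kernel_size=4, seed=42)
```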
106 |
107 |
108 | For the discriminator, we use an architecture similar to the baseline's contracting path: a series of 4x4 convolutional layers with stride 2, with the number of channels doubled after each downsampling step. All convolution layers are followed by batch normalization and a leaky ReLU activation with slope 0.2. After the last layer, a convolution maps the features to a one-dimensional output, followed by a sigmoid function that returns the probability of the input being real or fake.
109 |
110 |
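A matching sketch for the `Discriminator` class from `networks.py` (again with illustrative kernel settings and shapes, not the ones defined in `models.py`):

```python
import tensorflow as tf
from networks import Discriminator

# Hypothetical kernel configuration, (filters, stride, dropout) per layer.
dis_kernels = [(64, 2, 0), (128, 2, 0), (256, 2, 0), (512, 1, 0)]

gray = tf.placeholder(tf.float32, [None, 32, 32, 1])       # grayscale condition
candidate = tf.placeholder(tf.float32, [None, 32, 32, 3])  # real or generated colors

discriminator = Discriminator('dis', dis_kernels)
# Condition and candidate colorization are concatenated along the channel axis;
# the class ends with a 4x4 convolution down to a single channel and returns raw
# scores (the sigmoid is applied in the loss, e.g. via sigmoid cross-entropy).
scores = discriminator.create(tf.concat([gray, candidate], axis=3), kernel_size=4, seed=42)
```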
111 |
112 |
113 | ## Places365 Results
114 | Colorization results with Places365. (a) Grayscale. (b) Original Image. (c) Colorized with GAN.
115 |
116 |
117 |
118 |
119 | ## Citation
120 | If you use this code for your research, please cite our paper Image Colorization with Generative Adversarial Networks:
121 |
122 | ```
123 | @inproceedings{nazeri2018image,
124 | title={Image Colorization Using Generative Adversarial Networks},
125 | author={Nazeri, Kamyar and Ng, Eric and Ebrahimi, Mehran},
126 | booktitle={International Conference on Articulated Motion and Deformable Objects},
127 | pages={85--94},
128 | year={2018},
129 | organization={Springer}
130 | }
131 | ```
132 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/__init__.py:
--------------------------------------------------------------------------------
1 | from .options import *
2 | from .models import *
3 | from .utils import *
4 | from .dataset import *
5 | from .main import *
--------------------------------------------------------------------------------
/Colorizing-with-GANs/build_dataset.py:
--------------------------------------------------------------------------------
1 | """Split the SIGNS dataset into train/dev/test and resize images to 64x64.
2 |
3 | The SIGNS dataset comes in the following format:
4 | train_signs/
5 | 0_IMG_5864.jpg
6 | ...
7 | test_signs/
8 | 0_IMG_5942.jpg
9 | ...
10 |
11 | Original images have size (3024, 3024).
12 | Resizing to (64, 64) reduces the dataset size from 1.16 GB to 4.7 MB, and loading smaller images
13 | makes training faster.
14 |
15 | We already have a test set created, so we only need to split "train_signs" into train and dev sets.
16 | Because we don't have a lot of images and we want that the statistics on the dev set be as
17 | representative as possible, we'll take 20% of "train_signs" as dev set.
18 | """
19 |
20 | import argparse
21 | import random
22 | import os
23 |
24 | import numpy as np
25 |
26 | from PIL import Image
27 | from tqdm import tqdm
28 | import cv2
29 |
30 | # size of the resized frames
31 | SIZE = 256
32 |
33 | # subfolder of the "Moments_in_Time" dataset to consider
34 | SUBFOLDER = "/baking"
35 |
36 | parser = argparse.ArgumentParser()
37 | parser.add_argument('--data_dir', default='../data/Moments_in_Time_Mini', help="Directory with the Moments in Time dataset")
38 | parser.add_argument('--output_dir', default='../data/momentsintime', help="Where to write the new data")
39 | parser.add_argument('--dt', type=int, default=1, help="Time between consecutive frames")
40 |
41 |
42 | def split_resize_and_save(filename, i, output_dir, dt=1, size=SIZE):
43 | """Split the video clip in pair of consecutive frames (t, t+dt), resize the frames, and save the pairs to the `output_dir`"""
44 |
45 | vidcap = cv2.VideoCapture(filename)
46 |
47 | success, frame = vidcap.read()
48 | # convert BGR to RGB convention
49 | frame = frame[:,:,::-1]
50 | # default : use bilinear interpolation
51 | frame_prev = cv2.resize(frame, (size, size))
52 |
53 | # counter to build pairs of consecutive frames
54 | count = 1
55 |
56 | while success:
57 | count += 1
58 |
59 | success, frame = vidcap.read()
60 |
61 | if success:
62 | # convert BGR to RGB convention
63 | frame = frame[:,:,::-1]
64 | # default : use bilinear interpolation
65 | frame = cv2.resize(frame, (size, size))
66 | else:
67 | break
68 | #print('Read a new frame: ', success)
69 |
70 | if count % (1+dt) == 0:
71 | img = np.concatenate((frame, frame_prev), 2)
72 | frame_prev = frame
73 | np.save(output_dir + "/video{}_frame{}".format(i, count), img)
74 |
75 | if __name__ == '__main__':
76 | args = parser.parse_args()
77 | # Define the output directory
78 | args.output_dir = args.output_dir + "_dt" + str(args.dt)
79 |
80 | assert os.path.isdir(args.data_dir), "Couldn't find the dataset at {}".format(args.data_dir)
81 |
82 | # Define the data directories
83 | train_data_dir = os.path.join(args.data_dir, 'training' + SUBFOLDER)
84 | test_data_dir = os.path.join(args.data_dir, 'validation' + SUBFOLDER)
85 |
86 | # Get the filenames in each directory (train and test)
87 | filenames = os.listdir(train_data_dir)
88 | filenames = [os.path.join(train_data_dir, f) for f in filenames if f.endswith('.mp4')]
89 |
90 | test_filenames = os.listdir(test_data_dir)
91 | test_filenames = [os.path.join(test_data_dir, f) for f in test_filenames if f.endswith('.mp4')]
92 |
93 |     # Split the training clips into 90% train and 10% dev
94 | # Make sure to always shuffle with a fixed seed so that the split is reproducible
95 | random.seed(230)
96 | filenames.sort()
97 | random.shuffle(filenames)
98 |
99 | split = int(0.9 * len(filenames))
100 | train_filenames = filenames[:split]
101 | dev_filenames = filenames[split:]
102 |
103 | filenames = {'train': train_filenames,
104 | 'dev': dev_filenames,
105 | 'test': test_filenames}
106 |
107 | if not os.path.exists(args.output_dir):
108 | os.mkdir(args.output_dir)
109 | else:
110 | print("Warning: output dir {} already exists".format(args.output_dir))
111 |
112 | # Preprocess train, dev and test
113 | for split in ['train', 'dev', 'test']:
114 | output_dir_split = os.path.join(args.output_dir, '{}_moments'.format(split))
115 | if not os.path.exists(output_dir_split):
116 | os.mkdir(output_dir_split)
117 | else:
118 | print("Warning: dir {} already exists".format(output_dir_split))
119 |
120 | print("Processing {} data, saving preprocessed data to {}".format(split, output_dir_split))
121 | for i, filename in enumerate(tqdm(filenames[split])):
122 | split_resize_and_save(filename, i, output_dir_split, dt=args.dt, size=SIZE)
123 |
124 | print("Done building dataset")
125 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/dataset.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import numpy as np
3 | import tensorflow as tf
4 | from scipy.misc import imread
5 | from abc import abstractmethod
6 | from utils import unpickle
7 |
8 | CIFAR10_DATASET = 'cifar10'
9 | PLACES365_DATASET = 'places365'
10 | MOMENTSINTIME_DATASET = 'momentsintime'
11 |
12 | class BaseDataset():
13 | def __init__(self, name, path, training=True, augment=True):
14 | self.name = name
15 | self.augment = augment and training
16 | self.training = training
17 | self.path = path
18 | self._data = []
19 |
20 | def __len__(self):
21 | return len(self.data)
22 |
23 | def __iter__(self):
24 | total = len(self)
25 | start = 0
26 |
27 | while start < total:
28 | item = self[start]
29 | start += 1
30 | yield item
31 |
32 |         return  # PEP 479: raising StopIteration inside a generator is an error in Python 3.7+
33 |
34 | def __getitem__(self, index):
35 | val = self.data[index]
36 | try:
37 | # OLD : img = imread(val) if isinstance(val, str) else val
38 | img = np.load(val) if isinstance(val, str) else val
39 |
40 | if self.augment and np.random.binomial(1, 0.5) == 1:
41 | img = img[:, ::-1, :]
42 |
43 | except:
44 | img = None
45 |
46 | return img
47 |
48 | def generator(self, batch_size, recursive=False):
49 | start = 0
50 | total = len(self)
51 |
52 | while True:
53 | while start < total:
54 | end = np.min([start + batch_size, total])
55 | items = []
56 |
57 | for ix in range(start, end):
58 | item = self[ix]
59 | if item is not None:
60 | items.append(item)
61 |
62 | start = end
63 | yield np.array(items)
64 |
65 | if recursive:
66 | start = 0
67 |
68 | else:
69 |                 return  # stop the generator instead of raising StopIteration (PEP 479)
70 |
71 |
72 | @property
73 | def data(self):
74 | if len(self._data) == 0:
75 | self._data = self.load()
76 | np.random.shuffle(self._data)
77 |
78 | return self._data
79 |
80 | @abstractmethod
81 | def load(self):
82 | return []
83 |
84 |
85 | class Cifar10Dataset(BaseDataset):
86 | def __init__(self, path, training=True, augment=True):
87 | super(Cifar10Dataset, self).__init__(CIFAR10_DATASET, path, training, augment)
88 |
89 | def load(self):
90 | data = []
91 | if self.training:
92 | for i in range(1, 6):
93 | filename = '{}/data_batch_{}'.format(self.path, i)
94 | batch_data = unpickle(filename)
95 | if len(data) > 0:
96 | data = np.vstack((data, batch_data[b'data']))
97 | else:
98 | data = batch_data[b'data']
99 |
100 | else:
101 | filename = '{}/test_batch'.format(self.path)
102 | batch_data = unpickle(filename)
103 | data = batch_data[b'data']
104 |
105 | w = 32
106 | h = 32
107 | s = w * h
108 | data = np.array(data)
109 | data = np.dstack((data[:, :s], data[:, s:2 * s], data[:, 2 * s:]))
110 | data = data.reshape((-1, w, h, 3))
111 | return data
112 |
113 |
114 | class Places365Dataset(BaseDataset):
115 | def __init__(self, path, training=True, augment=True):
116 | super(Places365Dataset, self).__init__(PLACES365_DATASET, path, training, augment)
117 |
118 | def load(self):
119 | if self.training:
120 | data = np.array(
121 | glob.glob(self.path + '/data_256/**/*.jpg', recursive=True))
122 |
123 | else:
124 | data = np.array(glob.glob(self.path + '/val_256/*.jpg'))
125 |
126 | return data
127 |
128 |
129 | class MomentsInTimeDataset(BaseDataset):
130 | def __init__(self, path, training=True, augment=True):
131 | super(MomentsInTimeDataset, self).__init__(MOMENTSINTIME_DATASET, path, training, augment)
132 |
133 | def load(self):
134 | if self.training:
135 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/train_moments/*"))
136 | data = np.array(glob.glob("." + self.path + "/train_moments/*"))
137 | else:
138 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/dev_moments/*"))
139 | data = np.array(glob.glob("." + self.path + "/dev_moments/*"))
140 |
141 | return data
142 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/cgan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/cgan.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/con_gan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/con_gan.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/discriminator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/discriminator.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/gan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/gan.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/gan_new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/gan_new.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/places365.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/places365.jpg
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/places365.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/places365.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/unet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/unet.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import numpy as np
4 | import tensorflow as tf
5 | from options import ModelOptions
6 | from models import Cifar10Model, Places365Model, MomentsInTimeModel
7 | from dataset import CIFAR10_DATASET, PLACES365_DATASET, MOMENTSINTIME_DATASET
8 |
9 |
10 | def main(options):
11 |
12 | # reset tensorflow graph
13 | tf.reset_default_graph()
14 |
15 | # initialize random seed
16 | tf.set_random_seed(options.seed)
17 | np.random.seed(options.seed)
18 | random.seed(options.seed)
19 |
20 | # create a session environment
21 | with tf.Session() as sess:
22 |
23 | if options.dataset == CIFAR10_DATASET:
24 | model = Cifar10Model(sess, options)
25 |
26 | elif options.dataset == PLACES365_DATASET:
27 | model = Places365Model(sess, options)
28 |
29 | elif options.dataset == MOMENTSINTIME_DATASET:
30 | model = MomentsInTimeModel(sess, options)
31 |
32 | else:
33 | model = MomentsInTimeModel(sess, options)
34 |
35 | if not os.path.exists(options.checkpoints_path):
36 | os.makedirs(options.checkpoints_path)
37 |
38 | if options.log:
39 | open(model.train_log_file, 'w').close()
40 | open(model.test_log_file, 'w').close()
41 |
42 | # build the model and initialize
43 | model.build()
44 | sess.run(tf.global_variables_initializer())
45 |
46 |
47 | # load model only after global variables initialization
48 | model.load()
49 |
50 |
51 | if options.mode == 0:
52 | args = vars(options)
53 | print('\n------------ Options -------------')
54 | with open(os.path.join(options.checkpoints_path, 'options.dat'), 'w') as f:
55 | for k, v in sorted(args.items()):
56 | print('%s: %s' % (str(k), str(v)))
57 | f.write('%s: %s\n' % (str(k), str(v)))
58 | print('-------------- End ----------------\n')
59 |
60 | model.train()
61 |
62 | elif options.mode == 1:
63 | model.evaluate()
64 | while True:
65 | model.sample()
66 |
67 | else:
68 | model.turing_test()
69 |
70 |
71 | if __name__ == "__main__":
72 | main(ModelOptions().parse())
73 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/networks.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from ops import conv2d, conv2d_transpose, pixelwise_accuracy
4 |
5 |
6 | class Discriminator(object):
7 | def __init__(self, name, kernels):
8 | self.name = name
9 | self.kernels = kernels
10 | self.var_list = []
11 |
12 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None):
13 | output = inputs
14 | with tf.variable_scope(self.name, reuse=reuse_variables):
15 | for index, kernel in enumerate(self.kernels):
16 |
17 |             # do not use batch-norm in the first layer
18 | bnorm = False if index == 0 else True
19 | name = 'conv' + str(index)
20 | output = conv2d(
21 | inputs=output,
22 | name=name,
23 | kernel_size=kernel_size,
24 | filters=kernel[0],
25 | strides=kernel[1],
26 | bnorm=bnorm,
27 | activation=tf.nn.leaky_relu,
28 | seed=seed
29 | )
30 |
31 | if kernel[2] > 0:
32 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
33 |
34 | output = conv2d(
35 | inputs=output,
36 | name='conv_last',
37 | filters=1,
38 | kernel_size=4, # last layer kernel size = 4
39 | strides=1, # last layer stride = 1
40 | bnorm=False, # do not use batch-norm for the last layer
41 | seed=seed
42 | )
43 |
44 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)
45 |
46 | return output
47 |
48 |
49 | class Generator(object):
50 | def __init__(self, name, encoder_kernels, decoder_kernels, output_channels=3):
51 | self.name = name
52 | self.encoder_kernels = encoder_kernels
53 | self.decoder_kernels = decoder_kernels
54 | self.output_channels = output_channels
55 | self.var_list = []
56 |
57 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None):
58 | output = inputs
59 |
60 | with tf.variable_scope(self.name, reuse=reuse_variables):
61 |
62 | layers = []
63 |
64 | # encoder branch
65 | for index, kernel in enumerate(self.encoder_kernels):
66 |
67 | name = 'conv' + str(index)
68 | output = conv2d(
69 | inputs=output,
70 | name=name,
71 | kernel_size=kernel_size,
72 | filters=kernel[0],
73 | strides=kernel[1],
74 | activation=tf.nn.leaky_relu,
75 | seed=seed
76 | )
77 |
78 | # save contracting path layers to be used for skip connections
79 | layers.append(output)
80 |
81 | if kernel[2] > 0:
82 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
83 |
84 | # decoder branch
85 | for index, kernel in enumerate(self.decoder_kernels):
86 |
87 | name = 'deconv' + str(index)
88 | output = conv2d_transpose(
89 | inputs=output,
90 | name=name,
91 | kernel_size=kernel_size,
92 | filters=kernel[0],
93 | strides=kernel[1],
94 | activation=tf.nn.relu,
95 | seed=seed
96 | )
97 |
98 | if kernel[2] > 0:
99 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
100 |
101 | # concat the layer from the contracting path with the output of the current layer
102 | # concat only the channels (axis=3)
103 | output = tf.concat([layers[len(layers) - index - 2], output], axis=3)
104 |
105 | output = conv2d(
106 | inputs=output,
107 | name='conv_last',
108 |             filters=self.output_channels,   # number of output channels
109 | kernel_size=1, # last layer kernel size = 1
110 | strides=1, # last layer stride = 1
111 | bnorm=False, # do not use batch-norm for the last layer
112 | activation=tf.nn.tanh, # tanh activation function for the output
113 | seed=seed
114 | )
115 |
116 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)
117 |
118 | return output
119 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/ops.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | COLORSPACE_RGB = 'RGB'
5 | COLORSPACE_LAB = 'LAB'
6 | 
7 |
8 | def conv2d(inputs, filters, name, kernel_size=4, strides=2, bnorm=True, activation=None, seed=None):
9 | """
10 | Creates a conv2D block
11 | """
12 | initializer=tf.variance_scaling_initializer(seed=seed)
13 | res = tf.layers.conv2d(
14 | name=name,
15 | inputs=inputs,
16 | filters=filters,
17 | kernel_size=kernel_size,
18 | strides=strides,
19 | padding="same",
20 | kernel_initializer=initializer)
21 |
22 | if bnorm:
23 | res = tf.layers.batch_normalization(inputs=res, name='bn_' + name, training=True)
24 |
25 | # activation after batch-norm
26 | if activation is not None:
27 | res = activation(res)
28 |
29 | return res
30 |
31 |
32 | def conv2d_transpose(inputs, filters, name, kernel_size=4, strides=2, bnorm=True, activation=None, seed=None):
33 | """
34 | Creates a conv2D-transpose block
35 | """
36 | initializer=tf.variance_scaling_initializer(seed=seed)
37 | res = tf.layers.conv2d_transpose(
38 | name=name,
39 | inputs=inputs,
40 | filters=filters,
41 | kernel_size=kernel_size,
42 | strides=strides,
43 | padding="same",
44 | kernel_initializer=initializer)
45 |
46 | if bnorm:
47 | res = tf.layers.batch_normalization(inputs=res, name='bn_' + name, training=True)
48 |
49 | # activation after batch-norm
50 | if activation is not None:
51 | res = activation(res)
52 |
53 | return res
54 |
55 |
56 | def pixelwise_accuracy(img_real, img_fake, colorspace, thresh):
57 | """
58 | Measures the accuracy of the colorization process by comparing pixels
59 | """
60 | img_real = postprocess(img_real, colorspace, COLORSPACE_LAB)
61 | img_fake = postprocess(img_fake, colorspace, COLORSPACE_LAB)
62 |
63 | diffL = tf.abs(tf.round(img_real[..., 0]) - tf.round(img_fake[..., 0]))
64 | diffA = tf.abs(tf.round(img_real[..., 1]) - tf.round(img_fake[..., 1]))
65 | diffB = tf.abs(tf.round(img_real[..., 2]) - tf.round(img_fake[..., 2]))
66 |
67 | # within %thresh of the original
68 | predL = tf.cast(tf.less_equal(diffL, 1 * thresh), tf.float64) # L: [0, 100]
69 | predA = tf.cast(tf.less_equal(diffA, 2.2 * thresh), tf.float64) # A: [-110, 110]
70 | predB = tf.cast(tf.less_equal(diffB, 2.2 * thresh), tf.float64) # B: [-110, 110]
71 |
72 | # all three channels are within the threshold
73 | pred = predL * predA * predB
74 |
75 | return tf.reduce_mean(pred)
76 |
77 |
78 | def preprocess(img, colorspace_in, colorspace_out):
79 | if colorspace_out.upper() == COLORSPACE_RGB:
80 | if colorspace_in == COLORSPACE_LAB:
81 | img = lab_to_rgb(img)
82 |
83 | # [0, 1] => [-1, 1]
84 | img = (img / 255.0) * 2 - 1
85 |
86 | elif colorspace_out.upper() == COLORSPACE_LAB:
87 | if colorspace_in == COLORSPACE_RGB:
88 | img = rgb_to_lab(img / 255.0)
89 |
90 | L_chan, a_chan, b_chan = tf.unstack(img, axis=3)
91 |
92 | # L: [0, 100] => [-1, 1]
93 | # A, B: [-110, 110] => [-1, 1]
94 | img = tf.stack([L_chan / 50 - 1, a_chan / 110, b_chan / 110], axis=3)
95 |
96 | return img
97 |
98 |
99 | def postprocess(img, colorspace_in, colorspace_out):
100 | if colorspace_in.upper() == COLORSPACE_RGB:
101 | # [-1, 1] => [0, 1]
102 | img = (img + 1) / 2
103 |
104 | if colorspace_out == COLORSPACE_LAB:
105 | img = rgb_to_lab(img)
106 |
107 | elif colorspace_in.upper() == COLORSPACE_LAB:
108 | L_chan, a_chan, b_chan = tf.unstack(img, axis=3)
109 |
110 | # L: [-1, 1] => [0, 100]
111 | # A, B: [-1, 1] => [-110, 110]
112 | img = tf.stack([(L_chan + 1) / 2 * 100, a_chan * 110, b_chan * 110], axis=3)
113 |
114 | if colorspace_out == COLORSPACE_RGB:
115 | img = lab_to_rgb(img)
116 |
117 | return img
118 |
119 |
120 | def rgb_to_lab(srgb):
121 | # based on https://github.com/torch/image/blob/9f65c30167b2048ecbe8b7befdc6b2d6d12baee9/generic/image.c
122 | with tf.name_scope("rgb_to_lab"):
123 | srgb_pixels = tf.reshape(srgb, [-1, 3])
124 |
125 | with tf.name_scope("srgb_to_xyz"):
126 | linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32)
127 | exponential_mask = tf.cast(srgb_pixels > 0.04045, dtype=tf.float32)
128 | rgb_pixels = (srgb_pixels / 12.92 * linear_mask) + (((srgb_pixels + 0.055) / 1.055) ** 2.4) * exponential_mask
129 | rgb_to_xyz = tf.constant([
130 | # X Y Z
131 | [0.412453, 0.212671, 0.019334], # R
132 | [0.357580, 0.715160, 0.119193], # G
133 | [0.180423, 0.072169, 0.950227], # B
134 | ])
135 | xyz_pixels = tf.matmul(rgb_pixels, rgb_to_xyz)
136 |
137 | # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions
138 | with tf.name_scope("xyz_to_cielab"):
139 |
140 | # normalize for D65 white point
141 | xyz_normalized_pixels = tf.multiply(xyz_pixels, [1 / 0.950456, 1.0, 1 / 1.088754])
142 |
143 | epsilon = 6 / 29
144 | linear_mask = tf.cast(xyz_normalized_pixels <= (epsilon**3), dtype=tf.float32)
145 | exponential_mask = tf.cast(xyz_normalized_pixels > (epsilon**3), dtype=tf.float32)
146 | fxfyfz_pixels = (xyz_normalized_pixels / (3 * epsilon**2) + 4 / 29) * linear_mask + (xyz_normalized_pixels ** (1 / 3)) * exponential_mask
147 |
148 | # convert to lab
149 | fxfyfz_to_lab = tf.constant([
150 | # l a b
151 | [0.0, 500.0, 0.0], # fx
152 | [116.0, -500.0, 200.0], # fy
153 | [0.0, 0.0, -200.0], # fz
154 | ])
155 | lab_pixels = tf.matmul(fxfyfz_pixels, fxfyfz_to_lab) + tf.constant([-16.0, 0.0, 0.0])
156 |
157 | return tf.reshape(lab_pixels, tf.shape(srgb))
158 |
159 |
160 | def lab_to_rgb(lab):
161 | with tf.name_scope("lab_to_rgb"):
162 | lab_pixels = tf.reshape(lab, [-1, 3])
163 |
164 | # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions
165 | with tf.name_scope("cielab_to_xyz"):
166 | # convert to fxfyfz
167 | lab_to_fxfyfz = tf.constant([
168 | # fx fy fz
169 | [1 / 116.0, 1 / 116.0, 1 / 116.0], # l
170 | [1 / 500.0, 0.0, 0.0], # a
171 | [0.0, 0.0, -1 / 200.0], # b
172 | ])
173 | fxfyfz_pixels = tf.matmul(lab_pixels + tf.constant([16.0, 0.0, 0.0]), lab_to_fxfyfz)
174 |
175 | # convert to xyz
176 | epsilon = 6 / 29
177 | linear_mask = tf.cast(fxfyfz_pixels <= epsilon, dtype=tf.float32)
178 | exponential_mask = tf.cast(fxfyfz_pixels > epsilon, dtype=tf.float32)
179 | xyz_pixels = (3 * epsilon**2 * (fxfyfz_pixels - 4 / 29)) * linear_mask + (fxfyfz_pixels ** 3) * exponential_mask
180 |
181 | # denormalize for D65 white point
182 | xyz_pixels = tf.multiply(xyz_pixels, [0.950456, 1.0, 1.088754])
183 |
184 | with tf.name_scope("xyz_to_srgb"):
185 | xyz_to_rgb = tf.constant([
186 | # r g b
187 | [3.2404542, -0.9692660, 0.0556434], # x
188 | [-1.5371385, 1.8760108, -0.2040259], # y
189 | [-0.4985314, 0.0415560, 1.0572252], # z
190 | ])
191 | rgb_pixels = tf.matmul(xyz_pixels, xyz_to_rgb)
192 | # avoid a slightly negative number messing up the conversion
193 | rgb_pixels = tf.clip_by_value(rgb_pixels, 0.0, 1.0)
194 | linear_mask = tf.cast(rgb_pixels <= 0.0031308, dtype=tf.float32)
195 | exponential_mask = tf.cast(rgb_pixels > 0.0031308, dtype=tf.float32)
196 | srgb_pixels = (rgb_pixels * 12.92 * linear_mask) + ((rgb_pixels ** (1 / 2.4) * 1.055) - 0.055) * exponential_mask
197 |
198 | return tf.reshape(srgb_pixels, tf.shape(lab))
199 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/options.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import random
4 | import argparse
5 |
6 |
7 | def str2bool(v):
8 | if v.lower() in ('yes', 'true', 't', 'y', '1'):
9 | return True
10 | elif v.lower() in ('no', 'false', 'f', 'n', '0'):
11 | return False
12 | else:
13 | raise argparse.ArgumentTypeError('Boolean value expected.')
14 |
15 |
16 | class ModelOptions:
17 | def __init__(self):
18 | parser = argparse.ArgumentParser(description='Colorization with GANs')
19 | parser.add_argument('--seed', type=int, default=0, metavar='S', help='random seed (default: 0)')
20 | parser.add_argument('--name', type=str, default='CGAN', help='arbitrary model name (default: CGAN)')
21 |         parser.add_argument('--mode', type=int, default=0, help='run mode [0: train, 1: evaluate, 2: test] (default: 0)')
22 | parser.add_argument('--dataset', type=str, default='momentsintime', help='the name of dataset [places365, cifar10] (default: momentsintime)')
23 | parser.add_argument('--dataset-path', type=str, default='./data', help='dataset path (default: ./data)')
24 | parser.add_argument('--checkpoints-path', type=str, default='./checkpoints', help='models are saved here (default: ./checkpoints)')
25 | parser.add_argument('--batch-size', type=int, default=16, metavar='N', help='input batch size for training (default: 16)')
26 | parser.add_argument('--color-space', type=str, default='lab', help='model color space [lab, rgb] (default: lab)')
27 |         parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs to train (default: 100)')
28 |         parser.add_argument('--lr', type=float, default=5e-3, metavar='LR', help='learning rate (default: 5e-3)')
29 |         parser.add_argument('--lr-decay-rate', type=float, default=0.1, help='learning rate exponentially decay rate (default: 0.1)')
30 |         parser.add_argument('--lr-decay-steps', type=float, default=25e2, help='learning rate exponentially decay steps (default: 25e2)')
31 | parser.add_argument('--beta1', type=float, default=0, help='momentum term of adam optimizer (default: 0)')
32 | parser.add_argument("--l1-weight", type=float, default=100.0, help="weight on L1 term for generator gradient (default: 100.0)")
33 | parser.add_argument('--augment', type=str2bool, default=True, help='True for augmentation (default: True)')
34 | parser.add_argument('--label-smoothing', type=str2bool, default=False, help='True for one-sided label smoothing (default: False)')
35 | parser.add_argument('--acc-thresh', type=float, default=2.0, help="accuracy threshold (default: 2.0)")
36 | parser.add_argument('--kernel-size', type=int, default=4, help="default kernel size (default: 4)")
37 | parser.add_argument('--save', type=str2bool, default=True, help='True for saving (default: True)')
38 |         parser.add_argument('--save-interval', type=int, default=100, help='how many batches to wait before saving model (default: 100)')
39 |         parser.add_argument('--sample', type=str2bool, default=True, help='True for sampling (default: True)')
40 |         parser.add_argument('--sample-size', type=int, default=8, help='number of images to sample (default: 8)')
41 |         parser.add_argument('--sample-interval', type=int, default=100, help='how many batches to wait before sampling (default: 100)')
42 | parser.add_argument('--validate', type=str2bool, default=True, help='True for validation (default: True)')
43 | parser.add_argument('--validate-interval', type=int, default=0, help='how many batches to wait before validating (default: 0)')
44 | parser.add_argument('--log', type=str2bool, default=True, help='True for logging (default: True)')
45 | parser.add_argument('--log-interval', type=int, default=10, help='how many iterations to wait before logging training status (default: 10)')
46 | parser.add_argument('--visualize', type=str2bool, default=False, help='True for accuracy visualization (default: False)')
47 | parser.add_argument('--visualize-window', type=int, default=100, help='the exponentially moving average window width (default: 100)')
48 | parser.add_argument('--test-size', type=int, default=100, metavar='N', help='number of Turing tests (default: 100)')
49 | parser.add_argument('--test-delay', type=int, default=0, metavar='N', help='number of seconds to wait when doing Turing test, 0 for unlimited (default: 0)')
50 |         parser.add_argument('--gpu-ids', type=str, default='0', help='gpu ids, e.g. 0 or 0,1,2 or 0,2; use -1 for CPU')
51 | # to recolorize a video clip
52 | parser.add_argument('--filename', type=str, default='*', help='Filename of input BW video')
53 | parser.add_argument('--input_dir', type=str, default='../data/examples/converted', help='Directory of input files')
54 | parser.add_argument('--output_dir', type=str, default='../data/examples/recolorized', help='Directory of output files')
55 |
56 | self._parser = parser
57 |
58 | def parse(self):
59 | opt = self._parser.parse_args()
60 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids
61 |
62 | opt.color_space = opt.color_space.upper()
63 |
64 | if opt.seed == 0:
65 | opt.seed = random.randint(0, 2**31 - 1)
66 |
67 | if (opt.dataset_path == './data') or (opt.dataset_path == './dataset'):
68 | opt.dataset_path += ('/' + opt.dataset)
69 |
70 | if opt.checkpoints_path == './checkpoints':
71 | opt.checkpoints_path += ('/' + opt.dataset)
72 |
73 | return opt
74 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy ~= 1.14.3
2 | scipy ~= 1.0.1
3 | future ~= 0.16.0
4 | matplotlib ~= 2.2.2
5 | pillow ~= 5.0.0
6 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/setup.cfg:
--------------------------------------------------------------------------------
1 | [pycodestyle]
2 | ignore = E303
3 | max-line-length = 200
--------------------------------------------------------------------------------
/Colorizing-with-GANs/test-eval.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 1
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/test-turing.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 2
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/train.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 0
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/utils.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | from PIL import Image
4 | import matplotlib.pyplot as plt
5 |
6 |
7 | def stitch_images(grayscale, original, pred):
8 | gap = 5
9 | width, height = original[0][:, :, 0].shape
10 | img_per_row = 2 if width > 200 else 4
11 | img = Image.new('RGB', (width * img_per_row * 3 + gap * (img_per_row - 1), height * int(len(original) / img_per_row)))
12 |
13 | grayscale = np.array(grayscale).squeeze()
14 | original = np.array(original)
15 | pred = np.array(pred)
16 |
17 | for ix in range(len(original)):
18 | xoffset = int(ix % img_per_row) * width * 3 + int(ix % img_per_row) * gap
19 | yoffset = int(ix / img_per_row) * height
20 | im1 = Image.fromarray(grayscale[ix])
21 | im2 = Image.fromarray(original[ix])
22 | im3 = Image.fromarray((pred[ix] * 255).astype(np.uint8))
23 | img.paste(im1, (xoffset, yoffset))
24 | img.paste(im2, (xoffset + width, yoffset))
25 | img.paste(im3, (xoffset + width + width, yoffset))
26 |
27 | return img
28 |
29 |
30 | def unpickle(file):
31 | with open(file, 'rb') as fo:
32 | dict = pickle.load(fo, encoding='bytes')
33 | return dict
34 |
35 |
36 | def moving_average(data, window_width):
37 | cumsum_vec = np.cumsum(np.insert(data, 0, 0))
38 | ma_vec = (cumsum_vec[window_width:] - cumsum_vec[:-window_width]) / window_width
39 | return ma_vec
40 |
41 |
42 | def imshow(img, title=''):
43 | fig = plt.gcf()
44 | fig.canvas.set_window_title(title)
45 | plt.axis('off')
46 | plt.imshow(img, interpolation='none')
47 | plt.show()
48 |
49 |
50 | def turing_test(real_img, fake_img, delay=0):
51 | height, width, _ = real_img.shape
52 | imgs = np.array([real_img, (fake_img * 255).astype(np.uint8)])
53 | real_index = np.random.binomial(1, 0.5)
54 | fake_index = (real_index + 1) % 2
55 |
56 | img = Image.new('RGB', (2 + width * 2, height))
57 | img.paste(Image.fromarray(imgs[real_index]), (0, 0))
58 | img.paste(Image.fromarray(imgs[fake_index]), (2 + width, 0))
59 |
60 | img.success = 0
61 |
62 | def onclick(event):
63 | if event.xdata is not None:
64 | if event.x < width and real_index == 0:
65 | img.success = 1
66 |
67 | elif event.x > width and real_index == 1:
68 | img.success = 1
69 |
70 | plt.gcf().canvas.stop_event_loop()
71 |
72 | plt.ion()
73 | plt.gcf().canvas.mpl_connect('button_press_event', onclick)
74 | plt.title('click on the real image')
75 | plt.axis('off')
76 | plt.imshow(img, interpolation='none')
77 | plt.show()
78 | plt.draw()
79 | plt.gcf().canvas.start_event_loop(delay)
80 |
81 | return img.success
82 |
83 |
84 | def visualize(train_log_file, test_log_file, window_width, title=''):
85 | train_data = np.loadtxt(train_log_file)
86 | test_data = np.loadtxt(test_log_file)
87 |
88 | if len(train_data.shape) < 2:
89 | return
90 |
91 | if len(train_data) < window_width:
92 | window_width = len(train_data) - 1
93 |
94 | fig = plt.gcf()
95 | fig.canvas.set_window_title(title)
96 |
97 | plt.ion()
98 | plt.subplot('121')
99 | plt.cla()
100 | if len(train_data) > 1:
101 | plt.plot(moving_average(train_data[:, 8], window_width))
102 | plt.title('train')
103 |
104 | plt.subplot('122')
105 | plt.cla()
106 | if len(test_data) > 1:
107 | plt.plot(test_data[:, 8])
108 | plt.title('test')
109 |
110 | plt.show()
111 | plt.draw()
112 | plt.pause(.01)
113 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/video_colorize_GAN.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import cv2
6 | import numpy as np
7 | from PIL import Image
8 | from skimage import img_as_ubyte, img_as_float
9 | import skimage.color as color
10 | import scipy.ndimage.interpolation as sni
11 | from ops import postprocess
12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB
13 |
14 | import tensorflow as tf
15 | from options import ModelOptions
16 | from models import MomentsInTimeModel
17 |
18 |
19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, options):
20 |
21 | # colorize the image based on the previous one
22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0)}
23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic)
24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB)
25 |
26 |     # evaluate the tensor
27 | img_rgb_out = fake_image.eval()
28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8)
29 |
30 | return img_rgb_out
31 |
32 | def bw2color(options, inputname, inputpath, outputpath):
33 | if inputname.endswith(".mp4"):
34 | # size of the input frames
35 | size = 256
36 |
37 | # check that the video exists
38 | path_to_video = os.path.join(inputpath, inputname)
39 | if not os.path.exists(path_to_video):
40 |             print("The file:", path_to_video, "does not exist!")
41 | 
42 |         # store information about the original video
43 | cap = cv2.VideoCapture(os.path.join(path_to_video))
44 | # original dimensions
45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v');
48 | # parameters of output file
49 | # dimensions of the output image
50 | new_width, new_height = size, size
51 | # number of frames
52 | fps = 30.0
53 |
54 | # recolorized output video
55 | color_out = cv2.VideoWriter(
56 | os.path.join(outputpath, 'color_' + inputname),
57 | fourcc,
58 | fps,
59 | (new_width, new_height),
60 | isColor=True
61 | )
62 |
63 | # TO CHANGE to DL colorization of 1st frame
64 | # pick the first frame from the original video clip as the first reference
65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:]))
66 |
67 | ret_temp, frame_prev = cap_temp.read()
68 | # convert BGR to RGB convention
69 | frame_prev = frame_prev[:,:,::-1]
70 | frame_prev = cv2.resize(frame_prev, (size, size))
71 |
72 | # count the number of recolorized frames
73 | frames_processed = 0
74 |
75 | with tf.Session() as sess:
76 |
77 | model = MomentsInTimeModel(sess, options)
78 |
79 | # build the model and initialize
80 | model.build()
81 | sess.run(tf.global_variables_initializer())
82 |
83 | # load model only after global variables initialization
84 | model.load()
85 |
86 | while(cap.isOpened()):
87 | ret, frame_in = cap.read()
88 |
89 | # check if we are not at the end of the video
90 | if ret==True:
91 | # convert BGR to RGB convention
92 | frame_in = frame_in[:,:,::-1]
93 | # resize the frame to match the input size of the GAN
94 | frame_in = cv2.resize(frame_in, (size, size))
95 |
96 | # colorize the BW frame
97 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, options)
98 |
99 | #generate sample
100 | get_image = False
101 | if get_image:
102 | img = Image.fromarray(frame_out)
103 |
104 | if not os.path.exists(model.samples_dir):
105 | os.makedirs(model.samples_dir)
106 |
107 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png"
108 | img.save(os.path.join(model.samples_dir, sample))
109 |
110 | # save the recolorized frame
111 | frame_prev = frame_out
112 | # convert RGB to BGR convention
113 | frame_out = frame_out[:,:,::-1]
114 | # write the color frame
115 | color_out.write(frame_out)
116 |
117 | # print progress
118 | frames_processed += 1
119 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 //totalFrames), end="\r")
120 | if cv2.waitKey(1) & 0xFF == ord('q'):
121 | break
122 | # end of the video
123 | else:
124 | break
125 |
126 | # release everything if job is finished
127 | cap.release()
128 | color_out.release()
129 |
130 | def main():
131 |
132 | # reset tensorflow graph
133 | tf.reset_default_graph()
134 |
135 | options = ModelOptions().parse()
136 |
137 | if options.filename == '*':
138 | for filename in os.listdir(options.input_dir):
139 |             bw2color(options, inputname = filename, inputpath = options.input_dir, outputpath = options.output_dir)
140 | else:
141 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir)
142 |
143 | # cleanup
144 | cv2.destroyAllWindows()
145 |
146 | return 0
147 |
148 | if __name__ == '__main__':
149 | main()
150 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/video_colorize_GAN_1st-truth-ref.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import cv2
6 | import numpy as np
7 | from PIL import Image
8 | from skimage import img_as_ubyte, img_as_float
9 | import skimage.color as color
10 | import scipy.ndimage.interpolation as sni
11 | from ops import postprocess
12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB
13 |
14 | import tensorflow as tf
15 | from options import ModelOptions
16 | from models import MomentsInTimeModel
17 |
18 |
19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, options):
20 |
21 | # colorize the image based on the previous one
22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0)}
23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic)
24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB)
25 |
26 |     # evaluate the tensor
27 | img_rgb_out = fake_image.eval()
28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8)
29 |
30 | return img_rgb_out
31 |
32 | def bw2color(options, inputname, inputpath, outputpath):
33 | if inputname.endswith(".mp4"):
34 | # size of the input frames
35 | size = 256
36 |
37 | # check that the video exists
38 | path_to_video = os.path.join(inputpath, inputname)
39 | if not os.path.exists(path_to_video):
40 |             print("The file:", path_to_video, "does not exist!")
41 | 
42 |         # store information about the original video
43 | cap = cv2.VideoCapture(os.path.join(path_to_video))
44 | # original dimensions
45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v');
48 | # parameters of output file
49 | # dimensions of the output image
50 | new_width, new_height = size, size
51 | # number of frames
52 | fps = 30.0
53 |
54 | # recolorized output video
55 | color_out = cv2.VideoWriter(
56 | os.path.join(outputpath, 'color_' + inputname),
57 | fourcc,
58 | fps,
59 | (new_width, new_height),
60 | isColor=True
61 | )
62 |
63 | # TO CHANGE to DL colorization of 1st frame
64 | # pick the first frame from the original video clip as the first reference
65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:]))
66 |
67 | ret_temp, frame_prev = cap_temp.read()
68 | # convert BGR to RGB convention
69 | frame_prev = frame_prev[:,:,::-1]
70 | frame_prev = cv2.resize(frame_prev, (size, size))
71 |
72 | # count the number of recolorized frames
73 | frames_processed = 0
74 |
75 | with tf.Session() as sess:
76 |
77 | model = MomentsInTimeModel(sess, options)
78 |
79 | # build the model and initialize
80 | model.build()
81 | sess.run(tf.global_variables_initializer())
82 |
83 | # load model only after global variables initialization
84 | model.load()
85 |
86 | while(cap.isOpened()):
87 | ret, frame_in = cap.read()
88 |
89 | # check if we are not at the end of the video
90 | if ret==True:
91 | # convert BGR to RGB convention
92 | frame_in = frame_in[:,:,::-1]
93 | # resize the frame to match the input size of the GAN
94 | frame_in = cv2.resize(frame_in, (size, size))
95 |
96 | # colorize the BW frame
97 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, options)
98 |
99 | #generate sample
100 | get_image = False
101 | if get_image:
102 | img = Image.fromarray(frame_out)
103 |
104 | if not os.path.exists(model.samples_dir):
105 | os.makedirs(model.samples_dir)
106 |
107 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png"
108 | img.save(os.path.join(model.samples_dir, sample))
109 |
110 | # save the recolorized frame
111 | #frame_prev = frame_out
112 | # convert RGB to BGR convention
113 | frame_out = frame_out[:,:,::-1]
114 | # write the color frame
115 | color_out.write(frame_out)
116 | #break
117 |
118 | # print progress
119 | frames_processed += 1
120 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 //totalFrames), end="\r")
121 | if cv2.waitKey(1) & 0xFF == ord('q'):
122 | break
123 | # end of the video
124 | else:
125 | break
126 |
127 | # release everything if job is finished
128 | cap.release()
129 | color_out.release()
130 |
131 | def main():
132 |
133 | # reset tensorflow graph
134 | tf.reset_default_graph()
135 |
136 | options = ModelOptions().parse()
137 |
138 | if options.filename == '*':
139 | for filename in os.listdir(options.input_dir):
140 |             bw2color(options, inputname = filename, inputpath = options.input_dir, outputpath = options.output_dir)
141 | else:
142 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir)
143 |
144 | # cleanup
145 | cv2.destroyAllWindows()
146 |
147 | return 0
148 |
149 | if __name__ == '__main__':
150 | main()
151 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/video_colorize_GAN_truth-ref.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import cv2
6 | import numpy as np
7 | from PIL import Image
8 | from skimage import img_as_ubyte, img_as_float
9 | import skimage.color as color
10 | import scipy.ndimage.interpolation as sni
11 | from ops import postprocess
12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB
13 |
14 | import tensorflow as tf
15 | from options import ModelOptions
16 | from models import MomentsInTimeModel
17 |
18 |
19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, options):
20 |
21 | # colorize the image based on the previous one
22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0)}
23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic)
24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB)
25 |
26 | # evaluate the tensor
27 | img_rgb_out = fake_image.eval()
28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8)
29 |
30 | return img_rgb_out
31 |
32 | def bw2color(options, inputname, inputpath, outputpath):
33 | if inputname.endswith(".mp4"):
34 | # size of the input frames
35 | size = 256
36 |
37 | # check that the video exists
38 | path_to_video = os.path.join(inputpath, inputname)
39 | if not os.path.exists(path_to_video):
40 | print("The file :", path_to_video, "does not exist !")
41 |
42 | # store information about the original video
43 | cap = cv2.VideoCapture(path_to_video)
44 | # original dimensions
45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v')
48 | # parameters of output file
49 | # dimensions of the output image
50 | new_width, new_height = size, size
51 | # frame rate of the output video
52 | fps = 30.0
53 |
54 | # recolorized output video
55 | color_out = cv2.VideoWriter(
56 | os.path.join(outputpath, 'color_' + inputname),
57 | fourcc,
58 | fps,
59 | (new_width, new_height),
60 | isColor=True
61 | )
62 |
63 | # TO CHANGE to DL colorization of 1st frame
64 | # pick the first frame from the original video clip as the first reference
65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:]))
66 |
67 | # count the number of recolorized frames
68 | frames_processed = 0
69 |
70 | with tf.Session() as sess:
71 |
72 | model = MomentsInTimeModel(sess, options)
73 |
74 | # build the model and initialize
75 | model.build()
76 | sess.run(tf.global_variables_initializer())
77 |
78 | # load model only after global variables initialization
79 | model.load()
80 |
81 | while(cap.isOpened()):
82 | ret, frame_in = cap.read()
83 |
84 | ret_temp, frame_prev = cap_temp.read()
85 |
86 | # check if we are not at the end of the video
87 | if ret:
88 | frame_prev = frame_prev[:,:,::-1]
89 | frame_prev = cv2.resize(frame_prev, (size, size))
90 |
91 | # convert BGR to RGB convention
92 | frame_in = frame_in[:,:,::-1]
93 | # resize the frame to match the input size of the GAN
94 | frame_in = cv2.resize(frame_in, (size, size))
95 |
96 | # colorize the BW frame
97 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, options)
98 |
99 | #generate sample
100 | get_image = False
101 | if get_image:
102 | img = Image.fromarray(frame_out)
103 |
104 | if not os.path.exists(model.samples_dir):
105 | os.makedirs(model.samples_dir)
106 |
107 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png"
108 | img.save(os.path.join(model.samples_dir, sample))
109 |
110 | # save the recolorized frame
111 | #frame_prev = frame_out
112 | # convert RGB to BGR convention
113 | frame_out = frame_out[:,:,::-1]
114 | # write the color frame
115 | color_out.write(frame_out)
116 | #break
117 |
118 | # print progress
119 | frames_processed += 1
120 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 // totalFrames), end="\r")
121 | if cv2.waitKey(1) & 0xFF == ord('q'):
122 | break
123 | # end of the video
124 | else:
125 | break
126 |
127 | # release everything if job is finished
128 | cap.release()
129 | color_out.release()
130 |
131 | def main():
132 |
133 | # reset tensorflow graph
134 | tf.reset_default_graph()
135 |
136 | options = ModelOptions().parse()
137 |
138 | if options.filename == '*':
139 | for filename in os.listdir(options.input_dir):
140 | bw2color(options, inputname=filename, inputpath=options.input_dir, outputpath=options.output_dir)
141 | else:
142 | bw2color(options, inputname=options.filename, inputpath=options.input_dir, outputpath=options.output_dir)
143 |
144 | # cleanup
145 | cv2.destroyAllWindows()
146 |
147 | return 0
148 |
149 | if __name__ == '__main__':
150 | main()
151 |
--------------------------------------------------------------------------------
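Both truth-reference scripts above carry the commented-out line frame_prev = frame_out. Re-enabling it (and dropping the ground-truth reference reads) turns the pipeline into pure self-propagation, where each colorized output becomes the reference for the next frame. A minimal sketch of that loop, reusing the model, options, cap, color_out, size and image_colorization_propagation names defined in the scripts above:

    # self-propagated colorization: the previous output is the next reference
    ret, frame_prev = cap.read()                                    # seed with the raw first frame
    frame_prev = cv2.resize(frame_prev[:, :, ::-1], (size, size))   # BGR -> RGB, resize
    while cap.isOpened():
        ret, frame_in = cap.read()
        if not ret:
            break
        frame_in = cv2.resize(frame_in[:, :, ::-1], (size, size))
        frame_out = image_colorization_propagation(model, frame_in, frame_prev, options)
        frame_prev = frame_out                    # propagate the predicted colors forward
        color_out.write(frame_out[:, :, ::-1])    # RGB -> BGR before writing

In practice the seed frame would come from a single-image colorizer rather than the raw clip, which is what the "TO CHANGE to DL colorization of 1st frame" comments in the scripts point at.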
/Deep-Learning-Colorization/models/.gitignore:
--------------------------------------------------------------------------------
1 | *.caffemodel
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/models/alexnet_deploy.prototxt:
--------------------------------------------------------------------------------
1 | name: "Colornet"
2 | layer {
3 | name: "data"
4 | top: "data" # BGR [0,255] ***non-mean centered***
5 | type: "Input"
6 | input_param { shape { dim: 1 dim: 3 dim: 227 dim: 227 } }
7 | }
8 | # **************************
9 | # ***** PROCESS COLORS *****
10 | # **************************
11 | layer { # Convert to lab
12 | name: "img_lab"
13 | type: "ColorConv"
14 | bottom: "data"
15 | top: "img_lab"
16 | propagate_down: false
17 | color_conv_param {
18 | input: 0 # BGR
19 | output: 3 # Lab
20 | }
21 | }
22 | layer {
23 | name: "img_slice"
24 | type: "Slice"
25 | bottom: "img_lab"
26 | top: "img_l" # [0,100]
27 | top: "data_ab" # [-110,110]
28 | propagate_down: false
29 | slice_param {
30 | axis: 1
31 | slice_point: 1
32 | }
33 | }
34 | layer {
35 | name: "silence_ab"
36 | type: "Silence"
37 | bottom: "data_ab"
38 | }
39 | layer { # 0-center lightness channel
40 | name: "data_l"
41 | type: "Convolution"
42 | bottom: "img_l"
43 | top: "data_l" # scaled and centered lightness value
44 | propagate_down: false
45 | param {lr_mult: 0 decay_mult: 0}
46 | param {lr_mult: 0 decay_mult: 0}
47 | convolution_param {
48 | kernel_size: 1
49 | num_output: 1
50 | }
51 | }
52 | layer {
53 | name: "conv1"
54 | type: "Convolution"
55 | bottom: "data_l"
56 | top: "conv1"
57 | param { lr_mult: 1 decay_mult: 1 }
58 | param { lr_mult: 2 decay_mult: 0 }
59 | convolution_param {
60 | num_output: 96
61 | kernel_size: 11
62 | stride: 4
63 | weight_filler {
64 | type: "gaussian"
65 | std: 0.01
66 | }
67 | bias_filler {
68 | type: "constant"
69 | value: 0
70 | }
71 | }
72 | }
73 | layer {
74 | name: "relu1"
75 | type: "ReLU"
76 | bottom: "conv1"
77 | top: "conv1"
78 | }
79 | layer {
80 | name: "pool1"
81 | type: "Pooling"
82 | bottom: "conv1"
83 | top: "pool1"
84 | pooling_param {
85 | pool: MAX
86 | kernel_size: 3
87 | stride: 2
88 | }
89 | }
90 | layer {
91 | name: "conv2"
92 | type: "Convolution"
93 | bottom: "pool1"
94 | top: "conv2"
95 | param { lr_mult: 1 decay_mult: 1 }
96 | param { lr_mult: 2 decay_mult: 0 }
97 | convolution_param {
98 | num_output: 256
99 | pad: 2
100 | kernel_size: 5
101 | group: 2
102 | weight_filler {
103 | type: "gaussian"
104 | std: 0.01
105 | }
106 | bias_filler {
107 | type: "constant"
108 | value: 1
109 | }
110 | }
111 | }
112 | layer {
113 | name: "relu2"
114 | type: "ReLU"
115 | bottom: "conv2"
116 | top: "conv2"
117 | }
118 | layer {
119 | name: "pool2"
120 | type: "Pooling"
121 | bottom: "conv2"
122 | top: "pool2"
123 | pooling_param {
124 | pool: MAX
125 | kernel_size: 3
126 | stride: 2
127 | }
128 | }
129 | layer {
130 | name: "conv3"
131 | type: "Convolution"
132 | bottom: "pool2"
133 | top: "conv3"
134 | param { lr_mult: 1 decay_mult: 1 }
135 | param { lr_mult: 2 decay_mult: 0 }
136 | convolution_param {
137 | num_output: 384
138 | pad: 1
139 | kernel_size: 3
140 | weight_filler {
141 | type: "gaussian"
142 | std: 0.01
143 | }
144 | bias_filler {
145 | type: "constant"
146 | value: 0
147 | }
148 | }
149 | }
150 | layer {
151 | name: "relu3"
152 | type: "ReLU"
153 | bottom: "conv3"
154 | top: "conv3"
155 | }
156 | layer {
157 | name: "conv4"
158 | type: "Convolution"
159 | bottom: "conv3"
160 | top: "conv4"
161 | param { lr_mult: 1 decay_mult: 1 }
162 | param { lr_mult: 2 decay_mult: 0 }
163 | convolution_param {
164 | num_output: 384
165 | pad: 1
166 | kernel_size: 3
167 | group: 2
168 | weight_filler {
169 | type: "gaussian"
170 | std: 0.01
171 | }
172 | bias_filler {
173 | type: "constant"
174 | value: 1
175 | }
176 | }
177 | }
178 | layer {
179 | name: "relu4"
180 | type: "ReLU"
181 | bottom: "conv4"
182 | top: "conv4"
183 | }
184 | layer {
185 | name: "conv5"
186 | type: "Convolution"
187 | bottom: "conv4"
188 | top: "conv5"
189 | param { lr_mult: 1 decay_mult: 1 }
190 | param { lr_mult: 2 decay_mult: 0 }
191 | convolution_param {
192 | num_output: 256
193 | pad: 1
194 | kernel_size: 3
195 | group: 2
196 | weight_filler {
197 | type: "gaussian"
198 | std: 0.01
199 | }
200 | bias_filler {
201 | type: "constant"
202 | value: 1
203 | }
204 | }
205 | }
206 | layer {
207 | name: "relu5"
208 | type: "ReLU"
209 | bottom: "conv5"
210 | top: "conv5"
211 | }
212 | layer {
213 | name: "pool5"
214 | type: "Pooling"
215 | bottom: "conv5"
216 | top: "pool5"
217 | pooling_param {
218 | pool: MAX
219 | kernel_size: 3
220 | stride: 2
221 | }
222 | }
223 | layer {
224 | name: "fc6"
225 | type: "Convolution"
226 | bottom: "pool5"
227 | top: "fc6"
228 | param { lr_mult: 1 decay_mult: 1 }
229 | param { lr_mult: 2 decay_mult: 0 }
230 | convolution_param {
231 | kernel_size: 6
232 | stride: 1
233 | num_output: 4096
234 | }
235 | }
236 | layer {
237 | name: "relu6"
238 | type: "ReLU"
239 | bottom: "fc6"
240 | top: "fc6"
241 | }
242 | layer {
243 | name: "drop6"
244 | type: "Dropout"
245 | bottom: "fc6"
246 | top: "fc6"
247 | dropout_param {
248 | dropout_ratio: 0.5
249 | }
250 | }
251 | layer {
252 | name: "fc7"
253 | type: "Convolution"
254 | bottom: "fc6"
255 | top: "fc7"
256 | param { lr_mult: 1 decay_mult: 1 }
257 | param { lr_mult: 2 decay_mult: 0 }
258 | convolution_param {
259 | kernel_size: 1
260 | stride: 1
261 | num_output: 4096
262 | }
263 | }
264 | layer {
265 | name: "relu7"
266 | type: "ReLU"
267 | bottom: "fc7"
268 | top: "fc7"
269 | }
270 | layer {
271 | name: "drop7"
272 | type: "Dropout"
273 | bottom: "fc7"
274 | top: "fc7"
275 | dropout_param {
276 | dropout_ratio: 0.5
277 | }
278 | }
279 |
280 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/models/alexnet_deploy_fc.prototxt:
--------------------------------------------------------------------------------
1 | name: "Colornet"
2 | layer {
3 | name: "data"
4 | top: "data" # BGR [0,255] ***non-mean centered***
5 | type: "Input"
6 | input_param { shape { dim: 1 dim: 3 dim: 227 dim: 227 } }
7 | }
8 | # **************************
9 | # ***** PROCESS COLORS *****
10 | # **************************
11 | layer { # Convert to lab
12 | name: "img_lab"
13 | type: "ColorConv"
14 | bottom: "data"
15 | top: "img_lab"
16 | propagate_down: false
17 | color_conv_param {
18 | input: 0 # BGR
19 | output: 3 # Lab
20 | }
21 | }
22 | layer {
23 | name: "img_slice"
24 | type: "Slice"
25 | bottom: "img_lab"
26 | top: "img_l" # [0,100]
27 | top: "data_ab" # [-110,110]
28 | propagate_down: false
29 | slice_param {
30 | axis: 1
31 | slice_point: 1
32 | }
33 | }
34 | layer {
35 | name: "silence_ab"
36 | type: "Silence"
37 | bottom: "data_ab"
38 | }
39 | layer { # 0-center lightness channel
40 | name: "data_l"
41 | type: "Convolution"
42 | bottom: "img_l"
43 | top: "data_l" # scaled and centered lightness value
44 | propagate_down: false
45 | param {lr_mult: 0 decay_mult: 0}
46 | param {lr_mult: 0 decay_mult: 0}
47 | convolution_param {
48 | kernel_size: 1
49 | num_output: 1
50 | }
51 | }
52 | layer {
53 | name: "conv1"
54 | type: "Convolution"
55 | bottom: "data_l"
56 | top: "conv1"
57 | param { lr_mult: 1 decay_mult: 1 }
58 | param { lr_mult: 2 decay_mult: 0 }
59 | convolution_param {
60 | num_output: 96
61 | kernel_size: 11
62 | stride: 4
63 | weight_filler {
64 | type: "gaussian"
65 | std: 0.01
66 | }
67 | bias_filler {
68 | type: "constant"
69 | value: 0
70 | }
71 | }
72 | }
73 | layer {
74 | name: "relu1"
75 | type: "ReLU"
76 | bottom: "conv1"
77 | top: "conv1"
78 | }
79 | layer {
80 | name: "pool1"
81 | type: "Pooling"
82 | bottom: "conv1"
83 | top: "pool1"
84 | pooling_param {
85 | pool: MAX
86 | kernel_size: 3
87 | stride: 2
88 | }
89 | }
90 | layer {
91 | name: "conv2"
92 | type: "Convolution"
93 | bottom: "pool1"
94 | top: "conv2"
95 | param { lr_mult: 1 decay_mult: 1 }
96 | param { lr_mult: 2 decay_mult: 0 }
97 | convolution_param {
98 | num_output: 256
99 | pad: 2
100 | kernel_size: 5
101 | group: 2
102 | weight_filler {
103 | type: "gaussian"
104 | std: 0.01
105 | }
106 | bias_filler {
107 | type: "constant"
108 | value: 1
109 | }
110 | }
111 | }
112 | layer {
113 | name: "relu2"
114 | type: "ReLU"
115 | bottom: "conv2"
116 | top: "conv2"
117 | }
118 | layer {
119 | name: "pool2"
120 | type: "Pooling"
121 | bottom: "conv2"
122 | top: "pool2"
123 | pooling_param {
124 | pool: MAX
125 | kernel_size: 3
126 | stride: 2
127 | }
128 | }
129 | layer {
130 | name: "conv3"
131 | type: "Convolution"
132 | bottom: "pool2"
133 | top: "conv3"
134 | param { lr_mult: 1 decay_mult: 1 }
135 | param { lr_mult: 2 decay_mult: 0 }
136 | convolution_param {
137 | num_output: 384
138 | pad: 1
139 | kernel_size: 3
140 | weight_filler {
141 | type: "gaussian"
142 | std: 0.01
143 | }
144 | bias_filler {
145 | type: "constant"
146 | value: 0
147 | }
148 | }
149 | }
150 | layer {
151 | name: "relu3"
152 | type: "ReLU"
153 | bottom: "conv3"
154 | top: "conv3"
155 | }
156 | layer {
157 | name: "conv4"
158 | type: "Convolution"
159 | bottom: "conv3"
160 | top: "conv4"
161 | param { lr_mult: 1 decay_mult: 1 }
162 | param { lr_mult: 2 decay_mult: 0 }
163 | convolution_param {
164 | num_output: 384
165 | pad: 1
166 | kernel_size: 3
167 | group: 2
168 | weight_filler {
169 | type: "gaussian"
170 | std: 0.01
171 | }
172 | bias_filler {
173 | type: "constant"
174 | value: 1
175 | }
176 | }
177 | }
178 | layer {
179 | name: "relu4"
180 | type: "ReLU"
181 | bottom: "conv4"
182 | top: "conv4"
183 | }
184 | layer {
185 | name: "conv5"
186 | type: "Convolution"
187 | bottom: "conv4"
188 | top: "conv5"
189 | param { lr_mult: 1 decay_mult: 1 }
190 | param { lr_mult: 2 decay_mult: 0 }
191 | convolution_param {
192 | num_output: 256
193 | pad: 1
194 | kernel_size: 3
195 | group: 2
196 | weight_filler {
197 | type: "gaussian"
198 | std: 0.01
199 | }
200 | bias_filler {
201 | type: "constant"
202 | value: 1
203 | }
204 | }
205 | }
206 | layer {
207 | name: "relu5"
208 | type: "ReLU"
209 | bottom: "conv5"
210 | top: "conv5"
211 | }
212 | layer {
213 | name: "pool5"
214 | type: "Pooling"
215 | bottom: "conv5"
216 | top: "pool5"
217 | pooling_param {
218 | pool: MAX
219 | kernel_size: 3
220 | stride: 2
221 | }
222 | }
223 | layer {
224 | name: "fc6"
225 | type: "InnerProduct"
226 | bottom: "pool5"
227 | top: "fc6"
228 | param { lr_mult: 1 decay_mult: 1 }
229 | param { lr_mult: 2 decay_mult: 0 }
230 | inner_product_param {
231 | num_output: 4096
232 | }
233 | }
234 | layer {
235 | name: "relu6"
236 | type: "ReLU"
237 | bottom: "fc6"
238 | top: "fc6"
239 | }
240 | layer {
241 | name: "drop6"
242 | type: "Dropout"
243 | bottom: "fc6"
244 | top: "fc6"
245 | dropout_param {
246 | dropout_ratio: 0.5
247 | }
248 | }
249 | layer {
250 | name: "fc7"
251 | type: "InnerProduct"
252 | bottom: "fc6"
253 | top: "fc7"
254 | param { lr_mult: 1 decay_mult: 1 }
255 | param { lr_mult: 2 decay_mult: 0 }
256 | inner_product_param {
257 | num_output: 4096
258 | }
259 | }
260 | layer {
261 | name: "relu7"
262 | type: "ReLU"
263 | bottom: "fc7"
264 | top: "fc7"
265 | }
266 | layer {
267 | name: "drop7"
268 | type: "Dropout"
269 | bottom: "fc7"
270 | top: "fc7"
271 | dropout_param {
272 | dropout_ratio: 0.5
273 | }
274 | }
275 |
276 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/models/alexnet_deploy_lab.prototxt:
--------------------------------------------------------------------------------
1 | name: "Colornet"
2 | layer {
3 | name: "img_lab"
4 | top: "img_lab" # Lab color space
5 | type: "Input"
6 | input_param { shape { dim: 1 dim: 3 dim: 227 dim: 227 } }
7 | }
8 | # **************************
9 | # ***** PROCESS COLORS *****
10 | # **************************
11 | # layer { # Convert to lab
12 | # name: "img_lab"
13 | # type: "ColorConv"
14 | # bottom: "data"
15 | # top: "img_lab"
16 | # propagate_down: false
17 | # color_conv_param {
18 | # input: 0 # BGR
19 | # output: 3 # Lab
20 | # }
21 | # }
22 | layer {
23 | name: "img_slice"
24 | type: "Slice"
25 | bottom: "img_lab"
26 | top: "img_l" # [0,100]
27 | top: "data_ab" # [-110,110]
28 | propagate_down: false
29 | slice_param {
30 | axis: 1
31 | slice_point: 1
32 | }
33 | }
34 | layer {
35 | name: "silence_ab"
36 | type: "Silence"
37 | bottom: "data_ab"
38 | }
39 | layer { # 0-center lightness channel
40 | name: "data_l"
41 | type: "Convolution"
42 | bottom: "img_l"
43 | top: "data_l" # scaled and centered lightness value
44 | propagate_down: false
45 | param {lr_mult: 0 decay_mult: 0}
46 | param {lr_mult: 0 decay_mult: 0}
47 | convolution_param {
48 | kernel_size: 1
49 | num_output: 1
50 | }
51 | }
52 | layer {
53 | name: "conv1"
54 | type: "Convolution"
55 | bottom: "data_l"
56 | top: "conv1"
57 | param { lr_mult: 1 decay_mult: 1 }
58 | param { lr_mult: 2 decay_mult: 0 }
59 | convolution_param {
60 | num_output: 96
61 | kernel_size: 11
62 | stride: 4
63 | weight_filler {
64 | type: "gaussian"
65 | std: 0.01
66 | }
67 | bias_filler {
68 | type: "constant"
69 | value: 0
70 | }
71 | }
72 | }
73 | layer {
74 | name: "relu1"
75 | type: "ReLU"
76 | bottom: "conv1"
77 | top: "conv1"
78 | }
79 | layer {
80 | name: "pool1"
81 | type: "Pooling"
82 | bottom: "conv1"
83 | top: "pool1"
84 | pooling_param {
85 | pool: MAX
86 | kernel_size: 3
87 | stride: 2
88 | }
89 | }
90 | layer {
91 | name: "conv2"
92 | type: "Convolution"
93 | bottom: "pool1"
94 | top: "conv2"
95 | param { lr_mult: 1 decay_mult: 1 }
96 | param { lr_mult: 2 decay_mult: 0 }
97 | convolution_param {
98 | num_output: 256
99 | pad: 2
100 | kernel_size: 5
101 | group: 2
102 | weight_filler {
103 | type: "gaussian"
104 | std: 0.01
105 | }
106 | bias_filler {
107 | type: "constant"
108 | value: 1
109 | }
110 | }
111 | }
112 | layer {
113 | name: "relu2"
114 | type: "ReLU"
115 | bottom: "conv2"
116 | top: "conv2"
117 | }
118 | layer {
119 | name: "pool2"
120 | type: "Pooling"
121 | bottom: "conv2"
122 | top: "pool2"
123 | pooling_param {
124 | pool: MAX
125 | kernel_size: 3
126 | stride: 2
127 | }
128 | }
129 | layer {
130 | name: "conv3"
131 | type: "Convolution"
132 | bottom: "pool2"
133 | top: "conv3"
134 | param { lr_mult: 1 decay_mult: 1 }
135 | param { lr_mult: 2 decay_mult: 0 }
136 | convolution_param {
137 | num_output: 384
138 | pad: 1
139 | kernel_size: 3
140 | weight_filler {
141 | type: "gaussian"
142 | std: 0.01
143 | }
144 | bias_filler {
145 | type: "constant"
146 | value: 0
147 | }
148 | }
149 | }
150 | layer {
151 | name: "relu3"
152 | type: "ReLU"
153 | bottom: "conv3"
154 | top: "conv3"
155 | }
156 | layer {
157 | name: "conv4"
158 | type: "Convolution"
159 | bottom: "conv3"
160 | top: "conv4"
161 | param { lr_mult: 1 decay_mult: 1 }
162 | param { lr_mult: 2 decay_mult: 0 }
163 | convolution_param {
164 | num_output: 384
165 | pad: 1
166 | kernel_size: 3
167 | group: 2
168 | weight_filler {
169 | type: "gaussian"
170 | std: 0.01
171 | }
172 | bias_filler {
173 | type: "constant"
174 | value: 1
175 | }
176 | }
177 | }
178 | layer {
179 | name: "relu4"
180 | type: "ReLU"
181 | bottom: "conv4"
182 | top: "conv4"
183 | }
184 | layer {
185 | name: "conv5"
186 | type: "Convolution"
187 | bottom: "conv4"
188 | top: "conv5"
189 | param { lr_mult: 1 decay_mult: 1 }
190 | param { lr_mult: 2 decay_mult: 0 }
191 | convolution_param {
192 | num_output: 256
193 | pad: 1
194 | kernel_size: 3
195 | group: 2
196 | weight_filler {
197 | type: "gaussian"
198 | std: 0.01
199 | }
200 | bias_filler {
201 | type: "constant"
202 | value: 1
203 | }
204 | }
205 | }
206 | layer {
207 | name: "relu5"
208 | type: "ReLU"
209 | bottom: "conv5"
210 | top: "conv5"
211 | }
212 | layer {
213 | name: "pool5"
214 | type: "Pooling"
215 | bottom: "conv5"
216 | top: "pool5"
217 | pooling_param {
218 | pool: MAX
219 | kernel_size: 3
220 | stride: 2
221 | }
222 | }
223 | layer {
224 | name: "fc6"
225 | type: "Convolution"
226 | bottom: "pool5"
227 | top: "fc6"
228 | param { lr_mult: 1 decay_mult: 1 }
229 | param { lr_mult: 2 decay_mult: 0 }
230 | convolution_param {
231 | kernel_size: 6
232 | stride: 1
233 | num_output: 4096
234 | }
235 | }
236 | layer {
237 | name: "relu6"
238 | type: "ReLU"
239 | bottom: "fc6"
240 | top: "fc6"
241 | }
242 | layer {
243 | name: "drop6"
244 | type: "Dropout"
245 | bottom: "fc6"
246 | top: "fc6"
247 | dropout_param {
248 | dropout_ratio: 0.5
249 | }
250 | }
251 | layer {
252 | name: "fc7"
253 | type: "Convolution"
254 | bottom: "fc6"
255 | top: "fc7"
256 | param { lr_mult: 1 decay_mult: 1 }
257 | param { lr_mult: 2 decay_mult: 0 }
258 | convolution_param {
259 | kernel_size: 1
260 | stride: 1
261 | num_output: 4096
262 | }
263 | }
264 | layer {
265 | name: "relu7"
266 | type: "ReLU"
267 | bottom: "fc7"
268 | top: "fc7"
269 | }
270 | layer {
271 | name: "drop7"
272 | type: "Dropout"
273 | bottom: "fc7"
274 | top: "fc7"
275 | dropout_param {
276 | dropout_ratio: 0.5
277 | }
278 | }
279 |
280 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/models/alexnet_deploy_lab_fc.prototxt:
--------------------------------------------------------------------------------
1 | name: "Colornet"
2 | layer {
3 | name: "img_lab"
4 | top: "img_lab" # Lab color space
5 | type: "Input"
6 | input_param { shape { dim: 1 dim: 3 dim: 227 dim: 227 } }
7 | }
8 | # **************************
9 | # ***** PROCESS COLORS *****
10 | # **************************
11 | # layer { # Convert to lab
12 | # name: "img_lab"
13 | # type: "ColorConv"
14 | # bottom: "data"
15 | # top: "img_lab"
16 | # propagate_down: false
17 | # color_conv_param {
18 | # input: 0 # BGR
19 | # output: 3 # Lab
20 | # }
21 | # }
22 | layer {
23 | name: "img_slice"
24 | type: "Slice"
25 | bottom: "img_lab"
26 | top: "img_l" # [0,100]
27 | top: "data_ab" # [-110,110]
28 | propagate_down: false
29 | slice_param {
30 | axis: 1
31 | slice_point: 1
32 | }
33 | }
34 | layer {
35 | name: "silence_ab"
36 | type: "Silence"
37 | bottom: "data_ab"
38 | }
39 | layer { # 0-center lightness channel
40 | name: "data_l"
41 | type: "Convolution"
42 | bottom: "img_l"
43 | top: "data_l" # scaled and centered lightness value
44 | propagate_down: false
45 | param {lr_mult: 0 decay_mult: 0}
46 | param {lr_mult: 0 decay_mult: 0}
47 | convolution_param {
48 | kernel_size: 1
49 | num_output: 1
50 | }
51 | }
52 | layer {
53 | name: "conv1"
54 | type: "Convolution"
55 | bottom: "data_l"
56 | top: "conv1"
57 | param { lr_mult: 1 decay_mult: 1 }
58 | param { lr_mult: 2 decay_mult: 0 }
59 | convolution_param {
60 | num_output: 96
61 | kernel_size: 11
62 | stride: 4
63 | weight_filler {
64 | type: "gaussian"
65 | std: 0.01
66 | }
67 | bias_filler {
68 | type: "constant"
69 | value: 0
70 | }
71 | }
72 | }
73 | layer {
74 | name: "relu1"
75 | type: "ReLU"
76 | bottom: "conv1"
77 | top: "conv1"
78 | }
79 | layer {
80 | name: "pool1"
81 | type: "Pooling"
82 | bottom: "conv1"
83 | top: "pool1"
84 | pooling_param {
85 | pool: MAX
86 | kernel_size: 3
87 | stride: 2
88 | }
89 | }
90 | layer {
91 | name: "conv2"
92 | type: "Convolution"
93 | bottom: "pool1"
94 | top: "conv2"
95 | param { lr_mult: 1 decay_mult: 1 }
96 | param { lr_mult: 2 decay_mult: 0 }
97 | convolution_param {
98 | num_output: 256
99 | pad: 2
100 | kernel_size: 5
101 | group: 2
102 | weight_filler {
103 | type: "gaussian"
104 | std: 0.01
105 | }
106 | bias_filler {
107 | type: "constant"
108 | value: 1
109 | }
110 | }
111 | }
112 | layer {
113 | name: "relu2"
114 | type: "ReLU"
115 | bottom: "conv2"
116 | top: "conv2"
117 | }
118 | layer {
119 | name: "pool2"
120 | type: "Pooling"
121 | bottom: "conv2"
122 | top: "pool2"
123 | pooling_param {
124 | pool: MAX
125 | kernel_size: 3
126 | stride: 2
127 | }
128 | }
129 | layer {
130 | name: "conv3"
131 | type: "Convolution"
132 | bottom: "pool2"
133 | top: "conv3"
134 | param { lr_mult: 1 decay_mult: 1 }
135 | param { lr_mult: 2 decay_mult: 0 }
136 | convolution_param {
137 | num_output: 384
138 | pad: 1
139 | kernel_size: 3
140 | weight_filler {
141 | type: "gaussian"
142 | std: 0.01
143 | }
144 | bias_filler {
145 | type: "constant"
146 | value: 0
147 | }
148 | }
149 | }
150 | layer {
151 | name: "relu3"
152 | type: "ReLU"
153 | bottom: "conv3"
154 | top: "conv3"
155 | }
156 | layer {
157 | name: "conv4"
158 | type: "Convolution"
159 | bottom: "conv3"
160 | top: "conv4"
161 | param { lr_mult: 1 decay_mult: 1 }
162 | param { lr_mult: 2 decay_mult: 0 }
163 | convolution_param {
164 | num_output: 384
165 | pad: 1
166 | kernel_size: 3
167 | group: 2
168 | weight_filler {
169 | type: "gaussian"
170 | std: 0.01
171 | }
172 | bias_filler {
173 | type: "constant"
174 | value: 1
175 | }
176 | }
177 | }
178 | layer {
179 | name: "relu4"
180 | type: "ReLU"
181 | bottom: "conv4"
182 | top: "conv4"
183 | }
184 | layer {
185 | name: "conv5"
186 | type: "Convolution"
187 | bottom: "conv4"
188 | top: "conv5"
189 | param { lr_mult: 1 decay_mult: 1 }
190 | param { lr_mult: 2 decay_mult: 0 }
191 | convolution_param {
192 | num_output: 256
193 | pad: 1
194 | kernel_size: 3
195 | group: 2
196 | weight_filler {
197 | type: "gaussian"
198 | std: 0.01
199 | }
200 | bias_filler {
201 | type: "constant"
202 | value: 1
203 | }
204 | }
205 | }
206 | layer {
207 | name: "relu5"
208 | type: "ReLU"
209 | bottom: "conv5"
210 | top: "conv5"
211 | }
212 | layer {
213 | name: "pool5"
214 | type: "Pooling"
215 | bottom: "conv5"
216 | top: "pool5"
217 | pooling_param {
218 | pool: MAX
219 | kernel_size: 3
220 | stride: 2
221 | }
222 | }
223 | layer {
224 | name: "fc6"
225 | type: "InnerProduct"
226 | bottom: "pool5"
227 | top: "fc6"
228 | param { lr_mult: 1 decay_mult: 1 }
229 | param { lr_mult: 2 decay_mult: 0 }
230 | inner_product_param {
231 | num_output: 4096
232 | }
233 | }
234 | layer {
235 | name: "relu6"
236 | type: "ReLU"
237 | bottom: "fc6"
238 | top: "fc6"
239 | }
240 | layer {
241 | name: "drop6"
242 | type: "Dropout"
243 | bottom: "fc6"
244 | top: "fc6"
245 | dropout_param {
246 | dropout_ratio: 0.5
247 | }
248 | }
249 | layer {
250 | name: "fc7"
251 | type: "InnerProduct"
252 | bottom: "fc6"
253 | top: "fc7"
254 | param { lr_mult: 1 decay_mult: 1 }
255 | param { lr_mult: 2 decay_mult: 0 }
256 | inner_product_param {
257 | num_output: 4096
258 | }
259 | }
260 | layer {
261 | name: "relu7"
262 | type: "ReLU"
263 | bottom: "fc7"
264 | top: "fc7"
265 | }
266 | layer {
267 | name: "drop7"
268 | type: "Dropout"
269 | bottom: "fc7"
270 | top: "fc7"
271 | dropout_param {
272 | dropout_ratio: 0.5
273 | }
274 | }
275 |
276 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/models/fetch_alexnet_model.sh:
--------------------------------------------------------------------------------
1 |
2 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/alexnet_release_450000.caffemodel -O ./models/alexnet_release_450000.caffemodel
3 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/alexnet_release_450000_nobn.caffemodel -O ./models/alexnet_release_450000_nobn.caffemodel
4 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/alexnet_release_450000_nobn_rs.caffemodel -O ./models/alexnet_release_450000_nobn_rs.caffemodel
5 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/alexnet_release_450000_nobn_fc_rs.caffemodel -O ./models/alexnet_release_450000_nobn_fc_rs.caffemodel
6 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/models/fetch_release_models.sh:
--------------------------------------------------------------------------------
1 |
2 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/colorization_release_v2.caffemodel -O ./models/colorization_release_v2.caffemodel
3 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/colorization_release_v2_norebal.caffemodel -O ./models/colorization_release_v2_norebal.caffemodel
4 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v1/colorization_release_v1.caffemodel -O ./models/colorization_release_v1.caffemodel
5 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/batch_norm_absorb.py:
--------------------------------------------------------------------------------
1 |
2 | # **************************************
3 | # ***** Richard Zhang / 2016.06.04 *****
4 | # **************************************
5 | # Absorb batch norm into convolution layers
6 | # This script only supports the conv-batchnorm configuration
7 | # Currently unsupported:
8 | # - deconv layers
9 | # - fc layers
10 | # - batchnorm before linear layer
11 |
12 | import caffe
13 | import os
14 | import string
15 | import numpy as np
16 | import argparse
17 | import matplotlib.pyplot as plt
18 |
19 | def parse_args():
20 | parser = argparse.ArgumentParser(description='BatchNorm absorption')
21 | parser.add_argument('--gpu', dest='gpu', help='gpu id', type=int, default=0)
22 | parser.add_argument('--prototxt_in',dest='prototxt_in',help='prototxt with batchnorm', type=str, default='')
23 | parser.add_argument('--prototxt_out',dest='prototxt_out',help='prototxt without batchnorm', type=str, default='')
24 | parser.add_argument('--caffemodel_in',dest='caffemodel_in',help='caffemodel with batchnorm', type=str, default='')
25 | parser.add_argument('--caffemodel_out',dest='caffemodel_out',help='caffemodel without batchnorm, to be saved', type=str, default='')
26 |
27 | args = parser.parse_args()
28 | return args
29 |
30 | if __name__ == '__main__':
31 | args = parse_args()
32 |
33 | gpu_id = args.gpu
34 | PROTOTXT1_PATH = args.prototxt_in
35 | PROTOTXT2_PATH = args.prototxt_out # no batch norm
36 | MODEL_PATH = args.caffemodel_in
37 | MODEL2_PATH = args.caffemodel_out # to be saved off
38 |
39 | caffe.set_mode_gpu()
40 | caffe.set_device(gpu_id)
41 |
42 | net1 = caffe.Net(PROTOTXT1_PATH, MODEL_PATH, caffe.TEST)
43 | net2 = caffe.Net(PROTOTXT2_PATH, MODEL_PATH, caffe.TEST)
44 |
45 | # call forward on net1, net2
46 | net1.forward()
47 | net2.forward()
48 |
49 | # identify batch norms and paired linear layers
50 | BN_INDS = np.where(np.array([layer.type for layer in net1.layers])=='BatchNorm')[0]
51 | BN_NAMES = np.zeros(BN_INDS.shape,dtype='S50') # batch norm layer names
52 | LIN_NAMES = np.zeros(BN_INDS.shape,dtype='S50') # linear layer names
53 | PRE_NAMES = np.zeros(BN_INDS.shape,dtype='S50') # blob right before
54 | POST_NAMES = np.zeros(BN_INDS.shape,dtype='S50') # blob right after
55 |
56 | PRE_POST = -1+np.zeros(BN_INDS.shape) # 0 - pre, 1 - post
57 | CONV_DECONV = -1+np.zeros(BN_INDS.shape) # 0 - conv, 1 - deconv
58 |
59 | # identify layers which are paired with batch norms (only supporting convolution)
60 | for (ll,bn_ind) in enumerate(BN_INDS):
61 | BN_NAMES[ll] = net1._layer_names[bn_ind]
62 | if(net1.layers[bn_ind-1].type=='Convolution' or net1.layers[bn_ind-1].type=='Deconvolution'):
63 | PRE_POST[ll] = 0
64 | LIN_NAMES[ll] = net1._layer_names[bn_ind-1]
65 | POST_NAMES[ll] = net1._layer_names[bn_ind+1]
66 | if(net1.layers[bn_ind-1].type=='Convolution'):
67 | CONV_DECONV[ll] = 0
68 | elif(net1.layers[bn_ind-1].type=='Deconvolution'):
69 | CONV_DECONV[ll] = 1
70 | elif(net1.layers[bn_ind+1].type=='Convolution' or net1.layers[bn_ind+1].type=='Deconvolution'):
71 | PRE_POST[ll] = 1
72 | LIN_NAMES[ll] = net1._layer_names[bn_ind+1]
73 | POST_NAMES[ll] = net1._layer_names[bn_ind+3]
74 | if(net1.layers[bn_ind+1].type=='Convolution'):
75 | CONV_DECONV[ll] = 0
76 | elif(net1.layers[bn_ind+1].type=='Deconvolution'):
77 | CONV_DECONV[ll] = 1
78 | else:
79 | PRE_POST[ll] = -1
80 | PRE_NAMES[ll] = net1.bottom_names[BN_NAMES[ll]][0]
81 |
82 | LIN_INDS = BN_INDS+PRE_POST # linear layer indices
83 | ALL_SLOPES = {}
84 |
85 | # compute batch norm parameters on net1 in first layer
86 | # absorb into weights in first layer
87 | for ll in range(BN_INDS.size):
88 | bn_ind = BN_INDS[ll]
89 | BN_NAME = BN_NAMES[ll]
90 | PRE_NAME = PRE_NAMES[ll]
91 | POST_NAME = POST_NAMES[ll]
92 | LIN_NAME = LIN_NAMES[ll]
93 |
94 | print 'LAYERS %s, %s'%(PRE_NAME,BN_NAME)
95 | # print net1.blobs[BN_NAME].data.shape
96 | # print net1.blobs[PRE_NAME].data.shape
97 |
98 | C = net1.blobs[BN_NAME].data.shape[1]
99 | in_blob = net1.blobs[PRE_NAME].data
100 | bn_blob = net1.blobs[BN_NAME].data
101 |
102 | scale_factor = 1./net1.params[BN_NAME][2].data[...]
103 | mean = scale_factor * net1.params[BN_NAME][0].data[...]
104 | scale = scale_factor * net1.params[BN_NAME][1].data[...]
105 |
106 | slopes = np.sqrt(1./scale)
107 | offs = -mean*slopes
108 |
109 | print ' Computing error on data...'
110 | bn_blob_rep = in_blob*slopes[np.newaxis,:,np.newaxis,np.newaxis]+offs[np.newaxis,:,np.newaxis,np.newaxis]
111 |
112 | # Visually verify that factors are correct
113 | print ' Maximum error: %.3e'%np.max(np.abs(bn_blob_rep[bn_blob>0] - bn_blob[bn_blob>0]))
114 | print ' RMS error: %.3e'%np.linalg.norm(bn_blob_rep[bn_blob>0] - bn_blob[bn_blob>0])
115 | print ' RMS signal: %.3e'%np.linalg.norm(bn_blob_rep[bn_blob>0])
116 |
117 | print ' Absorbing slope and offset...'
118 | # absorb slope and offset into appropriate parameter
119 | if(PRE_POST[ll]==0): # linear layer is before
120 | if(CONV_DECONV[ll]==0): # convolution
121 | net2.params[LIN_NAME][0].data[...] = net1.params[LIN_NAME][0].data[...]*slopes[:,np.newaxis,np.newaxis,np.newaxis]
122 | net2.params[LIN_NAME][1].data[...] = offs + (slopes*net1.params[LIN_NAME][1].data)
123 | elif(CONV_DECONV[ll]==1): # deconvolution
124 | print '*** Deconvolution not implemented ***'
125 | elif(PRE_POST[ll]==1): # batchnorm is BEFORE linear layer
126 | print '*** Not implemented ***'
127 |
128 | net2.save(MODEL2_PATH)
129 |
130 | for arg in vars(args):
131 | print('[%s] =' % arg, getattr(args, arg))
132 | print 'Saving model into: %s'%MODEL2_PATH
133 |
--------------------------------------------------------------------------------
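The absorption in batch_norm_absorb.py folds every BatchNorm into the convolution that precedes it: from Caffe's BatchNorm blobs (running mean sum, running variance sum, count) it derives a per-channel scale of 1/sqrt(var) and a shift of -mean/sqrt(var), then multiplies them into the conv weights and bias. A NumPy sketch of the same arithmetic; the bn_mean_sum, bn_var_sum, bn_count, W and b arrays are hypothetical stand-ins for the blob data the script reads:

    import numpy as np

    # Caffe BatchNorm blobs: [0] = mean * N, [1] = variance * N, [2] = N
    mean = bn_mean_sum / bn_count
    var = bn_var_sum / bn_count
    slope = 1.0 / np.sqrt(var)       # per-channel scale applied by BatchNorm
    offset = -mean * slope           # per-channel shift applied by BatchNorm

    # fold into the preceding convolution (weights shaped [out_ch, in_ch, kh, kw])
    W_folded = W * slope[:, None, None, None]
    b_folded = offset + slope * b

This matches the script's slopes/offs computation and the weight/bias update it writes into net2.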
/Deep-Learning-Colorization/resources/caffe_traininglayers.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Deep-Learning-Colorization/resources/caffe_traininglayers.pyc
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/conv_into_fc.py:
--------------------------------------------------------------------------------
1 |
2 | import caffe
3 | import os
4 | import string
5 | import numpy as np
6 | import argparse
7 | import matplotlib.pyplot as plt
8 |
9 | def parse_args():
10 | parser = argparse.ArgumentParser(description='Convert conv layers into FC layers')
11 | parser.add_argument('--gpu', dest='gpu', help='gpu id', type=int, default=0)
12 | parser.add_argument('--prototxt_in',dest='prototxt_in',help='prototxt with conv layers', type=str, default='')
13 | parser.add_argument('--prototxt_out',dest='prototxt_out',help='prototxt with fc layers', type=str, default='')
14 | parser.add_argument('--caffemodel_in',dest='caffemodel_in',help='caffemodel with conv layers', type=str, default='')
15 | parser.add_argument('--caffemodel_out',dest='caffemodel_out',help='caffemodel with fc layers, to be saved', type=str, default='')
16 | parser.add_argument('--dummymodel',dest='dummymodel',help='blank caffemodel',type=str,default='./models/dummy.caffemodel')
17 |
18 | args = parser.parse_args()
19 | return args
20 |
21 | if __name__ == '__main__':
22 | args = parse_args()
23 |
24 | gpu_id = args.gpu
25 | PROTOTXT1_PATH = args.prototxt_in
26 | PROTOTXT2_PATH = args.prototxt_out # no batch norm
27 | MODEL_PATH = args.caffemodel_in
28 | DUMMYMODEL_PATH = args.dummymodel
29 | MODEL2_PATH = args.caffemodel_out # to be saved off
30 |
31 | caffe.set_mode_gpu()
32 | caffe.set_device(gpu_id)
33 |
34 | net1 = caffe.Net(PROTOTXT1_PATH, MODEL_PATH, caffe.TEST)
35 | net2 = caffe.Net(PROTOTXT2_PATH, DUMMYMODEL_PATH, caffe.TEST)
36 |
37 | import rz_fcns as rz
38 | rz.caffe_param_shapes(net1,to_print=True)
39 | rz.caffe_param_shapes(net2,to_print=True)
40 | rz.caffe_shapes(net2,to_print=True)
41 |
42 | # CONV_INDS = np.where(np.array([layer.type for layer in net1.layers])=='Convolution')[0]
43 | print net1.params.keys()
44 | print net2.params.keys()
45 |
46 | for (ll,layer) in enumerate(net2.params.keys()):
47 | P = len(net2.params[layer]) # number of blobs
48 | if(P>0):
49 | for pp in range(P):
50 | ndim1 = net1.params[layer][pp].data.ndim
51 | ndim2 = net2.params[layer][pp].data.ndim
52 |
53 | print('Copying layer %s, param blob %i (%i-dim => %i-dim)'%(layer,pp,ndim1,ndim2))
54 | if(ndim1==ndim2):
55 | print(' Same dimensionality...')
56 | net2.params[layer][pp].data[...] = net1.params[layer][pp].data[...]
57 | else:
58 | print(' Different dimensionality...')
59 | net2.params[layer][pp].data[...] = net1.params[layer][pp].data[...].reshape(net2.params[layer][pp].data[...].shape)
60 |
61 | net2.save(MODEL2_PATH)
62 |
63 | for arg in vars(args):
64 | print('[%s] =' % arg, getattr(args, arg))
65 | print 'Saving model into: %s'%MODEL2_PATH
66 |
--------------------------------------------------------------------------------
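conv_into_fc.py copies parameters between a convolutional deploy file and its fully-connected counterpart (the alexnet_deploy*.prototxt pairs above); the only non-trivial case is reshaping a kernel that covers its whole input, e.g. the 6x6 fc6 convolution over the 6x6 pool5 map, into an InnerProduct weight matrix. A small NumPy sketch of why that reshape is lossless, with shapes chosen to match the deploy files (illustrative data only):

    import numpy as np

    # conv-style fc6 weights: (4096 outputs, 256 channels, 6, 6)
    w_conv = np.random.randn(4096, 256, 6, 6).astype(np.float32)
    x = np.random.randn(256, 6, 6).astype(np.float32)   # one pool5 feature map

    # a convolution whose kernel covers the whole input is one dot product per filter
    y_conv = np.tensordot(w_conv, x, axes=([1, 2, 3], [0, 1, 2]))

    # the same result from the flattened InnerProduct layout: (4096, 256*6*6)
    w_fc = w_conv.reshape(4096, -1)
    y_fc = w_fc @ x.ravel()

    assert np.allclose(y_conv, y_fc, rtol=1e-4, atol=1e-3)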
/Deep-Learning-Colorization/resources/magic_init/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016, Philipp Krähenbühl
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 | this list of conditions and the following disclaimer in the documentation
11 | and/or other materials provided with the distribution.
12 |
13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 |
24 | The views and conclusions contained in the software and documentation are those
25 | of the authors and should not be interpreted as representing official policies,
26 | either expressed or implied, of the FreeBSD Project.
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/magic_init/README.md:
--------------------------------------------------------------------------------
1 | # Data-dependent initialization of convolutional neural networks
2 |
3 | Created by Philipp Krähenbühl.
4 |
5 | ### Introduction
6 |
7 | This code implements the initialization presented in our [arXiv tech report](http://arxiv.org/abs/1511.06856), which is under submission at ICLR 2016.
8 |
9 | *This is a reimplementation and currently work in progress. Use at your own risk.*
10 |
11 | ### License
12 |
13 | This code is released under the BSD License (refer to the LICENSE file for details).
14 |
15 | ### Citing
16 |
17 | If you find our initialization useful in your research, please consider citing:
18 |
19 | @article{krahenbuhl2015data,
20 | title={Data-dependent Initializations of Convolutional Neural Networks},
21 | author={Kr{\"a}henb{\"u}hl, Philipp and Doersch, Carl and Donahue, Jeff and Darrell, Trevor},
22 | journal={arXiv preprint arXiv:1511.06856},
23 | year={2015}
24 | }
25 |
26 | ### Setup
27 |
28 | Checkout the project and create a symlink to caffe in the `magic_init` directory:
29 | ```Shell
30 | ln -s path/to/caffe/python/caffe caffe
31 | ```
32 |
33 | ### Examples
34 |
35 | Here is a quick example of how to initialize alexnet:
36 | ```bash
37 | python magic_init.py path/to/alexnet/deploy.prototxt path/to/output.caffemodel -d "path/to/some/images/*.png" -q -nit 10 -cs
38 | ```
39 | Here the ```-d``` flag allows you to initialize the network using your own images. Feel free to use imagenet, Pascal, COCO or whatever you have at hand, it shouldn't make a big difference. The ```-q``` (quiet) flag suppresses all the caffe logging, ```-nit``` controls the number of batches used (while ```-bs``` controls the batch size). Finally ```-cs``` rescales the gradients across layers. This rescaling currently works best for feed-forward networks, and might not work too well for DAG structured networks (we are working on that).
40 |
41 | To run the k-means initialization use:
42 | ```bash
43 | python magic_init.py path/to/alexnet/deploy.prototxt path/to/output.caffemodel -d "path/to/some/images/*.png" -q -nit 10 -cs -t kmeans
44 | ```
45 |
46 | Finally, ```python magic_init.py -h``` should provide you with more help.
47 |
48 |
49 | ### Pro tips
50 | If your numpy implementation is based on openblas, try disabling threading with ```export OPENBLAS_NUM_THREADS=1```; it can improve the runtime performance a bit.
51 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/magic_init/load.py:
--------------------------------------------------------------------------------
1 | import caffe
2 |
3 | def parseProtoString(s):
4 | from google.protobuf import text_format
5 | from caffe.proto import caffe_pb2 as pb
6 | proto_net = pb.NetParameter()
7 | text_format.Merge(s, proto_net)
8 | return proto_net
9 |
10 |
11 | def get_param(l, exclude=set(['top', 'bottom', 'name', 'type'])):
12 | if not hasattr(l,'ListFields'):
13 | if hasattr(l,'__delitem__'):
14 | return list(l)
15 | return l
16 | r = dict()
17 | for f, v in l.ListFields():
18 | if f.name not in exclude:
19 | r[f.name] = get_param(v, [])
20 | return r
21 |
22 | class ProtoDesc:
23 | def __init__(self, prototxt):
24 | from os import path
25 | self.prototxt = prototxt
26 | self.parsed_proto = parseProtoString(open(self.prototxt, 'r').read())
27 | # Guess the input dimension
28 | self.input_dim = (3, 227, 227)
29 | net = self.parsed_proto
30 | if len(net.input_dim) > 0:
31 | self.input_dim = net.input_dim[1:]
32 | else:
33 | lrs = net.layer
34 | cs = [l.transform_param.crop_size for l in lrs
35 | if l.HasField('transform_param')]
36 | if len(cs):
37 | self.input_dim = (3, cs[0], cs[0])
38 |
39 | def __call__(self, clip=None, **inputs):
40 | from caffe import layers as L
41 | from collections import OrderedDict
42 | net = self.parsed_proto
43 | blobs = OrderedDict(inputs)
44 | for l in net.layer:
45 | if l.name not in inputs:
46 | in_place = l.top == l.bottom
47 | param = get_param(l)
48 | assert all([b in blobs for b in l.bottom]), "Some bottoms not founds: " + ', '.join([b for b in l.bottom if not b in blobs])
49 | tops = getattr(L, l.type)(*[blobs[b] for b in l.bottom],
50 | ntop=len(l.top), in_place=in_place,
51 | name=l.name,
52 | **param)
53 | if len(l.top) <= 1:
54 | tops = [tops]
55 | for i, t in enumerate(l.top):
56 | blobs[t] = tops[i]
57 | if l.name == clip:
58 | break
59 | return list(blobs.values())[-1]
60 |
61 |
62 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/magic_init/load.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Deep-Learning-Colorization/resources/magic_init/load.pyc
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/magic_init/measure_stat.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from magic_init import *
3 |
4 | class BCOLORS:
5 | HEADER = '\033[95m'
6 | OKBLUE = '\033[94m'
7 | OKGREEN = '\033[92m'
8 | WARNING = '\033[93m'
9 | FAIL = '\033[91m'
10 | ENDC = '\033[0m'
11 | BOLD = '\033[1m'
12 | UNDERLINE = '\033[4m'
13 |
14 | class NOCOLORS:
15 | HEADER = ''
16 | OKBLUE = ''
17 | OKGREEN = ''
18 | WARNING = ''
19 | FAIL = ''
20 | ENDC = ''
21 | BOLD = ''
22 | UNDERLINE = ''
23 |
24 | def coloredNumbers(v, color=None, fmt='%6.2f', max_display=300, bcolors=BCOLORS):
25 | import numpy as np
26 | # Display a numpy array and highlight the min and max values [required a nice linux
27 | # terminal supporting colors]
28 | r = ""
29 | mn, mx = np.min(v), np.max(v)
30 | for k,i in enumerate(v):
31 | if len(v) > max_display and k > max_display/2 and k < len(v) - max_display/2:
32 | if r[-1] != '.':
33 | r += '...'
34 | continue
35 | if i <= mn + 1e-3:
36 | r += bcolors.BOLD+bcolors.FAIL
37 | elif i + 1e-3 >= mx:
38 | r += bcolors.BOLD+bcolors.FAIL
39 | elif color is not None:
40 | r += color
41 | r += (fmt+' ')%i
42 | r += bcolors.ENDC
43 | r += bcolors.ENDC
44 | return r
45 |
46 | def computeGradientRatio(net, NIT=1):
47 | import numpy as np
48 | last_layer = 0
49 | for i, (n, l) in enumerate(zip(net._layer_names, net.layers)):
50 | if l.type not in STRIP_LAYER:
51 | last_layer = i
52 | last_tops = net.top_names[net._layer_names[last_layer]]
53 |
54 | var = {}
55 | for it in range(NIT):
56 | net._forward(0, last_layer)
57 | # Reset the diffs
58 | for l in net.layers:
59 | for b in l.blobs:
60 | b.diff[...] = 0
61 | # Set the top diffs
62 | for t in last_tops:
63 | net.blobs[t].diff[...] = np.random.normal(0, 1, net.blobs[t].shape)
64 | net._backward(last_layer, 0)
65 | for i, (n, l) in enumerate(zip(net._layer_names, net.layers)):
66 | if len(l.blobs) > 0:
67 | assert l.type in PARAMETER_LAYERS, "Parameter layer '%s' currently not supported"%l.type
68 | b = l.blobs[0]
69 | r = np.mean(b.diff.swapaxes(0,1).reshape((b.diff.shape[1],-1))**2, axis=1) / np.mean(b.data**2)
70 | if n in var: var[n] += r / NIT
71 | else: var[n] = r / NIT
72 | std = {n: np.sqrt(var[n]) for n in var}
73 | return {n: np.std(s) / np.mean(s) for n,s in std.items()}, {n: np.mean(s) for n,s in std.items()}
74 |
75 | def printMeanStddev(net, NIT=10, show_all=False, show_color=True, quiet=False):
76 | import numpy as np
77 | bcolors = NOCOLORS
78 | if show_color: bcolors = BCOLORS
79 |
80 | layer_names = list(net._layer_names)
81 | if not show_all:
82 | layer_names = [n for n, l in zip(net._layer_names, net.layers) if len(l.blobs)>0]
83 | if 'data' in net._layer_names:
84 | layer_names.append('data')
85 |
86 | # When was a blob last used
87 | last_used = {}
88 | # Make sure all layers are supported, and compute the range each blob is used in
89 | for i, (n, l) in enumerate(zip(net._layer_names, net.layers)):
90 | for b in net.bottom_names[n]:
91 | last_used[b] = i
92 |
93 | active_data, cvar = {}, {}
94 | for i, (n, l) in enumerate(zip(net._layer_names, net.layers)):
95 | # Run the network forward
96 | new_data = forward(net, i, NIT, {b: active_data[b] for b in net.bottom_names[n]}, net.top_names[n])
97 | active_data.update(new_data)
98 |
99 | if len(net.top_names[n]) > 0 and n in layer_names:
100 | m = net.top_names[n][0]
101 | D = flattenData(new_data[m])
102 | mean = np.mean(D, axis=0)
103 | stddev = np.std(D, axis=0)
104 | if not quiet:
105 | print( bcolors.BOLD, ' '*5, n, ':', m, ' '*5, bcolors.ENDC )
106 | print( 'mean ', coloredNumbers(mean, bcolors.OKGREEN, bcolors=bcolors) )
107 | print( 'stddev', coloredNumbers(stddev, bcolors.OKBLUE, bcolors=bcolors) )
108 | print( 'coef of variation ', bcolors.OKGREEN, stddev.std() / stddev.mean(), bcolors.ENDC )
109 | print()
110 | cvar[n] = stddev.std() / stddev.mean()
111 | # Delete all unused data
112 | for k in list(active_data):
113 | if k not in last_used or last_used[k] == i:
114 | del active_data[k]
115 | return cvar
116 |
117 | def main():
118 | from argparse import ArgumentParser
119 | from os import path
120 |
121 | parser = ArgumentParser()
122 | parser.add_argument('prototxt')
123 | parser.add_argument('-l', '--load', help='Load a caffemodel')
124 | parser.add_argument('-d', '--data', default=None, help='Image list to use [default prototxt data]')
125 | #parser.add_argument('-q', action='store_true', help='Quiet execution')
126 | parser.add_argument('-sm', action='store_true', help='Summary only')
127 | parser.add_argument('-q', action='store_true', help='Quiet execution')
128 | parser.add_argument('-a', '--all', action='store_true', help='Show the statistic for all layers')
129 | parser.add_argument('-nc', action='store_true', help='Do not use color')
130 | parser.add_argument('-s', type=float, default=1.0, help='Scale the input [only custom data "-d"]')
131 | parser.add_argument('-bs', type=int, default=16, help='Batch size [only custom data "-d"]')
132 | parser.add_argument('-nit', type=int, default=10, help='Number of iterations')
133 | parser.add_argument('--gpu', type=int, default=0, help='What gpu to run it on?')
134 | args = parser.parse_args()
135 |
136 | if args.q:
137 | from os import environ
138 | environ['GLOG_minloglevel'] = '2'
139 | import caffe, load
140 | from caffe import NetSpec, layers as L
141 |
142 | caffe.set_mode_gpu()
143 | if args.gpu is not None:
144 | caffe.set_device(args.gpu)
145 |
146 | if args.data is not None:
147 | model = load.ProtoDesc(args.prototxt)
148 | net = NetSpec()
149 | fl = getFileList(args.data)
150 | if len(fl) == 0:
151 | print("Unknown data type for '%s'"%args.data)
152 | exit(1)
153 | from tempfile import NamedTemporaryFile
154 | f = NamedTemporaryFile('w')
155 | f.write('\n'.join([path.abspath(i)+' 0' for i in fl]))
156 | f.flush()
157 | net.data, net.label = L.ImageData(source=f.name, batch_size=args.bs, new_width=model.input_dim[-1], new_height=model.input_dim[-1], transform_param=dict(mean_value=[104,117,123], scale=args.s),ntop=2)
158 | net.out = model(data=net.data, label=net.label)
159 | n = netFromString('force_backward:true\n'+str(net.to_proto()), caffe.TRAIN )
160 | else:
161 | n = caffe.Net(args.prototxt, caffe.TRAIN)
162 |
163 | if args.load is not None:
164 | n.copy_from(args.load)
165 |
166 | cvar = printMeanStddev(n, NIT=args.nit, show_all=args.all, show_color=not args.nc, quiet=args.sm)
167 | cv, gr = computeGradientRatio(n, NIT=args.nit)
168 | print()
169 | print(' Summary ')
170 | print('-----------')
171 | print()
172 | print('layer name out cvar rate cvar rate mean')
173 | for l in n._layer_names:
174 | if l in cvar and l in cv and l in gr:
175 | print('%-30s %10.2f %10.2f %10.2f'%(l, cvar[l], cv[l], gr[l]) )
176 |
177 | if __name__ == "__main__":
178 | main()
179 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/prior_probs.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Deep-Learning-Colorization/resources/prior_probs.npy
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/pts_in_hull.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Deep-Learning-Colorization/resources/pts_in_hull.npy
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/softmax_cross_entropy_loss_layer.cpp:
--------------------------------------------------------------------------------
1 | #include <algorithm>
2 | #include <cfloat>
3 | #include <vector>
4 |
5 | // #include "caffe/layer.hpp"
6 | // #include "caffe/util/math_functions.hpp"
7 | // #include "caffe/vision_layer.hpp"
8 | #include "caffe/layers/softmax_cross_entropy_loss_layer.hpp"
9 | #include "caffe/util/math_functions.hpp"
10 |
11 | namespace caffe {
12 |
13 | template <typename Dtype>
14 | void SoftmaxCrossEntropyLossLayer<Dtype>::LayerSetUp(
15 | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
16 | LossLayer<Dtype>::LayerSetUp(bottom, top);
17 | softmax_bottom_vec_.clear();
18 | softmax_bottom_vec_.push_back(bottom[0]);
19 | softmax_top_vec_.clear();
20 | softmax_top_vec_.push_back(softmax_output_.get());
21 | softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_);
22 | }
23 |
24 | template <typename Dtype>
25 | void SoftmaxCrossEntropyLossLayer<Dtype>::Reshape(
26 | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
27 | LossLayer<Dtype>::Reshape(bottom, top);
28 | CHECK_EQ(bottom[0]->count(), bottom[1]->count()) <<
29 | "SOFTMAX_CROSS_ENTROPY_LOSS layer inputs must have the same count.";
30 | softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_);
31 | }
32 |
33 | template <typename Dtype>
34 | void SoftmaxCrossEntropyLossLayer<Dtype>::Forward_cpu(
35 | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
36 | // The forward pass computes the softmax outputs.
37 | softmax_bottom_vec_[0] = bottom[0];
38 | softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
39 | // Compute the loss (negative log likelihood)
40 | const int count = bottom[0]->count();
41 | const int num = bottom[0]->num();
42 | // Stable version of loss computation from input data
43 | // const Dtype* input_data = bottom[0]->cpu_data();
44 | const Dtype* target = bottom[1]->cpu_data();
45 | Dtype loss = 0;
46 | const Dtype* softmax_output_data = softmax_top_vec_[0]->cpu_data();
47 |
48 | // First compute max of input data
49 | for (int i = 0; i < count; ++i) {
50 | //loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
51 | // log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
52 | if (target[i] > 0 ) {
53 | loss -= target[i] * (log(softmax_output_data[i]) - log(target[i]));
54 | }
55 | }
56 |
57 | top[0]->mutable_cpu_data()[0] = loss / num;
58 | }
59 |
60 | template <typename Dtype>
61 | void SoftmaxCrossEntropyLossLayer<Dtype>::Backward_cpu(
62 | const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
63 | const vector<Blob<Dtype>*>& bottom) {
64 | if (propagate_down[1]) {
65 | LOG(FATAL) << this->type()
66 | << " Layer cannot backpropagate to label inputs.";
67 | }
68 | if (propagate_down[0]) {
69 | // First, compute the diff
70 | const int count = bottom[0]->count();
71 | const int num = bottom[0]->num();
72 | const Dtype* softmax_output_data = softmax_output_->cpu_data();
73 | const Dtype* target = bottom[1]->cpu_data();
74 | Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
75 | // Gradient is: target[i] - softmax_output_data[i]
76 | caffe_sub(count, softmax_output_data, target, bottom_diff);
77 | // Scale down gradient
78 | const Dtype loss_weight = top[0]->cpu_diff()[0];
79 | caffe_scal(count, loss_weight / num, bottom_diff);
80 | }
81 | }
82 |
83 | #ifdef CPU_ONLY
84 | STUB_GPU(SoftmaxCrossEntropyLossLayer);
85 | #endif
86 |
87 | INSTANTIATE_CLASS(SoftmaxCrossEntropyLossLayer);
88 | REGISTER_LAYER_CLASS(SoftmaxCrossEntropyLoss);
89 |
90 | } // namespace caffe
91 |
--------------------------------------------------------------------------------
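The layer above treats bottom[1] as a soft target distribution: the forward pass computes the cross entropy -sum_i t_i * (log p_i - log t_i) averaged over the batch, with p the softmax of bottom[0], and the backward pass is simply (p - t) scaled by the loss weight. A NumPy sketch of the same math, assuming x and t are (batch, classes) arrays whose target rows sum to one:

    import numpy as np

    def softmax_cross_entropy(x, t, eps=1e-35):
        # softmax over the class dimension (shift by the max for stability)
        e = np.exp(x - x.max(axis=1, keepdims=True))
        p = e / e.sum(axis=1, keepdims=True)
        num = x.shape[0]
        mask = t > 0
        # loss = -sum_i t_i * (log p_i - log t_i), averaged over the batch
        loss = -np.sum(t[mask] * (np.log(p[mask] + eps) - np.log(t[mask]))) / num
        grad = (p - t) / num  # what Backward writes into bottom[0]->diff (loss_weight = 1)
        return loss, grad

The eps term mirrors the 1.0e-35 guard used in the GPU forward pass below.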
/Deep-Learning-Colorization/resources/softmax_cross_entropy_loss_layer.cu:
--------------------------------------------------------------------------------
1 | #include <algorithm>
2 | #include <cfloat>
3 | #include <vector>
4 |
5 | // #include "caffe/layer.hpp"
6 | // #include "caffe/util/math_functions.hpp"
7 | // #include "caffe/vision_layer.hpp"
8 | #include "caffe/layers/softmax_cross_entropy_loss_layer.hpp"
9 | #include "caffe/util/math_functions.hpp"
10 |
11 | namespace caffe {
12 |
13 | template <typename Dtype>
14 | void SoftmaxCrossEntropyLossLayer<Dtype>::Forward_gpu(
15 | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
16 | // The forward pass computes the softmax outputs.
17 | softmax_bottom_vec_[0] = bottom[0];
18 | softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
19 | // Compute the loss (negative log likelihood)
20 | const int count = bottom[0]->count();
21 | const int num = bottom[0]->num();
22 | // Stable version of loss computation from input data
23 | const Dtype* input_data = bottom[0]->cpu_data();
24 | const Dtype* target = bottom[1]->cpu_data();
25 | // Output of softmax forward pass
26 | const Dtype* softmax_output = softmax_top_vec_[0]->cpu_data();
27 | Dtype loss = 0;
28 | for (int i = 0; i < count; ++i) {
29 | //loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
30 | // log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
31 | if ( target[i] > 0 ) {
32 | // loss -= target[i] * ( log(softmax_output[i]) - log(target[i]) );
33 | // if(softmax_output[i] > 0) {
34 | loss -= target[i] * ( log(softmax_output[i]+1.0e-35) - log(target[i]) );
35 | // }
36 | // else {
37 | // // LOG(INFO) << "Prediction was 0";
38 | // loss -= target[i] * ( log(1.0e-35) - log(target[i]) ); // adding epsilon
39 | // }
40 | }
41 | }
42 | top[0]->mutable_cpu_data()[0] = loss / num;
43 | }
44 |
45 | template <typename Dtype>
46 | void SoftmaxCrossEntropyLossLayer<Dtype>::Backward_gpu(
47 | const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
48 | const vector<Blob<Dtype>*>& bottom) {
49 | if (propagate_down[1]) {
50 | LOG(FATAL) << this->type()
51 | << " Layer cannot backpropagate to label inputs.";
52 | }
53 | if (propagate_down[0]) {
54 | // First, compute the diff
55 | const int count = bottom[0]->count();
56 | const int num = bottom[0]->num();
57 | const Dtype* softmax_output_data = softmax_output_->gpu_data();
58 | const Dtype* target = bottom[1]->gpu_data();
59 | Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
60 | caffe_copy(count, softmax_output_data, bottom_diff);
61 | caffe_gpu_axpy(count, Dtype(-1), target, bottom_diff);
62 | // Scale down gradient
63 | const Dtype loss_weight = top[0]->cpu_diff()[0];
64 | caffe_gpu_scal(count, loss_weight / num, bottom_diff);
65 | }
66 | }
67 |
68 | INSTANTIATE_LAYER_GPU_FUNCS(SoftmaxCrossEntropyLossLayer);
69 |
70 |
71 | } // namespace caffe
72 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/softmax_cross_entropy_loss_layer.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CAFFE_SOFTMAX_CROSS_ENTROPY_LOSS_LAYER_HPP_
2 | #define CAFFE_SOFTMAX_CROSS_ENTROPY_LOSS_LAYER_HPP_
3 |
4 | #include <vector>
5 |
6 | #include "caffe/blob.hpp"
7 | #include "caffe/layer.hpp"
8 | #include "caffe/proto/caffe.pb.h"
9 |
10 | #include "caffe/layers/loss_layer.hpp"
11 | // #include "caffe/layers/sigmoid_layer.hpp"
12 | #include "caffe/layers/softmax_layer.hpp"
13 |
14 | namespace caffe {
15 |
16 | // Forward declare SoftmaxLayer for use in SoftmaxCrossEntropyLossLayer.
17 | template <typename Dtype> class SoftmaxLayer;
18 |
19 | template <typename Dtype>
20 | class SoftmaxCrossEntropyLossLayer : public LossLayer<Dtype> {
21 | public:
22 | explicit SoftmaxCrossEntropyLossLayer(const LayerParameter& param)
23 | : LossLayer<Dtype>(param),
24 | softmax_layer_(new SoftmaxLayer<Dtype>(param)),
25 | softmax_output_(new Blob<Dtype>()) {}
26 | virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
27 | const vector<Blob<Dtype>*>& top);
28 | virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
29 | const vector<Blob<Dtype>*>& top);
30 |
31 | virtual inline const char* type() const { return "SoftmaxCrossEntropyLoss"; }
32 |
33 | protected:
34 | /// @copydoc SoftmaxCrossEntropyLossLayer
35 | virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
36 | const vector<Blob<Dtype>*>& top);
37 | virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
38 | const vector<Blob<Dtype>*>& top);
39 |
40 | /**
41 | * @brief Computes the softmax cross-entropy loss error gradient w.r.t. the
42 | * predictions.
43 | *
44 | * Gradients cannot be computed with respect to the target inputs (bottom[1]),
45 | * so this method ignores bottom[1] and requires !propagate_down[1], crashing
46 | * if propagate_down[1] is set.
47 | *
48 | * @param top output Blob vector (length 1), providing the error gradient with
49 | * respect to the outputs
50 | * -# @f$ (1 \times 1 \times 1 \times 1) @f$
51 | * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$,
52 | * as @f$ \lambda @f$ is the coefficient of this layer's output
53 | * @f$\ell_i@f$ in the overall Net loss
54 | * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
55 | * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
56 | * (*Assuming that this top Blob is not used as a bottom (input) by any
57 | * other layer of the Net.)
58 | * @param propagate_down see Layer::Backward.
59 | * propagate_down[1] must be false as gradient computation with respect
60 | * to the targets is not implemented.
61 | * @param bottom input Blob vector (length 2)
62 | * -# @f$ (N \times C \times H \times W) @f$
63 | * the predictions @f$x@f$; Backward computes diff
64 | * @f$ \frac{\partial E}{\partial x} =
65 | * \frac{1}{n} \sum\limits_{n=1}^N (\hat{p}_n - p_n)
66 | * @f$
67 | * -# @f$ (N \times 1 \times 1 \times 1) @f$
68 | * the labels -- ignored as we can't compute their error gradients
69 | */
70 | virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
71 | const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
72 | virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
73 | const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
74 |
75 | /// The internal SoftmaxLayer used to map predictions to probabilities.
76 | shared_ptr<SoftmaxLayer<Dtype> > softmax_layer_;
77 | /// softmax_output stores the output of the SoftmaxLayer.
78 | shared_ptr<Blob<Dtype> > softmax_output_;
79 | /// bottom vector holder to call the underlying SoftmaxLayer::Forward
80 | vector<Blob<Dtype>*> softmax_bottom_vec_;
81 | /// top vector holder to call the underlying SoftmaxLayer::Forward
82 | vector<Blob<Dtype>*> softmax_top_vec_;
83 | };
84 |
85 | } // namespace caffe
86 |
87 | #endif // CAFFE_SOFTMAX_CROSS_ENTROPY_LOSS_LAYER_HPP_
88 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/video_colorize_parallel.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import cv2
6 | import numpy as np
7 | from skimage import img_as_float
8 | import skimage.color as color
9 | import scipy.ndimage.interpolation as sni
10 | import caffe
11 |
12 | def parse_args():
13 | parser = argparse.ArgumentParser()
14 | parser.add_argument('--filename', type=str, default='*', help='Filename of input BW video')
15 | parser.add_argument('--input_dir', type=str, default='/home/ubuntu/Automatic-Video-Colorization/data/examples/converted/', help='Directory of input files')
16 | parser.add_argument('--output_dir', type=str, default='/home/ubuntu/Automatic-Video-Colorization/data/examples/recolorized/', help='Directory of output files')
17 | parser.add_argument('--gpu', dest='gpu', help='gpu id', type=int, default=0)
18 | parser.add_argument('--prototxt',dest='prototxt',help='prototxt filepath', type=str, default='./models/colorization_deploy_v2.prototxt')
19 | parser.add_argument('--caffemodel',dest='caffemodel',help='caffemodel filepath', type=str, default='./models/colorization_release_v2.caffemodel')
20 |
21 | args = parser.parse_args()
22 | return args
23 |
24 | def image_colorization(frame, args):
25 |
26 | caffe.set_mode_gpu()
27 | caffe.set_device(args.gpu)
28 |
29 | # Select desired model
30 | net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
31 |
32 | (H_in,W_in) = net.blobs['data_l'].data.shape[2:] # get input shape
33 | (H_out,W_out) = net.blobs['class8_ab'].data.shape[2:] # get output shape
34 |
35 | pts_in_hull = np.load('./resources/pts_in_hull.npy') # load cluster centers
36 | net.params['class8_ab'][0].data[:,:,0,0] = pts_in_hull.transpose((1,0)) # populate cluster centers as 1x1 convolution kernel
37 | # print 'Annealed-Mean Parameters populated'
38 |
39 | # load the original image
40 | img_rgb = img_as_float(frame).astype(np.float32)
41 |
42 | img_lab = color.rgb2lab(img_rgb) # convert image to lab color space
43 | img_l = img_lab[:,:,0] # pull out L channel
44 | (H_orig,W_orig) = img_rgb.shape[:2] # original image size
45 |
46 | # create grayscale version of image (just for displaying)
47 | img_lab_bw = img_lab.copy()
48 | img_lab_bw[:,:,1:] = 0
49 | img_rgb_bw = color.lab2rgb(img_lab_bw)
50 |
51 | # resize image to network input size
52 | img_rs = caffe.io.resize_image(img_rgb,(H_in,W_in)) # resize image to network input size
53 | img_lab_rs = color.rgb2lab(img_rs)
54 | img_l_rs = img_lab_rs[:,:,0]
55 |
56 | net.blobs['data_l'].data[0,0,:,:] = img_l_rs-50 # subtract 50 for mean-centering
57 | net.forward() # run network
58 |
59 | ab_dec = net.blobs['class8_ab'].data[0,:,:,:].transpose((1,2,0)) # this is our result
60 | ab_dec_us = sni.zoom(ab_dec,(1.*H_orig/H_out,1.*W_orig/W_out,1)) # upsample to match the size of the original image
61 | img_lab_out = np.concatenate((img_l[:,:,np.newaxis],ab_dec_us),axis=2) # concatenate with original image L
62 | img_rgb_out = (255*np.clip(color.lab2rgb(img_lab_out),0,1)).astype('uint8') # convert back to rgb
63 |
64 | return img_rgb_out
65 |
66 | def bw2color(args, inputname, inputpath, outputpath):
67 | if inputname.endswith(".mp4"):
68 |
69 | # store information about the original video
70 | cap = cv2.VideoCapture(inputpath + inputname)
71 | # original dimensions
72 | width, height = int(cap.get(3)), int(cap.get(4))
73 |
74 | fourcc = cv2.VideoWriter_fourcc(*'mp4v')
75 |
76 | # parameters of output file
77 | # dimensions of the output image
78 | new_width, new_height = width, height
79 | # frames per second of the output video
80 | fps = 30.0
81 |
82 | # recolorized output video
83 | color_out = cv2.VideoWriter(
84 | outputpath + 'color_' + inputname,
85 | fourcc,
86 | fps,
87 | (new_width, new_height),
88 | isColor=True
89 | )
90 |
91 | while(cap.isOpened()):
92 | ret, frame_in = cap.read()
93 | # check if we are not at the end of the video
94 | if ret==True:
95 | # convert BGR to RGB convention
96 | frame_in = frame_in[:,:,::-1]
97 | # colorize the BW frame
98 | frame_out = image_colorization(frame_in, args)
99 | # convert RGB to BGR convention
100 | frame_out = frame_out[:,:,::-1]
101 | # write the color frame
102 | color_out.write(frame_out)
103 |
104 | if cv2.waitKey(1) & 0xFF == ord('q'):
105 | break
106 | # end of the video
107 | else:
108 | break
109 |
110 | # release everything if job is finished
111 | cap.release()
112 | color_out.release()
113 |
114 | def main():
115 | args = parse_args()
116 |
117 | if args.filename == '*':
118 | for filename in os.listdir(args.input_dir):
119 | bw2color(args, inputname = filename, inputpath = args.input_dir, outputpath = args.output_dir)
120 | else:
121 | bw2color(args, inputname = args.filename, inputpath = args.input_dir, outputpath = args.output_dir)
122 |
123 | # cleanup
124 | cv2.destroyAllWindows()
125 |
126 | return 0
127 |
128 | if __name__ == '__main__':
129 | main()
130 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CS230-Final-Project
2 |
3 | ### Converting videos
4 |
5 | 1. Create the data directories
6 | ```
7 | mkdir data; mkdir data/raw; mkdir data/converted;
8 | ```
9 | 2. Place videos inside the 'data/raw' directory
10 | 3. Run the conversion script
11 |
12 | For all videos inside the 'data/raw' directory
13 | ```
14 | python3 converter.py
15 | ```
16 |
17 | For one specific video 'filename'
18 | ```
19 | python3 converter.py --inputname filename
20 | ```
21 |
22 | To convert all videos in the data/raw folder to a consistent fps and resolution:
23 | ```
24 | python3 converter.py --fps 30 --out_dim 640 360
25 | ```
26 |
27 | #### Moments in Time (Mini) Dataset
28 | Download and unzip the dataset
29 | ```
30 | wget http://data.csail.mit.edu/soundnet/actions3/split1/Moments_in_Time_Mini.zip
31 | unzip Moments_in_Time_Mini.zip -d data/.
32 | ```
33 | Pre-process the dataset
34 | ```
35 | ./convert_moment_dataset.sh
36 | ```
37 |
38 | ## Running the baseline on a specific video
39 | Go into the folder "Deep-Learning-Colorization"
40 |
41 | Run ```./models/fetch_release_models.sh``` to download the model.
42 |
43 | Then run the following command to colorize your video:
44 | ```
45 | python3 video_colorize_parallel.py --filename <filename> --input_dir <input_dir> --output_dir <output_dir>
46 | ```
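
For example, with the placeholder filename `example_bw.mp4` and the example directories used elsewhere in this repository (adjust the paths to your setup):
```
python3 video_colorize_parallel.py --filename example_bw.mp4 --input_dir ../data/examples/converted/ --output_dir ../data/examples/recolorized/
```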
47 |
48 | ## Requirements
49 |
50 | ### Dependencies
51 |
52 | You can install Python dependencies using `pip install -r requirements.txt`
53 |
54 |
55 | ### Issues with CUDA
56 |
57 | When running `import tensorflow as tf`, if you encounter the following error:
58 | ```
59 | ImportError: libcublas.so.9.0: cannot open shared object file: No such file or directory
60 | ```
61 |
62 | Run the following to create links:
63 | ```
64 | sudo ln -s /usr/lib/x86_64-linux-gnu/libcublas.so.9.1.85 /usr/lib/x86_64-linux-gnu/libcublas.so.9.0
65 | sudo ln -s /usr/lib/x86_64-linux-gnu/libcusolver.so.9.1.85 /usr/lib/x86_64-linux-gnu/libcusolver.so.9.0
66 | ```
67 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # custom
104 | _TODO
105 | checkpoints
106 | plots
107 | vcs.xml
108 | .idea
109 | .vscode
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/README.md:
--------------------------------------------------------------------------------
1 | # Image Colorization with Generative Adversarial Networks
2 | In this work, we generalize the colorization procedure using a conditional Deep Convolutional Generative Adversarial Network (DCGAN) as suggested by [Pix2Pix]. The network is trained on the [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) and [Places365](http://places2.csail.mit.edu) datasets. Some of the results from the Places365 dataset are [shown here.](#places365-results)
3 |
4 | ## Prerequisites
5 | - Linux
6 | - Tensorflow 1.7
7 | - NVIDIA GPU (12G or 24G memory) + CUDA cuDNN
8 |
9 | ## Getting Started
10 | ### Installation
11 | - Clone this repo:
12 | ```bash
13 | git clone https://github.com/ImagingLab/Colorizing-with-GANs.git
14 | cd Colorizing-with-GANs
15 | ```
16 | - Install Tensorflow and dependencies from https://www.tensorflow.org/install/
17 | - Install python requirements:
18 | ```bash
19 | pip install -r requirements.txt
20 | ```
21 |
22 | ### Dataset
23 | - We use the [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) and [Places365](http://places2.csail.mit.edu) datasets. To train a model on the full dataset, download the datasets from the official websites.
24 | After downloading, put them under the `dataset` folder (see the layout sketch below).
25 |
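The loaders in `dataset.py` expect a layout like the following under `--dataset-path` (shown here as a sketch; adjust the paths to match where you extracted the data):
```
dataset/
├── places365/
│   ├── data_256/     # training images (searched recursively for *.jpg)
│   └── val_256/      # validation images
└── cifar10/
    ├── data_batch_1 … data_batch_5
    └── test_batch
```
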
26 | ### Training
27 | - To train the model, run `main.py` script
28 | ```bash
29 | python main.py
30 | ```
31 | - To train the model on the Places365 dataset with tuned hyperparameters:
32 | ```
33 | python train.py \
34 | --seed 100 \
35 | --dataset places365 \
36 | --dataset-path ./dataset/places365 \
37 | --checkpoints-path ./checkpoints \
38 | --batch-size 16 \
39 | --epochs 10 \
40 | --lr 3e-4 \
41 | --label-smoothing 1
42 |
43 | ```
44 |
45 | - To train the model on the CIFAR-10 dataset with tuned hyperparameters:
46 | ```
47 | python train.py \
48 | --seed 100 \
49 | --dataset cifar10 \
50 | --dataset-path ./dataset/cifar10 \
51 | --checkpoints-path ./checkpoints \
52 | --batch-size 128 \
53 | --epochs 200 \
54 | --lr 3e-4 \
55 | --lr-decay-steps 1e4 \
56 | --augment True
57 |
58 | ```
59 |
60 | ### Evaluate
61 | - Download the pre-trained weights [from here](https://drive.google.com/open?id=1jTsAUAKrMiHO2gn7s-fFZ_zUSzgKoPyp) and copy them into the `checkpoints` folder.
62 | - To evaluate the model quantitatively on the test-set, run `test-eval.py` script:
63 | ```bash
64 | python test-eval.py
65 | ```
66 |
67 | ### Turing Test
68 | - Download the pre-trained weights [from here](https://drive.google.com/open?id=1jTsAUAKrMiHO2gn7s-fFZ_zUSzgKoPyp) and copy them into the `checkpoints` folder.
69 | - To evaluate the model qualitatively using visual Turing test, run `test-turing.py`:
70 | ```bash
71 | python test-turing.py
72 | ```
73 |
74 | - To run the time-based visual Turing test (2-second decision time):
75 | ```bash
76 | python test-turing.py --test-delay 2
77 | ```
78 |
79 |
80 | ## Method
81 |
82 | ### Generative Adversarial Network
83 | Both the generator and the discriminator use CNNs. The generator is trained to minimize the probability that the discriminator makes a correct prediction on generated data, while the discriminator is trained to maximize the probability of assigning the correct label. This is presented as a single minimax game:
84 |
85 |
86 |
87 | In our model, we have redefined the generator's cost function by maximizing the probability of the discriminator being mistaken, as opposed to minimizing the probability of the discriminator being correct. In addition, the cost function was further modified by adding an L1 based regularizer. This will theoretically preserve the structure of the original images and prevent the generator from assigning arbitrary colors to pixels just to fool the discriminator:
88 |
89 |
90 |
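The equations above are carried by images; as a plain-text sketch in standard conditional-GAN notation (the exact symbols are an assumption: `x` is the grayscale input, `y` the ground-truth color image, and `λ` the L1 weight set by `--l1-weight`):

```latex
% Minimax game between generator G and discriminator D
\min_G \max_D \; \mathbb{E}\big[\log D(y)\big] + \mathbb{E}\big[\log\big(1 - D(G(x))\big)\big]

% Modified generator cost: maximize the discriminator's mistake (non-saturating loss)
% plus the L1 regularizer that keeps G(x) close to the ground truth
\min_G \; -\,\mathbb{E}\big[\log D(G(x))\big] + \lambda\,\mathbb{E}\big[\lVert y - G(x)\rVert_1\big]
```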
91 |
92 | ### Conditional GAN
93 | In a traditional GAN, the input of the generator is randomly generated noise data z. However, this approach is not applicable to the automatic colorization problem due to the nature of its inputs. The generator must be modified to accept grayscale images as inputs rather than noise. This problem was addressed by using a variant of GAN called [conditional generative adversarial networks](https://arxiv.org/abs/1411.1784). Since no noise is introduced, the input of the generator is treated as zero noise with the grayscale input as a prior:
94 |
95 |
96 |
97 | The discriminator receives colored images from both the generator and the original data, along with the grayscale input as the condition, and tries to tell which pair contains the true colored image:
98 |
99 |
100 |
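In plain-text form (again a sketch of the equations in the images above; `0_z` denotes the zero noise vector and `x` the grayscale condition):

```latex
% Generator: zero noise with the grayscale image as a prior
G : \; x \mapsto G(0_z \mid x)

% Discriminator: scores (grayscale, color) pairs, where the color image is either
% the ground truth y or the generated G(0_z | x)
\min_G \max_D \; \mathbb{E}\big[\log D(x, y)\big] + \mathbb{E}\big[\log\big(1 - D(x, G(0_z \mid x))\big)\big]
```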
101 |
102 | ### Networks Architecture
103 | The architecture of the generator is inspired by [U-Net](https://arxiv.org/abs/1505.04597): the model is symmetric, with `n` encoding units and `n` decoding units. The contracting path consists of 4x4 convolution layers with stride 2 for downsampling, each followed by batch normalization and a Leaky-ReLU activation with slope 0.2. The number of channels is doubled after each step. Each unit in the expansive path consists of a 4x4 transposed convolution layer with stride 2 for upsampling, concatenation with the activation map of the mirroring layer in the contracting path, followed by batch normalization and a ReLU activation. The last layer of the network is a 1x1 convolution, equivalent to a cross-channel parametric pooling layer; we use a `tanh` activation for this last layer. A minimal sketch is shown below.
104 |
105 |
106 |
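The actual implementation lives in `networks.py`; the following is a minimal, self-contained TensorFlow 1.x sketch of the generator described above (the filter counts are illustrative assumptions and dropout is omitted for brevity):

```python
import tensorflow as tf

def unet_generator(gray, filters=(64, 128, 256, 512), kernel_size=4):
    """gray: [N, H, W, 1] grayscale input in [-1, 1]; returns [N, H, W, 3] in [-1, 1]."""
    skips, x = [], gray
    # contracting path: 4x4 stride-2 convolutions + batch norm + leaky ReLU (slope 0.2)
    for i, f in enumerate(filters):
        x = tf.layers.conv2d(x, f, kernel_size, strides=2, padding='same', name='enc%d' % i)
        x = tf.layers.batch_normalization(x, training=True, name='enc_bn%d' % i)
        x = tf.nn.leaky_relu(x, alpha=0.2)
        skips.append(x)
    # expansive path: 4x4 stride-2 transposed convolutions + batch norm + ReLU,
    # each concatenated with the mirroring encoder activation (skip connection)
    for i, f in enumerate(reversed(filters)):
        x = tf.layers.conv2d_transpose(x, f, kernel_size, strides=2, padding='same', name='dec%d' % i)
        x = tf.layers.batch_normalization(x, training=True, name='dec_bn%d' % i)
        x = tf.nn.relu(x)
        if i < len(filters) - 1:
            x = tf.concat([skips[-(i + 2)], x], axis=3)
    # final 1x1 convolution (cross-channel parametric pooling) with tanh output
    return tf.layers.conv2d(x, 3, 1, strides=1, padding='same', activation=tf.nn.tanh, name='out')
```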
107 |
108 | For the discriminator, we use an architecture similar to the contracting path above: a series of 4x4 convolution layers with stride 2, with the number of channels doubled after each downsampling step. All convolution layers are followed by batch normalization and a leaky ReLU activation with slope 0.2. After the last layer, a convolution is applied to map to a 1-dimensional output, followed by a sigmoid function that returns the probability of the input being real or fake (see the sketch below).
109 |
110 |
111 |
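A matching sketch of the discriminator (again illustrative; the repository's version is in `networks.py`, and in practice the loss would be computed from the logit with `tf.nn.sigmoid_cross_entropy_with_logits` rather than from the sigmoid output):

```python
import tensorflow as tf

def patch_discriminator(gray, color, filters=(64, 128, 256, 512), kernel_size=4):
    """gray: [N, H, W, 1] condition; color: [N, H, W, 3] real or generated image."""
    # condition the discriminator by stacking the grayscale input with the color image
    x = tf.concat([gray, color], axis=3)
    for i, f in enumerate(filters):
        x = tf.layers.conv2d(x, f, kernel_size, strides=2, padding='same', name='disc%d' % i)
        if i > 0:  # no batch norm on the first layer
            x = tf.layers.batch_normalization(x, training=True, name='disc_bn%d' % i)
        x = tf.nn.leaky_relu(x, alpha=0.2)
    # map to a single-channel logit; the sigmoid gives the probability of the pair being real
    logit = tf.layers.conv2d(x, 1, kernel_size, strides=1, padding='same', name='disc_out')
    return tf.nn.sigmoid(logit), logit
```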
112 |
113 | ## Places365 Results
114 | Colorization results with Places365. (a) Grayscale. (b) Original Image. (c) Colorized with GAN.
115 |
116 |
117 |
118 |
119 | ## Citation
120 | If you use this code for your research, please cite our paper Image Colorization Using Generative Adversarial Networks:
121 |
122 | ```
123 | @inproceedings{nazeri2018image,
124 | title={Image Colorization Using Generative Adversarial Networks},
125 | author={Nazeri, Kamyar and Ng, Eric and Ebrahimi, Mehran},
126 | booktitle={International Conference on Articulated Motion and Deformable Objects},
127 | pages={85--94},
128 | year={2018},
129 | organization={Springer}
130 | }
131 | ```
132 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/__init__.py:
--------------------------------------------------------------------------------
1 | from .options import *
2 | from .models import *
3 | from .utils import *
4 | from .dataset import *
5 | from .main import *
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/build_dataset.py:
--------------------------------------------------------------------------------
1 | """Split Moments in Time video clips into frame pairs and resize them to 256x256.
2 |
3 | The Moments in Time (Mini) dataset comes in the following format:
4 | training/<category>/
5 | some_clip.mp4
6 | ...
7 | validation/<category>/
8 | some_clip.mp4
9 | ...
10 |
11 | Only the category given by SUBFOLDER is processed. Each clip is split into pairs of
12 | consecutive frames (t, t+dt); every pair is resized to (256, 256), concatenated with the
13 | first frame of the clip (kept as a color-palette reference) and saved as a single .npy file.
14 |
15 | The "validation" folder already serves as the test set, so we only need to split the
16 | "training" clips into train and dev sets. To keep the dev statistics representative while
17 | retaining most clips for training, we hold out 10% of the training clips as the dev set.
18 | """
19 |
20 | import argparse
21 | import random
22 | import os
23 |
24 | import numpy as np
25 |
26 | from PIL import Image
27 | from tqdm import tqdm
28 | import cv2
29 |
30 | # size of the resized frames
31 | SIZE = 256
32 |
33 | # subfolder of the "Moments_in_Time" dataset to consider
34 | SUBFOLDER = "/baking"
35 |
36 | parser = argparse.ArgumentParser()
37 | parser.add_argument('--data_dir', default='../data/Moments_in_Time_Mini', help="Directory with the Moments in Time dataset")
38 | parser.add_argument('--output_dir', default='../data/momentsintime_ref', help="Where to write the new data")
39 | parser.add_argument('--dt', type=int, default=1, help="Time between consecutive frames")
40 |
41 |
42 | def split_resize_and_save(filename, i, output_dir, dt=1, size=SIZE):
43 | """Split the video clip into pairs of consecutive frames (t, t+dt), resize the frames, and save the pairs to the `output_dir`."""
44 |
45 | vidcap = cv2.VideoCapture(filename)
46 |
47 | success, frame = vidcap.read()
48 | # convert BGR to RGB convention
49 | frame = frame[:,:,::-1]
50 | # default : use bilinear interpolation
51 | frame_prev = cv2.resize(frame, (size, size))
52 | # save the first frame as the "color palette" reference
53 | frame_ref = frame_prev
54 |
55 | # counter to build pairs of consecutive frames
56 | count = 1
57 |
58 | while success:
59 | count += 1
60 |
61 | success, frame = vidcap.read()
62 |
63 | if success:
64 | # convert BGR to RGB convention
65 | frame = frame[:,:,::-1]
66 | # default : use bilinear interpolation
67 | frame = cv2.resize(frame, (size, size))
68 | else:
69 | break
70 | #print('Read a new frame: ', success)
71 |
72 | if count % (1+dt) == 0:
73 | img = np.concatenate((frame, frame_prev, frame_ref), 2)
74 | frame_prev = frame
75 | np.save(output_dir + "/video{}_frame{}".format(i, count), img)
76 |
77 | if __name__ == '__main__':
78 | args = parser.parse_args()
79 | # Define the output directory
80 | args.output_dir = args.output_dir + "_dt" + str(args.dt)
81 |
82 | assert os.path.isdir(args.data_dir), "Couldn't find the dataset at {}".format(args.data_dir)
83 |
84 | # Define the data directories
85 | train_data_dir = os.path.join(args.data_dir, 'training' + SUBFOLDER)
86 | test_data_dir = os.path.join(args.data_dir, 'validation' + SUBFOLDER)
87 |
88 | # Get the filenames in each directory (train and test)
89 | filenames = os.listdir(train_data_dir)
90 | filenames = [os.path.join(train_data_dir, f) for f in filenames if f.endswith('.mp4')]
91 |
92 | test_filenames = os.listdir(test_data_dir)
93 | test_filenames = [os.path.join(test_data_dir, f) for f in test_filenames if f.endswith('.mp4')]
94 |
95 | # Split the clips in 'training' into 90% train and 10% dev
96 | # Make sure to always shuffle with a fixed seed so that the split is reproducible
97 | random.seed(230)
98 | filenames.sort()
99 | random.shuffle(filenames)
100 |
101 | split = int(0.9 * len(filenames))
102 | train_filenames = filenames[:split]
103 | dev_filenames = filenames[split:]
104 |
105 | filenames = {'train': train_filenames,
106 | 'dev': dev_filenames,
107 | 'test': test_filenames}
108 |
109 | if not os.path.exists(args.output_dir):
110 | os.mkdir(args.output_dir)
111 | else:
112 | print("Warning: output dir {} already exists".format(args.output_dir))
113 |
114 | # Preprocess train, dev and test
115 | for split in ['train', 'dev', 'test']:
116 | output_dir_split = os.path.join(args.output_dir, '{}_moments'.format(split))
117 | if not os.path.exists(output_dir_split):
118 | os.mkdir(output_dir_split)
119 | else:
120 | print("Warning: dir {} already exists".format(output_dir_split))
121 |
122 | print("Processing {} data, saving preprocessed data to {}".format(split, output_dir_split))
123 | for i, filename in enumerate(tqdm(filenames[split])):
124 | split_resize_and_save(filename, i, output_dir_split, dt=args.dt, size=SIZE)
125 |
126 | print("Done building dataset")
127 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/dataset.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import numpy as np
3 | import tensorflow as tf
4 | from scipy.misc import imread
5 | from abc import abstractmethod
6 | from utils import unpickle
7 |
8 | CIFAR10_DATASET = 'cifar10'
9 | PLACES365_DATASET = 'places365'
10 | MOMENTSINTIME_DATASET = 'momentsintime'
11 |
12 | class BaseDataset():
13 | def __init__(self, name, path, training=True, augment=True):
14 | self.name = name
15 | self.augment = augment and training
16 | self.training = training
17 | self.path = path
18 | self._data = []
19 |
20 | def __len__(self):
21 | return len(self.data)
22 |
23 | def __iter__(self):
24 | total = len(self)
25 | start = 0
26 |
27 | while start < total:
28 | item = self[start]
29 | start += 1
30 | yield item
31 |
32 | return  # end iteration; raising StopIteration inside a generator is an error under PEP 479 (Python 3.7+)
33 |
34 | def __getitem__(self, index):
35 | val = self.data[index]
36 | try:
37 | # OLD : img = imread(val) if isinstance(val, str) else val
38 | img = np.load(val) if isinstance(val, str) else val
39 |
40 | if self.augment and np.random.binomial(1, 0.5) == 1:
41 | img = img[:, ::-1, :]
42 |
43 | except:
44 | img = None
45 |
46 | return img
47 |
48 | def generator(self, batch_size, recursive=False):
49 | start = 0
50 | total = len(self)
51 |
52 | while True:
53 | while start < total:
54 | end = np.min([start + batch_size, total])
55 | items = []
56 |
57 | for ix in range(start, end):
58 | item = self[ix]
59 | if item is not None:
60 | items.append(item)
61 |
62 | start = end
63 | yield np.array(items)
64 |
65 | if recursive:
66 | start = 0
67 |
68 | else:
69 | return  # stop the generator instead of raising StopIteration (PEP 479)
70 |
71 |
72 | @property
73 | def data(self):
74 | if len(self._data) == 0:
75 | self._data = self.load()
76 | np.random.shuffle(self._data)
77 |
78 | return self._data
79 |
80 | @abstractmethod
81 | def load(self):
82 | return []
83 |
84 |
85 | class Cifar10Dataset(BaseDataset):
86 | def __init__(self, path, training=True, augment=True):
87 | super(Cifar10Dataset, self).__init__(CIFAR10_DATASET, path, training, augment)
88 |
89 | def load(self):
90 | data = []
91 | if self.training:
92 | for i in range(1, 6):
93 | filename = '{}/data_batch_{}'.format(self.path, i)
94 | batch_data = unpickle(filename)
95 | if len(data) > 0:
96 | data = np.vstack((data, batch_data[b'data']))
97 | else:
98 | data = batch_data[b'data']
99 |
100 | else:
101 | filename = '{}/test_batch'.format(self.path)
102 | batch_data = unpickle(filename)
103 | data = batch_data[b'data']
104 |
105 | w = 32
106 | h = 32
107 | s = w * h
108 | data = np.array(data)
109 | data = np.dstack((data[:, :s], data[:, s:2 * s], data[:, 2 * s:]))
110 | data = data.reshape((-1, w, h, 3))
111 | return data
112 |
113 |
114 | class Places365Dataset(BaseDataset):
115 | def __init__(self, path, training=True, augment=True):
116 | super(Places365Dataset, self).__init__(PLACES365_DATASET, path, training, augment)
117 |
118 | def load(self):
119 | if self.training:
120 | data = np.array(
121 | glob.glob(self.path + '/data_256/**/*.jpg', recursive=True))
122 |
123 | else:
124 | data = np.array(glob.glob(self.path + '/val_256/*.jpg'))
125 |
126 | return data
127 |
128 |
129 | class MomentsInTimeDataset(BaseDataset):
130 | def __init__(self, path, training=True, augment=True):
131 | super(MomentsInTimeDataset, self).__init__(MOMENTSINTIME_DATASET, path, training, augment)
132 |
133 | def load(self):
134 | if self.training:
135 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/train_moments/*"))
136 | data = np.array(glob.glob("." + self.path + "/train_moments/*"))
137 | else:
138 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/dev_moments/*"))
139 | data = np.array(glob.glob("." + self.path + "/dev_moments/*"))
140 |
141 | return data
142 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import numpy as np
4 | import tensorflow as tf
5 | from options import ModelOptions
6 | from models import Cifar10Model, Places365Model, MomentsInTimeModel
7 | from dataset import CIFAR10_DATASET, PLACES365_DATASET, MOMENTSINTIME_DATASET
8 |
9 |
10 | def main(options):
11 |
12 | # reset tensorflow graph
13 | tf.reset_default_graph()
14 |
15 | # initialize random seed
16 | tf.set_random_seed(options.seed)
17 | np.random.seed(options.seed)
18 | random.seed(options.seed)
19 |
20 | # create a session environment
21 | with tf.Session() as sess:
22 |
23 | if options.dataset == CIFAR10_DATASET:
24 | model = Cifar10Model(sess, options)
25 |
26 | elif options.dataset == PLACES365_DATASET:
27 | model = Places365Model(sess, options)
28 |
29 | elif options.dataset == MOMENTSINTIME_DATASET:
30 | model = MomentsInTimeModel(sess, options)
31 |
32 | else:
33 | model = MomentsInTimeModel(sess, options)
34 |
35 | if not os.path.exists(options.checkpoints_path):
36 | os.makedirs(options.checkpoints_path)
37 |
38 | if options.log:
39 | open(model.train_log_file, 'w').close()
40 | open(model.test_log_file, 'w').close()
41 |
42 | # build the model and initialize
43 | model.build()
44 | sess.run(tf.global_variables_initializer())
45 |
46 |
47 | # load model only after global variables initialization
48 | model.load()
49 |
50 |
51 | if options.mode == 0:
52 | args = vars(options)
53 | print('\n------------ Options -------------')
54 | with open(os.path.join(options.checkpoints_path, 'options.dat'), 'w') as f:
55 | for k, v in sorted(args.items()):
56 | print('%s: %s' % (str(k), str(v)))
57 | f.write('%s: %s\n' % (str(k), str(v)))
58 | print('-------------- End ----------------\n')
59 |
60 | model.train()
61 |
62 | elif options.mode == 1:
63 | model.evaluate()
64 | while True:
65 | model.sample()
66 |
67 | else:
68 | model.turing_test()
69 |
70 |
71 | if __name__ == "__main__":
72 | main(ModelOptions().parse())
73 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/networks.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from ops import conv2d, conv2d_transpose, pixelwise_accuracy
4 |
5 |
6 | class Discriminator(object):
7 | def __init__(self, name, kernels):
8 | self.name = name
9 | self.kernels = kernels
10 | self.var_list = []
11 |
12 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None):
13 | output = inputs
14 | with tf.variable_scope(self.name, reuse=reuse_variables):
15 | for index, kernel in enumerate(self.kernels):
16 |
17 | # do not use batch-norm in the first layer
18 | bnorm = False if index == 0 else True
19 | name = 'conv' + str(index)
20 | output = conv2d(
21 | inputs=output,
22 | name=name,
23 | kernel_size=kernel_size,
24 | filters=kernel[0],
25 | strides=kernel[1],
26 | bnorm=bnorm,
27 | activation=tf.nn.leaky_relu,
28 | seed=seed
29 | )
30 |
31 | if kernel[2] > 0:
32 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
33 |
34 | output = conv2d(
35 | inputs=output,
36 | name='conv_last',
37 | filters=1,
38 | kernel_size=4, # last layer kernel size = 4
39 | strides=1, # last layer stride = 1
40 | bnorm=False, # do not use batch-norm for the last layer
41 | seed=seed
42 | )
43 |
44 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)
45 |
46 | return output
47 |
48 |
49 | class Generator(object):
50 | def __init__(self, name, encoder_kernels, decoder_kernels, output_channels=3):
51 | self.name = name
52 | self.encoder_kernels = encoder_kernels
53 | self.decoder_kernels = decoder_kernels
54 | self.output_channels = output_channels
55 | self.var_list = []
56 |
57 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None):
58 | output = inputs
59 |
60 | with tf.variable_scope(self.name, reuse=reuse_variables):
61 |
62 | layers = []
63 |
64 | # encoder branch
65 | for index, kernel in enumerate(self.encoder_kernels):
66 |
67 | name = 'conv' + str(index)
68 | output = conv2d(
69 | inputs=output,
70 | name=name,
71 | kernel_size=kernel_size,
72 | filters=kernel[0],
73 | strides=kernel[1],
74 | activation=tf.nn.leaky_relu,
75 | seed=seed
76 | )
77 |
78 | # save contracting path layers to be used for skip connections
79 | layers.append(output)
80 |
81 | if kernel[2] > 0:
82 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
83 |
84 | # decoder branch
85 | for index, kernel in enumerate(self.decoder_kernels):
86 |
87 | name = 'deconv' + str(index)
88 | output = conv2d_transpose(
89 | inputs=output,
90 | name=name,
91 | kernel_size=kernel_size,
92 | filters=kernel[0],
93 | strides=kernel[1],
94 | activation=tf.nn.relu,
95 | seed=seed
96 | )
97 |
98 | if kernel[2] > 0:
99 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
100 |
101 | # concat the layer from the contracting path with the output of the current layer
102 | # concat only the channels (axis=3)
103 | output = tf.concat([layers[len(layers) - index - 2], output], axis=3)
104 |
105 | output = conv2d(
106 | inputs=output,
107 | name='conv_last',
108 | filters=self.output_channels, # number of output channels
109 | kernel_size=1, # last layer kernel size = 1
110 | strides=1, # last layer stride = 1
111 | bnorm=False, # do not use batch-norm for the last layer
112 | activation=tf.nn.tanh, # tanh activation function for the output
113 | seed=seed
114 | )
115 |
116 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)
117 |
118 | return output
119 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/ops.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | COLORSPACE_RGB = 'RGB'
5 | COLORSPACE_LAB = 'LAB'
6 |
7 |
8 | def conv2d(inputs, filters, name, kernel_size=4, strides=2, bnorm=True, activation=None, seed=None):
9 | """
10 | Creates a conv2D block
11 | """
12 | initializer=tf.variance_scaling_initializer(seed=seed)
13 | res = tf.layers.conv2d(
14 | name=name,
15 | inputs=inputs,
16 | filters=filters,
17 | kernel_size=kernel_size,
18 | strides=strides,
19 | padding="same",
20 | kernel_initializer=initializer)
21 |
22 | if bnorm:
23 | res = tf.layers.batch_normalization(inputs=res, name='bn_' + name, training=True)
24 |
25 | # activation after batch-norm
26 | if activation is not None:
27 | res = activation(res)
28 |
29 | return res
30 |
31 |
32 | def conv2d_transpose(inputs, filters, name, kernel_size=4, strides=2, bnorm=True, activation=None, seed=None):
33 | """
34 | Creates a conv2D-transpose block
35 | """
36 | initializer=tf.variance_scaling_initializer(seed=seed)
37 | res = tf.layers.conv2d_transpose(
38 | name=name,
39 | inputs=inputs,
40 | filters=filters,
41 | kernel_size=kernel_size,
42 | strides=strides,
43 | padding="same",
44 | kernel_initializer=initializer)
45 |
46 | if bnorm:
47 | res = tf.layers.batch_normalization(inputs=res, name='bn_' + name, training=True)
48 |
49 | # activation after batch-norm
50 | if activation is not None:
51 | res = activation(res)
52 |
53 | return res
54 |
55 |
56 | def pixelwise_accuracy(img_real, img_fake, colorspace, thresh):
57 | """
58 | Measures the accuracy of the colorization process by comparing pixels
59 | """
60 | img_real = postprocess(img_real, colorspace, COLORSPACE_LAB)
61 | img_fake = postprocess(img_fake, colorspace, COLORSPACE_LAB)
62 |
63 | diffL = tf.abs(tf.round(img_real[..., 0]) - tf.round(img_fake[..., 0]))
64 | diffA = tf.abs(tf.round(img_real[..., 1]) - tf.round(img_fake[..., 1]))
65 | diffB = tf.abs(tf.round(img_real[..., 2]) - tf.round(img_fake[..., 2]))
66 |
67 | # within %thresh of the original
68 | predL = tf.cast(tf.less_equal(diffL, 1 * thresh), tf.float64) # L: [0, 100]
69 | predA = tf.cast(tf.less_equal(diffA, 2.2 * thresh), tf.float64) # A: [-110, 110]
70 | predB = tf.cast(tf.less_equal(diffB, 2.2 * thresh), tf.float64) # B: [-110, 110]
71 |
72 | # all three channels are within the threshold
73 | pred = predL * predA * predB
74 |
75 | return tf.reduce_mean(pred)
76 |
77 |
78 | def preprocess(img, colorspace_in, colorspace_out):
79 | if colorspace_out.upper() == COLORSPACE_RGB:
80 | if colorspace_in == COLORSPACE_LAB:
81 | img = lab_to_rgb(img)
82 |
83 | # [0, 255] => [-1, 1]
84 | img = (img / 255.0) * 2 - 1
85 |
86 | elif colorspace_out.upper() == COLORSPACE_LAB:
87 | if colorspace_in == COLORSPACE_RGB:
88 | img = rgb_to_lab(img / 255.0)
89 |
90 | L_chan, a_chan, b_chan = tf.unstack(img, axis=3)
91 |
92 | # L: [0, 100] => [-1, 1]
93 | # A, B: [-110, 110] => [-1, 1]
94 | img = tf.stack([L_chan / 50 - 1, a_chan / 110, b_chan / 110], axis=3)
95 |
96 | return img
97 |
98 |
99 | def postprocess(img, colorspace_in, colorspace_out):
100 | if colorspace_in.upper() == COLORSPACE_RGB:
101 | # [-1, 1] => [0, 1]
102 | img = (img + 1) / 2
103 |
104 | if colorspace_out == COLORSPACE_LAB:
105 | img = rgb_to_lab(img)
106 |
107 | elif colorspace_in.upper() == COLORSPACE_LAB:
108 | L_chan, a_chan, b_chan = tf.unstack(img, axis=3)
109 |
110 | # L: [-1, 1] => [0, 100]
111 | # A, B: [-1, 1] => [-110, 110]
112 | img = tf.stack([(L_chan + 1) / 2 * 100, a_chan * 110, b_chan * 110], axis=3)
113 |
114 | if colorspace_out == COLORSPACE_RGB:
115 | img = lab_to_rgb(img)
116 |
117 | return img
118 |
119 |
120 | def rgb_to_lab(srgb):
121 | # based on https://github.com/torch/image/blob/9f65c30167b2048ecbe8b7befdc6b2d6d12baee9/generic/image.c
122 | with tf.name_scope("rgb_to_lab"):
123 | srgb_pixels = tf.reshape(srgb, [-1, 3])
124 |
125 | with tf.name_scope("srgb_to_xyz"):
126 | linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32)
127 | exponential_mask = tf.cast(srgb_pixels > 0.04045, dtype=tf.float32)
128 | rgb_pixels = (srgb_pixels / 12.92 * linear_mask) + (((srgb_pixels + 0.055) / 1.055) ** 2.4) * exponential_mask
129 | rgb_to_xyz = tf.constant([
130 | # X Y Z
131 | [0.412453, 0.212671, 0.019334], # R
132 | [0.357580, 0.715160, 0.119193], # G
133 | [0.180423, 0.072169, 0.950227], # B
134 | ])
135 | xyz_pixels = tf.matmul(rgb_pixels, rgb_to_xyz)
136 |
137 | # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions
138 | with tf.name_scope("xyz_to_cielab"):
139 |
140 | # normalize for D65 white point
141 | xyz_normalized_pixels = tf.multiply(xyz_pixels, [1 / 0.950456, 1.0, 1 / 1.088754])
142 |
143 | epsilon = 6 / 29
144 | linear_mask = tf.cast(xyz_normalized_pixels <= (epsilon**3), dtype=tf.float32)
145 | exponential_mask = tf.cast(xyz_normalized_pixels > (epsilon**3), dtype=tf.float32)
146 | fxfyfz_pixels = (xyz_normalized_pixels / (3 * epsilon**2) + 4 / 29) * linear_mask + (xyz_normalized_pixels ** (1 / 3)) * exponential_mask
147 |
148 | # convert to lab
149 | fxfyfz_to_lab = tf.constant([
150 | # l a b
151 | [0.0, 500.0, 0.0], # fx
152 | [116.0, -500.0, 200.0], # fy
153 | [0.0, 0.0, -200.0], # fz
154 | ])
155 | lab_pixels = tf.matmul(fxfyfz_pixels, fxfyfz_to_lab) + tf.constant([-16.0, 0.0, 0.0])
156 |
157 | return tf.reshape(lab_pixels, tf.shape(srgb))
158 |
159 |
160 | def lab_to_rgb(lab):
161 | with tf.name_scope("lab_to_rgb"):
162 | lab_pixels = tf.reshape(lab, [-1, 3])
163 |
164 | # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions
165 | with tf.name_scope("cielab_to_xyz"):
166 | # convert to fxfyfz
167 | lab_to_fxfyfz = tf.constant([
168 | # fx fy fz
169 | [1 / 116.0, 1 / 116.0, 1 / 116.0], # l
170 | [1 / 500.0, 0.0, 0.0], # a
171 | [0.0, 0.0, -1 / 200.0], # b
172 | ])
173 | fxfyfz_pixels = tf.matmul(lab_pixels + tf.constant([16.0, 0.0, 0.0]), lab_to_fxfyfz)
174 |
175 | # convert to xyz
176 | epsilon = 6 / 29
177 | linear_mask = tf.cast(fxfyfz_pixels <= epsilon, dtype=tf.float32)
178 | exponential_mask = tf.cast(fxfyfz_pixels > epsilon, dtype=tf.float32)
179 | xyz_pixels = (3 * epsilon**2 * (fxfyfz_pixels - 4 / 29)) * linear_mask + (fxfyfz_pixels ** 3) * exponential_mask
180 |
181 | # denormalize for D65 white point
182 | xyz_pixels = tf.multiply(xyz_pixels, [0.950456, 1.0, 1.088754])
183 |
184 | with tf.name_scope("xyz_to_srgb"):
185 | xyz_to_rgb = tf.constant([
186 | # r g b
187 | [3.2404542, -0.9692660, 0.0556434], # x
188 | [-1.5371385, 1.8760108, -0.2040259], # y
189 | [-0.4985314, 0.0415560, 1.0572252], # z
190 | ])
191 | rgb_pixels = tf.matmul(xyz_pixels, xyz_to_rgb)
192 | # avoid a slightly negative number messing up the conversion
193 | rgb_pixels = tf.clip_by_value(rgb_pixels, 0.0, 1.0)
194 | linear_mask = tf.cast(rgb_pixels <= 0.0031308, dtype=tf.float32)
195 | exponential_mask = tf.cast(rgb_pixels > 0.0031308, dtype=tf.float32)
196 | srgb_pixels = (rgb_pixels * 12.92 * linear_mask) + ((rgb_pixels ** (1 / 2.4) * 1.055) - 0.055) * exponential_mask
197 |
198 | return tf.reshape(srgb_pixels, tf.shape(lab))
199 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/options.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import random
4 | import argparse
5 |
6 |
7 | def str2bool(v):
8 | if v.lower() in ('yes', 'true', 't', 'y', '1'):
9 | return True
10 | elif v.lower() in ('no', 'false', 'f', 'n', '0'):
11 | return False
12 | else:
13 | raise argparse.ArgumentTypeError('Boolean value expected.')
14 |
15 |
16 | class ModelOptions:
17 | def __init__(self):
18 | parser = argparse.ArgumentParser(description='Colorization with GANs')
19 | parser.add_argument('--seed', type=int, default=0, metavar='S', help='random seed (default: 0)')
20 | parser.add_argument('--name', type=str, default='CGAN', help='arbitrary model name (default: CGAN)')
21 | parser.add_argument('--mode', type=int, default=0, help='run mode [0: train, 1: evaluate, 2: test] (default: 0)')
22 | parser.add_argument('--dataset', type=str, default='momentsintime', help='the name of dataset [places365, cifar10] (default: momentsintime)')
23 | parser.add_argument('--dataset-path', type=str, default='./data', help='dataset path (default: ./data)')
24 | parser.add_argument('--checkpoints-path', type=str, default='./checkpoints', help='models are saved here (default: ./checkpoints)')
25 | parser.add_argument('--batch-size', type=int, default=16, metavar='N', help='input batch size for training (default: 16)')
26 | parser.add_argument('--color-space', type=str, default='lab', help='model color space [lab, rgb] (default: lab)')
27 | parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs to train (default: 100)')
28 | parser.add_argument('--lr', type=float, default=5e-3, metavar='LR', help='learning rate (default: 5e-3)')
29 | parser.add_argument('--lr-decay-rate', type=float, default=0.1, help='learning rate exponential decay rate (default: 0.1)')
30 | parser.add_argument('--lr-decay-steps', type=float, default=25e2, help='learning rate exponential decay steps (default: 2.5e3)')
31 | parser.add_argument('--beta1', type=float, default=0, help='momentum term of adam optimizer (default: 0)')
32 | parser.add_argument("--l1-weight", type=float, default=100.0, help="weight on L1 term for generator gradient (default: 100.0)")
33 | parser.add_argument('--augment', type=str2bool, default=True, help='True for augmentation (default: True)')
34 | parser.add_argument('--label-smoothing', type=str2bool, default=False, help='True for one-sided label smoothing (default: False)')
35 | parser.add_argument('--acc-thresh', type=float, default=2.0, help="accuracy threshold (default: 2.0)")
36 | parser.add_argument('--kernel-size', type=int, default=4, help="default kernel size (default: 4)")
37 | parser.add_argument('--save', type=str2bool, default=True, help='True for saving (default: True)')
38 | parser.add_argument('--save-interval', type=int, default=100, help='how many batches to wait before saving model (default: 100)')
39 | parser.add_argument('--sample', type=str2bool, default=True, help='True for sampling (default: True)')
40 | parser.add_argument('--sample-size', type=int, default=8, help='number of images to sample (default: 8)')
41 | parser.add_argument('--sample-interval', type=int, default=100, help='how many batches to wait before sampling (default: 100)')
42 | parser.add_argument('--validate', type=str2bool, default=True, help='True for validation (default: True)')
43 | parser.add_argument('--validate-interval', type=int, default=0, help='how many batches to wait before validating (default: 0)')
44 | parser.add_argument('--log', type=str2bool, default=True, help='True for logging (default: True)')
45 | parser.add_argument('--log-interval', type=int, default=10, help='how many iterations to wait before logging training status (default: 10)')
46 | parser.add_argument('--visualize', type=str2bool, default=False, help='True for accuracy visualization (default: False)')
47 | parser.add_argument('--visualize-window', type=int, default=100, help='the exponentially moving average window width (default: 100)')
48 | parser.add_argument('--test-size', type=int, default=100, metavar='N', help='number of Turing tests (default: 100)')
49 | parser.add_argument('--test-delay', type=int, default=0, metavar='N', help='number of seconds to wait when doing Turing test, 0 for unlimited (default: 0)')
50 | parser.add_argument('--gpu-ids', type=str, default='0', help="gpu ids, e.g. '0', '0,1,2', '0,2'; use -1 for CPU")
51 | # to recolorize a video clip
52 | parser.add_argument('--filename', type=str, default='*', help='Filename of input BW video')
53 | parser.add_argument('--input_dir', type=str, default='../data/examples/converted', help='Directory of input files')
54 | parser.add_argument('--output_dir', type=str, default='../data/examples/recolorized', help='Directory of output files')
55 |
56 | self._parser = parser
57 |
58 | def parse(self):
59 | opt = self._parser.parse_args()
60 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids
61 |
62 | opt.color_space = opt.color_space.upper()
63 |
64 | if opt.seed == 0:
65 | opt.seed = random.randint(0, 2**31 - 1)
66 |
67 | if (opt.dataset_path == './data') or (opt.dataset_path == './dataset'):
68 | opt.dataset_path += ('/' + opt.dataset)
69 |
70 | if opt.checkpoints_path == './checkpoints':
71 | opt.checkpoints_path += ('/' + opt.dataset)
72 |
73 | return opt
74 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy ~= 1.14.3
2 | scipy ~= 1.0.1
3 | future ~= 0.16.0
4 | matplotlib ~= 2.2.2
5 | pillow ~= 5.0.0
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/setup.cfg:
--------------------------------------------------------------------------------
1 | [pycodestyle]
2 | ignore = E303
3 | max-line-length = 200
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/__init__.py:
--------------------------------------------------------------------------------
1 | from .options import *
2 | from .models import *
3 | from .utils import *
4 | from .dataset import *
5 | from .main import *
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/build_dataset.py:
--------------------------------------------------------------------------------
1 | """Split Moments in Time video clips into frame pairs and resize them to 256x256.
2 |
3 | The Moments in Time (Mini) dataset comes in the following format:
4 | training/<category>/
5 | some_clip.mp4
6 | ...
7 | validation/<category>/
8 | some_clip.mp4
9 | ...
10 |
11 | Only the category given by SUBFOLDER is processed. Each clip is split into pairs of
12 | consecutive frames (t, t+dt); every pair is resized to (256, 256), concatenated with the
13 | first frame of the clip (kept as a color-palette reference) and saved as a single .npy file.
14 |
15 | The "validation" folder already serves as the test set, so we only need to split the
16 | "training" clips into train and dev sets. To keep the dev statistics representative while
17 | retaining most clips for training, we hold out 10% of the training clips as the dev set.
18 | """
19 |
20 | import argparse
21 | import random
22 | import os
23 |
24 | import numpy as np
25 |
26 | from PIL import Image
27 | from tqdm import tqdm
28 | import cv2
29 |
30 | # size of the resized frames
31 | SIZE = 256
32 |
33 | # subfolder of the "Moments_in_Time" dataset to consider
34 | SUBFOLDER = "/baking"
35 |
36 | parser = argparse.ArgumentParser()
37 | parser.add_argument('--data_dir', default='../data/Moments_in_Time_Mini', help="Directory with the Moments in Time dataset")
38 | parser.add_argument('--output_dir', default='../data/momentsintime_ref', help="Where to write the new data")
39 | parser.add_argument('--dt', type=int, default=1, help="Time between consecutive frames")
40 |
41 |
42 | def split_resize_and_save(filename, i, output_dir, dt=1, size=SIZE):
43 | """Split the video clip into pairs of consecutive frames (t, t+dt), resize the frames, and save the pairs to the `output_dir`."""
44 |
45 | vidcap = cv2.VideoCapture(filename)
46 |
47 | success, frame = vidcap.read()
48 | # convert BGR to RGB convention
49 | frame = frame[:,:,::-1]
50 | # default : use bilinear interpolation
51 | frame_prev = cv2.resize(frame, (size, size))
52 | # save the first frame as the "color palette" reference
53 | frame_ref = frame_prev
54 |
55 | # counter to build pairs of consecutive frames
56 | count = 1
57 |
58 | while success:
59 | count += 1
60 |
61 | success, frame = vidcap.read()
62 |
63 | if success:
64 | # convert BGR to RGB convention
65 | frame = frame[:,:,::-1]
66 | # default : use bilinear interpolation
67 | frame = cv2.resize(frame, (size, size))
68 | else:
69 | break
70 | #print('Read a new frame: ', success)
71 |
72 | if count % (1+dt) == 0:
73 | img = np.concatenate((frame, frame_prev, frame_ref), 2)
74 | frame_prev = frame
75 | np.save(output_dir + "/video{}_frame{}".format(i, count), img)
76 |
77 | if __name__ == '__main__':
78 | args = parser.parse_args()
79 | # Define the output directory
80 | args.output_dir = args.output_dir + "_dt" + str(args.dt)
81 |
82 | assert os.path.isdir(args.data_dir), "Couldn't find the dataset at {}".format(args.data_dir)
83 |
84 | # Define the data directories
85 | train_data_dir = os.path.join(args.data_dir, 'training' + SUBFOLDER)
86 | test_data_dir = os.path.join(args.data_dir, 'validation' + SUBFOLDER)
87 |
88 | # Get the filenames in each directory (train and test)
89 | filenames = os.listdir(train_data_dir)
90 | filenames = [os.path.join(train_data_dir, f) for f in filenames if f.endswith('.mp4')]
91 |
92 | test_filenames = os.listdir(test_data_dir)
93 | test_filenames = [os.path.join(test_data_dir, f) for f in test_filenames if f.endswith('.mp4')]
94 |
95 | # Split the clips in 'training' into 90% train and 10% dev
96 | # Make sure to always shuffle with a fixed seed so that the split is reproducible
97 | random.seed(230)
98 | filenames.sort()
99 | random.shuffle(filenames)
100 |
101 | split = int(0.9 * len(filenames))
102 | train_filenames = filenames[:split]
103 | dev_filenames = filenames[split:]
104 |
105 | filenames = {'train': train_filenames,
106 | 'dev': dev_filenames,
107 | 'test': test_filenames}
108 |
109 | if not os.path.exists(args.output_dir):
110 | os.mkdir(args.output_dir)
111 | else:
112 | print("Warning: output dir {} already exists".format(args.output_dir))
113 |
114 | # Preprocess train, dev and test
115 | for split in ['train', 'dev', 'test']:
116 | output_dir_split = os.path.join(args.output_dir, '{}_moments'.format(split))
117 | if not os.path.exists(output_dir_split):
118 | os.mkdir(output_dir_split)
119 | else:
120 | print("Warning: dir {} already exists".format(output_dir_split))
121 |
122 | print("Processing {} data, saving preprocessed data to {}".format(split, output_dir_split))
123 | for i, filename in enumerate(tqdm(filenames[split])):
124 | split_resize_and_save(filename, i, output_dir_split, dt=args.dt, size=SIZE)
125 |
126 | print("Done building dataset")
127 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/dataset.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import numpy as np
3 | import tensorflow as tf
4 | from scipy.misc import imread
5 | from abc import abstractmethod
6 | from utils import unpickle
7 |
8 | CIFAR10_DATASET = 'cifar10'
9 | PLACES365_DATASET = 'places365'
10 | MOMENTSINTIME_DATASET = 'momentsintime'
11 |
12 | class BaseDataset():
13 | def __init__(self, name, path, training=True, augment=True):
14 | self.name = name
15 | self.augment = augment and training
16 | self.training = training
17 | self.path = path
18 | self._data = []
19 |
20 | def __len__(self):
21 | return len(self.data)
22 |
23 | def __iter__(self):
24 | total = len(self)
25 | start = 0
26 |
27 | while start < total:
28 | item = self[start]
29 | start += 1
30 | yield item
31 |
32 | return  # end iteration; raising StopIteration inside a generator is an error under PEP 479 (Python 3.7+)
33 |
34 | def __getitem__(self, index):
35 | val = self.data[index]
36 | try:
37 | # OLD : img = imread(val) if isinstance(val, str) else val
38 | img = np.load(val) if isinstance(val, str) else val
39 |
40 | if self.augment and np.random.binomial(1, 0.5) == 1:
41 | img = img[:, ::-1, :]
42 |
43 | except:
44 | img = None
45 |
46 | return img
47 |
48 | def generator(self, batch_size, recursive=False):
49 | start = 0
50 | total = len(self)
51 |
52 | while True:
53 | while start < total:
54 | end = np.min([start + batch_size, total])
55 | items = []
56 |
57 | for ix in range(start, end):
58 | item = self[ix]
59 | if item is not None:
60 | items.append(item)
61 |
62 | start = end
63 | yield np.array(items)
64 |
65 | if recursive:
66 | start = 0
67 |
68 | else:
69 |                 return  # stop the generator (PEP 479: raising StopIteration here would surface as a RuntimeError)
70 |
71 |
72 | @property
73 | def data(self):
74 | if len(self._data) == 0:
75 | self._data = self.load()
76 | np.random.shuffle(self._data)
77 |
78 | return self._data
79 |
80 | @abstractmethod
81 | def load(self):
82 | return []
83 |
84 |
85 | class Cifar10Dataset(BaseDataset):
86 | def __init__(self, path, training=True, augment=True):
87 | super(Cifar10Dataset, self).__init__(CIFAR10_DATASET, path, training, augment)
88 |
89 | def load(self):
90 | data = []
91 | if self.training:
92 | for i in range(1, 6):
93 | filename = '{}/data_batch_{}'.format(self.path, i)
94 | batch_data = unpickle(filename)
95 | if len(data) > 0:
96 | data = np.vstack((data, batch_data[b'data']))
97 | else:
98 | data = batch_data[b'data']
99 |
100 | else:
101 | filename = '{}/test_batch'.format(self.path)
102 | batch_data = unpickle(filename)
103 | data = batch_data[b'data']
104 |
105 | w = 32
106 | h = 32
107 | s = w * h
108 | data = np.array(data)
109 | data = np.dstack((data[:, :s], data[:, s:2 * s], data[:, 2 * s:]))
110 | data = data.reshape((-1, w, h, 3))
111 | return data
112 |
113 |
114 | class Places365Dataset(BaseDataset):
115 | def __init__(self, path, training=True, augment=True):
116 | super(Places365Dataset, self).__init__(PLACES365_DATASET, path, training, augment)
117 |
118 | def load(self):
119 | if self.training:
120 | data = np.array(
121 | glob.glob(self.path + '/data_256/**/*.jpg', recursive=True))
122 |
123 | else:
124 | data = np.array(glob.glob(self.path + '/val_256/*.jpg'))
125 |
126 | return data
127 |
128 |
129 | class MomentsInTimeDataset(BaseDataset):
130 | def __init__(self, path, training=True, augment=True):
131 | super(MomentsInTimeDataset, self).__init__(MOMENTSINTIME_DATASET, path, training, augment)
132 |
133 | def load(self):
134 | if self.training:
135 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/train_moments/*"))
136 |             data = np.array(glob.glob("." + self.path + "/train_moments/*"))  # '.' prefix turns the default './data/...' into '../data/...' (presumably paths are relative to src/)
137 | else:
138 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/dev_moments/*"))
139 | data = np.array(glob.glob("." + self.path + "/dev_moments/*"))
140 |
141 | return data
142 |
--------------------------------------------------------------------------------
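For orientation, a minimal sketch of iterating batches with MomentsInTimeDataset; note that load() prepends '.' to the given path, and the data path below is hypothetical:

from dataset import MomentsInTimeDataset

dataset = MomentsInTimeDataset('./data/momentsintime', training=True, augment=True)
print(len(dataset), 'training samples')

# generator() yields numpy batches; recursive=True would loop over the data forever
for batch in dataset.generator(batch_size=16, recursive=False):
    print(batch.shape)   # depends on how the .npy samples were built
    break
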
/Ref-GAN-Colorization/src/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import numpy as np
4 | import tensorflow as tf
5 | from options import ModelOptions
6 | from models import Cifar10Model, Places365Model, MomentsInTimeModel
7 | from dataset import CIFAR10_DATASET, PLACES365_DATASET, MOMENTSINTIME_DATASET
8 |
9 |
10 | def main(options):
11 |
12 | # reset tensorflow graph
13 | tf.reset_default_graph()
14 |
15 | # initialize random seed
16 | tf.set_random_seed(options.seed)
17 | np.random.seed(options.seed)
18 | random.seed(options.seed)
19 |
20 | # create a session environment
21 | with tf.Session() as sess:
22 |
23 | if options.dataset == CIFAR10_DATASET:
24 | model = Cifar10Model(sess, options)
25 |
26 | elif options.dataset == PLACES365_DATASET:
27 | model = Places365Model(sess, options)
28 |
29 | elif options.dataset == MOMENTSINTIME_DATASET:
30 | model = MomentsInTimeModel(sess, options)
31 |
32 | else:
33 |             model = MomentsInTimeModel(sess, options)  # fall back to the Moments in Time model for unrecognized dataset names
34 |
35 | if not os.path.exists(options.checkpoints_path):
36 | os.makedirs(options.checkpoints_path)
37 |
38 | if options.log:
39 | open(model.train_log_file, 'w').close()
40 | open(model.test_log_file, 'w').close()
41 |
42 | # build the model and initialize
43 | model.build()
44 | sess.run(tf.global_variables_initializer())
45 |
46 |
47 | # load model only after global variables initialization
48 | model.load()
49 |
50 |
51 | if options.mode == 0:
52 | args = vars(options)
53 | print('\n------------ Options -------------')
54 | with open(os.path.join(options.checkpoints_path, 'options.dat'), 'w') as f:
55 | for k, v in sorted(args.items()):
56 | print('%s: %s' % (str(k), str(v)))
57 | f.write('%s: %s\n' % (str(k), str(v)))
58 | print('-------------- End ----------------\n')
59 |
60 | model.train()
61 |
62 | elif options.mode == 1:
63 | model.evaluate()
64 | while True:
65 | model.sample()
66 |
67 | else:
68 | model.turing_test()
69 |
70 |
71 | if __name__ == "__main__":
72 | main(ModelOptions().parse())
73 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/networks.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from ops import conv2d, conv2d_transpose, pixelwise_accuracy
4 |
5 |
6 | class Discriminator(object):
7 | def __init__(self, name, kernels):
8 | self.name = name
9 | self.kernels = kernels
10 | self.var_list = []
11 |
12 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None):
13 | output = inputs
14 | with tf.variable_scope(self.name, reuse=reuse_variables):
15 | for index, kernel in enumerate(self.kernels):
16 |
17 |                 # do not use batch-norm in the first layer
18 | bnorm = False if index == 0 else True
19 | name = 'conv' + str(index)
20 | output = conv2d(
21 | inputs=output,
22 | name=name,
23 | kernel_size=kernel_size,
24 | filters=kernel[0],
25 | strides=kernel[1],
26 | bnorm=bnorm,
27 | activation=tf.nn.leaky_relu,
28 | seed=seed
29 | )
30 |
31 | if kernel[2] > 0:
32 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
33 |
34 | output = conv2d(
35 | inputs=output,
36 | name='conv_last',
37 | filters=1,
38 | kernel_size=4, # last layer kernel size = 4
39 | strides=1, # last layer stride = 1
40 | bnorm=False, # do not use batch-norm for the last layer
41 | seed=seed
42 | )
43 |
44 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)
45 |
46 | return output
47 |
48 |
49 | class Generator(object):
50 | def __init__(self, name, encoder_kernels, decoder_kernels, output_channels=3):
51 | self.name = name
52 | self.encoder_kernels = encoder_kernels
53 | self.decoder_kernels = decoder_kernels
54 | self.output_channels = output_channels
55 | self.var_list = []
56 |
57 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None):
58 | output = inputs
59 |
60 | with tf.variable_scope(self.name, reuse=reuse_variables):
61 |
62 | layers = []
63 |
64 | # encoder branch
65 | for index, kernel in enumerate(self.encoder_kernels):
66 |
67 | name = 'conv' + str(index)
68 | output = conv2d(
69 | inputs=output,
70 | name=name,
71 | kernel_size=kernel_size,
72 | filters=kernel[0],
73 | strides=kernel[1],
74 | activation=tf.nn.leaky_relu,
75 | seed=seed
76 | )
77 |
78 | # save contracting path layers to be used for skip connections
79 | layers.append(output)
80 |
81 | if kernel[2] > 0:
82 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
83 |
84 | # decoder branch
85 | for index, kernel in enumerate(self.decoder_kernels):
86 |
87 | name = 'deconv' + str(index)
88 | output = conv2d_transpose(
89 | inputs=output,
90 | name=name,
91 | kernel_size=kernel_size,
92 | filters=kernel[0],
93 | strides=kernel[1],
94 | activation=tf.nn.relu,
95 | seed=seed
96 | )
97 |
98 | if kernel[2] > 0:
99 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
100 |
101 | # concat the layer from the contracting path with the output of the current layer
102 | # concat only the channels (axis=3)
103 |                 output = tf.concat([layers[len(layers) - index - 2], output], axis=3)  # the '-2' skips the bottleneck (deepest encoder output), which is already the decoder input
104 |
105 | output = conv2d(
106 | inputs=output,
107 | name='conv_last',
108 |             filters=self.output_channels, # number of output channels
109 | kernel_size=1, # last layer kernel size = 1
110 | strides=1, # last layer stride = 1
111 | bnorm=False, # do not use batch-norm for the last layer
112 | activation=tf.nn.tanh, # tanh activation function for the output
113 | seed=seed
114 | )
115 |
116 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)
117 |
118 | return output
119 |
--------------------------------------------------------------------------------
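Both networks are configured by lists of (filters, stride, dropout) tuples: the discriminator takes one list, the generator takes an encoder list and a decoder list joined by skip connections. A sketch with illustrative kernel lists (the real configurations live in models.py and are not shown here; output shapes assume 'same' padding in ops.conv2d):

import tensorflow as tf
from networks import Generator, Discriminator

# (filters, stride, dropout) per layer -- illustrative values only
encoder_kernels = [(64, 1, 0), (128, 2, 0), (256, 2, 0), (512, 2, 0)]
decoder_kernels = [(256, 2, 0.5), (128, 2, 0), (64, 2, 0)]

gen = Generator('gen', encoder_kernels, decoder_kernels, output_channels=3)
dis = Discriminator('dis', [(64, 2, 0), (128, 2, 0.2), (256, 2, 0)])

inputs = tf.placeholder(tf.float32, [None, 256, 256, 1])   # e.g. a grayscale input
colorized = gen.create(inputs, kernel_size=4, seed=0)      # 3-channel, tanh-activated output
score = dis.create(tf.concat([inputs, colorized], axis=3), kernel_size=4, seed=0)
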
/Ref-GAN-Colorization/src/options.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import random
4 | import argparse
5 |
6 |
7 | def str2bool(v):
8 | if v.lower() in ('yes', 'true', 't', 'y', '1'):
9 | return True
10 | elif v.lower() in ('no', 'false', 'f', 'n', '0'):
11 | return False
12 | else:
13 | raise argparse.ArgumentTypeError('Boolean value expected.')
14 |
15 |
16 | class ModelOptions:
17 | def __init__(self):
18 | parser = argparse.ArgumentParser(description='Colorization with GANs')
19 | parser.add_argument('--seed', type=int, default=0, metavar='S', help='random seed (default: 0)')
20 | parser.add_argument('--name', type=str, default='CGAN', help='arbitrary model name (default: CGAN)')
21 |         parser.add_argument('--mode', type=int, default=0, help='run mode [0: train, 1: evaluate, 2: test] (default: 0)')
22 | parser.add_argument('--dataset', type=str, default='momentsintime', help='the name of dataset [places365, cifar10] (default: momentsintime)')
23 | parser.add_argument('--dataset-path', type=str, default='./data', help='dataset path (default: ./data)')
24 | parser.add_argument('--checkpoints-path', type=str, default='./checkpoints', help='models are saved here (default: ./checkpoints)')
25 | parser.add_argument('--batch-size', type=int, default=16, metavar='N', help='input batch size for training (default: 16)')
26 | parser.add_argument('--color-space', type=str, default='lab', help='model color space [lab, rgb] (default: lab)')
27 |         parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs to train (default: 100)')
28 |         parser.add_argument('--lr', type=float, default=5e-3, metavar='LR', help='learning rate (default: 5e-3)')
29 |         parser.add_argument('--lr-decay-rate', type=float, default=0.1, help='exponential decay rate of the learning rate (default: 0.1)')
30 |         parser.add_argument('--lr-decay-steps', type=float, default=25e2, help='exponential decay steps of the learning rate (default: 2500)')
31 | parser.add_argument('--beta1', type=float, default=0, help='momentum term of adam optimizer (default: 0)')
32 | parser.add_argument("--l1-weight", type=float, default=100.0, help="weight on L1 term for generator gradient (default: 100.0)")
33 | parser.add_argument('--augment', type=str2bool, default=True, help='True for augmentation (default: True)')
34 | parser.add_argument('--label-smoothing', type=str2bool, default=False, help='True for one-sided label smoothing (default: False)')
35 | parser.add_argument('--acc-thresh', type=float, default=2.0, help="accuracy threshold (default: 2.0)")
36 | parser.add_argument('--kernel-size', type=int, default=4, help="default kernel size (default: 4)")
37 | parser.add_argument('--save', type=str2bool, default=True, help='True for saving (default: True)')
38 |         parser.add_argument('--save-interval', type=int, default=100, help='how many batches to wait before saving model (default: 100)')
39 | parser.add_argument('--sample', type=str2bool, default=True, help='True for sampling (default: True)')
40 | parser.add_argument('--sample-size', type=int, default=8, help='number of images to sample (default: 8)')
41 |         parser.add_argument('--sample-interval', type=int, default=100, help='how many batches to wait before sampling (default: 100)')
42 | parser.add_argument('--validate', type=str2bool, default=True, help='True for validation (default: True)')
43 | parser.add_argument('--validate-interval', type=int, default=0, help='how many batches to wait before validating (default: 0)')
44 | parser.add_argument('--log', type=str2bool, default=True, help='True for logging (default: True)')
45 | parser.add_argument('--log-interval', type=int, default=10, help='how many iterations to wait before logging training status (default: 10)')
46 | parser.add_argument('--visualize', type=str2bool, default=False, help='True for accuracy visualization (default: False)')
47 | parser.add_argument('--visualize-window', type=int, default=100, help='the exponentially moving average window width (default: 100)')
48 | parser.add_argument('--test-size', type=int, default=100, metavar='N', help='number of Turing tests (default: 100)')
49 | parser.add_argument('--test-delay', type=int, default=0, metavar='N', help='number of seconds to wait when doing Turing test, 0 for unlimited (default: 0)')
50 |         parser.add_argument('--gpu-ids', type=str, default='0', help='gpu ids, e.g. "0", "0,1,2", "0,2"; use -1 for CPU')
51 | # to recolorize a video clip
52 | parser.add_argument('--filename', type=str, default='*', help='Filename of input BW video')
53 | parser.add_argument('--input_dir', type=str, default='../data/examples/converted', help='Directory of input files')
54 | parser.add_argument('--output_dir', type=str, default='../data/examples/recolorized', help='Directory of output files')
55 |
56 | self._parser = parser
57 |
58 | def parse(self):
59 | opt = self._parser.parse_args()
60 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids
61 |
62 | opt.color_space = opt.color_space.upper()
63 |
64 | if opt.seed == 0:
65 | opt.seed = random.randint(0, 2**31 - 1)
66 |
67 | if (opt.dataset_path == './data') or (opt.dataset_path == './dataset'):
68 | opt.dataset_path += ('/' + opt.dataset)
69 |
70 | if opt.checkpoints_path == './checkpoints':
71 | opt.checkpoints_path += ('/' + opt.dataset)
72 |
73 | return opt
74 |
--------------------------------------------------------------------------------
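A quick sketch of the post-processing that parse() applies on top of the defaults above (values shown assume no command-line overrides):

from options import ModelOptions

opt = ModelOptions().parse()      # parses sys.argv
print(opt.color_space)            # 'lab' is upper-cased to 'LAB'
print(opt.seed)                   # 0 is replaced by a random seed in [0, 2**31 - 1]
print(opt.dataset_path)           # './data' becomes './data/momentsintime'
print(opt.checkpoints_path)       # './checkpoints' becomes './checkpoints/momentsintime'
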
/Ref-GAN-Colorization/src/test-eval.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 1
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/test-turing.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 2
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/train.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 0
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/utils.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | from PIL import Image
4 | import matplotlib.pyplot as plt
5 |
6 |
7 | def stitch_images(grayscale, original, pred):
8 | gap = 5
9 |     width, height = original[0][:, :, 0].shape  # note: numpy shape is (rows, cols), so this assumes square frames
10 | img_per_row = 2 if width > 200 else 4
11 | img = Image.new('RGB', (width * img_per_row * 3 + gap * (img_per_row - 1), height * int(len(original) / img_per_row)))
12 |
13 | grayscale = np.array(grayscale).squeeze()
14 | original = np.array(original)
15 | pred = np.array(pred)
16 |
17 | for ix in range(len(original)):
18 | xoffset = int(ix % img_per_row) * width * 3 + int(ix % img_per_row) * gap
19 | yoffset = int(ix / img_per_row) * height
20 | im1 = Image.fromarray(grayscale[ix])
21 | im2 = Image.fromarray(original[ix])
22 | im3 = Image.fromarray((pred[ix] * 255).astype(np.uint8))
23 | img.paste(im1, (xoffset, yoffset))
24 | img.paste(im2, (xoffset + width, yoffset))
25 | img.paste(im3, (xoffset + width + width, yoffset))
26 |
27 | return img
28 |
29 |
30 | def unpickle(file):
31 | with open(file, 'rb') as fo:
32 | dict = pickle.load(fo, encoding='bytes')
33 | return dict
34 |
35 |
36 | def moving_average(data, window_width):
37 | cumsum_vec = np.cumsum(np.insert(data, 0, 0))
38 | ma_vec = (cumsum_vec[window_width:] - cumsum_vec[:-window_width]) / window_width
39 | return ma_vec
40 |
41 |
42 | def imshow(img, title=''):
43 | fig = plt.gcf()
44 | fig.canvas.set_window_title(title)
45 | plt.axis('off')
46 | plt.imshow(img, interpolation='none')
47 | plt.show()
48 |
49 |
50 | def turing_test(real_img, fake_img, delay=0):
51 | height, width, _ = real_img.shape
52 | imgs = np.array([real_img, (fake_img * 255).astype(np.uint8)])
53 | real_index = np.random.binomial(1, 0.5)
54 | fake_index = (real_index + 1) % 2
55 |
56 | img = Image.new('RGB', (2 + width * 2, height))
57 | img.paste(Image.fromarray(imgs[real_index]), (0, 0))
58 | img.paste(Image.fromarray(imgs[fake_index]), (2 + width, 0))
59 |
60 | img.success = 0
61 |
62 | def onclick(event):
63 | if event.xdata is not None:
64 | if event.x < width and real_index == 0:
65 | img.success = 1
66 |
67 | elif event.x > width and real_index == 1:
68 | img.success = 1
69 |
70 | plt.gcf().canvas.stop_event_loop()
71 |
72 | plt.ion()
73 | plt.gcf().canvas.mpl_connect('button_press_event', onclick)
74 | plt.title('click on the real image')
75 | plt.axis('off')
76 | plt.imshow(img, interpolation='none')
77 | plt.show()
78 | plt.draw()
79 | plt.gcf().canvas.start_event_loop(delay)
80 |
81 | return img.success
82 |
83 |
84 | def visualize(train_log_file, test_log_file, window_width, title=''):
85 | train_data = np.loadtxt(train_log_file)
86 | test_data = np.loadtxt(test_log_file)
87 |
88 | if len(train_data.shape) < 2:
89 | return
90 |
91 | if len(train_data) < window_width:
92 | window_width = len(train_data) - 1
93 |
94 | fig = plt.gcf()
95 | fig.canvas.set_window_title(title)
96 |
97 | plt.ion()
98 |     plt.subplot(121)  # integer spec; the string form is no longer accepted by recent matplotlib
99 | plt.cla()
100 | if len(train_data) > 1:
101 | plt.plot(moving_average(train_data[:, 8], window_width))
102 | plt.title('train')
103 |
104 |     plt.subplot(122)
105 | plt.cla()
106 | if len(test_data) > 1:
107 | plt.plot(test_data[:, 8])
108 | plt.title('test')
109 |
110 | plt.show()
111 | plt.draw()
112 | plt.pause(.01)
113 |
--------------------------------------------------------------------------------
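As a worked example of moving_average above (a cumulative-sum implementation of a plain moving average):

from utils import moving_average

print(moving_average([1, 2, 3, 4, 5], 2))   # -> [1.5 2.5 3.5 4.5]
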
/Ref-GAN-Colorization/src/video_colorize_GAN.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import cv2
6 | import numpy as np
7 | from PIL import Image
8 | from skimage import img_as_ubyte, img_as_float
9 | import skimage.color as color
10 | import scipy.ndimage.interpolation as sni
11 | from ops import postprocess
12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB
13 |
14 | import tensorflow as tf
15 | from options import ModelOptions
16 | from models import MomentsInTimeModel
17 |
18 |
19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, img_rgb_first, options):
20 |
21 | # colorize the image based on the previous one
22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0), model.input_rgb_first: np.expand_dims(img_rgb_first, axis=0)}
23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic)
24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB)
25 |
26 |     # evaluate the tensor
27 | img_rgb_out = fake_image.eval()
28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8)
29 |
30 | return img_rgb_out
31 |
32 | def bw2color(options, inputname, inputpath, outputpath):
33 | if inputname.endswith(".mp4"):
34 | # size of the input frames
35 | size = 256
36 |
37 | # check that the video exists
38 | path_to_video = os.path.join(inputpath, inputname)
39 | if not os.path.exists(path_to_video):
40 |             print("The file:", path_to_video, "does not exist!")
41 |
42 |         # store information about the original video
43 | cap = cv2.VideoCapture(os.path.join(path_to_video))
44 | # original dimensions
45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v');
48 | # parameters of output file
49 | # dimensions of the output image
50 | new_width, new_height = size, size
51 | # number of frames
52 | fps = 30.0
53 |
54 | # recolorized output video
55 | color_out = cv2.VideoWriter(
56 | os.path.join(outputpath, 'color_' + inputname),
57 | fourcc,
58 | fps,
59 | (new_width, new_height),
60 | isColor=True
61 | )
62 |
63 | # TO CHANGE to DL colorization of 1st frame
64 | # pick the first frame from the original video clip as the first reference
65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:]))
66 |
67 | ret_temp, frame_prev = cap_temp.read()
68 | # convert BGR to RGB convention
69 | frame_prev = frame_prev[:,:,::-1]
70 | frame_prev = cv2.resize(frame_prev, (size, size))
71 | # save the first frame as the reference
72 | frame_ref = frame_prev
73 |
74 | # count the number of recolorized frames
75 | frames_processed = 0
76 |
77 | with tf.Session() as sess:
78 |
79 | model = MomentsInTimeModel(sess, options)
80 |
81 | # build the model and initialize
82 | model.build()
83 | sess.run(tf.global_variables_initializer())
84 |
85 | # load model only after global variables initialization
86 | model.load()
87 |
88 | while(cap.isOpened()):
89 | ret, frame_in = cap.read()
90 |
91 | # check if we are not at the end of the video
92 | if ret==True:
93 | # convert BGR to RGB convention
94 | frame_in = frame_in[:,:,::-1]
95 | # resize the frame to match the input size of the GAN
96 | frame_in = cv2.resize(frame_in, (size, size))
97 |
98 | # colorize the BW frame
99 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, frame_ref, options)
100 |
101 | #generate sample
102 | get_image = False
103 | if get_image:
104 | img = Image.fromarray(frame_out)
105 |
106 | if not os.path.exists(model.samples_dir):
107 | os.makedirs(model.samples_dir)
108 |
109 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png"
110 | img.save(os.path.join(model.samples_dir, sample))
111 |
112 | # save the recolorized frame
113 | frame_prev = frame_out
114 | # convert RGB to BGR convention
115 | frame_out = frame_out[:,:,::-1]
116 | # write the color frame
117 | color_out.write(frame_out)
118 |
119 | # print progress
120 | frames_processed += 1
121 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 //totalFrames), end="\r")
122 | if cv2.waitKey(1) & 0xFF == ord('q'):
123 | break
124 | # end of the video
125 | else:
126 | break
127 |
128 | # release everything if job is finished
129 | cap.release()
130 | color_out.release()
131 |
132 | def main():
133 |
134 | # reset tensorflow graph
135 | tf.reset_default_graph()
136 |
137 | options = ModelOptions().parse()
138 |
139 | if options.filename == '*':
140 | for filename in os.listdir(options.input_dir):
141 |             bw2color(options, inputname = filename, inputpath = options.input_dir, outputpath = options.output_dir)
142 | else:
143 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir)
144 |
145 | # cleanup
146 | cv2.destroyAllWindows()
147 |
148 | return 0
149 |
150 | if __name__ == '__main__':
151 | main()
152 |
--------------------------------------------------------------------------------
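Note that bw2color() reads its reference frame from "color" + inputname[2:], so it expects the bw_*.mp4 / color_*.mp4 pairs produced by converter.py to sit in the same input directory. A single-clip usage sketch (file and directory names are hypothetical):

import tensorflow as tf
from options import ModelOptions
from video_colorize_GAN import bw2color

tf.reset_default_graph()
options = ModelOptions().parse()

# 'bw_example.mp4' and its counterpart 'color_example.mp4' are assumed to exist
bw2color(options,
         inputname='bw_example.mp4',
         inputpath='../data/examples/converted',
         outputpath='../data/examples/recolorized')
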
/Ref-GAN-Colorization/test-eval.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 1
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/test-turing.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 2
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/train.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 0
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/utils.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | from PIL import Image
4 | import matplotlib.pyplot as plt
5 |
6 |
7 | def stitch_images(grayscale, original, pred):
8 | gap = 5
9 |     width, height = original[0][:, :, 0].shape  # note: numpy shape is (rows, cols), so this assumes square frames
10 | img_per_row = 2 if width > 200 else 4
11 | img = Image.new('RGB', (width * img_per_row * 3 + gap * (img_per_row - 1), height * int(len(original) / img_per_row)))
12 |
13 | grayscale = np.array(grayscale).squeeze()
14 | original = np.array(original)
15 | pred = np.array(pred)
16 |
17 | for ix in range(len(original)):
18 | xoffset = int(ix % img_per_row) * width * 3 + int(ix % img_per_row) * gap
19 | yoffset = int(ix / img_per_row) * height
20 | im1 = Image.fromarray(grayscale[ix])
21 | im2 = Image.fromarray(original[ix])
22 | im3 = Image.fromarray((pred[ix] * 255).astype(np.uint8))
23 | img.paste(im1, (xoffset, yoffset))
24 | img.paste(im2, (xoffset + width, yoffset))
25 | img.paste(im3, (xoffset + width + width, yoffset))
26 |
27 | return img
28 |
29 |
30 | def unpickle(file):
31 | with open(file, 'rb') as fo:
32 | dict = pickle.load(fo, encoding='bytes')
33 | return dict
34 |
35 |
36 | def moving_average(data, window_width):
37 | cumsum_vec = np.cumsum(np.insert(data, 0, 0))
38 | ma_vec = (cumsum_vec[window_width:] - cumsum_vec[:-window_width]) / window_width
39 | return ma_vec
40 |
41 |
42 | def imshow(img, title=''):
43 | fig = plt.gcf()
44 | fig.canvas.set_window_title(title)
45 | plt.axis('off')
46 | plt.imshow(img, interpolation='none')
47 | plt.show()
48 |
49 |
50 | def turing_test(real_img, fake_img, delay=0):
51 | height, width, _ = real_img.shape
52 | imgs = np.array([real_img, (fake_img * 255).astype(np.uint8)])
53 | real_index = np.random.binomial(1, 0.5)
54 | fake_index = (real_index + 1) % 2
55 |
56 | img = Image.new('RGB', (2 + width * 2, height))
57 | img.paste(Image.fromarray(imgs[real_index]), (0, 0))
58 | img.paste(Image.fromarray(imgs[fake_index]), (2 + width, 0))
59 |
60 | img.success = 0
61 |
62 | def onclick(event):
63 | if event.xdata is not None:
64 | if event.x < width and real_index == 0:
65 | img.success = 1
66 |
67 | elif event.x > width and real_index == 1:
68 | img.success = 1
69 |
70 | plt.gcf().canvas.stop_event_loop()
71 |
72 | plt.ion()
73 | plt.gcf().canvas.mpl_connect('button_press_event', onclick)
74 | plt.title('click on the real image')
75 | plt.axis('off')
76 | plt.imshow(img, interpolation='none')
77 | plt.show()
78 | plt.draw()
79 | plt.gcf().canvas.start_event_loop(delay)
80 |
81 | return img.success
82 |
83 |
84 | def visualize(train_log_file, test_log_file, window_width, title=''):
85 | train_data = np.loadtxt(train_log_file)
86 | test_data = np.loadtxt(test_log_file)
87 |
88 | if len(train_data.shape) < 2:
89 | return
90 |
91 | if len(train_data) < window_width:
92 | window_width = len(train_data) - 1
93 |
94 | fig = plt.gcf()
95 | fig.canvas.set_window_title(title)
96 |
97 | plt.ion()
98 |     plt.subplot(121)  # integer spec; the string form is no longer accepted by recent matplotlib
99 | plt.cla()
100 | if len(train_data) > 1:
101 | plt.plot(moving_average(train_data[:, 8], window_width))
102 | plt.title('train')
103 |
104 |     plt.subplot(122)
105 | plt.cla()
106 | if len(test_data) > 1:
107 | plt.plot(test_data[:, 8])
108 | plt.title('test')
109 |
110 | plt.show()
111 | plt.draw()
112 | plt.pause(.01)
113 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/video_colorize_GAN.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import cv2
6 | import numpy as np
7 | from PIL import Image
8 | from skimage import img_as_ubyte, img_as_float
9 | import skimage.color as color
10 | import scipy.ndimage.interpolation as sni
11 | from ops import postprocess
12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB
13 |
14 | import tensorflow as tf
15 | from options import ModelOptions
16 | from models import MomentsInTimeModel
17 |
18 |
19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, img_rgb_first, options):
20 |
21 | # colorize the image based on the previous one
22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0), model.input_rgb_first: np.expand_dims(img_rgb_first, axis=0)}
23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic)
24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB)
25 |
26 |     # evaluate the tensor
27 | img_rgb_out = fake_image.eval()
28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8)
29 |
30 | return img_rgb_out
31 |
32 | def bw2color(options, inputname, inputpath, outputpath):
33 | if inputname.endswith(".mp4"):
34 | # size of the input frames
35 | size = 256
36 |
37 | # check that the video exists
38 | path_to_video = os.path.join(inputpath, inputname)
39 | if not os.path.exists(path_to_video):
40 |             print("The file:", path_to_video, "does not exist!")
41 |
42 |         # store information about the original video
43 | cap = cv2.VideoCapture(os.path.join(path_to_video))
44 | # original dimensions
45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v');
48 | # parameters of output file
49 | # dimensions of the output image
50 | new_width, new_height = size, size
51 | # number of frames
52 | fps = 30.0
53 |
54 | # recolorized output video
55 | color_out = cv2.VideoWriter(
56 | os.path.join(outputpath, 'color_' + inputname),
57 | fourcc,
58 | fps,
59 | (new_width, new_height),
60 | isColor=True
61 | )
62 |
63 | # TO CHANGE to DL colorization of 1st frame
64 | # pick the first frame from the original video clip as the first reference
65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:]))
66 |
67 | ret_temp, frame_prev = cap_temp.read()
68 | # convert BGR to RGB convention
69 | frame_prev = frame_prev[:,:,::-1]
70 | frame_prev = cv2.resize(frame_prev, (size, size))
71 | # save the first frame as the reference
72 | frame_ref = frame_prev
73 |
74 | # count the number of recolorized frames
75 | frames_processed = 0
76 |
77 | with tf.Session() as sess:
78 |
79 | model = MomentsInTimeModel(sess, options)
80 |
81 | # build the model and initialize
82 | model.build()
83 | sess.run(tf.global_variables_initializer())
84 |
85 | # load model only after global variables initialization
86 | model.load()
87 |
88 | while(cap.isOpened()):
89 | ret, frame_in = cap.read()
90 |
91 | # check if we are not at the end of the video
92 | if ret==True:
93 | # convert BGR to RGB convention
94 | frame_in = frame_in[:,:,::-1]
95 | # resize the frame to match the input size of the GAN
96 | frame_in = cv2.resize(frame_in, (size, size))
97 |
98 | # colorize the BW frame
99 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, frame_ref, options)
100 |
101 | #generate sample
102 | get_image = False
103 | if get_image:
104 | img = Image.fromarray(frame_out)
105 |
106 | if not os.path.exists(model.samples_dir):
107 | os.makedirs(model.samples_dir)
108 |
109 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png"
110 | img.save(os.path.join(model.samples_dir, sample))
111 |
112 | # save the recolorized frame
113 | frame_prev = frame_out
114 | # convert RGB to BGR convention
115 | frame_out = frame_out[:,:,::-1]
116 | # write the color frame
117 | color_out.write(frame_out)
118 |
119 | # print progress
120 | frames_processed += 1
121 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 //totalFrames), end="\r")
122 | if cv2.waitKey(1) & 0xFF == ord('q'):
123 | break
124 | # end of the video
125 | else:
126 | break
127 |
128 | # release everything if job is finished
129 | cap.release()
130 | color_out.release()
131 |
132 | def main():
133 |
134 | # reset tensorflow graph
135 | tf.reset_default_graph()
136 |
137 | options = ModelOptions().parse()
138 |
139 | if options.filename == '*':
140 | for filename in os.listdir(options.input_dir):
141 |             bw2color(options, inputname = filename, inputpath = options.input_dir, outputpath = options.output_dir)
142 | else:
143 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir)
144 |
145 | # cleanup
146 | cv2.destroyAllWindows()
147 |
148 | return 0
149 |
150 | if __name__ == '__main__':
151 | main()
152 |
--------------------------------------------------------------------------------
/automatic-video-colorization.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/automatic-video-colorization.pdf
--------------------------------------------------------------------------------
/configuration.txt:
--------------------------------------------------------------------------------
1 | # AWS instance: ec2 p2.xlarge
2 | # ubuntu version: 18.04 (64 bits)
3 | # python version: 3.6
4 |
5 | # Conda installation
6 | # Nvidia drivers
7 | sudo apt install ubuntu-drivers-common
8 | # CHECK : ubuntu-drivers devices
9 | sudo ubuntu-drivers autoinstall
10 | # CHECK : nvidia-smi
11 | # CUDA
12 | sudo apt install nvidia-cuda-toolkit
13 | # CuDNN
14 | # register at nvidia developers https://developer.nvidia.com/cudnn
15 | # download the corresponding runtime library (DEB)
16 | sudo apt install ./<path-to-downloaded-cudnn-runtime>.deb
17 |
18 | # REBOOT instance
19 |
20 | # Automatic Image Colorization
21 | # GitHub repo cloning
22 | git clone -b master --single-branch https://github.com/richzhang/colorization.git
23 | # download model
24 | ./models/fetch_release_models.sh
25 |
26 | # Caffe (DL framework used in repo) installation
27 | sudo apt install caffe-cuda
28 |
29 | # Image visualisation
30 | sudo apt install eog
30 | # EXAMPLE: eog image.jpg
--------------------------------------------------------------------------------
/convert_moment_dataset.sh:
--------------------------------------------------------------------------------
1 | if [ ! -d data/Moments_in_Time_Mini ]; then
2 | echo "Moments_in_Time_Mini dataset not downloaded";
3 | exit;
4 | fi
5 |
6 | mkdir -p data/Moments_processed;
7 |
8 | for directory in $(find data/Moments_in_Time_Mini/training -mindepth 1 -type d);
9 | do
10 | echo "Converting videos in directory $directory";
11 | python3 converter.py --input_dir "$directory/" --output_dir data/Moments_processed/;
12 | done
13 |
--------------------------------------------------------------------------------
/converter.py:
--------------------------------------------------------------------------------
1 | # convert Color to BW video clips
2 |
3 | import os
4 | import argparse
5 |
6 | import numpy as np
7 | import cv2
8 |
9 | def parse_args():
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument('--filename', type=str, default='*', help='Filename of input video')
12 | parser.add_argument('--input_dir', type=str, default='data/raw/', help='Directory of input files')
13 | parser.add_argument('--output_dir', type=str, default='data/converted/', help='Directory of output files')
14 | parser.add_argument('--out_dim', type=int, nargs=2, default=None, help='Dimensions of output frames (width, height)')
15 | parser.add_argument('--fps', type=int, default=None, help='Number of fps of output files')
16 |
17 | args = parser.parse_args()
18 | return args
19 |
20 | def parse_config(args):  # NOTE: unused helper; it references yaml, dict2namespace and args.log_dir, none of which are defined in this script
21 | with open('config.yml', 'r') as f:
22 | config = yaml.load(f)
23 | if not os.path.exists(args.log_dir):
24 | os.makedirs(args.log_dir)
25 | with open(os.path.join(args.log_dir, 'config.yml'), 'w') as f:
26 | yaml.dump(config, f, default_flow_style=False)
27 | return dict2namespace(config)
28 |
29 | def color2bw(inputname, inputpath, outputpath, out_dim, fps):
30 | if inputname.endswith(".mp4"):
31 |
32 | # store informations about the original video
33 | cap = cv2.VideoCapture(inputpath + inputname)
34 | # original dimensions
35 |         width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
36 |
37 |
38 | fourcc = cv2.VideoWriter_fourcc(*'mp4v');
39 |
40 | # parameters of output file
41 |         if out_dim is None:
42 | # dimensions of the output image
43 | new_width, new_height = width, height
44 | else:
45 | new_width, new_height = out_dim
46 |         if fps is None:
47 | # number of frames
48 | fps = 30.0
49 |
50 | # grayscale output video
51 | gray_out = cv2.VideoWriter(
52 | outputpath + 'bw_' + inputname,
53 | fourcc,
54 | fps,
55 | (new_width, new_height),
56 | isColor=False
57 | )
58 |
59 | # color output video
60 | color_out = cv2.VideoWriter(
61 | outputpath + 'color_' + inputname,
62 | fourcc,
63 | fps,
64 | (new_width, new_height),
65 | isColor=True
66 | )
67 |
68 |
69 | while(cap.isOpened()):
70 | ret, frame = cap.read()
71 | # check if we are not at the end of the video
72 | if ret==True:
73 |
74 | #resize frame
75 | frame = cv2.resize(frame, (new_width, new_height), interpolation = cv2.INTER_LINEAR)
76 |
77 | # write the color frame
78 | color_out.write(frame)
79 |
80 | # change color to BW
81 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
82 |
83 | # write the grayscaled frame
84 | gray_out.write(frame)
85 |
86 | if cv2.waitKey(1) & 0xFF == ord('q'):
87 | break
88 | # end of the video
89 | else:
90 | break
91 |
92 | # release everything if job is finished
93 | cap.release()
94 | gray_out.release()
95 | color_out.release()
96 |
97 | def main():
98 | args = parse_args()
99 |
100 | if args.filename == '*':
101 | for filename in os.listdir(args.input_dir):
102 | color2bw(inputname = filename, inputpath = args.input_dir, outputpath = args.output_dir, out_dim = args.out_dim, fps = args.fps)
103 | else:
104 | color2bw(inputname = args.filename, inputpath = args.input_dir, outputpath = args.output_dir, out_dim = args.out_dim, fps = args.fps)
105 |
106 | # cleanup
107 | cv2.destroyAllWindows()
108 |
109 | return 0
110 |
111 | if __name__ == '__main__':
112 | main()
113 |
--------------------------------------------------------------------------------
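When --fps is not given, the output frame rate is hard-coded to 30.0, which changes the playback speed of clips recorded at other rates. If preserving the source rate matters, one option (a sketch, not part of the script) is to query it from the capture object before constructing the writers:

import cv2

cap = cv2.VideoCapture("data/raw/example.mp4")   # hypothetical input clip
src_fps = cap.get(cv2.CAP_PROP_FPS)              # native frame rate reported by the container
fps = src_fps if src_fps > 0 else 30.0           # fall back to 30 fps if it is unknown
cap.release()
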
/cs230_poster.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/cs230_poster.pdf
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 |
4 | matplotlib
5 | pillow
6 |
7 | scikit-image # to convert color images from RGB to LAB color space
8 | opencv-python # to read video clips
9 |
10 | tqdm # to visualize progress bar
--------------------------------------------------------------------------------
/synthesize_results.py:
--------------------------------------------------------------------------------
1 | """Aggregates results from the metrics_eval_best_weights.json in a parent folder"""
2 |
3 | import argparse
4 | import json
5 | import os
6 |
7 | from tabulate import tabulate
8 |
9 |
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument('--parent_dir', default='experiments',
12 | help='Directory containing results of experiments')
13 |
14 |
15 | def aggregate_metrics(parent_dir, metrics):
16 | """Aggregate the metrics of all experiments in folder `parent_dir`.
17 |
18 | Assumes that `parent_dir` contains multiple experiments, with their results stored in
19 |     `parent_dir/subdir/metrics_eval_best_weights.json`
20 |
21 | Args:
22 | parent_dir: (string) path to directory containing experiments results
23 |         metrics: (dict) filled in place, mapping subdir -> {'accuracy': ..., ...}
24 | """
25 | # Get the metrics for the folder if it has results from an experiment
26 | metrics_file = os.path.join(parent_dir, 'metrics_eval_best_weights.json')
27 | if os.path.isfile(metrics_file):
28 | with open(metrics_file, 'r') as f:
29 | metrics[parent_dir] = json.load(f)
30 |
31 | # Check every subdirectory of parent_dir
32 | for subdir in os.listdir(parent_dir):
33 | if not os.path.isdir(os.path.join(parent_dir, subdir)):
34 | continue
35 | else:
36 | aggregate_metrics(os.path.join(parent_dir, subdir), metrics)
37 |
38 |
39 | def metrics_to_table(metrics):
40 | # Get the headers from the first subdir. Assumes everything has the same metrics
41 | headers = metrics[list(metrics.keys())[0]].keys()
42 | table = [[subdir] + [values[h] for h in headers] for subdir, values in metrics.items()]
43 | res = tabulate(table, headers, tablefmt='pipe')
44 |
45 | return res
46 |
47 |
48 | if __name__ == "__main__":
49 | args = parser.parse_args()
50 |
51 | # Aggregate metrics from args.parent_dir directory
52 | metrics = dict()
53 | aggregate_metrics(args.parent_dir, metrics)
54 | table = metrics_to_table(metrics)
55 |
56 | # Display the table to terminal
57 | print(table)
58 |
59 | # Save results in parent_dir/results.md
60 | save_file = os.path.join(args.parent_dir, "results.md")
61 | with open(save_file, 'w') as f:
62 | f.write(table)
63 |
--------------------------------------------------------------------------------
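To illustrate the expected structure, a small sketch that feeds metrics_to_table() a hand-built metrics dict (experiment names and values are made up):

from synthesize_results import metrics_to_table

metrics = {
    'experiments/base_model': {'accuracy': 0.71, 'loss': 0.52},
    'experiments/ref_gan':    {'accuracy': 0.78, 'loss': 0.44},
}
print(metrics_to_table(metrics))   # prints a markdown ('pipe') table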