├── .gitignore
├── Colorizing-with-GANs
│   ├── .gitignore
│   ├── LICENSE
│   ├── README.md
│   ├── __init__.py
│   ├── build_dataset.py
│   ├── dataset.py
│   ├── img
│   │   ├── cgan.png
│   │   ├── con_gan.png
│   │   ├── discriminator.png
│   │   ├── gan.png
│   │   ├── gan_new.png
│   │   ├── places365.jpg
│   │   ├── places365.png
│   │   └── unet.png
│   ├── main.py
│   ├── models.py
│   ├── networks.py
│   ├── ops.py
│   ├── options.py
│   ├── requirements.txt
│   ├── setup.cfg
│   ├── test-eval.py
│   ├── test-turing.py
│   ├── train.py
│   ├── utils.py
│   ├── video_colorize_GAN.py
│   ├── video_colorize_GAN_1st-truth-ref.py
│   └── video_colorize_GAN_truth-ref.py
├── Deep-Learning-Colorization
│   ├── models
│   │   ├── .gitignore
│   │   ├── alexnet_deploy.prototxt
│   │   ├── alexnet_deploy_fc.prototxt
│   │   ├── alexnet_deploy_lab.prototxt
│   │   ├── alexnet_deploy_lab_fc.prototxt
│   │   ├── colorization_deploy_v1.prototxt
│   │   ├── colorization_deploy_v2.prototxt
│   │   ├── colorization_train_val_v2.prototxt
│   │   ├── fetch_alexnet_model.sh
│   │   └── fetch_release_models.sh
│   ├── resources
│   │   ├── batch_norm_absorb.py
│   │   ├── caffe_traininglayers.py
│   │   ├── caffe_traininglayers.pyc
│   │   ├── conv_into_fc.py
│   │   ├── magic_init
│   │   │   ├── LICENSE
│   │   │   ├── README.md
│   │   │   ├── load.py
│   │   │   ├── load.pyc
│   │   │   ├── magic_init_mod.py
│   │   │   └── measure_stat.py
│   │   ├── prior_probs.npy
│   │   ├── pts_in_hull.npy
│   │   ├── softmax_cross_entropy_loss_layer.cpp
│   │   ├── softmax_cross_entropy_loss_layer.cu
│   │   └── softmax_cross_entropy_loss_layer.hpp
│   └── video_colorize_parallel.py
├── README.md
├── Ref-GAN-Colorization
│   ├── .gitignore
│   ├── LICENSE
│   ├── README.md
│   ├── __init__.py
│   ├── build_dataset.py
│   ├── dataset.py
│   ├── main.py
│   ├── models.py
│   ├── models_base.py
│   ├── models_no-discriminator.py
│   ├── networks.py
│   ├── ops.py
│   ├── options.py
│   ├── requirements.txt
│   ├── setup.cfg
│   ├── src
│   │   ├── __init__.py
│   │   ├── build_dataset.py
│   │   ├── dataset.py
│   │   ├── main.py
│   │   ├── models.py
│   │   ├── models_base.py
│   │   ├── models_baseline_img.py
│   │   ├── networks.py
│   │   ├── ops.py
│   │   ├── options.py
│   │   ├── test-eval.py
│   │   ├── test-turing.py
│   │   ├── train.py
│   │   ├── utils.py
│   │   └── video_colorize_GAN.py
│   ├── test-eval.py
│   ├── test-turing.py
│   ├── train.py
│   ├── utils.py
│   └── video_colorize_GAN.py
├── automatic-video-colorization.pdf
├── configuration.txt
├── convert_moment_dataset.sh
├── converter.py
├── cs230_poster.pdf
├── requirements.txt
└── synthesize_results.py
/.gitignore:
--------------------------------------------------------------------------------
1 | data/**
2 | yt8m/**
3 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # custom
104 | _TODO
105 | checkpoints
106 | plots
107 | vcs.xml
108 | .idea
109 | .vscode
--------------------------------------------------------------------------------
/Colorizing-with-GANs/README.md:
--------------------------------------------------------------------------------
1 | # Image Colorization with Generative Adversarial Networks
2 | In this work, we generalize the colorization procedure using a conditional Deep Convolutional Generative Adversarial Network (DCGAN), as suggested by [Pix2Pix]. The network is trained on the [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) and [Places365](http://places2.csail.mit.edu) datasets. Some of the results on the Places365 dataset are [shown here](#places365-results).
3 |
4 | ## Prerequisites
5 | - Linux
6 | - Tensorflow 1.7
7 | - NVIDIA GPU (12G or 24G memory) + CUDA cuDNN
8 |
9 | ## Getting Started
10 | ### Installation
11 | - Clone this repo:
12 | ```bash
13 | git clone https://github.com/ImagingLab/Colorizing-with-GANs.git
14 | cd Colorizing-with-GANs
15 | ```
16 | - Install Tensorflow and dependencies from https://www.tensorflow.org/install/
17 | - Install python requirements:
18 | ```bash
19 | pip install -r requirements.txt
20 | ```
21 |
22 | ### Dataset
23 | - We use [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) and [Places365](http://places2.csail.mit.edu) datasets. To train a model on the full dataset, download datasets from official websites.
24 | After downloading, put them under the `datasets` folder.
25 |
26 | ### Training
27 | - To train the model, run the `main.py` script:
28 | ```bash
29 | python main.py
30 | ```
31 | - To train the model on the Places365 dataset with tuned hyperparameters:
32 | ```bash
33 | python train.py \
34 | --seed 100 \
35 | --dataset places365 \
36 | --dataset-path ./dataset/places365 \
37 | --checkpoints-path ./checkpoints \
38 | --batch-size 16 \
39 | --epochs 10 \
40 | --lr 3e-4 \
41 | --label-smoothing 1
42 |
43 | ```
44 |
45 | - To train the model on the CIFAR-10 dataset with tuned hyperparameters:
46 | ```bash
47 | python train.py \
48 | --seed 100 \
49 | --dataset cifar10 \
50 | --dataset-path ./dataset/cifar10 \
51 | --checkpoints-path ./checkpoints \
52 | --batch-size 128 \
53 | --epochs 200 \
54 | --lr 3e-4 \
55 | --lr-decay-steps 1e4 \
56 | --augment True
57 |
58 | ```
59 |
60 | ### Evaluate
61 | - Download the pre-trained weights [from here](https://drive.google.com/open?id=1jTsAUAKrMiHO2gn7s-fFZ_zUSzgKoPyp) and copy them into the `checkpoints` folder.
62 | - To evaluate the model quantitatively on the test-set, run `test-eval.py` script:
63 | ```bash
64 | python test-eval.py
65 | ```
66 |
67 | ### Turing Test
68 | - Download the pre-trained weights [from here](https://drive.google.com/open?id=1jTsAUAKrMiHO2gn7s-fFZ_zUSzgKoPyp) and copy them into the `checkpoints` folder.
69 | - To evaluate the model qualitatively using a visual Turing test, run `test-turing.py`:
70 | ```bash
71 | python test-turing.py
72 | ```
73 |
74 | - To run a time-based visual Turing test (2-second decision time):
75 | ```bash
76 | python test-turing.py --test-delay 2
77 | ```
78 |
79 |
80 | ## Method
81 |
82 | ### Generative Adversarial Network
83 | Both the generator and the discriminator use CNNs. The generator is trained to minimize the probability that the discriminator makes a correct prediction on generated data, while the discriminator is trained to maximize the probability of assigning the correct label. This is formulated as a single minimax game:
84 |
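In standard GAN notation (a conventional restatement, not the exact formulation from the paper, with `x` a real color image and `z` the noise vector):

```
\min_G \max_D \; V(D, G) =
    \mathbb{E}_{x \sim p_{\text{data}}(x)}\big[\log D(x)\big]
  + \mathbb{E}_{z \sim p_z(z)}\big[\log\big(1 - D(G(z))\big)\big]
```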
85 |
86 |
87 | In our model, we have redefined the generator's cost function by maximizing the probability of the discriminator being mistaken, as opposed to minimizing the probability of the discriminator being correct. In addition, the cost function was further modified by adding an L1-based regularizer. This theoretically preserves the structure of the original images and prevents the generator from assigning arbitrary colors to pixels just to fool the discriminator:
88 |
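A conventional way to write the resulting generator cost (a sketch of the idea rather than the paper's exact equation), with `y` the ground-truth color image, `x` the grayscale input, and λ the L1 weight exposed as `--l1-weight`:

```
\min_{\theta_G} J^{(G)}(\theta_D, \theta_G) =
    -\,\mathbb{E}\big[\log D(G(0_z \mid x))\big]
  + \lambda \, \big\lVert G(0_z \mid x) - y \big\rVert_1
```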
89 |
90 |
91 |
92 | ### Conditional GAN
93 | In a traditional GAN, the input of the generator is randomly generated noise data z. However, this approach is not applicable to the automatic colorization problem due to the nature of its inputs. The generator must be modified to accept grayscale images as inputs rather than noise. This problem was addressed by using a variant of GAN called [conditional generative adversarial networks](https://arxiv.org/abs/1411.1784). Since no noise is introduced, the input of the generator is treated as zero noise with the grayscale input as a prior:
94 |
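With `x` the grayscale condition and `y` the true color image, the conditional objective can be written as follows (again a conventional restatement, not the paper's exact equation):

```
\min_G \max_D \;
    \mathbb{E}_{x,y}\big[\log D(y \mid x)\big]
  + \mathbb{E}_{x}\big[\log\big(1 - D(G(0_z \mid x) \mid x)\big)\big]
```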
95 |
96 |
97 | The discriminator receives colored images from both the generator and the original data, along with the grayscale input as the condition, and tries to tell which pair contains the true colored image:
98 |
99 |
100 |
101 |
102 | ### Networks Architecture
103 | The architecture of the generator is inspired by [U-Net](https://arxiv.org/abs/1505.04597): the model is symmetric, with `n` encoding units and `n` decoding units. The contracting path consists of 4x4 convolution layers with stride 2 for downsampling, each followed by batch normalization and a Leaky-ReLU activation with a slope of 0.2. The number of channels is doubled after each step. Each unit in the expansive path consists of a 4x4 transposed convolution layer with stride 2 for upsampling, concatenation with the activation map of the mirroring layer in the contracting path, followed by batch normalization and a ReLU activation. The last layer of the network is a 1x1 convolution, which is equivalent to a cross-channel parametric pooling layer. We use the `tanh` function for the last layer.
104 |
105 |
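As a rough illustration of this architecture, the snippet below instantiates the `Generator` class from `networks.py`. The kernel configuration and input shape are illustrative assumptions, not the settings used in `models.py`; each kernel entry is `(filters, stride, dropout)`:

```python
import tensorflow as tf
from networks import Generator

# Hypothetical kernel configuration, (filters, stride, dropout) per unit.
encoder_kernels = [(64, 1, 0), (128, 2, 0), (256, 2, 0), (512, 2, 0)]   # contracting path
decoder_kernels = [(256, 2, 0.5), (128, 2, 0), (64, 2, 0)]              # expansive path

# Grayscale input; the 32x32 shape is an assumption for this sketch.
gray = tf.placeholder(tf.float32, [None, 32, 32, 1])

generator = Generator('gen', encoder_kernels, decoder_kernels, output_channels=3)
# 4x4 convolutions, skip connections to the mirrored encoder layers,
# and a final 1x1 convolution with tanh, as described above.
colorized = generator.create(gray, kernel_size=4, seed=42)
```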
106 |
107 |
108 | For the discriminator, we use an architecture similar to the baseline's contracting path: a series of 4x4 convolutional layers with stride 2, with the number of channels doubled after each downsampling step. All convolution layers are followed by batch normalization and a leaky ReLU activation with slope 0.2. After the last layer, a convolution maps the features to a one-dimensional output, followed by a sigmoid function that returns the probability of the input being real or fake.
109 |
110 |
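A matching sketch for the `Discriminator` class from `networks.py` (again with illustrative kernel settings and shapes, not the ones defined in `models.py`):

```python
import tensorflow as tf
from networks import Discriminator

# Hypothetical kernel configuration, (filters, stride, dropout) per layer.
dis_kernels = [(64, 2, 0), (128, 2, 0), (256, 2, 0), (512, 1, 0)]

gray = tf.placeholder(tf.float32, [None, 32, 32, 1])       # grayscale condition
candidate = tf.placeholder(tf.float32, [None, 32, 32, 3])  # real or generated colors

discriminator = Discriminator('dis', dis_kernels)
# Condition and candidate colorization are concatenated along the channel axis;
# the class ends with a 4x4 convolution down to a single channel and returns raw
# scores (the sigmoid is applied in the loss, e.g. via sigmoid cross-entropy).
scores = discriminator.create(tf.concat([gray, candidate], axis=3), kernel_size=4, seed=42)
```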
111 |
112 |
113 | ## Places365 Results
114 | Colorization results with Places365. (a) Grayscale. (b) Original Image. (c) Colorized with GAN.
115 |
116 |
117 |
118 |
119 | ## Citation
120 | If you use this code for your research, please cite our paper Image Colorization with Generative Adversarial Networks:
121 |
122 | ```
123 | @inproceedings{nazeri2018image,
124 | title={Image Colorization Using Generative Adversarial Networks},
125 | author={Nazeri, Kamyar and Ng, Eric and Ebrahimi, Mehran},
126 | booktitle={International Conference on Articulated Motion and Deformable Objects},
127 | pages={85--94},
128 | year={2018},
129 | organization={Springer}
130 | }
131 | ```
132 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/__init__.py:
--------------------------------------------------------------------------------
1 | from .options import *
2 | from .models import *
3 | from .utils import *
4 | from .dataset import *
5 | from .main import *
--------------------------------------------------------------------------------
/Colorizing-with-GANs/build_dataset.py:
--------------------------------------------------------------------------------
1 | """Split the SIGNS dataset into train/dev/test and resize images to 64x64.
2 |
3 | The SIGNS dataset comes in the following format:
4 | train_signs/
5 | 0_IMG_5864.jpg
6 | ...
7 | test_signs/
8 | 0_IMG_5942.jpg
9 | ...
10 |
11 | Original images have size (3024, 3024).
12 | Resizing to (64, 64) reduces the dataset size from 1.16 GB to 4.7 MB, and loading smaller images
13 | makes training faster.
14 |
15 | We already have a test set created, so we only need to split "train_signs" into train and dev sets.
16 | Because we don't have a lot of images and we want that the statistics on the dev set be as
17 | representative as possible, we'll take 20% of "train_signs" as dev set.
18 | """
19 |
20 | import argparse
21 | import random
22 | import os
23 |
24 | import numpy as np
25 |
26 | from PIL import Image
27 | from tqdm import tqdm
28 | import cv2
29 |
30 | # size of the resized frames
31 | SIZE = 256
32 |
33 | # subfolder of the "Moments_in_Time" dataset to consider
34 | SUBFOLDER = "/baking"
35 |
36 | parser = argparse.ArgumentParser()
37 | parser.add_argument('--data_dir', default='../data/Moments_in_Time_Mini', help="Directory with the Moments in Time dataset")
38 | parser.add_argument('--output_dir', default='../data/momentsintime', help="Where to write the new data")
39 | parser.add_argument('--dt', type=int, default=1, help="Time between consecutive frames")
40 |
41 |
42 | def split_resize_and_save(filename, i, output_dir, dt=1, size=SIZE):
43 | """Split the video clip in pair of consecutive frames (t, t+dt), resize the frames, and save the pairs to the `output_dir`"""
44 |
45 | vidcap = cv2.VideoCapture(filename)
46 |
47 | success, frame = vidcap.read()
48 | # convert BGR to RGB convention
49 | frame = frame[:,:,::-1]
50 | # default : use bilinear interpolation
51 | frame_prev = cv2.resize(frame, (size, size))
52 |
53 | # counter to build pairs of consecutive frames
54 | count = 1
55 |
56 | while success:
57 | count += 1
58 |
59 | success, frame = vidcap.read()
60 |
61 | if success:
62 | # convert BGR to RGB convention
63 | frame = frame[:,:,::-1]
64 | # default : use bilinear interpolation
65 | frame = cv2.resize(frame, (size, size))
66 | else:
67 | break
68 | #print('Read a new frame: ', success)
69 |
70 | if count % (1+dt) == 0:
71 | img = np.concatenate((frame, frame_prev), 2)
72 | frame_prev = frame
73 | np.save(output_dir + "/video{}_frame{}".format(i, count), img)
74 |
75 | if __name__ == '__main__':
76 | args = parser.parse_args()
77 | # Define the output directory
78 | args.output_dir = args.output_dir + "_dt" + str(args.dt)
79 |
80 | assert os.path.isdir(args.data_dir), "Couldn't find the dataset at {}".format(args.data_dir)
81 |
82 | # Define the data directories
83 | train_data_dir = os.path.join(args.data_dir, 'training' + SUBFOLDER)
84 | test_data_dir = os.path.join(args.data_dir, 'validation' + SUBFOLDER)
85 |
86 | # Get the filenames in each directory (train and test)
87 | filenames = os.listdir(train_data_dir)
88 | filenames = [os.path.join(train_data_dir, f) for f in filenames if f.endswith('.mp4')]
89 |
90 | test_filenames = os.listdir(test_data_dir)
91 | test_filenames = [os.path.join(test_data_dir, f) for f in test_filenames if f.endswith('.mp4')]
92 |
93 |     # Split the training clips into 90% train and 10% dev
94 | # Make sure to always shuffle with a fixed seed so that the split is reproducible
95 | random.seed(230)
96 | filenames.sort()
97 | random.shuffle(filenames)
98 |
99 | split = int(0.9 * len(filenames))
100 | train_filenames = filenames[:split]
101 | dev_filenames = filenames[split:]
102 |
103 | filenames = {'train': train_filenames,
104 | 'dev': dev_filenames,
105 | 'test': test_filenames}
106 |
107 | if not os.path.exists(args.output_dir):
108 | os.mkdir(args.output_dir)
109 | else:
110 | print("Warning: output dir {} already exists".format(args.output_dir))
111 |
112 | # Preprocess train, dev and test
113 | for split in ['train', 'dev', 'test']:
114 | output_dir_split = os.path.join(args.output_dir, '{}_moments'.format(split))
115 | if not os.path.exists(output_dir_split):
116 | os.mkdir(output_dir_split)
117 | else:
118 | print("Warning: dir {} already exists".format(output_dir_split))
119 |
120 | print("Processing {} data, saving preprocessed data to {}".format(split, output_dir_split))
121 | for i, filename in enumerate(tqdm(filenames[split])):
122 | split_resize_and_save(filename, i, output_dir_split, dt=args.dt, size=SIZE)
123 |
124 | print("Done building dataset")
125 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/dataset.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import numpy as np
3 | import tensorflow as tf
4 | from scipy.misc import imread
5 | from abc import abstractmethod
6 | from utils import unpickle
7 |
8 | CIFAR10_DATASET = 'cifar10'
9 | PLACES365_DATASET = 'places365'
10 | MOMENTSINTIME_DATASET = 'momentsintime'
11 |
12 | class BaseDataset():
13 | def __init__(self, name, path, training=True, augment=True):
14 | self.name = name
15 | self.augment = augment and training
16 | self.training = training
17 | self.path = path
18 | self._data = []
19 |
20 | def __len__(self):
21 | return len(self.data)
22 |
23 | def __iter__(self):
24 | total = len(self)
25 | start = 0
26 |
27 | while start < total:
28 | item = self[start]
29 | start += 1
30 | yield item
31 |
32 |         return  # PEP 479: raising StopIteration inside a generator is an error in Python 3.7+
33 |
34 | def __getitem__(self, index):
35 | val = self.data[index]
36 | try:
37 | # OLD : img = imread(val) if isinstance(val, str) else val
38 | img = np.load(val) if isinstance(val, str) else val
39 |
40 | if self.augment and np.random.binomial(1, 0.5) == 1:
41 | img = img[:, ::-1, :]
42 |
43 | except:
44 | img = None
45 |
46 | return img
47 |
48 | def generator(self, batch_size, recursive=False):
49 | start = 0
50 | total = len(self)
51 |
52 | while True:
53 | while start < total:
54 | end = np.min([start + batch_size, total])
55 | items = []
56 |
57 | for ix in range(start, end):
58 | item = self[ix]
59 | if item is not None:
60 | items.append(item)
61 |
62 | start = end
63 | yield np.array(items)
64 |
65 | if recursive:
66 | start = 0
67 |
68 | else:
69 |                 return  # stop the generator instead of raising StopIteration (PEP 479)
70 |
71 |
72 | @property
73 | def data(self):
74 | if len(self._data) == 0:
75 | self._data = self.load()
76 | np.random.shuffle(self._data)
77 |
78 | return self._data
79 |
80 | @abstractmethod
81 | def load(self):
82 | return []
83 |
84 |
85 | class Cifar10Dataset(BaseDataset):
86 | def __init__(self, path, training=True, augment=True):
87 | super(Cifar10Dataset, self).__init__(CIFAR10_DATASET, path, training, augment)
88 |
89 | def load(self):
90 | data = []
91 | if self.training:
92 | for i in range(1, 6):
93 | filename = '{}/data_batch_{}'.format(self.path, i)
94 | batch_data = unpickle(filename)
95 | if len(data) > 0:
96 | data = np.vstack((data, batch_data[b'data']))
97 | else:
98 | data = batch_data[b'data']
99 |
100 | else:
101 | filename = '{}/test_batch'.format(self.path)
102 | batch_data = unpickle(filename)
103 | data = batch_data[b'data']
104 |
105 | w = 32
106 | h = 32
107 | s = w * h
108 | data = np.array(data)
109 | data = np.dstack((data[:, :s], data[:, s:2 * s], data[:, 2 * s:]))
110 | data = data.reshape((-1, w, h, 3))
111 | return data
112 |
113 |
114 | class Places365Dataset(BaseDataset):
115 | def __init__(self, path, training=True, augment=True):
116 | super(Places365Dataset, self).__init__(PLACES365_DATASET, path, training, augment)
117 |
118 | def load(self):
119 | if self.training:
120 | data = np.array(
121 | glob.glob(self.path + '/data_256/**/*.jpg', recursive=True))
122 |
123 | else:
124 | data = np.array(glob.glob(self.path + '/val_256/*.jpg'))
125 |
126 | return data
127 |
128 |
129 | class MomentsInTimeDataset(BaseDataset):
130 | def __init__(self, path, training=True, augment=True):
131 | super(MomentsInTimeDataset, self).__init__(MOMENTSINTIME_DATASET, path, training, augment)
132 |
133 | def load(self):
134 | if self.training:
135 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/train_moments/*"))
136 | data = np.array(glob.glob("." + self.path + "/train_moments/*"))
137 | else:
138 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/dev_moments/*"))
139 | data = np.array(glob.glob("." + self.path + "/dev_moments/*"))
140 |
141 | return data
142 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/cgan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/cgan.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/con_gan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/con_gan.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/discriminator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/discriminator.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/gan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/gan.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/gan_new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/gan_new.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/places365.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/places365.jpg
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/places365.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/places365.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/img/unet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Colorizing-with-GANs/img/unet.png
--------------------------------------------------------------------------------
/Colorizing-with-GANs/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import numpy as np
4 | import tensorflow as tf
5 | from options import ModelOptions
6 | from models import Cifar10Model, Places365Model, MomentsInTimeModel
7 | from dataset import CIFAR10_DATASET, PLACES365_DATASET, MOMENTSINTIME_DATASET
8 |
9 |
10 | def main(options):
11 |
12 | # reset tensorflow graph
13 | tf.reset_default_graph()
14 |
15 | # initialize random seed
16 | tf.set_random_seed(options.seed)
17 | np.random.seed(options.seed)
18 | random.seed(options.seed)
19 |
20 | # create a session environment
21 | with tf.Session() as sess:
22 |
23 | if options.dataset == CIFAR10_DATASET:
24 | model = Cifar10Model(sess, options)
25 |
26 | elif options.dataset == PLACES365_DATASET:
27 | model = Places365Model(sess, options)
28 |
29 | elif options.dataset == MOMENTSINTIME_DATASET:
30 | model = MomentsInTimeModel(sess, options)
31 |
32 | else:
33 | model = MomentsInTimeModel(sess, options)
34 |
35 | if not os.path.exists(options.checkpoints_path):
36 | os.makedirs(options.checkpoints_path)
37 |
38 | if options.log:
39 | open(model.train_log_file, 'w').close()
40 | open(model.test_log_file, 'w').close()
41 |
42 | # build the model and initialize
43 | model.build()
44 | sess.run(tf.global_variables_initializer())
45 |
46 |
47 | # load model only after global variables initialization
48 | model.load()
49 |
50 |
51 | if options.mode == 0:
52 | args = vars(options)
53 | print('\n------------ Options -------------')
54 | with open(os.path.join(options.checkpoints_path, 'options.dat'), 'w') as f:
55 | for k, v in sorted(args.items()):
56 | print('%s: %s' % (str(k), str(v)))
57 | f.write('%s: %s\n' % (str(k), str(v)))
58 | print('-------------- End ----------------\n')
59 |
60 | model.train()
61 |
62 | elif options.mode == 1:
63 | model.evaluate()
64 | while True:
65 | model.sample()
66 |
67 | else:
68 | model.turing_test()
69 |
70 |
71 | if __name__ == "__main__":
72 | main(ModelOptions().parse())
73 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/networks.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from ops import conv2d, conv2d_transpose, pixelwise_accuracy
4 |
5 |
6 | class Discriminator(object):
7 | def __init__(self, name, kernels):
8 | self.name = name
9 | self.kernels = kernels
10 | self.var_list = []
11 |
12 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None):
13 | output = inputs
14 | with tf.variable_scope(self.name, reuse=reuse_variables):
15 | for index, kernel in enumerate(self.kernels):
16 |
17 |             # do not use batch-norm in the first layer
18 | bnorm = False if index == 0 else True
19 | name = 'conv' + str(index)
20 | output = conv2d(
21 | inputs=output,
22 | name=name,
23 | kernel_size=kernel_size,
24 | filters=kernel[0],
25 | strides=kernel[1],
26 | bnorm=bnorm,
27 | activation=tf.nn.leaky_relu,
28 | seed=seed
29 | )
30 |
31 | if kernel[2] > 0:
32 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
33 |
34 | output = conv2d(
35 | inputs=output,
36 | name='conv_last',
37 | filters=1,
38 | kernel_size=4, # last layer kernel size = 4
39 | strides=1, # last layer stride = 1
40 | bnorm=False, # do not use batch-norm for the last layer
41 | seed=seed
42 | )
43 |
44 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)
45 |
46 | return output
47 |
48 |
49 | class Generator(object):
50 | def __init__(self, name, encoder_kernels, decoder_kernels, output_channels=3):
51 | self.name = name
52 | self.encoder_kernels = encoder_kernels
53 | self.decoder_kernels = decoder_kernels
54 | self.output_channels = output_channels
55 | self.var_list = []
56 |
57 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None):
58 | output = inputs
59 |
60 | with tf.variable_scope(self.name, reuse=reuse_variables):
61 |
62 | layers = []
63 |
64 | # encoder branch
65 | for index, kernel in enumerate(self.encoder_kernels):
66 |
67 | name = 'conv' + str(index)
68 | output = conv2d(
69 | inputs=output,
70 | name=name,
71 | kernel_size=kernel_size,
72 | filters=kernel[0],
73 | strides=kernel[1],
74 | activation=tf.nn.leaky_relu,
75 | seed=seed
76 | )
77 |
78 | # save contracting path layers to be used for skip connections
79 | layers.append(output)
80 |
81 | if kernel[2] > 0:
82 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
83 |
84 | # decoder branch
85 | for index, kernel in enumerate(self.decoder_kernels):
86 |
87 | name = 'deconv' + str(index)
88 | output = conv2d_transpose(
89 | inputs=output,
90 | name=name,
91 | kernel_size=kernel_size,
92 | filters=kernel[0],
93 | strides=kernel[1],
94 | activation=tf.nn.relu,
95 | seed=seed
96 | )
97 |
98 | if kernel[2] > 0:
99 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
100 |
101 | # concat the layer from the contracting path with the output of the current layer
102 | # concat only the channels (axis=3)
103 | output = tf.concat([layers[len(layers) - index - 2], output], axis=3)
104 |
105 | output = conv2d(
106 | inputs=output,
107 | name='conv_last',
108 |             filters=self.output_channels,   # number of output channels
109 | kernel_size=1, # last layer kernel size = 1
110 | strides=1, # last layer stride = 1
111 | bnorm=False, # do not use batch-norm for the last layer
112 | activation=tf.nn.tanh, # tanh activation function for the output
113 | seed=seed
114 | )
115 |
116 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)
117 |
118 | return output
119 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/ops.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | COLORSPACE_RGB = 'RGB'
5 | COLORSPACE_LAB = 'LAB'
6 | 
7 |
8 | def conv2d(inputs, filters, name, kernel_size=4, strides=2, bnorm=True, activation=None, seed=None):
9 | """
10 | Creates a conv2D block
11 | """
12 | initializer=tf.variance_scaling_initializer(seed=seed)
13 | res = tf.layers.conv2d(
14 | name=name,
15 | inputs=inputs,
16 | filters=filters,
17 | kernel_size=kernel_size,
18 | strides=strides,
19 | padding="same",
20 | kernel_initializer=initializer)
21 |
22 | if bnorm:
23 | res = tf.layers.batch_normalization(inputs=res, name='bn_' + name, training=True)
24 |
25 | # activation after batch-norm
26 | if activation is not None:
27 | res = activation(res)
28 |
29 | return res
30 |
31 |
32 | def conv2d_transpose(inputs, filters, name, kernel_size=4, strides=2, bnorm=True, activation=None, seed=None):
33 | """
34 | Creates a conv2D-transpose block
35 | """
36 | initializer=tf.variance_scaling_initializer(seed=seed)
37 | res = tf.layers.conv2d_transpose(
38 | name=name,
39 | inputs=inputs,
40 | filters=filters,
41 | kernel_size=kernel_size,
42 | strides=strides,
43 | padding="same",
44 | kernel_initializer=initializer)
45 |
46 | if bnorm:
47 | res = tf.layers.batch_normalization(inputs=res, name='bn_' + name, training=True)
48 |
49 | # activation after batch-norm
50 | if activation is not None:
51 | res = activation(res)
52 |
53 | return res
54 |
55 |
56 | def pixelwise_accuracy(img_real, img_fake, colorspace, thresh):
57 | """
58 | Measures the accuracy of the colorization process by comparing pixels
59 | """
60 | img_real = postprocess(img_real, colorspace, COLORSPACE_LAB)
61 | img_fake = postprocess(img_fake, colorspace, COLORSPACE_LAB)
62 |
63 | diffL = tf.abs(tf.round(img_real[..., 0]) - tf.round(img_fake[..., 0]))
64 | diffA = tf.abs(tf.round(img_real[..., 1]) - tf.round(img_fake[..., 1]))
65 | diffB = tf.abs(tf.round(img_real[..., 2]) - tf.round(img_fake[..., 2]))
66 |
67 | # within %thresh of the original
68 | predL = tf.cast(tf.less_equal(diffL, 1 * thresh), tf.float64) # L: [0, 100]
69 | predA = tf.cast(tf.less_equal(diffA, 2.2 * thresh), tf.float64) # A: [-110, 110]
70 | predB = tf.cast(tf.less_equal(diffB, 2.2 * thresh), tf.float64) # B: [-110, 110]
71 |
72 | # all three channels are within the threshold
73 | pred = predL * predA * predB
74 |
75 | return tf.reduce_mean(pred)
76 |
77 |
78 | def preprocess(img, colorspace_in, colorspace_out):
79 | if colorspace_out.upper() == COLORSPACE_RGB:
80 | if colorspace_in == COLORSPACE_LAB:
81 | img = lab_to_rgb(img)
82 |
83 | # [0, 1] => [-1, 1]
84 | img = (img / 255.0) * 2 - 1
85 |
86 | elif colorspace_out.upper() == COLORSPACE_LAB:
87 | if colorspace_in == COLORSPACE_RGB:
88 | img = rgb_to_lab(img / 255.0)
89 |
90 | L_chan, a_chan, b_chan = tf.unstack(img, axis=3)
91 |
92 | # L: [0, 100] => [-1, 1]
93 | # A, B: [-110, 110] => [-1, 1]
94 | img = tf.stack([L_chan / 50 - 1, a_chan / 110, b_chan / 110], axis=3)
95 |
96 | return img
97 |
98 |
99 | def postprocess(img, colorspace_in, colorspace_out):
100 | if colorspace_in.upper() == COLORSPACE_RGB:
101 | # [-1, 1] => [0, 1]
102 | img = (img + 1) / 2
103 |
104 | if colorspace_out == COLORSPACE_LAB:
105 | img = rgb_to_lab(img)
106 |
107 | elif colorspace_in.upper() == COLORSPACE_LAB:
108 | L_chan, a_chan, b_chan = tf.unstack(img, axis=3)
109 |
110 | # L: [-1, 1] => [0, 100]
111 | # A, B: [-1, 1] => [-110, 110]
112 | img = tf.stack([(L_chan + 1) / 2 * 100, a_chan * 110, b_chan * 110], axis=3)
113 |
114 | if colorspace_out == COLORSPACE_RGB:
115 | img = lab_to_rgb(img)
116 |
117 | return img
118 |
119 |
120 | def rgb_to_lab(srgb):
121 | # based on https://github.com/torch/image/blob/9f65c30167b2048ecbe8b7befdc6b2d6d12baee9/generic/image.c
122 | with tf.name_scope("rgb_to_lab"):
123 | srgb_pixels = tf.reshape(srgb, [-1, 3])
124 |
125 | with tf.name_scope("srgb_to_xyz"):
126 | linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32)
127 | exponential_mask = tf.cast(srgb_pixels > 0.04045, dtype=tf.float32)
128 | rgb_pixels = (srgb_pixels / 12.92 * linear_mask) + (((srgb_pixels + 0.055) / 1.055) ** 2.4) * exponential_mask
129 | rgb_to_xyz = tf.constant([
130 | # X Y Z
131 | [0.412453, 0.212671, 0.019334], # R
132 | [0.357580, 0.715160, 0.119193], # G
133 | [0.180423, 0.072169, 0.950227], # B
134 | ])
135 | xyz_pixels = tf.matmul(rgb_pixels, rgb_to_xyz)
136 |
137 | # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions
138 | with tf.name_scope("xyz_to_cielab"):
139 |
140 | # normalize for D65 white point
141 | xyz_normalized_pixels = tf.multiply(xyz_pixels, [1 / 0.950456, 1.0, 1 / 1.088754])
142 |
143 | epsilon = 6 / 29
144 | linear_mask = tf.cast(xyz_normalized_pixels <= (epsilon**3), dtype=tf.float32)
145 | exponential_mask = tf.cast(xyz_normalized_pixels > (epsilon**3), dtype=tf.float32)
146 | fxfyfz_pixels = (xyz_normalized_pixels / (3 * epsilon**2) + 4 / 29) * linear_mask + (xyz_normalized_pixels ** (1 / 3)) * exponential_mask
147 |
148 | # convert to lab
149 | fxfyfz_to_lab = tf.constant([
150 | # l a b
151 | [0.0, 500.0, 0.0], # fx
152 | [116.0, -500.0, 200.0], # fy
153 | [0.0, 0.0, -200.0], # fz
154 | ])
155 | lab_pixels = tf.matmul(fxfyfz_pixels, fxfyfz_to_lab) + tf.constant([-16.0, 0.0, 0.0])
156 |
157 | return tf.reshape(lab_pixels, tf.shape(srgb))
158 |
159 |
160 | def lab_to_rgb(lab):
161 | with tf.name_scope("lab_to_rgb"):
162 | lab_pixels = tf.reshape(lab, [-1, 3])
163 |
164 | # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions
165 | with tf.name_scope("cielab_to_xyz"):
166 | # convert to fxfyfz
167 | lab_to_fxfyfz = tf.constant([
168 | # fx fy fz
169 | [1 / 116.0, 1 / 116.0, 1 / 116.0], # l
170 | [1 / 500.0, 0.0, 0.0], # a
171 | [0.0, 0.0, -1 / 200.0], # b
172 | ])
173 | fxfyfz_pixels = tf.matmul(lab_pixels + tf.constant([16.0, 0.0, 0.0]), lab_to_fxfyfz)
174 |
175 | # convert to xyz
176 | epsilon = 6 / 29
177 | linear_mask = tf.cast(fxfyfz_pixels <= epsilon, dtype=tf.float32)
178 | exponential_mask = tf.cast(fxfyfz_pixels > epsilon, dtype=tf.float32)
179 | xyz_pixels = (3 * epsilon**2 * (fxfyfz_pixels - 4 / 29)) * linear_mask + (fxfyfz_pixels ** 3) * exponential_mask
180 |
181 | # denormalize for D65 white point
182 | xyz_pixels = tf.multiply(xyz_pixels, [0.950456, 1.0, 1.088754])
183 |
184 | with tf.name_scope("xyz_to_srgb"):
185 | xyz_to_rgb = tf.constant([
186 | # r g b
187 | [3.2404542, -0.9692660, 0.0556434], # x
188 | [-1.5371385, 1.8760108, -0.2040259], # y
189 | [-0.4985314, 0.0415560, 1.0572252], # z
190 | ])
191 | rgb_pixels = tf.matmul(xyz_pixels, xyz_to_rgb)
192 | # avoid a slightly negative number messing up the conversion
193 | rgb_pixels = tf.clip_by_value(rgb_pixels, 0.0, 1.0)
194 | linear_mask = tf.cast(rgb_pixels <= 0.0031308, dtype=tf.float32)
195 | exponential_mask = tf.cast(rgb_pixels > 0.0031308, dtype=tf.float32)
196 | srgb_pixels = (rgb_pixels * 12.92 * linear_mask) + ((rgb_pixels ** (1 / 2.4) * 1.055) - 0.055) * exponential_mask
197 |
198 | return tf.reshape(srgb_pixels, tf.shape(lab))
199 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/options.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import random
4 | import argparse
5 |
6 |
7 | def str2bool(v):
8 | if v.lower() in ('yes', 'true', 't', 'y', '1'):
9 | return True
10 | elif v.lower() in ('no', 'false', 'f', 'n', '0'):
11 | return False
12 | else:
13 | raise argparse.ArgumentTypeError('Boolean value expected.')
14 |
15 |
16 | class ModelOptions:
17 | def __init__(self):
18 | parser = argparse.ArgumentParser(description='Colorization with GANs')
19 | parser.add_argument('--seed', type=int, default=0, metavar='S', help='random seed (default: 0)')
20 | parser.add_argument('--name', type=str, default='CGAN', help='arbitrary model name (default: CGAN)')
21 |         parser.add_argument('--mode', type=int, default=0, help='run mode [0: train, 1: evaluate, 2: test] (default: 0)')
22 | parser.add_argument('--dataset', type=str, default='momentsintime', help='the name of dataset [places365, cifar10] (default: momentsintime)')
23 | parser.add_argument('--dataset-path', type=str, default='./data', help='dataset path (default: ./data)')
24 | parser.add_argument('--checkpoints-path', type=str, default='./checkpoints', help='models are saved here (default: ./checkpoints)')
25 | parser.add_argument('--batch-size', type=int, default=16, metavar='N', help='input batch size for training (default: 16)')
26 | parser.add_argument('--color-space', type=str, default='lab', help='model color space [lab, rgb] (default: lab)')
27 |         parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs to train (default: 100)')
28 |         parser.add_argument('--lr', type=float, default=5e-3, metavar='LR', help='learning rate (default: 5e-3)')
29 |         parser.add_argument('--lr-decay-rate', type=float, default=0.1, help='learning rate exponentially decay rate (default: 0.1)')
30 |         parser.add_argument('--lr-decay-steps', type=float, default=25e2, help='learning rate exponentially decay steps (default: 25e2)')
31 | parser.add_argument('--beta1', type=float, default=0, help='momentum term of adam optimizer (default: 0)')
32 | parser.add_argument("--l1-weight", type=float, default=100.0, help="weight on L1 term for generator gradient (default: 100.0)")
33 | parser.add_argument('--augment', type=str2bool, default=True, help='True for augmentation (default: True)')
34 | parser.add_argument('--label-smoothing', type=str2bool, default=False, help='True for one-sided label smoothing (default: False)')
35 | parser.add_argument('--acc-thresh', type=float, default=2.0, help="accuracy threshold (default: 2.0)")
36 | parser.add_argument('--kernel-size', type=int, default=4, help="default kernel size (default: 4)")
37 | parser.add_argument('--save', type=str2bool, default=True, help='True for saving (default: True)')
38 |         parser.add_argument('--save-interval', type=int, default=100, help='how many batches to wait before saving model (default: 100)')
39 |         parser.add_argument('--sample', type=str2bool, default=True, help='True for sampling (default: True)')
40 |         parser.add_argument('--sample-size', type=int, default=8, help='number of images to sample (default: 8)')
41 |         parser.add_argument('--sample-interval', type=int, default=100, help='how many batches to wait before sampling (default: 100)')
42 | parser.add_argument('--validate', type=str2bool, default=True, help='True for validation (default: True)')
43 | parser.add_argument('--validate-interval', type=int, default=0, help='how many batches to wait before validating (default: 0)')
44 | parser.add_argument('--log', type=str2bool, default=True, help='True for logging (default: True)')
45 | parser.add_argument('--log-interval', type=int, default=10, help='how many iterations to wait before logging training status (default: 10)')
46 | parser.add_argument('--visualize', type=str2bool, default=False, help='True for accuracy visualization (default: False)')
47 | parser.add_argument('--visualize-window', type=int, default=100, help='the exponentially moving average window width (default: 100)')
48 | parser.add_argument('--test-size', type=int, default=100, metavar='N', help='number of Turing tests (default: 100)')
49 | parser.add_argument('--test-delay', type=int, default=0, metavar='N', help='number of seconds to wait when doing Turing test, 0 for unlimited (default: 0)')
50 |         parser.add_argument('--gpu-ids', type=str, default='0', help='gpu ids, e.g. 0 or 0,1,2 or 0,2; use -1 for CPU')
51 | # to recolorize a video clip
52 | parser.add_argument('--filename', type=str, default='*', help='Filename of input BW video')
53 | parser.add_argument('--input_dir', type=str, default='../data/examples/converted', help='Directory of input files')
54 | parser.add_argument('--output_dir', type=str, default='../data/examples/recolorized', help='Directory of output files')
55 |
56 | self._parser = parser
57 |
58 | def parse(self):
59 | opt = self._parser.parse_args()
60 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids
61 |
62 | opt.color_space = opt.color_space.upper()
63 |
64 | if opt.seed == 0:
65 | opt.seed = random.randint(0, 2**31 - 1)
66 |
67 | if (opt.dataset_path == './data') or (opt.dataset_path == './dataset'):
68 | opt.dataset_path += ('/' + opt.dataset)
69 |
70 | if opt.checkpoints_path == './checkpoints':
71 | opt.checkpoints_path += ('/' + opt.dataset)
72 |
73 | return opt
74 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy ~= 1.14.3
2 | scipy ~= 1.0.1
3 | future ~= 0.16.0
4 | matplotlib ~= 2.2.2
5 | pillow ~= 5.0.0
6 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/setup.cfg:
--------------------------------------------------------------------------------
1 | [pycodestyle]
2 | ignore = E303
3 | max-line-length = 200
--------------------------------------------------------------------------------
/Colorizing-with-GANs/test-eval.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 1
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/test-turing.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 2
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/train.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 0
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/utils.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | from PIL import Image
4 | import matplotlib.pyplot as plt
5 |
6 |
7 | def stitch_images(grayscale, original, pred):
8 | gap = 5
9 | width, height = original[0][:, :, 0].shape
10 | img_per_row = 2 if width > 200 else 4
11 | img = Image.new('RGB', (width * img_per_row * 3 + gap * (img_per_row - 1), height * int(len(original) / img_per_row)))
12 |
13 | grayscale = np.array(grayscale).squeeze()
14 | original = np.array(original)
15 | pred = np.array(pred)
16 |
17 | for ix in range(len(original)):
18 | xoffset = int(ix % img_per_row) * width * 3 + int(ix % img_per_row) * gap
19 | yoffset = int(ix / img_per_row) * height
20 | im1 = Image.fromarray(grayscale[ix])
21 | im2 = Image.fromarray(original[ix])
22 | im3 = Image.fromarray((pred[ix] * 255).astype(np.uint8))
23 | img.paste(im1, (xoffset, yoffset))
24 | img.paste(im2, (xoffset + width, yoffset))
25 | img.paste(im3, (xoffset + width + width, yoffset))
26 |
27 | return img
28 |
29 |
30 | def unpickle(file):
31 | with open(file, 'rb') as fo:
32 | dict = pickle.load(fo, encoding='bytes')
33 | return dict
34 |
35 |
36 | def moving_average(data, window_width):
37 | cumsum_vec = np.cumsum(np.insert(data, 0, 0))
38 | ma_vec = (cumsum_vec[window_width:] - cumsum_vec[:-window_width]) / window_width
39 | return ma_vec
40 |
41 |
42 | def imshow(img, title=''):
43 | fig = plt.gcf()
44 | fig.canvas.set_window_title(title)
45 | plt.axis('off')
46 | plt.imshow(img, interpolation='none')
47 | plt.show()
48 |
49 |
50 | def turing_test(real_img, fake_img, delay=0):
51 | height, width, _ = real_img.shape
52 | imgs = np.array([real_img, (fake_img * 255).astype(np.uint8)])
53 | real_index = np.random.binomial(1, 0.5)
54 | fake_index = (real_index + 1) % 2
55 |
56 | img = Image.new('RGB', (2 + width * 2, height))
57 | img.paste(Image.fromarray(imgs[real_index]), (0, 0))
58 | img.paste(Image.fromarray(imgs[fake_index]), (2 + width, 0))
59 |
60 | img.success = 0
61 |
62 | def onclick(event):
63 | if event.xdata is not None:
64 | if event.x < width and real_index == 0:
65 | img.success = 1
66 |
67 | elif event.x > width and real_index == 1:
68 | img.success = 1
69 |
70 | plt.gcf().canvas.stop_event_loop()
71 |
72 | plt.ion()
73 | plt.gcf().canvas.mpl_connect('button_press_event', onclick)
74 | plt.title('click on the real image')
75 | plt.axis('off')
76 | plt.imshow(img, interpolation='none')
77 | plt.show()
78 | plt.draw()
79 | plt.gcf().canvas.start_event_loop(delay)
80 |
81 | return img.success
82 |
83 |
84 | def visualize(train_log_file, test_log_file, window_width, title=''):
85 | train_data = np.loadtxt(train_log_file)
86 | test_data = np.loadtxt(test_log_file)
87 |
88 | if len(train_data.shape) < 2:
89 | return
90 |
91 | if len(train_data) < window_width:
92 | window_width = len(train_data) - 1
93 |
94 | fig = plt.gcf()
95 | fig.canvas.set_window_title(title)
96 |
97 | plt.ion()
98 | plt.subplot('121')
99 | plt.cla()
100 | if len(train_data) > 1:
101 | plt.plot(moving_average(train_data[:, 8], window_width))
102 | plt.title('train')
103 |
104 | plt.subplot('122')
105 | plt.cla()
106 | if len(test_data) > 1:
107 | plt.plot(test_data[:, 8])
108 | plt.title('test')
109 |
110 | plt.show()
111 | plt.draw()
112 | plt.pause(.01)
113 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/video_colorize_GAN.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import cv2
6 | import numpy as np
7 | from PIL import Image
8 | from skimage import img_as_ubyte, img_as_float
9 | import skimage.color as color
10 | import scipy.ndimage.interpolation as sni
11 | from ops import postprocess
12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB
13 |
14 | import tensorflow as tf
15 | from options import ModelOptions
16 | from models import MomentsInTimeModel
17 |
18 |
19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, options):
20 |
21 | # colorize the image based on the previous one
22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0)}
23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic)
24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB)
25 |
26 |     # evaluate the tensor
27 | img_rgb_out = fake_image.eval()
28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8)
29 |
30 | return img_rgb_out
31 |
32 | def bw2color(options, inputname, inputpath, outputpath):
33 | if inputname.endswith(".mp4"):
34 | # size of the input frames
35 | size = 256
36 |
37 | # check that the video exists
38 | path_to_video = os.path.join(inputpath, inputname)
39 | if not os.path.exists(path_to_video):
40 |             print("The file:", path_to_video, "does not exist!")
41 | 
42 |         # store information about the original video
43 | cap = cv2.VideoCapture(os.path.join(path_to_video))
44 | # original dimensions
45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v');
48 | # parameters of output file
49 | # dimensions of the output image
50 | new_width, new_height = size, size
51 | # number of frames
52 | fps = 30.0
53 |
54 | # recolorized output video
55 | color_out = cv2.VideoWriter(
56 | os.path.join(outputpath, 'color_' + inputname),
57 | fourcc,
58 | fps,
59 | (new_width, new_height),
60 | isColor=True
61 | )
62 |
63 | # TO CHANGE to DL colorization of 1st frame
64 | # pick the first frame from the original video clip as the first reference
65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:]))
66 |
67 | ret_temp, frame_prev = cap_temp.read()
68 | # convert BGR to RGB convention
69 | frame_prev = frame_prev[:,:,::-1]
70 | frame_prev = cv2.resize(frame_prev, (size, size))
71 |
72 | # count the number of recolorized frames
73 | frames_processed = 0
74 |
75 | with tf.Session() as sess:
76 |
77 | model = MomentsInTimeModel(sess, options)
78 |
79 | # build the model and initialize
80 | model.build()
81 | sess.run(tf.global_variables_initializer())
82 |
83 | # load model only after global variables initialization
84 | model.load()
85 |
86 | while(cap.isOpened()):
87 | ret, frame_in = cap.read()
88 |
89 | # check if we are not at the end of the video
90 | if ret==True:
91 | # convert BGR to RGB convention
92 | frame_in = frame_in[:,:,::-1]
93 | # resize the frame to match the input size of the GAN
94 | frame_in = cv2.resize(frame_in, (size, size))
95 |
96 | # colorize the BW frame
97 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, options)
98 |
99 | #generate sample
100 | get_image = False
101 | if get_image:
102 | img = Image.fromarray(frame_out)
103 |
104 | if not os.path.exists(model.samples_dir):
105 | os.makedirs(model.samples_dir)
106 |
107 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png"
108 | img.save(os.path.join(model.samples_dir, sample))
109 |
110 | # save the recolorized frame
111 | frame_prev = frame_out
112 | # convert RGB to BGR convention
113 | frame_out = frame_out[:,:,::-1]
114 | # write the color frame
115 | color_out.write(frame_out)
116 |
117 | # print progress
118 | frames_processed += 1
119 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 //totalFrames), end="\r")
120 | if cv2.waitKey(1) & 0xFF == ord('q'):
121 | break
122 | # end of the video
123 | else:
124 | break
125 |
126 | # release everything if job is finished
127 | cap.release()
128 | color_out.release()
129 |
130 | def main():
131 |
132 | # reset tensorflow graph
133 | tf.reset_default_graph()
134 |
135 | options = ModelOptions().parse()
136 |
137 | if options.filename == '*':
138 | for filename in os.listdir(options.input_dir):
139 |             bw2color(options, inputname = filename, inputpath = options.input_dir, outputpath = options.output_dir)
140 | else:
141 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir)
142 |
143 | # cleanup
144 | cv2.destroyAllWindows()
145 |
146 | return 0
147 |
148 | if __name__ == '__main__':
149 | main()
150 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/video_colorize_GAN_1st-truth-ref.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import cv2
6 | import numpy as np
7 | from PIL import Image
8 | from skimage import img_as_ubyte, img_as_float
9 | import skimage.color as color
10 | import scipy.ndimage.interpolation as sni
11 | from ops import postprocess
12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB
13 |
14 | import tensorflow as tf
15 | from options import ModelOptions
16 | from models import MomentsInTimeModel
17 |
18 |
19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, options):
20 |
21 | # colorize the image based on the previous one
22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0)}
23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic)
24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB)
25 |
26 |     # evaluate the tensor
27 | img_rgb_out = fake_image.eval()
28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8)
29 |
30 | return img_rgb_out
31 |
32 | def bw2color(options, inputname, inputpath, outputpath):
33 | if inputname.endswith(".mp4"):
34 | # size of the input frames
35 | size = 256
36 |
37 | # check that the video exists
38 | path_to_video = os.path.join(inputpath, inputname)
39 | if not os.path.exists(path_to_video):
40 |             print("The file:", path_to_video, "does not exist!")
41 | 
42 |         # store information about the original video
43 | cap = cv2.VideoCapture(os.path.join(path_to_video))
44 | # original dimensions
45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v');
48 | # parameters of output file
49 | # dimensions of the output image
50 | new_width, new_height = size, size
51 | # number of frames
52 | fps = 30.0
53 |
54 | # recolorized output video
55 | color_out = cv2.VideoWriter(
56 | os.path.join(outputpath, 'color_' + inputname),
57 | fourcc,
58 | fps,
59 | (new_width, new_height),
60 | isColor=True
61 | )
62 |
63 | # TO CHANGE to DL colorization of 1st frame
64 | # pick the first frame from the original video clip as the first reference
65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:]))
66 |
67 | ret_temp, frame_prev = cap_temp.read()
68 | # convert BGR to RGB convention
69 | frame_prev = frame_prev[:,:,::-1]
70 | frame_prev = cv2.resize(frame_prev, (size, size))
71 |
72 | # count the number of recolorized frames
73 | frames_processed = 0
74 |
75 | with tf.Session() as sess:
76 |
77 | model = MomentsInTimeModel(sess, options)
78 |
79 | # build the model and initialize
80 | model.build()
81 | sess.run(tf.global_variables_initializer())
82 |
83 | # load model only after global variables initialization
84 | model.load()
85 |
86 | while(cap.isOpened()):
87 | ret, frame_in = cap.read()
88 |
89 | # check if we are not at the end of the video
90 | if ret==True:
91 | # convert BGR to RGB convention
92 | frame_in = frame_in[:,:,::-1]
93 | # resize the frame to match the input size of the GAN
94 | frame_in = cv2.resize(frame_in, (size, size))
95 |
96 | # colorize the BW frame
97 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, options)
98 |
99 | #generate sample
100 | get_image = False
101 | if get_image:
102 | img = Image.fromarray(frame_out)
103 |
104 | if not os.path.exists(model.samples_dir):
105 | os.makedirs(model.samples_dir)
106 |
107 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png"
108 | img.save(os.path.join(model.samples_dir, sample))
109 |
110 | # save the recolorized frame
111 | #frame_prev = frame_out
112 | # convert RGB to BGR convention
113 | frame_out = frame_out[:,:,::-1]
114 | # write the color frame
115 | color_out.write(frame_out)
116 | #break
117 |
118 | # print progress
119 | frames_processed += 1
120 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 //totalFrames), end="\r")
121 | if cv2.waitKey(1) & 0xFF == ord('q'):
122 | break
123 | # end of the video
124 | else:
125 | break
126 |
127 | # release everything if job is finished
128 | cap.release()
129 | color_out.release()
130 |
131 | def main():
132 |
133 | # reset tensorflow graph
134 | tf.reset_default_graph()
135 |
136 | options = ModelOptions().parse()
137 |
138 | if options.filename == '*':
139 | for filename in os.listdir(options.input_dir):
140 |             bw2color(options, inputname = filename, inputpath = options.input_dir, outputpath = options.output_dir)
141 | else:
142 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir)
143 |
144 | # cleanup
145 | cv2.destroyAllWindows()
146 |
147 | return 0
148 |
149 | if __name__ == '__main__':
150 | main()
151 |
--------------------------------------------------------------------------------
/Colorizing-with-GANs/video_colorize_GAN_truth-ref.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import cv2
6 | import numpy as np
7 | from PIL import Image
8 | from skimage import img_as_ubyte, img_as_float
9 | import skimage.color as color
10 | import scipy.ndimage.interpolation as sni
11 | from ops import postprocess
12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB
13 |
14 | import tensorflow as tf
15 | from options import ModelOptions
16 | from models import MomentsInTimeModel
17 |
18 |
19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, options):
20 |
21 | # colorize the image based on the previous one
22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0)}
23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic)
24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB)
25 |
26 | # evaluate the tensor
27 | img_rgb_out = fake_image.eval()
28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8)
29 |
30 | return img_rgb_out
31 |
32 | def bw2color(options, inputname, inputpath, outputpath):
33 | if inputname.endswith(".mp4"):
34 | # size of the input frames
35 | size = 256
36 |
37 | # check that the video exists
38 | path_to_video = os.path.join(inputpath, inputname)
39 | if not os.path.exists(path_to_video):
40 | print("The file :", path_to_video, "does not exist !")
41 |
42 | # store information about the original video
43 | cap = cv2.VideoCapture(path_to_video)
44 | # original dimensions
45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v')
48 | # parameters of output file
49 | # dimensions of the output image
50 | new_width, new_height = size, size
51 | # frame rate of the output video
52 | fps = 30.0
53 |
54 | # recolorized output video
55 | color_out = cv2.VideoWriter(
56 | os.path.join(outputpath, 'color_' + inputname),
57 | fourcc,
58 | fps,
59 | (new_width, new_height),
60 | isColor=True
61 | )
62 |
63 | # TO CHANGE to DL colorization of 1st frame
64 | # pick the first frame from the original video clip as the first reference
65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:]))
66 |
67 | # count the number of recolorized frames
68 | frames_processed = 0
69 |
70 | with tf.Session() as sess:
71 |
72 | model = MomentsInTimeModel(sess, options)
73 |
74 | # build the model and initialize
75 | model.build()
76 | sess.run(tf.global_variables_initializer())
77 |
78 | # load model only after global variables initialization
79 | model.load()
80 |
81 | while(cap.isOpened()):
82 | ret, frame_in = cap.read()
83 |
84 | ret_temp, frame_prev = cap_temp.read()
85 |
86 | # check if we are not at the end of the video
87 | if ret:
88 | frame_prev = frame_prev[:,:,::-1]
89 | frame_prev = cv2.resize(frame_prev, (size, size))
90 |
91 | # convert BGR to RGB convention
92 | frame_in = frame_in[:,:,::-1]
93 | # resize the frame to match the input size of the GAN
94 | frame_in = cv2.resize(frame_in, (size, size))
95 |
96 | # colorize the BW frame
97 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, options)
98 |
99 | #generate sample
100 | get_image = False
101 | if get_image:
102 | img = Image.fromarray(frame_out)
103 |
104 | if not os.path.exists(model.samples_dir):
105 | os.makedirs(model.samples_dir)
106 |
107 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png"
108 | img.save(os.path.join(model.samples_dir, sample))
109 |
110 | # save the recolorized frame
111 | #frame_prev = frame_out
112 | # convert RGB to BGR convention
113 | frame_out = frame_out[:,:,::-1]
114 | # write the color frame
115 | color_out.write(frame_out)
116 | #break
117 |
118 | # print progress
119 | frames_processed += 1
120 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 // totalFrames), end="\r")
121 | if cv2.waitKey(1) & 0xFF == ord('q'):
122 | break
123 | # end of the video
124 | else:
125 | break
126 |
127 | # release everything if job is finished
128 | cap.release()
129 | color_out.release()
130 |
131 | def main():
132 |
133 | # reset tensorflow graph
134 | tf.reset_default_graph()
135 |
136 | options = ModelOptions().parse()
137 |
138 | if options.filename == '*':
139 | for filename in os.listdir(options.input_dir):
140 | bw2color(options, inputname=filename, inputpath=options.input_dir, outputpath=options.output_dir)
141 | else:
142 | bw2color(options, inputname=options.filename, inputpath=options.input_dir, outputpath=options.output_dir)
143 |
144 | # cleanup
145 | cv2.destroyAllWindows()
146 |
147 | return 0
148 |
149 | if __name__ == '__main__':
150 | main()
151 |
--------------------------------------------------------------------------------
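Both truth-reference scripts above carry the commented-out line frame_prev = frame_out. Re-enabling it (and dropping the ground-truth reference reads) turns the pipeline into pure self-propagation, where each colorized output becomes the reference for the next frame. A minimal sketch of that loop, reusing the model, options, cap, color_out, size and image_colorization_propagation names defined in the scripts above:

    # self-propagated colorization: the previous output is the next reference
    ret, frame_prev = cap.read()                                    # seed with the raw first frame
    frame_prev = cv2.resize(frame_prev[:, :, ::-1], (size, size))   # BGR -> RGB, resize
    while cap.isOpened():
        ret, frame_in = cap.read()
        if not ret:
            break
        frame_in = cv2.resize(frame_in[:, :, ::-1], (size, size))
        frame_out = image_colorization_propagation(model, frame_in, frame_prev, options)
        frame_prev = frame_out                    # propagate the predicted colors forward
        color_out.write(frame_out[:, :, ::-1])    # RGB -> BGR before writing

In practice the seed frame would come from a single-image colorizer rather than the raw clip, which is what the "TO CHANGE to DL colorization of 1st frame" comments in the scripts point at.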
/Deep-Learning-Colorization/models/.gitignore:
--------------------------------------------------------------------------------
1 | *.caffemodel
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/models/alexnet_deploy.prototxt:
--------------------------------------------------------------------------------
1 | name: "Colornet"
2 | layer {
3 | name: "data"
4 | top: "data" # BGR [0,255] ***non-mean centered***
5 | type: "Input"
6 | input_param { shape { dim: 1 dim: 3 dim: 227 dim: 227 } }
7 | }
8 | # **************************
9 | # ***** PROCESS COLORS *****
10 | # **************************
11 | layer { # Convert to lab
12 | name: "img_lab"
13 | type: "ColorConv"
14 | bottom: "data"
15 | top: "img_lab"
16 | propagate_down: false
17 | color_conv_param {
18 | input: 0 # BGR
19 | output: 3 # Lab
20 | }
21 | }
22 | layer {
23 | name: "img_slice"
24 | type: "Slice"
25 | bottom: "img_lab"
26 | top: "img_l" # [0,100]
27 | top: "data_ab" # [-110,110]
28 | propagate_down: false
29 | slice_param {
30 | axis: 1
31 | slice_point: 1
32 | }
33 | }
34 | layer {
35 | name: "silence_ab"
36 | type: "Silence"
37 | bottom: "data_ab"
38 | }
39 | layer { # 0-center lightness channel
40 | name: "data_l"
41 | type: "Convolution"
42 | bottom: "img_l"
43 | top: "data_l" # scaled and centered lightness value
44 | propagate_down: false
45 | param {lr_mult: 0 decay_mult: 0}
46 | param {lr_mult: 0 decay_mult: 0}
47 | convolution_param {
48 | kernel_size: 1
49 | num_output: 1
50 | }
51 | }
52 | layer {
53 | name: "conv1"
54 | type: "Convolution"
55 | bottom: "data_l"
56 | top: "conv1"
57 | param { lr_mult: 1 decay_mult: 1 }
58 | param { lr_mult: 2 decay_mult: 0 }
59 | convolution_param {
60 | num_output: 96
61 | kernel_size: 11
62 | stride: 4
63 | weight_filler {
64 | type: "gaussian"
65 | std: 0.01
66 | }
67 | bias_filler {
68 | type: "constant"
69 | value: 0
70 | }
71 | }
72 | }
73 | layer {
74 | name: "relu1"
75 | type: "ReLU"
76 | bottom: "conv1"
77 | top: "conv1"
78 | }
79 | layer {
80 | name: "pool1"
81 | type: "Pooling"
82 | bottom: "conv1"
83 | top: "pool1"
84 | pooling_param {
85 | pool: MAX
86 | kernel_size: 3
87 | stride: 2
88 | }
89 | }
90 | layer {
91 | name: "conv2"
92 | type: "Convolution"
93 | bottom: "pool1"
94 | top: "conv2"
95 | param { lr_mult: 1 decay_mult: 1 }
96 | param { lr_mult: 2 decay_mult: 0 }
97 | convolution_param {
98 | num_output: 256
99 | pad: 2
100 | kernel_size: 5
101 | group: 2
102 | weight_filler {
103 | type: "gaussian"
104 | std: 0.01
105 | }
106 | bias_filler {
107 | type: "constant"
108 | value: 1
109 | }
110 | }
111 | }
112 | layer {
113 | name: "relu2"
114 | type: "ReLU"
115 | bottom: "conv2"
116 | top: "conv2"
117 | }
118 | layer {
119 | name: "pool2"
120 | type: "Pooling"
121 | bottom: "conv2"
122 | top: "pool2"
123 | pooling_param {
124 | pool: MAX
125 | kernel_size: 3
126 | stride: 2
127 | }
128 | }
129 | layer {
130 | name: "conv3"
131 | type: "Convolution"
132 | bottom: "pool2"
133 | top: "conv3"
134 | param { lr_mult: 1 decay_mult: 1 }
135 | param { lr_mult: 2 decay_mult: 0 }
136 | convolution_param {
137 | num_output: 384
138 | pad: 1
139 | kernel_size: 3
140 | weight_filler {
141 | type: "gaussian"
142 | std: 0.01
143 | }
144 | bias_filler {
145 | type: "constant"
146 | value: 0
147 | }
148 | }
149 | }
150 | layer {
151 | name: "relu3"
152 | type: "ReLU"
153 | bottom: "conv3"
154 | top: "conv3"
155 | }
156 | layer {
157 | name: "conv4"
158 | type: "Convolution"
159 | bottom: "conv3"
160 | top: "conv4"
161 | param { lr_mult: 1 decay_mult: 1 }
162 | param { lr_mult: 2 decay_mult: 0 }
163 | convolution_param {
164 | num_output: 384
165 | pad: 1
166 | kernel_size: 3
167 | group: 2
168 | weight_filler {
169 | type: "gaussian"
170 | std: 0.01
171 | }
172 | bias_filler {
173 | type: "constant"
174 | value: 1
175 | }
176 | }
177 | }
178 | layer {
179 | name: "relu4"
180 | type: "ReLU"
181 | bottom: "conv4"
182 | top: "conv4"
183 | }
184 | layer {
185 | name: "conv5"
186 | type: "Convolution"
187 | bottom: "conv4"
188 | top: "conv5"
189 | param { lr_mult: 1 decay_mult: 1 }
190 | param { lr_mult: 2 decay_mult: 0 }
191 | convolution_param {
192 | num_output: 256
193 | pad: 1
194 | kernel_size: 3
195 | group: 2
196 | weight_filler {
197 | type: "gaussian"
198 | std: 0.01
199 | }
200 | bias_filler {
201 | type: "constant"
202 | value: 1
203 | }
204 | }
205 | }
206 | layer {
207 | name: "relu5"
208 | type: "ReLU"
209 | bottom: "conv5"
210 | top: "conv5"
211 | }
212 | layer {
213 | name: "pool5"
214 | type: "Pooling"
215 | bottom: "conv5"
216 | top: "pool5"
217 | pooling_param {
218 | pool: MAX
219 | kernel_size: 3
220 | stride: 2
221 | }
222 | }
223 | layer {
224 | name: "fc6"
225 | type: "Convolution"
226 | bottom: "pool5"
227 | top: "fc6"
228 | param { lr_mult: 1 decay_mult: 1 }
229 | param { lr_mult: 2 decay_mult: 0 }
230 | convolution_param {
231 | kernel_size: 6
232 | stride: 1
233 | num_output: 4096
234 | }
235 | }
236 | layer {
237 | name: "relu6"
238 | type: "ReLU"
239 | bottom: "fc6"
240 | top: "fc6"
241 | }
242 | layer {
243 | name: "drop6"
244 | type: "Dropout"
245 | bottom: "fc6"
246 | top: "fc6"
247 | dropout_param {
248 | dropout_ratio: 0.5
249 | }
250 | }
251 | layer {
252 | name: "fc7"
253 | type: "Convolution"
254 | bottom: "fc6"
255 | top: "fc7"
256 | param { lr_mult: 1 decay_mult: 1 }
257 | param { lr_mult: 2 decay_mult: 0 }
258 | convolution_param {
259 | kernel_size: 1
260 | stride: 1
261 | num_output: 4096
262 | }
263 | }
264 | layer {
265 | name: "relu7"
266 | type: "ReLU"
267 | bottom: "fc7"
268 | top: "fc7"
269 | }
270 | layer {
271 | name: "drop7"
272 | type: "Dropout"
273 | bottom: "fc7"
274 | top: "fc7"
275 | dropout_param {
276 | dropout_ratio: 0.5
277 | }
278 | }
279 |
280 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/models/alexnet_deploy_fc.prototxt:
--------------------------------------------------------------------------------
1 | name: "Colornet"
2 | layer {
3 | name: "data"
4 | top: "data" # BGR [0,255] ***non-mean centered***
5 | type: "Input"
6 | input_param { shape { dim: 1 dim: 3 dim: 227 dim: 227 } }
7 | }
8 | # **************************
9 | # ***** PROCESS COLORS *****
10 | # **************************
11 | layer { # Convert to lab
12 | name: "img_lab"
13 | type: "ColorConv"
14 | bottom: "data"
15 | top: "img_lab"
16 | propagate_down: false
17 | color_conv_param {
18 | input: 0 # BGR
19 | output: 3 # Lab
20 | }
21 | }
22 | layer {
23 | name: "img_slice"
24 | type: "Slice"
25 | bottom: "img_lab"
26 | top: "img_l" # [0,100]
27 | top: "data_ab" # [-110,110]
28 | propagate_down: false
29 | slice_param {
30 | axis: 1
31 | slice_point: 1
32 | }
33 | }
34 | layer {
35 | name: "silence_ab"
36 | type: "Silence"
37 | bottom: "data_ab"
38 | }
39 | layer { # 0-center lightness channel
40 | name: "data_l"
41 | type: "Convolution"
42 | bottom: "img_l"
43 | top: "data_l" # scaled and centered lightness value
44 | propagate_down: false
45 | param {lr_mult: 0 decay_mult: 0}
46 | param {lr_mult: 0 decay_mult: 0}
47 | convolution_param {
48 | kernel_size: 1
49 | num_output: 1
50 | }
51 | }
52 | layer {
53 | name: "conv1"
54 | type: "Convolution"
55 | bottom: "data_l"
56 | top: "conv1"
57 | param { lr_mult: 1 decay_mult: 1 }
58 | param { lr_mult: 2 decay_mult: 0 }
59 | convolution_param {
60 | num_output: 96
61 | kernel_size: 11
62 | stride: 4
63 | weight_filler {
64 | type: "gaussian"
65 | std: 0.01
66 | }
67 | bias_filler {
68 | type: "constant"
69 | value: 0
70 | }
71 | }
72 | }
73 | layer {
74 | name: "relu1"
75 | type: "ReLU"
76 | bottom: "conv1"
77 | top: "conv1"
78 | }
79 | layer {
80 | name: "pool1"
81 | type: "Pooling"
82 | bottom: "conv1"
83 | top: "pool1"
84 | pooling_param {
85 | pool: MAX
86 | kernel_size: 3
87 | stride: 2
88 | }
89 | }
90 | layer {
91 | name: "conv2"
92 | type: "Convolution"
93 | bottom: "pool1"
94 | top: "conv2"
95 | param { lr_mult: 1 decay_mult: 1 }
96 | param { lr_mult: 2 decay_mult: 0 }
97 | convolution_param {
98 | num_output: 256
99 | pad: 2
100 | kernel_size: 5
101 | group: 2
102 | weight_filler {
103 | type: "gaussian"
104 | std: 0.01
105 | }
106 | bias_filler {
107 | type: "constant"
108 | value: 1
109 | }
110 | }
111 | }
112 | layer {
113 | name: "relu2"
114 | type: "ReLU"
115 | bottom: "conv2"
116 | top: "conv2"
117 | }
118 | layer {
119 | name: "pool2"
120 | type: "Pooling"
121 | bottom: "conv2"
122 | top: "pool2"
123 | pooling_param {
124 | pool: MAX
125 | kernel_size: 3
126 | stride: 2
127 | }
128 | }
129 | layer {
130 | name: "conv3"
131 | type: "Convolution"
132 | bottom: "pool2"
133 | top: "conv3"
134 | param { lr_mult: 1 decay_mult: 1 }
135 | param { lr_mult: 2 decay_mult: 0 }
136 | convolution_param {
137 | num_output: 384
138 | pad: 1
139 | kernel_size: 3
140 | weight_filler {
141 | type: "gaussian"
142 | std: 0.01
143 | }
144 | bias_filler {
145 | type: "constant"
146 | value: 0
147 | }
148 | }
149 | }
150 | layer {
151 | name: "relu3"
152 | type: "ReLU"
153 | bottom: "conv3"
154 | top: "conv3"
155 | }
156 | layer {
157 | name: "conv4"
158 | type: "Convolution"
159 | bottom: "conv3"
160 | top: "conv4"
161 | param { lr_mult: 1 decay_mult: 1 }
162 | param { lr_mult: 2 decay_mult: 0 }
163 | convolution_param {
164 | num_output: 384
165 | pad: 1
166 | kernel_size: 3
167 | group: 2
168 | weight_filler {
169 | type: "gaussian"
170 | std: 0.01
171 | }
172 | bias_filler {
173 | type: "constant"
174 | value: 1
175 | }
176 | }
177 | }
178 | layer {
179 | name: "relu4"
180 | type: "ReLU"
181 | bottom: "conv4"
182 | top: "conv4"
183 | }
184 | layer {
185 | name: "conv5"
186 | type: "Convolution"
187 | bottom: "conv4"
188 | top: "conv5"
189 | param { lr_mult: 1 decay_mult: 1 }
190 | param { lr_mult: 2 decay_mult: 0 }
191 | convolution_param {
192 | num_output: 256
193 | pad: 1
194 | kernel_size: 3
195 | group: 2
196 | weight_filler {
197 | type: "gaussian"
198 | std: 0.01
199 | }
200 | bias_filler {
201 | type: "constant"
202 | value: 1
203 | }
204 | }
205 | }
206 | layer {
207 | name: "relu5"
208 | type: "ReLU"
209 | bottom: "conv5"
210 | top: "conv5"
211 | }
212 | layer {
213 | name: "pool5"
214 | type: "Pooling"
215 | bottom: "conv5"
216 | top: "pool5"
217 | pooling_param {
218 | pool: MAX
219 | kernel_size: 3
220 | stride: 2
221 | }
222 | }
223 | layer {
224 | name: "fc6"
225 | type: "InnerProduct"
226 | bottom: "pool5"
227 | top: "fc6"
228 | param { lr_mult: 1 decay_mult: 1 }
229 | param { lr_mult: 2 decay_mult: 0 }
230 | inner_product_param {
231 | num_output: 4096
232 | }
233 | }
234 | layer {
235 | name: "relu6"
236 | type: "ReLU"
237 | bottom: "fc6"
238 | top: "fc6"
239 | }
240 | layer {
241 | name: "drop6"
242 | type: "Dropout"
243 | bottom: "fc6"
244 | top: "fc6"
245 | dropout_param {
246 | dropout_ratio: 0.5
247 | }
248 | }
249 | layer {
250 | name: "fc7"
251 | type: "InnerProduct"
252 | bottom: "fc6"
253 | top: "fc7"
254 | param { lr_mult: 1 decay_mult: 1 }
255 | param { lr_mult: 2 decay_mult: 0 }
256 | inner_product_param {
257 | num_output: 4096
258 | }
259 | }
260 | layer {
261 | name: "relu7"
262 | type: "ReLU"
263 | bottom: "fc7"
264 | top: "fc7"
265 | }
266 | layer {
267 | name: "drop7"
268 | type: "Dropout"
269 | bottom: "fc7"
270 | top: "fc7"
271 | dropout_param {
272 | dropout_ratio: 0.5
273 | }
274 | }
275 |
276 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/models/alexnet_deploy_lab.prototxt:
--------------------------------------------------------------------------------
1 | name: "Colornet"
2 | layer {
3 | name: "img_lab"
4 | top: "img_lab" # Lab color space
5 | type: "Input"
6 | input_param { shape { dim: 1 dim: 3 dim: 227 dim: 227 } }
7 | }
8 | # **************************
9 | # ***** PROCESS COLORS *****
10 | # **************************
11 | # layer { # Convert to lab
12 | # name: "img_lab"
13 | # type: "ColorConv"
14 | # bottom: "data"
15 | # top: "img_lab"
16 | # propagate_down: false
17 | # color_conv_param {
18 | # input: 0 # BGR
19 | # output: 3 # Lab
20 | # }
21 | # }
22 | layer {
23 | name: "img_slice"
24 | type: "Slice"
25 | bottom: "img_lab"
26 | top: "img_l" # [0,100]
27 | top: "data_ab" # [-110,110]
28 | propagate_down: false
29 | slice_param {
30 | axis: 1
31 | slice_point: 1
32 | }
33 | }
34 | layer {
35 | name: "silence_ab"
36 | type: "Silence"
37 | bottom: "data_ab"
38 | }
39 | layer { # 0-center lightness channel
40 | name: "data_l"
41 | type: "Convolution"
42 | bottom: "img_l"
43 | top: "data_l" # scaled and centered lightness value
44 | propagate_down: false
45 | param {lr_mult: 0 decay_mult: 0}
46 | param {lr_mult: 0 decay_mult: 0}
47 | convolution_param {
48 | kernel_size: 1
49 | num_output: 1
50 | }
51 | }
52 | layer {
53 | name: "conv1"
54 | type: "Convolution"
55 | bottom: "data_l"
56 | top: "conv1"
57 | param { lr_mult: 1 decay_mult: 1 }
58 | param { lr_mult: 2 decay_mult: 0 }
59 | convolution_param {
60 | num_output: 96
61 | kernel_size: 11
62 | stride: 4
63 | weight_filler {
64 | type: "gaussian"
65 | std: 0.01
66 | }
67 | bias_filler {
68 | type: "constant"
69 | value: 0
70 | }
71 | }
72 | }
73 | layer {
74 | name: "relu1"
75 | type: "ReLU"
76 | bottom: "conv1"
77 | top: "conv1"
78 | }
79 | layer {
80 | name: "pool1"
81 | type: "Pooling"
82 | bottom: "conv1"
83 | top: "pool1"
84 | pooling_param {
85 | pool: MAX
86 | kernel_size: 3
87 | stride: 2
88 | }
89 | }
90 | layer {
91 | name: "conv2"
92 | type: "Convolution"
93 | bottom: "pool1"
94 | top: "conv2"
95 | param { lr_mult: 1 decay_mult: 1 }
96 | param { lr_mult: 2 decay_mult: 0 }
97 | convolution_param {
98 | num_output: 256
99 | pad: 2
100 | kernel_size: 5
101 | group: 2
102 | weight_filler {
103 | type: "gaussian"
104 | std: 0.01
105 | }
106 | bias_filler {
107 | type: "constant"
108 | value: 1
109 | }
110 | }
111 | }
112 | layer {
113 | name: "relu2"
114 | type: "ReLU"
115 | bottom: "conv2"
116 | top: "conv2"
117 | }
118 | layer {
119 | name: "pool2"
120 | type: "Pooling"
121 | bottom: "conv2"
122 | top: "pool2"
123 | pooling_param {
124 | pool: MAX
125 | kernel_size: 3
126 | stride: 2
127 | }
128 | }
129 | layer {
130 | name: "conv3"
131 | type: "Convolution"
132 | bottom: "pool2"
133 | top: "conv3"
134 | param { lr_mult: 1 decay_mult: 1 }
135 | param { lr_mult: 2 decay_mult: 0 }
136 | convolution_param {
137 | num_output: 384
138 | pad: 1
139 | kernel_size: 3
140 | weight_filler {
141 | type: "gaussian"
142 | std: 0.01
143 | }
144 | bias_filler {
145 | type: "constant"
146 | value: 0
147 | }
148 | }
149 | }
150 | layer {
151 | name: "relu3"
152 | type: "ReLU"
153 | bottom: "conv3"
154 | top: "conv3"
155 | }
156 | layer {
157 | name: "conv4"
158 | type: "Convolution"
159 | bottom: "conv3"
160 | top: "conv4"
161 | param { lr_mult: 1 decay_mult: 1 }
162 | param { lr_mult: 2 decay_mult: 0 }
163 | convolution_param {
164 | num_output: 384
165 | pad: 1
166 | kernel_size: 3
167 | group: 2
168 | weight_filler {
169 | type: "gaussian"
170 | std: 0.01
171 | }
172 | bias_filler {
173 | type: "constant"
174 | value: 1
175 | }
176 | }
177 | }
178 | layer {
179 | name: "relu4"
180 | type: "ReLU"
181 | bottom: "conv4"
182 | top: "conv4"
183 | }
184 | layer {
185 | name: "conv5"
186 | type: "Convolution"
187 | bottom: "conv4"
188 | top: "conv5"
189 | param { lr_mult: 1 decay_mult: 1 }
190 | param { lr_mult: 2 decay_mult: 0 }
191 | convolution_param {
192 | num_output: 256
193 | pad: 1
194 | kernel_size: 3
195 | group: 2
196 | weight_filler {
197 | type: "gaussian"
198 | std: 0.01
199 | }
200 | bias_filler {
201 | type: "constant"
202 | value: 1
203 | }
204 | }
205 | }
206 | layer {
207 | name: "relu5"
208 | type: "ReLU"
209 | bottom: "conv5"
210 | top: "conv5"
211 | }
212 | layer {
213 | name: "pool5"
214 | type: "Pooling"
215 | bottom: "conv5"
216 | top: "pool5"
217 | pooling_param {
218 | pool: MAX
219 | kernel_size: 3
220 | stride: 2
221 | }
222 | }
223 | layer {
224 | name: "fc6"
225 | type: "Convolution"
226 | bottom: "pool5"
227 | top: "fc6"
228 | param { lr_mult: 1 decay_mult: 1 }
229 | param { lr_mult: 2 decay_mult: 0 }
230 | convolution_param {
231 | kernel_size: 6
232 | stride: 1
233 | num_output: 4096
234 | }
235 | }
236 | layer {
237 | name: "relu6"
238 | type: "ReLU"
239 | bottom: "fc6"
240 | top: "fc6"
241 | }
242 | layer {
243 | name: "drop6"
244 | type: "Dropout"
245 | bottom: "fc6"
246 | top: "fc6"
247 | dropout_param {
248 | dropout_ratio: 0.5
249 | }
250 | }
251 | layer {
252 | name: "fc7"
253 | type: "Convolution"
254 | bottom: "fc6"
255 | top: "fc7"
256 | param { lr_mult: 1 decay_mult: 1 }
257 | param { lr_mult: 2 decay_mult: 0 }
258 | convolution_param {
259 | kernel_size: 1
260 | stride: 1
261 | num_output: 4096
262 | }
263 | }
264 | layer {
265 | name: "relu7"
266 | type: "ReLU"
267 | bottom: "fc7"
268 | top: "fc7"
269 | }
270 | layer {
271 | name: "drop7"
272 | type: "Dropout"
273 | bottom: "fc7"
274 | top: "fc7"
275 | dropout_param {
276 | dropout_ratio: 0.5
277 | }
278 | }
279 |
280 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/models/alexnet_deploy_lab_fc.prototxt:
--------------------------------------------------------------------------------
1 | name: "Colornet"
2 | layer {
3 | name: "img_lab"
4 | top: "img_lab" # Lab color space
5 | type: "Input"
6 | input_param { shape { dim: 1 dim: 3 dim: 227 dim: 227 } }
7 | }
8 | # **************************
9 | # ***** PROCESS COLORS *****
10 | # **************************
11 | # layer { # Convert to lab
12 | # name: "img_lab"
13 | # type: "ColorConv"
14 | # bottom: "data"
15 | # top: "img_lab"
16 | # propagate_down: false
17 | # color_conv_param {
18 | # input: 0 # BGR
19 | # output: 3 # Lab
20 | # }
21 | # }
22 | layer {
23 | name: "img_slice"
24 | type: "Slice"
25 | bottom: "img_lab"
26 | top: "img_l" # [0,100]
27 | top: "data_ab" # [-110,110]
28 | propagate_down: false
29 | slice_param {
30 | axis: 1
31 | slice_point: 1
32 | }
33 | }
34 | layer {
35 | name: "silence_ab"
36 | type: "Silence"
37 | bottom: "data_ab"
38 | }
39 | layer { # 0-center lightness channel
40 | name: "data_l"
41 | type: "Convolution"
42 | bottom: "img_l"
43 | top: "data_l" # scaled and centered lightness value
44 | propagate_down: false
45 | param {lr_mult: 0 decay_mult: 0}
46 | param {lr_mult: 0 decay_mult: 0}
47 | convolution_param {
48 | kernel_size: 1
49 | num_output: 1
50 | }
51 | }
52 | layer {
53 | name: "conv1"
54 | type: "Convolution"
55 | bottom: "data_l"
56 | top: "conv1"
57 | param { lr_mult: 1 decay_mult: 1 }
58 | param { lr_mult: 2 decay_mult: 0 }
59 | convolution_param {
60 | num_output: 96
61 | kernel_size: 11
62 | stride: 4
63 | weight_filler {
64 | type: "gaussian"
65 | std: 0.01
66 | }
67 | bias_filler {
68 | type: "constant"
69 | value: 0
70 | }
71 | }
72 | }
73 | layer {
74 | name: "relu1"
75 | type: "ReLU"
76 | bottom: "conv1"
77 | top: "conv1"
78 | }
79 | layer {
80 | name: "pool1"
81 | type: "Pooling"
82 | bottom: "conv1"
83 | top: "pool1"
84 | pooling_param {
85 | pool: MAX
86 | kernel_size: 3
87 | stride: 2
88 | }
89 | }
90 | layer {
91 | name: "conv2"
92 | type: "Convolution"
93 | bottom: "pool1"
94 | top: "conv2"
95 | param { lr_mult: 1 decay_mult: 1 }
96 | param { lr_mult: 2 decay_mult: 0 }
97 | convolution_param {
98 | num_output: 256
99 | pad: 2
100 | kernel_size: 5
101 | group: 2
102 | weight_filler {
103 | type: "gaussian"
104 | std: 0.01
105 | }
106 | bias_filler {
107 | type: "constant"
108 | value: 1
109 | }
110 | }
111 | }
112 | layer {
113 | name: "relu2"
114 | type: "ReLU"
115 | bottom: "conv2"
116 | top: "conv2"
117 | }
118 | layer {
119 | name: "pool2"
120 | type: "Pooling"
121 | bottom: "conv2"
122 | top: "pool2"
123 | pooling_param {
124 | pool: MAX
125 | kernel_size: 3
126 | stride: 2
127 | }
128 | }
129 | layer {
130 | name: "conv3"
131 | type: "Convolution"
132 | bottom: "pool2"
133 | top: "conv3"
134 | param { lr_mult: 1 decay_mult: 1 }
135 | param { lr_mult: 2 decay_mult: 0 }
136 | convolution_param {
137 | num_output: 384
138 | pad: 1
139 | kernel_size: 3
140 | weight_filler {
141 | type: "gaussian"
142 | std: 0.01
143 | }
144 | bias_filler {
145 | type: "constant"
146 | value: 0
147 | }
148 | }
149 | }
150 | layer {
151 | name: "relu3"
152 | type: "ReLU"
153 | bottom: "conv3"
154 | top: "conv3"
155 | }
156 | layer {
157 | name: "conv4"
158 | type: "Convolution"
159 | bottom: "conv3"
160 | top: "conv4"
161 | param { lr_mult: 1 decay_mult: 1 }
162 | param { lr_mult: 2 decay_mult: 0 }
163 | convolution_param {
164 | num_output: 384
165 | pad: 1
166 | kernel_size: 3
167 | group: 2
168 | weight_filler {
169 | type: "gaussian"
170 | std: 0.01
171 | }
172 | bias_filler {
173 | type: "constant"
174 | value: 1
175 | }
176 | }
177 | }
178 | layer {
179 | name: "relu4"
180 | type: "ReLU"
181 | bottom: "conv4"
182 | top: "conv4"
183 | }
184 | layer {
185 | name: "conv5"
186 | type: "Convolution"
187 | bottom: "conv4"
188 | top: "conv5"
189 | param { lr_mult: 1 decay_mult: 1 }
190 | param { lr_mult: 2 decay_mult: 0 }
191 | convolution_param {
192 | num_output: 256
193 | pad: 1
194 | kernel_size: 3
195 | group: 2
196 | weight_filler {
197 | type: "gaussian"
198 | std: 0.01
199 | }
200 | bias_filler {
201 | type: "constant"
202 | value: 1
203 | }
204 | }
205 | }
206 | layer {
207 | name: "relu5"
208 | type: "ReLU"
209 | bottom: "conv5"
210 | top: "conv5"
211 | }
212 | layer {
213 | name: "pool5"
214 | type: "Pooling"
215 | bottom: "conv5"
216 | top: "pool5"
217 | pooling_param {
218 | pool: MAX
219 | kernel_size: 3
220 | stride: 2
221 | }
222 | }
223 | layer {
224 | name: "fc6"
225 | type: "InnerProduct"
226 | bottom: "pool5"
227 | top: "fc6"
228 | param { lr_mult: 1 decay_mult: 1 }
229 | param { lr_mult: 2 decay_mult: 0 }
230 | inner_product_param {
231 | num_output: 4096
232 | }
233 | }
234 | layer {
235 | name: "relu6"
236 | type: "ReLU"
237 | bottom: "fc6"
238 | top: "fc6"
239 | }
240 | layer {
241 | name: "drop6"
242 | type: "Dropout"
243 | bottom: "fc6"
244 | top: "fc6"
245 | dropout_param {
246 | dropout_ratio: 0.5
247 | }
248 | }
249 | layer {
250 | name: "fc7"
251 | type: "InnerProduct"
252 | bottom: "fc6"
253 | top: "fc7"
254 | param { lr_mult: 1 decay_mult: 1 }
255 | param { lr_mult: 2 decay_mult: 0 }
256 | inner_product_param {
257 | num_output: 4096
258 | }
259 | }
260 | layer {
261 | name: "relu7"
262 | type: "ReLU"
263 | bottom: "fc7"
264 | top: "fc7"
265 | }
266 | layer {
267 | name: "drop7"
268 | type: "Dropout"
269 | bottom: "fc7"
270 | top: "fc7"
271 | dropout_param {
272 | dropout_ratio: 0.5
273 | }
274 | }
275 |
276 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/models/fetch_alexnet_model.sh:
--------------------------------------------------------------------------------
1 |
2 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/alexnet_release_450000.caffemodel -O ./models/alexnet_release_450000.caffemodel
3 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/alexnet_release_450000_nobn.caffemodel -O ./models/alexnet_release_450000_nobn.caffemodel
4 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/alexnet_release_450000_nobn_rs.caffemodel -O ./models/alexnet_release_450000_nobn_rs.caffemodel
5 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/alexnet_release_450000_nobn_fc_rs.caffemodel -O ./models/alexnet_release_450000_nobn_fc_rs.caffemodel
6 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/models/fetch_release_models.sh:
--------------------------------------------------------------------------------
1 |
2 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/colorization_release_v2.caffemodel -O ./models/colorization_release_v2.caffemodel
3 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/colorization_release_v2_norebal.caffemodel -O ./models/colorization_release_v2_norebal.caffemodel
4 | wget http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v1/colorization_release_v1.caffemodel -O ./models/colorization_release_v1.caffemodel
5 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/batch_norm_absorb.py:
--------------------------------------------------------------------------------
1 |
2 | # **************************************
3 | # ***** Richard Zhang / 2016.06.04 *****
4 | # **************************************
5 | # Absorb batch norm into convolution layers
6 | # This script only supports the conv-batchnorm configuration
7 | # Currently unsupported:
8 | # - deconv layers
9 | # - fc layers
10 | # - batchnorm before linear layer
11 |
12 | import caffe
13 | import os
14 | import string
15 | import numpy as np
16 | import argparse
17 | import matplotlib.pyplot as plt
18 |
19 | def parse_args():
20 | parser = argparse.ArgumentParser(description='BatchNorm absorption')
21 | parser.add_argument('--gpu', dest='gpu', help='gpu id', type=int, default=0)
22 | parser.add_argument('--prototxt_in',dest='prototxt_in',help='prototxt with batchnorm', type=str, default='')
23 | parser.add_argument('--prototxt_out',dest='prototxt_out',help='prototxt without batchnorm', type=str, default='')
24 | parser.add_argument('--caffemodel_in',dest='caffemodel_in',help='caffemodel with batchnorm', type=str, default='')
25 | parser.add_argument('--caffemodel_out',dest='caffemodel_out',help='caffemodel without batchnorm, to be saved', type=str, default='')
26 |
27 | args = parser.parse_args()
28 | return args
29 |
30 | if __name__ == '__main__':
31 | args = parse_args()
32 |
33 | gpu_id = args.gpu
34 | PROTOTXT1_PATH = args.prototxt_in
35 | PROTOTXT2_PATH = args.prototxt_out # no batch norm
36 | MODEL_PATH = args.caffemodel_in
37 | MODEL2_PATH = args.caffemodel_out # to be saved off
38 |
39 | caffe.set_mode_gpu()
40 | caffe.set_device(gpu_id)
41 |
42 | net1 = caffe.Net(PROTOTXT1_PATH, MODEL_PATH, caffe.TEST)
43 | net2 = caffe.Net(PROTOTXT2_PATH, MODEL_PATH, caffe.TEST)
44 |
45 | # call forward on net1, net2
46 | net1.forward()
47 | net2.forward()
48 |
49 | # identify batch norms and paired linear layers
50 | BN_INDS = np.where(np.array([layer.type for layer in net1.layers])=='BatchNorm')[0]
51 | BN_NAMES = np.zeros(BN_INDS.shape,dtype='S50') # batch norm layer names
52 | LIN_NAMES = np.zeros(BN_INDS.shape,dtype='S50') # linear layer names
53 | PRE_NAMES = np.zeros(BN_INDS.shape,dtype='S50') # blob right before
54 | POST_NAMES = np.zeros(BN_INDS.shape,dtype='S50') # blob right after
55 |
56 | PRE_POST = -1+np.zeros(BN_INDS.shape) # 0 - pre, 1 - post
57 | CONV_DECONV = -1+np.zeros(BN_INDS.shape) # 0 - conv, 1 - deconv
58 |
59 | # identify layers which are paired with batch norms (only supporting convolution)
60 | for (ll,bn_ind) in enumerate(BN_INDS):
61 | BN_NAMES[ll] = net1._layer_names[bn_ind]
62 | if(net1.layers[bn_ind-1].type=='Convolution' or net1.layers[bn_ind-1].type=='Deconvolution'):
63 | PRE_POST[ll] = 0
64 | LIN_NAMES[ll] = net1._layer_names[bn_ind-1]
65 | POST_NAMES[ll] = net1._layer_names[bn_ind+1]
66 | if(net1.layers[bn_ind-1].type=='Convolution'):
67 | CONV_DECONV[ll] = 0
68 | elif(net1.layers[bn_ind-1].type=='Deconvolution'):
69 | CONV_DECONV[ll] = 1
70 | elif(net1.layers[bn_ind+1].type=='Convolution' or net1.layers[bn_ind+1].type=='Deconvolution'):
71 | PRE_POST[ll] = 1
72 | LIN_NAMES[ll] = net1._layer_names[bn_ind+1]
73 | POST_NAMES[ll] = net1._layer_names[bn_ind+3]
74 | if(net1.layers[bn_ind+1].type=='Convolution'):
75 | CONV_DECONV[ll] = 0
76 | elif(net1.layers[bn_ind+1].type=='Deconvolution'):
77 | CONV_DECONV[ll] = 1
78 | else:
79 | PRE_POST[ll] = -1
80 | PRE_NAMES[ll] = net1.bottom_names[BN_NAMES[ll]][0]
81 |
82 | LIN_INDS = BN_INDS+PRE_POST # linear layer indices
83 | ALL_SLOPES = {}
84 |
85 | # compute batch norm parameters on net1 in first layer
86 | # absorb into weights in first layer
87 | for ll in range(BN_INDS.size):
88 | bn_ind = BN_INDS[ll]
89 | BN_NAME = BN_NAMES[ll]
90 | PRE_NAME = PRE_NAMES[ll]
91 | POST_NAME = POST_NAMES[ll]
92 | LIN_NAME = LIN_NAMES[ll]
93 |
94 | print 'LAYERS %s, %s'%(PRE_NAME,BN_NAME)
95 | # print net1.blobs[BN_NAME].data.shape
96 | # print net1.blobs[PRE_NAME].data.shape
97 |
98 | C = net1.blobs[BN_NAME].data.shape[1]
99 | in_blob = net1.blobs[PRE_NAME].data
100 | bn_blob = net1.blobs[BN_NAME].data
101 |
102 | scale_factor = 1./net1.params[BN_NAME][2].data[...]
103 | mean = scale_factor * net1.params[BN_NAME][0].data[...]
104 | scale = scale_factor * net1.params[BN_NAME][1].data[...]
105 |
106 | slopes = np.sqrt(1./scale)
107 | offs = -mean*slopes
108 |
109 | print ' Computing error on data...'
110 | bn_blob_rep = in_blob*slopes[np.newaxis,:,np.newaxis,np.newaxis]+offs[np.newaxis,:,np.newaxis,np.newaxis]
111 |
112 | # Visually verify that factors are correct
113 | print ' Maximum error: %.3e'%np.max(np.abs(bn_blob_rep[bn_blob>0] - bn_blob[bn_blob>0]))
114 | print ' RMS error: %.3e'%np.linalg.norm(bn_blob_rep[bn_blob>0] - bn_blob[bn_blob>0])
115 | print ' RMS signal: %.3e'%np.linalg.norm(bn_blob_rep[bn_blob>0])
116 |
117 | print ' Absorbing slope and offset...'
118 | # absorb slope and offset into appropriate parameter
119 | if(PRE_POST[ll]==0): # linear layer is before
120 | if(CONV_DECONV[ll]==0): # convolution
121 | net2.params[LIN_NAME][0].data[...] = net1.params[LIN_NAME][0].data[...]*slopes[:,np.newaxis,np.newaxis,np.newaxis]
122 | net2.params[LIN_NAME][1].data[...] = offs + (slopes*net1.params[LIN_NAME][1].data)
123 | elif(CONV_DECONV[ll]==1): # deconvolution
124 | print '*** Deconvolution not implemented ***'
125 | elif(PRE_POST[ll]==1): # batchnorm is BEFORE linear layer
126 | print '*** Not implemented ***'
127 |
128 | net2.save(MODEL2_PATH)
129 |
130 | for arg in vars(args):
131 | print('[%s] =' % arg, getattr(args, arg))
132 | print 'Saving model into: %s'%MODEL2_PATH
133 |
--------------------------------------------------------------------------------
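The absorption in batch_norm_absorb.py folds every BatchNorm into the convolution that precedes it: from Caffe's BatchNorm blobs (running mean sum, running variance sum, count) it derives a per-channel scale of 1/sqrt(var) and a shift of -mean/sqrt(var), then multiplies them into the conv weights and bias. A NumPy sketch of the same arithmetic; the bn_mean_sum, bn_var_sum, bn_count, W and b arrays are hypothetical stand-ins for the blob data the script reads:

    import numpy as np

    # Caffe BatchNorm blobs: [0] = mean * N, [1] = variance * N, [2] = N
    mean = bn_mean_sum / bn_count
    var = bn_var_sum / bn_count
    slope = 1.0 / np.sqrt(var)       # per-channel scale applied by BatchNorm
    offset = -mean * slope           # per-channel shift applied by BatchNorm

    # fold into the preceding convolution (weights shaped [out_ch, in_ch, kh, kw])
    W_folded = W * slope[:, None, None, None]
    b_folded = offset + slope * b

This matches the script's slopes/offs computation and the weight/bias update it writes into net2.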
/Deep-Learning-Colorization/resources/caffe_traininglayers.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Deep-Learning-Colorization/resources/caffe_traininglayers.pyc
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/conv_into_fc.py:
--------------------------------------------------------------------------------
1 |
2 | import caffe
3 | import os
4 | import string
5 | import numpy as np
6 | import argparse
7 | import matplotlib.pyplot as plt
8 |
9 | def parse_args():
10 | parser = argparse.ArgumentParser(description='Convert conv layers into FC layers')
11 | parser.add_argument('--gpu', dest='gpu', help='gpu id', type=int, default=0)
12 | parser.add_argument('--prototxt_in',dest='prototxt_in',help='prototxt with conv layers', type=str, default='')
13 | parser.add_argument('--prototxt_out',dest='prototxt_out',help='prototxt with fc layers', type=str, default='')
14 | parser.add_argument('--caffemodel_in',dest='caffemodel_in',help='caffemodel with conv layers', type=str, default='')
15 | parser.add_argument('--caffemodel_out',dest='caffemodel_out',help='caffemodel with fc layers, to be saved', type=str, default='')
16 | parser.add_argument('--dummymodel',dest='dummymodel',help='blank caffemodel',type=str,default='./models/dummy.caffemodel')
17 |
18 | args = parser.parse_args()
19 | return args
20 |
21 | if __name__ == '__main__':
22 | args = parse_args()
23 |
24 | gpu_id = args.gpu
25 | PROTOTXT1_PATH = args.prototxt_in
26 | PROTOTXT2_PATH = args.prototxt_out # no batch norm
27 | MODEL_PATH = args.caffemodel_in
28 | DUMMYMODEL_PATH = args.dummymodel
29 | MODEL2_PATH = args.caffemodel_out # to be saved off
30 |
31 | caffe.set_mode_gpu()
32 | caffe.set_device(gpu_id)
33 |
34 | net1 = caffe.Net(PROTOTXT1_PATH, MODEL_PATH, caffe.TEST)
35 | net2 = caffe.Net(PROTOTXT2_PATH, DUMMYMODEL_PATH, caffe.TEST)
36 |
37 | import rz_fcns as rz
38 | rz.caffe_param_shapes(net1,to_print=True)
39 | rz.caffe_param_shapes(net2,to_print=True)
40 | rz.caffe_shapes(net2,to_print=True)
41 |
42 | # CONV_INDS = np.where(np.array([layer.type for layer in net1.layers])=='Convolution')[0]
43 | print net1.params.keys()
44 | print net2.params.keys()
45 |
46 | for (ll,layer) in enumerate(net2.params.keys()):
47 | P = len(net2.params[layer]) # number of blobs
48 | if(P>0):
49 | for pp in range(P):
50 | ndim1 = net1.params[layer][pp].data.ndim
51 | ndim2 = net2.params[layer][pp].data.ndim
52 |
53 | print('Copying layer %s, param blob %i (%i-dim => %i-dim)'%(layer,pp,ndim1,ndim2))
54 | if(ndim1==ndim2):
55 | print(' Same dimensionality...')
56 | net2.params[layer][pp].data[...] = net1.params[layer][pp].data[...]
57 | else:
58 | print(' Different dimensionality...')
59 | net2.params[layer][pp].data[...] = net1.params[layer][pp].data[...].reshape(net2.params[layer][pp].data[...].shape)
60 |
61 | net2.save(MODEL2_PATH)
62 |
63 | for arg in vars(args):
64 | print('[%s] =' % arg, getattr(args, arg))
65 | print 'Saving model into: %s'%MODEL2_PATH
66 |
--------------------------------------------------------------------------------
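conv_into_fc.py copies parameters between a convolutional deploy file and its fully-connected counterpart (the alexnet_deploy*.prototxt pairs above); the only non-trivial case is reshaping a kernel that covers its whole input, e.g. the 6x6 fc6 convolution over the 6x6 pool5 map, into an InnerProduct weight matrix. A small NumPy sketch of why that reshape is lossless, with shapes chosen to match the deploy files (illustrative data only):

    import numpy as np

    # conv-style fc6 weights: (4096 outputs, 256 channels, 6, 6)
    w_conv = np.random.randn(4096, 256, 6, 6).astype(np.float32)
    x = np.random.randn(256, 6, 6).astype(np.float32)   # one pool5 feature map

    # a convolution whose kernel covers the whole input is one dot product per filter
    y_conv = np.tensordot(w_conv, x, axes=([1, 2, 3], [0, 1, 2]))

    # the same result from the flattened InnerProduct layout: (4096, 256*6*6)
    w_fc = w_conv.reshape(4096, -1)
    y_fc = w_fc @ x.ravel()

    assert np.allclose(y_conv, y_fc, rtol=1e-4, atol=1e-3)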
/Deep-Learning-Colorization/resources/magic_init/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016, Philipp Krähenbühl
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 | this list of conditions and the following disclaimer in the documentation
11 | and/or other materials provided with the distribution.
12 |
13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 |
24 | The views and conclusions contained in the software and documentation are those
25 | of the authors and should not be interpreted as representing official policies,
26 | either expressed or implied, of the FreeBSD Project.
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/magic_init/README.md:
--------------------------------------------------------------------------------
1 | # Data-dependent initialization of convolutional neural networks
2 |
3 | Created by Philipp Krähenbühl.
4 |
5 | ### Introduction
6 |
7 | This code implements the initialization presented in our [arXiv tech report](http://arxiv.org/abs/1511.06856), which is under submission at ICLR 2016.
8 |
9 | *This is a reimplementation and currently work in progress. Use at your own risk.*
10 |
11 | ### License
12 |
13 | This code is released under the BSD License (refer to the LICENSE file for details).
14 |
15 | ### Citing
16 |
17 | If you find our initialization useful in your research, please consider citing:
18 |
19 | @article{krahenbuhl2015data,
20 | title={Data-dependent Initializations of Convolutional Neural Networks},
21 | author={Kr{\"a}henb{\"u}hl, Philipp and Doersch, Carl and Donahue, Jeff and Darrell, Trevor},
22 | journal={arXiv preprint arXiv:1511.06856},
23 | year={2015}
24 | }
25 |
26 | ### Setup
27 |
28 | Checkout the project and create a symlink to caffe in the `magic_init` directory:
29 | ```Shell
30 | ln -s path/to/caffe/python/caffe caffe
31 | ```
32 |
33 | ### Examples
34 |
35 | Here is a quick example of how to initialize alexnet:
36 | ```bash
37 | python magic_init.py path/to/alexnet/deploy.prototxt path/to/output.caffemodel -d "path/to/some/images/*.png" -q -nit 10 -cs
38 | ```
39 | Here the ```-d``` flag allows you to initialize the network using your own images. Feel free to use imagenet, Pascal, COCO or whatever you have at hand, it shouldn't make a big difference. The ```-q``` (quiet) flag suppresses all the caffe logging, ```-nit``` controls the number of batches used (while ```-bs``` controls the batch size). Finally ```-cs``` rescales the gradients across layers. This rescaling currently works best for feed-forward networks, and might not work too well for DAG structured networks (we are working on that).
40 |
41 | To run the k-means initialization use:
42 | ```bash
43 | python magic_init.py path/to/alexnet/deploy.prototxt path/to/output.caffemodel -d "path/to/some/images/*.png" -q -nit 10 -cs -t kmeans
44 | ```
45 |
46 | Finally, ```python magic_init.py -h``` should provide you with more help.
47 |
48 |
49 | ### Pro tips
50 | If your numpy implementation is based on openblas, try disabling threading with ```export OPENBLAS_NUM_THREADS=1```; it can improve the runtime performance a bit.
51 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/magic_init/load.py:
--------------------------------------------------------------------------------
1 | import caffe
2 |
3 | def parseProtoString(s):
4 | from google.protobuf import text_format
5 | from caffe.proto import caffe_pb2 as pb
6 | proto_net = pb.NetParameter()
7 | text_format.Merge(s, proto_net)
8 | return proto_net
9 |
10 |
11 | def get_param(l, exclude=set(['top', 'bottom', 'name', 'type'])):
12 | if not hasattr(l,'ListFields'):
13 | if hasattr(l,'__delitem__'):
14 | return list(l)
15 | return l
16 | r = dict()
17 | for f, v in l.ListFields():
18 | if f.name not in exclude:
19 | r[f.name] = get_param(v, [])
20 | return r
21 |
22 | class ProtoDesc:
23 | def __init__(self, prototxt):
24 | from os import path
25 | self.prototxt = prototxt
26 | self.parsed_proto = parseProtoString(open(self.prototxt, 'r').read())
27 | # Guess the input dimension
28 | self.input_dim = (3, 227, 227)
29 | net = self.parsed_proto
30 | if len(net.input_dim) > 0:
31 | self.input_dim = net.input_dim[1:]
32 | else:
33 | lrs = net.layer
34 | cs = [l.transform_param.crop_size for l in lrs
35 | if l.HasField('transform_param')]
36 | if len(cs):
37 | self.input_dim = (3, cs[0], cs[0])
38 |
39 | def __call__(self, clip=None, **inputs):
40 | from caffe import layers as L
41 | from collections import OrderedDict
42 | net = self.parsed_proto
43 | blobs = OrderedDict(inputs)
44 | for l in net.layer:
45 | if l.name not in inputs:
46 | in_place = l.top == l.bottom
47 | param = get_param(l)
48 | assert all([b in blobs for b in l.bottom]), "Some bottoms not founds: " + ', '.join([b for b in l.bottom if not b in blobs])
49 | tops = getattr(L, l.type)(*[blobs[b] for b in l.bottom],
50 | ntop=len(l.top), in_place=in_place,
51 | name=l.name,
52 | **param)
53 | if len(l.top) <= 1:
54 | tops = [tops]
55 | for i, t in enumerate(l.top):
56 | blobs[t] = tops[i]
57 | if l.name == clip:
58 | break
59 | return list(blobs.values())[-1]
60 |
61 |
62 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/magic_init/load.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Deep-Learning-Colorization/resources/magic_init/load.pyc
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/magic_init/measure_stat.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from magic_init import *
3 |
4 | class BCOLORS:
5 | HEADER = '\033[95m'
6 | OKBLUE = '\033[94m'
7 | OKGREEN = '\033[92m'
8 | WARNING = '\033[93m'
9 | FAIL = '\033[91m'
10 | ENDC = '\033[0m'
11 | BOLD = '\033[1m'
12 | UNDERLINE = '\033[4m'
13 |
14 | class NOCOLORS:
15 | HEADER = ''
16 | OKBLUE = ''
17 | OKGREEN = ''
18 | WARNING = ''
19 | FAIL = ''
20 | ENDC = ''
21 | BOLD = ''
22 | UNDERLINE = ''
23 |
24 | def coloredNumbers(v, color=None, fmt='%6.2f', max_display=300, bcolors=BCOLORS):
25 | import numpy as np
26 | # Display a numpy array and highlight the min and max values [required a nice linux
27 | # terminal supporting colors]
28 | r = ""
29 | mn, mx = np.min(v), np.max(v)
30 | for k,i in enumerate(v):
31 | if len(v) > max_display and k > max_display/2 and k < len(v) - max_display/2:
32 | if r[-1] != '.':
33 | r += '...'
34 | continue
35 | if i <= mn + 1e-3:
36 | r += bcolors.BOLD+bcolors.FAIL
37 | elif i + 1e-3 >= mx:
38 | r += bcolors.BOLD+bcolors.FAIL
39 | elif color is not None:
40 | r += color
41 | r += (fmt+' ')%i
42 | r += bcolors.ENDC
43 | r += bcolors.ENDC
44 | return r
45 |
46 | def computeGradientRatio(net, NIT=1):
47 | import numpy as np
48 | last_layer = 0
49 | for i, (n, l) in enumerate(zip(net._layer_names, net.layers)):
50 | if l.type not in STRIP_LAYER:
51 | last_layer = i
52 | last_tops = net.top_names[net._layer_names[last_layer]]
53 |
54 | var = {}
55 | for it in range(NIT):
56 | net._forward(0, last_layer)
57 | # Reset the diffs
58 | for l in net.layers:
59 | for b in l.blobs:
60 | b.diff[...] = 0
61 | # Set the top diffs
62 | for t in last_tops:
63 | net.blobs[t].diff[...] = np.random.normal(0, 1, net.blobs[t].shape)
64 | net._backward(last_layer, 0)
65 | for i, (n, l) in enumerate(zip(net._layer_names, net.layers)):
66 | if len(l.blobs) > 0:
67 | assert l.type in PARAMETER_LAYERS, "Parameter layer '%s' currently not supported"%l.type
68 | b = l.blobs[0]
69 | r = np.mean(b.diff.swapaxes(0,1).reshape((b.diff.shape[1],-1))**2, axis=1) / np.mean(b.data**2)
70 | if n in var: var[n] += r / NIT
71 | else: var[n] = r / NIT
72 | std = {n: np.sqrt(var[n]) for n in var}
73 | return {n: np.std(s) / np.mean(s) for n,s in std.items()}, {n: np.mean(s) for n,s in std.items()}
74 |
75 | def printMeanStddev(net, NIT=10, show_all=False, show_color=True, quiet=False):
76 | import numpy as np
77 | bcolors = NOCOLORS
78 | if show_color: bcolors = BCOLORS
79 |
80 | layer_names = list(net._layer_names)
81 | if not show_all:
82 | layer_names = [n for n, l in zip(net._layer_names, net.layers) if len(l.blobs)>0]
83 | if 'data' in net._layer_names:
84 | layer_names.append('data')
85 |
86 | # When was a blob last used
87 | last_used = {}
88 | # Make sure all layers are supported, and compute the range each blob is used in
89 | for i, (n, l) in enumerate(zip(net._layer_names, net.layers)):
90 | for b in net.bottom_names[n]:
91 | last_used[b] = i
92 |
93 | active_data, cvar = {}, {}
94 | for i, (n, l) in enumerate(zip(net._layer_names, net.layers)):
95 | # Run the network forward
96 | new_data = forward(net, i, NIT, {b: active_data[b] for b in net.bottom_names[n]}, net.top_names[n])
97 | active_data.update(new_data)
98 |
99 | if len(net.top_names[n]) > 0 and n in layer_names:
100 | m = net.top_names[n][0]
101 | D = flattenData(new_data[m])
102 | mean = np.mean(D, axis=0)
103 | stddev = np.std(D, axis=0)
104 | if not quiet:
105 | print( bcolors.BOLD, ' '*5, n, ':', m, ' '*5, bcolors.ENDC )
106 | print( 'mean ', coloredNumbers(mean, bcolors.OKGREEN, bcolors=bcolors) )
107 | print( 'stddev', coloredNumbers(stddev, bcolors.OKBLUE, bcolors=bcolors) )
108 | print( 'coef of variation ', bcolors.OKGREEN, stddev.std() / stddev.mean(), bcolors.ENDC )
109 | print()
110 | cvar[n] = stddev.std() / stddev.mean()
111 | # Delete all unused data
112 | for k in list(active_data):
113 | if k not in last_used or last_used[k] == i:
114 | del active_data[k]
115 | return cvar
116 |
117 | def main():
118 | from argparse import ArgumentParser
119 | from os import path
120 |
121 | parser = ArgumentParser()
122 | parser.add_argument('prototxt')
123 | parser.add_argument('-l', '--load', help='Load a caffemodel')
124 | parser.add_argument('-d', '--data', default=None, help='Image list to use [default prototxt data]')
125 | #parser.add_argument('-q', action='store_true', help='Quiet execution')
126 | parser.add_argument('-sm', action='store_true', help='Summary only')
127 | parser.add_argument('-q', action='store_true', help='Quiet execution')
128 | parser.add_argument('-a', '--all', action='store_true', help='Show the statistic for all layers')
129 | parser.add_argument('-nc', action='store_true', help='Do not use color')
130 | parser.add_argument('-s', type=float, default=1.0, help='Scale the input [only custom data "-d"]')
131 | parser.add_argument('-bs', type=int, default=16, help='Batch size [only custom data "-d"]')
132 | parser.add_argument('-nit', type=int, default=10, help='Number of iterations')
133 | parser.add_argument('--gpu', type=int, default=0, help='What gpu to run it on?')
134 | args = parser.parse_args()
135 |
136 | if args.q:
137 | from os import environ
138 | environ['GLOG_minloglevel'] = '2'
139 | import caffe, load
140 | from caffe import NetSpec, layers as L
141 |
142 | caffe.set_mode_gpu()
143 | if args.gpu is not None:
144 | caffe.set_device(args.gpu)
145 |
146 | if args.data is not None:
147 | model = load.ProtoDesc(args.prototxt)
148 | net = NetSpec()
149 | fl = getFileList(args.data)
150 | if len(fl) == 0:
151 | print("Unknown data type for '%s'"%args.data)
152 | exit(1)
153 | from tempfile import NamedTemporaryFile
154 | f = NamedTemporaryFile('w')
155 | f.write('\n'.join([path.abspath(i)+' 0' for i in fl]))
156 | f.flush()
157 | net.data, net.label = L.ImageData(source=f.name, batch_size=args.bs, new_width=model.input_dim[-1], new_height=model.input_dim[-1], transform_param=dict(mean_value=[104,117,123], scale=args.s),ntop=2)
158 | net.out = model(data=net.data, label=net.label)
159 | n = netFromString('force_backward:true\n'+str(net.to_proto()), caffe.TRAIN )
160 | else:
161 | n = caffe.Net(args.prototxt, caffe.TRAIN)
162 |
163 | if args.load is not None:
164 | n.copy_from(args.load)
165 |
166 | cvar = printMeanStddev(n, NIT=args.nit, show_all=args.all, show_color=not args.nc, quiet=args.sm)
167 | cv, gr = computeGradientRatio(n, NIT=args.nit)
168 | print()
169 | print(' Summary ')
170 | print('-----------')
171 | print()
172 | print('layer name out cvar rate cvar rate mean')
173 | for l in n._layer_names:
174 | if l in cvar and l in cv and l in gr:
175 | print('%-30s %10.2f %10.2f %10.2f'%(l, cvar[l], cv[l], gr[l]) )
176 |
177 | if __name__ == "__main__":
178 | main()
179 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/prior_probs.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Deep-Learning-Colorization/resources/prior_probs.npy
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/pts_in_hull.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/Deep-Learning-Colorization/resources/pts_in_hull.npy
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/softmax_cross_entropy_loss_layer.cpp:
--------------------------------------------------------------------------------
1 | #include <algorithm>
2 | #include <cfloat>
3 | #include <vector>
4 |
5 | // #include "caffe/layer.hpp"
6 | // #include "caffe/util/math_functions.hpp"
7 | // #include "caffe/vision_layer.hpp"
8 | #include "caffe/layers/softmax_cross_entropy_loss_layer.hpp"
9 | #include "caffe/util/math_functions.hpp"
10 |
11 | namespace caffe {
12 |
13 | template <typename Dtype>
14 | void SoftmaxCrossEntropyLossLayer<Dtype>::LayerSetUp(
15 | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
16 | LossLayer<Dtype>::LayerSetUp(bottom, top);
17 | softmax_bottom_vec_.clear();
18 | softmax_bottom_vec_.push_back(bottom[0]);
19 | softmax_top_vec_.clear();
20 | softmax_top_vec_.push_back(softmax_output_.get());
21 | softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_);
22 | }
23 |
24 | template <typename Dtype>
25 | void SoftmaxCrossEntropyLossLayer<Dtype>::Reshape(
26 | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
27 | LossLayer<Dtype>::Reshape(bottom, top);
28 | CHECK_EQ(bottom[0]->count(), bottom[1]->count()) <<
29 | "SOFTMAX_CROSS_ENTROPY_LOSS layer inputs must have the same count.";
30 | softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_);
31 | }
32 |
33 | template <typename Dtype>
34 | void SoftmaxCrossEntropyLossLayer<Dtype>::Forward_cpu(
35 | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
36 | // The forward pass computes the softmax outputs.
37 | softmax_bottom_vec_[0] = bottom[0];
38 | softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
39 | // Compute the loss (negative log likelihood)
40 | const int count = bottom[0]->count();
41 | const int num = bottom[0]->num();
42 | // Stable version of loss computation from input data
43 | // const Dtype* input_data = bottom[0]->cpu_data();
44 | const Dtype* target = bottom[1]->cpu_data();
45 | Dtype loss = 0;
46 | const Dtype* softmax_output_data = softmax_top_vec_[0]->cpu_data();
47 |
48 | // First compute max of input data
49 | for (int i = 0; i < count; ++i) {
50 | //loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
51 | // log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
52 | if (target[i] > 0 ) {
53 | loss -= target[i] * (log(softmax_output_data[i]) - log(target[i]));
54 | }
55 | }
56 |
57 | top[0]->mutable_cpu_data()[0] = loss / num;
58 | }
59 |
60 | template <typename Dtype>
61 | void SoftmaxCrossEntropyLossLayer<Dtype>::Backward_cpu(
62 | const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
63 | const vector<Blob<Dtype>*>& bottom) {
64 | if (propagate_down[1]) {
65 | LOG(FATAL) << this->type()
66 | << " Layer cannot backpropagate to label inputs.";
67 | }
68 | if (propagate_down[0]) {
69 | // First, compute the diff
70 | const int count = bottom[0]->count();
71 | const int num = bottom[0]->num();
72 | const Dtype* softmax_output_data = softmax_output_->cpu_data();
73 | const Dtype* target = bottom[1]->cpu_data();
74 | Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
75 | // Gradient is: target[i] - softmax_output_data[i]
76 | caffe_sub(count, softmax_output_data, target, bottom_diff);
77 | // Scale down gradient
78 | const Dtype loss_weight = top[0]->cpu_diff()[0];
79 | caffe_scal(count, loss_weight / num, bottom_diff);
80 | }
81 | }
82 |
83 | #ifdef CPU_ONLY
84 | STUB_GPU(SoftmaxCrossEntropyLossLayer);
85 | #endif
86 |
87 | INSTANTIATE_CLASS(SoftmaxCrossEntropyLossLayer);
88 | REGISTER_LAYER_CLASS(SoftmaxCrossEntropyLoss);
89 |
90 | } // namespace caffe
91 |
--------------------------------------------------------------------------------
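The layer above treats bottom[1] as a soft target distribution: the forward pass computes the cross entropy -sum_i t_i * (log p_i - log t_i) averaged over the batch, with p the softmax of bottom[0], and the backward pass is simply (p - t) scaled by the loss weight. A NumPy sketch of the same math, assuming x and t are (batch, classes) arrays whose target rows sum to one:

    import numpy as np

    def softmax_cross_entropy(x, t, eps=1e-35):
        # softmax over the class dimension (shift by the max for stability)
        e = np.exp(x - x.max(axis=1, keepdims=True))
        p = e / e.sum(axis=1, keepdims=True)
        num = x.shape[0]
        mask = t > 0
        # loss = -sum_i t_i * (log p_i - log t_i), averaged over the batch
        loss = -np.sum(t[mask] * (np.log(p[mask] + eps) - np.log(t[mask]))) / num
        grad = (p - t) / num  # what Backward writes into bottom[0]->diff (loss_weight = 1)
        return loss, grad

The eps term mirrors the 1.0e-35 guard used in the GPU forward pass below.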
/Deep-Learning-Colorization/resources/softmax_cross_entropy_loss_layer.cu:
--------------------------------------------------------------------------------
1 | #include <algorithm>
2 | #include <cfloat>
3 | #include <vector>
4 |
5 | // #include "caffe/layer.hpp"
6 | // #include "caffe/util/math_functions.hpp"
7 | // #include "caffe/vision_layer.hpp"
8 | #include "caffe/layers/softmax_cross_entropy_loss_layer.hpp"
9 | #include "caffe/util/math_functions.hpp"
10 |
11 | namespace caffe {
12 |
13 | template <typename Dtype>
14 | void SoftmaxCrossEntropyLossLayer<Dtype>::Forward_gpu(
15 | const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
16 | // The forward pass computes the softmax outputs.
17 | softmax_bottom_vec_[0] = bottom[0];
18 | softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
19 | // Compute the loss (negative log likelihood)
20 | const int count = bottom[0]->count();
21 | const int num = bottom[0]->num();
22 | // Stable version of loss computation from input data
23 | const Dtype* input_data = bottom[0]->cpu_data();
24 | const Dtype* target = bottom[1]->cpu_data();
25 | // Output of softmax forward pass
26 | const Dtype* softmax_output = softmax_top_vec_[0]->cpu_data();
27 | Dtype loss = 0;
28 | for (int i = 0; i < count; ++i) {
29 | //loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
30 | // log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
31 | if ( target[i] > 0 ) {
32 | // loss -= target[i] * ( log(softmax_output[i]) - log(target[i]) );
33 | // if(softmax_output[i] > 0) {
34 | loss -= target[i] * ( log(softmax_output[i]+1.0e-35) - log(target[i]) );
35 | // }
36 | // else {
37 | // // LOG(INFO) << "Prediction was 0";
38 | // loss -= target[i] * ( log(1.0e-35) - log(target[i]) ); // adding epsilon
39 | // }
40 | }
41 | }
42 | top[0]->mutable_cpu_data()[0] = loss / num;
43 | }
44 |
45 | template <typename Dtype>
46 | void SoftmaxCrossEntropyLossLayer<Dtype>::Backward_gpu(
47 | const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
48 | const vector<Blob<Dtype>*>& bottom) {
49 | if (propagate_down[1]) {
50 | LOG(FATAL) << this->type()
51 | << " Layer cannot backpropagate to label inputs.";
52 | }
53 | if (propagate_down[0]) {
54 | // First, compute the diff
55 | const int count = bottom[0]->count();
56 | const int num = bottom[0]->num();
57 | const Dtype* softmax_output_data = softmax_output_->gpu_data();
58 | const Dtype* target = bottom[1]->gpu_data();
59 | Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
60 | caffe_copy(count, softmax_output_data, bottom_diff);
61 | caffe_gpu_axpy(count, Dtype(-1), target, bottom_diff);
62 | // Scale down gradient
63 | const Dtype loss_weight = top[0]->cpu_diff()[0];
64 | caffe_gpu_scal(count, loss_weight / num, bottom_diff);
65 | }
66 | }
67 |
68 | INSTANTIATE_LAYER_GPU_FUNCS(SoftmaxCrossEntropyLossLayer);
69 |
70 |
71 | } // namespace caffe
72 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/resources/softmax_cross_entropy_loss_layer.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CAFFE_SOFTMAX_CROSS_ENTROPY_LOSS_LAYER_HPP_
2 | #define CAFFE_SOFTMAX_CROSS_ENTROPY_LOSS_LAYER_HPP_
3 |
4 | #include <vector>
5 |
6 | #include "caffe/blob.hpp"
7 | #include "caffe/layer.hpp"
8 | #include "caffe/proto/caffe.pb.h"
9 |
10 | #include "caffe/layers/loss_layer.hpp"
11 | // #include "caffe/layers/sigmoid_layer.hpp"
12 | #include "caffe/layers/softmax_layer.hpp"
13 |
14 | namespace caffe {
15 |
16 | // Forward declare SoftmaxLayer for use in SoftmaxCrossEntropyLossLayer.
17 | template <typename Dtype> class SoftmaxLayer;
18 |
19 | template <typename Dtype>
20 | class SoftmaxCrossEntropyLossLayer : public LossLayer<Dtype> {
21 | public:
22 | explicit SoftmaxCrossEntropyLossLayer(const LayerParameter& param)
23 | : LossLayer<Dtype>(param),
24 | softmax_layer_(new SoftmaxLayer<Dtype>(param)),
25 | softmax_output_(new Blob<Dtype>()) {}
26 | virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
27 | const vector<Blob<Dtype>*>& top);
28 | virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
29 | const vector<Blob<Dtype>*>& top);
30 |
31 | virtual inline const char* type() const { return "SoftmaxCrossEntropyLoss"; }
32 |
33 | protected:
34 | /// @copydoc SoftmaxCrossEntropyLossLayer
35 | virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
36 | const vector<Blob<Dtype>*>& top);
37 | virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
38 | const vector<Blob<Dtype>*>& top);
39 |
40 | /**
41 | * @brief Computes the softmax cross-entropy loss error gradient w.r.t. the
42 | * predictions.
43 | *
44 | * Gradients cannot be computed with respect to the target inputs (bottom[1]),
45 | * so this method ignores bottom[1] and requires !propagate_down[1], crashing
46 | * if propagate_down[1] is set.
47 | *
48 | * @param top output Blob vector (length 1), providing the error gradient with
49 | * respect to the outputs
50 | * -# @f$ (1 \times 1 \times 1 \times 1) @f$
51 | * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$,
52 | * as @f$ \lambda @f$ is the coefficient of this layer's output
53 | * @f$\ell_i@f$ in the overall Net loss
54 | * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
55 | * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
56 | * (*Assuming that this top Blob is not used as a bottom (input) by any
57 | * other layer of the Net.)
58 | * @param propagate_down see Layer::Backward.
59 | * propagate_down[1] must be false as gradient computation with respect
60 | * to the targets is not implemented.
61 | * @param bottom input Blob vector (length 2)
62 | * -# @f$ (N \times C \times H \times W) @f$
63 | * the predictions @f$x@f$; Backward computes diff
64 | * @f$ \frac{\partial E}{\partial x} =
65 | * \frac{1}{n} \sum\limits_{n=1}^N (\hat{p}_n - p_n)
66 | * @f$
67 | * -# @f$ (N \times 1 \times 1 \times 1) @f$
68 | * the labels -- ignored as we can't compute their error gradients
69 | */
70 | virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
71 | const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
72 | virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
73 | const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
74 |
75 | /// The internal SoftmaxLayer used to map predictions to probabilities.
76 | shared_ptr<SoftmaxLayer<Dtype> > softmax_layer_;
77 | /// softmax_output stores the output of the SoftmaxLayer.
78 | shared_ptr<Blob<Dtype> > softmax_output_;
79 | /// bottom vector holder to call the underlying SoftmaxLayer::Forward
80 | vector<Blob<Dtype>*> softmax_bottom_vec_;
81 | /// top vector holder to call the underlying SoftmaxLayer::Forward
82 | vector<Blob<Dtype>*> softmax_top_vec_;
83 | };
84 |
85 | } // namespace caffe
86 |
87 | #endif // CAFFE_SOFTMAX_CROSS_ENTROPY_LOSS_LAYER_HPP_
88 |
--------------------------------------------------------------------------------
/Deep-Learning-Colorization/video_colorize_parallel.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import cv2
6 | import numpy as np
7 | from skimage import img_as_float
8 | import skimage.color as color
9 | import scipy.ndimage.interpolation as sni
10 | import caffe
11 |
12 | def parse_args():
13 | parser = argparse.ArgumentParser()
14 | parser.add_argument('--filename', type=str, default='*', help='Filename of input BW video')
15 | parser.add_argument('--input_dir', type=str, default='/home/ubuntu/Automatic-Video-Colorization/data/examples/converted/', help='Directory of input files')
16 | parser.add_argument('--output_dir', type=str, default='/home/ubuntu/Automatic-Video-Colorization/data/examples/recolorized/', help='Directory of output files')
17 | parser.add_argument('--gpu', dest='gpu', help='gpu id', type=int, default=0)
18 | parser.add_argument('--prototxt',dest='prototxt',help='prototxt filepath', type=str, default='./models/colorization_deploy_v2.prototxt')
19 | parser.add_argument('--caffemodel',dest='caffemodel',help='caffemodel filepath', type=str, default='./models/colorization_release_v2.caffemodel')
20 |
21 | args = parser.parse_args()
22 | return args
23 |
24 | def image_colorization(frame, args):
25 |
26 | caffe.set_mode_gpu()
27 | caffe.set_device(args.gpu)
28 |
29 | # Select desired model
30 | net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
31 |
32 | (H_in,W_in) = net.blobs['data_l'].data.shape[2:] # get input shape
33 | (H_out,W_out) = net.blobs['class8_ab'].data.shape[2:] # get output shape
34 |
35 | pts_in_hull = np.load('./resources/pts_in_hull.npy') # load cluster centers
36 | net.params['class8_ab'][0].data[:,:,0,0] = pts_in_hull.transpose((1,0)) # populate cluster centers as 1x1 convolution kernel
37 | # print 'Annealed-Mean Parameters populated'
38 |
39 | # load the original image
40 | img_rgb = img_as_float(frame).astype(np.float32)
41 |
42 | img_lab = color.rgb2lab(img_rgb) # convert image to lab color space
43 | img_l = img_lab[:,:,0] # pull out L channel
44 | (H_orig,W_orig) = img_rgb.shape[:2] # original image size
45 |
46 | # create grayscale version of image (just for displaying)
47 | img_lab_bw = img_lab.copy()
48 | img_lab_bw[:,:,1:] = 0
49 | img_rgb_bw = color.lab2rgb(img_lab_bw)
50 |
51 | # resize image to network input size
52 | img_rs = caffe.io.resize_image(img_rgb,(H_in,W_in)) # resize image to network input size
53 | img_lab_rs = color.rgb2lab(img_rs)
54 | img_l_rs = img_lab_rs[:,:,0]
55 |
56 | net.blobs['data_l'].data[0,0,:,:] = img_l_rs-50 # subtract 50 for mean-centering
57 | net.forward() # run network
58 |
59 | ab_dec = net.blobs['class8_ab'].data[0,:,:,:].transpose((1,2,0)) # this is our result
60 | ab_dec_us = sni.zoom(ab_dec,(1.*H_orig/H_out,1.*W_orig/W_out,1)) # upsample to match the size of the original image
61 | img_lab_out = np.concatenate((img_l[:,:,np.newaxis],ab_dec_us),axis=2) # concatenate with original image L
62 | img_rgb_out = (255*np.clip(color.lab2rgb(img_lab_out),0,1)).astype('uint8') # convert back to rgb
63 |
64 | return img_rgb_out
65 |
66 | def bw2color(args, inputname, inputpath, outputpath):
67 | if inputname.endswith(".mp4"):
68 |
69 | # store information about the original video
70 | cap = cv2.VideoCapture(inputpath + inputname)
71 | # original dimensions
72 | width, height = int(cap.get(3)), int(cap.get(4))
73 |
74 | fourcc = cv2.VideoWriter_fourcc(*'mp4v')
75 |
76 | # parameters of output file
77 | # dimensions of the output image
78 | new_width, new_height = width, height
79 | # frames per second of the output video
80 | fps = 30.0
81 |
82 | # recolorized output video
83 | color_out = cv2.VideoWriter(
84 | outputpath + 'color_' + inputname,
85 | fourcc,
86 | fps,
87 | (new_width, new_height),
88 | isColor=True
89 | )
90 |
91 | while(cap.isOpened()):
92 | ret, frame_in = cap.read()
93 | # check if we are not at the end of the video
94 | if ret==True:
95 | # convert BGR to RGB convention
96 | frame_in = frame_in[:,:,::-1]
97 | # colorize the BW frame
98 | frame_out = image_colorization(frame_in, args)
99 | # convert RGB to BGR convention
100 | frame_out = frame_out[:,:,::-1]
101 | # write the color frame
102 | color_out.write(frame_out)
103 |
104 | if cv2.waitKey(1) & 0xFF == ord('q'):
105 | break
106 | # end of the video
107 | else:
108 | break
109 |
110 | # release everything if job is finished
111 | cap.release()
112 | color_out.release()
113 |
114 | def main():
115 | args = parse_args()
116 |
117 | if args.filename == '*':
118 | for filename in os.listdir(args.input_dir):
119 | bw2color(args, inputname = filename, inputpath = args.input_dir, outputpath = args.output_dir)
120 | else:
121 | bw2color(args, inputname = args.filename, inputpath = args.input_dir, outputpath = args.output_dir)
122 |
123 | # cleanup
124 | cv2.destroyAllWindows()
125 |
126 | return 0
127 |
128 | if __name__ == '__main__':
129 | main()
130 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CS230-Final-Project
2 |
3 | ### Converting videos
4 |
5 | 1. Create the data directories
6 | ```
7 | mkdir data; mkdir data/raw; mkdir data/converted;
8 | ```
9 | 2. Place videos inside the 'data/raw' directory
10 | 3. Run the conversion script
11 |
12 | For all videos inside the 'data/raw' directory
13 | ```
14 | python3 converter.py
15 | ```
16 |
17 | For one specific video 'filename'
18 | ```
19 | python3 converter.py --inputname filename
20 | ```
21 |
22 | To convert all videos in the data/raw folder to a consistent fps and resolution:
23 | ```
24 | python3 converter.py --fps 30 --out_dim 640 360
25 | ```
26 |
27 | #### Moments in Time (Mini) Dataset
28 | Download and unzip the dataset
29 | ```
30 | wget http://data.csail.mit.edu/soundnet/actions3/split1/Moments_in_Time_Mini.zip
31 | unzip Moments_in_Time_Mini.zip -d data/.
32 | ```
33 | Pre-process the dataset
34 | ```
35 | ./convert_moment_dataset.sh
36 | ```
37 |
38 | ## Running the baseline on a specific video
39 | Go into the folder "Deep-Learning-Colorization"
40 |
41 | Run ```./models/fetch_release_models.sh``` to download the model.
42 |
43 | Then run the following command to colorize your video:
44 | ```
45 | python3 video_colorize_parallel.py --filename <filename> --input_dir <input_dir> --output_dir <output_dir>
46 | ```
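
For example, with the placeholder filename `example_bw.mp4` and the example directories used elsewhere in this repository (adjust the paths to your setup):
```
python3 video_colorize_parallel.py --filename example_bw.mp4 --input_dir ../data/examples/converted/ --output_dir ../data/examples/recolorized/
```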
47 |
48 | ## Requirements
49 |
50 | ### Dependencies
51 |
52 | You can install Python dependencies using `pip install -r requirements.txt`
53 |
54 |
55 | ### Issues with CUDA
56 |
57 | When running `import tensorflow as tf`, if you encounter the following error:
58 | ```
59 | ImportError: libcublas.so.9.0: cannot open shared object file: No such file or directory
60 | ```
61 |
62 | Run the following to create links:
63 | ```
64 | sudo ln -s /usr/lib/x86_64-linux-gnu/libcublas.so.9.1.85 /usr/lib/x86_64-linux-gnu/libcublas.so.9.0
65 | sudo ln -s /usr/lib/x86_64-linux-gnu/libcusolver.so.9.1.85 /usr/lib/x86_64-linux-gnu/libcusolver.so.9.0
66 | ```
67 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # custom
104 | _TODO
105 | checkpoints
106 | plots
107 | vcs.xml
108 | .idea
109 | .vscode
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/README.md:
--------------------------------------------------------------------------------
1 | # Image Colorization with Generative Adversarial Networks
2 | In this work, we generalize the colorization procedure using a conditional Deep Convolutional Generative Adversarial Network (DCGAN) as suggested by [Pix2Pix]. The network is trained on the [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) and [Places365](http://places2.csail.mit.edu) datasets. Some of the results from the Places365 dataset are [shown here.](#places365-results)
3 |
4 | ## Prerequisites
5 | - Linux
6 | - Tensorflow 1.7
7 | - NVIDIA GPU (12G or 24G memory) + CUDA cuDNN
8 |
9 | ## Getting Started
10 | ### Installation
11 | - Clone this repo:
12 | ```bash
13 | git clone https://github.com/ImagingLab/Colorizing-with-GANs.git
14 | cd Colorizing-with-GANs
15 | ```
16 | - Install Tensorflow and dependencies from https://www.tensorflow.org/install/
17 | - Install python requirements:
18 | ```bash
19 | pip install -r requirements.txt
20 | ```
21 |
22 | ### Dataset
23 | - We use the [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) and [Places365](http://places2.csail.mit.edu) datasets. To train a model on the full dataset, download the datasets from the official websites.
24 | After downloading, put them under the `dataset` folder (see the layout sketch below).
25 |
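The loaders in `dataset.py` expect a layout like the following under `--dataset-path` (shown here as a sketch; adjust the paths to match where you extracted the data):
```
dataset/
├── places365/
│   ├── data_256/     # training images (searched recursively for *.jpg)
│   └── val_256/      # validation images
└── cifar10/
    ├── data_batch_1 … data_batch_5
    └── test_batch
```
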
26 | ### Training
27 | - To train the model, run `main.py` script
28 | ```bash
29 | python main.py
30 | ```
31 | - To train the model on the Places365 dataset with tuned hyperparameters:
32 | ```
33 | python train.py \
34 | --seed 100 \
35 | --dataset places365 \
36 | --dataset-path ./dataset/places365 \
37 | --checkpoints-path ./checkpoints \
38 | --batch-size 16 \
39 | --epochs 10 \
40 | --lr 3e-4 \
41 | --label-smoothing 1
42 |
43 | ```
44 |
45 | - To train the model on the CIFAR-10 dataset with tuned hyperparameters:
46 | ```
47 | python train.py \
48 | --seed 100 \
49 | --dataset cifar10 \
50 | --dataset-path ./dataset/cifar10 \
51 | --checkpoints-path ./checkpoints \
52 | --batch-size 128 \
53 | --epochs 200 \
54 | --lr 3e-4 \
55 | --lr-decay-steps 1e4 \
56 | --augment True
57 |
58 | ```
59 |
60 | ### Evaluate
61 | - Download the pre-trained weights [from here](https://drive.google.com/open?id=1jTsAUAKrMiHO2gn7s-fFZ_zUSzgKoPyp) and copy them into the `checkpoints` folder.
62 | - To evaluate the model quantitatively on the test-set, run `test-eval.py` script:
63 | ```bash
64 | python test-eval.py
65 | ```
66 |
67 | ### Turing Test
68 | - Download the pre-trained weights [from here](https://drive.google.com/open?id=1jTsAUAKrMiHO2gn7s-fFZ_zUSzgKoPyp) and copy them into the `checkpoints` folder.
69 | - To evaluate the model qualitatively using visual Turing test, run `test-turing.py`:
70 | ```bash
71 | python test-turing.py
72 | ```
73 |
74 | - To run the time-based visual Turing test (2-second decision time):
75 | ```bash
76 | python test-turing.py --test-delay 2
77 | ```
78 |
79 |
80 | ## Method
81 |
82 | ### Generative Adversarial Network
83 | Both the generator and the discriminator use CNNs. The generator is trained to minimize the probability that the discriminator makes a correct prediction on generated data, while the discriminator is trained to maximize the probability of assigning the correct label. This is presented as a single minimax game:
84 |
85 |
86 |
87 | In our model, we have redefined the generator's cost function by maximizing the probability of the discriminator being mistaken, as opposed to minimizing the probability of the discriminator being correct. In addition, the cost function was further modified by adding an L1 based regularizer. This will theoretically preserve the structure of the original images and prevent the generator from assigning arbitrary colors to pixels just to fool the discriminator:
88 |
89 |
90 |
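The equations above are carried by images; as a plain-text sketch in standard conditional-GAN notation (the exact symbols are an assumption: `x` is the grayscale input, `y` the ground-truth color image, and `λ` the L1 weight set by `--l1-weight`):

```latex
% Minimax game between generator G and discriminator D
\min_G \max_D \; \mathbb{E}\big[\log D(y)\big] + \mathbb{E}\big[\log\big(1 - D(G(x))\big)\big]

% Modified generator cost: maximize the discriminator's mistake (non-saturating loss)
% plus the L1 regularizer that keeps G(x) close to the ground truth
\min_G \; -\,\mathbb{E}\big[\log D(G(x))\big] + \lambda\,\mathbb{E}\big[\lVert y - G(x)\rVert_1\big]
```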
91 |
92 | ### Conditional GAN
93 | In a traditional GAN, the input of the generator is randomly generated noise data z. However, this approach is not applicable to the automatic colorization problem due to the nature of its inputs. The generator must be modified to accept grayscale images as inputs rather than noise. This problem was addressed by using a variant of GAN called [conditional generative adversarial networks](https://arxiv.org/abs/1411.1784). Since no noise is introduced, the input of the generator is treated as zero noise with the grayscale input as a prior:
94 |
95 |
96 |
97 | The discriminator receives colored images from both the generator and the original data, along with the grayscale input as the condition, and tries to tell which pair contains the true colored image:
98 |
99 |
100 |
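In plain-text form (again a sketch of the equations in the images above; `0_z` denotes the zero noise vector and `x` the grayscale condition):

```latex
% Generator: zero noise with the grayscale image as a prior
G : \; x \mapsto G(0_z \mid x)

% Discriminator: scores (grayscale, color) pairs, where the color image is either
% the ground truth y or the generated G(0_z | x)
\min_G \max_D \; \mathbb{E}\big[\log D(x, y)\big] + \mathbb{E}\big[\log\big(1 - D(x, G(0_z \mid x))\big)\big]
```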
101 |
102 | ### Networks Architecture
103 | The architecture of the generator is inspired by [U-Net](https://arxiv.org/abs/1505.04597): the model is symmetric, with `n` encoding units and `n` decoding units. The contracting path consists of 4x4 convolution layers with stride 2 for downsampling, each followed by batch normalization and a Leaky-ReLU activation with slope 0.2. The number of channels is doubled after each step. Each unit in the expansive path consists of a 4x4 transposed convolution layer with stride 2 for upsampling, concatenation with the activation map of the mirroring layer in the contracting path, followed by batch normalization and a ReLU activation. The last layer of the network is a 1x1 convolution, equivalent to a cross-channel parametric pooling layer; we use a `tanh` activation for this last layer. A minimal sketch is shown below.
104 |
105 |
106 |
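The actual implementation lives in `networks.py`; the following is a minimal, self-contained TensorFlow 1.x sketch of the generator described above (the filter counts are illustrative assumptions and dropout is omitted for brevity):

```python
import tensorflow as tf

def unet_generator(gray, filters=(64, 128, 256, 512), kernel_size=4):
    """gray: [N, H, W, 1] grayscale input in [-1, 1]; returns [N, H, W, 3] in [-1, 1]."""
    skips, x = [], gray
    # contracting path: 4x4 stride-2 convolutions + batch norm + leaky ReLU (slope 0.2)
    for i, f in enumerate(filters):
        x = tf.layers.conv2d(x, f, kernel_size, strides=2, padding='same', name='enc%d' % i)
        x = tf.layers.batch_normalization(x, training=True, name='enc_bn%d' % i)
        x = tf.nn.leaky_relu(x, alpha=0.2)
        skips.append(x)
    # expansive path: 4x4 stride-2 transposed convolutions + batch norm + ReLU,
    # each concatenated with the mirroring encoder activation (skip connection)
    for i, f in enumerate(reversed(filters)):
        x = tf.layers.conv2d_transpose(x, f, kernel_size, strides=2, padding='same', name='dec%d' % i)
        x = tf.layers.batch_normalization(x, training=True, name='dec_bn%d' % i)
        x = tf.nn.relu(x)
        if i < len(filters) - 1:
            x = tf.concat([skips[-(i + 2)], x], axis=3)
    # final 1x1 convolution (cross-channel parametric pooling) with tanh output
    return tf.layers.conv2d(x, 3, 1, strides=1, padding='same', activation=tf.nn.tanh, name='out')
```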
107 |
108 | For the discriminator, we use an architecture similar to the contracting path above: a series of 4x4 convolution layers with stride 2, with the number of channels doubled after each downsampling step. All convolution layers are followed by batch normalization and a leaky ReLU activation with slope 0.2. After the last layer, a convolution is applied to map to a 1-dimensional output, followed by a sigmoid function that returns the probability of the input being real or fake (see the sketch below).
109 |
110 |
111 |
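A matching sketch of the discriminator (again illustrative; the repository's version is in `networks.py`, and in practice the loss would be computed from the logit with `tf.nn.sigmoid_cross_entropy_with_logits` rather than from the sigmoid output):

```python
import tensorflow as tf

def patch_discriminator(gray, color, filters=(64, 128, 256, 512), kernel_size=4):
    """gray: [N, H, W, 1] condition; color: [N, H, W, 3] real or generated image."""
    # condition the discriminator by stacking the grayscale input with the color image
    x = tf.concat([gray, color], axis=3)
    for i, f in enumerate(filters):
        x = tf.layers.conv2d(x, f, kernel_size, strides=2, padding='same', name='disc%d' % i)
        if i > 0:  # no batch norm on the first layer
            x = tf.layers.batch_normalization(x, training=True, name='disc_bn%d' % i)
        x = tf.nn.leaky_relu(x, alpha=0.2)
    # map to a single-channel logit; the sigmoid gives the probability of the pair being real
    logit = tf.layers.conv2d(x, 1, kernel_size, strides=1, padding='same', name='disc_out')
    return tf.nn.sigmoid(logit), logit
```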
112 |
113 | ## Places365 Results
114 | Colorization results with Places365. (a) Grayscale. (b) Original Image. (c) Colorized with GAN.
115 |
116 |
117 |
118 |
119 | ## Citation
120 | If you use this code for your research, please cite our paper Image Colorization Using Generative Adversarial Networks:
121 |
122 | ```
123 | @inproceedings{nazeri2018image,
124 | title={Image Colorization Using Generative Adversarial Networks},
125 | author={Nazeri, Kamyar and Ng, Eric and Ebrahimi, Mehran},
126 | booktitle={International Conference on Articulated Motion and Deformable Objects},
127 | pages={85--94},
128 | year={2018},
129 | organization={Springer}
130 | }
131 | ```
132 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/__init__.py:
--------------------------------------------------------------------------------
1 | from .options import *
2 | from .models import *
3 | from .utils import *
4 | from .dataset import *
5 | from .main import *
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/build_dataset.py:
--------------------------------------------------------------------------------
1 | """Split Moments in Time video clips into frame pairs and resize them to 256x256.
2 |
3 | The Moments in Time (Mini) dataset comes in the following format:
4 | training/<category>/
5 | some_clip.mp4
6 | ...
7 | validation/<category>/
8 | some_clip.mp4
9 | ...
10 |
11 | Only the category given by SUBFOLDER is processed. Each clip is split into pairs of
12 | consecutive frames (t, t+dt); every pair is resized to (256, 256), concatenated with the
13 | first frame of the clip (kept as a color-palette reference) and saved as a single .npy file.
14 |
15 | The "validation" folder already serves as the test set, so we only need to split the
16 | "training" clips into train and dev sets. To keep the dev statistics representative while
17 | retaining most clips for training, we hold out 10% of the training clips as the dev set.
18 | """
19 |
20 | import argparse
21 | import random
22 | import os
23 |
24 | import numpy as np
25 |
26 | from PIL import Image
27 | from tqdm import tqdm
28 | import cv2
29 |
30 | # size of the resized frames
31 | SIZE = 256
32 |
33 | # subfolder of the "Moments_in_Time" dataset to consider
34 | SUBFOLDER = "/baking"
35 |
36 | parser = argparse.ArgumentParser()
37 | parser.add_argument('--data_dir', default='../data/Moments_in_Time_Mini', help="Directory with the Moments in Time dataset")
38 | parser.add_argument('--output_dir', default='../data/momentsintime_ref', help="Where to write the new data")
39 | parser.add_argument('--dt', type=int, default=1, help="Time between consecutive frames")
40 |
41 |
42 | def split_resize_and_save(filename, i, output_dir, dt=1, size=SIZE):
43 | """Split the video clip into pairs of consecutive frames (t, t+dt), resize the frames, and save the pairs to the `output_dir`."""
44 |
45 | vidcap = cv2.VideoCapture(filename)
46 |
47 | success, frame = vidcap.read()
48 | # convert BGR to RGB convention
49 | frame = frame[:,:,::-1]
50 | # default : use bilinear interpolation
51 | frame_prev = cv2.resize(frame, (size, size))
52 | # save the first frame as the "color palette" reference
53 | frame_ref = frame_prev
54 |
55 | # counter to build pairs of consecutive frames
56 | count = 1
57 |
58 | while success:
59 | count += 1
60 |
61 | success, frame = vidcap.read()
62 |
63 | if success:
64 | # convert BGR to RGB convention
65 | frame = frame[:,:,::-1]
66 | # default : use bilinear interpolation
67 | frame = cv2.resize(frame, (size, size))
68 | else:
69 | break
70 | #print('Read a new frame: ', success)
71 |
72 | if count % (1+dt) == 0:
73 | img = np.concatenate((frame, frame_prev, frame_ref), 2)
74 | frame_prev = frame
75 | np.save(output_dir + "/video{}_frame{}".format(i, count), img)
76 |
77 | if __name__ == '__main__':
78 | args = parser.parse_args()
79 | # Define the output directory
80 | args.output_dir = args.output_dir + "_dt" + str(args.dt)
81 |
82 | assert os.path.isdir(args.data_dir), "Couldn't find the dataset at {}".format(args.data_dir)
83 |
84 | # Define the data directories
85 | train_data_dir = os.path.join(args.data_dir, 'training' + SUBFOLDER)
86 | test_data_dir = os.path.join(args.data_dir, 'validation' + SUBFOLDER)
87 |
88 | # Get the filenames in each directory (train and test)
89 | filenames = os.listdir(train_data_dir)
90 | filenames = [os.path.join(train_data_dir, f) for f in filenames if f.endswith('.mp4')]
91 |
92 | test_filenames = os.listdir(test_data_dir)
93 | test_filenames = [os.path.join(test_data_dir, f) for f in test_filenames if f.endswith('.mp4')]
94 |
95 | # Split the clips in 'training' into 90% train and 10% dev
96 | # Make sure to always shuffle with a fixed seed so that the split is reproducible
97 | random.seed(230)
98 | filenames.sort()
99 | random.shuffle(filenames)
100 |
101 | split = int(0.9 * len(filenames))
102 | train_filenames = filenames[:split]
103 | dev_filenames = filenames[split:]
104 |
105 | filenames = {'train': train_filenames,
106 | 'dev': dev_filenames,
107 | 'test': test_filenames}
108 |
109 | if not os.path.exists(args.output_dir):
110 | os.mkdir(args.output_dir)
111 | else:
112 | print("Warning: output dir {} already exists".format(args.output_dir))
113 |
114 | # Preprocess train, dev and test
115 | for split in ['train', 'dev', 'test']:
116 | output_dir_split = os.path.join(args.output_dir, '{}_moments'.format(split))
117 | if not os.path.exists(output_dir_split):
118 | os.mkdir(output_dir_split)
119 | else:
120 | print("Warning: dir {} already exists".format(output_dir_split))
121 |
122 | print("Processing {} data, saving preprocessed data to {}".format(split, output_dir_split))
123 | for i, filename in enumerate(tqdm(filenames[split])):
124 | split_resize_and_save(filename, i, output_dir_split, dt=args.dt, size=SIZE)
125 |
126 | print("Done building dataset")
127 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/dataset.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import numpy as np
3 | import tensorflow as tf
4 | from scipy.misc import imread
5 | from abc import abstractmethod
6 | from utils import unpickle
7 |
8 | CIFAR10_DATASET = 'cifar10'
9 | PLACES365_DATASET = 'places365'
10 | MOMENTSINTIME_DATASET = 'momentsintime'
11 |
12 | class BaseDataset():
13 | def __init__(self, name, path, training=True, augment=True):
14 | self.name = name
15 | self.augment = augment and training
16 | self.training = training
17 | self.path = path
18 | self._data = []
19 |
20 | def __len__(self):
21 | return len(self.data)
22 |
23 | def __iter__(self):
24 | total = len(self)
25 | start = 0
26 |
27 | while start < total:
28 | item = self[start]
29 | start += 1
30 | yield item
31 |
32 | return  # end iteration; raising StopIteration inside a generator is an error under PEP 479 (Python 3.7+)
33 |
34 | def __getitem__(self, index):
35 | val = self.data[index]
36 | try:
37 | # OLD : img = imread(val) if isinstance(val, str) else val
38 | img = np.load(val) if isinstance(val, str) else val
39 |
40 | if self.augment and np.random.binomial(1, 0.5) == 1:
41 | img = img[:, ::-1, :]
42 |
43 | except:
44 | img = None
45 |
46 | return img
47 |
48 | def generator(self, batch_size, recursive=False):
49 | start = 0
50 | total = len(self)
51 |
52 | while True:
53 | while start < total:
54 | end = np.min([start + batch_size, total])
55 | items = []
56 |
57 | for ix in range(start, end):
58 | item = self[ix]
59 | if item is not None:
60 | items.append(item)
61 |
62 | start = end
63 | yield np.array(items)
64 |
65 | if recursive:
66 | start = 0
67 |
68 | else:
69 | return  # stop the generator instead of raising StopIteration (PEP 479)
70 |
71 |
72 | @property
73 | def data(self):
74 | if len(self._data) == 0:
75 | self._data = self.load()
76 | np.random.shuffle(self._data)
77 |
78 | return self._data
79 |
80 | @abstractmethod
81 | def load(self):
82 | return []
83 |
84 |
85 | class Cifar10Dataset(BaseDataset):
86 | def __init__(self, path, training=True, augment=True):
87 | super(Cifar10Dataset, self).__init__(CIFAR10_DATASET, path, training, augment)
88 |
89 | def load(self):
90 | data = []
91 | if self.training:
92 | for i in range(1, 6):
93 | filename = '{}/data_batch_{}'.format(self.path, i)
94 | batch_data = unpickle(filename)
95 | if len(data) > 0:
96 | data = np.vstack((data, batch_data[b'data']))
97 | else:
98 | data = batch_data[b'data']
99 |
100 | else:
101 | filename = '{}/test_batch'.format(self.path)
102 | batch_data = unpickle(filename)
103 | data = batch_data[b'data']
104 |
105 | w = 32
106 | h = 32
107 | s = w * h
108 | data = np.array(data)
109 | data = np.dstack((data[:, :s], data[:, s:2 * s], data[:, 2 * s:]))
110 | data = data.reshape((-1, w, h, 3))
111 | return data
112 |
113 |
114 | class Places365Dataset(BaseDataset):
115 | def __init__(self, path, training=True, augment=True):
116 | super(Places365Dataset, self).__init__(PLACES365_DATASET, path, training, augment)
117 |
118 | def load(self):
119 | if self.training:
120 | data = np.array(
121 | glob.glob(self.path + '/data_256/**/*.jpg', recursive=True))
122 |
123 | else:
124 | data = np.array(glob.glob(self.path + '/val_256/*.jpg'))
125 |
126 | return data
127 |
128 |
129 | class MomentsInTimeDataset(BaseDataset):
130 | def __init__(self, path, training=True, augment=True):
131 | super(MomentsInTimeDataset, self).__init__(MOMENTSINTIME_DATASET, path, training, augment)
132 |
133 | def load(self):
134 | if self.training:
135 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/train_moments/*"))
136 | data = np.array(glob.glob("." + self.path + "/train_moments/*"))
137 | else:
138 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/dev_moments/*"))
139 | data = np.array(glob.glob("." + self.path + "/dev_moments/*"))
140 |
141 | return data
142 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import numpy as np
4 | import tensorflow as tf
5 | from options import ModelOptions
6 | from models import Cifar10Model, Places365Model, MomentsInTimeModel
7 | from dataset import CIFAR10_DATASET, PLACES365_DATASET, MOMENTSINTIME_DATASET
8 |
9 |
10 | def main(options):
11 |
12 | # reset tensorflow graph
13 | tf.reset_default_graph()
14 |
15 | # initialize random seed
16 | tf.set_random_seed(options.seed)
17 | np.random.seed(options.seed)
18 | random.seed(options.seed)
19 |
20 | # create a session environment
21 | with tf.Session() as sess:
22 |
23 | if options.dataset == CIFAR10_DATASET:
24 | model = Cifar10Model(sess, options)
25 |
26 | elif options.dataset == PLACES365_DATASET:
27 | model = Places365Model(sess, options)
28 |
29 | elif options.dataset == MOMENTSINTIME_DATASET:
30 | model = MomentsInTimeModel(sess, options)
31 |
32 | else:
33 | model = MomentsInTimeModel(sess, options)
34 |
35 | if not os.path.exists(options.checkpoints_path):
36 | os.makedirs(options.checkpoints_path)
37 |
38 | if options.log:
39 | open(model.train_log_file, 'w').close()
40 | open(model.test_log_file, 'w').close()
41 |
42 | # build the model and initialize
43 | model.build()
44 | sess.run(tf.global_variables_initializer())
45 |
46 |
47 | # load model only after global variables initialization
48 | model.load()
49 |
50 |
51 | if options.mode == 0:
52 | args = vars(options)
53 | print('\n------------ Options -------------')
54 | with open(os.path.join(options.checkpoints_path, 'options.dat'), 'w') as f:
55 | for k, v in sorted(args.items()):
56 | print('%s: %s' % (str(k), str(v)))
57 | f.write('%s: %s\n' % (str(k), str(v)))
58 | print('-------------- End ----------------\n')
59 |
60 | model.train()
61 |
62 | elif options.mode == 1:
63 | model.evaluate()
64 | while True:
65 | model.sample()
66 |
67 | else:
68 | model.turing_test()
69 |
70 |
71 | if __name__ == "__main__":
72 | main(ModelOptions().parse())
73 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/networks.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from ops import conv2d, conv2d_transpose, pixelwise_accuracy
4 |
5 |
6 | class Discriminator(object):
7 | def __init__(self, name, kernels):
8 | self.name = name
9 | self.kernels = kernels
10 | self.var_list = []
11 |
12 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None):
13 | output = inputs
14 | with tf.variable_scope(self.name, reuse=reuse_variables):
15 | for index, kernel in enumerate(self.kernels):
16 |
17 | # do not use batch-norm in the first layer
18 | bnorm = False if index == 0 else True
19 | name = 'conv' + str(index)
20 | output = conv2d(
21 | inputs=output,
22 | name=name,
23 | kernel_size=kernel_size,
24 | filters=kernel[0],
25 | strides=kernel[1],
26 | bnorm=bnorm,
27 | activation=tf.nn.leaky_relu,
28 | seed=seed
29 | )
30 |
31 | if kernel[2] > 0:
32 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
33 |
34 | output = conv2d(
35 | inputs=output,
36 | name='conv_last',
37 | filters=1,
38 | kernel_size=4, # last layer kernel size = 4
39 | strides=1, # last layer stride = 1
40 | bnorm=False, # do not use batch-norm for the last layer
41 | seed=seed
42 | )
43 |
44 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)
45 |
46 | return output
47 |
48 |
49 | class Generator(object):
50 | def __init__(self, name, encoder_kernels, decoder_kernels, output_channels=3):
51 | self.name = name
52 | self.encoder_kernels = encoder_kernels
53 | self.decoder_kernels = decoder_kernels
54 | self.output_channels = output_channels
55 | self.var_list = []
56 |
57 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None):
58 | output = inputs
59 |
60 | with tf.variable_scope(self.name, reuse=reuse_variables):
61 |
62 | layers = []
63 |
64 | # encoder branch
65 | for index, kernel in enumerate(self.encoder_kernels):
66 |
67 | name = 'conv' + str(index)
68 | output = conv2d(
69 | inputs=output,
70 | name=name,
71 | kernel_size=kernel_size,
72 | filters=kernel[0],
73 | strides=kernel[1],
74 | activation=tf.nn.leaky_relu,
75 | seed=seed
76 | )
77 |
78 | # save contracting path layers to be used for skip connections
79 | layers.append(output)
80 |
81 | if kernel[2] > 0:
82 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
83 |
84 | # decoder branch
85 | for index, kernel in enumerate(self.decoder_kernels):
86 |
87 | name = 'deconv' + str(index)
88 | output = conv2d_transpose(
89 | inputs=output,
90 | name=name,
91 | kernel_size=kernel_size,
92 | filters=kernel[0],
93 | strides=kernel[1],
94 | activation=tf.nn.relu,
95 | seed=seed
96 | )
97 |
98 | if kernel[2] > 0:
99 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
100 |
101 | # concat the layer from the contracting path with the output of the current layer
102 | # concat only the channels (axis=3)
103 | output = tf.concat([layers[len(layers) - index - 2], output], axis=3)
104 |
105 | output = conv2d(
106 | inputs=output,
107 | name='conv_last',
108 | filters=self.output_channels, # number of output channels
109 | kernel_size=1, # last layer kernel size = 1
110 | strides=1, # last layer stride = 1
111 | bnorm=False, # do not use batch-norm for the last layer
112 | activation=tf.nn.tanh, # tanh activation function for the output
113 | seed=seed
114 | )
115 |
116 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)
117 |
118 | return output
119 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/ops.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | COLORSPACE_RGB = 'RGB'
5 | COLORSPACE_LAB = 'LAB'
6 |
7 |
8 | def conv2d(inputs, filters, name, kernel_size=4, strides=2, bnorm=True, activation=None, seed=None):
9 | """
10 | Creates a conv2D block
11 | """
12 | initializer=tf.variance_scaling_initializer(seed=seed)
13 | res = tf.layers.conv2d(
14 | name=name,
15 | inputs=inputs,
16 | filters=filters,
17 | kernel_size=kernel_size,
18 | strides=strides,
19 | padding="same",
20 | kernel_initializer=initializer)
21 |
22 | if bnorm:
23 | res = tf.layers.batch_normalization(inputs=res, name='bn_' + name, training=True)
24 |
25 | # activation after batch-norm
26 | if activation is not None:
27 | res = activation(res)
28 |
29 | return res
30 |
31 |
32 | def conv2d_transpose(inputs, filters, name, kernel_size=4, strides=2, bnorm=True, activation=None, seed=None):
33 | """
34 | Creates a conv2D-transpose block
35 | """
36 | initializer=tf.variance_scaling_initializer(seed=seed)
37 | res = tf.layers.conv2d_transpose(
38 | name=name,
39 | inputs=inputs,
40 | filters=filters,
41 | kernel_size=kernel_size,
42 | strides=strides,
43 | padding="same",
44 | kernel_initializer=initializer)
45 |
46 | if bnorm:
47 | res = tf.layers.batch_normalization(inputs=res, name='bn_' + name, training=True)
48 |
49 | # activation after batch-norm
50 | if activation is not None:
51 | res = activation(res)
52 |
53 | return res
54 |
55 |
56 | def pixelwise_accuracy(img_real, img_fake, colorspace, thresh):
57 | """
58 | Measures the accuracy of the colorization process by comparing pixels
59 | """
60 | img_real = postprocess(img_real, colorspace, COLORSPACE_LAB)
61 | img_fake = postprocess(img_fake, colorspace, COLORSPACE_LAB)
62 |
63 | diffL = tf.abs(tf.round(img_real[..., 0]) - tf.round(img_fake[..., 0]))
64 | diffA = tf.abs(tf.round(img_real[..., 1]) - tf.round(img_fake[..., 1]))
65 | diffB = tf.abs(tf.round(img_real[..., 2]) - tf.round(img_fake[..., 2]))
66 |
67 | # within %thresh of the original
68 | predL = tf.cast(tf.less_equal(diffL, 1 * thresh), tf.float64) # L: [0, 100]
69 | predA = tf.cast(tf.less_equal(diffA, 2.2 * thresh), tf.float64) # A: [-110, 110]
70 | predB = tf.cast(tf.less_equal(diffB, 2.2 * thresh), tf.float64) # B: [-110, 110]
71 |
72 | # all three channels are within the threshold
73 | pred = predL * predA * predB
74 |
75 | return tf.reduce_mean(pred)
76 |
77 |
78 | def preprocess(img, colorspace_in, colorspace_out):
79 | if colorspace_out.upper() == COLORSPACE_RGB:
80 | if colorspace_in == COLORSPACE_LAB:
81 | img = lab_to_rgb(img)
82 |
83 | # [0, 255] => [-1, 1]
84 | img = (img / 255.0) * 2 - 1
85 |
86 | elif colorspace_out.upper() == COLORSPACE_LAB:
87 | if colorspace_in == COLORSPACE_RGB:
88 | img = rgb_to_lab(img / 255.0)
89 |
90 | L_chan, a_chan, b_chan = tf.unstack(img, axis=3)
91 |
92 | # L: [0, 100] => [-1, 1]
93 | # A, B: [-110, 110] => [-1, 1]
94 | img = tf.stack([L_chan / 50 - 1, a_chan / 110, b_chan / 110], axis=3)
95 |
96 | return img
97 |
98 |
99 | def postprocess(img, colorspace_in, colorspace_out):
100 | if colorspace_in.upper() == COLORSPACE_RGB:
101 | # [-1, 1] => [0, 1]
102 | img = (img + 1) / 2
103 |
104 | if colorspace_out == COLORSPACE_LAB:
105 | img = rgb_to_lab(img)
106 |
107 | elif colorspace_in.upper() == COLORSPACE_LAB:
108 | L_chan, a_chan, b_chan = tf.unstack(img, axis=3)
109 |
110 | # L: [-1, 1] => [0, 100]
111 | # A, B: [-1, 1] => [-110, 110]
112 | img = tf.stack([(L_chan + 1) / 2 * 100, a_chan * 110, b_chan * 110], axis=3)
113 |
114 | if colorspace_out == COLORSPACE_RGB:
115 | img = lab_to_rgb(img)
116 |
117 | return img
118 |
119 |
120 | def rgb_to_lab(srgb):
121 | # based on https://github.com/torch/image/blob/9f65c30167b2048ecbe8b7befdc6b2d6d12baee9/generic/image.c
122 | with tf.name_scope("rgb_to_lab"):
123 | srgb_pixels = tf.reshape(srgb, [-1, 3])
124 |
125 | with tf.name_scope("srgb_to_xyz"):
126 | linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32)
127 | exponential_mask = tf.cast(srgb_pixels > 0.04045, dtype=tf.float32)
128 | rgb_pixels = (srgb_pixels / 12.92 * linear_mask) + (((srgb_pixels + 0.055) / 1.055) ** 2.4) * exponential_mask
129 | rgb_to_xyz = tf.constant([
130 | # X Y Z
131 | [0.412453, 0.212671, 0.019334], # R
132 | [0.357580, 0.715160, 0.119193], # G
133 | [0.180423, 0.072169, 0.950227], # B
134 | ])
135 | xyz_pixels = tf.matmul(rgb_pixels, rgb_to_xyz)
136 |
137 | # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions
138 | with tf.name_scope("xyz_to_cielab"):
139 |
140 | # normalize for D65 white point
141 | xyz_normalized_pixels = tf.multiply(xyz_pixels, [1 / 0.950456, 1.0, 1 / 1.088754])
142 |
143 | epsilon = 6 / 29
144 | linear_mask = tf.cast(xyz_normalized_pixels <= (epsilon**3), dtype=tf.float32)
145 | exponential_mask = tf.cast(xyz_normalized_pixels > (epsilon**3), dtype=tf.float32)
146 | fxfyfz_pixels = (xyz_normalized_pixels / (3 * epsilon**2) + 4 / 29) * linear_mask + (xyz_normalized_pixels ** (1 / 3)) * exponential_mask
147 |
148 | # convert to lab
149 | fxfyfz_to_lab = tf.constant([
150 | # l a b
151 | [0.0, 500.0, 0.0], # fx
152 | [116.0, -500.0, 200.0], # fy
153 | [0.0, 0.0, -200.0], # fz
154 | ])
155 | lab_pixels = tf.matmul(fxfyfz_pixels, fxfyfz_to_lab) + tf.constant([-16.0, 0.0, 0.0])
156 |
157 | return tf.reshape(lab_pixels, tf.shape(srgb))
158 |
159 |
160 | def lab_to_rgb(lab):
161 | with tf.name_scope("lab_to_rgb"):
162 | lab_pixels = tf.reshape(lab, [-1, 3])
163 |
164 | # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions
165 | with tf.name_scope("cielab_to_xyz"):
166 | # convert to fxfyfz
167 | lab_to_fxfyfz = tf.constant([
168 | # fx fy fz
169 | [1 / 116.0, 1 / 116.0, 1 / 116.0], # l
170 | [1 / 500.0, 0.0, 0.0], # a
171 | [0.0, 0.0, -1 / 200.0], # b
172 | ])
173 | fxfyfz_pixels = tf.matmul(lab_pixels + tf.constant([16.0, 0.0, 0.0]), lab_to_fxfyfz)
174 |
175 | # convert to xyz
176 | epsilon = 6 / 29
177 | linear_mask = tf.cast(fxfyfz_pixels <= epsilon, dtype=tf.float32)
178 | exponential_mask = tf.cast(fxfyfz_pixels > epsilon, dtype=tf.float32)
179 | xyz_pixels = (3 * epsilon**2 * (fxfyfz_pixels - 4 / 29)) * linear_mask + (fxfyfz_pixels ** 3) * exponential_mask
180 |
181 | # denormalize for D65 white point
182 | xyz_pixels = tf.multiply(xyz_pixels, [0.950456, 1.0, 1.088754])
183 |
184 | with tf.name_scope("xyz_to_srgb"):
185 | xyz_to_rgb = tf.constant([
186 | # r g b
187 | [3.2404542, -0.9692660, 0.0556434], # x
188 | [-1.5371385, 1.8760108, -0.2040259], # y
189 | [-0.4985314, 0.0415560, 1.0572252], # z
190 | ])
191 | rgb_pixels = tf.matmul(xyz_pixels, xyz_to_rgb)
192 | # avoid a slightly negative number messing up the conversion
193 | rgb_pixels = tf.clip_by_value(rgb_pixels, 0.0, 1.0)
194 | linear_mask = tf.cast(rgb_pixels <= 0.0031308, dtype=tf.float32)
195 | exponential_mask = tf.cast(rgb_pixels > 0.0031308, dtype=tf.float32)
196 | srgb_pixels = (rgb_pixels * 12.92 * linear_mask) + ((rgb_pixels ** (1 / 2.4) * 1.055) - 0.055) * exponential_mask
197 |
198 | return tf.reshape(srgb_pixels, tf.shape(lab))
199 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/options.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import random
4 | import argparse
5 |
6 |
7 | def str2bool(v):
8 | if v.lower() in ('yes', 'true', 't', 'y', '1'):
9 | return True
10 | elif v.lower() in ('no', 'false', 'f', 'n', '0'):
11 | return False
12 | else:
13 | raise argparse.ArgumentTypeError('Boolean value expected.')
14 |
15 |
16 | class ModelOptions:
17 | def __init__(self):
18 | parser = argparse.ArgumentParser(description='Colorization with GANs')
19 | parser.add_argument('--seed', type=int, default=0, metavar='S', help='random seed (default: 0)')
20 | parser.add_argument('--name', type=str, default='CGAN', help='arbitrary model name (default: CGAN)')
21 | parser.add_argument('--mode', type=int, default=0, help='run mode [0: train, 1: evaluate, 2: test] (default: 0)')
22 | parser.add_argument('--dataset', type=str, default='momentsintime', help='the name of dataset [places365, cifar10] (default: momentsintime)')
23 | parser.add_argument('--dataset-path', type=str, default='./data', help='dataset path (default: ./data)')
24 | parser.add_argument('--checkpoints-path', type=str, default='./checkpoints', help='models are saved here (default: ./checkpoints)')
25 | parser.add_argument('--batch-size', type=int, default=16, metavar='N', help='input batch size for training (default: 16)')
26 | parser.add_argument('--color-space', type=str, default='lab', help='model color space [lab, rgb] (default: lab)')
27 | parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs to train (default: 100)')
28 | parser.add_argument('--lr', type=float, default=5e-3, metavar='LR', help='learning rate (default: 5e-3)')
29 | parser.add_argument('--lr-decay-rate', type=float, default=0.1, help='learning rate exponential decay rate (default: 0.1)')
30 | parser.add_argument('--lr-decay-steps', type=float, default=25e2, help='learning rate exponential decay steps (default: 2.5e3)')
31 | parser.add_argument('--beta1', type=float, default=0, help='momentum term of adam optimizer (default: 0)')
32 | parser.add_argument("--l1-weight", type=float, default=100.0, help="weight on L1 term for generator gradient (default: 100.0)")
33 | parser.add_argument('--augment', type=str2bool, default=True, help='True for augmentation (default: True)')
34 | parser.add_argument('--label-smoothing', type=str2bool, default=False, help='True for one-sided label smoothing (default: False)')
35 | parser.add_argument('--acc-thresh', type=float, default=2.0, help="accuracy threshold (default: 2.0)")
36 | parser.add_argument('--kernel-size', type=int, default=4, help="default kernel size (default: 4)")
37 | parser.add_argument('--save', type=str2bool, default=True, help='True for saving (default: True)')
38 | parser.add_argument('--save-interval', type=int, default=100, help='how many batches to wait before saving model (default: 100)')
39 | parser.add_argument('--sample', type=str2bool, default=True, help='True for sampling (default: True)')
40 | parser.add_argument('--sample-size', type=int, default=8, help='number of images to sample (default: 8)')
41 | parser.add_argument('--sample-interval', type=int, default=100, help='how many batches to wait before sampling (default: 100)')
42 | parser.add_argument('--validate', type=str2bool, default=True, help='True for validation (default: True)')
43 | parser.add_argument('--validate-interval', type=int, default=0, help='how many batches to wait before validating (default: 0)')
44 | parser.add_argument('--log', type=str2bool, default=True, help='True for logging (default: True)')
45 | parser.add_argument('--log-interval', type=int, default=10, help='how many iterations to wait before logging training status (default: 10)')
46 | parser.add_argument('--visualize', type=str2bool, default=False, help='True for accuracy visualization (default: False)')
47 | parser.add_argument('--visualize-window', type=int, default=100, help='the exponentially moving average window width (default: 100)')
48 | parser.add_argument('--test-size', type=int, default=100, metavar='N', help='number of Turing tests (default: 100)')
49 | parser.add_argument('--test-delay', type=int, default=0, metavar='N', help='number of seconds to wait when doing Turing test, 0 for unlimited (default: 0)')
50 | parser.add_argument('--gpu-ids', type=str, default='0', help="gpu ids, e.g. '0', '0,1,2', '0,2'; use -1 for CPU")
51 | # to recolorize a video clip
52 | parser.add_argument('--filename', type=str, default='*', help='Filename of input BW video')
53 | parser.add_argument('--input_dir', type=str, default='../data/examples/converted', help='Directory of input files')
54 | parser.add_argument('--output_dir', type=str, default='../data/examples/recolorized', help='Directory of output files')
55 |
56 | self._parser = parser
57 |
58 | def parse(self):
59 | opt = self._parser.parse_args()
60 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids
61 |
62 | opt.color_space = opt.color_space.upper()
63 |
64 | if opt.seed == 0:
65 | opt.seed = random.randint(0, 2**31 - 1)
66 |
67 | if (opt.dataset_path == './data') or (opt.dataset_path == './dataset'):
68 | opt.dataset_path += ('/' + opt.dataset)
69 |
70 | if opt.checkpoints_path == './checkpoints':
71 | opt.checkpoints_path += ('/' + opt.dataset)
72 |
73 | return opt
74 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy ~= 1.14.3
2 | scipy ~= 1.0.1
3 | future ~= 0.16.0
4 | matplotlib ~= 2.2.2
5 | pillow ~= 5.0.0
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/setup.cfg:
--------------------------------------------------------------------------------
1 | [pycodestyle]
2 | ignore = E303
3 | max-line-length = 200
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/__init__.py:
--------------------------------------------------------------------------------
1 | from .options import *
2 | from .models import *
3 | from .utils import *
4 | from .dataset import *
5 | from .main import *
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/build_dataset.py:
--------------------------------------------------------------------------------
1 | """Split Moments in Time video clips into frame pairs and resize them to 256x256.
2 |
3 | The Moments in Time (Mini) dataset comes in the following format:
4 | training/<category>/
5 | some_clip.mp4
6 | ...
7 | validation/<category>/
8 | some_clip.mp4
9 | ...
10 |
11 | Only the category given by SUBFOLDER is processed. Each clip is split into pairs of
12 | consecutive frames (t, t+dt); every pair is resized to (256, 256), concatenated with the
13 | first frame of the clip (kept as a color-palette reference) and saved as a single .npy file.
14 |
15 | The "validation" folder already serves as the test set, so we only need to split the
16 | "training" clips into train and dev sets. To keep the dev statistics representative while
17 | retaining most clips for training, we hold out 10% of the training clips as the dev set.
18 | """
19 |
20 | import argparse
21 | import random
22 | import os
23 |
24 | import numpy as np
25 |
26 | from PIL import Image
27 | from tqdm import tqdm
28 | import cv2
29 |
30 | # size of the resized frames
31 | SIZE = 256
32 |
33 | # subfolder of the "Moments_in_Time" dataset to consider
34 | SUBFOLDER = "/baking"
35 |
36 | parser = argparse.ArgumentParser()
37 | parser.add_argument('--data_dir', default='../data/Moments_in_Time_Mini', help="Directory with the Moments in Time dataset")
38 | parser.add_argument('--output_dir', default='../data/momentsintime_ref', help="Where to write the new data")
39 | parser.add_argument('--dt', type=int, default=1, help="Time between consecutive frames")
40 |
41 |
42 | def split_resize_and_save(filename, i, output_dir, dt=1, size=SIZE):
43 | """Split the video clip into pairs of consecutive frames (t, t+dt), resize the frames, and save the pairs to the `output_dir`."""
44 |
45 | vidcap = cv2.VideoCapture(filename)
46 |
47 | success, frame = vidcap.read()
48 | # convert BGR to RGB convention
49 | frame = frame[:,:,::-1]
50 | # default : use bilinear interpolation
51 | frame_prev = cv2.resize(frame, (size, size))
52 | # save the first frame as the "color palette" reference
53 | frame_ref = frame_prev
54 |
55 | # counter to build pairs of consecutive frames
56 | count = 1
57 |
58 | while success:
59 | count += 1
60 |
61 | success, frame = vidcap.read()
62 |
63 | if success:
64 | # convert BGR to RGB convention
65 | frame = frame[:,:,::-1]
66 | # default : use bilinear interpolation
67 | frame = cv2.resize(frame, (size, size))
68 | else:
69 | break
70 | #print('Read a new frame: ', success)
71 |
72 | if count % (1+dt) == 0:
73 | img = np.concatenate((frame, frame_prev, frame_ref), 2)
74 | frame_prev = frame
75 | np.save(output_dir + "/video{}_frame{}".format(i, count), img)
76 |
77 | if __name__ == '__main__':
78 | args = parser.parse_args()
79 | # Define the output directory
80 | args.output_dir = args.output_dir + "_dt" + str(args.dt)
81 |
82 | assert os.path.isdir(args.data_dir), "Couldn't find the dataset at {}".format(args.data_dir)
83 |
84 | # Define the data directories
85 | train_data_dir = os.path.join(args.data_dir, 'training' + SUBFOLDER)
86 | test_data_dir = os.path.join(args.data_dir, 'validation' + SUBFOLDER)
87 |
88 | # Get the filenames in each directory (train and test)
89 | filenames = os.listdir(train_data_dir)
90 | filenames = [os.path.join(train_data_dir, f) for f in filenames if f.endswith('.mp4')]
91 |
92 | test_filenames = os.listdir(test_data_dir)
93 | test_filenames = [os.path.join(test_data_dir, f) for f in test_filenames if f.endswith('.mp4')]
94 |
95 | # Split the clips in 'training' into 90% train and 10% dev
96 | # Make sure to always shuffle with a fixed seed so that the split is reproducible
97 | random.seed(230)
98 | filenames.sort()
99 | random.shuffle(filenames)
100 |
101 | split = int(0.9 * len(filenames))
102 | train_filenames = filenames[:split]
103 | dev_filenames = filenames[split:]
104 |
105 | filenames = {'train': train_filenames,
106 | 'dev': dev_filenames,
107 | 'test': test_filenames}
108 |
109 | if not os.path.exists(args.output_dir):
110 | os.mkdir(args.output_dir)
111 | else:
112 | print("Warning: output dir {} already exists".format(args.output_dir))
113 |
114 | # Preprocess train, dev and test
115 | for split in ['train', 'dev', 'test']:
116 | output_dir_split = os.path.join(args.output_dir, '{}_moments'.format(split))
117 | if not os.path.exists(output_dir_split):
118 | os.mkdir(output_dir_split)
119 | else:
120 | print("Warning: dir {} already exists".format(output_dir_split))
121 |
122 | print("Processing {} data, saving preprocessed data to {}".format(split, output_dir_split))
123 | for i, filename in enumerate(tqdm(filenames[split])):
124 | split_resize_and_save(filename, i, output_dir_split, dt=args.dt, size=SIZE)
125 |
126 | print("Done building dataset")
127 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/dataset.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import numpy as np
3 | import tensorflow as tf
4 | from scipy.misc import imread
5 | from abc import abstractmethod
6 | from utils import unpickle
7 |
8 | CIFAR10_DATASET = 'cifar10'
9 | PLACES365_DATASET = 'places365'
10 | MOMENTSINTIME_DATASET = 'momentsintime'
11 |
12 | class BaseDataset():
13 | def __init__(self, name, path, training=True, augment=True):
14 | self.name = name
15 | self.augment = augment and training
16 | self.training = training
17 | self.path = path
18 | self._data = []
19 |
20 | def __len__(self):
21 | return len(self.data)
22 |
23 | def __iter__(self):
24 | total = len(self)
25 | start = 0
26 |
27 | while start < total:
28 | item = self[start]
29 | start += 1
30 | yield item
31 |
32 | return  # end iteration; raising StopIteration inside a generator is an error under PEP 479 (Python 3.7+)
33 |
34 | def __getitem__(self, index):
35 | val = self.data[index]
36 | try:
37 | # OLD : img = imread(val) if isinstance(val, str) else val
38 | img = np.load(val) if isinstance(val, str) else val
39 |
40 | if self.augment and np.random.binomial(1, 0.5) == 1:
41 | img = img[:, ::-1, :]
42 |
43 | except:
44 | img = None
45 |
46 | return img
47 |
48 | def generator(self, batch_size, recursive=False):
49 | start = 0
50 | total = len(self)
51 |
52 | while True:
53 | while start < total:
54 | end = np.min([start + batch_size, total])
55 | items = []
56 |
57 | for ix in range(start, end):
58 | item = self[ix]
59 | if item is not None:
60 | items.append(item)
61 |
62 | start = end
63 | yield np.array(items)
64 |
65 | if recursive:
66 | start = 0
67 |
68 | else:
69 |                 return  # stop the generator (PEP 479: raising StopIteration here would surface as a RuntimeError)
70 |
71 |
72 | @property
73 | def data(self):
74 | if len(self._data) == 0:
75 | self._data = self.load()
76 | np.random.shuffle(self._data)
77 |
78 | return self._data
79 |
80 | @abstractmethod
81 | def load(self):
82 | return []
83 |
84 |
85 | class Cifar10Dataset(BaseDataset):
86 | def __init__(self, path, training=True, augment=True):
87 | super(Cifar10Dataset, self).__init__(CIFAR10_DATASET, path, training, augment)
88 |
89 | def load(self):
90 | data = []
91 | if self.training:
92 | for i in range(1, 6):
93 | filename = '{}/data_batch_{}'.format(self.path, i)
94 | batch_data = unpickle(filename)
95 | if len(data) > 0:
96 | data = np.vstack((data, batch_data[b'data']))
97 | else:
98 | data = batch_data[b'data']
99 |
100 | else:
101 | filename = '{}/test_batch'.format(self.path)
102 | batch_data = unpickle(filename)
103 | data = batch_data[b'data']
104 |
105 | w = 32
106 | h = 32
107 | s = w * h
108 | data = np.array(data)
109 | data = np.dstack((data[:, :s], data[:, s:2 * s], data[:, 2 * s:]))
110 | data = data.reshape((-1, w, h, 3))
111 | return data
112 |
113 |
114 | class Places365Dataset(BaseDataset):
115 | def __init__(self, path, training=True, augment=True):
116 | super(Places365Dataset, self).__init__(PLACES365_DATASET, path, training, augment)
117 |
118 | def load(self):
119 | if self.training:
120 | data = np.array(
121 | glob.glob(self.path + '/data_256/**/*.jpg', recursive=True))
122 |
123 | else:
124 | data = np.array(glob.glob(self.path + '/val_256/*.jpg'))
125 |
126 | return data
127 |
128 |
129 | class MomentsInTimeDataset(BaseDataset):
130 | def __init__(self, path, training=True, augment=True):
131 | super(MomentsInTimeDataset, self).__init__(MOMENTSINTIME_DATASET, path, training, augment)
132 |
133 | def load(self):
134 | if self.training:
135 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/train_moments/*"))
136 |             data = np.array(glob.glob("." + self.path + "/train_moments/*"))  # '.' prefix turns the default './data/...' into '../data/...' (presumably paths are relative to src/)
137 | else:
138 | #data = np.array(glob.glob("/home/ubuntu/Automatic-Video-Colorization/data/momentsintime/dev_moments/*"))
139 | data = np.array(glob.glob("." + self.path + "/dev_moments/*"))
140 |
141 | return data
142 |
--------------------------------------------------------------------------------
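For orientation, a minimal sketch of iterating batches with MomentsInTimeDataset; note that load() prepends '.' to the given path, and the data path below is hypothetical:

from dataset import MomentsInTimeDataset

dataset = MomentsInTimeDataset('./data/momentsintime', training=True, augment=True)
print(len(dataset), 'training samples')

# generator() yields numpy batches; recursive=True would loop over the data forever
for batch in dataset.generator(batch_size=16, recursive=False):
    print(batch.shape)   # depends on how the .npy samples were built
    break
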
/Ref-GAN-Colorization/src/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import numpy as np
4 | import tensorflow as tf
5 | from options import ModelOptions
6 | from models import Cifar10Model, Places365Model, MomentsInTimeModel
7 | from dataset import CIFAR10_DATASET, PLACES365_DATASET, MOMENTSINTIME_DATASET
8 |
9 |
10 | def main(options):
11 |
12 | # reset tensorflow graph
13 | tf.reset_default_graph()
14 |
15 | # initialize random seed
16 | tf.set_random_seed(options.seed)
17 | np.random.seed(options.seed)
18 | random.seed(options.seed)
19 |
20 | # create a session environment
21 | with tf.Session() as sess:
22 |
23 | if options.dataset == CIFAR10_DATASET:
24 | model = Cifar10Model(sess, options)
25 |
26 | elif options.dataset == PLACES365_DATASET:
27 | model = Places365Model(sess, options)
28 |
29 | elif options.dataset == MOMENTSINTIME_DATASET:
30 | model = MomentsInTimeModel(sess, options)
31 |
32 | else:
33 |             model = MomentsInTimeModel(sess, options)  # fall back to the Moments in Time model for unrecognized dataset names
34 |
35 | if not os.path.exists(options.checkpoints_path):
36 | os.makedirs(options.checkpoints_path)
37 |
38 | if options.log:
39 | open(model.train_log_file, 'w').close()
40 | open(model.test_log_file, 'w').close()
41 |
42 | # build the model and initialize
43 | model.build()
44 | sess.run(tf.global_variables_initializer())
45 |
46 |
47 | # load model only after global variables initialization
48 | model.load()
49 |
50 |
51 | if options.mode == 0:
52 | args = vars(options)
53 | print('\n------------ Options -------------')
54 | with open(os.path.join(options.checkpoints_path, 'options.dat'), 'w') as f:
55 | for k, v in sorted(args.items()):
56 | print('%s: %s' % (str(k), str(v)))
57 | f.write('%s: %s\n' % (str(k), str(v)))
58 | print('-------------- End ----------------\n')
59 |
60 | model.train()
61 |
62 | elif options.mode == 1:
63 | model.evaluate()
64 | while True:
65 | model.sample()
66 |
67 | else:
68 | model.turing_test()
69 |
70 |
71 | if __name__ == "__main__":
72 | main(ModelOptions().parse())
73 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/networks.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from ops import conv2d, conv2d_transpose, pixelwise_accuracy
4 |
5 |
6 | class Discriminator(object):
7 | def __init__(self, name, kernels):
8 | self.name = name
9 | self.kernels = kernels
10 | self.var_list = []
11 |
12 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None):
13 | output = inputs
14 | with tf.variable_scope(self.name, reuse=reuse_variables):
15 | for index, kernel in enumerate(self.kernels):
16 |
17 |                 # do not use batch-norm in the first layer
18 | bnorm = False if index == 0 else True
19 | name = 'conv' + str(index)
20 | output = conv2d(
21 | inputs=output,
22 | name=name,
23 | kernel_size=kernel_size,
24 | filters=kernel[0],
25 | strides=kernel[1],
26 | bnorm=bnorm,
27 | activation=tf.nn.leaky_relu,
28 | seed=seed
29 | )
30 |
31 | if kernel[2] > 0:
32 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
33 |
34 | output = conv2d(
35 | inputs=output,
36 | name='conv_last',
37 | filters=1,
38 | kernel_size=4, # last layer kernel size = 4
39 | strides=1, # last layer stride = 1
40 | bnorm=False, # do not use batch-norm for the last layer
41 | seed=seed
42 | )
43 |
44 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)
45 |
46 | return output
47 |
48 |
49 | class Generator(object):
50 | def __init__(self, name, encoder_kernels, decoder_kernels, output_channels=3):
51 | self.name = name
52 | self.encoder_kernels = encoder_kernels
53 | self.decoder_kernels = decoder_kernels
54 | self.output_channels = output_channels
55 | self.var_list = []
56 |
57 | def create(self, inputs, kernel_size=None, seed=None, reuse_variables=None):
58 | output = inputs
59 |
60 | with tf.variable_scope(self.name, reuse=reuse_variables):
61 |
62 | layers = []
63 |
64 | # encoder branch
65 | for index, kernel in enumerate(self.encoder_kernels):
66 |
67 | name = 'conv' + str(index)
68 | output = conv2d(
69 | inputs=output,
70 | name=name,
71 | kernel_size=kernel_size,
72 | filters=kernel[0],
73 | strides=kernel[1],
74 | activation=tf.nn.leaky_relu,
75 | seed=seed
76 | )
77 |
78 | # save contracting path layers to be used for skip connections
79 | layers.append(output)
80 |
81 | if kernel[2] > 0:
82 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
83 |
84 | # decoder branch
85 | for index, kernel in enumerate(self.decoder_kernels):
86 |
87 | name = 'deconv' + str(index)
88 | output = conv2d_transpose(
89 | inputs=output,
90 | name=name,
91 | kernel_size=kernel_size,
92 | filters=kernel[0],
93 | strides=kernel[1],
94 | activation=tf.nn.relu,
95 | seed=seed
96 | )
97 |
98 | if kernel[2] > 0:
99 | output = tf.nn.dropout(output, keep_prob=1 - kernel[2], name='dropout_' + name, seed=seed)
100 |
101 | # concat the layer from the contracting path with the output of the current layer
102 | # concat only the channels (axis=3)
103 |                 output = tf.concat([layers[len(layers) - index - 2], output], axis=3)  # the '-2' skips the bottleneck (deepest encoder output), which is already the decoder input
104 |
105 | output = conv2d(
106 | inputs=output,
107 | name='conv_last',
108 |             filters=self.output_channels, # number of output channels
109 | kernel_size=1, # last layer kernel size = 1
110 | strides=1, # last layer stride = 1
111 | bnorm=False, # do not use batch-norm for the last layer
112 | activation=tf.nn.tanh, # tanh activation function for the output
113 | seed=seed
114 | )
115 |
116 | self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)
117 |
118 | return output
119 |
--------------------------------------------------------------------------------
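Both networks are configured by lists of (filters, stride, dropout) tuples: the discriminator takes one list, the generator takes an encoder list and a decoder list joined by skip connections. A sketch with illustrative kernel lists (the real configurations live in models.py and are not shown here; output shapes assume 'same' padding in ops.conv2d):

import tensorflow as tf
from networks import Generator, Discriminator

# (filters, stride, dropout) per layer -- illustrative values only
encoder_kernels = [(64, 1, 0), (128, 2, 0), (256, 2, 0), (512, 2, 0)]
decoder_kernels = [(256, 2, 0.5), (128, 2, 0), (64, 2, 0)]

gen = Generator('gen', encoder_kernels, decoder_kernels, output_channels=3)
dis = Discriminator('dis', [(64, 2, 0), (128, 2, 0.2), (256, 2, 0)])

inputs = tf.placeholder(tf.float32, [None, 256, 256, 1])   # e.g. a grayscale input
colorized = gen.create(inputs, kernel_size=4, seed=0)      # 3-channel, tanh-activated output
score = dis.create(tf.concat([inputs, colorized], axis=3), kernel_size=4, seed=0)
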
/Ref-GAN-Colorization/src/options.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import random
4 | import argparse
5 |
6 |
7 | def str2bool(v):
8 | if v.lower() in ('yes', 'true', 't', 'y', '1'):
9 | return True
10 | elif v.lower() in ('no', 'false', 'f', 'n', '0'):
11 | return False
12 | else:
13 | raise argparse.ArgumentTypeError('Boolean value expected.')
14 |
15 |
16 | class ModelOptions:
17 | def __init__(self):
18 | parser = argparse.ArgumentParser(description='Colorization with GANs')
19 | parser.add_argument('--seed', type=int, default=0, metavar='S', help='random seed (default: 0)')
20 | parser.add_argument('--name', type=str, default='CGAN', help='arbitrary model name (default: CGAN)')
21 |         parser.add_argument('--mode', type=int, default=0, help='run mode [0: train, 1: evaluate, 2: test] (default: 0)')
22 | parser.add_argument('--dataset', type=str, default='momentsintime', help='the name of dataset [places365, cifar10] (default: momentsintime)')
23 | parser.add_argument('--dataset-path', type=str, default='./data', help='dataset path (default: ./data)')
24 | parser.add_argument('--checkpoints-path', type=str, default='./checkpoints', help='models are saved here (default: ./checkpoints)')
25 | parser.add_argument('--batch-size', type=int, default=16, metavar='N', help='input batch size for training (default: 16)')
26 | parser.add_argument('--color-space', type=str, default='lab', help='model color space [lab, rgb] (default: lab)')
27 |         parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs to train (default: 100)')
28 |         parser.add_argument('--lr', type=float, default=5e-3, metavar='LR', help='learning rate (default: 5e-3)')
29 |         parser.add_argument('--lr-decay-rate', type=float, default=0.1, help='exponential decay rate of the learning rate (default: 0.1)')
30 |         parser.add_argument('--lr-decay-steps', type=float, default=25e2, help='exponential decay steps of the learning rate (default: 2500)')
31 | parser.add_argument('--beta1', type=float, default=0, help='momentum term of adam optimizer (default: 0)')
32 | parser.add_argument("--l1-weight", type=float, default=100.0, help="weight on L1 term for generator gradient (default: 100.0)")
33 | parser.add_argument('--augment', type=str2bool, default=True, help='True for augmentation (default: True)')
34 | parser.add_argument('--label-smoothing', type=str2bool, default=False, help='True for one-sided label smoothing (default: False)')
35 | parser.add_argument('--acc-thresh', type=float, default=2.0, help="accuracy threshold (default: 2.0)")
36 | parser.add_argument('--kernel-size', type=int, default=4, help="default kernel size (default: 4)")
37 | parser.add_argument('--save', type=str2bool, default=True, help='True for saving (default: True)')
38 |         parser.add_argument('--save-interval', type=int, default=100, help='how many batches to wait before saving model (default: 100)')
39 | parser.add_argument('--sample', type=str2bool, default=True, help='True for sampling (default: True)')
40 | parser.add_argument('--sample-size', type=int, default=8, help='number of images to sample (default: 8)')
41 |         parser.add_argument('--sample-interval', type=int, default=100, help='how many batches to wait before sampling (default: 100)')
42 | parser.add_argument('--validate', type=str2bool, default=True, help='True for validation (default: True)')
43 | parser.add_argument('--validate-interval', type=int, default=0, help='how many batches to wait before validating (default: 0)')
44 | parser.add_argument('--log', type=str2bool, default=True, help='True for logging (default: True)')
45 | parser.add_argument('--log-interval', type=int, default=10, help='how many iterations to wait before logging training status (default: 10)')
46 | parser.add_argument('--visualize', type=str2bool, default=False, help='True for accuracy visualization (default: False)')
47 | parser.add_argument('--visualize-window', type=int, default=100, help='the exponentially moving average window width (default: 100)')
48 | parser.add_argument('--test-size', type=int, default=100, metavar='N', help='number of Turing tests (default: 100)')
49 | parser.add_argument('--test-delay', type=int, default=0, metavar='N', help='number of seconds to wait when doing Turing test, 0 for unlimited (default: 0)')
50 |         parser.add_argument('--gpu-ids', type=str, default='0', help='gpu ids, e.g. "0", "0,1,2", "0,2"; use -1 for CPU')
51 | # to recolorize a video clip
52 | parser.add_argument('--filename', type=str, default='*', help='Filename of input BW video')
53 | parser.add_argument('--input_dir', type=str, default='../data/examples/converted', help='Directory of input files')
54 | parser.add_argument('--output_dir', type=str, default='../data/examples/recolorized', help='Directory of output files')
55 |
56 | self._parser = parser
57 |
58 | def parse(self):
59 | opt = self._parser.parse_args()
60 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids
61 |
62 | opt.color_space = opt.color_space.upper()
63 |
64 | if opt.seed == 0:
65 | opt.seed = random.randint(0, 2**31 - 1)
66 |
67 | if (opt.dataset_path == './data') or (opt.dataset_path == './dataset'):
68 | opt.dataset_path += ('/' + opt.dataset)
69 |
70 | if opt.checkpoints_path == './checkpoints':
71 | opt.checkpoints_path += ('/' + opt.dataset)
72 |
73 | return opt
74 |
--------------------------------------------------------------------------------
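A quick sketch of the post-processing that parse() applies on top of the defaults above (values shown assume no command-line overrides):

from options import ModelOptions

opt = ModelOptions().parse()      # parses sys.argv
print(opt.color_space)            # 'lab' is upper-cased to 'LAB'
print(opt.seed)                   # 0 is replaced by a random seed in [0, 2**31 - 1]
print(opt.dataset_path)           # './data' becomes './data/momentsintime'
print(opt.checkpoints_path)       # './checkpoints' becomes './checkpoints/momentsintime'
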
/Ref-GAN-Colorization/src/test-eval.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 1
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/test-turing.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 2
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/train.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 0
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/src/utils.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | from PIL import Image
4 | import matplotlib.pyplot as plt
5 |
6 |
7 | def stitch_images(grayscale, original, pred):
8 | gap = 5
9 |     width, height = original[0][:, :, 0].shape  # note: numpy shape is (rows, cols), so this assumes square frames
10 | img_per_row = 2 if width > 200 else 4
11 | img = Image.new('RGB', (width * img_per_row * 3 + gap * (img_per_row - 1), height * int(len(original) / img_per_row)))
12 |
13 | grayscale = np.array(grayscale).squeeze()
14 | original = np.array(original)
15 | pred = np.array(pred)
16 |
17 | for ix in range(len(original)):
18 | xoffset = int(ix % img_per_row) * width * 3 + int(ix % img_per_row) * gap
19 | yoffset = int(ix / img_per_row) * height
20 | im1 = Image.fromarray(grayscale[ix])
21 | im2 = Image.fromarray(original[ix])
22 | im3 = Image.fromarray((pred[ix] * 255).astype(np.uint8))
23 | img.paste(im1, (xoffset, yoffset))
24 | img.paste(im2, (xoffset + width, yoffset))
25 | img.paste(im3, (xoffset + width + width, yoffset))
26 |
27 | return img
28 |
29 |
30 | def unpickle(file):
31 | with open(file, 'rb') as fo:
32 | dict = pickle.load(fo, encoding='bytes')
33 | return dict
34 |
35 |
36 | def moving_average(data, window_width):
37 | cumsum_vec = np.cumsum(np.insert(data, 0, 0))
38 | ma_vec = (cumsum_vec[window_width:] - cumsum_vec[:-window_width]) / window_width
39 | return ma_vec
40 |
41 |
42 | def imshow(img, title=''):
43 | fig = plt.gcf()
44 | fig.canvas.set_window_title(title)
45 | plt.axis('off')
46 | plt.imshow(img, interpolation='none')
47 | plt.show()
48 |
49 |
50 | def turing_test(real_img, fake_img, delay=0):
51 | height, width, _ = real_img.shape
52 | imgs = np.array([real_img, (fake_img * 255).astype(np.uint8)])
53 | real_index = np.random.binomial(1, 0.5)
54 | fake_index = (real_index + 1) % 2
55 |
56 | img = Image.new('RGB', (2 + width * 2, height))
57 | img.paste(Image.fromarray(imgs[real_index]), (0, 0))
58 | img.paste(Image.fromarray(imgs[fake_index]), (2 + width, 0))
59 |
60 | img.success = 0
61 |
62 | def onclick(event):
63 | if event.xdata is not None:
64 | if event.x < width and real_index == 0:
65 | img.success = 1
66 |
67 | elif event.x > width and real_index == 1:
68 | img.success = 1
69 |
70 | plt.gcf().canvas.stop_event_loop()
71 |
72 | plt.ion()
73 | plt.gcf().canvas.mpl_connect('button_press_event', onclick)
74 | plt.title('click on the real image')
75 | plt.axis('off')
76 | plt.imshow(img, interpolation='none')
77 | plt.show()
78 | plt.draw()
79 | plt.gcf().canvas.start_event_loop(delay)
80 |
81 | return img.success
82 |
83 |
84 | def visualize(train_log_file, test_log_file, window_width, title=''):
85 | train_data = np.loadtxt(train_log_file)
86 | test_data = np.loadtxt(test_log_file)
87 |
88 | if len(train_data.shape) < 2:
89 | return
90 |
91 | if len(train_data) < window_width:
92 | window_width = len(train_data) - 1
93 |
94 | fig = plt.gcf()
95 | fig.canvas.set_window_title(title)
96 |
97 | plt.ion()
98 |     plt.subplot(121)  # integer spec; the string form is no longer accepted by recent matplotlib
99 | plt.cla()
100 | if len(train_data) > 1:
101 | plt.plot(moving_average(train_data[:, 8], window_width))
102 | plt.title('train')
103 |
104 |     plt.subplot(122)
105 | plt.cla()
106 | if len(test_data) > 1:
107 | plt.plot(test_data[:, 8])
108 | plt.title('test')
109 |
110 | plt.show()
111 | plt.draw()
112 | plt.pause(.01)
113 |
--------------------------------------------------------------------------------
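As a worked example of moving_average above (a cumulative-sum implementation of a plain moving average):

from utils import moving_average

print(moving_average([1, 2, 3, 4, 5], 2))   # -> [1.5 2.5 3.5 4.5]
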
/Ref-GAN-Colorization/src/video_colorize_GAN.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import cv2
6 | import numpy as np
7 | from PIL import Image
8 | from skimage import img_as_ubyte, img_as_float
9 | import skimage.color as color
10 | import scipy.ndimage.interpolation as sni
11 | from ops import postprocess
12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB
13 |
14 | import tensorflow as tf
15 | from options import ModelOptions
16 | from models import MomentsInTimeModel
17 |
18 |
19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, img_rgb_first, options):
20 |
21 | # colorize the image based on the previous one
22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0), model.input_rgb_first: np.expand_dims(img_rgb_first, axis=0)}
23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic)
24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB)
25 |
26 |     # evaluate the tensor
27 | img_rgb_out = fake_image.eval()
28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8)
29 |
30 | return img_rgb_out
31 |
32 | def bw2color(options, inputname, inputpath, outputpath):
33 | if inputname.endswith(".mp4"):
34 | # size of the input frames
35 | size = 256
36 |
37 | # check that the video exists
38 | path_to_video = os.path.join(inputpath, inputname)
39 | if not os.path.exists(path_to_video):
40 |             print("The file:", path_to_video, "does not exist!")
41 |
42 |         # store information about the original video
43 | cap = cv2.VideoCapture(os.path.join(path_to_video))
44 | # original dimensions
45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v');
48 | # parameters of output file
49 | # dimensions of the output image
50 | new_width, new_height = size, size
51 | # number of frames
52 | fps = 30.0
53 |
54 | # recolorized output video
55 | color_out = cv2.VideoWriter(
56 | os.path.join(outputpath, 'color_' + inputname),
57 | fourcc,
58 | fps,
59 | (new_width, new_height),
60 | isColor=True
61 | )
62 |
63 | # TO CHANGE to DL colorization of 1st frame
64 | # pick the first frame from the original video clip as the first reference
65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:]))
66 |
67 | ret_temp, frame_prev = cap_temp.read()
68 | # convert BGR to RGB convention
69 | frame_prev = frame_prev[:,:,::-1]
70 | frame_prev = cv2.resize(frame_prev, (size, size))
71 | # save the first frame as the reference
72 | frame_ref = frame_prev
73 |
74 | # count the number of recolorized frames
75 | frames_processed = 0
76 |
77 | with tf.Session() as sess:
78 |
79 | model = MomentsInTimeModel(sess, options)
80 |
81 | # build the model and initialize
82 | model.build()
83 | sess.run(tf.global_variables_initializer())
84 |
85 | # load model only after global variables initialization
86 | model.load()
87 |
88 | while(cap.isOpened()):
89 | ret, frame_in = cap.read()
90 |
91 | # check if we are not at the end of the video
92 | if ret==True:
93 | # convert BGR to RGB convention
94 | frame_in = frame_in[:,:,::-1]
95 | # resize the frame to match the input size of the GAN
96 | frame_in = cv2.resize(frame_in, (size, size))
97 |
98 | # colorize the BW frame
99 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, frame_ref, options)
100 |
101 | #generate sample
102 | get_image = False
103 | if get_image:
104 | img = Image.fromarray(frame_out)
105 |
106 | if not os.path.exists(model.samples_dir):
107 | os.makedirs(model.samples_dir)
108 |
109 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png"
110 | img.save(os.path.join(model.samples_dir, sample))
111 |
112 | # save the recolorized frame
113 | frame_prev = frame_out
114 | # convert RGB to BGR convention
115 | frame_out = frame_out[:,:,::-1]
116 | # write the color frame
117 | color_out.write(frame_out)
118 |
119 | # print progress
120 | frames_processed += 1
121 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 //totalFrames), end="\r")
122 | if cv2.waitKey(1) & 0xFF == ord('q'):
123 | break
124 | # end of the video
125 | else:
126 | break
127 |
128 | # release everything if job is finished
129 | cap.release()
130 | color_out.release()
131 |
132 | def main():
133 |
134 | # reset tensorflow graph
135 | tf.reset_default_graph()
136 |
137 | options = ModelOptions().parse()
138 |
139 | if options.filename == '*':
140 | for filename in os.listdir(options.input_dir):
141 |             bw2color(options, inputname = filename, inputpath = options.input_dir, outputpath = options.output_dir)
142 | else:
143 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir)
144 |
145 | # cleanup
146 | cv2.destroyAllWindows()
147 |
148 | return 0
149 |
150 | if __name__ == '__main__':
151 | main()
152 |
--------------------------------------------------------------------------------
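Note that bw2color() reads its reference frame from "color" + inputname[2:], so it expects the bw_*.mp4 / color_*.mp4 pairs produced by converter.py to sit in the same input directory. A single-clip usage sketch (file and directory names are hypothetical):

import tensorflow as tf
from options import ModelOptions
from video_colorize_GAN import bw2color

tf.reset_default_graph()
options = ModelOptions().parse()

# 'bw_example.mp4' and its counterpart 'color_example.mp4' are assumed to exist
bw2color(options,
         inputname='bw_example.mp4',
         inputpath='../data/examples/converted',
         outputpath='../data/examples/recolorized')
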
/Ref-GAN-Colorization/test-eval.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 1
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/test-turing.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 2
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/train.py:
--------------------------------------------------------------------------------
1 | from src import ModelOptions, main
2 |
3 | options = ModelOptions().parse()
4 | options.mode = 0
5 | main(options)
6 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/utils.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | from PIL import Image
4 | import matplotlib.pyplot as plt
5 |
6 |
7 | def stitch_images(grayscale, original, pred):
8 | gap = 5
9 |     width, height = original[0][:, :, 0].shape  # note: numpy shape is (rows, cols), so this assumes square frames
10 | img_per_row = 2 if width > 200 else 4
11 | img = Image.new('RGB', (width * img_per_row * 3 + gap * (img_per_row - 1), height * int(len(original) / img_per_row)))
12 |
13 | grayscale = np.array(grayscale).squeeze()
14 | original = np.array(original)
15 | pred = np.array(pred)
16 |
17 | for ix in range(len(original)):
18 | xoffset = int(ix % img_per_row) * width * 3 + int(ix % img_per_row) * gap
19 | yoffset = int(ix / img_per_row) * height
20 | im1 = Image.fromarray(grayscale[ix])
21 | im2 = Image.fromarray(original[ix])
22 | im3 = Image.fromarray((pred[ix] * 255).astype(np.uint8))
23 | img.paste(im1, (xoffset, yoffset))
24 | img.paste(im2, (xoffset + width, yoffset))
25 | img.paste(im3, (xoffset + width + width, yoffset))
26 |
27 | return img
28 |
29 |
30 | def unpickle(file):
31 | with open(file, 'rb') as fo:
32 | dict = pickle.load(fo, encoding='bytes')
33 | return dict
34 |
35 |
36 | def moving_average(data, window_width):
37 | cumsum_vec = np.cumsum(np.insert(data, 0, 0))
38 | ma_vec = (cumsum_vec[window_width:] - cumsum_vec[:-window_width]) / window_width
39 | return ma_vec
40 |
41 |
42 | def imshow(img, title=''):
43 | fig = plt.gcf()
44 | fig.canvas.set_window_title(title)
45 | plt.axis('off')
46 | plt.imshow(img, interpolation='none')
47 | plt.show()
48 |
49 |
50 | def turing_test(real_img, fake_img, delay=0):
51 | height, width, _ = real_img.shape
52 | imgs = np.array([real_img, (fake_img * 255).astype(np.uint8)])
53 | real_index = np.random.binomial(1, 0.5)
54 | fake_index = (real_index + 1) % 2
55 |
56 | img = Image.new('RGB', (2 + width * 2, height))
57 | img.paste(Image.fromarray(imgs[real_index]), (0, 0))
58 | img.paste(Image.fromarray(imgs[fake_index]), (2 + width, 0))
59 |
60 | img.success = 0
61 |
62 | def onclick(event):
63 | if event.xdata is not None:
64 | if event.x < width and real_index == 0:
65 | img.success = 1
66 |
67 | elif event.x > width and real_index == 1:
68 | img.success = 1
69 |
70 | plt.gcf().canvas.stop_event_loop()
71 |
72 | plt.ion()
73 | plt.gcf().canvas.mpl_connect('button_press_event', onclick)
74 | plt.title('click on the real image')
75 | plt.axis('off')
76 | plt.imshow(img, interpolation='none')
77 | plt.show()
78 | plt.draw()
79 | plt.gcf().canvas.start_event_loop(delay)
80 |
81 | return img.success
82 |
83 |
84 | def visualize(train_log_file, test_log_file, window_width, title=''):
85 | train_data = np.loadtxt(train_log_file)
86 | test_data = np.loadtxt(test_log_file)
87 |
88 | if len(train_data.shape) < 2:
89 | return
90 |
91 | if len(train_data) < window_width:
92 | window_width = len(train_data) - 1
93 |
94 | fig = plt.gcf()
95 | fig.canvas.set_window_title(title)
96 |
97 | plt.ion()
98 |     plt.subplot(121)  # integer spec; the string form is no longer accepted by recent matplotlib
99 | plt.cla()
100 | if len(train_data) > 1:
101 | plt.plot(moving_average(train_data[:, 8], window_width))
102 | plt.title('train')
103 |
104 |     plt.subplot(122)
105 | plt.cla()
106 | if len(test_data) > 1:
107 | plt.plot(test_data[:, 8])
108 | plt.title('test')
109 |
110 | plt.show()
111 | plt.draw()
112 | plt.pause(.01)
113 |
--------------------------------------------------------------------------------
/Ref-GAN-Colorization/video_colorize_GAN.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import cv2
6 | import numpy as np
7 | from PIL import Image
8 | from skimage import img_as_ubyte, img_as_float
9 | import skimage.color as color
10 | import scipy.ndimage.interpolation as sni
11 | from ops import postprocess
12 | from ops import COLORSPACE_RGB, COLORSPACE_LAB
13 |
14 | import tensorflow as tf
15 | from options import ModelOptions
16 | from models import MomentsInTimeModel
17 |
18 |
19 | def image_colorization_propagation(model, img_bw_in, img_rgb_prev, img_rgb_first, options):
20 |
21 | # colorize the image based on the previous one
22 | feed_dic = {model.input_rgb: np.expand_dims(img_bw_in, axis=0), model.input_rgb_prev: np.expand_dims(img_rgb_prev, axis=0), model.input_rgb_first: np.expand_dims(img_rgb_first, axis=0)}
23 | fake_image, _ = model.sess.run([model.sampler, model.input_gray], feed_dict=feed_dic)
24 | fake_image = postprocess(tf.convert_to_tensor(fake_image), colorspace_in=options.color_space, colorspace_out=COLORSPACE_RGB)
25 |
26 |     # evaluate the tensor
27 | img_rgb_out = fake_image.eval()
28 | img_rgb_out = (img_rgb_out.squeeze(0) * 255).astype(np.uint8)
29 |
30 | return img_rgb_out
31 |
32 | def bw2color(options, inputname, inputpath, outputpath):
33 | if inputname.endswith(".mp4"):
34 | # size of the input frames
35 | size = 256
36 |
37 | # check that the video exists
38 | path_to_video = os.path.join(inputpath, inputname)
39 | if not os.path.exists(path_to_video):
40 |             print("The file:", path_to_video, "does not exist!")
41 |
42 |         # store information about the original video
43 | cap = cv2.VideoCapture(os.path.join(path_to_video))
44 | # original dimensions
45 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
46 | totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
47 | fourcc = cv2.VideoWriter_fourcc(*'mp4v');
48 | # parameters of output file
49 | # dimensions of the output image
50 | new_width, new_height = size, size
51 | # number of frames
52 | fps = 30.0
53 |
54 | # recolorized output video
55 | color_out = cv2.VideoWriter(
56 | os.path.join(outputpath, 'color_' + inputname),
57 | fourcc,
58 | fps,
59 | (new_width, new_height),
60 | isColor=True
61 | )
62 |
63 | # TO CHANGE to DL colorization of 1st frame
64 | # pick the first frame from the original video clip as the first reference
65 | cap_temp = cv2.VideoCapture(os.path.join(inputpath, "color" + inputname[2:]))
66 |
67 | ret_temp, frame_prev = cap_temp.read()
68 | # convert BGR to RGB convention
69 | frame_prev = frame_prev[:,:,::-1]
70 | frame_prev = cv2.resize(frame_prev, (size, size))
71 | # save the first frame as the reference
72 | frame_ref = frame_prev
73 |
74 | # count the number of recolorized frames
75 | frames_processed = 0
76 |
77 | with tf.Session() as sess:
78 |
79 | model = MomentsInTimeModel(sess, options)
80 |
81 | # build the model and initialize
82 | model.build()
83 | sess.run(tf.global_variables_initializer())
84 |
85 | # load model only after global variables initialization
86 | model.load()
87 |
88 | while(cap.isOpened()):
89 | ret, frame_in = cap.read()
90 |
91 | # check if we are not at the end of the video
92 | if ret==True:
93 | # convert BGR to RGB convention
94 | frame_in = frame_in[:,:,::-1]
95 | # resize the frame to match the input size of the GAN
96 | frame_in = cv2.resize(frame_in, (size, size))
97 |
98 | # colorize the BW frame
99 | frame_out = image_colorization_propagation(model, frame_in, frame_prev, frame_ref, options)
100 |
101 | #generate sample
102 | get_image = False
103 | if get_image:
104 | img = Image.fromarray(frame_out)
105 |
106 | if not os.path.exists(model.samples_dir):
107 | os.makedirs(model.samples_dir)
108 |
109 | sample = model.options.dataset + "_" + inputname + "_" + str(frames_processed).zfill(5) + ".png"
110 | img.save(os.path.join(model.samples_dir, sample))
111 |
112 | # save the recolorized frame
113 | frame_prev = frame_out
114 | # convert RGB to BGR convention
115 | frame_out = frame_out[:,:,::-1]
116 | # write the color frame
117 | color_out.write(frame_out)
118 |
119 | # print progress
120 | frames_processed += 1
121 | print("Processed {}/{} frames ({}%)".format(frames_processed, totalFrames, frames_processed * 100 //totalFrames), end="\r")
122 | if cv2.waitKey(1) & 0xFF == ord('q'):
123 | break
124 | # end of the video
125 | else:
126 | break
127 |
128 | # release everything if job is finished
129 | cap.release()
130 | color_out.release()
131 |
132 | def main():
133 |
134 | # reset tensorflow graph
135 | tf.reset_default_graph()
136 |
137 | options = ModelOptions().parse()
138 |
139 | if options.filename == '*':
140 | for filename in os.listdir(options.input_dir):
141 |             bw2color(options, inputname = filename, inputpath = options.input_dir, outputpath = options.output_dir)
142 | else:
143 | bw2color(options, inputname = options.filename, inputpath = options.input_dir, outputpath = options.output_dir)
144 |
145 | # cleanup
146 | cv2.destroyAllWindows()
147 |
148 | return 0
149 |
150 | if __name__ == '__main__':
151 | main()
152 |
--------------------------------------------------------------------------------
/automatic-video-colorization.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/automatic-video-colorization.pdf
--------------------------------------------------------------------------------
/configuration.txt:
--------------------------------------------------------------------------------
1 | # AWS instance: ec2 p2.xlarge
2 | # ubuntu version: 18.04 (64 bits)
3 | # python version: 3.6
4 |
5 | # Conda installation
6 | # Nvidia drivers
7 | sudo apt install ubuntu-drivers-common
8 | # CHECK : ubuntu-drivers devices
9 | sudo ubuntu-drivers autoinstall
10 | # CHECK : nvidia-smi
11 | # CUDA
12 | sudo apt install nvidia-cuda-toolkit
13 | # CuDNN
14 | # register at nvidia developers https://developer.nvidia.com/cudnn
15 | # download the corresponding runtime library (DEB)
16 | sudo apt install ./<path-to-downloaded-cudnn-runtime>.deb
17 |
18 | # REBOOT instance
19 |
20 | # Automatic Image Colorization
21 | # GitHub repo cloning
22 | git clone -b master --single-branch https://github.com/richzhang/colorization.git
23 | # download model
24 | ./models/fetch_release_models.sh
25 |
26 | # Caffe (DL framework used in repo) installation
27 | sudo apt install caffe-cuda
28 |
29 | # Image visualisation
30 | sudo apt install eog
30 | # EXAMPLE: eog image.jpg
--------------------------------------------------------------------------------
/convert_moment_dataset.sh:
--------------------------------------------------------------------------------
1 | if [ ! -d data/Moments_in_Time_Mini ]; then
2 | echo "Moments_in_Time_Mini dataset not downloaded";
3 | exit;
4 | fi
5 |
6 | mkdir -p data/Moments_processed;
7 |
8 | for directory in $(find data/Moments_in_Time_Mini/training -mindepth 1 -type d);
9 | do
10 | echo "Converting videos in directory $directory";
11 | python3 converter.py --input_dir "$directory/" --output_dir data/Moments_processed/;
12 | done
13 |
--------------------------------------------------------------------------------
/converter.py:
--------------------------------------------------------------------------------
1 | # convert Color to BW video clips
2 |
3 | import os
4 | import argparse
5 |
6 | import numpy as np
7 | import cv2
8 |
9 | def parse_args():
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument('--filename', type=str, default='*', help='Filename of input video')
12 | parser.add_argument('--input_dir', type=str, default='data/raw/', help='Directory of input files')
13 | parser.add_argument('--output_dir', type=str, default='data/converted/', help='Directory of output files')
14 | parser.add_argument('--out_dim', type=int, nargs=2, default=None, help='Dimensions of output frames (width, height)')
15 | parser.add_argument('--fps', type=int, default=None, help='Number of fps of output files')
16 |
17 | args = parser.parse_args()
18 | return args
19 |
20 | def parse_config(args):  # NOTE: unused helper; it references yaml, dict2namespace and args.log_dir, none of which are defined in this script
21 | with open('config.yml', 'r') as f:
22 | config = yaml.load(f)
23 | if not os.path.exists(args.log_dir):
24 | os.makedirs(args.log_dir)
25 | with open(os.path.join(args.log_dir, 'config.yml'), 'w') as f:
26 | yaml.dump(config, f, default_flow_style=False)
27 | return dict2namespace(config)
28 |
29 | def color2bw(inputname, inputpath, outputpath, out_dim, fps):
30 | if inputname.endswith(".mp4"):
31 |
32 | # store informations about the original video
33 | cap = cv2.VideoCapture(inputpath + inputname)
34 | # original dimensions
35 |         width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
36 |
37 |
38 | fourcc = cv2.VideoWriter_fourcc(*'mp4v');
39 |
40 | # parameters of output file
41 |         if out_dim is None:
42 | # dimensions of the output image
43 | new_width, new_height = width, height
44 | else:
45 | new_width, new_height = out_dim
46 |         if fps is None:
47 | # number of frames
48 | fps = 30.0
49 |
50 | # grayscale output video
51 | gray_out = cv2.VideoWriter(
52 | outputpath + 'bw_' + inputname,
53 | fourcc,
54 | fps,
55 | (new_width, new_height),
56 | isColor=False
57 | )
58 |
59 | # color output video
60 | color_out = cv2.VideoWriter(
61 | outputpath + 'color_' + inputname,
62 | fourcc,
63 | fps,
64 | (new_width, new_height),
65 | isColor=True
66 | )
67 |
68 |
69 | while(cap.isOpened()):
70 | ret, frame = cap.read()
71 | # check if we are not at the end of the video
72 | if ret==True:
73 |
74 | #resize frame
75 | frame = cv2.resize(frame, (new_width, new_height), interpolation = cv2.INTER_LINEAR)
76 |
77 | # write the color frame
78 | color_out.write(frame)
79 |
80 | # change color to BW
81 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
82 |
83 | # write the grayscaled frame
84 | gray_out.write(frame)
85 |
86 | if cv2.waitKey(1) & 0xFF == ord('q'):
87 | break
88 | # end of the video
89 | else:
90 | break
91 |
92 | # release everything if job is finished
93 | cap.release()
94 | gray_out.release()
95 | color_out.release()
96 |
97 | def main():
98 | args = parse_args()
99 |
100 | if args.filename == '*':
101 | for filename in os.listdir(args.input_dir):
102 | color2bw(inputname = filename, inputpath = args.input_dir, outputpath = args.output_dir, out_dim = args.out_dim, fps = args.fps)
103 | else:
104 | color2bw(inputname = args.filename, inputpath = args.input_dir, outputpath = args.output_dir, out_dim = args.out_dim, fps = args.fps)
105 |
106 | # cleanup
107 | cv2.destroyAllWindows()
108 |
109 | return 0
110 |
111 | if __name__ == '__main__':
112 | main()
113 |
--------------------------------------------------------------------------------
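When --fps is not given, the output frame rate is hard-coded to 30.0, which changes the playback speed of clips recorded at other rates. If preserving the source rate matters, one option (a sketch, not part of the script) is to query it from the capture object before constructing the writers:

import cv2

cap = cv2.VideoCapture("data/raw/example.mp4")   # hypothetical input clip
src_fps = cap.get(cv2.CAP_PROP_FPS)              # native frame rate reported by the container
fps = src_fps if src_fps > 0 else 30.0           # fall back to 30 fps if it is unknown
cap.release()
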
/cs230_poster.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ColasGael/Automatic-Video-Colorization/5c656328c0329ba17fa52e4c0bc0fee846465b8b/cs230_poster.pdf
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 |
4 | matplotlib
5 | pillow
6 |
7 | scikit-image # to convert color images from RGB to LAB color space
8 | opencv-python # to read video clips
9 |
10 | tqdm # to visualize progress bar
--------------------------------------------------------------------------------
/synthesize_results.py:
--------------------------------------------------------------------------------
1 | """Aggregates results from the metrics_eval_best_weights.json in a parent folder"""
2 |
3 | import argparse
4 | import json
5 | import os
6 |
7 | from tabulate import tabulate
8 |
9 |
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument('--parent_dir', default='experiments',
12 | help='Directory containing results of experiments')
13 |
14 |
15 | def aggregate_metrics(parent_dir, metrics):
16 | """Aggregate the metrics of all experiments in folder `parent_dir`.
17 |
18 | Assumes that `parent_dir` contains multiple experiments, with their results stored in
19 |     `parent_dir/subdir/metrics_eval_best_weights.json`
20 |
21 | Args:
22 | parent_dir: (string) path to directory containing experiments results
23 |         metrics: (dict) filled in place, mapping subdir -> {'accuracy': ..., ...}
24 | """
25 | # Get the metrics for the folder if it has results from an experiment
26 | metrics_file = os.path.join(parent_dir, 'metrics_eval_best_weights.json')
27 | if os.path.isfile(metrics_file):
28 | with open(metrics_file, 'r') as f:
29 | metrics[parent_dir] = json.load(f)
30 |
31 | # Check every subdirectory of parent_dir
32 | for subdir in os.listdir(parent_dir):
33 | if not os.path.isdir(os.path.join(parent_dir, subdir)):
34 | continue
35 | else:
36 | aggregate_metrics(os.path.join(parent_dir, subdir), metrics)
37 |
38 |
39 | def metrics_to_table(metrics):
40 | # Get the headers from the first subdir. Assumes everything has the same metrics
41 | headers = metrics[list(metrics.keys())[0]].keys()
42 | table = [[subdir] + [values[h] for h in headers] for subdir, values in metrics.items()]
43 | res = tabulate(table, headers, tablefmt='pipe')
44 |
45 | return res
46 |
47 |
48 | if __name__ == "__main__":
49 | args = parser.parse_args()
50 |
51 | # Aggregate metrics from args.parent_dir directory
52 | metrics = dict()
53 | aggregate_metrics(args.parent_dir, metrics)
54 | table = metrics_to_table(metrics)
55 |
56 | # Display the table to terminal
57 | print(table)
58 |
59 | # Save results in parent_dir/results.md
60 | save_file = os.path.join(args.parent_dir, "results.md")
61 | with open(save_file, 'w') as f:
62 | f.write(table)
63 |
--------------------------------------------------------------------------------
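To illustrate the expected structure, a small sketch that feeds metrics_to_table() a hand-built metrics dict (experiment names and values are made up):

from synthesize_results import metrics_to_table

metrics = {
    'experiments/base_model': {'accuracy': 0.71, 'loss': 0.52},
    'experiments/ref_gan':    {'accuracy': 0.78, 'loss': 0.44},
}
print(metrics_to_table(metrics))   # prints a markdown ('pipe') table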