├── .circleci
│   └── config.yml
├── LICENSE.txt
├── README.md
├── configs
│   └── example_config.yml
├── data
│   └── README.md
├── deep_image_compression
│   ├── __init__.py
│   ├── balle2018.py
│   ├── batch_psnr.py
│   ├── bin
│   │   ├── data_ingestion
│   │   ├── data_processing
│   │   ├── model_analysis_batch_images
│   │   ├── model_analysis_single_image
│   │   ├── model_inference_compress_balle2018
│   │   ├── model_inference_compress_my_approach
│   │   ├── model_inference_decompress_balle2018
│   │   ├── model_inference_decompress_my_approach
│   │   ├── model_training_balle2018
│   │   ├── model_training_my_approach
│   │   └── rename_reconstructed_images
│   ├── my_approach.py
│   ├── single_psnr.py
│   └── static
│       └── img
│           ├── MSE_comparison.png
│           ├── baseline_comparison.png
│           ├── bpp_comparison.png
│           ├── example_JPEG_HEIC.png
│           ├── example_balle2018_my_approach.png
│           ├── model_improvement.png
│           ├── pipeline.png
│           └── result_table.png
├── requirements.txt
├── setup.cfg
├── setup.py
└── tests
    ├── test_balle2018.py
    ├── test_batch_psnr.py
    ├── test_deep_image_compression.py
    └── test_single_psnr.py

--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | # Python CircleCI 2.0 configuration file
2 | #
3 | # Check https://circleci.com/docs/2.0/language-python/ for more details
4 | #
5 | version: 2
6 | jobs:
7 |   build:
8 |     docker:
9 |       # specify the version you desire here
10 |       # use `-browsers` prefix for selenium tests, e.g. `3.6.1-browsers`
11 |       - image: circleci/python:3.6.1
12 | 
13 |       # Specify service dependencies here if necessary
14 |       # CircleCI maintains a library of pre-built images
15 |       # documented at https://circleci.com/docs/2.0/circleci-images/
16 |       # - image: circleci/postgres:9.4
17 | 
18 |     working_directory: ~/repo
19 | 
20 |     steps:
21 |       - checkout
22 | 
23 |       # Download and cache dependencies
24 |       - restore_cache:
25 |           keys:
26 |             - v1-dependencies-{{ checksum "requirements.txt" }}
27 |             # fallback to using the latest cache if no exact match is found
28 |             - v1-dependencies-
29 | 
30 |       - run:
31 |           name: install dependencies
32 |           command: |
33 |             python3 -m venv venv
34 |             . venv/bin/activate
35 |             pip install -r requirements.txt
36 | 
37 |       - save_cache:
38 |           paths:
39 |             - ./venv
40 |           key: v1-dependencies-{{ checksum "requirements.txt" }}
41 | 
42 |       # run tests!
43 |       # this repo's unit tests live in tests/ and are written for pytest
44 |       # https://pytest.org
45 |       - run:
46 |           name: run tests
47 |           command: |
48 |             . venv/bin/activate
49 |             pip install pytest  # the test runner is not in requirements.txt
50 |             pytest tests/
51 | 
52 |       - store_artifacts:
53 |           path: test-reports
54 |           destination: test-reports
55 | 
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 Licheng Xiao
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## [Project Demo Slides](https://bit.ly/deepimagecompressionslides)
2 | ## [Package](https://pypi.org/project/deep-image-compression/)
3 | 
4 | # Deep Image Compression: Extreme Image Compression Using Deep Learning
5 | ![image of pipeline](https://github.com/LichengXiao2017/deep-image-compression/blob/master/deep_image_compression/static/img/pipeline.png)
6 | 
7 | Deep Image Compression is an end-to-end tool for extreme image compression
8 | using deep learning. It outperforms JPEG, HEIC (the state-of-the-art traditional
9 | image compression method, derived from H.265 and available on iPhone and Mac),
10 | and Balle's 2018 approach (the state-of-the-art open-source deep learning
11 | approach, proposed by Balle et al. in "Variational Image Compression with a
12 | Scale Hyperprior").
13 | 
14 | The baseline model and algorithm (Balle2018) were cloned from the [Data
15 | compression in TensorFlow](https://github.com/tensorflow/compression) repo in
16 | September 2019.
17 | 
18 | In my approach, I changed the training dataset and modified the model
19 | structure, as shown in the following figure.
20 | 
21 | ![image of model structure modification](https://github.com/LichengXiao2017/deep-image-compression/blob/master/deep_image_compression/static/img/model_improvement.png)
22 | 
23 | ## The directory structure of this repo is the following:
24 | - **deep_image_compression** : contains all the source code
25 |     - **bin** : contains all executable scripts
26 |     - **model** : contains model checkpoints
27 |     - **static** : contains images used in the README.md
28 | - **tests** : contains all the unit tests
29 | - **data** : contains data for training, validation and unit tests
30 | - **configs** : contains config files for hyperparameters used during training and evaluation
31 | - **docs** : contains documentation
32 | - **examples** : contains Jupyter notebook examples of the workflow
33 | 
34 | 
35 | 
36 | ## Setup
37 | 
38 | #### Installation
39 | First, clone this GitHub repo.
40 | ```
41 | git clone https://github.com/LichengXiao2017/deep-image-compression.git
42 | cd deep-image-compression
43 | ```
44 | Then, install the deep-image-compression package. You can install it within a
45 | venv or with the --user option.
46 | ```
47 | python3 -m pip install deep-image-compression --user
48 | ```
49 | #### Requisites
50 | All of the following requisites are installed automatically when you install
51 | the deep-image-compression package.
52 | 1. tensorflow-gpu==1.15.0-rc1
53 | 2. absl-py==0.8.0
54 | 3. opencv-python==4.1.1.26
55 | 4. argparse==1.4.0
56 | 5. glob3==0.0.1
57 | 6. tensorflow_compression==1.2
58 | 7. numpy==1.16.4
59 | 
60 | #### Environment setup
61 | It's highly recommended that the workstation running this repo have at least
62 | one GPU. The repo has been tested on an Nvidia GTX 1070 (8 GB memory).
63 | The repo currently supports only a single GPU. It's suggested that you specify
64 | the GPU you are going to use before running the scripts. For example, if you
65 | want to use the first GPU, type the following command in the terminal.
66 | ```
67 | export CUDA_VISIBLE_DEVICES=0
68 | ```
69 | Other processes running on the same GPU might cause problems, so please run
70 | this repo on a vacant GPU.
71 | 
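Before launching a long training run, it's also worth confirming that TensorFlow can actually see the GPU. A quick check (an editorial example, not one of the repo's scripts):
```
# Editorial sanity check: verify TensorFlow 1.x detects a GPU.
import tensorflow as tf
print(tf.test.is_gpu_available())  # should print True on a working setup
```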
72 | ## Steps to run
73 | 
74 | ### Step 1: Configuration
75 | Configurations have not been combined into a single config file yet.
76 | Here is a list of scripts and variables that need to be configured before running:
77 | 1. bin/data_ingestion
78 |     - DATA_PATH
79 | 2. bin/data_processing
80 |     - IMAGE_PATH
81 | 3. bin/model_training_balle2018
82 |     - TRAIN_DATA_PATH
83 |     - MODEL_PATH
84 |     - LAMBDA
85 |     - NUM_FILTERS
86 |     - MAX_TRAIN_STEPS
87 | 4. bin/model_training_my_approach
88 |     - TRAIN_DATA_PATH
89 |     - MODEL_PATH
90 |     - LAMBDA
91 |     - NUM_FILTERS
92 |     - MAX_TRAIN_STEPS
93 |     - MAIN_LEARNING_RATE
94 |     - AUX_LEARNING_RATE
95 |     - TRAIN_BATCH_SIZE
96 | 5. bin/model_inference_compress_balle2018
97 |     - TEST_DATA_PATH
98 |     - MODEL_PATH
99 |     - NUM_FILTERS
100 | 6. bin/model_inference_compress_my_approach
101 |     - TEST_DATA_PATH
102 |     - MODEL_PATH
103 |     - NUM_FILTERS
104 | 7. bin/model_inference_decompress_balle2018
105 |     - TEST_DATA_PATH
106 |     - MODEL_PATH
107 |     - NUM_FILTERS
108 | 8. bin/model_inference_decompress_my_approach
109 |     - TEST_DATA_PATH
110 |     - MODEL_PATH
111 |     - NUM_FILTERS
112 | 9. bin/rename_reconstructed_images
113 |     - RECONSTRUCTED_IMAGE_PATH
114 | 10. bin/model_analysis_single_image
115 |     - ORIGINAL_IMAGE_PATH
116 |     - COMPRESSED_IMAGE_PATH
117 |     - RECONSTRUCTED_IMAGE_PATH
118 | 11. bin/model_analysis_batch_images
119 |     - ORIGINAL_IMAGE_FOLDER_PATH
120 |     - COMPRESSED_IMAGE_FOLDER_PATH
121 |     - RECONSTRUCTED_IMAGE_FOLDER_PATH
122 | 
123 | In the future, these configurations will be combined into a single config file
124 | under configs/.
125 | 
126 | ### Step 2: Prepare and Preprocess
127 | #### - Download dataset
128 | Download and unzip the dataset you want to use for training by running:
129 | ```
130 | bin/data_ingestion
131 | ```
132 | 
133 | The dataset used in this project is:
134 | - **The [CLIC dataset](https://www.compression.cc)**
135 | You can also modify data_ingestion to use your own training dataset.
136 | 
137 | #### - Convert color domain to RGB
138 | This repo currently supports only the RGB color domain.
139 | Convert the dataset to the RGB domain by running:
140 | ```
141 | bin/data_processing
142 | ```
143 | ### Step 3: Train model
144 | To train the baseline (Balle2018) model, run:
145 | ```
146 | bin/model_training_balle2018
147 | ```
148 | To train the model for my approach, run:
149 | ```
150 | bin/model_training_my_approach
151 | ```
152 | ### Step 4: Run model inference
153 | Compression converts a .png file into a .png.tfci file.
154 | To compress an image using the Balle2018 model, run:
155 | ```
156 | bin/model_inference_compress_balle2018
157 | ```
158 | To compress an image using my approach, run:
159 | ```
160 | bin/model_inference_compress_my_approach
161 | ```
162 | 
163 | Decompression converts a .png.tfci file into a .png.tfci.png file.
164 | To decompress an image using the Balle2018 model, run:
165 | ```
166 | bin/model_inference_decompress_balle2018
167 | ```
168 | To decompress an image using my approach, run:
169 | ```
170 | bin/model_inference_decompress_my_approach
171 | ```
172 | 
173 | ### Step 5: Evaluate model
174 | #### rename decompressed images
175 | To keep the order of files identical when evaluating a list of images, rename
176 | the .png.tfci.png files to .png files before evaluation by running:
177 | ```
178 | bin/rename_reconstructed_images
179 | ```
180 | #### evaluate a single image
181 | ```
182 | bin/model_analysis_single_image
183 | ```
184 | #### evaluate a list of images
185 | ```
186 | bin/model_analysis_batch_images
187 | ```
188 | 
189 | 
190 | ## Analysis
191 | 
192 | #### Comparison of Balle (2018) and other approaches using an RD curve over the Kodak dataset:
193 | ![image of baseline comparison](https://github.com/LichengXiao2017/deep-image-compression/blob/master/deep_image_compression/static/img/baseline_comparison.png)
194 | 
195 | #### Final result:
196 | 
197 | The following graphs show that, compared with Balle (2018), my approach achieved
198 | lower bpp (bits per pixel) with similar MSE (mean squared error) during training.
199 | 
200 | ![image of MSE comparison](https://github.com/LichengXiao2017/deep-image-compression/blob/master/deep_image_compression/static/img/MSE_comparison.png)
201 | 
202 | ![image of bpp comparison](https://github.com/LichengXiao2017/deep-image-compression/blob/master/deep_image_compression/static/img/bpp_comparison.png)
203 | 
204 | The following table shows the test results on the Kodak dataset.
205 | ![image of results](https://github.com/LichengXiao2017/deep-image-compression/blob/master/deep_image_compression/static/img/result_table.png)
206 | 
207 | Note:
208 | 1. The metrics in the table are averaged over all images from the Kodak dataset.
209 | 2. The encoding and decoding times were recorded manually.
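For reference, the bpp value reported above is just the size of the compressed bitstream in bits divided by the pixel count of the original image. A minimal sketch (an editorial example; the filenames are placeholders):
```
# Editorial sketch: bpp = compressed size in bits / pixel count.
import os
import cv2

h, w = cv2.imread("kodim04.png").shape[:2]
bpp = os.path.getsize("kodim04.png.tfci") * 8 / (h * w)
print("{:.4f} bpp".format(bpp))
```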
--------------------------------------------------------------------------------
/configs/example_config.yml:
--------------------------------------------------------------------------------
1 | # Dataset Stuff -------------------------------------------------
2 | #
3 | data_path: ~/data
4 | output_path: ~/output
5 | 
6 | val_size: 10000
7 | train_chunk_size: 40000
8 | 
9 | 
10 | # Training Hyperparams --------------------------------------
11 | 
12 | batch_size: 128
13 | num_epochs: 200
14 | validation_every: 1
15 | 
16 | weight_decay: 0.0005
17 | 
18 | learning_rate_schedule:
19 |   init: 0.1
20 |   final: 0.0001
21 | 
22 | momentum_schedule:
23 |   0: 0.0
24 |   1: 0.5
25 |   2: 0.9
26 | 
27 | layer_config:
28 |   0:
29 |     layer_type: InputLayer
30 |     input_shape: [128, 1, 91, 64]
31 | 
32 |   1:
33 |     layer_type: Conv2DLayer
34 |     n_filters: 64
35 |     filter_size: [8,59]
36 |     nonlinearity: rectifier
37 |     init_bias_value: 0.01
38 | 
39 |   2:
40 |     layer_type: MaxPooling2DLayer
41 |     pool_size: [6,3]
42 |     ignore_border: False
43 | 
44 |   3:
45 |     layer_type: DenseLayer
46 |     n_outputs: 500
47 |     nonlinearity: rectifier
48 |     init_bias_value: 0.1
49 |     dropout: 0.5
50 | 
51 |   4:
52 |     layer_type: DenseLayer
53 |     n_outputs: 2
54 |     nonlinearity: sigmoid
55 |     init_bias_value: 0.1
56 |     dropout: 0.0
57 | 
58 |   5:
59 |     layer_type: OutputLayer
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | Extra credit for writing tests!
2 | 
3 | The structure of this directory should mirror the structure of your project
4 | directory. For each file in your project directory, `<module>.py`,
5 | you'll have a test file here: `test_<module>.py`.
6 | 
7 | If you want to learn more about tests, check out this video:
8 | 
9 | https://www.youtube.com/watch?v=6tNS--WetLI
10 | 
11 | If you end up writing tests, this is another good thing to know about:
12 | 
13 | https://docs.scipy.org/doc/numpy/reference/routines.testing.html
14 | 
--------------------------------------------------------------------------------
/deep_image_compression/__init__.py:
--------------------------------------------------------------------------------
1 | from deep_image_compression.single_psnr import SingleEvaluator
2 | from deep_image_compression.batch_psnr import BatchEvaluator
3 | 
--------------------------------------------------------------------------------
/deep_image_compression/balle2018.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2019 Google LLC. All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ==============================================================================
16 | """Nonlinear transform coder with hyperprior for RGB images.
17 | 
18 | This is the image compression model published in:
19 | J. Ballé, D. Minnen, S. Singh, S.J. Hwang, N. Johnston:
20 | "Variational Image Compression with a Scale Hyperprior"
21 | Int. Conf. on Learning Representations (ICLR), 2018
22 | https://arxiv.org/abs/1802.01436
23 | 
24 | This is meant as 'educational' code - you can use this to get started with your
25 | own experiments. To reproduce the exact results from the paper, tuning of hyper-
26 | parameters may be necessary. To compress images with published models, see
27 | `tfci.py`.
28 | """ 29 | 30 | from __future__ import absolute_import 31 | from __future__ import division 32 | from __future__ import print_function 33 | 34 | import argparse 35 | import glob 36 | import sys 37 | 38 | from absl import app 39 | from absl.flags import argparse_flags 40 | import numpy as np 41 | import tensorflow.compat.v1 as tf 42 | 43 | import tensorflow_compression as tfc 44 | 45 | 46 | SCALES_MIN = 0.11 47 | SCALES_MAX = 256 48 | SCALES_LEVELS = 64 49 | 50 | 51 | def read_png(filename): 52 | """Loads a PNG image file.""" 53 | string = tf.read_file(filename) 54 | image = tf.image.decode_image(string, channels=3) 55 | image = tf.cast(image, tf.float32) 56 | image /= 255 57 | return image 58 | 59 | 60 | def quantize_image(image): 61 | image = tf.round(image * 255) 62 | image = tf.saturate_cast(image, tf.uint8) 63 | return image 64 | 65 | 66 | def write_png(filename, image): 67 | """Saves an image to a PNG file.""" 68 | image = quantize_image(image) 69 | string = tf.image.encode_png(image) 70 | return tf.write_file(filename, string) 71 | 72 | 73 | class AnalysisTransform(tf.keras.layers.Layer): 74 | """The analysis transform.""" 75 | 76 | def __init__(self, num_filters, *args, **kwargs): 77 | self.num_filters = num_filters 78 | super(AnalysisTransform, self).__init__(*args, **kwargs) 79 | 80 | def build(self, input_shape): 81 | self._layers = [ 82 | tfc.SignalConv2D( 83 | self.num_filters, (5, 5), name="layer_0", corr=True, strides_down=2, 84 | padding="same_zeros", use_bias=True, 85 | activation=tfc.GDN(name="gdn_0")), 86 | tfc.SignalConv2D( 87 | self.num_filters, (5, 5), name="layer_1", corr=True, strides_down=2, 88 | padding="same_zeros", use_bias=True, 89 | activation=tfc.GDN(name="gdn_1")), 90 | tfc.SignalConv2D( 91 | self.num_filters, (5, 5), name="layer_2", corr=True, strides_down=2, 92 | padding="same_zeros", use_bias=True, 93 | activation=tfc.GDN(name="gdn_2")), 94 | tfc.SignalConv2D( 95 | self.num_filters, (5, 5), name="layer_3", corr=True, strides_down=2, 96 | padding="same_zeros", use_bias=True, 97 | activation=None), 98 | ] 99 | super(AnalysisTransform, self).build(input_shape) 100 | 101 | def call(self, tensor): 102 | for layer in self._layers: 103 | tensor = layer(tensor) 104 | return tensor 105 | 106 | 107 | class SynthesisTransform(tf.keras.layers.Layer): 108 | """The synthesis transform.""" 109 | 110 | def __init__(self, num_filters, *args, **kwargs): 111 | self.num_filters = num_filters 112 | super(SynthesisTransform, self).__init__(*args, **kwargs) 113 | 114 | def build(self, input_shape): 115 | self._layers = [ 116 | tfc.SignalConv2D( 117 | self.num_filters, (5, 5), name="layer_0", corr=False, strides_up=2, 118 | padding="same_zeros", use_bias=True, 119 | activation=tfc.GDN(name="igdn_0", inverse=True)), 120 | tfc.SignalConv2D( 121 | self.num_filters, (5, 5), name="layer_1", corr=False, strides_up=2, 122 | padding="same_zeros", use_bias=True, 123 | activation=tfc.GDN(name="igdn_1", inverse=True)), 124 | tfc.SignalConv2D( 125 | self.num_filters, (5, 5), name="layer_2", corr=False, strides_up=2, 126 | padding="same_zeros", use_bias=True, 127 | activation=tfc.GDN(name="igdn_2", inverse=True)), 128 | tfc.SignalConv2D( 129 | 3, (5, 5), name="layer_3", corr=False, strides_up=2, 130 | padding="same_zeros", use_bias=True, 131 | activation=None), 132 | ] 133 | super(SynthesisTransform, self).build(input_shape) 134 | 135 | def call(self, tensor): 136 | for layer in self._layers: 137 | tensor = layer(tensor) 138 | return tensor 139 | 140 | 141 | class 
HyperAnalysisTransform(tf.keras.layers.Layer): 142 | """The analysis transform for the entropy model parameters.""" 143 | 144 | def __init__(self, num_filters, *args, **kwargs): 145 | self.num_filters = num_filters 146 | super(HyperAnalysisTransform, self).__init__(*args, **kwargs) 147 | 148 | def build(self, input_shape): 149 | self._layers = [ 150 | tfc.SignalConv2D( 151 | self.num_filters, (3, 3), name="layer_0", corr=True, strides_down=1, 152 | padding="same_zeros", use_bias=True, 153 | activation=tf.nn.relu), 154 | tfc.SignalConv2D( 155 | self.num_filters, (5, 5), name="layer_1", corr=True, strides_down=2, 156 | padding="same_zeros", use_bias=True, 157 | activation=tf.nn.relu), 158 | tfc.SignalConv2D( 159 | self.num_filters, (5, 5), name="layer_2", corr=True, strides_down=2, 160 | padding="same_zeros", use_bias=False, 161 | activation=None), 162 | ] 163 | super(HyperAnalysisTransform, self).build(input_shape) 164 | 165 | def call(self, tensor): 166 | for layer in self._layers: 167 | tensor = layer(tensor) 168 | return tensor 169 | 170 | 171 | class HyperSynthesisTransform(tf.keras.layers.Layer): 172 | """The synthesis transform for the entropy model parameters.""" 173 | 174 | def __init__(self, num_filters, *args, **kwargs): 175 | self.num_filters = num_filters 176 | super(HyperSynthesisTransform, self).__init__(*args, **kwargs) 177 | 178 | def build(self, input_shape): 179 | self._layers = [ 180 | tfc.SignalConv2D( 181 | self.num_filters, (5, 5), name="layer_0", corr=False, strides_up=2, 182 | padding="same_zeros", use_bias=True, kernel_parameterizer=None, 183 | activation=tf.nn.relu), 184 | tfc.SignalConv2D( 185 | self.num_filters, (5, 5), name="layer_1", corr=False, strides_up=2, 186 | padding="same_zeros", use_bias=True, kernel_parameterizer=None, 187 | activation=tf.nn.relu), 188 | tfc.SignalConv2D( 189 | self.num_filters, (3, 3), name="layer_2", corr=False, strides_up=1, 190 | padding="same_zeros", use_bias=True, kernel_parameterizer=None, 191 | activation=None), 192 | ] 193 | super(HyperSynthesisTransform, self).build(input_shape) 194 | 195 | def call(self, tensor): 196 | for layer in self._layers: 197 | tensor = layer(tensor) 198 | return tensor 199 | 200 | 201 | def train(args): 202 | """Trains the model.""" 203 | 204 | if args.verbose: 205 | tf.logging.set_verbosity(tf.logging.INFO) 206 | 207 | # Create input data pipeline. 208 | with tf.device("/cpu:0"): 209 | train_files = glob.glob(args.train_glob) 210 | if not train_files: 211 | raise RuntimeError( 212 | "No training images found with glob '{}'.".format(args.train_glob)) 213 | train_dataset = tf.data.Dataset.from_tensor_slices(train_files) 214 | train_dataset = train_dataset.shuffle(buffer_size=len(train_files)).repeat() 215 | train_dataset = train_dataset.map( 216 | read_png, num_parallel_calls=args.preprocess_threads) 217 | train_dataset = train_dataset.map( 218 | lambda x: tf.random_crop(x, (args.patchsize, args.patchsize, 3))) 219 | train_dataset = train_dataset.batch(args.batchsize) 220 | train_dataset = train_dataset.prefetch(32) 221 | 222 | num_pixels = args.batchsize * args.patchsize ** 2 223 | 224 | # Get training patch from dataset. 225 | x = train_dataset.make_one_shot_iterator().get_next() 226 | 227 | # Instantiate model. 
228 | analysis_transform = AnalysisTransform(args.num_filters) 229 | synthesis_transform = SynthesisTransform(args.num_filters) 230 | hyper_analysis_transform = HyperAnalysisTransform(args.num_filters) 231 | hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters) 232 | entropy_bottleneck = tfc.EntropyBottleneck() 233 | 234 | # Build autoencoder and hyperprior. 235 | y = analysis_transform(x) 236 | z = hyper_analysis_transform(abs(y)) 237 | z_tilde, z_likelihoods = entropy_bottleneck(z, training=True) 238 | sigma = hyper_synthesis_transform(z_tilde) 239 | scale_table = np.exp(np.linspace( 240 | np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)) 241 | conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table) 242 | y_tilde, y_likelihoods = conditional_bottleneck(y, training=True) 243 | x_tilde = synthesis_transform(y_tilde) 244 | 245 | # Total number of bits divided by number of pixels. 246 | train_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + 247 | tf.reduce_sum(tf.log(z_likelihoods))) / (-np.log(2) * num_pixels) 248 | 249 | # Mean squared error across pixels. 250 | train_mse = tf.reduce_mean(tf.squared_difference(x, x_tilde)) 251 | # Multiply by 255^2 to correct for rescaling. 252 | train_mse *= 255 ** 2 253 | 254 | # The rate-distortion cost. 255 | train_loss = args.lmbda * train_mse + train_bpp 256 | 257 | # Minimize loss and auxiliary loss, and execute update op. 258 | step = tf.train.create_global_step() 259 | main_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4) 260 | main_step = main_optimizer.minimize(train_loss, global_step=step) 261 | 262 | aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3) 263 | aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0]) 264 | 265 | train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0]) 266 | 267 | tf.summary.scalar("loss", train_loss) 268 | tf.summary.scalar("bpp", train_bpp) 269 | tf.summary.scalar("mse", train_mse) 270 | 271 | tf.summary.image("original", quantize_image(x)) 272 | tf.summary.image("reconstruction", quantize_image(x_tilde)) 273 | 274 | hooks = [ 275 | tf.train.StopAtStepHook(last_step=args.last_step), 276 | tf.train.NanTensorHook(train_loss), 277 | ] 278 | with tf.train.MonitoredTrainingSession( 279 | hooks=hooks, checkpoint_dir=args.checkpoint_dir, 280 | save_checkpoint_secs=300, save_summaries_secs=60) as sess: 281 | while not sess.should_stop(): 282 | sess.run(train_op) 283 | 284 | 285 | def compress(args): 286 | """Compresses an image.""" 287 | 288 | # Load input image and add batch dimension. 289 | x = read_png(args.input_file) 290 | x = tf.expand_dims(x, 0) 291 | x.set_shape([1, None, None, 3]) 292 | x_shape = tf.shape(x) 293 | 294 | # Instantiate model. 295 | analysis_transform = AnalysisTransform(args.num_filters) 296 | synthesis_transform = SynthesisTransform(args.num_filters) 297 | hyper_analysis_transform = HyperAnalysisTransform(args.num_filters) 298 | hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters) 299 | entropy_bottleneck = tfc.EntropyBottleneck() 300 | 301 | # Transform and compress the image. 
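  # (Editorial note) The pipeline below follows the hyperprior architecture:
  # the latents y = g_a(x) are entropy-coded under a conditional Gaussian model
  # whose scale parameters sigma are predicted from the side information z,
  # and z itself is coded with the learned factorized entropy bottleneck.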
302 | y = analysis_transform(x) 303 | y_shape = tf.shape(y) 304 | z = hyper_analysis_transform(abs(y)) 305 | z_hat, z_likelihoods = entropy_bottleneck(z, training=False) 306 | sigma = hyper_synthesis_transform(z_hat) 307 | sigma = sigma[:, :y_shape[1], :y_shape[2], :] 308 | scale_table = np.exp(np.linspace( 309 | np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)) 310 | conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table) 311 | side_string = entropy_bottleneck.compress(z) 312 | string = conditional_bottleneck.compress(y) 313 | 314 | # Transform the quantized image back (if requested). 315 | y_hat, y_likelihoods = conditional_bottleneck(y, training=False) 316 | x_hat = synthesis_transform(y_hat) 317 | x_hat = x_hat[:, :x_shape[1], :x_shape[2], :] 318 | 319 | num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32) 320 | 321 | # Total number of bits divided by number of pixels. 322 | eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + 323 | tf.reduce_sum(tf.log(z_likelihoods))) / (-np.log(2) * num_pixels) 324 | 325 | # Bring both images back to 0..255 range. 326 | x *= 255 327 | x_hat = tf.clip_by_value(x_hat, 0, 1) 328 | x_hat = tf.round(x_hat * 255) 329 | 330 | mse = tf.reduce_mean(tf.squared_difference(x, x_hat)) 331 | psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255)) 332 | msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255)) 333 | 334 | with tf.Session() as sess: 335 | # Load the latest model checkpoint, get the compressed string and the tensor 336 | # shapes. 337 | latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir) 338 | tf.train.Saver().restore(sess, save_path=latest) 339 | tensors = [string, side_string, 340 | tf.shape(x)[1:-1], tf.shape(y)[1:-1], tf.shape(z)[1:-1]] 341 | arrays = sess.run(tensors) 342 | 343 | # Write a binary file with the shape information and the compressed string. 344 | packed = tfc.PackedTensors() 345 | packed.pack(tensors, arrays) 346 | with open(args.output_file, "wb") as f: 347 | f.write(packed.string) 348 | 349 | # If requested, transform the quantized image back and measure performance. 350 | if args.verbose: 351 | eval_bpp, mse, psnr, msssim, num_pixels = sess.run( 352 | [eval_bpp, mse, psnr, msssim, num_pixels]) 353 | 354 | # The actual bits per pixel including overhead. 355 | bpp = len(packed.string) * 8 / num_pixels 356 | 357 | print("Mean squared error: {:0.4f}".format(mse)) 358 | print("PSNR (dB): {:0.2f}".format(psnr)) 359 | print("Multiscale SSIM: {:0.4f}".format(msssim)) 360 | print("Multiscale SSIM (dB): {:0.2f}".format(-10 * np.log10(1 - msssim))) 361 | print("Information content in bpp: {:0.4f}".format(eval_bpp)) 362 | print("Actual bits per pixel: {:0.4f}".format(bpp)) 363 | 364 | 365 | def decompress(args): 366 | """Decompresses an image.""" 367 | 368 | # Read the shape information and compressed string from the binary file. 369 | string = tf.placeholder(tf.string, [1]) 370 | side_string = tf.placeholder(tf.string, [1]) 371 | x_shape = tf.placeholder(tf.int32, [2]) 372 | y_shape = tf.placeholder(tf.int32, [2]) 373 | z_shape = tf.placeholder(tf.int32, [2]) 374 | with open(args.input_file, "rb") as f: 375 | packed = tfc.PackedTensors(f.read()) 376 | tensors = [string, side_string, x_shape, y_shape, z_shape] 377 | arrays = packed.unpack(tensors) 378 | 379 | # Instantiate model. 
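  # (Editorial note) Only decoder-side components are instantiated below: the
  # analysis transforms are used exclusively at encode time, so decompression
  # rebuilds just the synthesis transforms and the entropy models.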
380 | synthesis_transform = SynthesisTransform(args.num_filters) 381 | hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters) 382 | entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32) 383 | 384 | # Decompress and transform the image back. 385 | z_shape = tf.concat([z_shape, [args.num_filters]], axis=0) 386 | z_hat = entropy_bottleneck.decompress( 387 | side_string, z_shape, channels=args.num_filters) 388 | sigma = hyper_synthesis_transform(z_hat) 389 | sigma = sigma[:, :y_shape[0], :y_shape[1], :] 390 | scale_table = np.exp(np.linspace( 391 | np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)) 392 | conditional_bottleneck = tfc.GaussianConditional( 393 | sigma, scale_table, dtype=tf.float32) 394 | y_hat = conditional_bottleneck.decompress(string) 395 | x_hat = synthesis_transform(y_hat) 396 | 397 | # Remove batch dimension, and crop away any extraneous padding on the bottom 398 | # or right boundaries. 399 | x_hat = x_hat[0, :x_shape[0], :x_shape[1], :] 400 | 401 | # Write reconstructed image out as a PNG file. 402 | op = write_png(args.output_file, x_hat) 403 | 404 | # Load the latest model checkpoint, and perform the above actions. 405 | with tf.Session() as sess: 406 | latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir) 407 | tf.train.Saver().restore(sess, save_path=latest) 408 | sess.run(op, feed_dict=dict(zip(tensors, arrays))) 409 | 410 | 411 | def parse_args(argv): 412 | """Parses command line arguments.""" 413 | parser = argparse_flags.ArgumentParser( 414 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 415 | 416 | # High-level options. 417 | parser.add_argument( 418 | "--verbose", "-V", action="store_true", 419 | help="Report bitrate and distortion when training or compressing.") 420 | parser.add_argument( 421 | "--num_filters", type=int, default=192, 422 | help="Number of filters per layer.") 423 | parser.add_argument( 424 | "--checkpoint_dir", default="train", 425 | help="Directory where to save/load model checkpoints.") 426 | subparsers = parser.add_subparsers( 427 | title="commands", dest="command", 428 | help="What to do: 'train' loads training data and trains (or continues " 429 | "to train) a new model. 'compress' reads an image file (lossless " 430 | "PNG format) and writes a compressed binary file. 'decompress' " 431 | "reads a binary file and reconstructs the image (in PNG format). " 432 | "input and output filenames need to be provided for the latter " 433 | "two options. Invoke ' -h' for more information.") 434 | 435 | # 'train' subcommand. 436 | train_cmd = subparsers.add_parser( 437 | "train", 438 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 439 | description="Trains (or continues to train) a new model.") 440 | train_cmd.add_argument( 441 | "--train_glob", default="images/*.png", 442 | help="Glob pattern identifying training data. 
This pattern must expand " 443 | "to a list of RGB images in PNG format.") 444 | train_cmd.add_argument( 445 | "--batchsize", type=int, default=8, 446 | help="Batch size for training.") 447 | train_cmd.add_argument( 448 | "--patchsize", type=int, default=256, 449 | help="Size of image patches for training.") 450 | train_cmd.add_argument( 451 | "--lambda", type=float, default=0.01, dest="lmbda", 452 | help="Lambda for rate-distortion tradeoff.") 453 | train_cmd.add_argument( 454 | "--last_step", type=int, default=1000000, 455 | help="Train up to this number of steps.") 456 | train_cmd.add_argument( 457 | "--preprocess_threads", type=int, default=16, 458 | help="Number of CPU threads to use for parallel decoding of training " 459 | "images.") 460 | 461 | # 'compress' subcommand. 462 | compress_cmd = subparsers.add_parser( 463 | "compress", 464 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 465 | description="Reads a PNG file, compresses it, and writes a TFCI file.") 466 | 467 | # 'decompress' subcommand. 468 | decompress_cmd = subparsers.add_parser( 469 | "decompress", 470 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 471 | description="Reads a TFCI file, reconstructs the image, and writes back " 472 | "a PNG file.") 473 | 474 | # Arguments for both 'compress' and 'decompress'. 475 | for cmd, ext in ((compress_cmd, ".tfci"), (decompress_cmd, ".png")): 476 | cmd.add_argument( 477 | "input_file", 478 | help="Input filename.") 479 | cmd.add_argument( 480 | "output_file", nargs="?", 481 | help="Output filename (optional). If not provided, appends '{}' to " 482 | "the input filename.".format(ext)) 483 | 484 | # Parse arguments. 485 | args = parser.parse_args(argv[1:]) 486 | if args.command is None: 487 | parser.print_usage() 488 | sys.exit(2) 489 | return args 490 | 491 | 492 | def main(args): 493 | # Invoke subcommand. 494 | if args.command == "train": 495 | train(args) 496 | elif args.command == "compress": 497 | if not args.output_file: 498 | args.output_file = args.input_file + ".tfci" 499 | compress(args) 500 | elif args.command == "decompress": 501 | if not args.output_file: 502 | args.output_file = args.input_file + ".png" 503 | decompress(args) 504 | 505 | 506 | if __name__ == "__main__": 507 | app.run(main, flags_parser=parse_args) 508 | -------------------------------------------------------------------------------- /deep_image_compression/batch_psnr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2019 Licheng Xiao. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ==============================================================================
16 | 
17 | import tensorflow as tf
18 | import numpy
19 | import cv2
20 | import os
21 | import logging
22 | from os import listdir
23 | from os.path import isfile, join
24 | from absl import flags
25 | 
26 | flags.DEFINE_string("original_path", default=None,
27 |                     help="Path for folder containing original image files.")
28 | flags.DEFINE_string("compressed_path", default=None,
29 |                     help="Path for folder containing compressed image files.")
30 | flags.DEFINE_string("reconstructed_path", default=None,
31 |                     help="Path for folder containing reconstructed image files.")
32 | FLAGS = flags.FLAGS
33 | 
34 | 
35 | class BatchEvaluator:
36 |     def get_batch_bpp_psnr_msssim(self,
37 |                                   original_path,
38 |                                   reconstructed_path,
39 |                                   compressed_path):
40 |         avg_psnr = 0
41 |         avg_bpp = 0
42 |         avg_msssim = 0
43 |         try:
44 |             # Sort the listings so original/reconstructed/compressed files
45 |             # line up; listdir() returns entries in arbitrary order.
46 |             original_files = sorted(
47 |                 f for f in listdir(original_path) if isfile(join(original_path, f)))
48 |             compare_files = sorted(
49 |                 f for f in listdir(reconstructed_path) if isfile(join(reconstructed_path, f)))
50 |             bin_files = sorted(
51 |                 f for f in listdir(compressed_path) if isfile(join(compressed_path, f)))
52 |             sess = tf.Session()
53 |             for i in range(0, len(original_files)):
54 |                 original_img = original_files[i]
55 |                 compare_img = compare_files[i]
56 |                 bin_file = bin_files[i]
57 |                 original = cv2.imread(join(original_path, original_img))
58 |                 contrast = cv2.imread(join(reconstructed_path, compare_img))
59 |                 original = numpy.expand_dims(original, axis=0)
60 |                 contrast = numpy.expand_dims(contrast, axis=0)
61 |                 original_tensor = tf.convert_to_tensor(
62 |                     original, dtype=tf.uint8)
63 |                 contrast_tensor = tf.convert_to_tensor(
64 |                     contrast, dtype=tf.uint8)
65 |                 msssim_tensor = tf.image.ssim_multiscale(
66 |                     original_tensor, contrast_tensor, 255)
67 |                 psnr_tensor = tf.image.psnr(
68 |                     original_tensor, contrast_tensor, 255)
69 |                 msssim = sess.run(msssim_tensor)
70 |                 psnr = sess.run(psnr_tensor)
71 |                 # bpp = compressed size in bits / pixel count of the image.
72 |                 _, h, w, _ = numpy.shape(contrast)
73 |                 bpp = os.path.getsize(join(compressed_path, bin_file)) * 8 / (h * w)
74 |                 avg_bpp += bpp
75 |                 avg_psnr += psnr
76 |                 avg_msssim += msssim
77 |             avg_bpp /= len(original_files)
78 |             avg_psnr /= len(original_files)
79 |             avg_msssim /= len(original_files)
80 |         except Exception as e:
81 |             logging.error(e)
82 |         if avg_psnr == 0:
83 |             logging.error('An error occurred; please check the log for details.')
84 |         else:
85 |             logging.info('average psnr: %s\naverage ms_ssim: %s\naverage bpp: %s',
86 |                          avg_psnr, avg_msssim, avg_bpp)
87 |         return avg_psnr, avg_msssim, avg_bpp
88 | 
89 | 
90 | def main(_):
91 |     batch_evaluator = BatchEvaluator()
92 |     batch_evaluator.get_batch_bpp_psnr_msssim(FLAGS.original_path,
93 |                                               FLAGS.reconstructed_path,
94 |                                               FLAGS.compressed_path)
95 | 
96 | 
97 | if __name__ == '__main__':
98 |     tf.app.run()
99 | 
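A minimal usage sketch for the evaluator above (an editorial example; the folder paths are placeholders, and each folder must contain the same number of files with matching sorted order):
```
from deep_image_compression.batch_psnr import BatchEvaluator

evaluator = BatchEvaluator()
avg_psnr, avg_msssim, avg_bpp = evaluator.get_batch_bpp_psnr_msssim(
    "../data/test/original/",
    "../data/test/reconstructed/balle2018/",
    "../data/test/compressed/balle2018/")
```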
--------------------------------------------------------------------------------
/deep_image_compression/bin/data_ingestion:
--------------------------------------------------------------------------------
1 | # The training dataset described in the paper "VARIATIONAL IMAGE COMPRESSION
2 | # WITH A SCALE HYPERPRIOR" comprised approximately 1 million images scraped
3 | # from the world wide web. The paper didn't release the exact dataset.
4 | 
5 | # My previous research showed that the autoencoder model can converge on a very
6 | # small dataset containing only 139 images, and still generalize well on a test
7 | # dataset with very different image content.
8 | 
9 | # To optimize model performance in compressing photos of natural scenes, which
10 | # is also the optimization goal of traditional approaches like JPEG, I
11 | # selected a training dataset focusing on photos in the wild.
12 | 
13 | # The training and validation dataset is the CLIC (Challenge on Learned Image
14 | # Compression) Dataset P ("professional"), which is 1.9 GB and contains 2000
15 | # representative photos in the wild with resolution higher than 2K.
16 | 
17 | # The script downloads the CLIC dataset (professional and mobile splits) into
18 | # ${DATA_PATH}, e.g. deep-image-compression/data/raw.
19 | 
20 | # Run this bash script at path deep-image-compression/ with the command:
21 | # '''bin/data_ingestion'''
22 | 
23 | # path to store the training and validation image dataset
24 | DATA_PATH=path/to/store/images/dataset
25 | 
26 | wget https://data.vision.ee.ethz.ch/cvl/clic/professional_train.zip -O ${DATA_PATH}/professional_train.zip
27 | unzip ${DATA_PATH}/professional_train.zip -d ${DATA_PATH}/professional_train
28 | wget https://data.vision.ee.ethz.ch/cvl/clic/professional_valid.zip -O ${DATA_PATH}/professional_valid.zip
29 | unzip ${DATA_PATH}/professional_valid.zip -d ${DATA_PATH}/professional_valid
30 | wget https://data.vision.ee.ethz.ch/cvl/clic/mobile_train.zip -O ${DATA_PATH}/mobile_train.zip
31 | unzip ${DATA_PATH}/mobile_train.zip -d ${DATA_PATH}/mobile_train
32 | wget https://data.vision.ee.ethz.ch/cvl/clic/mobile_valid.zip -O ${DATA_PATH}/mobile_valid.zip
33 | unzip ${DATA_PATH}/mobile_valid.zip -d ${DATA_PATH}/mobile_valid
--------------------------------------------------------------------------------
/deep_image_compression/bin/data_processing:
--------------------------------------------------------------------------------
1 | # The training dataset described in the paper "VARIATIONAL IMAGE COMPRESSION
2 | # WITH A SCALE HYPERPRIOR" comprised approximately 1 million images scraped
3 | # from the world wide web.
4 | # The preprocessing in the original approach contains two steps:
5 | # 1. Images with excessive saturation were screened out to reduce the number of
6 | #    non-photographic images.
7 | # 2. To reduce existing compression artifacts, the images were further
8 | #    downsampled by a randomized factor, such that the minimum of their height
9 | #    and width equaled between 640 and 1200 pixels.
10 | 
11 | # In my approach, I use 2000 high-quality photos in the wild from CLIC, which
12 | # eliminates the need for the preprocessing described in the original paper.
13 | 
14 | # When generating the actual training dataset, the images are randomly sliced
15 | # into small patches with a default size of 256 x 256. That slicing happens in
16 | # the training pipeline itself (tf.random_crop in balle2018.py/my_approach.py),
17 | # invoked via the bin/model_training_* scripts.
18 | 
19 | # Path for training images
20 | # IMAGE_PATH=path/to/validation/image/directory
21 | IMAGE_PATH=path/to/training/image/directory
22 | 
23 | # Rename a bad image (its filename is awkward for bash processing) in the
24 | # training images.
25 | mv ${IMAGE_PATH}/ambitious-creative-co-rick-barrett-110145\ \(1\).png ${IMAGE_PATH}/ambitious-creative-co-rick-barrett-110145.png
26 | 
27 | # Convert training images to the RGB colorspace
28 | for image in ${IMAGE_PATH}/*.png
29 | do
30 |     convert ${image} -colorspace RGB ${image}
31 |     echo 'converted ' ${image}
32 | done
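If ImageMagick's `convert` is not available, the same normalization can be sketched in Python (an editorial example assuming Pillow is installed; the glob path is a placeholder):
```
# Editorial sketch, not part of the repo: normalize training images to
# 3-channel RGB (read_png later decodes them with channels=3).
import glob
from PIL import Image

for path in glob.glob("path/to/training/image/directory/*.png"):
    Image.open(path).convert("RGB").save(path)
    print("converted", path)
```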
--------------------------------------------------------------------------------
/deep_image_compression/bin/model_analysis_batch_images:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # #### local path for JPEG
4 | # ORIGINAL_IMAGE_FOLDER_PATH=../data/test/original/
5 | # COMPRESSED_IMAGE_FOLDER_PATH=../data/test/compressed/JPEG/
6 | # RECONSTRUCTED_IMAGE_FOLDER_PATH=../data/test/reconstructed/JPEG/
7 | 
8 | # #### local path for JPEG2000
9 | # ORIGINAL_IMAGE_FOLDER_PATH=../data/test/original/
10 | # COMPRESSED_IMAGE_FOLDER_PATH=../data/test/compressed/JPEG2000/
11 | # RECONSTRUCTED_IMAGE_FOLDER_PATH=../data/test/reconstructed/JPEG2000/
12 | 
13 | # #### local path for HEIC (High Efficiency Image Container)
14 | # ORIGINAL_IMAGE_FOLDER_PATH=../data/test/original/
15 | # COMPRESSED_IMAGE_FOLDER_PATH=../data/test/compressed/HEIC/
16 | # RECONSTRUCTED_IMAGE_FOLDER_PATH=../data/test/reconstructed/HEIC/
17 | 
18 | #### local path for Balle's approach in 2018
19 | ORIGINAL_IMAGE_FOLDER_PATH=../data/test/original/
20 | COMPRESSED_IMAGE_FOLDER_PATH=../data/test/compressed/balle2018/
21 | RECONSTRUCTED_IMAGE_FOLDER_PATH=../data/test/reconstructed/balle2018/
22 | 
23 | # #### local path for my approach
24 | # ORIGINAL_IMAGE_FOLDER_PATH=../data/test/original/
25 | # COMPRESSED_IMAGE_FOLDER_PATH=../data/test/compressed/my_approach/
26 | # RECONSTRUCTED_IMAGE_FOLDER_PATH=../data/test/reconstructed/my_approach/
27 | 
28 | 
29 | #### Calculate bpp (bits per pixel), PSNR (Peak Signal-to-Noise Ratio) and
30 | #### MS-SSIM (Multi-Scale Structural Similarity) for a batch of compressed images
31 | 
32 | python3 batch_psnr.py \
33 |     --original_path ${ORIGINAL_IMAGE_FOLDER_PATH} \
34 |     --compressed_path ${COMPRESSED_IMAGE_FOLDER_PATH} \
35 |     --reconstructed_path ${RECONSTRUCTED_IMAGE_FOLDER_PATH}
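Both analysis scripts reduce to the same core metric computation; as a standalone editorial sketch (TF1, mirroring batch_psnr.py; the filenames are placeholders):
```
# Editorial sketch of the PSNR / MS-SSIM computation used by the evaluators.
import cv2
import numpy
import tensorflow as tf

original = numpy.expand_dims(cv2.imread("../data/test/original/kodim04.png"), axis=0)
contrast = numpy.expand_dims(cv2.imread("../data/test/reconstructed/balle2018/kodim04.png"), axis=0)
with tf.Session() as sess:
    psnr = sess.run(tf.image.psnr(
        tf.convert_to_tensor(original, dtype=tf.uint8),
        tf.convert_to_tensor(contrast, dtype=tf.uint8), 255))
    msssim = sess.run(tf.image.ssim_multiscale(
        tf.convert_to_tensor(original, dtype=tf.uint8),
        tf.convert_to_tensor(contrast, dtype=tf.uint8), 255))
print(psnr, msssim)
```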
--------------------------------------------------------------------------------
/deep_image_compression/bin/model_analysis_single_image:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # #### local path for JPEG
3 | # ORIGINAL_IMAGE_PATH=../data/test/original/kodim04.png
4 | # COMPRESSED_IMAGE_PATH=../data/test/compressed/JPEG/kodim04.jpg
5 | # RECONSTRUCTED_IMAGE_PATH=../data/test/reconstructed/JPEG/kodim04.jpg.png
6 | 
7 | # #### local path for JPEG2000
8 | # ORIGINAL_IMAGE_PATH=../data/test/original/kodim04.png
9 | # COMPRESSED_IMAGE_PATH=../data/test/compressed/JPEG2000/kodim04.jp2
10 | # RECONSTRUCTED_IMAGE_PATH=../data/test/reconstructed/JPEG2000/kodim04.jp2.png
11 | 
12 | # #### local path for HEIC (High Efficiency Image Container)
13 | # ORIGINAL_IMAGE_PATH=../data/test/original/kodim04.png
14 | # COMPRESSED_IMAGE_PATH=../data/test/compressed/HEIC/kodim04.heic
15 | # RECONSTRUCTED_IMAGE_PATH=../data/test/reconstructed/HEIC/kodim04.heic.png
16 | 
17 | #### local path for Balle's approach in 2018
18 | ORIGINAL_IMAGE_PATH=../data/test/original/kodim04.png
19 | COMPRESSED_IMAGE_PATH=../data/test/compressed/balle2018/kodim04.png.tfci
20 | RECONSTRUCTED_IMAGE_PATH=../data/test/reconstructed/balle2018/kodim04.png.tfci.png
21 | 
22 | # #### local path for my approach
23 | # ORIGINAL_IMAGE_PATH=../data/test/original/kodim04.png
24 | # COMPRESSED_IMAGE_PATH=../data/test/compressed/my_approach/kodim04.png.tfci
25 | # RECONSTRUCTED_IMAGE_PATH=../data/test/reconstructed/my_approach/kodim04.png.tfci.png
26 | 
27 | #### Calculate bpp (bits per pixel), PSNR (Peak Signal-to-Noise Ratio) and
28 | #### MS-SSIM (Multi-Scale Structural Similarity) for single image compression
29 | 
30 | python3 single_psnr.py \
31 |     --original_img ${ORIGINAL_IMAGE_PATH} \
32 |     --compressed_img ${COMPRESSED_IMAGE_PATH} \
33 |     --reconstructed_img ${RECONSTRUCTED_IMAGE_PATH} \
34 |     "$@"
--------------------------------------------------------------------------------
/deep_image_compression/bin/model_inference_compress_balle2018:
--------------------------------------------------------------------------------
1 | # Run this script at directory deep-image-compression/, with the command:
2 | # '''bin/model_inference_compress_balle2018'''
3 | 
4 | # Run Balle2018 model inference to compress an image.
5 | # Running time will be around 2X (~8 seconds per image on a GTX 1070) compared
6 | # with the official evaluation script using a pretrained model, because building
7 | # the model from scratch and then restoring parameters from a local checkpoint
8 | # takes longer than restoring the model from a gzip file directly.
9 | 
10 | TEST_DATA_PATH=/path/to/test/image.png
11 | MODEL_PATH="./model/balle2018"
12 | NUM_FILTERS=192
13 | 
14 | # Note: input_file is a positional argument of the compress subcommand.
15 | python3 balle2018.py \
16 |     --num_filters ${NUM_FILTERS} \
17 |     --checkpoint_dir ${MODEL_PATH} \
18 |     compress \
19 |     ${TEST_DATA_PATH}
--------------------------------------------------------------------------------
/deep_image_compression/bin/model_inference_compress_my_approach:
--------------------------------------------------------------------------------
1 | # Run this script at directory deep-image-compression/, with the command:
2 | # '''bin/model_inference_compress_my_approach'''
3 | 
4 | # Run my-approach model inference to compress an image.
5 | # Running time will be around 9 seconds per image on a GTX 1070; it can be
6 | # greatly shortened by pre-loading the model checkpoint (the actual inference
7 | # time should be less than 1 second).
8 | 
9 | TEST_DATA_PATH=/path/to/test/image.png
10 | MODEL_PATH="./model/my_approach"
11 | NUM_FILTERS=192
12 | 
13 | python3 my_approach.py \
14 |     --num_filters ${NUM_FILTERS} \
15 |     --checkpoint_dir ${MODEL_PATH} \
16 |     compress \
17 |     ${TEST_DATA_PATH}
--------------------------------------------------------------------------------
/deep_image_compression/bin/model_inference_decompress_balle2018:
--------------------------------------------------------------------------------
1 | # Run this script at directory deep-image-compression/, with the command:
2 | # '''bin/model_inference_decompress_balle2018'''
3 | 
4 | # Run Balle2018 model inference to decompress an image.
5 | TEST_DATA_PATH=/path/to/test/image.png.tfci
6 | MODEL_PATH="./model/balle2018"
7 | NUM_FILTERS=192
8 | 
9 | python3 balle2018.py \
10 |     --num_filters ${NUM_FILTERS} \
11 |     --checkpoint_dir ${MODEL_PATH} \
12 |     decompress \
13 |     ${TEST_DATA_PATH}
--------------------------------------------------------------------------------
/deep_image_compression/bin/model_inference_decompress_my_approach:
--------------------------------------------------------------------------------
1 | # Run this script at directory deep-image-compression/, with the command:
2 | # '''bin/model_inference_decompress_my_approach'''
3 | 
4 | # Run my-approach model inference to decompress an image.
5 | TEST_DATA_PATH=/path/to/test/image.png.tfci
6 | MODEL_PATH="./model/my_approach"
7 | # NUM_FILTERS must match the value the checkpoint was trained with.
8 | NUM_FILTERS=192
9 | 
10 | python3 my_approach.py \
11 |     --num_filters ${NUM_FILTERS} \
12 |     --checkpoint_dir ${MODEL_PATH} \
13 |     decompress \
14 |     ${TEST_DATA_PATH}
--------------------------------------------------------------------------------
/deep_image_compression/bin/model_training_balle2018:
--------------------------------------------------------------------------------
1 | # Run this script at directory deep-image-compression/, with the command:
2 | # '''bin/model_training_balle2018'''
3 | 
4 | # Train the Balle2018 model on the professional_train dataset from CLIC.
5 | # LAMBDA needs to be adjusted for different target compression ratios.
6 | 
7 | TRAIN_DATA_PATH=../data/raw/professional_train
8 | MODEL_PATH="./model/balle2018"
9 | NUM_FILTERS=192
10 | LAMBDA=0.01
11 | PREPROCESS_THREADS=16
12 | TRAIN_BATCH_SIZE=8
13 | MAX_TRAIN_STEPS=1000000
14 | export CUDA_VISIBLE_DEVICES=0
15 | 
16 | python3 balle2018.py \
17 |     --verbose \
18 |     --num_filters ${NUM_FILTERS} \
19 |     --checkpoint_dir ${MODEL_PATH} \
20 |     train \
21 |     --train_glob "${TRAIN_DATA_PATH}/*.png" \
22 |     --lambda ${LAMBDA} \
23 |     --preprocess_threads ${PREPROCESS_THREADS} \
24 |     --batchsize ${TRAIN_BATCH_SIZE} \
25 |     --last_step ${MAX_TRAIN_STEPS}
--------------------------------------------------------------------------------
/deep_image_compression/bin/model_training_my_approach:
--------------------------------------------------------------------------------
1 | # Run this script at directory deep-image-compression/, with the command:
2 | # '''bin/model_training_my_approach'''
3 | 
4 | # Train the my-approach model (an improved version of Balle2018) on the
5 | # professional_train dataset from CLIC.
6 | 
7 | # LAMBDA needs to be adjusted for different target compression ratios.
8 | 
9 | # TRAIN_BATCH_SIZE, MAIN_LEARNING_RATE and AUX_LEARNING_RATE can be adjusted
10 | # to optimize model performance.
11 | 
12 | # Compared with Balle2018, LAMBDA needs to be scaled 2X to achieve a similar
13 | # target bpp, since my approach has one more max-pooling stage than Balle2018.
14 | 
15 | TRAIN_DATA_PATH=../data/raw/professional_train
16 | MODEL_PATH="./model/my_approach"
17 | NUM_FILTERS=192
18 | LAMBDA=0.02
19 | PREPROCESS_THREADS=16
20 | TRAIN_BATCH_SIZE=8
21 | MAX_TRAIN_STEPS=1000000
22 | MAIN_LEARNING_RATE=1e-4
23 | AUX_LEARNING_RATE=1e-3
24 | export CUDA_VISIBLE_DEVICES=0
25 | 
26 | python3 my_approach.py \
27 |     --verbose \
28 |     --num_filters ${NUM_FILTERS} \
29 |     --checkpoint_dir ${MODEL_PATH} \
30 |     train \
31 |     --train_glob "${TRAIN_DATA_PATH}/*.png" \
32 |     --lambda ${LAMBDA} \
33 |     --preprocess_threads ${PREPROCESS_THREADS} \
34 |     --batchsize ${TRAIN_BATCH_SIZE} \
35 |     --last_step ${MAX_TRAIN_STEPS} \
36 |     --main_learning_rate ${MAIN_LEARNING_RATE} \
37 |     --aux_learning_rate ${AUX_LEARNING_RATE}
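For context, both training scripts minimize the same rate–distortion Lagrangian (see `train()` in balle2018.py and my_approach.py); LAMBDA is the λ below, so increasing it trades a higher bitrate for lower distortion:
```
L = \lambda \cdot 255^2 \cdot \mathrm{MSE}(x, \tilde{x}) + \mathrm{bpp}(\tilde{y}, \tilde{z})
```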
--------------------------------------------------------------------------------
/deep_image_compression/bin/rename_reconstructed_images:
--------------------------------------------------------------------------------
1 | # path for reconstructed images with the suffix ".png.tfci.png"
2 | RECONSTRUCTED_IMAGE_PATH=../data/test/reconstructed/balle2018
3 | 
4 | # Replace the suffix with ".png", so that the order of the reconstructed
5 | # images is the same as that of the original images.
6 | for image in ${RECONSTRUCTED_IMAGE_PATH}/*
7 | do
8 |     mv ${image} ${image//png.tfci.png/png}
9 | done
--------------------------------------------------------------------------------
/deep_image_compression/my_approach.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2019 Licheng Xiao. All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ==============================================================================
16 | """
17 | This model is an improved version of the image compression model published in:
18 | J. Ballé, D. Minnen, S. Singh, S.J. Hwang, N. Johnston:
19 | "Variational Image Compression with a Scale Hyperprior"
20 | Int. Conf. on Learning Representations (ICLR), 2018
21 | https://arxiv.org/abs/1802.01436
22 | 
23 | The major modification is a deeper network structure for the encoder (analysis
24 | transform) and decoder (synthesis transform).
25 | """
26 | 
27 | from __future__ import absolute_import
28 | from __future__ import division
29 | from __future__ import print_function
30 | 
31 | import argparse
32 | import glob
33 | import sys
34 | 
35 | from absl import app
36 | from absl.flags import argparse_flags
37 | import numpy as np
38 | import tensorflow.compat.v1 as tf
39 | 
40 | import tensorflow_compression as tfc
41 | 
42 | 
43 | SCALES_MIN = 0.11
44 | SCALES_MAX = 256
45 | SCALES_LEVELS = 64
46 | 
47 | 
48 | def read_png(filename):
49 |   """Loads a PNG image file."""
50 |   string = tf.read_file(filename)
51 |   image = tf.image.decode_image(string, channels=3)
52 |   image = tf.cast(image, tf.float32)
53 |   image /= 255
54 |   return image
55 | 
56 | 
57 | def quantize_image(image):
58 |   image = tf.round(image * 255)
59 |   image = tf.saturate_cast(image, tf.uint8)
60 |   return image
61 | 
62 | 
63 | def write_png(filename, image):
64 |   """Saves an image to a PNG file."""
65 |   image = quantize_image(image)
66 |   string = tf.image.encode_png(image)
67 |   return tf.write_file(filename, string)
68 | 
69 | 
70 | class AnalysisTransform(tf.keras.layers.Layer):
71 |   """The analysis transform."""
72 | 
73 |   def __init__(self, num_filters, *args, **kwargs):
74 |     self.num_filters = num_filters
75 |     super(AnalysisTransform, self).__init__(*args, **kwargs)
76 | 
77 |   def build(self, input_shape):
78 |     self._layers = [
79 |         tfc.SignalConv2D(
80 |             self.num_filters, (3, 3), name="layer_0", corr=True, strides_down=1,
81 |             padding="same_zeros", use_bias=True,
82 |             activation=tfc.GDN(name="gdn_0")),
83 |         tfc.SignalConv2D(
84 |             self.num_filters, (3, 3), name="layer_1", corr=True, strides_down=2,
85 |             padding="same_zeros", use_bias=True,
86 |             activation=tfc.GDN(name="gdn_1")),
87 |         tfc.SignalConv2D(
88 |             self.num_filters, (3, 3), name="layer_2", corr=True, strides_down=1,
89 |             padding="same_zeros", use_bias=True,
90 |             activation=tfc.GDN(name="gdn_2")),
91 |         tfc.SignalConv2D(
92 |             self.num_filters, (3, 3), name="layer_3", corr=True, strides_down=2,
93 |             padding="same_zeros",
use_bias=True, 94 | activation=tfc.GDN(name="gdn_3")), 95 | tfc.SignalConv2D( 96 | self.num_filters, (3, 3), name="layer_4", corr=True, strides_down=1, 97 | padding="same_zeros", use_bias=True, 98 | activation=tfc.GDN(name="gdn_4")), 99 | tfc.SignalConv2D( 100 | self.num_filters, (3, 3), name="layer_5", corr=True, strides_down=2, 101 | padding="same_zeros", use_bias=True, 102 | activation=tfc.GDN(name="gdn_5")), 103 | tfc.SignalConv2D( 104 | self.num_filters, (3, 3), name="layer_6", corr=True, strides_down=1, 105 | padding="same_zeros", use_bias=True, 106 | activation=tfc.GDN(name="gdn_6")), 107 | tfc.SignalConv2D( 108 | self.num_filters, (3, 3), name="layer_7", corr=True, strides_down=2, 109 | padding="same_zeros", use_bias=True, 110 | activation=None), 111 | ] 112 | super(AnalysisTransform, self).build(input_shape) 113 | 114 | def call(self, tensor): 115 | for layer in self._layers: 116 | tensor = layer(tensor) 117 | return tensor 118 | 119 | 120 | class SynthesisTransform(tf.keras.layers.Layer): 121 | """The synthesis transform.""" 122 | 123 | def __init__(self, num_filters, *args, **kwargs): 124 | self.num_filters = num_filters 125 | super(SynthesisTransform, self).__init__(*args, **kwargs) 126 | 127 | def build(self, input_shape): 128 | self._layers = [ 129 | tfc.SignalConv2D( 130 | self.num_filters, (3, 3), name="layer_0", corr=False, strides_up=1, 131 | padding="same_zeros", use_bias=True, 132 | activation=tfc.GDN(name="igdn_0", inverse=True)), 133 | tfc.SignalConv2D( 134 | self.num_filters, (3, 3), name="layer_1", corr=False, strides_up=2, 135 | padding="same_zeros", use_bias=True, 136 | activation=tfc.GDN(name="igdn_1", inverse=True)), 137 | tfc.SignalConv2D( 138 | self.num_filters, (3, 3), name="layer_2", corr=False, strides_up=1, 139 | padding="same_zeros", use_bias=True, 140 | activation=tfc.GDN(name="igdn_2", inverse=True)), 141 | tfc.SignalConv2D( 142 | self.num_filters, (3, 3), name="layer_3", corr=False, strides_up=2, 143 | padding="same_zeros", use_bias=True, 144 | activation=tfc.GDN(name="igdn_3", inverse=True)), 145 | tfc.SignalConv2D( 146 | self.num_filters, (3, 3), name="layer_4", corr=False, strides_up=1, 147 | padding="same_zeros", use_bias=True, 148 | activation=tfc.GDN(name="igdn_4", inverse=True)), 149 | tfc.SignalConv2D( 150 | self.num_filters, (3, 3), name="layer_5", corr=False, strides_up=2, 151 | padding="same_zeros", use_bias=True, 152 | activation=tfc.GDN(name="igdn_5", inverse=True)), 153 | tfc.SignalConv2D( 154 | self.num_filters, (3, 3), name="layer_6", corr=False, strides_up=1, 155 | padding="same_zeros", use_bias=True, 156 | activation=tfc.GDN(name="igdn_6", inverse=True)), 157 | tfc.SignalConv2D( 158 | 3, (3, 3), name="layer_7", corr=False, strides_up=2, 159 | padding="same_zeros", use_bias=True, 160 | activation=None), 161 | ] 162 | super(SynthesisTransform, self).build(input_shape) 163 | 164 | def call(self, tensor): 165 | for layer in self._layers: 166 | tensor = layer(tensor) 167 | return tensor 168 | 169 | 170 | class HyperAnalysisTransform(tf.keras.layers.Layer): 171 | """The analysis transform for the entropy model parameters.""" 172 | 173 | def __init__(self, num_filters, *args, **kwargs): 174 | self.num_filters = num_filters 175 | super(HyperAnalysisTransform, self).__init__(*args, **kwargs) 176 | 177 | def build(self, input_shape): 178 | self._layers = [ 179 | tfc.SignalConv2D( 180 | self.num_filters, (3, 3), name="layer_0", corr=True, strides_down=1, 181 | padding="same_zeros", use_bias=True, 182 | activation=tf.nn.relu), 183 | 
tfc.SignalConv2D( 184 | self.num_filters, (5, 5), name="layer_1", corr=True, strides_down=2, 185 | padding="same_zeros", use_bias=True, 186 | activation=tf.nn.relu), 187 | tfc.SignalConv2D( 188 | self.num_filters, (5, 5), name="layer_2", corr=True, strides_down=2, 189 | padding="same_zeros", use_bias=False, 190 | activation=None), 191 | ] 192 | super(HyperAnalysisTransform, self).build(input_shape) 193 | 194 | def call(self, tensor): 195 | for layer in self._layers: 196 | tensor = layer(tensor) 197 | return tensor 198 | 199 | 200 | class HyperSynthesisTransform(tf.keras.layers.Layer): 201 | """The synthesis transform for the entropy model parameters.""" 202 | 203 | def __init__(self, num_filters, *args, **kwargs): 204 | self.num_filters = num_filters 205 | super(HyperSynthesisTransform, self).__init__(*args, **kwargs) 206 | 207 | def build(self, input_shape): 208 | self._layers = [ 209 | tfc.SignalConv2D( 210 | self.num_filters, (5, 5), name="layer_0", corr=False, strides_up=2, 211 | padding="same_zeros", use_bias=True, kernel_parameterizer=None, 212 | activation=tf.nn.relu), 213 | tfc.SignalConv2D( 214 | self.num_filters, (5, 5), name="layer_1", corr=False, strides_up=2, 215 | padding="same_zeros", use_bias=True, kernel_parameterizer=None, 216 | activation=tf.nn.relu), 217 | tfc.SignalConv2D( 218 | self.num_filters, (3, 3), name="layer_2", corr=False, strides_up=1, 219 | padding="same_zeros", use_bias=True, kernel_parameterizer=None, 220 | activation=None), 221 | ] 222 | super(HyperSynthesisTransform, self).build(input_shape) 223 | 224 | def call(self, tensor): 225 | for layer in self._layers: 226 | tensor = layer(tensor) 227 | return tensor 228 | 229 | 230 | def train(args): 231 | """Trains the model.""" 232 | 233 | if args.verbose: 234 | tf.logging.set_verbosity(tf.logging.INFO) 235 | 236 | # Create input data pipeline. 237 | with tf.device("/cpu:0"): 238 | train_files = glob.glob(args.train_glob) 239 | if not train_files: 240 | raise RuntimeError( 241 | "No training images found with glob '{}'.".format(args.train_glob)) 242 | train_dataset = tf.data.Dataset.from_tensor_slices(train_files) 243 | train_dataset = train_dataset.shuffle(buffer_size=len(train_files)).repeat() 244 | train_dataset = train_dataset.map( 245 | read_png, num_parallel_calls=args.preprocess_threads) 246 | train_dataset = train_dataset.map( 247 | lambda x: tf.random_crop(x, (args.patchsize, args.patchsize, 3))) 248 | train_dataset = train_dataset.batch(args.batchsize) 249 | train_dataset = train_dataset.prefetch(32) 250 | 251 | num_pixels = args.batchsize * args.patchsize ** 2 252 | 253 | # Get training patch from dataset. 254 | x = train_dataset.make_one_shot_iterator().get_next() 255 | 256 | # Instantiate model. 257 | analysis_transform = AnalysisTransform(args.num_filters) 258 | synthesis_transform = SynthesisTransform(args.num_filters) 259 | hyper_analysis_transform = HyperAnalysisTransform(args.num_filters) 260 | hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters) 261 | entropy_bottleneck = tfc.EntropyBottleneck() 262 | 263 | # Build autoencoder and hyperprior. 
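# Forward pass: the analysis transform maps the image batch x to latents y, and the hyper-analysis transform maps abs(y) to hyper-latents z. In training mode, the entropy bottleneck and the Gaussian conditional below return noise-quantized tensors together with their likelihoods, which feed the bit-rate term of the rate-distortion loss.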
264 | y = analysis_transform(x) 265 | z = hyper_analysis_transform(abs(y)) 266 | z_tilde, z_likelihoods = entropy_bottleneck(z, training=True) 267 | sigma = hyper_synthesis_transform(z_tilde) 268 | scale_table = np.exp(np.linspace( 269 | np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)) 270 | conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table) 271 | y_tilde, y_likelihoods = conditional_bottleneck(y, training=True) 272 | x_tilde = synthesis_transform(y_tilde) 273 | 274 | # Total number of bits divided by number of pixels. 275 | train_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + 276 | tf.reduce_sum(tf.log(z_likelihoods))) / (-np.log(2) * num_pixels) 277 | 278 | # Mean squared error across pixels. 279 | train_mse = tf.reduce_mean(tf.squared_difference(x, x_tilde)) 280 | # Multiply by 255^2 to correct for rescaling. 281 | train_mse *= 255 ** 2 282 | 283 | # The rate-distortion cost. 284 | train_loss = args.lmbda * train_mse + train_bpp 285 | 286 | # Minimize loss and auxiliary loss, and execute update op. 287 | step = tf.train.create_global_step() 288 | main_optimizer = tf.train.AdamOptimizer(learning_rate=args.main_learning_rate) 289 | main_step = main_optimizer.minimize(train_loss, global_step=step) 290 | 291 | aux_optimizer = tf.train.AdamOptimizer(learning_rate=args.aux_learning_rate) 292 | aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0]) 293 | 294 | train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0]) 295 | 296 | tf.summary.scalar("loss", train_loss) 297 | tf.summary.scalar("bpp", train_bpp) 298 | tf.summary.scalar("mse", train_mse) 299 | 300 | tf.summary.image("original", quantize_image(x)) 301 | tf.summary.image("reconstruction", quantize_image(x_tilde)) 302 | 303 | hooks = [ 304 | tf.train.StopAtStepHook(last_step=args.last_step), 305 | tf.train.NanTensorHook(train_loss), 306 | ] 307 | with tf.train.MonitoredTrainingSession( 308 | hooks=hooks, checkpoint_dir=args.checkpoint_dir, 309 | save_checkpoint_secs=300, save_summaries_secs=60) as sess: 310 | while not sess.should_stop(): 311 | sess.run(train_op) 312 | 313 | 314 | def compress(args): 315 | """Compresses an image.""" 316 | 317 | # Load input image and add batch dimension. 318 | x = read_png(args.input_file) 319 | x = tf.expand_dims(x, 0) 320 | x.set_shape([1, None, None, 3]) 321 | x_shape = tf.shape(x) 322 | 323 | # Instantiate model. 324 | analysis_transform = AnalysisTransform(args.num_filters) 325 | synthesis_transform = SynthesisTransform(args.num_filters) 326 | hyper_analysis_transform = HyperAnalysisTransform(args.num_filters) 327 | hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters) 328 | entropy_bottleneck = tfc.EntropyBottleneck() 329 | 330 | # Transform and compress the image. 331 | y = analysis_transform(x) 332 | y_shape = tf.shape(y) 333 | z = hyper_analysis_transform(abs(y)) 334 | z_hat, z_likelihoods = entropy_bottleneck(z, training=False) 335 | sigma = hyper_synthesis_transform(z_hat) 336 | sigma = sigma[:, :y_shape[1], :y_shape[2], :] 337 | scale_table = np.exp(np.linspace( 338 | np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)) 339 | conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table) 340 | side_string = entropy_bottleneck.compress(z) 341 | string = conditional_bottleneck.compress(y) 342 | 343 | # Transform the quantized image back (if requested). 
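# Running the conditional bottleneck in eval mode yields the quantized latents y_hat and their likelihoods; the reconstruction x_hat and the likelihood-based bpp estimate below are only evaluated when --verbose is set, to report quality metrics alongside the actual file size.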
344 | y_hat, y_likelihoods = conditional_bottleneck(y, training=False) 345 | x_hat = synthesis_transform(y_hat) 346 | x_hat = x_hat[:, :x_shape[1], :x_shape[2], :] 347 | 348 | num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32) 349 | 350 | # Total number of bits divided by number of pixels. 351 | eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + 352 | tf.reduce_sum(tf.log(z_likelihoods))) / (-np.log(2) * num_pixels) 353 | 354 | # Bring both images back to 0..255 range. 355 | x *= 255 356 | x_hat = tf.clip_by_value(x_hat, 0, 1) 357 | x_hat = tf.round(x_hat * 255) 358 | 359 | mse = tf.reduce_mean(tf.squared_difference(x, x_hat)) 360 | psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255)) 361 | msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255)) 362 | 363 | with tf.Session() as sess: 364 | # Load the latest model checkpoint, get the compressed string and the tensor 365 | # shapes. 366 | latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir) 367 | tf.train.Saver().restore(sess, save_path=latest) 368 | tensors = [string, side_string, 369 | tf.shape(x)[1:-1], tf.shape(y)[1:-1], tf.shape(z)[1:-1]] 370 | arrays = sess.run(tensors) 371 | 372 | # Write a binary file with the shape information and the compressed string. 373 | packed = tfc.PackedTensors() 374 | packed.pack(tensors, arrays) 375 | with open(args.output_file, "wb") as f: 376 | f.write(packed.string) 377 | 378 | # If requested, transform the quantized image back and measure performance. 379 | if args.verbose: 380 | eval_bpp, mse, psnr, msssim, num_pixels = sess.run( 381 | [eval_bpp, mse, psnr, msssim, num_pixels]) 382 | 383 | # The actual bits per pixel including overhead. 384 | bpp = len(packed.string) * 8 / num_pixels 385 | 386 | tf.logging.info("Mean squared error: {:0.4f}".format(mse)) 387 | tf.logging.info("PSNR (dB): {:0.2f}".format(psnr)) 388 | tf.logging.info("Multiscale SSIM: {:0.4f}".format(msssim)) 389 | tf.logging.info("Multiscale SSIM (dB): {:0.2f}".format(-10 * np.log10(1 - msssim))) 390 | tf.logging.info("Information content in bpp: {:0.4f}".format(eval_bpp)) 391 | tf.logging.info("Actual bits per pixel: {:0.4f}".format(bpp)) 392 | 393 | 394 | def decompress(args): 395 | """Decompresses an image.""" 396 | 397 | # Read the shape information and compressed string from the binary file. 398 | string = tf.placeholder(tf.string, [1]) 399 | side_string = tf.placeholder(tf.string, [1]) 400 | x_shape = tf.placeholder(tf.int32, [2]) 401 | y_shape = tf.placeholder(tf.int32, [2]) 402 | z_shape = tf.placeholder(tf.int32, [2]) 403 | with open(args.input_file, "rb") as f: 404 | packed = tfc.PackedTensors(f.read()) 405 | tensors = [string, side_string, x_shape, y_shape, z_shape] 406 | arrays = packed.unpack(tensors) 407 | 408 | # Instantiate model. 409 | synthesis_transform = SynthesisTransform(args.num_filters) 410 | hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters) 411 | entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32) 412 | 413 | # Decompress and transform the image back. 
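# Decoding mirrors compression: the side string is decompressed into hyper-latents z_hat, the hyper-synthesis transform predicts the scales sigma, the Gaussian conditional decodes the main string into y_hat, and the synthesis transform maps y_hat back to an image.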
414 | z_shape = tf.concat([z_shape, [args.num_filters]], axis=0) 415 | z_hat = entropy_bottleneck.decompress( 416 | side_string, z_shape, channels=args.num_filters) 417 | sigma = hyper_synthesis_transform(z_hat) 418 | sigma = sigma[:, :y_shape[0], :y_shape[1], :] 419 | scale_table = np.exp(np.linspace( 420 | np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)) 421 | conditional_bottleneck = tfc.GaussianConditional( 422 | sigma, scale_table, dtype=tf.float32) 423 | y_hat = conditional_bottleneck.decompress(string) 424 | x_hat = synthesis_transform(y_hat) 425 | 426 | # Remove batch dimension, and crop away any extraneous padding on the bottom 427 | # or right boundaries. 428 | x_hat = x_hat[0, :x_shape[0], :x_shape[1], :] 429 | 430 | # Write reconstructed image out as a PNG file. 431 | op = write_png(args.output_file, x_hat) 432 | 433 | # Load the latest model checkpoint, and perform the above actions. 434 | with tf.Session() as sess: 435 | latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir) 436 | tf.train.Saver().restore(sess, save_path=latest) 437 | sess.run(op, feed_dict=dict(zip(tensors, arrays))) 438 | 439 | 440 | def parse_args(argv): 441 | """Parses command line arguments.""" 442 | parser = argparse_flags.ArgumentParser( 443 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 444 | 445 | # High-level options. 446 | parser.add_argument( 447 | "--verbose", "-V", action="store_true", 448 | help="Report bitrate and distortion when training or compressing.") 449 | parser.add_argument( 450 | "--num_filters", type=int, default=192, 451 | help="Number of filters per layer.") 452 | parser.add_argument( 453 | "--checkpoint_dir", default="train", 454 | help="Directory where to save/load model checkpoints.") 455 | subparsers = parser.add_subparsers( 456 | title="commands", dest="command", 457 | help="What to do: 'train' loads training data and trains (or continues " 458 | "to train) a new model. 'compress' reads an image file (lossless " 459 | "PNG format) and writes a compressed binary file. 'decompress' " 460 | "reads a binary file and reconstructs the image (in PNG format). " 461 | "input and output filenames need to be provided for the latter " 462 | "two options. Invoke '<command> -h' for more information.") 463 | 464 | # 'train' subcommand. 465 | train_cmd = subparsers.add_parser( 466 | "train", 467 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 468 | description="Trains (or continues to train) a new model.") 469 | train_cmd.add_argument( 470 | "--train_glob", default="images/*.png", 471 | help="Glob pattern identifying training data.
This pattern must expand " 472 | "to a list of RGB images in PNG format.") 473 | train_cmd.add_argument( 474 | "--batchsize", type=int, default=8, 475 | help="Batch size for training.") 476 | train_cmd.add_argument( 477 | "--patchsize", type=int, default=256, 478 | help="Size of image patches for training.") 479 | train_cmd.add_argument( 480 | "--lambda", type=float, default=0.01, dest="lmbda", 481 | help="Lambda for rate-distortion tradeoff.") 482 | train_cmd.add_argument( 483 | "--last_step", type=int, default=1000000, 484 | help="Train up to this number of steps.") 485 | train_cmd.add_argument( 486 | "--preprocess_threads", type=int, default=16, 487 | help="Number of CPU threads to use for parallel decoding of training " 488 | "images.") 489 | train_cmd.add_argument( 490 | "--main_learning_rate", type=float, default=1e-4, 491 | help="Learning rate of main optimizer for autoencoder") 492 | train_cmd.add_argument( 493 | "--aux_learning_rate", type=float, default=1e-3, 494 | help="Learning rate of auxiliary optimizer for entropy bottleneck") 495 | 496 | # 'compress' subcommand. 497 | compress_cmd = subparsers.add_parser( 498 | "compress", 499 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 500 | description="Reads a PNG file, compresses it, and writes a TFCI file.") 501 | 502 | # 'decompress' subcommand. 503 | decompress_cmd = subparsers.add_parser( 504 | "decompress", 505 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 506 | description="Reads a TFCI file, reconstructs the image, and writes back " 507 | "a PNG file.") 508 | 509 | # Arguments for both 'compress' and 'decompress'. 510 | for cmd, ext in ((compress_cmd, ".tfci"), (decompress_cmd, ".png")): 511 | cmd.add_argument( 512 | "input_file", 513 | help="Input filename.") 514 | cmd.add_argument( 515 | "output_file", nargs="?", 516 | help="Output filename (optional). If not provided, appends '{}' to " 517 | "the input filename.".format(ext)) 518 | 519 | # Parse arguments. 520 | args = parser.parse_args(argv[1:]) 521 | if args.command is None: 522 | parser.print_usage() 523 | sys.exit(2) 524 | return args 525 | 526 | 527 | def main(args): 528 | # Invoke subcommand. 529 | if args.command == "train": 530 | train(args) 531 | elif args.command == "compress": 532 | if not args.output_file: 533 | args.output_file = args.input_file + ".tfci" 534 | compress(args) 535 | elif args.command == "decompress": 536 | if not args.output_file: 537 | args.output_file = args.input_file + ".png" 538 | decompress(args) 539 | 540 | 541 | if __name__ == "__main__": 542 | app.run(main, flags_parser=parse_args) 543 | -------------------------------------------------------------------------------- /deep_image_compression/single_psnr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2019 Licheng Xiao. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | 17 | import tensorflow as tf 18 | import numpy 19 | import math 20 | import cv2 21 | import os 22 | import logging 23 | from os import listdir 24 | from os.path import isfile, join 25 | from absl import flags 26 | 27 | flags.DEFINE_string("original_img", default=None, 28 | help="Path for original image file.") 29 | flags.DEFINE_string("compressed_img", default=None, 30 | help="Path for compressed image file.") 31 | flags.DEFINE_string("reconstructed_img", default=None, 32 | help="Path for reconstructed image file.") 33 | FLAGS = flags.FLAGS 34 | 35 | 36 | class SingleEvaluator: 37 | def get_psnr_msssim_bpp(self, original_img, reconstructed_img, compressed_img): 38 | psnr = 0 39 | msssim = 0 40 | bpp = 0 41 | try: 42 | sess = tf.Session() 43 | original = cv2.imread(original_img) 44 | contrast = cv2.imread(reconstructed_img) 45 | original = numpy.expand_dims(original, axis=0) 46 | contrast = numpy.expand_dims(contrast, axis=0) 47 | original_tensor = tf.convert_to_tensor(original, dtype=tf.uint8) 48 | contrast_tensor = tf.convert_to_tensor(contrast, dtype=tf.uint8) 49 | msssim_tensor = tf.image.ssim_multiscale( 50 | original_tensor, contrast_tensor, 255) 51 | psnr_tensor = tf.image.psnr(original_tensor, contrast_tensor, 255) 52 | msssim = sess.run(msssim_tensor) 53 | psnr = sess.run(psnr_tensor) 54 | first, h, w, channels = numpy.shape(contrast) 55 | bpp = os.path.getsize(compressed_img) * 8 / (h * w) 56 | except Exception as e: 57 | logging.error(e) 58 | if psnr == 0: 59 | logging.error('An error occurred; please check the log for details.') 60 | else: 61 | logging.info('psnr: %s\n' 62 | 'ms_ssim: %s\n' 63 | 'bpp: %s', psnr, msssim, bpp) 64 | return psnr, msssim, bpp 65 | 66 | 67 | def main(_): 68 | single_evaluator = SingleEvaluator() 69 | single_evaluator.get_psnr_msssim_bpp(FLAGS.original_img, 70 | FLAGS.reconstructed_img, 71 | FLAGS.compressed_img) 72 | 73 | 74 | if __name__ == "__main__": 75 | tf.app.run() 76 | -------------------------------------------------------------------------------- /deep_image_compression/static/img/MSE_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LichengXiao2017/deep-image-compression/cf6e5699bad4d7b4a0dd8db6da72aa0c56e3d1e4/deep_image_compression/static/img/MSE_comparison.png -------------------------------------------------------------------------------- /deep_image_compression/static/img/baseline_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LichengXiao2017/deep-image-compression/cf6e5699bad4d7b4a0dd8db6da72aa0c56e3d1e4/deep_image_compression/static/img/baseline_comparison.png -------------------------------------------------------------------------------- /deep_image_compression/static/img/bpp_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LichengXiao2017/deep-image-compression/cf6e5699bad4d7b4a0dd8db6da72aa0c56e3d1e4/deep_image_compression/static/img/bpp_comparison.png -------------------------------------------------------------------------------- /deep_image_compression/static/img/example_JPEG_HEIC.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/LichengXiao2017/deep-image-compression/cf6e5699bad4d7b4a0dd8db6da72aa0c56e3d1e4/deep_image_compression/static/img/example_JPEG_HEIC.png -------------------------------------------------------------------------------- /deep_image_compression/static/img/example_balle2018_my_approach.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LichengXiao2017/deep-image-compression/cf6e5699bad4d7b4a0dd8db6da72aa0c56e3d1e4/deep_image_compression/static/img/example_balle2018_my_approach.png -------------------------------------------------------------------------------- /deep_image_compression/static/img/model_improvement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LichengXiao2017/deep-image-compression/cf6e5699bad4d7b4a0dd8db6da72aa0c56e3d1e4/deep_image_compression/static/img/model_improvement.png -------------------------------------------------------------------------------- /deep_image_compression/static/img/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LichengXiao2017/deep-image-compression/cf6e5699bad4d7b4a0dd8db6da72aa0c56e3d1e4/deep_image_compression/static/img/pipeline.png -------------------------------------------------------------------------------- /deep_image_compression/static/img/result_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LichengXiao2017/deep-image-compression/cf6e5699bad4d7b4a0dd8db6da72aa0c56e3d1e4/deep_image_compression/static/img/result_table.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # To install these Python dependencies, please type: 2 | # pip install -r requirements.txt 3 | 4 | tensorflow-gpu==1.15.0-rc1 5 | absl-py==0.8.0 6 | opencv-python==4.1.1.26 7 | pytest==5.1.0 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | [aliases] 5 | test = pytest 6 | 7 | [build_sphinx] 8 | source-dir = docs/ 9 | build-dir = docs/_build 10 | all_files = 1 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from distutils.core import setup 3 | 4 | install_requires = [ 5 | 'tensorflow-gpu==1.14.0', 6 | 'absl-py==0.8.0', 7 | 'opencv-python==4.1.1.26', 8 | 'argparse==1.4.0', 9 | 'glob3==0.0.1', 10 | 'tensorflow_compression==1.2', 11 | 'numpy==1.16.4', 12 | ] 13 | 14 | tests_require = [ 15 | 'pytest>=2.8.0', 16 | ] 17 | 18 | extras_require = { 19 | 'docs': [ 20 | 'Sphinx<1.5.0,>=1.4.2', 21 | 'docutils<0.13,>=0.12', 22 | ], 23 | } 24 | 25 | setup_requires = ['pytest-runner>=2.6.2', ] 26 | 27 | setup( 28 | name='deep-image-compression', # How you named your package folder (MyLib) 29 | packages=['deep_image_compression'], # Choose the same as "name" 30 | version='0.2', # Start with a small number and increase it with every change you make 31 | # Choose a license from here: https://help.github.com/articles/licensing-a-repository 32 | license='MIT', 33 | # Give a short description about your library 34
| description='A tool to build, train and analyze deep learning models for image compression', 35 | author='Licheng Xiao', 36 | author_email='david.xiao.2008@gmail.com', 37 | # Provide either the link to your github or to your website 38 | url='https://github.com/LichengXiao2017/deep-image-compression', 39 | # Link to a downloadable archive of this release 40 | download_url='https://github.com/LichengXiao2017/deep-image-compression/archive/v_02.tar.gz', 41 | # Keywords that define your package best 42 | keywords=['image', 'compression', 'deep learning'], 43 | install_requires=install_requires, 44 | classifiers=[ 45 | # Choose either "3 - Alpha", "4 - Beta" or "5 - Production/Stable" as the current state of your package 46 | 'Development Status :: 3 - Alpha', 47 | # Define the intended audience 48 | 'Intended Audience :: Developers', 49 | 'Topic :: Software Development :: Build Tools', 50 | 'License :: OSI Approved :: MIT License', # Again, pick a license 51 | # Specify which Python versions you want to support 52 | 'Programming Language :: Python :: 3', 53 | 'Programming Language :: Python :: 3.4', 54 | 'Programming Language :: Python :: 3.5', 55 | 'Programming Language :: Python :: 3.6', 56 | 'Programming Language :: Python :: 3.7', 57 | ], 58 | ) 59 | -------------------------------------------------------------------------------- /tests/test_balle2018.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LichengXiao2017/deep-image-compression/cf6e5699bad4d7b4a0dd8db6da72aa0c56e3d1e4/tests/test_balle2018.py -------------------------------------------------------------------------------- /tests/test_batch_psnr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LichengXiao2017/deep-image-compression/cf6e5699bad4d7b4a0dd8db6da72aa0c56e3d1e4/tests/test_batch_psnr.py -------------------------------------------------------------------------------- /tests/test_deep_image_compression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LichengXiao2017/deep-image-compression/cf6e5699bad4d7b4a0dd8db6da72aa0c56e3d1e4/tests/test_deep_image_compression.py -------------------------------------------------------------------------------- /tests/test_single_psnr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LichengXiao2017/deep-image-compression/cf6e5699bad4d7b4a0dd8db6da72aa0c56e3d1e4/tests/test_single_psnr.py --------------------------------------------------------------------------------
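For reference, a minimal usage sketch of `my_approach.py` based on its argument parser; the file paths and checkpoint directory below are illustrative, not part of the repo.
```
# Train the modified model on a directory of PNG images.
python3 deep_image_compression/my_approach.py --checkpoint_dir train train \
    --train_glob "images/*.png" --lambda 0.01

# Compress a PNG into a .tfci file, printing bitrate and distortion metrics.
python3 deep_image_compression/my_approach.py --checkpoint_dir train --verbose \
    compress input.png input.png.tfci

# Reconstruct a PNG from the .tfci file.
python3 deep_image_compression/my_approach.py --checkpoint_dir train \
    decompress input.png.tfci output.png
```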