63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Real-time image enhancement DL android App and web App
2 | A deep learning project focusing on deploying pretrained models on mobile devices and in the cloud. It was implemented during the 3-week Insight AI fellowship program.
3 |
4 | The pretrained models give credit to [Deep Bilateral Learning
5 | for Real-Time Image Enhancement](https://groups.csail.mit.edu/graphics/hdrnet/)
6 |
7 | ## Setup
8 |
9 | ### Dependencies
10 |
11 | To install the Python dependencies, run:
12 |
13 | pip install -r requirements.txt
14 |
15 | ## Usage
16 |
17 | To download the pretrained models, please refer to [Deep Bilateral Learning
18 | for Real-Time Image Enhancement](https://groups.csail.mit.edu/graphics/hdrnet/)
19 |
20 |
21 | To prepare a model for use on mobile, freeze the graph, and optimize the network:
22 |
23 | ./scripts/freeze_graph.py
24 | ./scripts/optimize_graph.py
25 |
26 | To test the prepared model for use in the web app or on mobile:
27 |
28 | ./scripts/test_pb_graph.py
29 |
30 |
31 | ## Serving the Hdrnet model on cloud
32 | ### [photoAI](http://photo-ai.surge.sh/)
33 |
34 |
35 |
36 |
37 | The web app 'photoAI' currently serves 3 different pretrained models: face brightening, edge enhancing, and HDR+.
38 |
39 |
40 | ## Deploy Hdrnet model on Android with TensorFlow Mobile
41 |
42 | In order to deploy this model on Android, I have to re-implement a custom TensorFlow op (currently a CUDA version) in OpenCL so that the op can run on mobile.
43 | The code still needs some cleanup; to be updated.
44 |
45 |
46 |
47 |
48 |
49 | ## Inference performance comparison with and without XLA
50 |
51 | Some tests with XLA fused-operation optimization. The images below show tests with 1 batch (20 images) of 1500*1000 pictures. I didn't see any improvement from using XLA. I think there are two main reasons: BilateralSliceApply is a computationally heavy custom op that cannot be fused by XLA, and XLA is still at an early stage.
52 |
53 | Inference without XLA JIT
54 |
55 |
56 |
57 |
58 | Inference with XLA JIT
59 |
60 |
61 |
62 |
63 | ## Known issues and limitations
64 |
65 | * TensorFlow Mobile doesn't support custom ops, especially ops implemented in CUDA. The hdrnet model uses a custom op, BilateralSliceApply, which is GPU-only.
66 |
67 | * The pre-trained HDR+ model was trained on specially formatted 16-bit linear input. Feeding it general images produces outputs with odd colors.
--------------------------------------------------------------------------------
/photoai/js/index.js:
--------------------------------------------------------------------------------
1 | var dataURL;
2 | var server = "http://184.105.86.228:9999/infer";
3 |
/**
 * Handle a change of the file input: preview the selected image and reveal
 * the enhancement-mode controls.
 *
 * The file is read as a data URL, which is stored in the global `dataURL`
 * so that serverRequest() can send it to the inference server.
 *
 * @param {HTMLInputElement} input - the file input element that changed.
 */
function readURL(input) {
  // Drop the most recently appended result image, if any.
  $('#res img:last-child').remove();

  if (!(input.files && input.files[0])) {
    // Nothing selected: restore the empty upload widget.
    removeUpload();
    return;
  }

  var fileReader = new FileReader();

  fileReader.onload = function(event) {
    $('.image-upload-wrap').hide();

    $('.file-upload-image').attr('src', event.target.result);
    $('.file-upload-content').show();

    // The file name is user-controlled, so insert it as text rather than
    // HTML to keep markup in the name from being interpreted.
    $('.image-title').text(input.files[0].name);
    dataURL = fileReader.result;
    $('#mode').show();
  };

  fileReader.readAsDataURL(input.files[0]);
}
27 |
/**
 * Reset the upload widget to its initial, empty state: swap the file input
 * for a clone of itself, hide the preview and the mode controls, show the
 * drop area again and remove the last result image.
 */
function removeUpload() {
  var fileInput = $('.file-upload-input');
  fileInput.replaceWith(fileInput.clone());

  $('.file-upload-content').hide();
  $('.image-upload-wrap').show();
  $('#mode').hide();

  $('#res img:last-child').remove();
}
35 |
36 |
// Toggle the 'image-dropping' highlight while a file is dragged over the
// drop area. Both handlers are attached to the same matched set.
$('.image-upload-wrap')
  .bind('dragover', function () {
    $('.image-upload-wrap').addClass('image-dropping');
  })
  .bind('dragleave', function () {
    $('.image-upload-wrap').removeClass('image-dropping');
  });
43 |
44 |
/**
 * Send the currently loaded image to the inference server and append the
 * enhanced result to the `#res` container.
 *
 * The request is a form-encoded POST carrying the selected mode and the
 * global `dataURL` set by readURL(). The response is parsed as JSON and its
 * `data` field is used as the base64 payload of a JPEG data URL.
 *
 * @param {string} mode - value forwarded to the server as the `mode` field.
 */
function serverRequest(mode) {
  $.ajax({
    type: 'POST',
    url: server,
    crossDomain: true,
    data: {
      mode: mode,
      data: dataURL
    },
    dataType: 'json',
    success: function(data) {
      // Create a real <img> element. Passing an empty string to $() yields
      // an empty set, so nothing would ever be appended to #res.
      $('<img>', {
        'src': 'data:image/jpeg;base64,' + data['data'],
        'class': 'file-upload-image'
      }).appendTo('#res');
    },
    error: function(jqXHR, textStatus, errorThrown) {
      // Keep the full failure context in the console for debugging.
      console.log('error');
      console.log(jqXHR);
      console.log(textStatus);
      console.log(errorThrown);
    }
  });
}
--------------------------------------------------------------------------------
/photoai/css/style.css:
--------------------------------------------------------------------------------
1 | body {
2 | font-family: sans-serif;
3 | background-color: #eeeeee;
4 | }
5 |
6 | .file-upload {
7 | background-color: #ffffff;
8 | width: 650px;
9 | display: inline;
10 | float: left;
11 | margin: 10px;
12 | padding: 20px;
13 | }
14 |
15 | .file-upload-right {
16 | float: right;
17 | }
18 |
19 | .file-upload-btn {
20 | width: 100%;
21 | margin: 0;
22 | color: #fff;
23 | background: #1FB264;
24 | border: none;
25 | padding: 10px;
26 | border-radius: 4px;
27 | border-bottom: 4px solid #15824B;
28 | transition: all .2s ease;
29 | outline: none;
30 | text-transform: uppercase;
31 | font-weight: 700;
32 | }
33 |
34 | .mode-btn {
35 | width: 30%;
36 | margin-left: 8px;
37 | margin-right: 8px;
38 | }
39 |
40 | .p {
41 | margin-left: 12px;
42 | margin-top: 20px;
43 | margin-bottom: 10px;
44 | }
45 |
46 | .file-upload-btn:hover {
47 | background: #1AA059;
48 | color: #ffffff;
49 | transition: all .2s ease;
50 | cursor: pointer;
51 | }
52 |
53 | .file-upload-btn:active {
54 | border: 0;
55 | transition: all .2s ease;
56 | }
57 |
58 | .file-upload-content {
59 | display: none;
60 | text-align: center;
61 | }
62 |
63 | .file-upload-input {
64 | position: absolute;
65 | margin: 0;
66 | padding: 0;
67 | width: 100%;
68 | height: 100%;
69 | outline: none;
70 | opacity: 0;
71 | cursor: pointer;
72 | }
73 |
74 | .image-upload-wrap {
75 | margin-top: 20px;
76 | border: 4px dashed #1FB264;
77 | position: relative;
78 | }
79 |
80 | .image-dropping,
81 | .image-upload-wrap:hover {
82 | background-color: #1FB264;
83 | border: 4px dashed #ffffff;
84 | }
85 |
86 | .image-title-wrap {
87 | padding: 0 15px 15px 15px;
88 | color: #222;
89 | }
90 |
91 | .drag-text {
92 | text-align: center;
93 | }
94 |
95 | .drag-text h3 {
96 | font-weight: 100;
97 | text-transform: uppercase;
98 | color: #15824B;
99 | padding: 60px 0;
100 | }
101 |
102 | .file-upload-image {
103 | max-height: 600px;
104 | max-width: 600px;
105 | margin: auto;
106 | padding: 20px;
107 | }
108 |
109 | .remove-image {
110 | width: 200px;
111 | margin: 0;
112 | color: #fff;
113 | background: #cd4535;
114 | border: none;
115 | padding: 10px;
116 | border-radius: 4px;
117 | border-bottom: 4px solid #b02818;
118 | transition: all .2s ease;
119 | outline: none;
120 | text-transform: uppercase;
121 | font-weight: 700;
122 | }
123 |
124 | .remove-image:hover {
125 | background: #c13b2a;
126 | color: #ffffff;
127 | transition: all .2s ease;
128 | cursor: pointer;
129 | }
130 |
131 | .remove-image:active {
132 | border: 0;
133 | transition: all .2s ease;
134 | }
135 |
--------------------------------------------------------------------------------
/hdrnet/hdrnet_ops.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Python interface to custom Tensorflow operations for HDRnet."""
16 |
17 | import os
18 | import tensorflow as tf
19 | from tensorflow.python.framework import ops
20 |
21 | __all__ = ['bilateral_slice']
22 |
23 | path = os.path.dirname(os.path.abspath(__file__))
24 | path = tf.resource_loader.get_path_to_datafile(
25 | os.path.join(path, 'lib', 'hdrnet_ops.so'))
26 |
27 | _hdrnet = tf.load_op_library(path)
28 |
29 | # -- Register operations ------------------------------------------------------
30 | bilateral_slice = _hdrnet.bilateral_slice
31 | bilateral_slice_apply = _hdrnet.bilateral_slice_apply
32 |
33 | # ----------- Register gradients ----------------------------------------------
@ops.RegisterGradient('BilateralSlice')
def _bilateral_slice_grad(op, grad):
  """Gradient function registered for the 'BilateralSlice' op.

  Forwards the op's first two inputs and the incoming gradient to the
  compiled `bilateral_slice_grad` kernel and returns its result.
  """
  grid, guide = op.inputs[0], op.inputs[1]
  return _hdrnet.bilateral_slice_grad(grid, guide, grad)
39 |
40 |
@ops.RegisterGradient('BilateralSliceApply')
def _bilateral_slice_grad(op, grad):
  """Gradient function registered for the 'BilateralSliceApply' op.

  Forwards the op's three inputs, the incoming gradient and the op's
  `has_offset` attribute to the compiled `bilateral_slice_apply_grad` kernel.
  """
  grid, guide, image = op.inputs[0], op.inputs[1], op.inputs[2]
  return _hdrnet.bilateral_slice_apply_grad(
      grid, guide, image, grad, has_offset=op.get_attr('has_offset'))
49 |
50 |
51 | # ----------- Register Shape inference ----------------------------------------
@ops.RegisterShape('BilateralSlice')
def _bilateral_slice_shape(op):
  """Shape function registered for the 'BilateralSlice' op.

  The output shape is the shape of the second input with the last dimension
  of the first input appended.
  """
  first_shape = op.inputs[0].get_shape()
  guide_shape = op.inputs[1].get_shape()
  return [guide_shape.concatenate(first_shape[-1])]
57 |
58 |
@ops.RegisterShape('BilateralSliceApply')
def _bilateral_slice_shape(op):
  """Shape function registered for the 'BilateralSliceApply' op.

  The output shape is the shape of the second input (guide) with one extra
  channel dimension. That dimension is the grid's last dimension divided by
  the number of input channels, plus one when `has_offset` is set.
  """
  grid_shape = op.inputs[0].get_shape()
  guide_shape = op.inputs[1].get_shape()
  image_shape = op.inputs[2].get_shape()

  chan_in = image_shape[-1]
  chan_grid = grid_shape[-1]

  # With an offset term each output channel owns chan_in + 1 coefficients.
  divisor = (chan_in+1) if op.get_attr('has_offset') else chan_in
  chan_out = chan_grid // divisor
  return [guide_shape.concatenate(chan_out)]
74 |
--------------------------------------------------------------------------------
/scripts/test_pb_graph.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | # Copyright 2018 Fei Cheng
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import tensorflow as tf
19 | import hdrnet.models as models
20 | import cv2
21 | import numpy as np
22 | import skimage
23 | import skimage.io
24 | import skimage.transform
25 | from PIL import Image
26 | import argparse
27 |
28 |
def load_graph(pb_graph_file):
    """Read a serialized GraphDef from disk and import it into a new Graph.

    Args:
        pb_graph_file: path to the protobuf (.pb) graph file.

    Returns:
        The tf.Graph into which the graph definition was imported.
    """
    parsed_def = tf.GraphDef()
    with tf.gfile.GFile(pb_graph_file, "rb") as pb_handle:
        parsed_def.ParseFromString(pb_handle.read())

    # Import into a fresh graph so the default graph is left untouched.
    with tf.Graph().as_default() as imported:
        tf.import_graph_def(parsed_def)
    return imported
40 |
def main(args):
    """Run a frozen graph on a single image and save the result.

    Args:
        args: parsed command-line arguments providing `pb_file` (graph path),
            `input_image` (image to read) and `output_image` (where to save).

    Raises:
        IOError: if the input image cannot be read by OpenCV.
    """
    input_path = args.input_image
    im_input = cv2.imread(input_path, -1)  # -1 means read as is, no conversions.
    if im_input is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('could not read input image: {}'.format(input_path))

    if im_input.ndim == 2:
        # Single-channel image: replicate it so the channel axis exists.
        im_input = np.stack([im_input] * 3, axis=2)
    elif im_input.shape[2] == 4:
        # Drop the alpha channel.
        im_input = im_input[:, :, :3]

    im_input = np.flip(im_input, 2)  # OpenCV reads BGR, convert back to RGB.
    im_input = skimage.img_as_float(im_input)

    lowres_input = skimage.transform.resize(im_input, [256, 256], order=0)
    # Add the leading batch dimension to both inputs.
    im_input = im_input[np.newaxis, :, :, :]
    lowres_input = lowres_input[np.newaxis, :, :, :]

    graph = load_graph(args.pb_file)

    # tf.import_graph_def prefixes every imported node with 'import/' when no
    # name is given, which is how load_graph calls it.
    # Node names need to be customized if the graph changes.
    fullres = graph.get_tensor_by_name('import/fullres_input:0')
    lowres = graph.get_tensor_by_name('import/lowres_input:0')
    out = graph.get_tensor_by_name('import/output_img:0')

    with tf.Session(graph=graph) as sess:
        feed_dict = {
            fullres: im_input,
            lowres: lowres_input
        }
        # run the inference
        y_out = sess.run(out, feed_dict=feed_dict)

    # NOTE(review): assumes the graph output is already an HxWx3 uint8 array
    # -- TODO confirm against the freezing script.
    img = Image.fromarray(y_out, 'RGB')
    img.save(args.output_image)
71 |
72 |
if __name__ == '__main__':
    # Three required positional arguments, declared in command-line order.
    parser = argparse.ArgumentParser()
    for arg_name, arg_help in (
            ('pb_file', 'path to the optimized graph'),
            ('input_image', 'input image path'),
            ('output_image', 'output image path')):
        parser.add_argument(arg_name, default=None, help=arg_help)

    args = parser.parse_args()
    main(args)
--------------------------------------------------------------------------------
/scripts/optimize_graph.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | # Copyright 2018 Fei Cheng
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import argparse
18 | import tensorflow as tf
19 | import hdrnet.models as models
20 | from tensorflow.core.framework import graph_pb2
21 | from tensorflow.python.framework import errors
22 | from tensorflow.python.pywrap_tensorflow import TransformGraphWithStringInputs
23 | from tensorflow.python.util import compat
24 |
def TransformGraph(input_graph_def, inputs, outputs, transforms):
    """Apply Graph Transform Tool transforms to a GraphDef from Python.

    Exposes the same transforms as the command line tool; see
    https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/graph_transforms/README.md
    for the available options.

    Args:
        input_graph_def: GraphDef object containing a model to be transformed.
        inputs: List of node names for the model inputs.
        outputs: List of node names for the model outputs.
        transforms: List of strings containing transform names and parameters.

    Returns:
        New GraphDef with transforms applied.
    """
    serialized_in = input_graph_def.SerializeToString()
    # Node names are comma separated, transforms are space separated.
    joined_inputs = compat.as_bytes(",".join(inputs))
    joined_outputs = compat.as_bytes(",".join(outputs))
    joined_transforms = compat.as_bytes(" ".join(transforms))

    with errors.raise_exception_on_not_ok_status() as status:
        serialized_out = TransformGraphWithStringInputs(
            serialized_in, joined_inputs, joined_outputs,
            joined_transforms, status)

    transformed = graph_pb2.GraphDef()
    transformed.ParseFromString(serialized_out)
    return transformed
53 |
54 |
def load_graph(frozen_graph_path):
    """Read and parse a serialized GraphDef from disk.

    Args:
        frozen_graph_path: path to the frozen graph protobuf file.

    Returns:
        The parsed tf.GraphDef.
    """
    parsed_def = tf.GraphDef()
    with tf.gfile.GFile(frozen_graph_path, "rb") as pb_handle:
        parsed_def.ParseFromString(pb_handle.read())
    return parsed_def
62 |
63 |
def write_trans_graph(output_graph, output_graph_def):
    """Serialize `output_graph_def` and write it to the path `output_graph`."""
    serialized = output_graph_def.SerializeToString()
    with tf.gfile.GFile(output_graph, "wb") as out_handle:
        out_handle.write(serialized)
67 |
68 |
def main(args):
    """Load the frozen graph, run the transform pipeline and save the result.

    Args:
        args: parsed command-line arguments providing `frozen_path`,
            `optimized_path`, `input_nodes` and `output_nodes`.
    """
    # Transforms are applied in the order listed.
    transforms = [
        'strip_unused_nodes',
        'remove_nodes(op=Identity, op=CheckNumerics)',
        'merge_duplicate_nodes',
        'fold_constants(ignore_errors=true)',
        'fold_batch_norms',
        'sort_by_execution_order',
        'strip_unused_nodes',
    ]
    frozen_def = load_graph(args.frozen_path)
    optimized_def = TransformGraph(
        frozen_def, args.input_nodes, args.output_nodes, transforms)
    write_trans_graph(args.optimized_path, optimized_def)
76 |
77 |
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Graph locations: one path each.
    for arg_name, arg_help in (
            ('frozen_path', 'path to the saved frozen graph'),
            ('optimized_path', 'path to output optimized graph')):
        parser.add_argument(arg_name, default=None, help=arg_help)
    # Node-name lists: one or more names each.
    for arg_name, arg_help in (
            ('input_nodes', 'input nodes names of the graph'),
            ('output_nodes', 'output nodes names of the graph')):
        parser.add_argument(arg_name, nargs='+', help=arg_help)

    args = parser.parse_args()
    main(args)
--------------------------------------------------------------------------------
/web_serving/inference.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | # Copyright 2018 Fei Cheng
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import tensorflow as tf
18 | import hdrnet.models as models
19 | import numpy as np
20 | import skimage
21 | import skimage.io
22 | import skimage.transform
23 | import base64
24 | import cv2
25 | from PIL import Image
26 | import re
27 | import cStringIO
28 |
class Hdrnet(object):
    """Holds one imported frozen graph and a session for repeated inference.

    Attributes are set in __init__:
      checkpoint: path of the graph file that was loaded.
      graph: the tf.Graph the graph definition was imported into.
      sess: tf.Session bound to `graph`.
      count: number of uploads stored so far; used to name the saved files.
      dir: sub-directory of /tmp where uploads are written.
    """

    # data:<mime>;<encoding>,<payload>
    _DATA_URL_RE = re.compile(
        r"data:(?P<mime>.*?);(?P<encoding>.*?),(?P<data>.*)")

    def __init__(self, checkpoint, dir):
        """Load the graph at `checkpoint` and open a session on it.

        Args:
            checkpoint: path to the frozen/optimized graph protobuf file.
            dir: name of the sub-directory of /tmp used to store uploads.
        """
        # The original built tf.GPUOptions here but never passed them to the
        # session, so the session keeps TensorFlow's default GPU settings.
        self.checkpoint = checkpoint
        self.graph = self.load_graph(checkpoint)
        self.sess = tf.Session(graph=self.graph)
        self.count = 0
        self.dir = dir

    def load_graph(self, graph):
        """Parse the protobuf file at `graph` and import it into a new Graph.

        Returns:
            The tf.Graph holding the imported nodes. No name is passed to
            import_graph_def, and infer() looks the tensors up under the
            'import/' prefix.
        """
        with tf.gfile.GFile(graph, "rb") as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())

        with tf.Graph().as_default() as graph:
            tf.import_graph_def(graph_def)
        return graph

    @staticmethod
    def _decode_data_url(url_data):
        """Return the decoded base64 payload of a data URL.

        Raises:
            ValueError: if `url_data` does not look like
                'data:<mime>;<encoding>,<payload>'.
        """
        match = Hdrnet._DATA_URL_RE.match(url_data)
        if match is None:
            raise ValueError('input is not a valid data URL')
        return base64.b64decode(match.group('data'))

    def preprocess(self, url_data):
        """Decode a data URL, store it under /tmp/<dir>/ and read it back.

        Args:
            url_data: image encoded as a base64 data URL.

        Returns:
            The image as read by OpenCV, unchanged (flag -1).

        Raises:
            ValueError: if the data URL is malformed or the payload cannot be
                decoded as an image.
        """
        data = self._decode_data_url(url_data)
        saved_path = '/tmp/' + self.dir + '/' + str(self.count) + '.jpeg'
        with open(saved_path, 'wb') as f:
            f.write(data)
        np_data = cv2.imread(saved_path, -1)
        if np_data is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError('uploaded data could not be decoded as an image')
        print(np_data.shape)
        self.count += 1
        return np_data

    def infer(self, data):
        """Run the loaded graph on one image.

        Args:
            data: input image encoded as a base64 data URL.

        Returns:
            The graph output encoded as JPEG bytes.
        """
        im_input = self.preprocess(data)
        if im_input.ndim == 2:
            # Single-channel image: replicate it so the channel axis exists.
            im_input = np.stack([im_input] * 3, axis=2)
        elif im_input.shape[2] == 4:
            # Drop the alpha channel.
            im_input = im_input[:, :, :3]

        im_input = np.flip(im_input, 2)  # OpenCV reads BGR, convert back to RGB.

        # The original had a separate uint16 'hdr' branch, but both branches
        # performed this same conversion.
        im_input = skimage.img_as_float(im_input)

        lowres_input = skimage.transform.resize(im_input, [256, 256], order=0)
        # Add the leading batch dimension to both inputs.
        im_input = im_input[np.newaxis, :, :, :]
        lowres_input = lowres_input[np.newaxis, :, :, :]

        fullres = self.graph.get_tensor_by_name('import/fullres_input:0')
        lowres = self.graph.get_tensor_by_name('import/lowres_input:0')
        out = self.graph.get_tensor_by_name('import/output_img:0')

        feed_dict = {
            fullres: im_input,
            lowres: lowres_input
        }

        y_out = self.sess.run(out, feed_dict=feed_dict)

        # NOTE(review): assumes the graph output is already an HxWx3 uint8
        # array -- TODO confirm against the freezing script.
        img = Image.fromarray(y_out, 'RGB')
        buffer = cStringIO.StringIO()
        img.save(buffer, format='JPEG')
        return buffer.getvalue()
108 |
109 |
def main():
    """Command-line entry point: enhance one image file with a frozen graph.

    Reads `input_image`, runs it through Hdrnet.infer and writes the
    resulting JPEG bytes to `output_image`.

    Returns:
        The JPEG-encoded output bytes.
    """
    import argparse
    import os
    parser = argparse.ArgumentParser()
    parser.add_argument('checkpoint', type=str, help='optimized graph path')
    parser.add_argument('input_image', type=str, help='input image file')
    parser.add_argument('output_image', type=str, help='output image path')
    parser.add_argument('--dir', type=str, default='hdrnet_cli',
                        help='sub-directory of /tmp used to store the upload')
    args = parser.parse_args()

    # Hdrnet writes each input under /tmp/<dir>/, so the directory must exist.
    work_dir = os.path.join('/tmp', args.dir)
    if not os.path.isdir(work_dir):
        os.makedirs(work_dir)

    # The constructor requires both the graph path and the directory name.
    hdrnet = Hdrnet(args.checkpoint, args.dir)

    with open(args.input_image, 'rb') as f:
        img = f.read()

    # infer() expects a base64 data URL, not raw file bytes.
    url_data = 'data:image/jpeg;base64,' + base64.b64encode(img).decode('ascii')
    new_image = hdrnet.infer(url_data)

    # infer() returns JPEG-encoded bytes; write them out as-is.
    with open(args.output_image, 'wb') as f:
        f.write(new_image)

    return new_image


if __name__ == '__main__':
    main()
129 |
--------------------------------------------------------------------------------
/hdrnet/layers.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Shortcuts for some graph operators."""
16 |
17 | import tensorflow as tf
18 | import numpy as np
19 |
20 | from hdrnet import hdrnet_ops
21 |
22 | w_initializer = tf.contrib.layers.variance_scaling_initializer
23 | b_initializer = tf.constant_initializer
24 |
def conv(inputs, num_outputs, kernel_size, stride=1, rate=1,
         use_bias=True,
         batch_norm=False, is_training=False,
         activation_fn=tf.nn.relu,
         scope=None, reuse=False):
  """Shortcut for a 'SAME'-padded tf.contrib.layers.convolution2d layer.

  With `batch_norm` set, batch normalization is used as the normalizer and no
  bias initializer is passed. Otherwise a zero-initialized bias is requested
  only when `use_bias` is true. Weights use the module's variance-scaling
  initializer and an L2 regularizer with scale 1.0.

  Returns:
    The output of tf.contrib.layers.convolution2d.
  """
  normalizer_fn = tf.contrib.layers.batch_norm if batch_norm else None
  if use_bias and not batch_norm:
    b_init = b_initializer(0.0)
  else:
    b_init = None

  # Collections the normalizer's variables are added to.
  bn_collections = {
      'beta': [tf.GraphKeys.BIASES],
      'moving_mean': [tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
      'moving_variance': [tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
  }
  bn_params = {
      'center': True,
      'is_training': is_training,
      'variables_collections': bn_collections,
  }
  layer_collections = {
      'weights': [tf.GraphKeys.WEIGHTS],
      'biases': [tf.GraphKeys.BIASES],
  }

  return tf.contrib.layers.convolution2d(
      inputs=inputs,
      num_outputs=num_outputs, kernel_size=kernel_size,
      stride=stride, padding='SAME',
      rate=rate,
      weights_initializer=w_initializer(),
      weights_regularizer=tf.contrib.layers.l2_regularizer(1.0),
      biases_initializer=b_init,
      normalizer_fn=normalizer_fn,
      normalizer_params=bn_params,
      activation_fn=activation_fn,
      variables_collections=layer_collections,
      outputs_collections=[tf.GraphKeys.ACTIVATIONS],
      scope=scope, reuse=reuse)
61 |
62 |
def fc(inputs, num_outputs,
       use_bias=True,
       batch_norm=False, is_training=False,
       activation_fn=tf.nn.relu,
       scope=None):
  """Shortcut for a tf.contrib.layers.fully_connected layer.

  With `batch_norm` set, batch normalization is used as the normalizer and no
  bias initializer is passed. Otherwise a zero-initialized bias is requested
  only when `use_bias` is true. Weights use the module's variance-scaling
  initializer and an L2 regularizer with scale 1.0.

  Returns:
    The output of tf.contrib.layers.fully_connected.
  """
  normalizer_fn = tf.contrib.layers.batch_norm if batch_norm else None
  if use_bias and not batch_norm:
    b_init = b_initializer(0.0)
  else:
    b_init = None

  # Collections the normalizer's variables are added to.
  bn_collections = {
      'beta': [tf.GraphKeys.BIASES],
      'moving_mean': [tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
      'moving_variance': [tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
  }
  bn_params = {
      'center': True,
      'is_training': is_training,
      'variables_collections': bn_collections,
  }
  layer_collections = {
      'weights': [tf.GraphKeys.WEIGHTS],
      'biases': [tf.GraphKeys.BIASES],
  }

  return tf.contrib.layers.fully_connected(
      inputs=inputs,
      num_outputs=num_outputs,
      weights_initializer=w_initializer(),
      weights_regularizer=tf.contrib.layers.l2_regularizer(1.0),
      biases_initializer=b_init,
      normalizer_fn=normalizer_fn,
      normalizer_params=bn_params,
      activation_fn=activation_fn,
      variables_collections=layer_collections,
      scope=scope)
96 |
97 |
98 | # -----------------------------------------------------------------------------
99 |
100 | # pylint: disable=redefined-builtin
def bilateral_slice(grid, guide, name=None):
  """Slices into a bilateral grid using the guide map.

  A 6-D grid is first flattened to 5-D by concatenating its last axis into
  axis 4; the sliced result is then split back along axis 3 and stacked on a
  new axis 4.

  Args:
    grid: (Tensor) [batch_size, grid_h, grid_w, depth, n_outputs]
      grid to slice from.
    guide: (Tensor) [batch_size, h, w ] guide map to slice along.
    name: (string) name for the operation.
  Returns:
    sliced: (Tensor) [batch_size, h, w, n_outputs] sliced output.
  """

  with tf.name_scope(name):
    static_shape = grid.get_shape().as_list()
    is_6d = len(static_shape) == 6

    if is_6d:
      n_in = static_shape[5]
      grid = tf.concat(tf.unstack(grid, None, axis=5), 4)

    sliced = hdrnet_ops.bilateral_slice(grid, guide)

    if is_6d:
      # Undo the flattening: one chunk per entry of the original last axis.
      chunks = tf.split(sliced, n_in, axis=3)
      sliced = tf.stack(chunks, axis=4)
    return sliced
124 | # pylint: enable=redefined-builtin
125 |
126 |
def bilateral_slice_apply(grid, guide, input_image, has_offset=True, name=None):
  """Slices into a bilateral grid and applies the result to the input image.

  Thin wrapper around the fused `bilateral_slice_apply` custom op. A 6-D grid
  is reshaped to 5-D by merging its last two axes before the op is called.

  Args:
    grid: (Tensor) [batch_size, grid_h, grid_w, depth, n_outputs]
      grid to slice from. A 6-D grid is also accepted, see above.
    guide: (Tensor) [batch_size, h, w ] guide map to slice along.
    input_image: (Tensor) [batch_size, h, w, n_input] input data onto which to
      apply the affine transform.
    has_offset: (bool) forwarded unchanged to the custom op's `has_offset`
      attribute.
    name: (string) name for the operation.
  Returns:
    sliced: (Tensor) [batch_size, h, w, n_outputs] sliced output.
  """

  with tf.name_scope(name):
    gridshape = grid.get_shape().as_list()
    if len(gridshape) == 6:
      # Use the dynamic shape so the reshape does not depend on statically
      # known dimensions.
      gs = tf.shape(grid)
      grid = tf.reshape(grid, tf.stack([gs[0], gs[1], gs[2], gs[3], gs[4]*gs[5]]))

    sliced = hdrnet_ops.bilateral_slice_apply(grid, guide, input_image, has_offset=has_offset)
    return sliced
151 | # pylint: enable=redefined-builtin
152 |
153 |
154 | # pylint: disable=redefined-builtin
def apply(sliced, input_image, has_affine_term=True, name=None):
  """Applies per-pixel affine coefficients to the input image.

  Each output channel is the sum over input channels of coefficient times
  input value, plus the offset coefficient when `has_affine_term` is set.

  Args:
    sliced: (Tensor) [batch_size, h, w, n_output, n_input+1] affine coefficients
    input_image: (Tensor) [batch_size, h, w, n_input] input data onto which to
      apply the affine transform.
    has_affine_term: (bool) whether the last coefficient of each output channel
      is an additive offset.
    name: (string) name for the operation.
  Returns:
    ret: (Tensor) [batch_size, h, w, n_output] the transformed data.
  Raises:
    ValueError: if the input is not properly dimensioned.
    ValueError: if the affine model parameter dimensions do not match the input.
  """

  with tf.name_scope(name):
    in_shape = input_image.get_shape().as_list()
    if len(in_shape) != 4:
      raise ValueError('input image should have dims [b,h,w,n_in].')

    sliced_shape = sliced.get_shape().as_list()
    if in_shape[:-1] != sliced_shape[:-2]:
      raise ValueError('input image and affine coefficients'
                       ' dimensions do not match: {} and {}'.format(
                           in_shape, sliced_shape))

    _, _, _, n_out, n_in = sliced_shape
    if has_affine_term:
      # The last coefficient is the offset, not a scale.
      n_in -= 1

    scale = sliced[:, :, :, :, :n_in]
    if has_affine_term:
      offset = sliced[:, :, :, :, n_in]

    out_channels = []
    for out_c in range(n_out):
      acc = scale[:, :, :, out_c, 0]*input_image[:, :, :, 0]
      for in_c in range(1, n_in):
        acc += scale[:, :, :, out_c, in_c]*input_image[:, :, :, in_c]
      if has_affine_term:
        acc += offset[:, :, :, out_c]
      out_channels.append(tf.expand_dims(acc, 3))

    return tf.concat(out_channels, 3)
201 | # pylint: enable=redefined-builtin
202 |
--------------------------------------------------------------------------------
/tensorflow_serving/hdr_saved.py:
--------------------------------------------------------------------------------
1 | import shutil
2 |
3 | import tensorflow as tf
4 |
5 | import hdrnet.models as models
6 | import hdrnet.utils as utils
7 | import os
8 | import numpy as np
9 |
10 |
11 | tf.app.flags.DEFINE_string('checkpoint_dir', '/tmp/checkpoint_dir/faces',
12 | """Directory where to read training checkpoints.""")
13 | tf.app.flags.DEFINE_string('output_dir', '/tmp/hdrnet_output',
14 | """Directory where to export inference model.""")
15 | tf.app.flags.DEFINE_integer('model_version', 1,
16 | """Version number of the model.""")
17 | tf.app.flags.DEFINE_integer('image_size', 256,
18 | """Needs to provide same value as in training.""")
19 |
20 | FLAGS = tf.app.flags.FLAGS
21 |
22 |
def preprocess_image(image_buffer):
    """Decode JPEG bytes into a 3-channel float32 image tensor.

    No batch dimension is added and no rescaling beyond the dtype conversion
    is applied.

    :param image_buffer: string tensor that contains a JPEG-encoded image
    :return: 3D float32 image tensor with 3 channels, as produced by
        tf.image.convert_image_dtype
    """

    # Decode the string as an RGB JPEG.
    # Note that the resulting image contains an unknown height and width
    # that is set dynamically by decode_jpeg. In other words, the height
    # and width of image is unknown at compile-time.
    image = tf.image.decode_jpeg(image_buffer, channels=3, dct_method='INTEGER_ACCURATE')

    # Convert the decoded integer pixels to float32.
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # The batch dimension is intentionally not added here:
    #image = tf.expand_dims(image, 0)

    return image
49 |
50 |
def preprocess_low_image(image_buffer):
    """Decode JPEG bytes and resize to FLAGS.image_size x FLAGS.image_size.

    The image is decoded as 3-channel JPEG, converted to float32 and resized
    with nearest-neighbor interpolation. The result is a 3D tensor; the batch
    dimension is only added temporarily for the resize op.
    """
    decoded = tf.image.decode_jpeg(
        image_buffer, channels=3, dct_method='INTEGER_ACCURATE')
    as_float = tf.image.convert_image_dtype(decoded, dtype=tf.float32)

    # resize_nearest_neighbor is given a batched tensor here, so add a batch
    # axis for the call and strip it again afterwards.
    target_size = [FLAGS.image_size, FLAGS.image_size]
    batched = tf.expand_dims(as_float, 0)
    resized = tf.image.resize_nearest_neighbor(
        batched, target_size, align_corners=False)
    return tf.squeeze(resized, [0])
74 |
75 | # def cv_preprocess_low_image(image_buffer):
76 | # record_defaults = [['']] * (256 * 256 * 3)
77 | # flat = tf.decode_csv(image_buffer, record_defaults=record_defaults)
78 | # flat = tf.string_to_number(flat, out_type=tf.float32)
79 | # return tf.expand_dims(tf.reshape(flat, [256, 256, 3]), 0)
80 | #
81 | #
82 | # def cv_preprocess_image(image_buffer):
83 | # #array = np.load(image_buffer)
84 | #
85 | # record_defaults = [['']] * 1920
86 | # flat = tf.stack(tf.decode_csv(image_buffer, record_defaults=record_defaults))
87 | # flat = tf.string_to_number(flat, out_type=tf.float32)
88 | #
89 | # #array = tf.convert_to_tensor(array, dtype=tf.float32)
90 | # return tf.expand_dims(tf.reshape(flat, [1920, 1080, 3]), 0)
91 |
92 |
def main(_):
    """Rebuild the HDRNet inference graph and export it as a SavedModel.

    Steps: create string placeholders and parse them as tf.Example protos,
    decode the JPEGs into full-resolution and low-resolution images, read the
    model parameters from the latest checkpoint's meta graph, build the model
    under the 'inference' scope, restore the weights, and write a SavedModel
    with a single 'predict_images' signature to
    <FLAGS.output_dir>/<FLAGS.model_version>.

    :param _: unused positional argument supplied by tf.app.run()
    """
    with tf.Graph().as_default():
        # Inject placeholder into the graph
        serialized_tf_example = tf.placeholder(tf.string, name='input_image')
        serialized_low_example = tf.placeholder(tf.string, name='low_image')
        #serialized_shape = tf.placeholder(tf.string, name='shape_image')
        # Both inputs are serialized tf.Example protos with one string feature.
        feature_configs = {
            'image/encoded': tf.FixedLenFeature(
                shape=[], dtype=tf.string)
        }
        tf_example = tf.parse_example(serialized_tf_example, feature_configs)
        tf_low_example = tf.parse_example(serialized_low_example, feature_configs)
        #tf_low_shape = tf.parse_example(serialized_shape, feature_configs)

        jpegs = tf_example['image/encoded']
        low_jpegs = tf_low_example['image/encoded']
        #shape_jpegs = tf_low_shape['image/encoded']

        # Decode every JPEG in the batch; map_fn stacks the per-image results.
        full_images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)
        low_images = tf.map_fn(preprocess_low_image, low_jpegs, dtype=tf.float32)
        #full_images = tf.squeeze(full_images, [0])
        #low_images = tf.squeeze(low_images, [0])

        # now the image shape is (1, ?, ?, 3)

        # Create model
        # NOTE(review): latest_checkpoint() returns None when no checkpoint is
        # found, which would make the join below fail - confirm callers always
        # point checkpoint_dir at a trained model.
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

        # The meta graph is imported only so the stored model parameters can be
        # read back through utils.get_model_params().
        metapath = ".".join([checkpoint_path, "meta"])
        tf.train.import_meta_graph(metapath)
        with tf.Session() as sess:
            model_params = utils.get_model_params(sess)
        # Look up the model class by the name stored in the checkpoint.
        mdl = getattr(models, model_params['model_name'])

        with tf.variable_scope('inference'):
            prediction = mdl.inference(low_images, full_images, model_params, is_training=False)
        # Clamp to [0, 1], drop size-1 dimensions and convert to 8-bit pixels.
        output = tf.cast(255.0 * tf.squeeze(tf.clip_by_value(prediction, 0, 1)), tf.uint8)
        #output_img = tf.image.encode_png(tf.image.convert_image_dtype(output[0], dtype=tf.uint8))


        # Create saver to restore from checkpoints
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Restore the model from last checkpoints
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            saver.restore(sess, ckpt.model_checkpoint_path)

            # (re-)create export directory
            export_path = os.path.join(
                tf.compat.as_bytes(FLAGS.output_dir),
                tf.compat.as_bytes(str(FLAGS.model_version)))
            # An existing export of the same version is deleted first.
            if os.path.exists(export_path):
                shutil.rmtree(export_path)

            # create model builder
            builder = tf.saved_model.builder.SavedModelBuilder(export_path)

            # create tensors info
            # The signature inputs are the parsed 'image/encoded' tensors, not
            # the serialized-example placeholders created above.
            predict_tensor_inputs_info = tf.saved_model.utils.build_tensor_info(jpegs)
            predict_tensor_low_info = tf.saved_model.utils.build_tensor_info(low_jpegs)
            #predict_tensor_shape_info = tf.saved_model.utils.build_tensor_info(shape_jpegs)
            predict_tensor_scores_info = tf.saved_model.utils.build_tensor_info(output)

            # build prediction signature
            prediction_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs={'images': predict_tensor_inputs_info,
                            'low': predict_tensor_low_info},
                            #'shape': predict_tensor_shape_info},
                    outputs={'result': predict_tensor_scores_info},
                    method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
                )
            )

            # save the model
            #legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
            builder.add_meta_graph_and_variables(
                sess, [tf.saved_model.tag_constants.SERVING],
                signature_def_map={
                    'predict_images': prediction_signature
                })
                #legacy_init_op=legacy_init_op)

            builder.save()

    print("Successfully exported hdr model version '{}' into '{}'".format(
        FLAGS.model_version, FLAGS.output_dir))
182 |
# Script entry point: tf.app.run() invokes main().
if __name__ == '__main__':
    tf.app.run()
--------------------------------------------------------------------------------
/hdrnet/models.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Defines computation graphs."""
16 |
17 | import tensorflow as tf
18 | import numpy as np
19 | import os
20 |
21 | from hdrnet.layers import (conv, fc, bilateral_slice_apply)
22 |
# Public model classes; scripts look these up by name with getattr(models, name).
__all__ = [
    'HDRNetCurves',
    'HDRNetPointwiseNNGuide',
    'HDRNetGaussianPyrNN',
]
28 |
29 |
class HDRNetCurves(object):
    """Main model, as submitted in January 2017.

    The graph has three parts, each built under its own variable scope by
    `inference`: a coefficient network run on the low-resolution input, a
    guide map computed from the full-resolution input, and an output stage
    that calls `bilateral_slice_apply` on the coefficients, guide and image.
    """

    @classmethod
    def n_out(cls):
        """Number of output channels predicted per grid cell."""
        return 3

    @classmethod
    def n_in(cls):
        """Number of input coefficients per output channel.

        Presumably 3 colour channels plus one offset term (the output stage
        passes has_offset=True) - TODO confirm.
        """
        return 3+1

    @classmethod
    def inference(cls, lowres_input, fullres_input, params,
                  is_training=False):
        """Build the full model and return the output tensor.

        Args:
          lowres_input: low-resolution image batch fed to the coefficient net.
          fullres_input: full-resolution image batch used for the guide and
            the final output.
          params: dict of model hyper-parameters (see `_coefficients`).
          is_training: forwarded to the layers that use batch norm.

        Returns:
          The tensor produced by `_output`. The coefficients, guide and output
          are also added to the 'bilateral_coefficients', 'guide' and 'output'
          graph collections.
        """

        with tf.variable_scope('coefficients'):
            bilateral_coeffs = cls._coefficients(lowres_input, params, is_training)
            tf.add_to_collection('bilateral_coefficients', bilateral_coeffs)

        with tf.variable_scope('guide'):
            guide = cls._guide(fullres_input, params, is_training)
            tf.add_to_collection('guide', guide)

        with tf.variable_scope('output'):
            output = cls._output(
                fullres_input, guide, bilateral_coeffs)
            tf.add_to_collection('output', output)

        return output

    @classmethod
    def _coefficients(cls, input_tensor, params, is_training):
        """Predict the grid of affine coefficients from the low-res input.

        Reads 'luma_bins', 'channel_multiplier', 'spatial_bin',
        'net_input_size' and 'batch_norm' from `params`.

        Returns:
          A 6-D tensor: the prediction conv output is split and stacked so
          that the last two axes have sizes n_out() and n_in().
        """
        # NOTE(review): this is the static batch size; it is None when the
        # batch dimension is unknown, and it is used in tf.reshape below -
        # confirm callers always pass a fixed batch size.
        bs = input_tensor.get_shape().as_list()[0]
        gd = params['luma_bins']
        cm = params['channel_multiplier']
        spatial_bin = params['spatial_bin']

        # -----------------------------------------------------------------------
        with tf.variable_scope('splat'):
            # Number of stride-2 convolutions needed to shrink net_input_size
            # down to spatial_bin.
            n_ds_layers = int(np.log2(params['net_input_size']/spatial_bin))

            current_layer = input_tensor
            for i in range(n_ds_layers):
                if i > 0:  # don't normalize first layer
                    use_bn = params['batch_norm']
                else:
                    use_bn = False
                # Channel count doubles at every downsampling step.
                current_layer = conv(current_layer, cm*(2**i)*gd, 3, stride=2,
                                     batch_norm=use_bn, is_training=is_training,
                                     scope='conv{}'.format(i+1))

            splat_features = current_layer
        # -----------------------------------------------------------------------

        # -----------------------------------------------------------------------
        with tf.variable_scope('global'):
            # NOTE(review): n_global_layers is computed but never used; the
            # loop below always builds exactly two stride-2 convolutions.
            n_global_layers = int(np.log2(spatial_bin/4))  # 4x4 at the coarsest lvl

            current_layer = splat_features
            for i in range(2):
                current_layer = conv(current_layer, 8*cm*gd, 3, stride=2,
                                     batch_norm=params['batch_norm'], is_training=is_training,
                                     scope="conv{}".format(i+1))
            # Flatten the spatial map so it can go through fully connected layers.
            _, lh, lw, lc = current_layer.get_shape().as_list()
            current_layer = tf.reshape(current_layer, [bs, lh*lw*lc])

            current_layer = fc(current_layer, 32*cm*gd,
                               batch_norm=params['batch_norm'], is_training=is_training,
                               scope="fc1")
            current_layer = fc(current_layer, 16*cm*gd,
                               batch_norm=params['batch_norm'], is_training=is_training,
                               scope="fc2")
            # don't normalize before fusion
            current_layer = fc(current_layer, 8*cm*gd, activation_fn=None, scope="fc3")
            global_features = current_layer
        # -----------------------------------------------------------------------

        # -----------------------------------------------------------------------
        with tf.variable_scope('local'):
            current_layer = splat_features
            current_layer = conv(current_layer, 8*cm*gd, 3,
                                 batch_norm=params['batch_norm'],
                                 is_training=is_training,
                                 scope='conv1')
            # don't normalize before fusion
            current_layer = conv(current_layer, 8*cm*gd, 3, activation_fn=None,
                                 use_bias=False, scope='conv2')
            grid_features = current_layer
        # -----------------------------------------------------------------------

        # -----------------------------------------------------------------------
        with tf.name_scope('fusion'):
            fusion_grid = grid_features
            # Reshape the global vector to [bs, 1, 1, C] so the addition
            # broadcasts it over every spatial position of the local features.
            fusion_global = tf.reshape(global_features, [bs, 1, 1, 8*cm*gd])
            fusion = tf.nn.relu(fusion_grid+fusion_global)
        # -----------------------------------------------------------------------

        # -----------------------------------------------------------------------
        with tf.variable_scope('prediction'):
            current_layer = fusion
            # 1x1 convolution producing gd * n_out * n_in values per position.
            current_layer = conv(current_layer, gd*cls.n_out()*cls.n_in(), 1,
                                 activation_fn=None, scope='conv1')

            with tf.name_scope('unroll_grid'):
                # Split the channel axis into n_out*n_in groups and stack them
                # on a new axis 4, then split that axis into n_in groups and
                # stack them on a new axis 5.
                current_layer = tf.stack(
                    tf.split(current_layer, cls.n_out()*cls.n_in(), axis=3), axis=4)
                current_layer = tf.stack(
                    tf.split(current_layer, cls.n_in(), axis=4), axis=5)
            tf.add_to_collection('packed_coefficients', current_layer)
        # -----------------------------------------------------------------------

        return current_layer

    @classmethod
    def _guide(cls, input_tensor, params, is_training):
        """Compute the single-channel guide map from the full-res input.

        Applies a learned colour matrix with bias ('ccm', 'ccm_bias'), a
        per-channel curve built from 16 ReLU segments ('shifts', 'slopes'),
        and a 1x1 'channel_mixing' convolution; the result is clipped to
        [0, 1] and the channel axis is squeezed away. `params` and
        `is_training` are not used by this implementation.
        """
        npts = 16  # number of control points for the curve
        nchans = input_tensor.get_shape().as_list()[-1]

        guidemap = input_tensor

        # Color space change
        # Initialised to the identity plus one small random value added to
        # every entry.
        idtity = np.identity(nchans, dtype=np.float32) + np.random.randn(1).astype(np.float32)*1e-4
        ccm = tf.get_variable('ccm', dtype=tf.float32, initializer=idtity)
        with tf.name_scope('ccm'):
            ccm_bias = tf.get_variable('ccm_bias', shape=[nchans,], dtype=tf.float32, initializer=tf.constant_initializer(0.0))

            # Flatten to a list of pixels, apply the matrix and bias, then
            # restore the original image shape.
            guidemap = tf.matmul(tf.reshape(input_tensor, [-1, nchans]), ccm)
            guidemap = tf.nn.bias_add(guidemap, ccm_bias, name='ccm_bias_add')

            guidemap = tf.reshape(guidemap, tf.shape(input_tensor))

        # Per-channel curve
        with tf.name_scope('curve'):
            # Initial breakpoints: npts evenly spaced values in [0, 1),
            # repeated for every channel.
            shifts_ = np.linspace(0, 1, npts, endpoint=False, dtype=np.float32)
            shifts_ = shifts_[np.newaxis, np.newaxis, np.newaxis, :]
            shifts_ = np.tile(shifts_, (1, 1, nchans, 1))

            # Extra trailing axis so each pixel broadcasts against all npts shifts.
            guidemap = tf.expand_dims(guidemap, 4)
            shifts = tf.get_variable('shifts', dtype=tf.float32, initializer=shifts_)

            # Only the first segment starts with slope 1; the others start at 0.
            slopes_ = np.zeros([1, 1, 1, nchans, npts], dtype=np.float32)
            slopes_[:, :, :, :, 0] = 1.0
            slopes = tf.get_variable('slopes', dtype=tf.float32, initializer=slopes_)

            # Sum of ReLU segments over the control-point axis.
            guidemap = tf.reduce_sum(slopes*tf.nn.relu(guidemap-shifts), reduction_indices=[4])

        # Mix the channels into one; weights start at 1/nchans, bias at 0.
        guidemap = tf.contrib.layers.convolution2d(
            inputs=guidemap,
            num_outputs=1, kernel_size=1,
            weights_initializer=tf.constant_initializer(1.0/nchans),
            biases_initializer=tf.constant_initializer(0),
            activation_fn=None,
            variables_collections={'weights':[tf.GraphKeys.WEIGHTS], 'biases':[tf.GraphKeys.BIASES]},
            outputs_collections=[tf.GraphKeys.ACTIVATIONS],
            scope='channel_mixing')

        guidemap = tf.clip_by_value(guidemap, 0, 1)
        guidemap = tf.squeeze(guidemap, squeeze_dims=[3,])

        return guidemap

    @classmethod
    def _output(cls, im, guide, coeffs):
        """Apply the sliced coefficients to the image `im`.

        The op is pinned to '/gpu:0'.
        """
        with tf.device('/gpu:0'):
            out = bilateral_slice_apply(coeffs, guide, im, has_offset=True, name='slice')
        return out
197 |
198 |
class HDRNetPointwiseNNGuide(HDRNetCurves):
    """HDRNetCurves variant whose guide map comes from a small pointwise network
    instead of the per-channel curves.
    """

    @classmethod
    def _guide(cls, input_tensor, params, is_training):
        """Build the guide map with two 1x1 convolutions.

        The first layer ('conv1') has params['guide_complexity'] outputs and
        uses batch norm; the second ('conv2') has a single sigmoid output.
        The size-1 channel axis is squeezed away before returning.
        """
        hidden = conv(
            input_tensor, params['guide_complexity'], 1,
            batch_norm=True, is_training=is_training,
            scope='conv1')
        single_channel = conv(
            hidden, 1, 1, activation_fn=tf.nn.sigmoid, scope='conv2')
        return tf.squeeze(single_channel, squeeze_dims=[3,])
211 |
212 |
class HDRNetGaussianPyrNN(HDRNetPointwiseNNGuide):
    """Replace input to the affine model by a pyramid.

    The full-resolution input is repeatedly downsampled into `n_scales()`
    levels. Each level gets its own pointwise guide and its own group of 3
    output channels from the coefficient grid; the per-level results are
    upsampled and summed from the coarsest level to the finest.
    """

    @classmethod
    def n_scales(cls):
        """Number of pyramid levels, including the full-resolution one."""
        return 3

    @classmethod
    def n_out(cls):
        """Output channels predicted per grid cell: 3 for every pyramid level."""
        return 3*cls.n_scales()

    @classmethod
    def n_in(cls):
        """Number of input coefficients per output channel."""
        return 3+1

    @classmethod
    def inference(cls, lowres_input, fullres_input, params,
                  is_training=False):
        """Build the multiscale model and return the output tensor.

        Every pyramid level is added to the 'multiscale' collection and every
        per-level guide to the 'guide' collection (finest level first); the
        coefficients and the final output go to 'bilateral_coefficients' and
        'output'.
        """
        with tf.variable_scope('coefficients'):
            bilateral_coeffs = cls._coefficients(lowres_input, params, is_training)
            tf.add_to_collection('bilateral_coefficients', bilateral_coeffs)

        with tf.variable_scope('multiscale'):
            multiscale = cls._multiscale_input(fullres_input)
            for m in multiscale:
                tf.add_to_collection('multiscale', m)

        with tf.variable_scope('guide'):
            guide = cls._guide(multiscale, params, is_training)
            for g in guide:
                tf.add_to_collection('guide', g)

        with tf.variable_scope('output'):
            output = cls._output(multiscale, guide, bilateral_coeffs)
            tf.add_to_collection('output', output)

        return output

    @classmethod
    def _multiscale_input(cls, fullres_input):
        """Return the pyramid levels as a list, finest (the input itself) first.

        Each further level is a bilinear resize of the previous one to half
        its height and width.
        """
        sz = tf.shape(fullres_input)[1:3]

        current_level = fullres_input
        lvls = [current_level]
        for _ in range(cls.n_scales()-1):
            # Floor division keeps the size an int32 tensor on Python 3 as
            # well; plain `/` would become true division there, and
            # resize_images needs an integer size.
            sz = sz // 2
            current_level = tf.image.resize_images(
                current_level, sz, tf.image.ResizeMethod.BILINEAR,
                align_corners=True)
            lvls.append(current_level)
        return lvls

    @classmethod
    def _guide(cls, multiscale, params, is_training):
        """Build one pointwise guide per pyramid level under scope 'level_<i>'."""
        guide_lvls = []
        for il, lvl in enumerate(multiscale):
            with tf.variable_scope('level_{}'.format(il)):
                guide_lvl = HDRNetPointwiseNNGuide._guide(lvl, params, is_training)
                guide_lvls.append(guide_lvl)
        return guide_lvls

    @classmethod
    def _output(cls, lvls, guide_lvls, coeffs):
        """Slice/apply the coefficients per level and merge coarse-to-fine.

        Levels are visited from coarsest to finest. The il-th visited level
        uses channels [3*il, 3*il+3) of axis 4 of `coeffs`. The running result
        is resized bilinearly to the current level's size and added to it.
        """
        # zip() is an iterator on Python 3 and cannot be passed to reversed()
        # directly, so materialise it first.
        coarse_to_fine = reversed(list(zip(lvls, guide_lvls)))
        for il, (lvl, guide_lvl) in enumerate(coarse_to_fine):
            c = coeffs[:, :, :, :, il*3:(il+1)*3, :]
            out_lvl = HDRNetPointwiseNNGuide._output(lvl, guide_lvl, c)

            if il == 0:
                current = out_lvl
            else:
                sz = tf.shape(out_lvl)[1:3]
                current = tf.image.resize_images(current, sz, tf.image.ResizeMethod.BILINEAR, align_corners=True)
                current = tf.add(current, out_lvl)

        return current
290 |
291 |
292 |
--------------------------------------------------------------------------------
/scripts/freeze_graph.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | # Copyright 2016 Google Inc.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Modifications Copyright 2018 Fei Cheng
18 | #
19 | # Licensed under the Apache License, Version 2.0 (the "License");
20 | # you may not use this file except in compliance with the License.
21 | # You may obtain a copy of the License at
22 | #
23 | # http://www.apache.org/licenses/LICENSE-2.0
24 | #
25 | # Unless required by applicable law or agreed to in writing, software
26 | # distributed under the License is distributed on an "AS IS" BASIS,
27 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28 | # See the License for the specific language governing permissions and
29 | # limitations under the License.
30 |
31 | """Freeze graph weights; use to optimize runtime."""
32 |
33 | import argparse
34 | import logging
35 | import numpy as np
36 | import os
37 | import tensorflow as tf
38 | from tensorflow.python.tools import freeze_graph
39 | from tensorflow.core.framework import graph_pb2
40 |
41 | import hdrnet.utils as utils
42 | import hdrnet.models as models
43 |
# Module-wide logger: messages are prefixed with process id, level and file:line.
logging.basicConfig(format="[%(process)d] %(levelname)s %(filename)s:%(lineno)s | %(message)s")
# NOTE(review): the logger is named "train" although this is the freeze script -
# presumably copied from the training script; confirm before renaming.
log = logging.getLogger("train")
log.setLevel(logging.INFO)
47 |
48 |
def save(data, filepath):
    """Log the destination, then write the raw bytes of `data` to `filepath`.

    `data` must provide a `tobytes()` method (callers in this script pass
    numpy arrays). An existing file is overwritten.
    """
    log.info("Saving {}".format(filepath))
    with open(filepath, 'wb') as sink:
        raw_bytes = data.tobytes()
        sink.write(raw_bytes)
53 |
54 |
def _fold_pointwise_guide(sess, graph, scope):
    """Fetch a pointwise guide's weights and fold conv1's batch norm into them.

    Args:
      sess: session holding the restored variables.
      graph: graph containing the guide tensors.
      scope: name prefix of the guide, e.g. 'inference/guide' or
        'inference/guide/level_0'.

    Returns:
      (conv1, conv2): conv1 is the folded conv1 weight matrix with the folded
      bias stacked as an extra last row; conv2 is the conv2 weights with the
      bias appended. Both are float32.
    """
    names = [
        'conv1/weights:0',
        'conv1/BatchNorm/beta:0',
        'conv1/BatchNorm/moving_mean:0',
        'conv1/BatchNorm/moving_variance:0',
        'conv1/BatchNorm/batchnorm/add/y:0',  # the epsilon constant
        'conv2/weights:0',
        'conv2/biases:0',
    ]
    tensors = [graph.get_tensor_by_name('{}/{}'.format(scope, n)) for n in names]
    conv1w_, conv1b_, conv1mu_, conv1sigma_, conv1eps_, conv2w_, conv2b_ = sess.run(tensors)

    # (w*x - mu) / sqrt(var + eps) + beta
    #   == (w / std) * x + (beta - mu / std)
    std = np.sqrt(conv1sigma_ + conv1eps_)
    conv1b_ = conv1b_ - conv1mu_ / std
    conv1w_ = conv1w_ / std

    conv1w_ = np.squeeze(conv1w_.astype(np.float32))
    conv1b_ = np.squeeze(conv1b_.astype(np.float32))
    conv1b_ = conv1b_[np.newaxis, :]

    conv2w_ = np.squeeze(conv2w_.astype(np.float32))
    conv2b_ = np.squeeze(conv2b_.astype(np.float32))

    conv2 = np.append(conv2w_, conv2b_)
    conv1 = np.vstack([conv1w_, conv1b_])
    return conv1, conv2


def main(args):
    """Freeze the guide sub-graph of a trained model and dump its guide weights.

    Reads the model parameters from the latest checkpoint in
    `args.checkpoint_dir`, rebuilds the inference graph with fixed-size
    placeholders, writes 'test_graph.pbtxt' and 'frozen_graph.pb' into the
    checkpoint directory, and finally saves the guide parameters as raw
    float32 '.bin' files next to them. Returns early (after logging an error)
    when no checkpoint is found or the stored model name is unknown.

    Args:
      args: parsed command-line arguments; only `checkpoint_dir` is used.
    """
    # Read model parameters
    checkpoint_path = tf.train.latest_checkpoint(args.checkpoint_dir)
    if checkpoint_path is None:
        log.error('Could not find a checkpoint in {}'.format(args.checkpoint_dir))
        return
    metapath = ".".join([checkpoint_path, "meta"])
    log.info("Loading {}".format(metapath))
    tf.train.import_meta_graph(metapath)
    with tf.Session() as sess:
        model_params = utils.get_model_params(sess)

    model_name = model_params['model_name']
    if not hasattr(models, model_name):
        log.error("Model {} does not exist".format(model_name))
        return
    mdl = getattr(models, model_name)

    # Instantiate new evaluation graph
    tf.reset_default_graph()
    sz = model_params['net_input_size']

    log.info("Model {}".format(model_name))

    #
    # identify the input and output tensors to export
    # the part of graph you'd like to freeze
    #
    fullres_input = tf.placeholder(tf.float32, (1, None, None, 3), name='fullres_input')
    input_tensor = tf.placeholder(tf.float32, (1, sz, sz, 3), name='lowres_input')
    with tf.variable_scope('inference'):
        # Builds the whole model; `prediction` is only needed by the
        # whole-graph export variant described below.
        prediction = mdl.inference(input_tensor, fullres_input, model_params, is_training=False)

    # Export separate graphs for deploying models on android: the frozen
    # output is the flattened guide map (first entry of the 'guide'
    # collection for every model type).
    output_tensor = tf.get_collection('guide')[0]
    output_tensor = tf.reshape(output_tensor, [-1], name='guide')
    # Alternative exports (kept for reference):
    #   bilateral coefficients:
    #     output_tensor = tf.get_collection('packed_coefficients')[0]
    #     gs = output_tensor.get_shape().as_list()
    #     output_tensor = tf.reshape(tf.reshape(output_tensor, tf.stack([gs[0], gs[1], gs[2], gs[3], gs[4]*gs[5]])),
    #                                [-1], name="bilateral_coefficients")
    #     output_tensor = tf.transpose(tf.squeeze(output_tensor), [3, 2, 0, 1, 4], name="bilateral_coefficients")
    #   whole graph, when deploying on cloud:
    #     output_tensor = tf.cast(255.0*tf.squeeze(tf.clip_by_value(output_tensor, 0, 1)), tf.uint8, name='output_img')
    log.info("Output shape {}".format(output_tensor.get_shape()))
    saver = tf.train.Saver()

    log.info("Restoring weights from {}".format(checkpoint_path))
    test_graph_name = "test_graph.pbtxt"
    with tf.Session() as sess:
        saver.restore(sess, checkpoint_path)
        tf.train.write_graph(sess.graph, args.checkpoint_dir, test_graph_name)

        input_graph_path = os.path.join(args.checkpoint_dir, test_graph_name)
        output_graph_path = os.path.join(args.checkpoint_dir, "frozen_graph.pb")
        input_saver_def_path = ""
        input_binary = False  # the graph written above is a text .pbtxt
        output_node_names = output_tensor.name.split(":")[0]
        restore_op_name = "save/restore_all"
        filename_tensor_name = "save/Const:0"
        clear_devices = False

        log.info("Freezing to {}".format(output_graph_path))
        freeze_graph.freeze_graph(input_graph_path, input_saver_def_path,
                                  input_binary, checkpoint_path, output_node_names,
                                  restore_op_name, filename_tensor_name,
                                  output_graph_path, clear_devices, "")
        log.info('input tensor: {} {}'.format(input_tensor.name, input_tensor.shape))
        log.info('output tensor: {} {}'.format(output_tensor.name, output_tensor.shape))

        # Dump guide parameters
        g = tf.get_default_graph()
        if model_name == 'HDRNetCurves':
            ccm = g.get_tensor_by_name('inference/guide/ccm:0')
            ccm_bias = g.get_tensor_by_name('inference/guide/ccm_bias:0')
            shifts = g.get_tensor_by_name('inference/guide/shifts:0')
            slopes = g.get_tensor_by_name('inference/guide/slopes:0')
            mixing_weights = g.get_tensor_by_name('inference/guide/channel_mixing/weights:0')
            mixing_bias = g.get_tensor_by_name('inference/guide/channel_mixing/biases:0')

            ccm_, ccm_bias_, shifts_, slopes_, mixing_weights_, mixing_bias_ = sess.run(
                [ccm, ccm_bias, shifts, slopes, mixing_weights, mixing_bias])
            shifts_ = np.squeeze(shifts_).astype(np.float32)
            slopes_ = np.squeeze(slopes_).astype(np.float32)
            # Mixing weights followed by the single bias value.
            mix_matrix_dump = np.append(np.squeeze(mixing_weights_), mixing_bias_[0]).astype(np.float32)
            # Colour matrix with the bias stacked as an extra row.
            ccm34_ = np.vstack((ccm_, ccm_bias_[np.newaxis, :]))

            save(ccm34_.T, os.path.join(args.checkpoint_dir, 'guide_ccm_f32_3x4.bin'))
            save(shifts_.T, os.path.join(args.checkpoint_dir, 'guide_shifts_f32_16x3.bin'))
            save(slopes_.T, os.path.join(args.checkpoint_dir, 'guide_slopes_f32_16x3.bin'))
            save(mix_matrix_dump, os.path.join(args.checkpoint_dir, 'guide_mix_matrix_f32_1x4.bin'))

        elif model_name == "HDRNetGaussianPyrNN":
            # One pointwise guide per pyramid level.
            for lvl in range(mdl.n_scales()):
                conv1, conv2 = _fold_pointwise_guide(
                    sess, g, 'inference/guide/level_{}'.format(lvl))
                save(conv1.T, os.path.join(args.checkpoint_dir, 'guide_level{}_conv1.bin'.format(lvl)))
                save(conv2, os.path.join(args.checkpoint_dir, 'guide_level{}_conv2.bin'.format(lvl)))

        # Exact comparison: the previous `in` test was a substring match that
        # would also accept any fragment of the class name.
        elif model_name == "HDRNetPointwiseNNGuide":
            conv1, conv2 = _fold_pointwise_guide(sess, g, 'inference/guide')
            save(conv1.T, os.path.join(args.checkpoint_dir, 'guide_conv1.bin'))
            save(conv2, os.path.join(args.checkpoint_dir, 'guide_conv2.bin'))
224 |
225 |
# Command-line entry point: the only argument is the checkpoint directory,
# which is also where main() writes every output file.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Positional argument, so it is always required and the default is not used.
    parser.add_argument('checkpoint_dir', default=None, help='')

    args = parser.parse_args()
    main(args)
232 |
--------------------------------------------------------------------------------
/hdrnet/ops/bilateral_slice.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2016 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "tensorflow/core/framework/op.h"
16 | #include "tensorflow/core/framework/op_kernel.h"
17 |
using namespace tensorflow;

// Short aliases for the Eigen device types; the kernel launchers declared
// in this file take a GPUDevice.
typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
22 |
// -- OPS REGISTRATION --------------------------------------------------------
// Forward op: the kernel in this file requires `in` to be a 5D grid
// (batch, height, width, depth, nchannels) and `guide` to be 3D
// (batch, height, width); the output is (batch, height, width, nchannels)
// at the guide's resolution.
REGISTER_OP("BilateralSlice")
    .Input("in: float")
    .Input("guide: float")
    .Output("out: float")
    .Doc(R"doc(
Slices input in in the location defined by guide, to produce output.
)doc");

// Gradient of BilateralSlice: takes the forward inputs plus the incoming
// gradient `backprop` and produces one gradient per forward input.
REGISTER_OP("BilateralSliceGrad")
    .Input("in: float")
    .Input("guide: float")
    .Input("backprop: float")
    .Output("grid_grad: float")
    .Output("guide_grad: float");

// Fused slice-and-apply op: slices `grid` with `guide` and applies the
// result to `input`. `has_offset` presumably selects whether the grid
// carries an additive offset term - TODO confirm against the kernel.
REGISTER_OP("BilateralSliceApply")
    .Input("grid: float")
    .Input("guide: float")
    .Input("input: float")
    .Attr("has_offset: bool")
    .Output("out: float")
    .Doc(R"doc(
Slices input in in the location defined by guide and apply it, to produce output.
)doc");

// Gradient of BilateralSliceApply: one gradient output per forward input.
REGISTER_OP("BilateralSliceApplyGrad")
    .Input("grid: float")
    .Input("guide: float")
    .Input("input: float")
    .Input("backprop: float")
    .Attr("has_offset: bool")
    .Output("grid_grad: float")
    .Output("guide_grad: float")
    .Output("input_grad: float");
// ----------------------------------------------------------------------------
59 |
// -- KERNEL LAUNCHERS --------------------------------------------------------
// Declarations only; the definitions are not in this file (the op kernels
// below refer to them as the cuda kernel launchers). Each returns false when
// the launch failed, which the callers turn into an Internal error status.

// Forward slice. bs/gh/gw/gd/chans are the five grid dimensions, h/w the
// guide (and output) height and width.
bool BilateralSliceKernelLauncher(
    const GPUDevice& d,
    int bs, int gh, int gw, int gd, int chans,
    int h, int w,
    const float* const grid, const float* const guide, float* const out);

// Gradient of the forward slice. The *_size arguments point to the
// dimension sizes of the corresponding tensors.
bool BilateralSliceGradKernelLauncher(
    const GPUDevice& d,
    const float* const grid, const int64* grid_size,
    const float* const guide, const int64* guide_size,
    const float* const backprop,
    float* const grid_grad, float* const guide_grad);

// Fused slice-and-apply. Same grid/guide dimensions as the forward slice,
// plus the channel counts of the image input and of the output.
bool BilateralSliceApplyKernelLauncher(
    const GPUDevice& d,
    int bs, int gh, int gw, int gd,
    int input_chans, int output_chans, bool has_offset,
    int h, int w,
    const float* const grid, const float* const guide, const float* const input,
    float* const out);

// Gradient of the fused slice-and-apply with respect to grid, guide and input.
bool BilateralSliceApplyGradKernelLauncher(
    const GPUDevice& d,
    const float* const grid, const int64* grid_size,
    const float* const guide, const int64* guide_size,
    const float* const input, const int64* input_size,
    const float* const backprop,
    bool has_offset,
    float* const grid_grad, float* const guide_grad, float* const input_grad);
// ----------------------------------------------------------------------------
91 |
92 | // ----------------------------------------------------------------------------
93 | // GPU op that slices a 5D bilateral grid with a 3D guide into a (batch, height, width, channels) float output.
94 | class BilateralSliceOp : public OpKernel {
95 |  public:
96 |   explicit BilateralSliceOp(OpKernelConstruction* context) : OpKernel(context) {}
97 |
98 |   void Compute(OpKernelContext* context) override {
99 |     // Grab the inputs
100 |     const Tensor& bilateral_grid = context->input(0);
101 |     const Tensor& guide = context->input(1);
102 |
103 |     OP_REQUIRES(
104 |         context, bilateral_grid.dims() == 5,
105 |         errors::InvalidArgument(
106 |             R"msg(Input grid should be 5D (batch, height, width, depth, nchannels))msg"));
107 |     OP_REQUIRES(
108 |         context, guide.dims() == 3,
109 |         errors::InvalidArgument(
110 |             R"msg(Guide image should be 3D (batch, height, width))msg"));
111 |     // The kernel walks the grid's batch while the output is sized by the guide's batch,
112 |     // so a mismatch would read or write out of bounds.
113 |     OP_REQUIRES(
114 |         context, guide.dim_size(0) == bilateral_grid.dim_size(0),
115 |         errors::InvalidArgument(
116 |             R"msg(Batch sizes should match.)msg"));
117 |
118 |     // Get shape of output tensor
119 |     TensorShape shape;
120 |     shape.AddDim(guide.dim_size(0)); // Batch size
121 |     shape.AddDim(guide.dim_size(1)); // height
122 |     shape.AddDim(guide.dim_size(2)); // width
123 |     shape.AddDim(bilateral_grid.dim_size(4)); // channels
124 |
125 |     // Allocate output tensor
126 |     Tensor* output_tensor = nullptr;
127 |     OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output_tensor));
128 |     auto output = output_tensor->flat<float>();
129 |
130 |     int h = guide.dim_size(1);
131 |     int w = guide.dim_size(2);
132 |     int bs = bilateral_grid.dim_size(0);
133 |     int gh = bilateral_grid.dim_size(1);
134 |     int gw = bilateral_grid.dim_size(2);
135 |     int gd = bilateral_grid.dim_size(3);
136 |     int chans = bilateral_grid.dim_size(4);
137 |
138 |     // Call the cuda kernel launcher
139 |     if (!context->status().ok()) {
140 |       return;
141 |     }
142 |     bool status = BilateralSliceKernelLauncher(
143 |         context->eigen_device<GPUDevice>(),
144 |         bs, gh, gw, gd, chans,
145 |         h, w,
146 |         bilateral_grid.flat<float>().data(), guide.flat<float>().data(),
147 |         output.data());
148 |     if (!status) {
149 |       context->SetStatus(
150 |           errors::Internal("Failed launch BilateralSliceKernel."));
151 |     }
152 |   }
153 | };
154 |
155 | // GPU op computing the gradients of BilateralSlice with respect to the grid (output 0) and the guide (output 1).
156 | class BilateralSliceGradOp : public OpKernel {
157 |  public:
158 |   explicit BilateralSliceGradOp(OpKernelConstruction* context) : OpKernel(context) {}
159 |
160 |   void Compute(OpKernelContext* context) override {
161 |     // Grab the inputs
162 |     const Tensor& bilateral_grid = context->input(0);
163 |     const Tensor& guide = context->input(1);
164 |     const Tensor& backprop = context->input(2);
165 |
166 |     OP_REQUIRES(
167 |         context, bilateral_grid.dims() == 5,
168 |         errors::InvalidArgument(
169 |             R"msg(Input grid should be 5D (batch, height, width, depth, nchannels))msg"));
170 |     OP_REQUIRES(
171 |         context, guide.dims() == 3,
172 |         errors::InvalidArgument(
173 |             R"msg(Guide image should be 3D (batch, height, width))msg"));
174 |     OP_REQUIRES(
175 |         context, backprop.dims() == 4,
176 |         errors::InvalidArgument(
177 |             R"msg(Backprop should be 4D (batch, height, width, nchannels))msg"));
178 |
179 |     // Each gradient has the shape of the tensor it differentiates.
180 |     TensorShape grid_shape = bilateral_grid.shape();
181 |     TensorShape guide_shape = guide.shape();
182 |
183 |     // Allocate output tensor
184 |     Tensor* grid_grad = nullptr;
185 |     OP_REQUIRES_OK(context, context->allocate_output(0, grid_shape,
186 |                                                      &grid_grad));
187 |     Tensor* guide_grad = nullptr;
188 |     OP_REQUIRES_OK(context, context->allocate_output(1, guide_shape,
189 |                                                      &guide_grad));
190 |     // Copy the dims into local arrays: dim_sizes() returns a temporary, so a pointer taken from its data() would dangle.
191 |     const int64 grid_size[5]{bilateral_grid.dim_size(0), bilateral_grid.dim_size(1),
192 |                              bilateral_grid.dim_size(2), bilateral_grid.dim_size(3),
193 |                              bilateral_grid.dim_size(4)};
194 |     const int64 guide_size[3]{guide.dim_size(0), guide.dim_size(1), guide.dim_size(2)};
195 |     auto grid_grad_array = grid_grad->flat<float>();
196 |     auto guide_grad_array = guide_grad->flat<float>();
197 |     // Call the cuda kernel launcher
198 |     bool status = BilateralSliceGradKernelLauncher(
199 |         context->eigen_device<GPUDevice>(),
200 |         bilateral_grid.flat<float>().data(), grid_size,
201 |         guide.flat<float>().data(), guide_size,
202 |         backprop.flat<float>().data(),
203 |         grid_grad_array.data(), guide_grad_array.data());
204 |
205 |     if (!status) {
206 |       context->SetStatus(
207 |           errors::Internal("Failed launch BilateralSliceGradKernel."));
208 |     }
209 |   }
210 | };
211 |
212 | // GPU op that slices a grid of per-pixel coefficients with the guide and applies them to the input image.
213 | class BilateralSliceApplyOp : public OpKernel {
214 |  private:
215 |   bool has_offset;  // "has_offset" attribute: the grid holds one extra (offset) coefficient per output channel
216 |
217 |  public:
218 |   explicit BilateralSliceApplyOp(OpKernelConstruction* context) : OpKernel(context) {
219 |     OP_REQUIRES_OK(context, context->GetAttr("has_offset", &has_offset));
220 |   }
221 |
222 |   void Compute(OpKernelContext* context) override {
223 |     // Grab the inputs
224 |     const Tensor& bilateral_grid = context->input(0);
225 |     const Tensor& guide = context->input(1);
226 |     const Tensor& input = context->input(2);
227 |
228 |     // Check tensor dims
229 |     OP_REQUIRES(
230 |         context, bilateral_grid.dims() == 5,
231 |         errors::InvalidArgument(
232 |             R"msg(Input grid should be 5D (batch, height, width, depth, nchannels))msg"));
233 |     OP_REQUIRES(
234 |         context, guide.dims() == 3,
235 |         errors::InvalidArgument(
236 |             R"msg(Guide image should be 3D (batch, height, width))msg"));
237 |     OP_REQUIRES(
238 |         context, input.dims() == 4,
239 |         errors::InvalidArgument(
240 |             R"msg(Input image should be 4D (batch, height, width, nchannels))msg"));
241 |
242 |     // Sizes
243 |     int h = guide.dim_size(1);
244 |     int w = guide.dim_size(2);
245 |     int bs = bilateral_grid.dim_size(0);
246 |     int gh = bilateral_grid.dim_size(1);
247 |     int gw = bilateral_grid.dim_size(2);
248 |     int gd = bilateral_grid.dim_size(3);
249 |     int coeffs_chans = bilateral_grid.dim_size(4);
250 |     int input_chans = input.dim_size(3);
251 |     // Without an offset, input_chans is the divisor used below, so it must not be zero.
252 |     OP_REQUIRES(context, has_offset || input_chans > 0,
253 |                 errors::InvalidArgument(R"msg(Input should have at least one channel.)msg"));
254 |     OP_REQUIRES(
255 |         context, input.dim_size(0) == guide.dim_size(0) && input.dim_size(1) == h && input.dim_size(2) == w,
256 |         errors::InvalidArgument(
257 |             R"msg(Input and guide size should match.)msg"));
258 |     OP_REQUIRES(
259 |         context, guide.dim_size(0) == bs,
260 |         errors::InvalidArgument(
261 |             R"msg(Batch sizes should match.)msg"));
262 |
263 |     int output_chans = 0;  // derived from the grid's channel count and the input's channel count
264 |     if (has_offset) {
265 |       OP_REQUIRES(
266 |           context, coeffs_chans % (input_chans+1) == 0,
267 |           errors::InvalidArgument(
268 |               R"msg(Slicing with affine offset, coefficients grid should have n_out*(n_in+1) channels.)msg"));
269 |       output_chans = coeffs_chans / (input_chans+1);
270 |     } else {
271 |       OP_REQUIRES(
272 |           context, coeffs_chans % input_chans == 0,
273 |           errors::InvalidArgument(
274 |               R"msg(Slicing without affine offset, coefficients grid should have n_out*n_in channels.)msg"));
275 |       output_chans = coeffs_chans / input_chans;
276 |     }
277 |
278 |     // Allocate output tensor
279 |     TensorShape out_shape;
280 |     out_shape.AddDim(bs);
281 |     out_shape.AddDim(h);
282 |     out_shape.AddDim(w);
283 |     out_shape.AddDim(output_chans);
284 |     Tensor* output_tensor = nullptr;
285 |     OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output_tensor));
286 |
287 |     // Call the cuda kernel launcher
288 |     auto output = output_tensor->flat<float>();
289 |     bool status = BilateralSliceApplyKernelLauncher(
290 |         context->eigen_device<GPUDevice>(),
291 |         bs, gh, gw, gd,
292 |         input_chans, output_chans, has_offset,
293 |         h, w,
294 |         bilateral_grid.flat<float>().data(), guide.flat<float>().data(), input.flat<float>().data(),
295 |         output.data());
296 |
297 |     if (!status) {
298 |       context->SetStatus(
299 |           errors::Internal("Failed to launch BilateralSliceApplyKernel."));
300 |     }
301 |   }
302 | };
303 | // GPU op computing the gradients of BilateralSliceApply with respect to the grid (output 0), guide (output 1) and input (output 2).
304 | class BilateralSliceApplyGradOp : public OpKernel {
305 |  private:
306 |   bool has_offset;  // "has_offset" attribute, forwarded unchanged to the kernel launcher
307 |
308 |  public:
309 |   explicit BilateralSliceApplyGradOp(OpKernelConstruction* context) : OpKernel(context) {
310 |     OP_REQUIRES_OK(context, context->GetAttr("has_offset", &has_offset));
311 |   }
312 |
313 |   void Compute(OpKernelContext* context) override {
314 |     // Grab the inputs
315 |     const Tensor& bilateral_grid = context->input(0);
316 |     const Tensor& guide = context->input(1);
317 |     const Tensor& input = context->input(2);
318 |     const Tensor& backprop = context->input(3);
319 |
320 |     OP_REQUIRES(
321 |         context, bilateral_grid.dims() == 5,
322 |         errors::InvalidArgument(
323 |             R"msg(Input grid should be 5D (batch, height, width, depth, nchannels))msg"));
324 |     OP_REQUIRES(
325 |         context, guide.dims() == 3,
326 |         errors::InvalidArgument(
327 |             R"msg(Guide image should be 3D (batch, height, width))msg"));
328 |     OP_REQUIRES(
329 |         context, input.dims() == 4,
330 |         errors::InvalidArgument(
331 |             R"msg(Input image should be 4D (batch, height, width, nchannels))msg"));
332 |     OP_REQUIRES(
333 |         context, backprop.dims() == 4,
334 |         errors::InvalidArgument(
335 |             R"msg(Backprop should be 4D (batch, height, width, nchannels))msg"));
336 |
337 |     // Each gradient has the shape of the tensor it differentiates.
338 |     TensorShape grid_shape = bilateral_grid.shape();
339 |     TensorShape guide_shape = guide.shape();
340 |     TensorShape input_shape = input.shape();
341 |
342 |     // Allocate output tensor
343 |     Tensor* grid_grad = nullptr;
344 |     OP_REQUIRES_OK(context, context->allocate_output(0, grid_shape,
345 |                                                      &grid_grad));
346 |     Tensor* guide_grad = nullptr;
347 |     OP_REQUIRES_OK(context, context->allocate_output(1, guide_shape,
348 |                                                      &guide_grad));
349 |     Tensor* input_grad = nullptr;
350 |     OP_REQUIRES_OK(context, context->allocate_output(2, input_shape,
351 |                                                      &input_grad));
352 |
353 |     int64 grid_size[5]{bilateral_grid.dim_size(0),  // dims copied into locals that outlive the launcher call
354 |                        bilateral_grid.dim_size(1),
355 |                        bilateral_grid.dim_size(2),
356 |                        bilateral_grid.dim_size(3),
357 |                        bilateral_grid.dim_size(4)};
358 |     int64 guide_size[3]{guide.dim_size(0),
359 |                         guide.dim_size(1),
360 |                         guide.dim_size(2)};
361 |     int64 input_size[4]{input.dim_size(0),
362 |                         input.dim_size(1),
363 |                         input.dim_size(2),
364 |                         input.dim_size(3)};
365 |
366 |     auto grid_grad_array = grid_grad->flat<float>();
367 |     auto guide_grad_array = guide_grad->flat<float>();
368 |     auto input_grad_array = input_grad->flat<float>();
369 |
370 |     // Call the cuda kernel launcher
371 |     bool status = BilateralSliceApplyGradKernelLauncher(
372 |         context->eigen_device<GPUDevice>(),
373 |         bilateral_grid.flat<float>().data(), grid_size,
374 |         guide.flat<float>().data(), guide_size,
375 |         input.flat<float>().data(), input_size,
376 |         backprop.flat<float>().data(), has_offset,
377 |         grid_grad_array.data(), guide_grad_array.data(), input_grad_array.data());
378 |
379 |     if (!status) {
380 |       context->SetStatus(
381 |           errors::Internal("Failed launch BilateralSliceApplyGradKernel."));
382 |     }
383 |   }
384 | };
385 | // ----------------------------------------------------------------------------
386 |
387 | // -- KERNEL REGISTRATION (all four ops are registered for DEVICE_GPU) --------
388 | REGISTER_KERNEL_BUILDER(Name("BilateralSlice").Device(DEVICE_GPU), BilateralSliceOp);
389 | REGISTER_KERNEL_BUILDER(Name("BilateralSliceGrad").Device(DEVICE_GPU), BilateralSliceGradOp);
390 | REGISTER_KERNEL_BUILDER(Name("BilateralSliceApply").Device(DEVICE_GPU), BilateralSliceApplyOp);
391 | REGISTER_KERNEL_BUILDER(Name("BilateralSliceApplyGrad").Device(DEVICE_GPU), BilateralSliceApplyGradOp);
392 | // ----------------------------------------------------------------------------
393 |
--------------------------------------------------------------------------------
/hdrnet/ops/bilateral_slice.cu.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2016 Google Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #if GOOGLE_CUDA
16 |
17 | #define EIGEN_USE_GPU
18 |
19 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
20 | #include "tensorflow/core/util/cuda_kernel_helper.h"
21 |
22 | #include "math.h"
23 |
24 | #include
25 |
26 | using namespace tensorflow;
27 |
28 | typedef Eigen::GpuDevice GPUDevice;
29 | // Smooth stand-in for |x|: sqrt(x*x + eps) with eps = 1e-8, so it stays differentiable at x = 0.
30 | __device__ float diff_abs(float x) {
31 |   const float eps = 1e-8;
32 |   return sqrt(eps + x*x);
33 | }
34 | // Derivative of diff_abs with respect to x: x / sqrt(x*x + eps), using the same eps = 1e-8.
35 | __device__ float d_diff_abs(float x) {
36 |   const float eps = 1e-8;
37 |   return x/sqrt(eps + x*x);
38 | }
39 | // Linear weight along z: 1 - diff_abs(x), floored at zero.
40 | __device__ float weight_z(float x) {
41 |   return max(1.0f-diff_abs(x), 0.0f);
42 | }
43 |
44 | // Slope term used by the guide-gradient kernels: 0 where diff_abs(x) > 1, d_diff_abs(x) otherwise.
45 | __device__ float d_weight_z(float x) {
46 |   const float magnitude = diff_abs(x);
47 |   // Past the point where weight_z is floored at zero there is no slope.
48 |   if (magnitude > 1.0f) {
49 |     return 0.0f;
50 |   }
51 |   // NOTE(review): this is +d_diff_abs(x); the callers in this file pass x = zz+0.5-gz, where that sign is the derivative of weight_z with respect to gz.
52 |   return d_diff_abs(x);
53 | }
54 | // Forward slice: each loop iteration writes out[idx] for one (b, y, x, c) element by interpolating the 2x2x2 grid cells around it.
55 | __global__ void BilateralSliceKernel(
56 |     int64 nthreads,
57 |     const float* grid, const float* guide,
58 |     const int bs, const int h, const int w, const int chans,
59 |     const int gh, const int gw, const int gd,
60 |     float* out)
61 | {
62 |   // - Samples centered at 0.5.
63 |   // - Repeating boundary conditions
64 |
65 |   CUDA_1D_KERNEL_LOOP(idx, nthreads) {
66 |     int c = idx % chans;  // flat output index: c varies fastest, then x, y, b
67 |     int x = (idx / chans) % w;
68 |     int y = (idx / (chans*w)) % h;
69 |     int b = (idx / (chans*w*h));
70 |
71 |     float gx = (x+0.5f)*gw/(1.0f*w);  // pixel centre expressed in grid units
72 |     float gy = (y+0.5f)*gh/(1.0f*h);
73 |     float gz = guide[x + w*(y + h*b)]*gd;  // guide value scaled by the grid depth
74 |
75 |     int fx = static_cast<int>(floor(gx-0.5f));  // first of the two cells visited along each axis
76 |     int fy = static_cast<int>(floor(gy-0.5f));
77 |     int fz = static_cast<int>(floor(gz-0.5f));
78 |
79 |     int sz = chans;  // grid strides: channel is innermost, then z, x, y, batch
80 |     int sx = chans*gd;
81 |     int sy = chans*gd*gw;
82 |     int sb = chans*gd*gw*gh;
83 |
84 |     float value = 0.0f;
85 |     for (int xx = fx; xx < fx+2; ++xx) {
86 |       int x_ = max(min(xx, gw-1), 0);  // clamp the cell index to the grid
87 |       float wx = max(1.0f-abs(xx+0.5-gx), 0.0f);  // linear weight from the distance to the cell centre
88 |       for (int yy = fy; yy < fy+2; ++yy)
89 |       {
90 |         int y_ = max(min(yy, gh-1), 0);
91 |         float wy = max(1.0f-abs(yy+0.5-gy), 0.0f);
92 |         for (int zz = fz; zz < fz+2; ++zz)
93 |         {
94 |           int z_ = max(min(zz, gd-1), 0);
95 |           float wz = weight_z(zz+0.5-gz);
96 |           int grid_idx = c + sz*z_ + sx*x_ + sy*y_ + sb*b;
97 |           value += grid[grid_idx]*wx*wy*wz;
98 |         }
99 |       }
100 |     }
101 |     out[idx] = value;
102 |   }
103 | }
104 | // Grid gradient of BilateralSlice: each loop iteration accumulates, for one grid entry (b, gy, gx, gz, c), the weighted backprop of the pixels around it.
105 | __global__ void BilateralSliceGridGradKernel(
106 |     int64 nthreads,
107 |     const float* grid, const float* guide, const float* backprop,
108 |     const int bs, const int h, const int w, const int chans,
109 |     const int gh, const int gw, const int gd,
110 |     float* out)
111 | {
112 |   CUDA_1D_KERNEL_LOOP(idx, nthreads) {
113 |     int c = idx % chans;  // flat grid index: c varies fastest, then gz, gx, gy, b
114 |     int gz = (idx / chans) % gd;
115 |     int gx = (idx / (chans*gd)) % gw;
116 |     int gy = (idx / (chans*gd*gw)) % gh;
117 |     int b = (idx / (chans*gd*gw*gh));
118 |
119 |     float scale_w = w*1.0/gw;  // pixels per grid cell
120 |     float scale_h = h*1.0/gh;
121 |
122 |     int left_x = static_cast<int>(floor(scale_w*(gx+0.5-1)));  // pixel window reaching one cell either side of this cell's centre
123 |     int right_x = static_cast<int>(ceil(scale_w*(gx+0.5+1)));
124 |     int left_y = static_cast<int>(floor(scale_h*(gy+0.5-1)));
125 |     int right_y = static_cast<int>(ceil(scale_h*(gy+0.5+1)));
126 |
127 |     int sx = chans;  // backprop strides: channel is innermost, then x, y, batch
128 |     int sy = chans*w;
129 |     int sb = chans*w*h;
130 |
131 |     float value = 0.0f;
132 |     for (int x = left_x; x < right_x; ++x)
133 |     {
134 |       int x_ = x;
135 |
136 |       // mirror boundary
137 |       if (x_ < 0) x_ = -x_-1;
138 |       if (x_ >= w) x_ = 2*w-1-x_;
139 |
140 |       // x_ = max(min(x_, w-1), 0);
141 |       float gx2 = (x+0.5f)/scale_w;  // the weight uses the unmirrored position x
142 |       float wx = max(1.0f-abs(gx+0.5-gx2), 0.0f);
143 |
144 |       for (int y = left_y; y < right_y; ++y)
145 |       {
146 |         int y_ = y;
147 |
148 |         // mirror boundary
149 |         if (y_ < 0) y_ = -y_-1;
150 |         if (y_ >= h) y_ = 2*h-1-y_;
151 |
152 |         // y_ = max(min(y_, h-1), 0);
153 |         float gy2 = (y+0.5f)/scale_h;
154 |         float wy = max(1.0f-abs(gy+0.5-gy2), 0.0f);
155 |
156 |         int guide_idx = x_ + w*y_ + h*w*b;
157 |         float gz2 = guide[guide_idx]*gd;
158 |         // float wz = max(1.0f-diff_abs(gz+0.5f - gz2), 0.0f);
159 |         float wz = weight_z(gz+0.5f-gz2);
160 |         if ((gz==0 && gz2<0.5f) || (gz==gd-1 && gz2>gd-0.5f)) {
161 |           wz = 1.0f;  // guide lies beyond the centre of the first/last depth slice: that slice takes the full z weight
162 |         }
163 |
164 |         int back_idx = c + sx*x_ + sy*y_ + sb*b;
165 |         value += wz*wx*wy*backprop[back_idx];
166 |       }
167 |     }
168 |     out[idx] = value;
169 |   }
170 | }
171 | // Guide gradient of BilateralSlice: each loop iteration writes out[idx] for one pixel (b, y, x), summed over all channels.
172 | __global__ void BilateralSliceGuideGradKernel(
173 |     int64 nthreads,
174 |     const float* grid, const float* guide, const float* backprop,
175 |     const int bs, const int h, const int w, const int chans,
176 |     const int gh, const int gw, const int gd,
177 |     float* out)
178 | {
179 |   CUDA_1D_KERNEL_LOOP(idx, nthreads) {
180 |     int x = idx % w;  // flat guide index: x varies fastest, then y, b
181 |     int y = (idx / w) % h;
182 |     int b = (idx / (w*h));
183 |
184 |     float gx = (x+0.5f)*gw/(1.0f*w);  // pixel centre expressed in grid units
185 |     float gy = (y+0.5f)*gh/(1.0f*h);
186 |     float gz = guide[x + w*(y + h*b)]*gd;
187 |
188 |     int fx = static_cast<int>(floor(gx-0.5f));  // first of the two cells visited along each axis
189 |     int fy = static_cast<int>(floor(gy-0.5f));
190 |     int fz = static_cast<int>(floor(gz-0.5f));
191 |
192 |     int sz = chans;  // grid strides: channel is innermost, then z, x, y, batch
193 |     int sx = chans*gd;
194 |     int sy = chans*gd*gw;
195 |     int sb = chans*gd*gw*gh;
196 |
197 |     float value = 0.0f;
198 |     for (int c = 0; c < chans; ++c) {
199 |       float chan_val = 0.0f;
200 |       for (int xx = fx; xx < fx+2; ++xx) {
201 |         int x_ = max(min(xx, gw-1), 0);
202 |         float wx = max(1.0f-abs(xx+0.5-gx), 0.0f);
203 |         for (int yy = fy; yy < fy+2; ++yy)
204 |         {
205 |           int y_ = max(min(yy, gh-1), 0);
206 |           float wy = max(1.0f-abs(yy+0.5-gy), 0.0f);
207 |           for (int zz = fz; zz < fz+2; ++zz)
208 |           {
209 |             int z_ = max(min(zz, gd-1), 0);
210 |             float dwz = gd*d_weight_z(zz+0.5-gz);  // the gd factor comes from gz = guide*gd
211 |
212 |             int grid_idx = c + sz*z_ + sx*x_ + sy*y_ + sb*b;
213 |             chan_val += grid[grid_idx]*wx*wy*dwz;
214 |           }
215 |         }
216 |       }
217 |       chan_val *= backprop[c + chans*(x + w*(y + h*b))];  // scale by the incoming gradient of this channel
218 |       value += chan_val;
219 |     }
220 |     out[idx] = value;
221 |   }
222 | }
223 | // The kernel is defined further down in this file, so it is declared here before the launcher uses it.
224 | __global__ void BilateralSliceApplyKernel(
225 |     int64, const float*, const float*, const float*,
226 |     int, int, int, int, int, int, int, int, bool, float*);
227 | // Launches BilateralSliceApplyKernel with one work item per output element (skipped when there are none); returns d.ok().
228 | bool BilateralSliceApplyKernelLauncher(
229 |     const GPUDevice& d, int bs, int gh, int gw, int gd,
230 |     int input_chans, int output_chans, bool has_offset, int h, int w,
231 |     const float* const grid, const float* const guide, const float* const input, float* const out) {
232 |   int total_count = bs*h*w*output_chans;
233 |   if (total_count > 0) {
234 |     CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
235 |     BilateralSliceApplyKernel<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
236 |         total_count, grid, guide, input,
237 |         bs, h, w, gh, gw, gd, input_chans, output_chans, has_offset,
238 |         out);
239 |   }
240 |
241 |   return d.ok();
242 | }
243 | // Forward slice-and-apply: each loop iteration writes one output channel of one pixel as the sum of sliced coefficients times input channels, plus a sliced offset when has_offset is set.
244 | __global__ void BilateralSliceApplyKernel(
245 |     int64 nthreads,
246 |     const float* grid, const float* guide, const float* input,
247 |     const int bs, const int h, const int w,
248 |     const int gh, const int gw, const int gd,
249 |     const int input_chans, const int output_chans, const bool has_offset,
250 |     float* out)
251 | {
252 |   // - Samples centered at 0.5.
253 |   // - Repeating boundary conditions
254 |
255 |   int grid_chans = input_chans*output_chans;
256 |   int coeff_stride = input_chans;  // number of coefficients stored per output channel
257 |   if(has_offset) {
258 |     grid_chans += output_chans;
259 |     coeff_stride += 1;
260 |   }
261 |
262 |   CUDA_1D_KERNEL_LOOP(idx, nthreads) {
263 |     int out_c = idx % output_chans;  // flat output index: out_c varies fastest, then x, y, b
264 |     int x = (idx / output_chans) % w;
265 |     int y = (idx / (output_chans*w)) % h;
266 |     int b = (idx / (output_chans*w*h));
267 |
268 |     float gx = (x+0.5f)*gw/(1.0f*w);  // pixel centre expressed in grid units
269 |     float gy = (y+0.5f)*gh/(1.0f*h);
270 |     float gz = guide[x + w*(y + h*b)]*gd;
271 |
272 |     int fx = static_cast<int>(floor(gx-0.5f));  // first of the two cells visited along each axis
273 |     int fy = static_cast<int>(floor(gy-0.5f));
274 |     int fz = static_cast<int>(floor(gz-0.5f));
275 |
276 |
277 |     // Grid strides
278 |     int sz = grid_chans;
279 |     int sx = grid_chans*gd;
280 |     int sy = grid_chans*gd*gw;
281 |     int sb = grid_chans*gd*gw*gh;
282 |
283 |     float value = 0.0f;
284 |     for (int in_c = 0; in_c < coeff_stride; ++in_c) {
285 |       float coeff_sample = 0.0f;
286 |       for (int xx = fx; xx < fx+2; ++xx) {
287 |         int x_ = max(min(xx, gw-1), 0);
288 |         float wx = max(1.0f-abs(xx+0.5-gx), 0.0f);
289 |         for (int yy = fy; yy < fy+2; ++yy)
290 |         {
291 |           int y_ = max(min(yy, gh-1), 0);
292 |           float wy = max(1.0f-abs(yy+0.5-gy), 0.0f);
293 |           for (int zz = fz; zz < fz+2; ++zz)
294 |           {
295 |             int z_ = max(min(zz, gd-1), 0);
296 |             float wz = weight_z(zz+0.5-gz);
297 |             int grid_idx = (coeff_stride*out_c + in_c) + sz*z_ + sx*x_ + sy*y_ + sb*b;
298 |             coeff_sample += grid[grid_idx]*wx*wy*wz;
299 |           }
300 |         }
301 |       } // Grid trilinear interpolation
302 |       if(in_c < input_chans) {
303 |         int input_idx = in_c + input_chans*(x + w*(y + h*b));
304 |         value += coeff_sample*input[input_idx];
305 |       } else { // Offset term
306 |         value += coeff_sample;
307 |       }
308 |     }
309 |     out[idx] = value;
310 |   }
311 | }
312 |
313 | // Grid gradient of BilateralSliceApply: each loop iteration accumulates, for one grid coefficient (b, gy, gx, gz, c), the weighted backprop (times the matching input channel unless c is an offset term) of the pixels around it.
314 | __global__ void BilateralSliceApplyGridGradKernel(
315 |     int64 nthreads,
316 |     const float* grid, const float* guide, const float* input, const float* backprop,
317 |     const int bs, const int h, const int w,
318 |     const int gh, const int gw, const int gd,
319 |     const int input_chans, const int output_chans, const bool has_offset,
320 |     float* out)
321 | {
322 |   int grid_chans = input_chans*output_chans;
323 |   int coeff_stride = input_chans;  // number of coefficients stored per output channel
324 |   if(has_offset) {
325 |     grid_chans += output_chans;
326 |     coeff_stride += 1;
327 |   }
328 |
329 |   CUDA_1D_KERNEL_LOOP(idx, nthreads) {
330 |     int c = idx % grid_chans;  // flat grid index: c varies fastest, then gz, gx, gy, b
331 |     int gz = (idx / grid_chans) % gd;
332 |     int gx = (idx / (grid_chans*gd)) % gw;
333 |     int gy = (idx / (grid_chans*gd*gw)) % gh;
334 |     int b = (idx / (grid_chans*gd*gw*gh));
335 |
336 |     float scale_w = w*1.0/gw;  // pixels per grid cell
337 |     float scale_h = h*1.0/gh;
338 |
339 |     int left_x = static_cast<int>(floor(scale_w*(gx+0.5-1)));  // pixel window reaching one cell either side of this cell's centre
340 |     int right_x = static_cast<int>(ceil(scale_w*(gx+0.5+1)));
341 |     int left_y = static_cast<int>(floor(scale_h*(gy+0.5-1)));
342 |     int right_y = static_cast<int>(ceil(scale_h*(gy+0.5+1)));
343 |
344 |     // Strides in the output
345 |     int sx = output_chans;
346 |     int sy = output_chans*w;
347 |     int sb = output_chans*w*h;
348 |
349 |     // Strides in the input
350 |     int isx = input_chans;
351 |     int isy = input_chans*w;
352 |     int isb = input_chans*w*h;
353 |
354 |     int out_c = c / coeff_stride;  // which output channel this coefficient feeds
355 |     int in_c = c % coeff_stride;   // which input channel it multiplies (== input_chans for the offset)
356 |
357 |     float value = 0.0f;
358 |     for (int x = left_x; x < right_x; ++x)
359 |     {
360 |       int x_ = x;
361 |
362 |       // mirror boundary
363 |       if (x_ < 0) x_ = -x_-1;
364 |       if (x_ >= w) x_ = 2*w-1-x_;
365 |
366 |       float gx2 = (x+0.5f)/scale_w;  // the weight uses the unmirrored position x
367 |       float wx = max(1.0f-abs(gx+0.5-gx2), 0.0f);
368 |
369 |       for (int y = left_y; y < right_y; ++y)
370 |       {
371 |         int y_ = y;
372 |
373 |         // mirror boundary
374 |         if (y_ < 0) y_ = -y_-1;
375 |         if (y_ >= h) y_ = 2*h-1-y_;
376 |
377 |         float gy2 = (y+0.5f)/scale_h;
378 |         float wy = max(1.0f-abs(gy+0.5-gy2), 0.0f);
379 |
380 |         int guide_idx = x_ + w*y_ + h*w*b;
381 |         float gz2 = guide[guide_idx]*gd;
382 |         float wz = weight_z(gz+0.5f-gz2);
383 |         if ((gz==0 && gz2<0.5f) || (gz==gd-1 && gz2>gd-0.5f)) {
384 |           wz = 1.0f;  // guide lies beyond the centre of the first/last depth slice: that slice takes the full z weight
385 |         }
386 |
387 |         int back_idx = out_c + sx*x_ + sy*y_ + sb*b;
388 |         if (in_c < input_chans) {
389 |           int input_idx = in_c + isx*x_ + isy*y_ + isb*b;
390 |           value += wz*wx*wy*backprop[back_idx]*input[input_idx];
391 |         } else { // offset term
392 |           value += wz*wx*wy*backprop[back_idx];
393 |         }
394 |       }
395 |     }
396 |     out[idx] = value;
397 |   }
398 | }
399 |
400 | // Guide gradient of BilateralSliceApply: each loop iteration writes out[idx] for one pixel (b, y, x), summed over all output and input channels.
401 | __global__ void BilateralSliceApplyGuideGradKernel(
402 |     int64 nthreads,
403 |     const float* grid, const float* guide, const float* input, const float* backprop,
404 |     const int bs, const int h, const int w,
405 |     const int gh, const int gw, const int gd,
406 |     const int input_chans, const int output_chans, const bool has_offset,
407 |     float* out)
408 | {
409 |
410 |   int grid_chans = input_chans*output_chans;
411 |   int coeff_stride = input_chans;  // number of coefficients stored per output channel
412 |   if(has_offset) {
413 |     grid_chans += output_chans;
414 |     coeff_stride += 1;
415 |   }
416 |
417 |   CUDA_1D_KERNEL_LOOP(idx, nthreads) {
418 |     int x = idx % w;  // flat guide index: x varies fastest, then y, b
419 |     int y = (idx / w) % h;
420 |     int b = (idx / (w*h));
421 |
422 |     float gx = (x+0.5f)*gw/(1.0f*w);  // pixel centre expressed in grid units
423 |     float gy = (y+0.5f)*gh/(1.0f*h);
424 |     float gz = guide[x + w*(y + h*b)]*gd;
425 |
426 |     int fx = static_cast<int>(floor(gx-0.5f));  // first of the two cells visited along each axis
427 |     int fy = static_cast<int>(floor(gy-0.5f));
428 |     int fz = static_cast<int>(floor(gz-0.5f));
429 |
430 |     // Grid stride
431 |     int sz = grid_chans;
432 |     int sx = grid_chans*gd;
433 |     int sy = grid_chans*gd*gw;
434 |     int sb = grid_chans*gd*gw*gh;
435 |
436 |     float out_sum = 0.0f;
437 |     for (int out_c = 0; out_c < output_chans; ++out_c) {
438 |
439 |       float in_sum = 0.0f;
440 |       for (int in_c = 0; in_c < coeff_stride; ++in_c) {
441 |
442 |         float grid_sum = 0.0f;
443 |         for (int xx = fx; xx < fx+2; ++xx) {
444 |           int x_ = max(min(xx, gw-1), 0);
445 |           float wx = max(1.0f-abs(xx+0.5-gx), 0.0f);
446 |           for (int yy = fy; yy < fy+2; ++yy)
447 |           {
448 |             int y_ = max(min(yy, gh-1), 0);
449 |             float wy = max(1.0f-abs(yy+0.5-gy), 0.0f);
450 |             for (int zz = fz; zz < fz+2; ++zz)
451 |             {
452 |               int z_ = max(min(zz, gd-1), 0);
453 |               float dwz = gd*d_weight_z(zz+0.5-gz);  // the gd factor comes from gz = guide*gd
454 |
455 |               int grid_idx = (coeff_stride*out_c + in_c) + sz*z_ + sx*x_ + sy*y_ + sb*b;
456 |               grid_sum += grid[grid_idx]*wx*wy*dwz;
457 |             } // z
458 |           } // y
459 |         } // x, grid trilinear interp
460 |
461 |         if(in_c < input_chans) {
462 |           in_sum += grid_sum*input[in_c + input_chans*(x + w*(y + h*b))];
463 |         } else { // offset term
464 |           in_sum += grid_sum;
465 |         }
466 |       } // in_c
467 |
468 |       out_sum += in_sum*backprop[out_c + output_chans*(x + w*(y + h*b))];
469 |     } // out_c
470 |
471 |     out[idx] = out_sum;
472 |   }
473 | }
474 |
475 | // Input gradient of BilateralSliceApply: each loop iteration writes out[idx] for one input element (b, y, x, in_c), summing sliced coefficient times backprop over the output channels.
476 | __global__ void BilateralSliceApplyInputGradKernel(
477 |     int64 nthreads,
478 |     const float* grid, const float* guide, const float* input, const float* backprop,
479 |     const int bs, const int h, const int w,
480 |     const int gh, const int gw, const int gd,
481 |     const int input_chans, const int output_chans, const bool has_offset,
482 |     float* out)
483 | {
484 |   int grid_chans = input_chans*output_chans;
485 |   int coeff_stride = input_chans;  // number of coefficients stored per output channel
486 |   if(has_offset) {
487 |     grid_chans += output_chans;
488 |     coeff_stride += 1;
489 |   }
490 |
491 |   CUDA_1D_KERNEL_LOOP(idx, nthreads) {
492 |     int in_c = idx % input_chans;  // flat input index: in_c varies fastest, then x, y, b
493 |     int x = (idx / input_chans) % w;
494 |     int y = (idx / (input_chans*w)) % h;
495 |     int b = (idx / (input_chans*w*h));
496 |
497 |     float gx = (x+0.5f)*gw/(1.0f*w);  // pixel centre expressed in grid units
498 |     float gy = (y+0.5f)*gh/(1.0f*h);
499 |     float gz = guide[x + w*(y + h*b)]*gd;
500 |
501 |     int fx = static_cast<int>(floor(gx-0.5f));  // first of the two cells visited along each axis
502 |     int fy = static_cast<int>(floor(gy-0.5f));
503 |     int fz = static_cast<int>(floor(gz-0.5f));
504 |
505 |     // Grid stride
506 |     int sz = grid_chans;
507 |     int sx = grid_chans*gd;
508 |     int sy = grid_chans*gd*gw;
509 |     int sb = grid_chans*gd*gw*gh;
510 |
511 |     float value = 0.0f;
512 |     for (int out_c = 0; out_c < output_chans; ++out_c) {
513 |       float chan_val = 0.0f;
514 |       for (int xx = fx; xx < fx+2; ++xx) {
515 |         int x_ = max(min(xx, gw-1), 0);
516 |         float wx = max(1.0f-abs(xx+0.5-gx), 0.0f);
517 |         for (int yy = fy; yy < fy+2; ++yy)
518 |         {
519 |           int y_ = max(min(yy, gh-1), 0);
520 |           float wy = max(1.0f-abs(yy+0.5-gy), 0.0f);
521 |           for (int zz = fz; zz < fz+2; ++zz)
522 |           {
523 |
524 |             int z_ = max(min(zz, gd-1), 0);
525 |
526 |             float wz = weight_z(zz+0.5-gz);
527 |
528 |             int grid_idx = (coeff_stride*out_c + in_c) + sz*z_ + sx*x_ + sy*y_ + sb*b;
529 |             chan_val += grid[grid_idx]*wx*wy*wz;
530 |           } // z
531 |         } // y
532 |       } // x, grid trilinear interp
533 |
534 |       value += chan_val*backprop[out_c + output_chans*(x + w*(y + h*b))];
535 |     } // out_c
536 |     out[idx] = value;
537 |   }
538 | }
539 |
540 |
541 | // -- KERNEL LAUNCHERS ---------------------------------------------------------
542 | // Launches BilateralSliceKernel with one work item per output element (skipped when there are none); returns d.ok().
543 | bool BilateralSliceKernelLauncher(
544 |     const GPUDevice& d,
545 |     int bs, int gh, int gw, int gd, int chans, int h, int w,
546 |     const float* const grid, const float* const guide, float* const out)
547 | {
548 |   int total_count = bs*h*w*chans;
549 |   if (total_count > 0) {
550 |     CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
551 |     BilateralSliceKernel<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
552 |         total_count, grid, guide,
553 |         bs, h, w, chans, gh, gw, gd,
554 |         out);
555 |   }
556 |
557 |   return d.ok();
558 | }
559 | // Launches the grid-gradient kernel (one work item per grid entry) and the guide-gradient kernel (one per pixel); returns d.ok().
560 | bool BilateralSliceGradKernelLauncher(
561 |     const GPUDevice& d,
562 |     const float* grid, const int64* grid_size,
563 |     const float* guide, const int64* guide_size,
564 |     const float* backprop,
565 |     float* grid_grad, float* guide_grad)
566 | {
567 |   int64 bs = grid_size[0];
568 |   int64 gh = grid_size[1];
569 |   int64 gw = grid_size[2];
570 |   int64 gd = grid_size[3];
571 |   int64 chans = grid_size[4];
572 |
573 |   int64 h = guide_size[1];
574 |   int64 w = guide_size[2];
575 |
576 |   int64 grid_count = bs*gh*gw*gd*chans;
577 |   if (grid_count > 0) {
578 |     CudaLaunchConfig config = GetCudaLaunchConfig(grid_count, d);
579 |     BilateralSliceGridGradKernel<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
580 |         grid_count, grid, guide, backprop,
581 |         bs, h, w, chans, gh, gw, gd,
582 |         grid_grad);
583 |   }
584 |
585 |   int64 guide_count = bs*h*w;
586 |   if (guide_count > 0) {
587 |     CudaLaunchConfig config = GetCudaLaunchConfig(guide_count, d);
588 |     BilateralSliceGuideGradKernel<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
589 |         guide_count, grid, guide, backprop,
590 |         bs, h, w, chans, gh, gw, gd,
591 |         guide_grad);
592 |   }
593 |
594 |   return d.ok();
595 | }
596 |
597 |
598 |
599 | // Launches the grid, guide and input gradient kernels of BilateralSliceApply; returns false for a zero coefficient stride, otherwise d.ok().
600 | bool BilateralSliceApplyGradKernelLauncher(
601 |     const GPUDevice& d,
602 |     const float* grid, const int64* grid_size,
603 |     const float* guide, const int64* guide_size,
604 |     const float* input, const int64* input_size,
605 |     const float* backprop,
606 |     bool has_offset,
607 |     float* grid_grad, float* guide_grad, float* input_grad)
608 | {
609 |   int64 gh = grid_size[1];
610 |   int64 gw = grid_size[2];
611 |   int64 gd = grid_size[3];
612 |   int64 coeff_chans = grid_size[4];
613 |   int64 bs = guide_size[0];
614 |   int64 h = guide_size[1];
615 |   int64 w = guide_size[2];
616 |   int64 input_chans = input_size[3];
617 |
618 |   // Coefficients stored per output channel; zero (no input channels, no offset) would divide by zero below.
619 |   const int64 coeff_stride = has_offset ? input_chans+1 : input_chans;
620 |   if (coeff_stride <= 0) {
621 |     return false;
622 |   }
623 |   const int64 output_chans = coeff_chans/coeff_stride;
624 |
625 |
626 |   int64 grid_count = bs*gh*gw*gd*coeff_chans;
627 |   if (grid_count > 0) {
628 |     CudaLaunchConfig config = GetCudaLaunchConfig(grid_count, d);
629 |     BilateralSliceApplyGridGradKernel<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
630 |         grid_count, grid, guide, input, backprop,
631 |         bs, h, w, gh, gw, gd,
632 |         input_chans, output_chans, has_offset,
633 |         grid_grad);
634 |   }
635 |
636 |   int64 guide_count = bs*h*w;
637 |   if (guide_count > 0) {
638 |     CudaLaunchConfig config = GetCudaLaunchConfig(guide_count, d);
639 |     BilateralSliceApplyGuideGradKernel<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
640 |         guide_count, grid, guide, input, backprop,
641 |         bs, h, w, gh, gw, gd,
642 |         input_chans, output_chans, has_offset,
643 |         guide_grad);
644 |   }
645 |
646 |   int64 input_count = bs*h*w*input_chans;
647 |   if (input_count > 0) {
648 |     CudaLaunchConfig config = GetCudaLaunchConfig(input_count, d);
649 |     BilateralSliceApplyInputGradKernel<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
650 |         input_count, grid, guide, input, backprop,
651 |         bs, h, w, gh, gw, gd,
652 |         input_chans, output_chans, has_offset,
653 |         input_grad);
654 |   }
655 |
656 |   return d.ok();
657 | }
658 |
659 | #endif
660 |
--------------------------------------------------------------------------------