├── xla.png
├── IMG_1410.jpg
├── example.png
├── no_xla.png
├── photoai
    ├── logo.png
    ├── license.txt
    ├── index.html
    ├── js
    │   └── index.js
    └── css
    │   └── style.css
├── web_app_page.png
├── hdrnet
    ├── lib
    │   └── hdrnet_ops.so
    ├── utils.py
    ├── ops
    │   ├── cuda
    │   │   └── cuda_config.h
    │   ├── bilateral_slice.cc
    │   └── bilateral_slice.cu.cc
    ├── hdrnet_ops.py
    ├── layers.py
    └── models.py
├── optimized_graph
    ├── optimized_hdr.pb
    ├── optimized_edge.pb
    └── optimized_face.pb
├── requirements.txt
├── web_serving
    ├── web.py
    └── inference.py
├── README.md
├── scripts
    ├── test_pb_graph.py
    ├── optimize_graph.py
    └── freeze_graph.py
└── tensorflow_serving
    └── hdr_saved.py


/xla.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/xla.png


--------------------------------------------------------------------------------
/IMG_1410.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/IMG_1410.jpg


--------------------------------------------------------------------------------
/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/example.png


--------------------------------------------------------------------------------
/no_xla.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/no_xla.png


--------------------------------------------------------------------------------
/photoai/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/photoai/logo.png


--------------------------------------------------------------------------------
/web_app_page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/web_app_page.png


--------------------------------------------------------------------------------
/hdrnet/lib/hdrnet_ops.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/hdrnet/lib/hdrnet_ops.so


--------------------------------------------------------------------------------
/optimized_graph/optimized_hdr.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/optimized_graph/optimized_hdr.pb


--------------------------------------------------------------------------------
/optimized_graph/optimized_edge.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/optimized_graph/optimized_edge.pb


--------------------------------------------------------------------------------
/optimized_graph/optimized_face.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/optimized_graph/optimized_face.pb


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | setproctitle==1.1.10
2 | numpy==1.12.0
3 | pyglib==0.1
4 | scikit_image==0.9.3
5 | tensorflow_gpu==1.1.0
6 | python_gflags==3.1.1
7 | python_magic==0.4.13
8 | 
9 | 


--------------------------------------------------------------------------------
/hdrnet/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 Google Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """TF graph utilities."""
15 | 
16 | import tensorflow as tf
17 | 
18 | 
19 | def get_model_params(sess, param_collection="model_params"):
20 |   pcoll = tf.get_collection(param_collection)
21 |   params_ = {p.name.split(':')[0]: p for p in pcoll}
22 |   model_params = sess.run(params_)
23 |   return model_params
24 | 


--------------------------------------------------------------------------------
/hdrnet/ops/cuda/cuda_config.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  *     Unless required by applicable law or agreed to in writing, software
10 |  *     distributed under the License is distributed on an "AS IS" BASIS,
11 |  *     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  *     See the License for the specific language governing permissions and
13 |  *     limitations under the License.
14 |  *     ==============================================================================*/
15 | 
16 | // DO NOT EDIT: automatically generated file
17 | // #ifndef CUDA_CUDA_CONFIG_H_
18 | // #define CUDA_CUDA_CONFIG_H_
19 | //
20 | // #define TF_CUDA_CAPABILITIES CudaVersion("3.0")
21 | //
22 | // #define TF_CUDA_VERSION "8.0"
23 | // #define TF_CUDNN_VERSION "5"
24 | //
25 | // #define TF_CUDA_TOOLKIT_PATH "/usr/local/cuda-8.0"
26 | //
27 | // #endif  // CUDA_CUDA_CONFIG_H_
28 | 


--------------------------------------------------------------------------------
/photoai/license.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | <!--
 4 | Copyright (c) 2018 by Fei Cheng
 5 | 
 6 | 
 7 | Fork of an original work by Aaron Vanston (https://codepen.io/aaronvanston/pen/yNYOXR)
 8 | 
 9 | 
10 | 
11 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
16 | -->
17 | 


--------------------------------------------------------------------------------
/web_serving/web.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # encoding: utf-8
 3 | # Copyright 2018 Fei Cheng
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from bottle import Bottle, route, run, template, request, response, BaseRequest
18 | BaseRequest.MEMFILE_MAX = 256000000
19 | from inference import Hdrnet
20 | import base64
21 | import os
22 | 
23 | if not os.path.isdir('/tmp/face'):
24 | 	os.mkdir('/tmp/face')
25 | 	os.mkdir('/tmp/hdr')
26 | 	os.mkdir('/tmp/edge')
27 | 
28 | # set the optimized graph path
29 | # One Hdrnet instance is created per mode at import time; the second argument
30 | # is the mode name that the /infer handler dispatches on.
31 | # NOTE(review): the face model loads 'optimized_graph.pb' while the other two
32 | # follow the 'optimized_<mode>.pb' pattern -- confirm this is not meant to be
33 | # 'optimized_face.pb'.
34 | hdrnet_face = Hdrnet('optimized_graph.pb', 'face')
35 | hdrnet_edge = Hdrnet('optimized_edge.pb', 'edge')
36 | hdrnet_hdr = Hdrnet('optimized_hdr.pb', 'hdr')
32 | 
33 | 
34 | @route('/')
35 | def index():
36 |     return template('index')
37 | 
38 | @route('/infer', method=['POST'])
39 | def infer():
40 |     file = request.forms.get('data')
41 |     mode = request.forms.get('mode')
42 |     if mode == 'face':
43 |         data = hdrnet_face.infer(file)
44 |     elif mode == 'edge':
45 |         data = hdrnet_edge.infer(file)
46 |     elif mode == 'hdr':
47 |         data = hdrnet_hdr.infer(file)
48 | 
49 |     response.content_type = 'text/json'
50 |     response.set_header('Access-Control-Allow-Origin', '*')
51 |     return {'data': base64.b64encode(data)}
52 | 
53 | # set inet address
54 | # NOTE(review): the bind address and port are hard-coded for one specific
55 | # host. reload=True turns on Bottle's auto-reloader -- presumably a
56 | # development aid; confirm it is wanted when deployed.
57 | run(host='10.64.25.231', port=9999, reload=True)
55 | 


--------------------------------------------------------------------------------
/photoai/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en" >
 3 | 
 4 | <head>
 5 |   <meta charset="UTF-8">
 6 |   <title>File upload input</title>
 7 |   
 8 |   
 9 |   
10 |       <link rel="stylesheet" href="css/style.css">
11 | 
12 |   
13 | </head>
14 | 
15 | <body>
16 | 
17 |   <script class="jsbin" src="https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"></script>
18 |   <div class="p">
19 |     <img src="logo.png">
20 |   </div>
21 | <div class="file-upload">
22 |   <button class="file-upload-btn" type="button" onclick="$('.file-upload-input').trigger( 'click' )">Add Image</button>
23 | 
24 |   <div class="image-upload-wrap">
25 |     <input class="file-upload-input" type='file' onchange="readURL(this);" accept="image/*" />
26 |     <div class="drag-text">
27 |       <h3>Drag and drop an Image</h3>
28 |     </div>
29 |   </div>
30 |   <div class="file-upload-content">
31 |     <img class="file-upload-image" src="#" alt="your image" />
32 |     <div class="image-title-wrap">
33 |       <button type="button" onclick="removeUpload()" class="remove-image">Remove <span class="image-title">Uploaded Image</span></button>
34 |     </div>
35 |   </div>
36 |   <div id="mode" style="display:none">
37 |     <button class="file-upload-btn mode-btn" type="button" onclick="serverRequest('face')">Face Brighten</button>
38 |     <button class="file-upload-btn mode-btn" type="button" onclick="serverRequest('edge')">Edge Enhance</button>
39 |     <button class="file-upload-btn mode-btn" type="button" onclick="serverRequest('hdr')">HDR+</button>
40 |   </div>
41 | 
42 | </div>
43 | 
44 | 
45 |   <div class="file-upload">
46 |   <!--<button class="file-upload-btn" type="button">Share</button>-->
47 |     <div>
48 |       <button class="file-upload-btn mode-btn" type="button" onclick="">Facebook</button>
49 |       <button class="file-upload-btn mode-btn" type="button" onclick="">Twitter</button>
50 |       <button class="file-upload-btn mode-btn" type="button" onclick="">LinkedIn</button>
51 |     </div>
52 |     <div class="image-upload-wrap">
53 | 
54 | 
55 |       <div class="drag-text">
56 |         <h3>Enjoy Your Image</h3>
57 |       </div>
58 |     </div>
59 |     <div class="file-upload-content" id="res">
60 | 
61 |     </div>
62 |   </div>
63 | 
64 |   
65 | 
66 |     <script  src="js/index.js"></script>
67 | 
68 | 
69 | 
70 | 
71 | </body>
72 | 
73 | </html>
74 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Real-time image enhancement DL android App and web App
 2 | A deep learning project focused on deploying pretrained models on mobile devices and in the cloud. It was implemented during the 3-week Insight AI fellowship program.
 3 | 
 4 | Credit for the pretrained models goes to [Deep Bilateral Learning
 5 | for Real-Time Image Enhancement](https://groups.csail.mit.edu/graphics/hdrnet/)   
 6 | 
 7 | ## Setup
 8 | 
 9 | ### Dependencies
10 | 
11 | To install the Python dependencies, run:
12 | 
13 |     pip install -r requirements.txt
14 | 
15 | ## Usage
16 | 
17 | To download the pretrained models, please refer to [Deep Bilateral Learning
18 | for Real-Time Image Enhancement](https://groups.csail.mit.edu/graphics/hdrnet/)
19 | 
20 | 
21 | To prepare a model for use on mobile, freeze the graph, and optimize the network:
22 | 
23 |     ./scripts/freeze_graph.py <checkpoint_dir>
24 |     ./scripts/optimize_graph.py <frozen_path> <optimized_path> <input_nodes> <output_nodes>
25 |     
26 | To test the prepared model for use in the web app or on mobile:
27 |     
28 |     ./scripts/test_pb_graph.py <pb_file> <input_image> <output_image>
29 | 
30 | 
31 | ## Serving the Hdrnet model on cloud
32 | ### [photoAI](http://photo-ai.surge.sh/)
33 | <p align="center">
34 |   <img src="./example.png"/>
35 | </p>
36 | 
37 | The web app 'photoAI' currently serves 3 different pretrained models: face brightening, edge enhancement, and HDR+.
38 | 
39 | 
40 | ## Deploy Hdrnet model on Android with TensorFlow Mobile
41 | 
42 | In order to deploy this model on Android, the custom TensorFlow op (currently a CUDA implementation) has to be re-implemented in OpenCL so that it can run on mobile.
43 | The code still needs some cleanup; to be updated.
44 | 
45 | <p align="center">
46 |   <img src="./IMG_1410.jpg" width="350"/>
47 | </p>
48 | 
49 | ## Inference performance comparison with and without XLA
50 | 
51 | Some tests with XLA fused-operation optimization. The images below show tests with 1 batch (20) of 1500*1000 pictures. No improvement was observed from using XLA. I think there are two main reasons: bilateralSliceApply is a computationally heavy custom op that cannot be fused by XLA, and XLA is still at an early stage.
52 | 
53 | Inference without XLA JIT
54 | <p align="center">
55 |     <img src="./no_xla.png"/>
56 | </p>
57 | 
58 | Inference with XLA JIT
59 | <p align="center">
60 |     <img src="./xla.png"/>
61 | </p>
62 | 
63 | ## Known issues and limitations
64 | 
65 | * TensorFlow Mobile doesn't support custom ops, especially ops implemented in CUDA. The hdrnet model uses a custom op, BilateralSliceApply, which is GPU-only.
66 | 
67 | * The pre-trained HDR+ model was trained on specially formatted 16-bit linear input. Feeding it general images produces outputs with distorted colors.


--------------------------------------------------------------------------------
/photoai/js/index.js:
--------------------------------------------------------------------------------
 1 | var dataURL;
 2 | var server = "http://184.105.86.228:9999/infer";
 3 | 
 4 | function readURL(input) {
 5 |   $('#res img:last-child').remove();
 6 |   if (input.files && input.files[0]) {
 7 | 
 8 |     var reader = new FileReader();
 9 | 
10 |     reader.onload = function(e) {
11 |       $('.image-upload-wrap').hide();
12 | 
13 |       $('.file-upload-image').attr('src', e.target.result);
14 |       $('.file-upload-content').show();
15 | 
16 |       $('.image-title').html(input.files[0].name);
17 |       dataURL = reader.result;
18 |       $('#mode').show();
19 |     };
20 | 
21 |     reader.readAsDataURL(input.files[0]);
22 |     
23 |   } else {
24 |     removeUpload();
25 |   }
26 | }
27 | 
28 | function removeUpload() {
29 |   $('.file-upload-input').replaceWith($('.file-upload-input').clone());
30 |   $('.file-upload-content').hide();
31 |   $('.image-upload-wrap').show();
32 |   $('#mode').hide();
33 |   $('#res img:last-child').remove();
34 | }
35 | 
36 | 
37 | $('.image-upload-wrap').bind('dragover', function () {
38 | 		$('.image-upload-wrap').addClass('image-dropping');
39 | 	});
40 | 	$('.image-upload-wrap').bind('dragleave', function () {
41 | 		$('.image-upload-wrap').removeClass('image-dropping');
42 | });
43 | 
44 | 
45 | function serverRequest(mode) {
46 | //    var http = new XMLHttpRequest();
47 | //    var url = server;
48 | //    var params = {
49 | //        mode: mode,
50 | //        data: dataURL
51 | //    };
52 | //    http.open("POST", url, true);
53 | //
54 | //    //Send the proper header information along with the request
55 | //    //http.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
56 | //
57 | //    http.onreadystatechange = function(data) {//Call a function when the state changes.
58 | //        if(http.readyState == 4 && http.status == 200) {
59 | //            $("<img>", {
60 | //             "src": "data:image/jpeg;base64," + data['data'],
61 | //             "class": "file-upload-image"
62 | //             }).appendTo("#res");
63 | //        }
64 | //    }
65 | //    http.send(params);
66 | 
67 |   $.ajax({
68 |     type: 'POST',
69 |     url: server,
70 |     crossDomain: true,
71 |     data: {
72 |         mode: mode,
73 |         data: dataURL
74 |     },
75 |     dataType: 'json',
76 |     success: function(data) {
77 |   	 $("<img>", {
78 |          "src": "data:image/jpeg;base64," + data['data'],
79 |          "class": "file-upload-image"
80 |          }).appendTo("#res")
81 | //     $('#res').attr('src', "data:image/jpeg;base64," + data['data'])
82 |   	},
83 |   	error: function(jqXHR, textStatus, errorThrown){
84 |        console.log('error')
85 |        console.log(jqXHR)
86 |        console.log(textStatus)
87 |        console.log(errorThrown)
88 |     }
89 |   })
90 | }


--------------------------------------------------------------------------------
/photoai/css/style.css:
--------------------------------------------------------------------------------
  1 | body {
  2 |   font-family: sans-serif;
  3 |   background-color: #eeeeee;
  4 | }
  5 | 
  6 | .file-upload {
  7 |   background-color: #ffffff;
  8 |   width: 650px;
  9 |   display: inline;
 10 |   float: left;
 11 |   margin: 10px;
 12 |   padding: 20px;
 13 | }
 14 | 
 15 | .file-upload-right {
 16 |   float: right;
 17 | }
 18 | 
 19 | .file-upload-btn {
 20 |   width: 100%;
 21 |   margin: 0;
 22 |   color: #fff;
 23 |   background: #1FB264;
 24 |   border: none;
 25 |   padding: 10px;
 26 |   border-radius: 4px;
 27 |   border-bottom: 4px solid #15824B;
 28 |   transition: all .2s ease;
 29 |   outline: none;
 30 |   text-transform: uppercase;
 31 |   font-weight: 700;
 32 | }
 33 | 
 34 | .mode-btn {
 35 |   width: 30%;
 36 |   margin-left: 8px;
 37 |   margin-right: 8px;
 38 | }
 39 | 
 40 | .p {
 41 |     margin-left: 12px;
 42 |     margin-top: 20px;
 43 |     margin-bottom: 10px;
 44 | }
 45 | 
 46 | .file-upload-btn:hover {
 47 |   background: #1AA059;
 48 |   color: #ffffff;
 49 |   transition: all .2s ease;
 50 |   cursor: pointer;
 51 | }
 52 | 
 53 | .file-upload-btn:active {
 54 |   border: 0;
 55 |   transition: all .2s ease;
 56 | }
 57 | 
 58 | .file-upload-content {
 59 |   display: none;
 60 |   text-align: center;
 61 | }
 62 | 
 63 | .file-upload-input {
 64 |   position: absolute;
 65 |   margin: 0;
 66 |   padding: 0;
 67 |   width: 100%;
 68 |   height: 100%;
 69 |   outline: none;
 70 |   opacity: 0;
 71 |   cursor: pointer;
 72 | }
 73 | 
 74 | .image-upload-wrap {
 75 |   margin-top: 20px;
 76 |   border: 4px dashed #1FB264;
 77 |   position: relative;
 78 | }
 79 | 
 80 | .image-dropping,
 81 | .image-upload-wrap:hover {
 82 |   background-color: #1FB264;
 83 |   border: 4px dashed #ffffff;
 84 | }
 85 | 
 86 | .image-title-wrap {
 87 |   padding: 0 15px 15px 15px;
 88 |   color: #222;
 89 | }
 90 | 
 91 | .drag-text {
 92 |   text-align: center;
 93 | }
 94 | 
 95 | .drag-text h3 {
 96 |   font-weight: 100;
 97 |   text-transform: uppercase;
 98 |   color: #15824B;
 99 |   padding: 60px 0;
100 | }
101 | 
102 | .file-upload-image {
103 |   max-height: 600px;
104 |   max-width: 600px;
105 |   margin: auto;
106 |   padding: 20px;
107 | }
108 | 
109 | .remove-image {
110 |   width: 200px;
111 |   margin: 0;
112 |   color: #fff;
113 |   background: #cd4535;
114 |   border: none;
115 |   padding: 10px;
116 |   border-radius: 4px;
117 |   border-bottom: 4px solid #b02818;
118 |   transition: all .2s ease;
119 |   outline: none;
120 |   text-transform: uppercase;
121 |   font-weight: 700;
122 | }
123 | 
124 | .remove-image:hover {
125 |   background: #c13b2a;
126 |   color: #ffffff;
127 |   transition: all .2s ease;
128 |   cursor: pointer;
129 | }
130 | 
131 | .remove-image:active {
132 |   border: 0;
133 |   transition: all .2s ease;
134 | }
135 | 


--------------------------------------------------------------------------------
/hdrnet/hdrnet_ops.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 Google Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Python interface to custom Tensorflow operations for HDRnet."""
16 | 
17 | import os
18 | import tensorflow as tf
19 | from tensorflow.python.framework import ops
20 | 
21 | # NOTE(review): only 'bilateral_slice' is exported through `import *`,
22 | # although 'bilateral_slice_apply' is also defined below -- confirm this is
23 | # intended.
24 | __all__ = ['bilateral_slice']
25 | 
26 | # Build the path to lib/hdrnet_ops.so next to this file and pass it through
27 | # TensorFlow's resource loader.
28 | path = os.path.dirname(os.path.abspath(__file__))
29 | path = tf.resource_loader.get_path_to_datafile(
30 |     os.path.join(path, 'lib', 'hdrnet_ops.so'))
31 | 
32 | # Load the compiled op library; the ops below are attributes of this module.
33 | _hdrnet = tf.load_op_library(path)
34 | 
35 | # -- Register operations ------------------------------------------------------
36 | bilateral_slice = _hdrnet.bilateral_slice
37 | bilateral_slice_apply = _hdrnet.bilateral_slice_apply
32 | 
33 | # ----------- Register gradients ----------------------------------------------
34 | @ops.RegisterGradient('BilateralSlice')
35 | def _bilateral_slice_grad(op, grad):
36 |   grid_tensor = op.inputs[0]
37 |   guide_tensor = op.inputs[1]
38 |   return _hdrnet.bilateral_slice_grad(grid_tensor, guide_tensor, grad)
39 | 
40 | 
41 | @ops.RegisterGradient('BilateralSliceApply')
42 | def _bilateral_slice_grad(op, grad):
43 |   grid_tensor = op.inputs[0]
44 |   guide_tensor = op.inputs[1]
45 |   input_tensor = op.inputs[2]
46 |   has_offset = op.get_attr('has_offset')
47 |   return _hdrnet.bilateral_slice_apply_grad(
48 |       grid_tensor, guide_tensor, input_tensor, grad, has_offset=has_offset) 
49 | 
50 | 
51 | # ----------- Register Shape inference ----------------------------------------
52 | @ops.RegisterShape('BilateralSlice')
53 | def _bilateral_slice_shape(op):
54 |   input_tensor = op.inputs[0]
55 |   guide_tensor = op.inputs[1]
56 |   return [guide_tensor.get_shape().concatenate(input_tensor.get_shape()[-1])]
57 | 
58 | 
59 | @ops.RegisterShape('BilateralSliceApply')
60 | def _bilateral_slice_shape(op):
61 |   grid_tensor = op.inputs[0]
62 |   guide_tensor = op.inputs[1]
63 |   input_tensor = op.inputs[2]
64 | 
65 |   has_offset = op.get_attr('has_offset')
66 |   chan_in = input_tensor.get_shape()[-1]
67 |   chan_grid = grid_tensor.get_shape()[-1]
68 | 
69 |   if has_offset:
70 |     chan_out = chan_grid // (chan_in+1)
71 |   else:
72 |     chan_out = chan_grid // chan_in
73 |   return [guide_tensor.get_shape().concatenate(chan_out)]
74 | 


--------------------------------------------------------------------------------
/scripts/test_pb_graph.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # encoding: utf-8
 3 | 
 4 | # Copyright 2018 Fei Cheng
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | import tensorflow as tf
19 | import hdrnet.models as models
20 | import cv2
21 | import numpy as np
22 | import skimage
23 | import skimage.io
24 | import skimage.transform
25 | from PIL import Image
26 | import argparse
27 | 
28 | 
29 | def load_graph(pb_graph_file):
30 |     # load the protobuf file from the disk and parse it to retrieve the
31 |     # unserialized graph_def
32 |     with tf.gfile.GFile(pb_graph_file, "rb") as f:
33 |         graph_def = tf.GraphDef()
34 |         graph_def.ParseFromString(f.read())
35 | 
36 |     # import the graph_def into a new Graph and returns it
37 |     with tf.Graph().as_default() as graph:
38 |         tf.import_graph_def(graph_def)
39 |     return graph
40 | 
41 | def main(args):
42 |     input_path = args.input_image
43 |     im_input = cv2.imread(input_path, -1)  # -1 means read as is, no conversions.
44 |     if im_input.shape[2] == 4:
45 |         im_input = im_input[:, :, :3]
46 | 
47 |     im_input = np.flip(im_input, 2)  # OpenCV reads BGR, convert back to RGB.
48 |     im_input = skimage.img_as_float(im_input)
49 | 
50 |     lowres_input = skimage.transform.resize(im_input, [256, 256], order=0)
51 |     im_input = im_input[np.newaxis, :, :, :]
52 |     lowres_input = lowres_input[np.newaxis, :, :, :]
53 | 
54 |     graph = load_graph(args.pb_file)
55 | 
56 |     # nodes names need to be customized if graph changed
57 |     fullres = graph.get_tensor_by_name('fullres_input:0')
58 |     lowres = graph.get_tensor_by_name('lowres_input:0')
59 |     out = graph.get_tensor_by_name('output_img:0')
60 | 
61 |     with tf.Session(graph=graph) as sess:
62 |         feed_dict = {
63 |             fullres: im_input,
64 |             lowres: lowres_input
65 |         }
66 |         # run the inference
67 |         y_out = sess.run(out, feed_dict=feed_dict)
68 | 
69 |     img = Image.fromarray(y_out, 'RGB')
70 |     img.save(args.output_image)
71 | 
72 | 
73 | if __name__ == '__main__':
74 |     parser = argparse.ArgumentParser()
75 |     parser.add_argument('pb_file', default=None, help='path to the optimized graph')
76 |     parser.add_argument('input_image', default=None, help='input image path')
77 |     parser.add_argument('output_image', default=None, help='output image path')
78 | 
79 |     args = parser.parse_args()
80 |     main(args)


--------------------------------------------------------------------------------
/scripts/optimize_graph.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # encoding: utf-8
 3 | # Copyright 2018 Fei Cheng
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import argparse
18 | import tensorflow as tf
19 | import hdrnet.models as models
20 | from tensorflow.core.framework import graph_pb2
21 | from tensorflow.python.framework import errors
22 | from tensorflow.python.pywrap_tensorflow import TransformGraphWithStringInputs
23 | from tensorflow.python.util import compat
24 | 
25 | def TransformGraph(input_graph_def, inputs, outputs, transforms):
26 |     """Python wrapper for the Graph Transform Tool.
27 | 
28 |     Gives access to all graph transforms available through the command line tool.
29 |     See documentation at https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/graph_transforms/README.md
30 |     for full details of the options available.
31 | 
32 |     Args:
33 |     input_graph_def: GraphDef object containing a model to be transformed.
34 |     inputs: List of node names for the model inputs.
35 |     outputs: List of node names for the model outputs.
36 |     transforms: List of strings containing transform names and parameters.
37 | 
38 |     Returns:
39 |     New GraphDef with transforms applied.
40 |     """
41 | 
42 |     input_graph_def_string = input_graph_def.SerializeToString()
43 |     inputs_string = compat.as_bytes(",".join(inputs))
44 |     outputs_string = compat.as_bytes(",".join(outputs))
45 |     transforms_string = compat.as_bytes(" ".join(transforms))
46 |     with errors.raise_exception_on_not_ok_status() as status:
47 |         output_graph_def_string = TransformGraphWithStringInputs(
48 |             input_graph_def_string, inputs_string, outputs_string,
49 |             transforms_string, status)
50 |     output_graph_def = graph_pb2.GraphDef()
51 |     output_graph_def.ParseFromString(output_graph_def_string)
52 |     return output_graph_def
53 | 
54 | 
55 | def load_graph(frozen_graph_path):
56 |     # load the protobuf file from the disk and parse it to retrieve the
57 |     # unserialized graph_def
58 |     with tf.gfile.GFile(frozen_graph_path, "rb") as f:
59 |         graph_def = tf.GraphDef()
60 |         graph_def.ParseFromString(f.read())
61 |     return graph_def
62 | 
63 | 
64 | def write_trans_graph(output_graph, output_graph_def):
65 |     with tf.gfile.GFile(output_graph, "wb") as f:
66 |         f.write(output_graph_def.SerializeToString())
67 | 
68 | 
69 | def main(args):
70 |     graph_def = load_graph(args.frozen_path)
71 |     out = TransformGraph(graph_def, args.input_nodes, args.output_nodes,
72 |                          ['strip_unused_nodes', 'remove_nodes(op=Identity, op=CheckNumerics)', 'merge_duplicate_nodes',
73 |                           'fold_constants(ignore_errors=true)', 'fold_batch_norms', 'sort_by_execution_order',
74 |                           'strip_unused_nodes'])
75 |     write_trans_graph(args.optimized_path, out)
76 | 
77 | 
78 | if __name__ == '__main__':
79 |     parser = argparse.ArgumentParser()
80 |     parser.add_argument('frozen_path', default=None, help='path to the saved frozen graph')
81 |     parser.add_argument('optimized_path', default=None, help='path to output optimized graph')
82 |     parser.add_argument('input_nodes', nargs='+', help='input nodes names of the graph')
83 |     parser.add_argument('output_nodes', nargs='+', help='output nodes names of the graph')
84 | 
85 |     args = parser.parse_args()
86 |     main(args)


--------------------------------------------------------------------------------
/web_serving/inference.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # encoding: utf-8
  3 | # Copyright 2018 Fei Cheng
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #     http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | import tensorflow as tf
 18 | import hdrnet.models as models
 19 | import numpy as np
 20 | import skimage
 21 | import skimage.io
 22 | import skimage.transform
 23 | import base64
 24 | import cv2
 25 | from PIL import Image
 26 | import re
 27 | import cStringIO
 28 | 
 29 | class Hdrnet(object):
 30 |     def __init__(self, checkpoint, dir):
 31 |         gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
 32 |         self.checkpoint = checkpoint
 33 |         self.graph = self.load_graph(checkpoint)
 34 |         self.sess = tf.Session(graph=self.graph)
 35 |         self.count = 0
 36 |         self.dir = dir
 37 | 
 38 |     def load_graph(self, graph):
 39 |         # load the protobuf file from the disk and parse it to retrieve the
 40 |         # unserialized graph_def
 41 |         with tf.gfile.GFile(graph, "rb") as f:
 42 |             graph_def = tf.GraphDef()
 43 |             graph_def.ParseFromString(f.read())
 44 | 
 45 |         # import the graph_def into a new Graph and returns it
 46 |         with tf.Graph().as_default() as graph:
 47 |             tf.import_graph_def(graph_def)
 48 |         return graph
 49 | 
 50 |     def preprocess(self, url_data):
 51 |         img_dict = re.match("data:(?P<type>.*?);(?P<encoding>.*?),(?P<data>.*)", url_data).groupdict()
 52 |         #file = img_dict['data'].decode(img_dict['encoding'], 'strict')
 53 |         data = base64.b64decode(img_dict['data'])
 54 |         with open('/tmp/' + self.dir + '/'+str(self.count)+'.jpeg', 'wb') as f:
 55 |             f.write(data)
 56 |         np_data = cv2.imread('/tmp/' + self.dir + '/'+str(self.count)+'.jpeg', -1)
 57 |         print(np_data.shape)
 58 |         self.count += 1
 59 |         return np_data
 60 | 
 61 | 
 62 |     def infer(self, data):
 63 |         """ Perform inferencing.  In other words, generate a paraphrase
 64 |         for the source sentence.
 65 | 
 66 |         Args:
 67 |             file : input buffer from memory
 68 | 
 69 |         Returns:
 70 |             new_image: numpy array
 71 |         """
 72 | 
 73 |         im_input = self.preprocess(data)
 74 |         # im_input = cv2.imdecode(img, -1)  # -1 means read as is, no conversions.
 75 |         if im_input.shape[2] == 4:
 76 |             im_input = im_input[:, :, :3]
 77 | 
 78 |         im_input = np.flip(im_input, 2)  # OpenCV reads BGR, convert back to RGB.
 79 | 
 80 |         if im_input.dtype == np.uint16 and self.dir == 'hdr':
 81 |             # im_input = im_input / 32767.0
 82 |             # im_input = im_input / 32767.0 /2
 83 |             # im_input = im_input / (1.0*2**16)
 84 |             im_input = skimage.img_as_float(im_input)
 85 |         else:
 86 |             im_input = skimage.img_as_float(im_input)
 87 | 
 88 |         lowres_input = skimage.transform.resize(im_input, [256, 256], order=0)
 89 |         im_input = im_input[np.newaxis, :, :, :]
 90 |         lowres_input = lowres_input[np.newaxis, :, :, :]
 91 | 
 92 | 
 93 |         fullres = self.graph.get_tensor_by_name('import/fullres_input:0')
 94 |         lowres = self.graph.get_tensor_by_name('import/lowres_input:0')
 95 |         out = self.graph.get_tensor_by_name('import/output_img:0')
 96 | 
 97 |         feed_dict = {
 98 |             fullres: im_input,
 99 |             lowres: lowres_input
100 |         }
101 | 
102 |         y_out = self.sess.run(out, feed_dict=feed_dict)
103 | 
104 |         img = Image.fromarray(y_out, 'RGB')
105 |         buffer = cStringIO.StringIO()
106 |         img.save(buffer, format='JPEG')
107 |         return buffer.getvalue()
108 | 
109 | 
def main():
    """Command-line entry point: enhance one image file with a frozen graph.

    Reads `input_image`, wraps its bytes in a base64 data URL (the form
    Hdrnet.infer() expects), runs inference and writes the JPEG result to
    `output_image`.

    Returns:
        The JPEG-encoded output image as returned by Hdrnet.infer().
    """
    import argparse
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument('checkpoint', type=str, help='optimized graph path')
    parser.add_argument('input_image', type=str, help='input image file')
    parser.add_argument('output_image', type=str, help='output image path')
    parser.add_argument('--dir', type=str, default='hdr',
                        help='sub-directory of /tmp used for the decoded input')
    args = parser.parse_args()

    # Hdrnet.preprocess() writes into /tmp/<dir>/, which must exist.
    work_dir = '/tmp/' + args.dir
    if not os.path.isdir(work_dir):
        os.makedirs(work_dir)

    # Hdrnet requires both the graph path and the working directory name.
    net = Hdrnet(args.checkpoint, args.dir)

    with open(args.input_image, 'rb') as f:
        raw = f.read()

    # infer() parses a data URL, so raw file bytes must be wrapped first.
    url_data = ('data:application/octet-stream;base64,'
                + base64.b64encode(raw).decode('ascii'))
    new_image = net.infer(url_data)

    with open(args.output_image, 'wb') as f:
        f.write(new_image)

    return new_image


if __name__ == '__main__':
    main()
129 | 


--------------------------------------------------------------------------------
/hdrnet/layers.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 Google Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Shortcuts for some graph operators."""
 16 | 
 17 | import tensorflow as tf
 18 | import numpy as np
 19 | 
 20 | from hdrnet import hdrnet_ops
 21 | 
# Initializer factories used by conv() and fc() below: w_initializer is
# called with no arguments for weights, b_initializer with 0.0 for biases.
w_initializer = tf.contrib.layers.variance_scaling_initializer
b_initializer = tf.constant_initializer
 24 | 
 25 | def conv(inputs, num_outputs, kernel_size, stride=1, rate=1,
 26 |     use_bias=True,
 27 |     batch_norm=False, is_training=False,
 28 |     activation_fn=tf.nn.relu, 
 29 |     scope=None, reuse=False):
 30 |   if batch_norm:
 31 |     normalizer_fn = tf.contrib.layers.batch_norm
 32 |     b_init = None
 33 |   else:
 34 |     normalizer_fn = None
 35 |     if use_bias:
 36 |       b_init = b_initializer(0.0)
 37 |     else:
 38 |       b_init = None
 39 | 
 40 |   output = tf.contrib.layers.convolution2d(
 41 |       inputs=inputs,
 42 |       num_outputs=num_outputs, kernel_size=kernel_size, 
 43 |       stride=stride, padding='SAME',
 44 |       rate=rate,
 45 |       weights_initializer=w_initializer(),
 46 |       weights_regularizer=tf.contrib.layers.l2_regularizer(1.0),
 47 |       biases_initializer=b_init,
 48 |       normalizer_fn=normalizer_fn,
 49 |       normalizer_params={
 50 |         'center':True, 'is_training':is_training,
 51 |         'variables_collections':{
 52 |           'beta':[tf.GraphKeys.BIASES],
 53 |           'moving_mean':[tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
 54 |           'moving_variance':[tf.GraphKeys.MOVING_AVERAGE_VARIABLES]},
 55 |         }, 
 56 |       activation_fn=activation_fn, 
 57 |       variables_collections={'weights':[tf.GraphKeys.WEIGHTS], 'biases':[tf.GraphKeys.BIASES]},
 58 |       outputs_collections=[tf.GraphKeys.ACTIVATIONS],
 59 |       scope=scope, reuse=reuse)
 60 |   return output
 61 | 
 62 | 
 63 | def fc(inputs, num_outputs,
 64 |     use_bias=True,
 65 |     batch_norm=False, is_training=False,
 66 |     activation_fn=tf.nn.relu, 
 67 |     scope=None):
 68 |   if batch_norm:
 69 |     normalizer_fn = tf.contrib.layers.batch_norm
 70 |     b_init = None
 71 |   else:
 72 |     normalizer_fn = None
 73 |     if use_bias:
 74 |       b_init = b_initializer(0.0)
 75 |     else:
 76 |       b_init = None
 77 | 
 78 |   output = tf.contrib.layers.fully_connected(
 79 |       inputs=inputs,
 80 |       num_outputs=num_outputs,
 81 |       weights_initializer=w_initializer(),
 82 |       weights_regularizer=tf.contrib.layers.l2_regularizer(1.0),
 83 |       biases_initializer=b_init,
 84 |       normalizer_fn=normalizer_fn,
 85 |       normalizer_params={
 86 |         'center':True, 'is_training':is_training,
 87 |         'variables_collections':{
 88 |           'beta':[tf.GraphKeys.BIASES],
 89 |           'moving_mean':[tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
 90 |           'moving_variance':[tf.GraphKeys.MOVING_AVERAGE_VARIABLES]},
 91 |         }, 
 92 |       activation_fn=activation_fn, 
 93 |       variables_collections={'weights':[tf.GraphKeys.WEIGHTS], 'biases':[tf.GraphKeys.BIASES]},
 94 |       scope=scope)
 95 |   return output
 96 | 
 97 | 
 98 | # -----------------------------------------------------------------------------
 99 | 
100 | # pylint: disable=redefined-builtin
def bilateral_slice(grid, guide, name=None):
  """Slices into a bilateral grid using the guide map.

  A grid with six static dimensions is also accepted: its last axis is
  unstacked and concatenated onto axis 4 before slicing, and the sliced
  result is split back into that many pieces along axis 3 and stacked on a
  new axis 4.

  Args:
    grid: (Tensor) [batch_size, grid_h, grid_w, depth, n_outputs]
      grid to slice from (or the six-dimensional form described above).
    guide: (Tensor) [batch_size, h, w ] guide map to slice along.
    name: (string) name for the operation.
  Returns:
    sliced: (Tensor) the output of hdrnet_ops.bilateral_slice, restacked
      when the grid had six dimensions.
  """

  with tf.name_scope(name):
    grid_dims = grid.get_shape().as_list()
    has_input_axis = len(grid_dims) == 6
    if has_input_axis:
      n_in = grid_dims[5]
      # Fold the trailing axis into axis 4 so the op receives a 5-D grid.
      pieces = tf.unstack(grid, None, axis=5)
      grid = tf.concat(pieces, 4)

    sliced = hdrnet_ops.bilateral_slice(grid, guide)

    if has_input_axis:
      # Undo the folding on the sliced result.
      pieces = tf.split(sliced, n_in, axis=3)
      sliced = tf.stack(pieces, axis=4)
    return sliced
124 | # pylint: enable=redefined-builtin
125 | 
126 | 
def bilateral_slice_apply(grid, guide, input_image, has_offset=True, name=None):
  """Slices a bilateral grid with the guide and applies it to the input.

  A grid with six static dimensions is reshaped so that its last two axes
  are merged into one before being handed to the custom op.

  Args:
    grid: (Tensor) [batch_size, grid_h, grid_w, depth, n_outputs]
      grid to slice from (or a six-dimensional grid, see above).
    guide: (Tensor) [batch_size, h, w ] guide map to slice along.
    input_image: (Tensor) [batch_size, h, w, n_input] input data onto which to
      apply the affine transform.
    has_offset: (bool) forwarded unchanged to
      hdrnet_ops.bilateral_slice_apply.
    name: (string) name for the operation.
  Returns:
    sliced: (Tensor) the output of hdrnet_ops.bilateral_slice_apply.
  """

  with tf.name_scope(name):
    if len(grid.get_shape().as_list()) == 6:
      # Use the dynamic shape so unknown static dimensions are supported.
      dyn = tf.shape(grid)
      merged_shape = tf.stack([dyn[0], dyn[1], dyn[2], dyn[3], dyn[4]*dyn[5]])
      grid = tf.reshape(grid, merged_shape)

    return hdrnet_ops.bilateral_slice_apply(
        grid, guide, input_image, has_offset=has_offset)
151 | # pylint: enable=redefined-builtin
152 | 
153 | 
154 | # pylint: disable=redefined-builtin
def apply(sliced, input_image, has_affine_term=True, name=None):
  """Applies per-pixel affine coefficients to the input image.

  For every output channel the result is the sum over input channels of
  coefficient * input value, plus an offset when has_affine_term is True.

  Args:
    sliced: (Tensor) [batch_size, h, w, n_output, n_input+1] affine coefficients
      (the last axis has n_input entries when has_affine_term is False).
    input_image: (Tensor) [batch_size, h, w, n_input] input data onto which to
      apply the affine transform.
    has_affine_term: (bool) when True, the last entry of the final axis of
      `sliced` is used as an additive offset.
    name: (string) name for the operation.
  Returns:
    ret: (Tensor) [batch_size, h, w, n_output] the transformed data.
  Raises:
    ValueError: if the input image is not four-dimensional.
    ValueError: if the leading dimensions of the image and the coefficients
      differ.
  """

  with tf.name_scope(name):
    in_shape = input_image.get_shape().as_list()
    if len(in_shape) != 4:
      raise ValueError('input image should have dims [b,h,w,n_in].')
    sliced_shape = sliced.get_shape().as_list()
    if in_shape[:-1] != sliced_shape[:-2]:
      raise ValueError('input image and affine coefficients'
                       ' dimensions do not match: {} and {}'.format(
                       in_shape, sliced_shape))
    _, _, _, n_out, n_in = sliced_shape
    if has_affine_term:
      # The last coefficient is the offset, not a multiplier.
      n_in -= 1

    scale = sliced[:, :, :, :, :n_in]
    if has_affine_term:
      offset = sliced[:, :, :, :, n_in]

    out_channels = []
    for c_out in range(n_out):
      acc = scale[:, :, :, c_out, 0]*input_image[:, :, :, 0]
      for c_in in range(1, n_in):
        acc += scale[:, :, :, c_out, c_in]*input_image[:, :, :, c_in]
      if has_affine_term:
        acc += offset[:, :, :, c_out]
      out_channels.append(tf.expand_dims(acc, 3))

    ret = tf.concat(out_channels, 3)

  return ret
201 | # pylint: enable=redefined-builtin
202 | 


--------------------------------------------------------------------------------
/tensorflow_serving/hdr_saved.py:
--------------------------------------------------------------------------------
  1 | import shutil
  2 | 
  3 | import tensorflow as tf
  4 | 
  5 | import hdrnet.models as models
  6 | import hdrnet.utils as utils
  7 | import os
  8 | import numpy as np
  9 | 
 10 | 
# Command-line flags for the export script; values are read through FLAGS.
tf.app.flags.DEFINE_string('checkpoint_dir', '/tmp/checkpoint_dir/faces',
                           """Directory where to read training checkpoints.""")
tf.app.flags.DEFINE_string('output_dir', '/tmp/hdrnet_output',
                           """Directory where to export inference model.""")
tf.app.flags.DEFINE_integer('model_version', 1,
                            """Version number of the model.""")
# image_size is the side length used by preprocess_low_image() when resizing.
tf.app.flags.DEFINE_integer('image_size', 256,
                            """Needs to provide same value as in training.""")

FLAGS = tf.app.flags.FLAGS
 21 | 
 22 | 
 23 | def preprocess_image(image_buffer):
 24 |     '''
 25 |     Preprocess JPEG encoded bytes to 3D float Tensor and rescales
 26 |     it so that pixels are in a range of [-1, 1]
 27 |     :param image_buffer: Buffer that contains JPEG image
 28 |     :return: 4D image tensor (1, width, height,channels) with pixels scaled
 29 |              to [-1, 1]. First dimension is a batch size (1 is our case)
 30 |     '''
 31 | 
 32 |     # Decode the string as an RGB JPEG.
 33 |     # Note that the resulting image contains an unknown height and width
 34 |     # that is set dynamically by decode_jpeg. In other words, the height
 35 |     # and width of image is unknown at compile-time.
 36 |     image = tf.image.decode_jpeg(image_buffer, channels=3, dct_method='INTEGER_ACCURATE')
 37 | 
 38 |     # After this point, all image pixels reside in [0,1)
 39 |     # until the very end, when they're rescaled to (-1, 1).  The various
 40 |     # adjust_* ops all require this range for dtype float.
 41 |     image = tf.image.convert_image_dtype(image, dtype=tf.float32)
 42 | 
 43 |     # Networks accept images in batches.
 44 |     # The first dimension usually represents the batch size.
 45 |     # In our case the batch size is one.
 46 |     #image = tf.expand_dims(image, 0)
 47 | 
 48 |     return image
 49 | 
 50 | 
 51 | def preprocess_low_image(image_buffer):
 52 |     """Preprocess JPEG encoded bytes to 3D float Tensor."""
 53 | 
 54 |     # Decode the string as an RGB JPEG.
 55 |     # Note that the resulting image contains an unknown height and width
 56 |     # that is set dynamically by decode_jpeg. In other words, the height
 57 |     # and width of image is unknown at compile-time.
 58 |     image = tf.image.decode_jpeg(image_buffer, channels=3, dct_method='INTEGER_ACCURATE')
 59 |     # After this point, all image pixels reside in [0,1)
 60 |     # until the very end, when they're rescaled to (-1, 1).  The various
 61 |     # adjust_* ops all require this range for dtype float.
 62 |     image = tf.image.convert_image_dtype(image, dtype=tf.float32)
 63 |     # Crop the central region of the image with an area containing 87.5% of
 64 |     # the original image.
 65 |     # image = tf.image.central_crop(image, central_fraction=0.875)
 66 |     # Resize the image to the original height and width.
 67 |     image = tf.expand_dims(image, 0)
 68 |     image = tf.image.resize_nearest_neighbor(image, [FLAGS.image_size, FLAGS.image_size], align_corners=False)
 69 |     image = tf.squeeze(image, [0])
 70 |     # Finally, rescale to [-1,1] instead of [0, 1)
 71 |     #image = tf.subtract(image, 0.5)
 72 |     #image = tf.multiply(image, 2.0)
 73 |     return image
 74 | 
 75 | # def cv_preprocess_low_image(image_buffer):
 76 | #     record_defaults = [['']] * (256 * 256 * 3)
 77 | #     flat = tf.decode_csv(image_buffer, record_defaults=record_defaults)
 78 | #     flat = tf.string_to_number(flat, out_type=tf.float32)
 79 | #     return tf.expand_dims(tf.reshape(flat, [256, 256, 3]), 0)
 80 | #
 81 | #
 82 | # def cv_preprocess_image(image_buffer):
 83 | #     #array = np.load(image_buffer)
 84 | #
 85 | #     record_defaults = [['']] * 1920
 86 | #     flat = tf.stack(tf.decode_csv(image_buffer, record_defaults=record_defaults))
 87 | #     flat = tf.string_to_number(flat, out_type=tf.float32)
 88 | #
 89 | #     #array = tf.convert_to_tensor(array, dtype=tf.float32)
 90 | #     return tf.expand_dims(tf.reshape(flat, [1920, 1080, 3]), 0)
 91 | 
 92 | 
def main(_):
    """Build the inference graph, restore the checkpoint and export a SavedModel.

    Two string placeholders ('input_image' and 'low_image') receive
    serialized examples carrying an 'image/encoded' JPEG feature. The decoded
    images are fed to the model named in the checkpoint's parameters, and the
    clipped, 8-bit output is exported under the 'predict_images' signature to
    <FLAGS.output_dir>/<FLAGS.model_version>.

    Args:
        _: unused; argument supplied by tf.app.run().
    """
    with tf.Graph().as_default():
        # Inject placeholder into the graph
        serialized_tf_example = tf.placeholder(tf.string, name='input_image')
        serialized_low_example = tf.placeholder(tf.string, name='low_image')
        #serialized_shape = tf.placeholder(tf.string, name='shape_image')
        # Both inputs share the same schema: a single scalar string feature.
        feature_configs = {
            'image/encoded': tf.FixedLenFeature(
                shape=[], dtype=tf.string)
        }
        tf_example = tf.parse_example(serialized_tf_example, feature_configs)
        tf_low_example = tf.parse_example(serialized_low_example, feature_configs)
        #tf_low_shape = tf.parse_example(serialized_shape, feature_configs)

        jpegs = tf_example['image/encoded']
        low_jpegs = tf_low_example['image/encoded']
        #shape_jpegs = tf_low_shape['image/encoded']

        # Decode every encoded image in the batch.
        full_images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)
        low_images = tf.map_fn(preprocess_low_image, low_jpegs, dtype=tf.float32)
        #full_images = tf.squeeze(full_images, [0])
        #low_images = tf.squeeze(low_images, [0])

        # now the image shape is (1, ?, ?, 3)

        # Create model
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

        # The meta graph is imported so the model parameters stored with the
        # checkpoint can be read back through utils.get_model_params.
        metapath = ".".join([checkpoint_path, "meta"])
        tf.train.import_meta_graph(metapath)
        with tf.Session() as sess:
            model_params = utils.get_model_params(sess)
        # Look the model class up by the name recorded in the parameters.
        mdl = getattr(models, model_params['model_name'])

        with tf.variable_scope('inference'):
            prediction = mdl.inference(low_images, full_images, model_params, is_training=False)
        # Clip to [0, 1], drop size-1 dimensions, and convert to 8-bit values.
        output = tf.cast(255.0 * tf.squeeze(tf.clip_by_value(prediction, 0, 1)), tf.uint8)
        #output_img = tf.image.encode_png(tf.image.convert_image_dtype(output[0], dtype=tf.uint8))


        # Create saver to restore from checkpoints
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Restore the model from last checkpoints
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            saver.restore(sess, ckpt.model_checkpoint_path)

            # (re-)create export directory
            export_path = os.path.join(
                tf.compat.as_bytes(FLAGS.output_dir),
                tf.compat.as_bytes(str(FLAGS.model_version)))
            # Any existing export for this version is deleted first.
            if os.path.exists(export_path):
                shutil.rmtree(export_path)

            # create model builder
            builder = tf.saved_model.builder.SavedModelBuilder(export_path)

            # create tensors info
            predict_tensor_inputs_info = tf.saved_model.utils.build_tensor_info(jpegs)
            predict_tensor_low_info = tf.saved_model.utils.build_tensor_info(low_jpegs)
            #predict_tensor_shape_info = tf.saved_model.utils.build_tensor_info(shape_jpegs)
            predict_tensor_scores_info = tf.saved_model.utils.build_tensor_info(output)

            # build prediction signature
            # NOTE(review): the signature inputs are the parsed JPEG string
            # tensors, not the serialized-example placeholders -- confirm this
            # is what serving clients are expected to feed.
            prediction_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs={'images': predict_tensor_inputs_info,
                            'low': predict_tensor_low_info},
                            #'shape': predict_tensor_shape_info},
                    outputs={'result': predict_tensor_scores_info},
                    method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
                )
            )

            # save the model
            #legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
            builder.add_meta_graph_and_variables(
                sess, [tf.saved_model.tag_constants.SERVING],
                signature_def_map={
                    'predict_images': prediction_signature
                })
                #legacy_init_op=legacy_init_op)

            builder.save()

    print("Successfully exported hdr model version '{}' into '{}'".format(
        FLAGS.model_version, FLAGS.output_dir))

if __name__ == '__main__':
    # tf.app.run() invokes main().
    tf.app.run()


--------------------------------------------------------------------------------
/hdrnet/models.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 Google Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Defines computation graphs."""
 16 | 
 17 | import tensorflow as tf
 18 | import numpy as np
 19 | import os
 20 | 
 21 | from hdrnet.layers import (conv, fc, bilateral_slice_apply)
 22 | 
# Names of the model classes exported by this module.
__all__ = [
  'HDRNetCurves',
  'HDRNetPointwiseNNGuide',
  'HDRNetGaussianPyrNN',
]
 28 | 
 29 | 
class HDRNetCurves(object):
  """Main model, as submitted in January 2017.

  The graph has three parts: a coefficient network run on the low-resolution
  input, a guide map computed from the full-resolution input, and an output
  stage that slices the coefficients with the guide and applies them to the
  full-resolution input.
  """
  
  @classmethod
  def n_out(cls):
    """Number of output channels predicted per grid cell."""
    return 3

  @classmethod
  def n_in(cls):
    """Number of coefficients per output channel (written as 3+1)."""
    return 3+1

  @classmethod
  def inference(cls, lowres_input, fullres_input, params,
                is_training=False):
    """Builds the full inference graph.

    Args:
      lowres_input: (Tensor) low-resolution image fed to the coefficient
        network.
      fullres_input: (Tensor) full-resolution image used for the guide and
        as the image the coefficients are applied to.
      params: (dict) model parameters; see _coefficients for the keys read.
      is_training: (bool) forwarded to the batch-normalized layers.
    Returns:
      The output tensor produced by _output. The coefficients, guide and
      output are also added to the 'bilateral_coefficients', 'guide' and
      'output' collections.
    """

    with tf.variable_scope('coefficients'):
      bilateral_coeffs = cls._coefficients(lowres_input, params, is_training)
      tf.add_to_collection('bilateral_coefficients', bilateral_coeffs)

    with tf.variable_scope('guide'):
      guide = cls._guide(fullres_input, params, is_training)
      tf.add_to_collection('guide', guide)

    with tf.variable_scope('output'):
      output = cls._output(
          fullres_input, guide, bilateral_coeffs)
      tf.add_to_collection('output', output)

    return output

  @classmethod
  def _coefficients(cls, input_tensor, params, is_training):
    """Builds the network that predicts the bilateral grid of coefficients.

    Reads 'luma_bins', 'channel_multiplier', 'spatial_bin', 'net_input_size'
    and 'batch_norm' from params.

    Args:
      input_tensor: (Tensor) low-resolution input image.
      params: (dict) model parameters.
      is_training: (bool) forwarded to the batch-normalized layers.
    Returns:
      (Tensor) six-dimensional tensor
      [batch, grid_h, grid_w, luma_bins, n_out, n_in].
    """
    # NOTE(review): bs is the static batch size and is used in tf.reshape
    # below, so it presumably has to be known at graph-construction time --
    # confirm with callers.
    bs = input_tensor.get_shape().as_list()[0]
    gd = params['luma_bins']
    cm = params['channel_multiplier']
    spatial_bin = params['spatial_bin']

    # -----------------------------------------------------------------------
    with tf.variable_scope('splat'):
      # Each stride-2 layer halves the resolution, so this many layers take
      # the input from net_input_size down to spatial_bin.
      n_ds_layers = int(np.log2(params['net_input_size']/spatial_bin))

      current_layer = input_tensor
      for i in range(n_ds_layers):
        if i > 0:  # don't normalize first layer
          use_bn = params['batch_norm']
        else:
          use_bn = False
        current_layer = conv(current_layer, cm*(2**i)*gd, 3, stride=2,
                             batch_norm=use_bn, is_training=is_training,
                             scope='conv{}'.format(i+1))

      splat_features = current_layer
    # -----------------------------------------------------------------------

    # -----------------------------------------------------------------------
    with tf.variable_scope('global'):
      # NOTE(review): n_global_layers is computed but never used; the loop
      # below always builds exactly two stride-2 layers.
      n_global_layers = int(np.log2(spatial_bin/4))  # 4x4 at the coarsest lvl

      current_layer = splat_features
      for i in range(2):
        current_layer = conv(current_layer, 8*cm*gd, 3, stride=2,
            batch_norm=params['batch_norm'], is_training=is_training,
            scope="conv{}".format(i+1))
      # Flatten the spatial and channel axes for the fully connected layers.
      _, lh, lw, lc = current_layer.get_shape().as_list()
      current_layer = tf.reshape(current_layer, [bs, lh*lw*lc])

      current_layer = fc(current_layer, 32*cm*gd, 
                         batch_norm=params['batch_norm'], is_training=is_training,
                         scope="fc1")
      current_layer = fc(current_layer, 16*cm*gd, 
                         batch_norm=params['batch_norm'], is_training=is_training,
                         scope="fc2")
      # don't normalize before fusion
      current_layer = fc(current_layer, 8*cm*gd, activation_fn=None, scope="fc3")
      global_features = current_layer
    # -----------------------------------------------------------------------

    # -----------------------------------------------------------------------
    with tf.variable_scope('local'):
      current_layer = splat_features
      current_layer = conv(current_layer, 8*cm*gd, 3, 
                           batch_norm=params['batch_norm'], 
                           is_training=is_training,
                           scope='conv1')
      # don't normalize before fusion
      current_layer = conv(current_layer, 8*cm*gd, 3, activation_fn=None,
                           use_bias=False, scope='conv2')
      grid_features = current_layer
    # -----------------------------------------------------------------------

    # -----------------------------------------------------------------------
    with tf.name_scope('fusion'):
      fusion_grid = grid_features
      # Reshape to [bs, 1, 1, C] so the global vector broadcasts over the
      # spatial axes of the local features.
      fusion_global = tf.reshape(global_features, [bs, 1, 1, 8*cm*gd])
      fusion = tf.nn.relu(fusion_grid+fusion_global)
    # -----------------------------------------------------------------------

    # -----------------------------------------------------------------------
    with tf.variable_scope('prediction'):
      current_layer = fusion
      current_layer = conv(current_layer, gd*cls.n_out()*cls.n_in(), 1,
                                  activation_fn=None, scope='conv1')

      with tf.name_scope('unroll_grid'):
        # [b, h, w, gd*n_out*n_in] -> [b, h, w, gd, n_out*n_in]
        current_layer = tf.stack(
            tf.split(current_layer, cls.n_out()*cls.n_in(), axis=3), axis=4)
        # [b, h, w, gd, n_out*n_in] -> [b, h, w, gd, n_out, n_in]
        current_layer = tf.stack(
            tf.split(current_layer, cls.n_in(), axis=4), axis=5)
      tf.add_to_collection('packed_coefficients', current_layer)
    # -----------------------------------------------------------------------

    return current_layer

  @classmethod
  def _guide(cls, input_tensor, params, is_training):
    """Builds the single-channel guide map from the full-resolution input.

    The input goes through a learned color matrix with bias, a per-channel
    curve made of a sum of shifted ReLUs, a 1x1 convolution that mixes the
    channels into one, and a clip to [0, 1].

    Args:
      input_tensor: (Tensor) full-resolution image; the last axis holds the
        channels.
      params: unused in this implementation.
      is_training: unused in this implementation.
    Returns:
      (Tensor) guide map with the channel axis (axis 3) squeezed out.
    """
    npts = 16  # number of control points for the curve
    nchans = input_tensor.get_shape().as_list()[-1]

    guidemap = input_tensor

    # Color space change
    # Identity matrix plus one small random scalar added to every entry.
    idtity = np.identity(nchans, dtype=np.float32) + np.random.randn(1).astype(np.float32)*1e-4
    ccm = tf.get_variable('ccm', dtype=tf.float32, initializer=idtity)
    with tf.name_scope('ccm'):
      ccm_bias = tf.get_variable('ccm_bias', shape=[nchans,], dtype=tf.float32, initializer=tf.constant_initializer(0.0))

      # Flatten to [pixels, nchans] for the matrix product, then restore the
      # original shape.
      guidemap = tf.matmul(tf.reshape(input_tensor, [-1, nchans]), ccm)
      guidemap = tf.nn.bias_add(guidemap, ccm_bias, name='ccm_bias_add')

      guidemap = tf.reshape(guidemap, tf.shape(input_tensor))

    # Per-channel curve
    with tf.name_scope('curve'):
      # Initial shifts: npts evenly spaced values in [0, 1), tiled to
      # [1, 1, nchans, npts].
      shifts_ = np.linspace(0, 1, npts, endpoint=False, dtype=np.float32)
      shifts_ = shifts_[np.newaxis, np.newaxis, np.newaxis, :]
      shifts_ = np.tile(shifts_, (1, 1, nchans, 1))

      guidemap = tf.expand_dims(guidemap, 4)
      shifts = tf.get_variable('shifts', dtype=tf.float32, initializer=shifts_)

      # Initial slopes: 1 for the first control point and 0 for the rest.
      slopes_ = np.zeros([1, 1, 1, nchans, npts], dtype=np.float32)
      slopes_[:, :, :, :, 0] = 1.0
      slopes = tf.get_variable('slopes', dtype=tf.float32, initializer=slopes_)

      # Sum the shifted ReLUs over the control-point axis.
      guidemap = tf.reduce_sum(slopes*tf.nn.relu(guidemap-shifts), reduction_indices=[4])

    # 1x1 convolution mixing the channels into one; the weights are
    # initialized to 1/nchans.
    guidemap = tf.contrib.layers.convolution2d(
        inputs=guidemap,
        num_outputs=1, kernel_size=1, 
        weights_initializer=tf.constant_initializer(1.0/nchans),
        biases_initializer=tf.constant_initializer(0),
        activation_fn=None, 
        variables_collections={'weights':[tf.GraphKeys.WEIGHTS], 'biases':[tf.GraphKeys.BIASES]},
        outputs_collections=[tf.GraphKeys.ACTIVATIONS],
        scope='channel_mixing')

    guidemap = tf.clip_by_value(guidemap, 0, 1)
    guidemap = tf.squeeze(guidemap, squeeze_dims=[3,])

    return guidemap

  @classmethod
  def _output(cls, im, guide, coeffs):
    """Slices the coefficients with the guide and applies them to the image.

    Args:
      im: (Tensor) full-resolution input image.
      guide: (Tensor) guide map from _guide.
      coeffs: (Tensor) coefficient grid from _coefficients.
    Returns:
      The tensor returned by bilateral_slice_apply.
    """
    # The op is explicitly placed on the first GPU.
    with tf.device('/gpu:0'):
      out = bilateral_slice_apply(coeffs, guide, im, has_offset=True, name='slice')
    return out
197 | 
198 | 
class HDRNetPointwiseNNGuide(HDRNetCurves):
  """Variant whose guide map comes from a pointwise neural net.

  The curve-based guide of the parent class is replaced by two 1x1
  convolutions.
  """
  @classmethod
  def _guide(cls, input_tensor, params, is_training):
    """Builds the guide map with two 1x1 convolution layers.

    Args:
      input_tensor: (Tensor) full-resolution input image.
      params: (dict) model parameters; 'guide_complexity' sets the number
        of features of the first layer.
      is_training: (bool) forwarded to the batch-normalized first layer.
    Returns:
      (Tensor) sigmoid-activated guide map with axis 3 squeezed out.
    """
    hidden = conv(input_tensor, params['guide_complexity'], 1,
                  batch_norm=True, is_training=is_training,
                  scope='conv1')
    single_channel = conv(hidden, 1, 1, activation_fn=tf.nn.sigmoid,
                          scope='conv2')
    return tf.squeeze(single_channel, squeeze_dims=[3,])
211 | 
212 | 
class HDRNetGaussianPyrNN(HDRNetPointwiseNNGuide):
  """Replace input to the affine model by a pyramid.

  The full-resolution input is repeatedly halved to build `n_scales()` levels.
  Each level gets its own pointwise guide and its own group of 3 coefficient
  channels; the per-level outputs are upsampled and summed from the coarsest
  level up to full resolution.
  """
  @classmethod
  def n_scales(cls):
    """Number of pyramid levels, including the full-resolution one."""
    return 3

  @classmethod
  def n_out(cls):
    """Number of output channels of the affine model: 3 per pyramid level."""
    return 3*cls.n_scales()

  @classmethod
  def n_in(cls):
    """Number of input channels of the affine model (3 plus 1)."""
    return 3+1

  @classmethod
  def inference(cls, lowres_input, fullres_input, params,
                is_training=False):
    """Build the inference graph and return the output tensor.

    Intermediate tensors are registered in the 'bilateral_coefficients',
    'multiscale', 'guide' and 'output' graph collections.
    """
    with tf.variable_scope('coefficients'):
      bilateral_coeffs = cls._coefficients(lowres_input, params, is_training)
      tf.add_to_collection('bilateral_coefficients', bilateral_coeffs)

    with tf.variable_scope('multiscale'):
      multiscale = cls._multiscale_input(fullres_input)
      for m in multiscale:
        tf.add_to_collection('multiscale', m)

    with tf.variable_scope('guide'):
      guide = cls._guide(multiscale, params, is_training)
      for g in guide:
        tf.add_to_collection('guide', g)

    with tf.variable_scope('output'):
      output = cls._output(multiscale, guide, bilateral_coeffs)
      tf.add_to_collection('output', output)

    return output

  @classmethod
  def _multiscale_input(cls, fullres_input):
    """Return the pyramid levels as a list, finest (the input itself) first."""
    sz = tf.shape(fullres_input)[1:3]

    current_level = fullres_input
    lvls = [current_level]
    for _ in range(cls.n_scales()-1):
      # Floor division keeps the size an integer tensor on Python 3 as well;
      # a plain `/` would be true division there and produce a float size.
      sz = sz // 2
      current_level = tf.image.resize_images(
          current_level, sz, tf.image.ResizeMethod.BILINEAR,
          align_corners=True)
      lvls.append(current_level)
    return lvls

  @classmethod
  def _guide(cls, multiscale, params, is_training):
    """Build one pointwise guide per pyramid level, each in its own scope."""
    guide_lvls = []
    for il, lvl in enumerate(multiscale):
      with tf.variable_scope('level_{}'.format(il)):
        guide_lvl = HDRNetPointwiseNNGuide._guide(lvl, params, is_training)
      guide_lvls.append(guide_lvl)
    return guide_lvls

  @classmethod
  def _output(cls, lvls, guide_lvls, coeffs):
    """Slice every level and accumulate the results from coarse to fine.

    Levels are visited coarsest first, so `il == 0` is the coarsest level and
    it uses coefficient channels 0:3 along axis 4 of `coeffs`.
    """
    # zip() returns an iterator on Python 3, which reversed() rejects;
    # materialize the pairs first.
    pairs = list(zip(lvls, guide_lvls))
    for il, (lvl, guide_lvl) in enumerate(reversed(pairs)):
      c = coeffs[:, :, :, :, il*3:(il+1)*3, :]
      out_lvl = HDRNetPointwiseNNGuide._output(lvl, guide_lvl, c)

      if il == 0:
        current = out_lvl
      else:
        # Upsample the running sum to this level's resolution before adding.
        sz = tf.shape(out_lvl)[1:3]
        current = tf.image.resize_images(current, sz, tf.image.ResizeMethod.BILINEAR, align_corners=True)
        current = tf.add(current, out_lvl)

    return current
290 | 
291 | 
292 | 


--------------------------------------------------------------------------------
/scripts/freeze_graph.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # encoding: utf-8
  3 | # Copyright 2016 Google Inc.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #     http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | # Modifications Copyright 2018 Fei Cheng
 18 | #
 19 | # Licensed under the Apache License, Version 2.0 (the "License");
 20 | # you may not use this file except in compliance with the License.
 21 | # You may obtain a copy of the License at
 22 | #
 23 | #     http://www.apache.org/licenses/LICENSE-2.0
 24 | #
 25 | # Unless required by applicable law or agreed to in writing, software
 26 | # distributed under the License is distributed on an "AS IS" BASIS,
 27 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 28 | # See the License for the specific language governing permissions and
 29 | # limitations under the License.
 30 | 
 31 | """Freeze graph weights; use to optimize runtime."""
 32 | 
 33 | import argparse
 34 | import logging
 35 | import numpy as np
 36 | import os
 37 | import tensorflow as tf
 38 | from tensorflow.python.tools import freeze_graph
 39 | from tensorflow.core.framework import graph_pb2
 40 | 
 41 | import hdrnet.utils as utils
 42 | import hdrnet.models as models
 43 | 
 44 | logging.basicConfig(format="[%(process)d] %(levelname)s %(filename)s:%(lineno)s | %(message)s")
 45 | log = logging.getLogger("train")
 46 | log.setLevel(logging.INFO)
 47 | 
 48 | 
 49 | def save(data, filepath):
 50 |     log.info("Saving {}".format(filepath))
 51 |     with open(filepath, 'wb') as fid:
 52 |         fid.write(data.tobytes())
 53 | 
 54 | 
 55 | def main(args):
 56 |     # Read model parameters
 57 |     checkpoint_path = tf.train.latest_checkpoint(args.checkpoint_dir)
 58 |     if checkpoint_path is None:
 59 |         log.error('Could not find a checkpoint in {}'.format(args.checkpoint_dir))
 60 |         return
 61 |     metapath = ".".join([checkpoint_path, "meta"])
 62 |     log.info("Loading {}".format(metapath))
 63 |     tf.train.import_meta_graph(metapath)
 64 |     with tf.Session() as sess:
 65 |         model_params = utils.get_model_params(sess)
 66 | 
 67 |     if not hasattr(models, model_params['model_name']):
 68 |         log.error("Model {} does not exist".format(model_params['model_name']))
 69 |         return
 70 |     mdl = getattr(models, model_params['model_name'])
 71 | 
 72 |     # Instantiate new evaluation graph
 73 |     tf.reset_default_graph()
 74 |     sz = model_params['net_input_size']
 75 | 
 76 |     log.info("Model {}".format(model_params['model_name']))
 77 | 
 78 |     #
 79 |     # identify the input and output tensors to export
 80 |     # the part of graph you'd like to freeze
 81 |     #
 82 |     fullres_input = tf.placeholder(tf.float32, (1, None, None, 3), name='fullres_input')
 83 |     input_tensor = tf.placeholder(tf.float32, (1, sz, sz, 3), name='lowres_input')
 84 |     with tf.variable_scope('inference'):
 85 |         prediction = mdl.inference(input_tensor, fullres_input, model_params, is_training=False)
 86 |     if model_params["model_name"] == "HDRNetGaussianPyrNN":
 87 | 
 88 |         # export seperate graphs for deploying models on android
 89 |         output_tensor = tf.get_collection('guide')[0]
 90 |         output_tensor = tf.reshape(output_tensor, [-1], name='guide')
 91 |         # output_tensor = tf.get_collection('packed_coefficients')[0]
 92 |         # gs = output_tensor.get_shape().as_list()
 93 |         # output_tensor = tf.reshape(tf.reshape(output_tensor, tf.stack([gs[0], gs[1], gs[2], gs[3], gs[4] * gs[5]])),
 94 |         #                            [-1], name="bilateral_coefficients")
 95 |         # output_tensor = tf.transpose(tf.squeeze(output_tensor), [3, 2, 0, 1, 4], name="bilateral_coefficients")
 96 | 
 97 |         # export the whole graph when deploying on cloud
 98 |         # output_tensor = tf.cast(255.0*tf.squeeze(tf.clip_by_value(output_tensor, 0, 1)), tf.uint8, name='output_img')
 99 |         log.info("Output shape".format(output_tensor.get_shape()))
100 |     else:
101 |         # export seperate graphs for deploying models on android
102 |         output_tensor = tf.get_collection('guide')[0]
103 |         output_tensor = tf.reshape(output_tensor, [-1], name='guide')
104 |         # output_tensor = tf.get_collection('packed_coefficients')[0]
105 |         # gs = output_tensor.get_shape().as_list()
106 |         # output_tensor = tf.reshape(tf.reshape(output_tensor, tf.stack([gs[0], gs[1], gs[2], gs[3], gs[4]*gs[5]])),
107 |         #                            [-1], name="bilateral_coefficients")
108 |         # output_tensor = tf.transpose(tf.squeeze(output_tensor), [3, 2, 0, 1, 4], name="bilateral_coefficients")
109 | 
110 |         # export the whole graph when deploying on cloud
111 |         # output_tensor = tf.cast(255.0*tf.squeeze(tf.clip_by_value(output_tensor, 0, 1)), tf.uint8, name='output_img')
112 |         log.info("Output shape {}".format(output_tensor.get_shape()))
113 |     saver = tf.train.Saver()
114 | 
115 |     gdef = tf.get_default_graph().as_graph_def()
116 | 
117 |     log.info("Restoring weights from {}".format(checkpoint_path))
118 |     test_graph_name = "test_graph.pbtxt"
119 |     with tf.Session() as sess:
120 |         saver.restore(sess, checkpoint_path)
121 |         tf.train.write_graph(sess.graph, args.checkpoint_dir, test_graph_name)
122 | 
123 |         input_graph_path = os.path.join(args.checkpoint_dir, test_graph_name)
124 |         output_graph_path = os.path.join(args.checkpoint_dir, "frozen_graph.pb")
125 |         input_saver_def_path = ""
126 |         input_binary = False
127 |         output_binary = True
128 |         input_node_names = input_tensor.name.split(":")[0]
129 |         output_node_names = output_tensor.name.split(":")[0]
130 |         restore_op_name = "save/restore_all"
131 |         filename_tensor_name = "save/Const:0"
132 |         clear_devices = False
133 | 
134 |         log.info("Freezing to {}".format(output_graph_path))
135 |         freeze_graph.freeze_graph(input_graph_path, input_saver_def_path,
136 |                                   input_binary, checkpoint_path, output_node_names,
137 |                                   restore_op_name, filename_tensor_name,
138 |                                   output_graph_path, clear_devices, "")
139 |         log.info('input tensor: {} {}'.format(input_tensor.name, input_tensor.shape))
140 |         log.info('output tensor: {} {}'.format(output_tensor.name, output_tensor.shape))
141 | 
142 |         # Dump guide parameters
143 |         if model_params['model_name'] == 'HDRNetCurves':
144 |             g = tf.get_default_graph()
145 |             ccm = g.get_tensor_by_name('inference/guide/ccm:0')
146 |             ccm_bias = g.get_tensor_by_name('inference/guide/ccm_bias:0')
147 |             shifts = g.get_tensor_by_name('inference/guide/shifts:0')
148 |             slopes = g.get_tensor_by_name('inference/guide/slopes:0')
149 |             mixing_weights = g.get_tensor_by_name('inference/guide/channel_mixing/weights:0')
150 |             mixing_bias = g.get_tensor_by_name('inference/guide/channel_mixing/biases:0')
151 | 
152 |             ccm_, ccm_bias_, shifts_, slopes_, mixing_weights_, mixing_bias_ = sess.run(
153 |                 [ccm, ccm_bias, shifts, slopes, mixing_weights, mixing_bias])
154 |             shifts_ = np.squeeze(shifts_).astype(np.float32)
155 |             slopes_ = np.squeeze(slopes_).astype(np.float32)
156 |             mix_matrix_dump = np.append(np.squeeze(mixing_weights_), mixing_bias_[0]).astype(np.float32)
157 |             ccm34_ = np.vstack((ccm_, ccm_bias_[np.newaxis, :]))
158 | 
159 |             save(ccm34_.T, os.path.join(args.checkpoint_dir, 'guide_ccm_f32_3x4.bin'))
160 |             save(shifts_.T, os.path.join(args.checkpoint_dir, 'guide_shifts_f32_16x3.bin'))
161 |             save(slopes_.T, os.path.join(args.checkpoint_dir, 'guide_slopes_f32_16x3.bin'))
162 |             save(mix_matrix_dump, os.path.join(args.checkpoint_dir, 'guide_mix_matrix_f32_1x4.bin'))
163 | 
164 |         elif model_params['model_name'] == "HDRNetGaussianPyrNN":
165 |             g = tf.get_default_graph()
166 |             for lvl in range(3):
167 |                 conv1_w = g.get_tensor_by_name('inference/guide/level_{}/conv1/weights:0'.format(lvl))
168 |                 conv1_b = g.get_tensor_by_name('inference/guide/level_{}/conv1/BatchNorm/beta:0'.format(lvl))
169 |                 conv1_mu = g.get_tensor_by_name('inference/guide/level_{}/conv1/BatchNorm/moving_mean:0'.format(lvl))
170 |                 conv1_sigma = g.get_tensor_by_name(
171 |                     'inference/guide/level_{}/conv1/BatchNorm/moving_variance:0'.format(lvl))
172 |                 conv1_eps = g.get_tensor_by_name(
173 |                     'inference/guide/level_{}/conv1/BatchNorm/batchnorm/add/y:0'.format(lvl))
174 |                 conv2_w = g.get_tensor_by_name('inference/guide/level_{}/conv2/weights:0'.format(lvl))
175 |                 conv2_b = g.get_tensor_by_name('inference/guide/level_{}/conv2/biases:0'.format(lvl))
176 | 
177 |                 conv1w_, conv1b_, conv1mu_, conv1sigma_, conv1eps_, conv2w_, conv2b_ = sess.run(
178 |                     [conv1_w, conv1_b, conv1_mu, conv1_sigma, conv1_eps, conv2_w, conv2_b])
179 | 
180 |                 conv1b_ -= conv1mu_ / np.sqrt((conv1sigma_ + conv1eps_))
181 |                 conv1w_ = conv1w_ / np.sqrt((conv1sigma_ + conv1eps_))
182 | 
183 |                 conv1w_ = np.squeeze(conv1w_.astype(np.float32))
184 |                 conv1b_ = np.squeeze(conv1b_.astype(np.float32))
185 |                 conv1b_ = conv1b_[np.newaxis, :]
186 | 
187 |                 conv2w_ = np.squeeze(conv2w_.astype(np.float32))
188 |                 conv2b_ = np.squeeze(conv2b_.astype(np.float32))
189 | 
190 |                 conv2 = np.append(conv2w_, conv2b_)
191 |                 conv1 = np.vstack([conv1w_, conv1b_])
192 | 
193 |                 save(conv1.T, os.path.join(args.checkpoint_dir, 'guide_level{}_conv1.bin'.format(lvl)))
194 |                 save(conv2, os.path.join(args.checkpoint_dir, 'guide_level{}_conv2.bin'.format(lvl)))
195 | 
196 |         elif model_params['model_name'] in "HDRNetPointwiseNNGuide":
197 |             g = tf.get_default_graph()
198 |             conv1_w = g.get_tensor_by_name('inference/guide/conv1/weights:0')
199 |             conv1_b = g.get_tensor_by_name('inference/guide/conv1/BatchNorm/beta:0')
200 |             conv1_mu = g.get_tensor_by_name('inference/guide/conv1/BatchNorm/moving_mean:0')
201 |             conv1_sigma = g.get_tensor_by_name('inference/guide/conv1/BatchNorm/moving_variance:0')
202 |             conv1_eps = g.get_tensor_by_name('inference/guide/conv1/BatchNorm/batchnorm/add/y:0')
203 |             conv2_w = g.get_tensor_by_name('inference/guide/conv2/weights:0')
204 |             conv2_b = g.get_tensor_by_name('inference/guide/conv2/biases:0')
205 | 
206 |             conv1w_, conv1b_, conv1mu_, conv1sigma_, conv1eps_, conv2w_, conv2b_ = sess.run(
207 |                 [conv1_w, conv1_b, conv1_mu, conv1_sigma, conv1_eps, conv2_w, conv2_b])
208 | 
209 |             conv1b_ -= conv1mu_ / np.sqrt((conv1sigma_ + conv1eps_))
210 |             conv1w_ = conv1w_ / np.sqrt((conv1sigma_ + conv1eps_))
211 | 
212 |             conv1w_ = np.squeeze(conv1w_.astype(np.float32))
213 |             conv1b_ = np.squeeze(conv1b_.astype(np.float32))
214 |             conv1b_ = conv1b_[np.newaxis, :]
215 | 
216 |             conv2w_ = np.squeeze(conv2w_.astype(np.float32))
217 |             conv2b_ = np.squeeze(conv2b_.astype(np.float32))
218 | 
219 |             conv2 = np.append(conv2w_, conv2b_)
220 |             conv1 = np.vstack([conv1w_, conv1b_])
221 | 
222 |             save(conv1.T, os.path.join(args.checkpoint_dir, 'guide_conv1.bin'))
223 |             save(conv2, os.path.join(args.checkpoint_dir, 'guide_conv2.bin'))
224 | 
225 | 
if __name__ == '__main__':
    # Command-line entry point: the single positional argument is the
    # directory that holds the checkpoint to freeze.
    cli = argparse.ArgumentParser()
    cli.add_argument('checkpoint_dir', default=None, help='')
    main(cli.parse_args())
232 | 


--------------------------------------------------------------------------------
/hdrnet/ops/bilateral_slice.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2016 Google Inc.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #include "tensorflow/core/framework/op.h"
 16 | #include "tensorflow/core/framework/op_kernel.h"
 17 | 
 18 | using namespace tensorflow;
 19 | 
 20 | typedef Eigen::ThreadPoolDevice CPUDevice;
 21 | typedef Eigen::GpuDevice GPUDevice;
 22 | 
// -- OPS REGISTRATION --------------------------------------------------------
// Forward slicing op: takes a grid ("in") and a guide, produces "out".
// The GPU kernel registered for it in this file requires a 5D grid
// (batch, height, width, depth, nchannels) and a 3D guide (batch, height, width).
REGISTER_OP("BilateralSlice")
  .Input("in: float")
  .Input("guide: float")
  .Output("out: float")
  .Doc(R"doc(
Slices input in in the location defined by guide, to produce output.
)doc");

// Gradient of BilateralSlice: from the forward inputs and the backpropagated
// gradient, produces one gradient per forward input (grid and guide).
REGISTER_OP("BilateralSliceGrad")
  .Input("in: float")
  .Input("guide: float")
  .Input("backprop: float")
  .Output("grid_grad: float")
  .Output("guide_grad: float");

// Slice-and-apply op: takes a grid, a guide and an input image. The
// "has_offset" attribute selects whether the grid is expected to hold
// n_out*(n_in+1) channels (true) or n_out*n_in channels (false), as checked by
// the kernel in this file.
REGISTER_OP("BilateralSliceApply")
  .Input("grid: float")
  .Input("guide: float")
  .Input("input: float")
  .Attr("has_offset: bool")
  .Output("out: float")
  .Doc(R"doc(
Slices input in in the location defined by guide and apply it, to produce output.
)doc");

// Gradient of BilateralSliceApply: one gradient per forward input
// (grid, guide and input image).
REGISTER_OP("BilateralSliceApplyGrad")
  .Input("grid: float")
  .Input("guide: float")
  .Input("input: float")
  .Input("backprop: float")
  .Attr("has_offset: bool")
  .Output("grid_grad: float")
  .Output("guide_grad: float")
  .Output("input_grad: float");
 58 | // ----------------------------------------------------------------------------
 59 | 
 60 | // -- KERNEL LAUNCHERS --------------------------------------------------------
// Forward declarations of the GPU launchers called by the op kernels below.
// Their definitions are not in this file (presumably bilateral_slice.cu.cc -
// TODO confirm). Callers treat a `false` return value as a failed launch.

// Launches the forward slice. The caller passes bs/gh/gw/gd/chans from grid
// dimensions 0..4 and h/w from guide dimensions 1..2.
bool BilateralSliceKernelLauncher(
    const GPUDevice& d,
    int bs, int gh, int gw, int gd, int chans,
    int h, int w,
    const float* const grid, const float* const guide, float* const out);

// Launches the gradient of the forward slice. grid_size and guide_size point to
// the dimension sizes of the grid and of the guide.
bool BilateralSliceGradKernelLauncher(
    const GPUDevice& d,
    const float* const grid, const int64* grid_size,
    const float* const guide, const int64* guide_size,
    const float* const backprop,
    float* const grid_grad, float* const guide_grad);

// Launches slice-and-apply. input_chans is dimension 3 of the input image and
// output_chans is derived by the caller from the grid channel count and
// has_offset.
bool BilateralSliceApplyKernelLauncher(
    const GPUDevice& d,
    int bs, int gh, int gw, int gd, 
    int input_chans, int output_chans, bool has_offset,
    int h, int w,
    const float* const grid, const float* const guide, const float* const input,
    float* const out);

// Launches the gradient of slice-and-apply. The *_size arguments point to the
// dimension sizes of the corresponding tensors.
bool BilateralSliceApplyGradKernelLauncher(
    const GPUDevice& d,
    const float* const grid, const int64* grid_size,
    const float* const guide, const int64* guide_size,
    const float* const input, const int64* input_size,
    const float* const backprop,
    bool has_offset,
    float* const grid_grad, float* const guide_grad, float* const input_grad);
 90 | // ----------------------------------------------------------------------------
 91 | 
 92 | 
 93 | // ----------------------------------------------------------------------------
 94 | class BilateralSliceOp : public OpKernel {
 95 |  public:
 96 |   explicit BilateralSliceOp(OpKernelConstruction* context) : OpKernel(context) {}
 97 | 
 98 |   void Compute(OpKernelContext* context) override {
 99 |     // Grab the inputs
100 |     const Tensor& bilateral_grid = context->input(0);
101 |     const Tensor& guide = context->input(1);
102 | 
103 |     OP_REQUIRES(
104 |         context, bilateral_grid.dims() == 5,
105 |         errors::InvalidArgument(
106 |         R"msg(Input grid should be 5D (batch, height, width, depth, nchannels))msg"));
107 |     OP_REQUIRES(
108 |         context, guide.dims() == 3,
109 |         errors::InvalidArgument(
110 |         R"msg(Guide image should be 3D (batch, height, width))msg"));
111 | 
112 |     // Get shape of output tensor
113 |     TensorShape shape;
114 |     shape.AddDim(guide.dim_size(0));  // Batch size
115 |     shape.AddDim(guide.dim_size(1));  // height
116 |     shape.AddDim(guide.dim_size(2));  // width
117 |     shape.AddDim(bilateral_grid.dim_size(4));  // channels
118 | 
119 |     // Allocate output tensor
120 |     Tensor* output_tensor = NULL;
121 |     OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output_tensor));
122 | 
123 |     auto output = output_tensor->flat<float>();
124 | 
125 |     const int64 *grid_size = bilateral_grid.shape().dim_sizes().data();
126 |     const int64 *guide_size = guide.shape().dim_sizes().data();
127 | 
128 |     int h = guide.dim_size(1);
129 |     int w = guide.dim_size(2);
130 |     int bs = bilateral_grid.dim_size(0);
131 |     int gh = bilateral_grid.dim_size(1);
132 |     int gw = bilateral_grid.dim_size(2);
133 |     int gd = bilateral_grid.dim_size(3);
134 |     int chans = bilateral_grid.dim_size(4);
135 | 
136 |     // Call the cuda kernel launcher
137 |     if (!context->status().ok()) {
138 |       return;
139 |     }
140 | 
141 |     bool status = BilateralSliceKernelLauncher(
142 |         context->eigen_device<GPUDevice>(),
143 |         bs, gh, gw, gd, chans,
144 |         h, w,
145 |         bilateral_grid.flat<float>().data(), guide.flat<float>().data(), 
146 |         output.data());
147 | 
148 |     if (!status) {
149 |       context->SetStatus(
150 |           errors::Internal("Failed launch BilateralSliceKernel."));
151 |     }
152 |   }
153 | };
154 | 
155 | 
156 | class BilateralSliceGradOp : public OpKernel {
157 |  public:
158 |   explicit BilateralSliceGradOp(OpKernelConstruction* context) : OpKernel(context) {}
159 | 
160 |   void Compute(OpKernelContext* context) override {
161 |     // Grab the inputs
162 |     const Tensor& bilateral_grid = context->input(0);
163 |     const Tensor& guide = context->input(1);
164 |     const Tensor& backprop = context->input(2);
165 | 
166 |     OP_REQUIRES(
167 |         context, bilateral_grid.dims() == 5,
168 |         errors::InvalidArgument(
169 |         R"msg(Input grid should be 5D (batch, height, width, depth, nchannels))msg"));
170 |     OP_REQUIRES(
171 |         context, guide.dims() == 3,
172 |         errors::InvalidArgument(
173 |         R"msg(Guide image should be 3D (batch, height, width))msg"));
174 |     OP_REQUIRES(
175 |         context, backprop.dims() == 4,
176 |         errors::InvalidArgument(
177 |         R"msg(Backprop should be 4D (batch, height, width, nchannels))msg"));
178 | 
179 |     // Get shape of output tensor
180 |     TensorShape grid_shape = bilateral_grid.shape();
181 |     TensorShape guide_shape = guide.shape();
182 | 
183 |     // Allocate output tensor
184 |     Tensor* grid_grad = NULL;
185 |     OP_REQUIRES_OK(context, context->allocate_output(0, grid_shape,
186 |                                                      &grid_grad));
187 |     Tensor* guide_grad = NULL;
188 |     OP_REQUIRES_OK(context, context->allocate_output(1, guide_shape,
189 |                                                      &guide_grad));
190 | 
191 |     const int64 *grid_size = bilateral_grid.shape().dim_sizes().data();
192 |     const int64 *guide_size = guide.shape().dim_sizes().data();
193 | 
194 |     auto grid_grad_array = grid_grad->template flat<float>();
195 |     auto guide_grad_array = guide_grad->template flat<float>();
196 | 
197 |     // Call the cuda kernel launcher
198 |     bool status = BilateralSliceGradKernelLauncher(
199 |         context->eigen_device<GPUDevice>(),
200 |         bilateral_grid.flat<float>().data(), grid_size,
201 |         guide.flat<float>().data(), guide_size,
202 |         backprop.flat<float>().data(),
203 |         grid_grad_array.data(), guide_grad_array.data());
204 | 
205 |     if (!status) {
206 |       context->SetStatus(
207 |           errors::Internal("Failed launch BilateralSliceGradKernel."));
208 |     }
209 |   }
210 | };
211 | 
212 | 
213 | class BilateralSliceApplyOp : public OpKernel {
214 |   private:
215 |     bool has_offset;
216 | 
217 |   public:
218 |     explicit BilateralSliceApplyOp(OpKernelConstruction* context) : OpKernel(context) {
219 |       OP_REQUIRES_OK(context, context->GetAttr("has_offset", &has_offset));
220 |     }
221 | 
222 |     void Compute(OpKernelContext* context) override {
223 |       // Grab the inputs
224 |       const Tensor& bilateral_grid = context->input(0);
225 |       const Tensor& guide = context->input(1);
226 |       const Tensor& input = context->input(2);
227 | 
228 |       // Check tensor dims
229 |       OP_REQUIRES(
230 |           context, bilateral_grid.dims() == 5,
231 |           errors::InvalidArgument(
232 |             R"msg(Input grid should be 5D (batch, height, width, depth, nchannels))msg"));
233 |       OP_REQUIRES(
234 |           context, guide.dims() == 3,
235 |           errors::InvalidArgument(
236 |             R"msg(Guide image should be 3D (batch, height, width))msg"));
237 |       OP_REQUIRES(
238 |           context, input.dims() == 4,
239 |           errors::InvalidArgument(
240 |             R"msg(Guide image should be 4D (batch, height, width, nchannels))msg"));
241 | 
242 |       // Sizes
243 |       const int64 *grid_size = bilateral_grid.shape().dim_sizes().data();
244 |       const int64 *guide_size = guide.shape().dim_sizes().data();
245 |       int h = guide.dim_size(1);
246 |       int w = guide.dim_size(2);
247 |       int bs = bilateral_grid.dim_size(0);
248 |       int gh = bilateral_grid.dim_size(1);
249 |       int gw = bilateral_grid.dim_size(2);
250 |       int gd = bilateral_grid.dim_size(3);
251 |       int coeffs_chans = bilateral_grid.dim_size(4);
252 |       int input_chans = input.dim_size(3);
253 | 
254 |       OP_REQUIRES(
255 |           context, input.dim_size(0) == guide.dim_size(0) && input.dim_size(1) == h && input.dim_size(2) == w,
256 |           errors::InvalidArgument(
257 |             R"msg(Input and guide size should match.)msg"));
258 |       OP_REQUIRES(
259 |           context, guide.dim_size(0) == bs,
260 |           errors::InvalidArgument(
261 |             R"msg(Batch sizes should match.)msg"));
262 | 
263 |       int output_chans = 0;
264 |       if (has_offset) {
265 |         OP_REQUIRES(
266 |             context, coeffs_chans % (input_chans+1) == 0,
267 |             errors::InvalidArgument(
268 |               R"msg(Slicing with affine offset, coefficients grid should have n_out*(n_in+1) channels.)msg"));
269 |         output_chans = coeffs_chans / (input_chans+1);
270 |       } else {
271 |         OP_REQUIRES(
272 |             context, coeffs_chans % input_chans == 0,
273 |             errors::InvalidArgument(
274 |               R"msg(Slicing without affine offset, coefficients grid should have n_out*n_in channels.)msg"));
275 |         output_chans = coeffs_chans / input_chans;
276 |       }
277 | 
278 |       // Allocate output tensor
279 |       TensorShape out_shape;
280 |       out_shape.AddDim(bs);
281 |       out_shape.AddDim(h);
282 |       out_shape.AddDim(w);
283 |       out_shape.AddDim(output_chans);
284 |       Tensor* output_tensor = NULL;
285 |       OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output_tensor));
286 | 
287 |       // Call the cuda kernel launcher
288 |       auto output = output_tensor->flat<float>();
289 |       bool status = BilateralSliceApplyKernelLauncher(
290 |           context->eigen_device<GPUDevice>(),
291 |           bs, gh, gw, gd, 
292 |           input_chans, output_chans, has_offset,
293 |           h, w,
294 |           bilateral_grid.flat<float>().data(), guide.flat<float>().data(), input.flat<float>().data(),
295 |           output.data());
296 | 
297 |       if (!status) {
298 |         context->SetStatus(
299 |             errors::Internal("Failed to launch BilateralSliceApplyKernel."));
300 |       }
301 |     }
302 | };
303 | 
304 | class BilateralSliceApplyGradOp : public OpKernel {
305 |   private:
306 |     bool has_offset;
307 | 
308 |   public:
309 |     explicit BilateralSliceApplyGradOp(OpKernelConstruction* context) : OpKernel(context) {
310 |       OP_REQUIRES_OK(context, context->GetAttr("has_offset", &has_offset));
311 |     }
312 | 
313 |     void Compute(OpKernelContext* context) override {
314 |       // Grab the inputs
315 |       const Tensor& bilateral_grid = context->input(0);
316 |       const Tensor& guide = context->input(1);
317 |       const Tensor& input = context->input(2);
318 |       const Tensor& backprop = context->input(3);
319 | 
320 |       OP_REQUIRES(
321 |           context, bilateral_grid.dims() == 5,
322 |           errors::InvalidArgument(
323 |             R"msg(Input grid should be 5D (batch, height, width, depth, nchannels))msg"));
324 |       OP_REQUIRES(
325 |           context, guide.dims() == 3,
326 |           errors::InvalidArgument(
327 |             R"msg(Guide image should be 3D (batch, height, width))msg"));
328 |       OP_REQUIRES(
329 |           context, input.dims() == 4,
330 |           errors::InvalidArgument(
331 |             R"msg(Input image should be 4D (batch, height, width, nchannels))msg"));
332 |       OP_REQUIRES(
333 |           context, backprop.dims() == 4,
334 |           errors::InvalidArgument(
335 |             R"msg(Backprop should be 4D (batch, height, width, nchannels))msg"));
336 | 
337 |       // Get shape of output tensor
338 |       TensorShape grid_shape = bilateral_grid.shape();
339 |       TensorShape guide_shape = guide.shape();
340 |       TensorShape input_shape = input.shape();
341 | 
342 |       // Allocate output tensor
343 |       Tensor* grid_grad = NULL;
344 |       OP_REQUIRES_OK(context, context->allocate_output(0, grid_shape,
345 |             &grid_grad));
346 |       Tensor* guide_grad = NULL;
347 |       OP_REQUIRES_OK(context, context->allocate_output(1, guide_shape,
348 |             &guide_grad));
349 |       Tensor* input_grad = NULL;
350 |       OP_REQUIRES_OK(context, context->allocate_output(2, input_shape,
351 |             &input_grad));
352 | 
353 |       int64 grid_size[5]{bilateral_grid.dim_size(0),
354 |         bilateral_grid.dim_size(1),
355 |         bilateral_grid.dim_size(2),
356 |         bilateral_grid.dim_size(3),
357 |         bilateral_grid.dim_size(4)};
358 |       int64 guide_size[3]{guide.dim_size(0),
359 |         guide.dim_size(1),
360 |         guide.dim_size(2)};
361 |       int64 input_size[4]{input.dim_size(0),
362 |         input.dim_size(1),
363 |         input.dim_size(2),
364 |         input.dim_size(3)};
365 | 
366 |       auto grid_grad_array = grid_grad->template flat<float>();
367 |       auto guide_grad_array = guide_grad->template flat<float>();
368 |       auto input_grad_array = input_grad->template flat<float>();
369 | 
370 |       // Call the cuda kernel launcher
371 |       bool status = BilateralSliceApplyGradKernelLauncher(
372 |           context->eigen_device<GPUDevice>(),
373 |           bilateral_grid.flat<float>().data(), grid_size,
374 |           guide.flat<float>().data(), guide_size,
375 |           input.flat<float>().data(), input_size,
376 |           backprop.flat<float>().data(), has_offset,
377 |           grid_grad_array.data(), guide_grad_array.data(), input_grad_array.data());
378 | 
379 |       if (!status) {
380 |         context->SetStatus(
381 |             errors::Internal("Failed launch BilateralSliceApplyGradKernel."));
382 |       }
383 |     }
384 | };
385 | // ----------------------------------------------------------------------------
386 | 
387 | // -- KERNEL REGISTRATION -----------------------------------------------------
388 | REGISTER_KERNEL_BUILDER(Name("BilateralSlice").Device(DEVICE_GPU), BilateralSliceOp);
389 | REGISTER_KERNEL_BUILDER(Name("BilateralSliceGrad").Device(DEVICE_GPU), BilateralSliceGradOp);
390 | REGISTER_KERNEL_BUILDER(Name("BilateralSliceApply").Device(DEVICE_GPU), BilateralSliceApplyOp);
391 | REGISTER_KERNEL_BUILDER(Name("BilateralSliceApplyGrad").Device(DEVICE_GPU), BilateralSliceApplyGradOp);
392 | // ----------------------------------------------------------------------------
393 | 


--------------------------------------------------------------------------------
/hdrnet/ops/bilateral_slice.cu.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2016 Google Inc.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #if GOOGLE_CUDA
 16 | 
 17 | #define EIGEN_USE_GPU
 18 | 
 19 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 20 | #include "tensorflow/core/util/cuda_kernel_helper.h"
 21 | 
 22 | #include "math.h"
 23 | 
 24 | #include <iostream>
 25 | 
 26 | using namespace tensorflow;
 27 | 
 28 | typedef Eigen::GpuDevice GPUDevice;
 29 | 
 30 | __device__ float diff_abs(float x) {
 31 |   float eps = 1e-8;
 32 |   return sqrt(x*x+eps);
 33 | }
 34 | 
 35 | __device__ float d_diff_abs(float x) {
 36 |   float eps = 1e-8;
 37 |   return x/sqrt(x*x+eps);
 38 | }
 39 | 
 40 | __device__ float weight_z(float x) {
 41 |   float abx = diff_abs(x);
 42 |   return max(1.0f-abx, 0.0f);
 43 | }
 44 | 
 45 | __device__ float d_weight_z(float x) {
 46 |   float abx = diff_abs(x);
 47 |   if(abx > 1.0f) {
 48 |     return 0.0f;
 49 |     // return abx;
 50 |   } else {
 51 |     return d_diff_abs(x);
 52 |   }
 53 | }
 54 | 
 55 | __global__ void BilateralSliceKernel(
 56 |     int64 nthreads,
 57 |     const float* grid, const float* guide, 
 58 |     const int bs, const int h, const int w, const int chans,
 59 |     const int gh, const int gw, const int gd,
 60 |     float* out)
 61 | {
 62 |   // - Samples centered at 0.5.
 63 |   // - Repeating boundary conditions
 64 | 
 65 |   CUDA_1D_KERNEL_LOOP(idx, nthreads) {
 66 |     int c = idx % chans;
 67 |     int x = (idx / chans) % w;
 68 |     int y = (idx / (chans*w)) % h;
 69 |     int b = (idx / (chans*w*h));
 70 | 
 71 |     float gx = (x+0.5f)*gw/(1.0f*w);
 72 |     float gy = (y+0.5f)*gh/(1.0f*h);
 73 |     float gz = guide[x + w*(y + h*b)]*gd;
 74 | 
 75 |     int fx = static_cast<int>(floor(gx-0.5f));
 76 |     int fy = static_cast<int>(floor(gy-0.5f));
 77 |     int fz = static_cast<int>(floor(gz-0.5f));
 78 | 
 79 |     int sz = chans;
 80 |     int sx = chans*gd;
 81 |     int sy = chans*gd*gw;
 82 |     int sb = chans*gd*gw*gh;
 83 | 
 84 |     float value = 0.0f;
 85 |     for (int xx = fx; xx < fx+2; ++xx) {
 86 |       int x_ = max(min(xx, gw-1), 0);
 87 |       float wx = max(1.0f-abs(xx+0.5-gx), 0.0f);
 88 |       for (int yy = fy; yy < fy+2; ++yy)
 89 |       {
 90 |         int y_ = max(min(yy, gh-1), 0);
 91 |         float wy = max(1.0f-abs(yy+0.5-gy), 0.0f);
 92 |         for (int zz = fz; zz < fz+2; ++zz)
 93 |         {
 94 |           int z_ = max(min(zz, gd-1), 0);
 95 |           float wz = weight_z(zz+0.5-gz);
 96 |           int grid_idx = c + sz*z_ + sx*x_ + sy*y_ + sb*b;
 97 |           value += grid[grid_idx]*wx*wy*wz;
 98 |         }
 99 |       }
100 |     }
101 |     out[idx] = value;
102 |   }
103 | }
104 | 
// Gradient of the slice operation with respect to the grid.
//
// Every flat index `idx` handed out by CUDA_1D_KERNEL_LOOP addresses one grid
// coefficient, unravelled as [b, gh, gw, gd, chans]. The kernel walks the
// window of pixels that lie within one grid cell of this coefficient on each
// side (in x and y) and accumulates backprop[b, y, x, c] scaled by the x, y and
// depth weights of that pixel relative to this cell.
//
// Pixel coordinates that fall outside the image are mirrored back inside. The
// weights are still computed from the un-mirrored coordinate.
//
// `grid` and `bs` are accepted for signature symmetry but are not read.
__global__ void BilateralSliceGridGradKernel(
    int64 nthreads,
    const float* grid, const float* guide, const float* backprop,
    const int bs, const int h, const int w, const int chans,
    const int gh, const int gw, const int gd,
    float* out)
{
  CUDA_1D_KERNEL_LOOP(idx, nthreads) {
    // Unravel the flat grid index.
    const int chan = idx % chans;
    const int cell_z = (idx / chans) % gd;
    const int cell_x = (idx / (chans*gd)) % gw;
    const int cell_y = (idx / (chans*gd*gw)) % gh;
    const int batch = idx / (chans*gd*gw*gh);

    // Pixels per grid cell along each image axis.
    const float scale_w = w*1.0/gw;
    const float scale_h = h*1.0/gh;

    // Half-open pixel window [begin, end) visited for this cell.
    const int x_begin = static_cast<int>(floor(scale_w*(cell_x+0.5-1)));
    const int x_end = static_cast<int>(ceil(scale_w*(cell_x+0.5+1)));
    const int y_begin = static_cast<int>(floor(scale_h*(cell_y+0.5-1)));
    const int y_end = static_cast<int>(ceil(scale_h*(cell_y+0.5+1)));

    // Element strides inside backprop.
    const int stride_x = chans;
    const int stride_y = chans*w;
    const int stride_b = chans*w*h;

    float acc = 0.0f;
    for (int x = x_begin; x < x_end; ++x) {
      // Mirror boundary.
      int px = x;
      if (px < 0) px = -px-1;
      if (px >= w) px = 2*w-1-px;

      const float x_in_grid = (x+0.5f)/scale_w;
      const float wx = max(1.0f-abs(cell_x+0.5-x_in_grid), 0.0f);

      for (int y = y_begin; y < y_end; ++y) {
        // Mirror boundary.
        int py = y;
        if (py < 0) py = -py-1;
        if (py >= h) py = 2*h-1-py;

        const float y_in_grid = (y+0.5f)/scale_h;
        const float wy = max(1.0f-abs(cell_y+0.5-y_in_grid), 0.0f);

        const float z_in_grid = guide[px + w*py + h*w*batch]*gd;
        float wz = weight_z(cell_z+0.5f-z_in_grid);
        // Guide values beyond the centre of the first/last depth cell give
        // that boundary cell the full weight.
        const bool below_first = (cell_z==0 && z_in_grid<0.5f);
        const bool above_last = (cell_z==gd-1 && z_in_grid>gd-0.5f);
        if (below_first || above_last) {
          wz = 1.0f;
        }

        const int back_idx = chan + stride_x*px + stride_y*py + stride_b*batch;
        acc += wz*wx*wy*backprop[back_idx];
      }
    }
    out[idx] = acc;
  }
}
171 | 
// Gradient of the slice operation with respect to the guide.
//
// Every flat index `idx` handed out by CUDA_1D_KERNEL_LOOP addresses one guide
// pixel, unravelled as [b, h, w]. For each channel the grid is sampled over the
// same 2x2x2 neighbourhood as the forward pass, but the depth weight is
// replaced by gd*d_weight_z(...). The per-channel result is multiplied by
// backprop[b, y, x, c] and summed over channels.
//
// `bs` is accepted for signature symmetry but is not read.
__global__ void BilateralSliceGuideGradKernel(
    int64 nthreads,
    const float* grid, const float* guide, const float* backprop,
    const int bs, const int h, const int w, const int chans,
    const int gh, const int gw, const int gd,
    float* out)
{
  CUDA_1D_KERNEL_LOOP(idx, nthreads) {
    // Unravel the flat guide index.
    const int px = idx % w;
    const int py = (idx / w) % h;
    const int batch = idx / (w*h);
    const int pixel = px + w*(py + h*batch);

    // Continuous grid-space position of this pixel.
    const float grid_x = (px+0.5f)*gw/(1.0f*w);
    const float grid_y = (py+0.5f)*gh/(1.0f*h);
    const float grid_z = guide[pixel]*gd;

    // Lower corner of the 2x2x2 neighbourhood of grid cells.
    const int x0 = static_cast<int>(floor(grid_x-0.5f));
    const int y0 = static_cast<int>(floor(grid_y-0.5f));
    const int z0 = static_cast<int>(floor(grid_z-0.5f));

    // Element strides inside the grid.
    const int stride_z = chans;
    const int stride_x = chans*gd;
    const int stride_y = chans*gd*gw;
    const int stride_b = chans*gd*gw*gh;

    float total = 0.0f;
    for (int chan = 0; chan < chans; ++chan) {
      float d_sample = 0.0f;
      for (int dx = 0; dx < 2; ++dx) {
        const int xx = x0 + dx;
        const int cx = max(min(xx, gw-1), 0);
        const float wx = max(1.0f-abs(xx+0.5-grid_x), 0.0f);
        for (int dy = 0; dy < 2; ++dy) {
          const int yy = y0 + dy;
          const int cy = max(min(yy, gh-1), 0);
          const float wy = max(1.0f-abs(yy+0.5-grid_y), 0.0f);
          for (int dz = 0; dz < 2; ++dz) {
            const int zz = z0 + dz;
            const int cz = max(min(zz, gd-1), 0);
            // The factor gd comes from grid_z = guide*gd.
            const float dwz = gd*d_weight_z(zz+0.5-grid_z);

            const int cell = chan + stride_z*cz + stride_x*cx + stride_y*cy + stride_b*batch;
            d_sample += grid[cell]*wx*wy*dwz;
          }
        }
      }
      total += d_sample*backprop[chan + chans*pixel];
    }
    out[idx] = total;
  }
}
223 | 
// Forward declaration. The kernel is defined after this launcher in the file,
// and the launch expression below cannot name a function that has not been
// declared yet. The signature must stay in sync with the definition.
__global__ void BilateralSliceApplyKernel(
    int64 nthreads,
    const float* grid, const float* guide, const float* input,
    const int bs, const int h, const int w,
    const int gh, const int gw, const int gd,
    const int input_chans, const int output_chans, const bool has_offset,
    float* out);

// Launches BilateralSliceApplyKernel on the stream of device `d`.
//
// Parameters:
//   d                          GPU device whose stream receives the launch.
//   bs, h, w                   batch size and image height/width.
//   gh, gw, gd                 grid height, width and depth.
//   input_chans, output_chans  channel counts of `input` and `out`.
//   has_offset                 forwarded to the kernel unchanged.
//   grid, guide, input         device pointers read by the kernel.
//   out                        device pointer written by the kernel; one
//                              element per (b, y, x, output channel).
//
// Nothing is launched when the output is empty (bs*h*w*output_chans <= 0).
// Returns d.ok() evaluated after the (possible) launch.
bool BilateralSliceApplyKernelLauncher(
    const GPUDevice& d,
    int bs, int gh, int gw, int gd,
    int input_chans, int output_chans, bool has_offset,
    int h, int w,
    const float* const grid, const float* const guide, const float* const input,
    float* const out)
{
  const int total_count = bs*h*w*output_chans;
  if (total_count > 0) {
    CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
    BilateralSliceApplyKernel<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
        total_count, grid, guide, input,
        bs, h, w, gh, gw, gd, input_chans, output_chans, has_offset,
        out);
  }

  return d.ok();
}
243 | 
// Slices a grid of affine coefficients and applies them to the input.
//
// Every flat index `idx` handed out by CUDA_1D_KERNEL_LOOP addresses one
// output element, unravelled as [b, h, w, output_chans]. For that element the
// kernel interpolates `coeff_stride` coefficients from the grid (input_chans
// of them, plus one more when has_offset is set), multiplies the first
// input_chans by input[b, y, x, in_c] and adds the extra one as an offset.
//
// Grid channel layout used below: coeff_stride*out_c + in_c.
//
// - Samples are centered at 0.5.
// - Grid indices that fall outside the grid are clamped to the nearest cell.
//
// `bs` is accepted for signature symmetry but is not read.
__global__ void BilateralSliceApplyKernel(
    int64 nthreads,
    const float* grid, const float* guide, const float* input,
    const int bs, const int h, const int w,
    const int gh, const int gw, const int gd,
    const int input_chans, const int output_chans, const bool has_offset,
    float* out)
{
  // Coefficients per output channel, and per grid cell.
  const int coeff_stride = has_offset ? input_chans+1 : input_chans;
  const int grid_chans = coeff_stride*output_chans;

  CUDA_1D_KERNEL_LOOP(idx, nthreads) {
    // Unravel the flat output index.
    const int out_c = idx % output_chans;
    const int px = (idx / output_chans) % w;
    const int py = (idx / (output_chans*w)) % h;
    const int batch = idx / (output_chans*w*h);
    const int pixel = px + w*(py + h*batch);

    // Continuous grid-space position of this pixel.
    const float grid_x = (px+0.5f)*gw/(1.0f*w);
    const float grid_y = (py+0.5f)*gh/(1.0f*h);
    const float grid_z = guide[pixel]*gd;

    // Lower corner of the 2x2x2 neighbourhood of grid cells.
    const int x0 = static_cast<int>(floor(grid_x-0.5f));
    const int y0 = static_cast<int>(floor(grid_y-0.5f));
    const int z0 = static_cast<int>(floor(grid_z-0.5f));

    // Element strides inside the grid.
    const int stride_z = grid_chans;
    const int stride_x = grid_chans*gd;
    const int stride_y = grid_chans*gd*gw;
    const int stride_b = grid_chans*gd*gw*gh;

    float acc = 0.0f;
    for (int in_c = 0; in_c < coeff_stride; ++in_c) {
      const int coeff_chan = coeff_stride*out_c + in_c;

      // Trilinear interpolation of one coefficient.
      float coeff = 0.0f;
      for (int dx = 0; dx < 2; ++dx) {
        const int xx = x0 + dx;
        const int cx = max(min(xx, gw-1), 0);
        const float wx = max(1.0f-abs(xx+0.5-grid_x), 0.0f);
        for (int dy = 0; dy < 2; ++dy) {
          const int yy = y0 + dy;
          const int cy = max(min(yy, gh-1), 0);
          const float wy = max(1.0f-abs(yy+0.5-grid_y), 0.0f);
          for (int dz = 0; dz < 2; ++dz) {
            const int zz = z0 + dz;
            const int cz = max(min(zz, gd-1), 0);
            const float wz = weight_z(zz+0.5-grid_z);
            const int cell = coeff_chan + stride_z*cz + stride_x*cx + stride_y*cy + stride_b*batch;
            coeff += grid[cell]*wx*wy*wz;
          }
        }
      }

      if (in_c >= input_chans) {
        // Offset term: added as-is.
        acc += coeff;
      } else {
        acc += coeff*input[in_c + input_chans*pixel];
      }
    }
    out[idx] = acc;
  }
}
312 | 
313 | 
// Gradient of slice-and-apply with respect to the coefficient grid.
//
// Every flat index `idx` handed out by CUDA_1D_KERNEL_LOOP addresses one grid
// coefficient, unravelled as [b, gh, gw, gd, grid_chans]. The grid channel is
// split into (out_c, in_c) = (c / coeff_stride, c % coeff_stride). The kernel
// walks the window of pixels within one grid cell of this coefficient on each
// side and accumulates weight * backprop[b, y, x, out_c], additionally
// multiplied by input[b, y, x, in_c] unless in_c is the offset slot.
//
// Pixel coordinates that fall outside the image are mirrored back inside. The
// weights are still computed from the un-mirrored coordinate.
//
// `grid` and `bs` are accepted for signature symmetry but are not read.
__global__ void BilateralSliceApplyGridGradKernel(
    int64 nthreads,
    const float* grid, const float* guide, const float* input, const float* backprop,
    const int bs, const int h, const int w,
    const int gh, const int gw, const int gd,
    const int input_chans, const int output_chans, const bool has_offset,
    float* out)
{
  // Coefficients per output channel, and per grid cell.
  const int coeff_stride = has_offset ? input_chans+1 : input_chans;
  const int grid_chans = coeff_stride*output_chans;

  CUDA_1D_KERNEL_LOOP(idx, nthreads) {
    // Unravel the flat grid index.
    const int coeff_chan = idx % grid_chans;
    const int cell_z = (idx / grid_chans) % gd;
    const int cell_x = (idx / (grid_chans*gd)) % gw;
    const int cell_y = (idx / (grid_chans*gd*gw)) % gh;
    const int batch = idx / (grid_chans*gd*gw*gh);

    const int out_c = coeff_chan / coeff_stride;
    const int in_c = coeff_chan % coeff_stride;
    const bool is_offset_term = in_c >= input_chans;

    // Pixels per grid cell along each image axis.
    const float scale_w = w*1.0/gw;
    const float scale_h = h*1.0/gh;

    // Half-open pixel window [begin, end) visited for this cell.
    const int x_begin = static_cast<int>(floor(scale_w*(cell_x+0.5-1)));
    const int x_end = static_cast<int>(ceil(scale_w*(cell_x+0.5+1)));
    const int y_begin = static_cast<int>(floor(scale_h*(cell_y+0.5-1)));
    const int y_end = static_cast<int>(ceil(scale_h*(cell_y+0.5+1)));

    // Element strides inside backprop (output-shaped).
    const int out_stride_x = output_chans;
    const int out_stride_y = output_chans*w;
    const int out_stride_b = output_chans*w*h;

    // Element strides inside input.
    const int in_stride_x = input_chans;
    const int in_stride_y = input_chans*w;
    const int in_stride_b = input_chans*w*h;

    float acc = 0.0f;
    for (int x = x_begin; x < x_end; ++x) {
      // Mirror boundary.
      int px = x;
      if (px < 0) px = -px-1;
      if (px >= w) px = 2*w-1-px;

      const float x_in_grid = (x+0.5f)/scale_w;
      const float wx = max(1.0f-abs(cell_x+0.5-x_in_grid), 0.0f);

      for (int y = y_begin; y < y_end; ++y) {
        // Mirror boundary.
        int py = y;
        if (py < 0) py = -py-1;
        if (py >= h) py = 2*h-1-py;

        const float y_in_grid = (y+0.5f)/scale_h;
        const float wy = max(1.0f-abs(cell_y+0.5-y_in_grid), 0.0f);

        const float z_in_grid = guide[px + w*py + h*w*batch]*gd;
        float wz = weight_z(cell_z+0.5f-z_in_grid);
        // Guide values beyond the centre of the first/last depth cell give
        // that boundary cell the full weight.
        const bool below_first = (cell_z==0 && z_in_grid<0.5f);
        const bool above_last = (cell_z==gd-1 && z_in_grid>gd-0.5f);
        if (below_first || above_last) {
          wz = 1.0f;
        }

        const int back_idx = out_c + out_stride_x*px + out_stride_y*py + out_stride_b*batch;
        if (is_offset_term) {
          acc += wz*wx*wy*backprop[back_idx];
        } else {
          const int input_idx = in_c + in_stride_x*px + in_stride_y*py + in_stride_b*batch;
          acc += wz*wx*wy*backprop[back_idx]*input[input_idx];
        }
      }
    }
    out[idx] = acc;
  }
}
399 | 
400 | 
// Gradient of slice-and-apply with respect to the guide.
//
// Every flat index `idx` handed out by CUDA_1D_KERNEL_LOOP addresses one guide
// pixel, unravelled as [b, h, w]. For each output channel the kernel rebuilds
// the affine combination of the forward pass, with the depth weight replaced
// by gd*d_weight_z(...), multiplies it by backprop[b, y, x, out_c] and sums
// over output channels.
//
// `bs` is accepted for signature symmetry but is not read.
__global__ void BilateralSliceApplyGuideGradKernel(
    int64 nthreads,
    const float* grid, const float* guide, const float* input, const float* backprop,
    const int bs, const int h, const int w,
    const int gh, const int gw, const int gd,
    const int input_chans, const int output_chans, const bool has_offset,
    float* out)
{
  // Coefficients per output channel, and per grid cell.
  const int coeff_stride = has_offset ? input_chans+1 : input_chans;
  const int grid_chans = coeff_stride*output_chans;

  CUDA_1D_KERNEL_LOOP(idx, nthreads) {
    // Unravel the flat guide index.
    const int px = idx % w;
    const int py = (idx / w) % h;
    const int batch = idx / (w*h);
    const int pixel = px + w*(py + h*batch);

    // Continuous grid-space position of this pixel.
    const float grid_x = (px+0.5f)*gw/(1.0f*w);
    const float grid_y = (py+0.5f)*gh/(1.0f*h);
    const float grid_z = guide[pixel]*gd;

    // Lower corner of the 2x2x2 neighbourhood of grid cells.
    const int x0 = static_cast<int>(floor(grid_x-0.5f));
    const int y0 = static_cast<int>(floor(grid_y-0.5f));
    const int z0 = static_cast<int>(floor(grid_z-0.5f));

    // Element strides inside the grid.
    const int stride_z = grid_chans;
    const int stride_x = grid_chans*gd;
    const int stride_y = grid_chans*gd*gw;
    const int stride_b = grid_chans*gd*gw*gh;

    float total = 0.0f;
    for (int out_c = 0; out_c < output_chans; ++out_c) {
      float affine = 0.0f;
      for (int in_c = 0; in_c < coeff_stride; ++in_c) {
        const int coeff_chan = coeff_stride*out_c + in_c;

        // Derivative of the interpolated coefficient w.r.t. the guide.
        float d_coeff = 0.0f;
        for (int dx = 0; dx < 2; ++dx) {
          const int xx = x0 + dx;
          const int cx = max(min(xx, gw-1), 0);
          const float wx = max(1.0f-abs(xx+0.5-grid_x), 0.0f);
          for (int dy = 0; dy < 2; ++dy) {
            const int yy = y0 + dy;
            const int cy = max(min(yy, gh-1), 0);
            const float wy = max(1.0f-abs(yy+0.5-grid_y), 0.0f);
            for (int dz = 0; dz < 2; ++dz) {
              const int zz = z0 + dz;
              const int cz = max(min(zz, gd-1), 0);
              // The factor gd comes from grid_z = guide*gd.
              const float dwz = gd*d_weight_z(zz+0.5-grid_z);

              const int cell = coeff_chan + stride_z*cz + stride_x*cx + stride_y*cy + stride_b*batch;
              d_coeff += grid[cell]*wx*wy*dwz;
            }
          }
        }

        if (in_c >= input_chans) {
          // Offset term: no input factor.
          affine += d_coeff;
        } else {
          affine += d_coeff*input[in_c + input_chans*pixel];
        }
      }

      total += affine*backprop[out_c + output_chans*pixel];
    }

    out[idx] = total;
  }
}
474 | 
475 | 
// Gradient of slice-and-apply with respect to the input.
//
// Every flat index `idx` handed out by CUDA_1D_KERNEL_LOOP addresses one input
// element, unravelled as [b, h, w, input_chans]. For each output channel the
// kernel interpolates the coefficient that multiplies this input channel
// (grid channel coeff_stride*out_c + in_c), multiplies it by
// backprop[b, y, x, out_c] and sums over output channels.
//
// `input` and `bs` are accepted for signature symmetry but are not read.
__global__ void BilateralSliceApplyInputGradKernel(
    int64 nthreads,
    const float* grid, const float* guide, const float* input, const float* backprop,
    const int bs, const int h, const int w,
    const int gh, const int gw, const int gd,
    const int input_chans, const int output_chans, const bool has_offset,
    float* out)
{
  // Coefficients per output channel, and per grid cell.
  const int coeff_stride = has_offset ? input_chans+1 : input_chans;
  const int grid_chans = coeff_stride*output_chans;

  CUDA_1D_KERNEL_LOOP(idx, nthreads) {
    // Unravel the flat input index.
    const int in_c = idx % input_chans;
    const int px = (idx / input_chans) % w;
    const int py = (idx / (input_chans*w)) % h;
    const int batch = idx / (input_chans*w*h);
    const int pixel = px + w*(py + h*batch);

    // Continuous grid-space position of this pixel.
    const float grid_x = (px+0.5f)*gw/(1.0f*w);
    const float grid_y = (py+0.5f)*gh/(1.0f*h);
    const float grid_z = guide[pixel]*gd;

    // Lower corner of the 2x2x2 neighbourhood of grid cells.
    const int x0 = static_cast<int>(floor(grid_x-0.5f));
    const int y0 = static_cast<int>(floor(grid_y-0.5f));
    const int z0 = static_cast<int>(floor(grid_z-0.5f));

    // Element strides inside the grid.
    const int stride_z = grid_chans;
    const int stride_x = grid_chans*gd;
    const int stride_y = grid_chans*gd*gw;
    const int stride_b = grid_chans*gd*gw*gh;

    float total = 0.0f;
    for (int out_c = 0; out_c < output_chans; ++out_c) {
      const int coeff_chan = coeff_stride*out_c + in_c;

      // Trilinear interpolation of one coefficient.
      float coeff = 0.0f;
      for (int dx = 0; dx < 2; ++dx) {
        const int xx = x0 + dx;
        const int cx = max(min(xx, gw-1), 0);
        const float wx = max(1.0f-abs(xx+0.5-grid_x), 0.0f);
        for (int dy = 0; dy < 2; ++dy) {
          const int yy = y0 + dy;
          const int cy = max(min(yy, gh-1), 0);
          const float wy = max(1.0f-abs(yy+0.5-grid_y), 0.0f);
          for (int dz = 0; dz < 2; ++dz) {
            const int zz = z0 + dz;
            const int cz = max(min(zz, gd-1), 0);
            const float wz = weight_z(zz+0.5-grid_z);

            const int cell = coeff_chan + stride_z*cz + stride_x*cx + stride_y*cy + stride_b*batch;
            coeff += grid[cell]*wx*wy*wz;
          }
        }
      }

      total += coeff*backprop[out_c + output_chans*pixel];
    }
    out[idx] = total;
  }
}
539 | 
540 | 
541 | // -- KERNEL LAUNCHERS ---------------------------------------------------------
// Launches BilateralSliceKernel on the stream of device `d`.
//
// One work item is requested per output element (bs*h*w*chans). When that
// count is not positive nothing is launched. Returns d.ok() in either case.
bool BilateralSliceKernelLauncher(
    const GPUDevice& d,
    int bs, int gh, int gw, int gd, int chans,
    int h, int w,
    const float* const grid, const float* const guide, float* const out)
{
  const int total_count = bs*h*w*chans;
  if (total_count <= 0) {
    // Empty output: skip the launch and just report the device status.
    return d.ok();
  }

  const CudaLaunchConfig launch = GetCudaLaunchConfig(total_count, d);
  BilateralSliceKernel<<<launch.block_count, launch.thread_per_block, 0, d.stream()>>>(
      total_count, grid, guide,
      bs, h, w, chans, gh, gw, gd,
      out);

  return d.ok();
}
559 | 
// Launches the two gradient kernels of the slice operation on the stream of
// device `d`: one over all grid coefficients (writes grid_grad) and one over
// all guide pixels (writes guide_grad).
//
// grid_size is read as {bs, gh, gw, gd, chans}; from guide_size only entries
// 1 and 2 (h, w) are read. Each launch is skipped when its element count is
// not positive. Returns d.ok() after the launches.
bool BilateralSliceGradKernelLauncher(
    const GPUDevice& d,
    const float* grid, const int64* grid_size,
    const float* guide, const int64* guide_size,
    const float* backprop,
    float* grid_grad, float* guide_grad)
{
  const int64 bs = grid_size[0];
  const int64 gh = grid_size[1];
  const int64 gw = grid_size[2];
  const int64 gd = grid_size[3];
  const int64 chans = grid_size[4];

  const int64 h = guide_size[1];
  const int64 w = guide_size[2];

  // Gradient with respect to the grid: one work item per grid coefficient.
  const int64 grid_count = bs*gh*gw*gd*chans;
  if (grid_count > 0) {
    const CudaLaunchConfig grid_launch = GetCudaLaunchConfig(grid_count, d);
    BilateralSliceGridGradKernel<<<grid_launch.block_count, grid_launch.thread_per_block, 0, d.stream()>>>(
        grid_count, grid, guide, backprop,
        bs, h, w, chans, gh, gw, gd,
        grid_grad);
  }

  // Gradient with respect to the guide: one work item per guide pixel.
  const int64 guide_count = bs*h*w;
  if (guide_count > 0) {
    const CudaLaunchConfig guide_launch = GetCudaLaunchConfig(guide_count, d);
    BilateralSliceGuideGradKernel<<<guide_launch.block_count, guide_launch.thread_per_block, 0, d.stream()>>>(
        guide_count, grid, guide, backprop,
        bs, h, w, chans, gh, gw, gd,
        guide_grad);
  }

  return d.ok();
}
596 | 
597 | 
598 | 
599 | 
// Launches the three gradient kernels of slice-and-apply on the stream of
// device `d`: grid gradient (writes grid_grad), guide gradient (writes
// guide_grad) and input gradient (writes input_grad).
//
// Parameters:
//   grid_size    entries 1..4 are read as {gh, gw, gd, coeff_chans}.
//   guide_size   read as {bs, h, w}.
//   input_size   only entry 3 (input_chans) is read.
//   has_offset   when true each output channel owns input_chans + 1 grid
//                coefficients, otherwise input_chans.
//
// The number of output channels is derived as coeff_chans divided by the
// per-output coefficient count. Each launch is skipped when its element count
// is not positive.
//
// Returns false without launching anything when the per-output coefficient
// count is not positive (input_chans == 0 with no offset); that case used to
// be an integer division by zero on the host. Otherwise returns d.ok() after
// the launches.
bool BilateralSliceApplyGradKernelLauncher(
    const GPUDevice& d,
    const float* grid, const int64* grid_size,
    const float* guide, const int64* guide_size,
    const float* input, const int64* input_size,
    const float* backprop,
    bool has_offset,
    float* grid_grad, float* guide_grad, float* input_grad)
{
  const int64 gh = grid_size[1];
  const int64 gw = grid_size[2];
  const int64 gd = grid_size[3];
  const int64 coeff_chans = grid_size[4];
  const int64 bs = guide_size[0];
  const int64 h = guide_size[1];
  const int64 w = guide_size[2];
  const int64 input_chans = input_size[3];

  // Coefficients stored per output channel. Guard the division below: a
  // non-positive stride would divide by zero here and take a modulo by zero
  // inside the grid-gradient kernel.
  const int64 coeff_stride = has_offset ? input_chans+1 : input_chans;
  if (coeff_stride <= 0) {
    return false;
  }
  const int64 output_chans = coeff_chans/coeff_stride;

  // Gradient with respect to the grid: one work item per grid coefficient.
  const int64 grid_count = bs*gh*gw*gd*coeff_chans;
  if (grid_count > 0) {
    CudaLaunchConfig config = GetCudaLaunchConfig(grid_count, d);
    BilateralSliceApplyGridGradKernel<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
        grid_count, grid, guide, input, backprop,
        bs, h, w, gh, gw, gd,
        input_chans, output_chans, has_offset,
        grid_grad);
  }

  // Gradient with respect to the guide: one work item per guide pixel.
  const int64 guide_count = bs*h*w;
  if (guide_count > 0) {
    CudaLaunchConfig config = GetCudaLaunchConfig(guide_count, d);
    BilateralSliceApplyGuideGradKernel<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
        guide_count, grid, guide, input, backprop,
        bs, h, w, gh, gw, gd,
        input_chans, output_chans, has_offset,
        guide_grad);
  }

  // Gradient with respect to the input: one work item per input element.
  const int64 input_count = bs*h*w*input_chans;
  if (input_count > 0) {
    CudaLaunchConfig config = GetCudaLaunchConfig(input_count, d);
    BilateralSliceApplyInputGradKernel<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
        input_count, grid, guide, input, backprop,
        bs, h, w, gh, gw, gd,
        input_chans, output_chans, has_offset,
        input_grad);
  }

  return d.ok();
}
658 | 
659 | #endif
660 | 


--------------------------------------------------------------------------------