├── xla.png ├── IMG_1410.jpg ├── example.png ├── no_xla.png ├── photoai ├── logo.png ├── license.txt ├── index.html ├── js │ └── index.js └── css │ └── style.css ├── web_app_page.png ├── hdrnet ├── lib │ └── hdrnet_ops.so ├── utils.py ├── ops │ ├── cuda │ │ └── cuda_config.h │ ├── bilateral_slice.cc │ └── bilateral_slice.cu.cc ├── hdrnet_ops.py ├── layers.py └── models.py ├── optimized_graph ├── optimized_hdr.pb ├── optimized_edge.pb └── optimized_face.pb ├── requirements.txt ├── web_serving ├── web.py └── inference.py ├── README.md ├── scripts ├── test_pb_graph.py ├── optimize_graph.py └── freeze_graph.py └── tensorflow_serving └── hdr_saved.py /xla.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/xla.png -------------------------------------------------------------------------------- /IMG_1410.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/IMG_1410.jpg -------------------------------------------------------------------------------- /example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/example.png -------------------------------------------------------------------------------- /no_xla.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/no_xla.png -------------------------------------------------------------------------------- /photoai/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/photoai/logo.png -------------------------------------------------------------------------------- /web_app_page.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/web_app_page.png -------------------------------------------------------------------------------- /hdrnet/lib/hdrnet_ops.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/hdrnet/lib/hdrnet_ops.so -------------------------------------------------------------------------------- /optimized_graph/optimized_hdr.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/optimized_graph/optimized_hdr.pb -------------------------------------------------------------------------------- /optimized_graph/optimized_edge.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/optimized_graph/optimized_edge.pb -------------------------------------------------------------------------------- /optimized_graph/optimized_face.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaFun/Image-Enhancement/HEAD/optimized_graph/optimized_face.pb -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | setproctitle==1.1.10 2 | numpy==1.12.0 3 | pyglib==0.1 4 | scikit_image==0.9.3 5 | tensorflow_gpu==1.1.0 6 | python_gflags==3.1.1 7 | python_magic==0.4.13 8 | 9 | -------------------------------------------------------------------------------- /hdrnet/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. 
def get_model_params(sess, param_collection="model_params"):
    """Evaluate every tensor stored in a graph collection.

    Args:
      sess: session used to evaluate the tensors.
      param_collection: name of the graph collection to read.

    Returns:
      A dict mapping each tensor's name, truncated at the first ':',
      to the value obtained from `sess.run`.
    """
    fetches = {}
    for tensor in tf.get_collection(param_collection):
        # Keep only the part of the name before the first ':' (presumably
        # this strips the output-index suffix such as ":0").
        key = tensor.name.split(':')[0]
        fetches[key] = tensor
    return sess.run(fetches)
14 | * ==============================================================================*/ 15 | 16 | // DO NOT EDIT: automatically generated file 17 | // #ifndef CUDA_CUDA_CONFIG_H_ 18 | // #define CUDA_CUDA_CONFIG_H_ 19 | // 20 | // #define TF_CUDA_CAPABILITIES CudaVersion("3.0") 21 | // 22 | // #define TF_CUDA_VERSION "8.0" 23 | // #define TF_CUDNN_VERSION "5" 24 | // 25 | // #define TF_CUDA_TOOLKIT_PATH "/usr/local/cuda-8.0" 26 | // 27 | // #endif // CUDA_CUDA_CONFIG_H_ 28 | -------------------------------------------------------------------------------- /photoai/license.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 17 | -------------------------------------------------------------------------------- /web_serving/web.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | # Copyright 2018 Fei Cheng 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
# Scratch directories used by the models for decoded uploads. Each directory
# is checked on its own: the previous single check on /tmp/face either skipped
# the others or crashed with "File exists" when the layout was only partially
# present.
for _scratch_dir in ('/tmp/face', '/tmp/hdr', '/tmp/edge'):
    if not os.path.isdir(_scratch_dir):
        os.mkdir(_scratch_dir)

# set the optimized graph path
# NOTE: the face model previously pointed at 'optimized_graph.pb', which does
# not match the shipped 'optimized_face.pb' nor the naming of the other two.
hdrnet_face = Hdrnet('optimized_face.pb', 'face')
hdrnet_edge = Hdrnet('optimized_edge.pb', 'edge')
hdrnet_hdr = Hdrnet('optimized_hdr.pb', 'hdr')

# Maps the 'mode' form field to the model instance that serves it.
_MODELS = {
    'face': hdrnet_face,
    'edge': hdrnet_edge,
    'hdr': hdrnet_hdr,
}


@route('/')
def index():
    """Serve the 'index' template."""
    return template('index')


@route('/infer', method=['POST'])
def infer():
    """Run the model selected by the 'mode' form field on the uploaded image.

    Form fields:
      data: the image, passed unchanged to `Hdrnet.infer`.
      mode: one of 'face', 'edge' or 'hdr'.

    Returns:
      A dict `{'data': <base64 text>}` on success. When `mode` is unknown or
      `data` is missing, the response status is set to 400 and a dict with an
      'error' message is returned instead of failing with a NameError.
    """
    # Set the CORS header first so error responses carry it as well.
    response.set_header('Access-Control-Allow-Origin', '*')

    data_url = request.forms.get('data')
    mode = request.forms.get('mode')

    model = _MODELS.get(mode)
    if model is None:
        response.status = 400
        return {'error': "unknown mode; expected 'face', 'edge' or 'hdr'"}
    if not data_url:
        response.status = 400
        return {'error': "missing 'data' field"}

    data = model.infer(data_url)

    response.content_type = 'text/json'
    # b64encode returns bytes on Python 3; decode so the dict stays
    # JSON-serializable on both Python 2 and 3.
    return {'data': base64.b64encode(data).decode('ascii')}


# set inet address
run(host='10.64.25.231', port=9999, reload=True)
19 | 20 |
21 |
22 | 23 | 24 |
25 | 26 |
27 |

Drag and drop an Image

28 |
29 |
30 |
31 | your image 32 |
33 | 34 |
35 |
36 | 41 | 42 |
43 | 44 | 45 |
46 | 47 |
48 | 49 | 50 | 51 |
52 |
53 | 54 | 55 |
56 |

Enjoy Your Image

57 |
58 |
59 |
60 | 61 |
62 |
63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Real-time image enhancement DL android App and web App 2 | A deep learning project focusing on deploying pretrained models on mobile devices and in the cloud. It was implemented during the 3-week Insight AI fellowship program. 3 | 4 | Credit for the pretrained models goes to [Deep Bilateral Learning 5 | for Real-Time Image Enhancement](https://groups.csail.mit.edu/graphics/hdrnet/) 6 | 7 | ## Setup 8 | 9 | ### Dependencies 10 | 11 | To install the Python dependencies, run: 12 | 13 | pip install -r requirements.txt 14 | 15 | ## Usage 16 | 17 | To download the pretrained models, please refer to [Deep Bilateral Learning 18 | for Real-Time Image Enhancement](https://groups.csail.mit.edu/graphics/hdrnet/) 19 | 20 | 21 | To prepare a model for use on mobile, freeze the graph, and optimize the network: 22 | 23 | ./scripts/freeze_graph.py 24 | ./scripts/optimize_graph.py 25 | 26 | To test the prepared model for use in the web app or on mobile: 27 | 28 | ./scripts/test_pb_graph.py 29 | 30 | 31 | ## Serving the Hdrnet model on cloud 32 | ### [photoAI](http://photo-ai.surge.sh/) 33 |

34 | 35 |

36 | 37 | The web app 'photoAI' currently serves 3 different pretrained models: face brightening, edge enhancing, and HDR+. 38 | 39 | 40 | ## Deploy Hdrnet model on Android with TensorFlow Mobile 41 | 42 | In order to deploy this model on Android, I have to implement the custom TensorFlow op (CUDA version) in OpenCL so that the op can run on mobile. 43 | The code still needs some cleanup; to be updated. 44 | 45 |

46 | 47 |

48 | 49 | ## Inference performance comparison with and without XLA 50 | 51 | Some tests were run with XLA fused-operation optimization. The images below show tests with 1 batch (20) of 1500*1000 pictures. I didn't see any improvement from using XLA. I think there are two main reasons: BilateralSliceApply is a very computation-heavy custom op that cannot be fused by XLA, and XLA is still at an early stage. 52 | 53 | Inference without XLA JIT 54 |

55 | 56 |

57 | 58 | Inference with XLA JIT 59 |

60 | 61 |

62 | 63 | ## Known issues and limitations 64 | 65 | * Tensorflow mobile doesn't support custom op, especially for ops implemented in CUDA. The hdrnet model used a custom op - BilateralSliceApply op - is GPU only. 66 | 67 | * The pre-trained HDR+ model trained on a specially formatted 16-bit linear input. When feeding general images will receive outputs with weird color. -------------------------------------------------------------------------------- /photoai/js/index.js: -------------------------------------------------------------------------------- 1 | var dataURL; 2 | var server = "http://184.105.86.228:9999/infer"; 3 | 4 | function readURL(input) { 5 | $('#res img:last-child').remove(); 6 | if (input.files && input.files[0]) { 7 | 8 | var reader = new FileReader(); 9 | 10 | reader.onload = function(e) { 11 | $('.image-upload-wrap').hide(); 12 | 13 | $('.file-upload-image').attr('src', e.target.result); 14 | $('.file-upload-content').show(); 15 | 16 | $('.image-title').html(input.files[0].name); 17 | dataURL = reader.result; 18 | $('#mode').show(); 19 | }; 20 | 21 | reader.readAsDataURL(input.files[0]); 22 | 23 | } else { 24 | removeUpload(); 25 | } 26 | } 27 | 28 | function removeUpload() { 29 | $('.file-upload-input').replaceWith($('.file-upload-input').clone()); 30 | $('.file-upload-content').hide(); 31 | $('.image-upload-wrap').show(); 32 | $('#mode').hide(); 33 | $('#res img:last-child').remove(); 34 | } 35 | 36 | 37 | $('.image-upload-wrap').bind('dragover', function () { 38 | $('.image-upload-wrap').addClass('image-dropping'); 39 | }); 40 | $('.image-upload-wrap').bind('dragleave', function () { 41 | $('.image-upload-wrap').removeClass('image-dropping'); 42 | }); 43 | 44 | 45 | function serverRequest(mode) { 46 | // var http = new XMLHttpRequest(); 47 | // var url = server; 48 | // var params = { 49 | // mode: mode, 50 | // data: dataURL 51 | // }; 52 | // http.open("POST", url, true); 53 | // 54 | // //Send the proper header information along with the 
request 55 | // //http.setRequestHeader("Content-type", "application/x-www-form-urlencoded"); 56 | // 57 | // http.onreadystatechange = function(data) {//Call a function when the state changes. 58 | // if(http.readyState == 4 && http.status == 200) { 59 | // $("", { 60 | // "src": "data:image/jpeg;base64," + data['data'], 61 | // "class": "file-upload-image" 62 | // }).appendTo("#res"); 63 | // } 64 | // } 65 | // http.send(params); 66 | 67 | $.ajax({ 68 | type: 'POST', 69 | url: server, 70 | crossDomain: true, 71 | data: { 72 | mode: mode, 73 | data: dataURL 74 | }, 75 | dataType: 'json', 76 | success: function(data) { 77 | $("", { 78 | "src": "data:image/jpeg;base64," + data['data'], 79 | "class": "file-upload-image" 80 | }).appendTo("#res") 81 | // $('#res').attr('src', "data:image/jpeg;base64," + data['data']) 82 | }, 83 | error: function(jqXHR, textStatus, errorThrown){ 84 | console.log('error') 85 | console.log(jqXHR) 86 | console.log(textStatus) 87 | console.log(errorThrown) 88 | } 89 | }) 90 | } -------------------------------------------------------------------------------- /photoai/css/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: sans-serif; 3 | background-color: #eeeeee; 4 | } 5 | 6 | .file-upload { 7 | background-color: #ffffff; 8 | width: 650px; 9 | display: inline; 10 | float: left; 11 | margin: 10px; 12 | padding: 20px; 13 | } 14 | 15 | .file-upload-right { 16 | float: right; 17 | } 18 | 19 | .file-upload-btn { 20 | width: 100%; 21 | margin: 0; 22 | color: #fff; 23 | background: #1FB264; 24 | border: none; 25 | padding: 10px; 26 | border-radius: 4px; 27 | border-bottom: 4px solid #15824B; 28 | transition: all .2s ease; 29 | outline: none; 30 | text-transform: uppercase; 31 | font-weight: 700; 32 | } 33 | 34 | .mode-btn { 35 | width: 30%; 36 | margin-left: 8px; 37 | margin-right: 8px; 38 | } 39 | 40 | .p { 41 | margin-left: 12px; 42 | margin-top: 20px; 43 | margin-bottom: 10px; 
44 | } 45 | 46 | .file-upload-btn:hover { 47 | background: #1AA059; 48 | color: #ffffff; 49 | transition: all .2s ease; 50 | cursor: pointer; 51 | } 52 | 53 | .file-upload-btn:active { 54 | border: 0; 55 | transition: all .2s ease; 56 | } 57 | 58 | .file-upload-content { 59 | display: none; 60 | text-align: center; 61 | } 62 | 63 | .file-upload-input { 64 | position: absolute; 65 | margin: 0; 66 | padding: 0; 67 | width: 100%; 68 | height: 100%; 69 | outline: none; 70 | opacity: 0; 71 | cursor: pointer; 72 | } 73 | 74 | .image-upload-wrap { 75 | margin-top: 20px; 76 | border: 4px dashed #1FB264; 77 | position: relative; 78 | } 79 | 80 | .image-dropping, 81 | .image-upload-wrap:hover { 82 | background-color: #1FB264; 83 | border: 4px dashed #ffffff; 84 | } 85 | 86 | .image-title-wrap { 87 | padding: 0 15px 15px 15px; 88 | color: #222; 89 | } 90 | 91 | .drag-text { 92 | text-align: center; 93 | } 94 | 95 | .drag-text h3 { 96 | font-weight: 100; 97 | text-transform: uppercase; 98 | color: #15824B; 99 | padding: 60px 0; 100 | } 101 | 102 | .file-upload-image { 103 | max-height: 600px; 104 | max-width: 600px; 105 | margin: auto; 106 | padding: 20px; 107 | } 108 | 109 | .remove-image { 110 | width: 200px; 111 | margin: 0; 112 | color: #fff; 113 | background: #cd4535; 114 | border: none; 115 | padding: 10px; 116 | border-radius: 4px; 117 | border-bottom: 4px solid #b02818; 118 | transition: all .2s ease; 119 | outline: none; 120 | text-transform: uppercase; 121 | font-weight: 700; 122 | } 123 | 124 | .remove-image:hover { 125 | background: #c13b2a; 126 | color: #ffffff; 127 | transition: all .2s ease; 128 | cursor: pointer; 129 | } 130 | 131 | .remove-image:active { 132 | border: 0; 133 | transition: all .2s ease; 134 | } 135 | -------------------------------------------------------------------------------- /hdrnet/hdrnet_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. 
# Both ops are re-exported below, so both belong to the public API.
__all__ = ['bilateral_slice', 'bilateral_slice_apply']

# Locate the compiled op library that lives in ./lib next to this module.
path = os.path.dirname(os.path.abspath(__file__))
path = tf.resource_loader.get_path_to_datafile(
    os.path.join(path, 'lib', 'hdrnet_ops.so'))

_hdrnet = tf.load_op_library(path)

# -- Register operations ------------------------------------------------------
bilateral_slice = _hdrnet.bilateral_slice
bilateral_slice_apply = _hdrnet.bilateral_slice_apply


# ----------- Register gradients ----------------------------------------------
@ops.RegisterGradient('BilateralSlice')
def _bilateral_slice_grad(op, grad):
    """Gradient for the 'BilateralSlice' op.

    Args:
      op: the forward op; inputs 0 and 1 are read as the grid and the guide.
      grad: incoming gradient for the op's output.

    Returns:
      The result of the library's `bilateral_slice_grad` op (presumably the
      gradients w.r.t. grid and guide -- confirm against the op definition).
    """
    grid_tensor = op.inputs[0]
    guide_tensor = op.inputs[1]
    return _hdrnet.bilateral_slice_grad(grid_tensor, guide_tensor, grad)


# Previously also named `_bilateral_slice_grad`, which shadowed the function
# above at module level. Registration happens in the decorator, so behavior is
# unchanged; the distinct name only removes the redefinition.
@ops.RegisterGradient('BilateralSliceApply')
def _bilateral_slice_apply_grad(op, grad):
    """Gradient for the 'BilateralSliceApply' op.

    Args:
      op: the forward op; inputs 0, 1 and 2 are read as the grid, the guide
        and the input image, and its 'has_offset' attribute is forwarded.
      grad: incoming gradient for the op's output.

    Returns:
      The result of the library's `bilateral_slice_apply_grad` op.
    """
    grid_tensor = op.inputs[0]
    guide_tensor = op.inputs[1]
    input_tensor = op.inputs[2]
    has_offset = op.get_attr('has_offset')
    return _hdrnet.bilateral_slice_apply_grad(
        grid_tensor, guide_tensor, input_tensor, grad, has_offset=has_offset)


# ----------- Register Shape inference ----------------------------------------
@ops.RegisterShape('BilateralSlice')
def _bilateral_slice_shape(op):
    """Shape function for the 'BilateralSlice' op.

    Returns:
      A one-element list: the guide's shape (input 1) extended by the last
      dimension of input 0.
    """
    input_tensor = op.inputs[0]
    guide_tensor = op.inputs[1]
    return [guide_tensor.get_shape().concatenate(input_tensor.get_shape()[-1])]


# Previously also named `_bilateral_slice_shape`, which shadowed the function
# above at module level. Registration happens in the decorator, so behavior is
# unchanged; the distinct name only removes the redefinition.
@ops.RegisterShape('BilateralSliceApply')
def _bilateral_slice_apply_shape(op):
    """Shape function for the 'BilateralSliceApply' op.

    The output channel count is the grid's last dimension divided by the
    input's channel count, or by that count plus one when the op's
    'has_offset' attribute is set.

    Returns:
      A one-element list: the guide's shape (input 1) extended by the
      computed output channel count.
    """
    grid_tensor = op.inputs[0]
    guide_tensor = op.inputs[1]
    input_tensor = op.inputs[2]

    has_offset = op.get_attr('has_offset')
    chan_in = input_tensor.get_shape()[-1]
    chan_grid = grid_tensor.get_shape()[-1]

    if has_offset:
        chan_out = chan_grid // (chan_in + 1)
    else:
        chan_out = chan_grid // chan_in
    return [guide_tensor.get_shape().concatenate(chan_out)]
def load_graph(pb_graph_file):
    """Load a serialized GraphDef from disk into a fresh `tf.Graph`.

    Args:
      pb_graph_file: path to the protobuf (.pb) graph file.

    Returns:
      A new graph holding the imported nodes. `tf.import_graph_def` is called
      without a `name`, so every node is placed under its default "import"
      prefix.
    """
    # load the protobuf file from the disk and parse it to retrieve the
    # unserialized graph_def
    with tf.gfile.GFile(pb_graph_file, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # import the graph_def into a new Graph and returns it
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph


def main(args):
    """Run one image through an optimized graph and save the result.

    Args:
      args: parsed command-line arguments with `pb_file`, `input_image` and
        `output_image` attributes.

    Raises:
      IOError: if the input image cannot be read.
    """
    input_path = args.input_image
    im_input = cv2.imread(input_path, -1)  # -1 means read as is, no conversions.
    if im_input is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('could not read input image: %s' % input_path)

    if im_input.ndim == 2:
        # Single-channel image: replicate it so the channel handling below
        # (which indexes axis 2) does not fail.
        im_input = np.stack([im_input] * 3, axis=2)
    elif im_input.shape[2] == 4:
        # Drop the alpha channel.
        im_input = im_input[:, :, :3]

    im_input = np.flip(im_input, 2)  # OpenCV reads BGR, convert back to RGB.
    im_input = skimage.img_as_float(im_input)

    lowres_input = skimage.transform.resize(im_input, [256, 256], order=0)
    # Add the leading batch dimension.
    im_input = im_input[np.newaxis, :, :, :]
    lowres_input = lowres_input[np.newaxis, :, :, :]

    graph = load_graph(args.pb_file)

    # nodes names need to be customized if graph changed
    # load_graph() imports under the default "import" prefix, so the tensors
    # must be addressed with it (web_serving/inference.py does the same);
    # the unprefixed names do not exist in the imported graph.
    fullres = graph.get_tensor_by_name('import/fullres_input:0')
    lowres = graph.get_tensor_by_name('import/lowres_input:0')
    out = graph.get_tensor_by_name('import/output_img:0')

    with tf.Session(graph=graph) as sess:
        feed_dict = {
            fullres: im_input,
            lowres: lowres_input
        }
        # run the inference
        y_out = sess.run(out, feed_dict=feed_dict)

    # NOTE(review): assumes 'output_img' is already an HxWx3 uint8 array;
    # confirm against the frozen graph.
    img = Image.fromarray(y_out, 'RGB')
    img.save(args.output_image)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('pb_file', help='path to the optimized graph')
    parser.add_argument('input_image', help='input image path')
    parser.add_argument('output_image', help='output image path')

    args = parser.parse_args()
    main(args)
def TransformGraph(input_graph_def, inputs, outputs, transforms):
    """Python wrapper for the Graph Transform Tool.

    Gives access to all graph transforms available through the command line tool.
    See documentation at https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/graph_transforms/README.md
    for full details of the options available.

    Args:
      input_graph_def: GraphDef object containing a model to be transformed.
      inputs: List of node names for the model inputs.
      outputs: List of node names for the model outputs.
      transforms: List of strings containing transform names and parameters.

    Returns:
      New GraphDef with transforms applied.
    """
    serialized_in = input_graph_def.SerializeToString()
    # The wrapped call takes everything as byte strings: node names joined
    # with commas, transforms joined with spaces.
    input_names = compat.as_bytes(",".join(inputs))
    output_names = compat.as_bytes(",".join(outputs))
    transform_spec = compat.as_bytes(" ".join(transforms))

    with errors.raise_exception_on_not_ok_status() as status:
        serialized_out = TransformGraphWithStringInputs(
            serialized_in, input_names, output_names, transform_spec, status)

    transformed = graph_pb2.GraphDef()
    transformed.ParseFromString(serialized_out)
    return transformed


def load_graph(frozen_graph_path):
    """Read a frozen graph file from disk and return it as a GraphDef."""
    parsed = tf.GraphDef()
    with tf.gfile.GFile(frozen_graph_path, "rb") as pb_file:
        parsed.ParseFromString(pb_file.read())
    return parsed


def write_trans_graph(output_graph, output_graph_def):
    """Serialize `output_graph_def` and write it to the path `output_graph`."""
    with tf.gfile.GFile(output_graph, "wb") as pb_file:
        pb_file.write(output_graph_def.SerializeToString())


def main(args):
    """Load the frozen graph, apply the transform pipeline, write the result.

    Args:
      args: parsed command-line arguments with `frozen_path`,
        `optimized_path`, `input_nodes` and `output_nodes` attributes.
    """
    # Transforms are applied in the order listed.
    pipeline = [
        'strip_unused_nodes',
        'remove_nodes(op=Identity, op=CheckNumerics)',
        'merge_duplicate_nodes',
        'fold_constants(ignore_errors=true)',
        'fold_batch_norms',
        'sort_by_execution_order',
        'strip_unused_nodes',
    ]
    frozen = load_graph(args.frozen_path)
    optimized = TransformGraph(
        frozen, args.input_nodes, args.output_nodes, pipeline)
    write_trans_graph(args.optimized_path, optimized)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('frozen_path', default=None, help='path to the saved frozen graph')
    parser.add_argument('optimized_path', default=None, help='path to output optimized graph')
    parser.add_argument('input_nodes', nargs='+', help='input nodes names of the graph')
    parser.add_argument('output_nodes', nargs='+', help='output nodes names of the graph')

    args = parser.parse_args()
    main(args)
class Hdrnet(object):
    """Wraps one optimized graph and runs image enhancement with it."""

    def __init__(self, checkpoint, dir):
        """Load the graph and open a session on it.

        Args:
          checkpoint: path to the optimized graph (.pb) file.
          dir: name of the sub-directory of /tmp where decoded uploads are
            written; also used as the mode label of this instance.
        """
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        self.checkpoint = checkpoint
        self.graph = self.load_graph(checkpoint)
        # The options were previously built but never handed to the session,
        # so the memory fraction had no effect.
        self.sess = tf.Session(
            graph=self.graph,
            config=tf.ConfigProto(gpu_options=gpu_options))
        self.count = 0  # number of images written so far; used as file name
        self.dir = dir

    def load_graph(self, graph):
        """Load a serialized GraphDef from the path `graph` into a new graph.

        `tf.import_graph_def` is called without a `name`, so all nodes end up
        under its default "import" prefix (see the tensor names in `infer`).
        """
        # load the protobuf file from the disk and parse it to retrieve the
        # unserialized graph_def
        with tf.gfile.GFile(graph, "rb") as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())

        # import the graph_def into a new Graph and returns it
        with tf.Graph().as_default() as graph:
            tf.import_graph_def(graph_def)
        return graph

    @staticmethod
    def _decode_data_url(url_data):
        """Return the decoded payload of a `data:<mime>;<encoding>,<data>` URL.

        The payload is always base64-decoded, as before; the encoding field
        is parsed but not inspected.

        Raises:
          ValueError: if `url_data` does not have the data-URL form.
        """
        # The named groups are required: `(?P.*?)` without a name is not a
        # valid pattern.
        match = re.match(
            r"data:(?P<mime>.*?);(?P<encoding>.*?),(?P<data>.*)", url_data)
        if match is None:
            raise ValueError('expected a data URL of the form '
                             '"data:<mime>;<encoding>,<data>"')
        return base64.b64decode(match.groupdict()['data'])

    def preprocess(self, url_data):
        """Decode a data URL, store it under /tmp/<dir>/ and read it back.

        Args:
          url_data: the image as a base64 data URL.

        Returns:
          The image as read by `cv2.imread(..., -1)`.

        Raises:
          ValueError: if the URL is malformed or the payload is not an image
            OpenCV can decode.
        """
        import os  # local import: the module does not import os at file level

        data = self._decode_data_url(url_data)

        out_dir = '/tmp/' + self.dir
        if not os.path.isdir(out_dir):
            # web.py creates these directories up front; the command-line
            # entry point does not, so make sure the target exists.
            os.makedirs(out_dir)

        path = out_dir + '/' + str(self.count) + '.jpeg'
        with open(path, 'wb') as f:
            f.write(data)
        np_data = cv2.imread(path, -1)
        if np_data is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError('uploaded data is not a decodable image')
        print(np_data.shape)
        self.count += 1
        return np_data

    def infer(self, data):
        """Enhance one image with the loaded graph.

        Args:
          data: the input image as a base64 data URL.

        Returns:
          The enhanced image, JPEG-encoded, as a byte string.
        """
        import io  # local import: BytesIO works on both Python 2 and 3

        im_input = self.preprocess(data)
        if im_input.ndim == 2:
            # Single-channel image: replicate it so the channel handling
            # below (which indexes axis 2) does not fail.
            im_input = np.stack([im_input] * 3, axis=2)
        elif im_input.shape[2] == 4:
            # Drop the alpha channel.
            im_input = im_input[:, :, :3]

        im_input = np.flip(im_input, 2)  # OpenCV reads BGR, convert back to RGB.

        # NOTE: 16-bit input for the 'hdr' model gets no special scaling; the
        # former uint16 branch performed this same conversion.
        im_input = skimage.img_as_float(im_input)

        lowres_input = skimage.transform.resize(im_input, [256, 256], order=0)
        # Add the leading batch dimension.
        im_input = im_input[np.newaxis, :, :, :]
        lowres_input = lowres_input[np.newaxis, :, :, :]

        fullres = self.graph.get_tensor_by_name('import/fullres_input:0')
        lowres = self.graph.get_tensor_by_name('import/lowres_input:0')
        out = self.graph.get_tensor_by_name('import/output_img:0')

        feed_dict = {
            fullres: im_input,
            lowres: lowres_input
        }

        y_out = self.sess.run(out, feed_dict=feed_dict)

        # NOTE(review): assumes 'output_img' is already an HxWx3 uint8 array;
        # confirm against the frozen graph.
        img = Image.fromarray(y_out, 'RGB')
        buffer = io.BytesIO()
        img.save(buffer, format='JPEG')
        return buffer.getvalue()


def main():
    """Command-line entry point: enhance one image file with one graph.

    Returns:
      The JPEG bytes that were written to the output path.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('checkpoint', type=str, help='optimized graph path')
    parser.add_argument('input_image', type=str, help='input image file')
    parser.add_argument('output_image', type=str, help='output image path')
    parser.add_argument('--mode', type=str, default='hdr',
                        help="scratch sub-directory / mode label: "
                             "'face', 'edge' or 'hdr'")
    args = parser.parse_args()
    # Hdrnet requires the directory/mode argument as well as the graph path.
    hdrnet = Hdrnet(args.checkpoint, args.mode)

    with open(args.input_image, 'rb') as f:
        img = f.read()
    # infer() expects a data URL, not raw file bytes.
    data_url = ('data:application/octet-stream;base64,' +
                base64.b64encode(img).decode('ascii'))
    new_image = hdrnet.infer(data_url)

    with open(args.output_image, 'wb') as f:
        f.write(new_image)
    return new_image


if __name__ == '__main__':
    main()
w_initializer = tf.contrib.layers.variance_scaling_initializer
b_initializer = tf.constant_initializer


def conv(inputs, num_outputs, kernel_size, stride=1, rate=1,
         use_bias=True,
         batch_norm=False, is_training=False,
         activation_fn=tf.nn.relu,
         scope=None, reuse=False):
    """Build a 'SAME'-padded 2-D convolution with the module's defaults.

    Weights use `w_initializer()` and an L2 regularizer of scale 1.0. With
    `batch_norm` set, batch normalization is used as the normalizer and no
    bias initializer is passed; otherwise a zero-initialized bias is used
    when `use_bias` is true.

    Returns:
      The output of `tf.contrib.layers.convolution2d`.
    """
    normalizer_fn = tf.contrib.layers.batch_norm if batch_norm else None
    # A bias initializer is passed only when batch norm is off and a bias
    # was requested.
    if use_bias and not batch_norm:
        b_init = b_initializer(0.0)
    else:
        b_init = None

    norm_collections = {
        'beta': [tf.GraphKeys.BIASES],
        'moving_mean': [tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
        'moving_variance': [tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
    }
    norm_params = {
        'center': True,
        'is_training': is_training,
        'variables_collections': norm_collections,
    }
    layer_collections = {
        'weights': [tf.GraphKeys.WEIGHTS],
        'biases': [tf.GraphKeys.BIASES],
    }

    return tf.contrib.layers.convolution2d(
        inputs=inputs,
        num_outputs=num_outputs, kernel_size=kernel_size,
        stride=stride, padding='SAME',
        rate=rate,
        weights_initializer=w_initializer(),
        weights_regularizer=tf.contrib.layers.l2_regularizer(1.0),
        biases_initializer=b_init,
        normalizer_fn=normalizer_fn,
        normalizer_params=norm_params,
        activation_fn=activation_fn,
        variables_collections=layer_collections,
        outputs_collections=[tf.GraphKeys.ACTIVATIONS],
        scope=scope, reuse=reuse)


def fc(inputs, num_outputs,
       use_bias=True,
       batch_norm=False, is_training=False,
       activation_fn=tf.nn.relu,
       scope=None):
    """Build a fully connected layer with the module's defaults.

    Initializer, regularizer, batch-norm and bias handling mirror `conv`.

    Returns:
      The output of `tf.contrib.layers.fully_connected`.
    """
    normalizer_fn = tf.contrib.layers.batch_norm if batch_norm else None
    # A bias initializer is passed only when batch norm is off and a bias
    # was requested.
    if use_bias and not batch_norm:
        b_init = b_initializer(0.0)
    else:
        b_init = None

    norm_collections = {
        'beta': [tf.GraphKeys.BIASES],
        'moving_mean': [tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
        'moving_variance': [tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
    }
    norm_params = {
        'center': True,
        'is_training': is_training,
        'variables_collections': norm_collections,
    }
    layer_collections = {
        'weights': [tf.GraphKeys.WEIGHTS],
        'biases': [tf.GraphKeys.BIASES],
    }

    return tf.contrib.layers.fully_connected(
        inputs=inputs,
        num_outputs=num_outputs,
        weights_initializer=w_initializer(),
        weights_regularizer=tf.contrib.layers.l2_regularizer(1.0),
        biases_initializer=b_init,
        normalizer_fn=normalizer_fn,
        normalizer_params=norm_params,
        activation_fn=activation_fn,
        variables_collections=layer_collections,
        scope=scope)


# -----------------------------------------------------------------------------

# pylint: disable=redefined-builtin
def bilateral_slice(grid, guide, name=None):
    """Slices into a bilateral grid using the guide map.

    A grid of rank 6 is flattened to rank 5 before slicing (its last axis is
    unstacked and concatenated onto axis 4), and the sliced result is split
    back into that many pieces along axis 3 and stacked on a new axis 4.

    Args:
      grid: (Tensor) [batch_size, grid_h, grid_w, depth, n_outputs]
        grid to slice from.
      guide: (Tensor) [batch_size, h, w ] guide map to slice along.
      name: (string) name for the operation.
    Returns:
      sliced: (Tensor) [batch_size, h, w, n_outputs] sliced output.
    """
    with tf.name_scope(name):
        gridshape = grid.get_shape().as_list()
        is_rank6 = len(gridshape) == 6

        if is_rank6:
            n_in = gridshape[5]
            grid = tf.concat(tf.unstack(grid, None, axis=5), 4)

        sliced = hdrnet_ops.bilateral_slice(grid, guide)

        if is_rank6:
            sliced = tf.stack(tf.split(sliced, n_in, axis=3), axis=4)
        return sliced
# pylint: enable=redefined-builtin
134 | input_image: (Tensor) [batch_size, h, w, n_input] input data onto which to 135 | apply the affine transform. 136 | name: (string) name for the operation. 137 | Returns: 138 | sliced: (Tensor) [batch_size, h, w, n_outputs] sliced output. 139 | """ 140 | 141 | with tf.name_scope(name): 142 | gridshape = grid.get_shape().as_list() 143 | if len(gridshape) == 6: 144 | gs = tf.shape(grid) 145 | _, _, _, _, n_out, n_in = gridshape 146 | grid = tf.reshape(grid, tf.stack([gs[0], gs[1], gs[2], gs[3], gs[4]*gs[5]])) 147 | # grid = tf.concat(tf.unstack(grid, None, axis=5), 4) 148 | 149 | sliced = hdrnet_ops.bilateral_slice_apply(grid, guide, input_image, has_offset=has_offset) 150 | return sliced 151 | # pylint: enable=redefined-builtin 152 | 153 | 154 | # pylint: disable=redefined-builtin 155 | def apply(sliced, input_image, has_affine_term=True, name=None): 156 | """Applies a sliced affined model to the input image. 157 | 158 | Args: 159 | sliced: (Tensor) [batch_size, h, w, n_output, n_input+1] affine coefficients 160 | input_image: (Tensor) [batch_size, h, w, n_input] input data onto which to 161 | apply the affine transform. 162 | name: (string) name for the operation. 163 | Returns: 164 | ret: (Tensor) [batch_size, h, w, n_output] the transformed data. 165 | Raises: 166 | ValueError: if the input is not properly dimensioned. 167 | ValueError: if the affine model parameter dimensions do not match the input. 
168 | """ 169 | 170 | with tf.name_scope(name): 171 | if len(input_image.get_shape().as_list()) != 4: 172 | raise ValueError('input image should have dims [b,h,w,n_in].') 173 | in_shape = input_image.get_shape().as_list() 174 | sliced_shape = sliced.get_shape().as_list() 175 | if (in_shape[:-1] != sliced_shape[:-2]): 176 | raise ValueError('input image and affine coefficients' 177 | ' dimensions do not match: {} and {}'.format( 178 | in_shape, sliced_shape)) 179 | _, _, _, n_out, n_in = sliced.get_shape().as_list() 180 | if has_affine_term: 181 | n_in -= 1 182 | 183 | scale = sliced[:, :, :, :, :n_in] 184 | 185 | if has_affine_term: 186 | offset = sliced[:, :, :, :, n_in] 187 | 188 | out_channels = [] 189 | for chan in range(n_out): 190 | ret = scale[:, :, :, chan, 0]*input_image[:, :, :, 0] 191 | for chan_i in range(1, n_in): 192 | ret += scale[:, :, :, chan, chan_i]*input_image[:, :, :, chan_i] 193 | if has_affine_term: 194 | ret += offset[:, :, :, chan] 195 | ret = tf.expand_dims(ret, 3) 196 | out_channels.append(ret) 197 | 198 | ret = tf.concat(out_channels, 3) 199 | 200 | return ret 201 | # pylint: enable=redefined-builtin 202 | -------------------------------------------------------------------------------- /tensorflow_serving/hdr_saved.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | 3 | import tensorflow as tf 4 | 5 | import hdrnet.models as models 6 | import hdrnet.utils as utils 7 | import os 8 | import numpy as np 9 | 10 | 11 | tf.app.flags.DEFINE_string('checkpoint_dir', '/tmp/checkpoint_dir/faces', 12 | """Directory where to read training checkpoints.""") 13 | tf.app.flags.DEFINE_string('output_dir', '/tmp/hdrnet_output', 14 | """Directory where to export inference model.""") 15 | tf.app.flags.DEFINE_integer('model_version', 1, 16 | """Version number of the model.""") 17 | tf.app.flags.DEFINE_integer('image_size', 256, 18 | """Needs to provide same value as in training.""") 19 | 20 | FLAGS = 
# Restated here because this assignment straddles the previous dump line.
FLAGS = tf.app.flags.FLAGS


def preprocess_image(image_buffer):
    """Decode JPEG bytes into a full-resolution float image.

    :param image_buffer: string tensor holding one JPEG-encoded image
    :return: 3-D float32 tensor (height, width, 3). No batch dimension is
             added here and no rescaling to [-1, 1] is applied; the values
             are whatever `convert_image_dtype` yields for float32.
    """
    # Height and width are only known at run time: decode_jpeg sets them
    # dynamically from the encoded data.
    image = tf.image.decode_jpeg(image_buffer, channels=3,
                                 dct_method='INTEGER_ACCURATE')

    # uint8 -> float32.
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    return image


def preprocess_low_image(image_buffer):
    """Decode JPEG bytes and resize them to the network's low-res input.

    :param image_buffer: string tensor holding one JPEG-encoded image
    :return: 3-D float32 tensor (FLAGS.image_size, FLAGS.image_size, 3),
             resized with nearest-neighbor interpolation.
    """
    image = tf.image.decode_jpeg(image_buffer, channels=3,
                                 dct_method='INTEGER_ACCURATE')
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # resize_nearest_neighbor works on batches: add a batch axis, resize,
    # then drop the axis again.
    image = tf.expand_dims(image, 0)
    image = tf.image.resize_nearest_neighbor(
        image, [FLAGS.image_size, FLAGS.image_size], align_corners=False)
    image = tf.squeeze(image, [0])
    return image


def main(_):
    """Export the latest checkpoint as a SavedModel for TensorFlow Serving.

    Builds a graph that parses serialized examples (feature
    'image/encoded') for the full-resolution and low-resolution inputs, runs
    the model named in the checkpoint's parameters, and writes the result to
    FLAGS.output_dir/FLAGS.model_version with a 'predict_images' signature.

    Raises:
      ValueError: if FLAGS.checkpoint_dir holds no checkpoint.
    """
    with tf.Graph().as_default():
        # Inject placeholders into the graph.
        serialized_tf_example = tf.placeholder(tf.string, name='input_image')
        serialized_low_example = tf.placeholder(tf.string, name='low_image')
        feature_configs = {
            'image/encoded': tf.FixedLenFeature(shape=[], dtype=tf.string)
        }
        tf_example = tf.parse_example(serialized_tf_example, feature_configs)
        tf_low_example = tf.parse_example(serialized_low_example,
                                          feature_configs)

        jpegs = tf_example['image/encoded']
        low_jpegs = tf_low_example['image/encoded']

        # map_fn stacks the per-image results, so both tensors get a leading
        # batch axis here.
        full_images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)
        low_images = tf.map_fn(preprocess_low_image, low_jpegs,
                               dtype=tf.float32)

        # Create model.
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if checkpoint_path is None:
            # Fail with a clear message instead of a TypeError in join().
            raise ValueError('Could not find a checkpoint in {}'.format(
                FLAGS.checkpoint_dir))

        metapath = ".".join([checkpoint_path, "meta"])
        tf.train.import_meta_graph(metapath)
        with tf.Session() as sess:
            model_params = utils.get_model_params(sess)
        mdl = getattr(models, model_params['model_name'])

        with tf.variable_scope('inference'):
            prediction = mdl.inference(low_images, full_images, model_params,
                                       is_training=False)
        # Clip to [0, 1], drop unit dimensions and convert to 8-bit.
        output = tf.cast(
            255.0 * tf.squeeze(tf.clip_by_value(prediction, 0, 1)), tf.uint8)

        # Create saver to restore from checkpoints.
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Restore the model from the last checkpoint.
            saver.restore(sess, checkpoint_path)

            # (Re-)create the export directory.
            export_path = os.path.join(
                tf.compat.as_bytes(FLAGS.output_dir),
                tf.compat.as_bytes(str(FLAGS.model_version)))
            if os.path.exists(export_path):
                shutil.rmtree(export_path)

            # Create model builder.
            builder = tf.saved_model.builder.SavedModelBuilder(export_path)

            # Create tensors info.
            predict_tensor_inputs_info = \
                tf.saved_model.utils.build_tensor_info(jpegs)
            predict_tensor_low_info = \
                tf.saved_model.utils.build_tensor_info(low_jpegs)
            predict_tensor_scores_info = \
                tf.saved_model.utils.build_tensor_info(output)

            # Build prediction signature.
            prediction_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs={'images': predict_tensor_inputs_info,
                            'low': predict_tensor_low_info},
                    outputs={'result': predict_tensor_scores_info},
                    method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
                )
            )

            # Save the model.
            builder.add_meta_graph_and_variables(
                sess, [tf.saved_model.tag_constants.SERVING],
                signature_def_map={
                    'predict_images': prediction_signature
                })

            builder.save()

    print("Successfully exported hdr model version '{}' into '{}'".format(
        FLAGS.model_version, FLAGS.output_dir))


if __name__ == '__main__':
    tf.app.run()

# -----------------------------------------------------------------------------
# /hdrnet/models.py:
# -----------------------------------------------------------------------------
# Copyright 2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Defines computation graphs."""

import tensorflow as tf
import numpy as np
import os

from hdrnet.layers import (conv, fc, bilateral_slice_apply)

__all__ = [
    'HDRNetCurves',
    'HDRNetPointwiseNNGuide',
    'HDRNetGaussianPyrNN',
]


class HDRNetCurves(object):
    """Main model, as submitted in January 2017.
    """

    @classmethod
    def n_out(cls):
        """Number of output channels of the affine model."""
        return 3

    @classmethod
    def n_in(cls):
        """Number of affine-model inputs, as 3+1."""
        return 3+1

    @classmethod
    def inference(cls, lowres_input, fullres_input, params,
                  is_training=False):
        """Build the inference graph and return the output tensor.

        The bilateral coefficients are predicted from `lowres_input`, the
        guide map from `fullres_input`, and both are combined by `_output`.
        Each intermediate result is also added to a graph collection
        ('bilateral_coefficients', 'guide', 'output').
        """
        with tf.variable_scope('coefficients'):
            bilateral_coeffs = cls._coefficients(lowres_input, params,
                                                 is_training)
            tf.add_to_collection('bilateral_coefficients', bilateral_coeffs)

        with tf.variable_scope('guide'):
            guide = cls._guide(fullres_input, params, is_training)
            tf.add_to_collection('guide', guide)

        with tf.variable_scope('output'):
            output = cls._output(
                fullres_input, guide, bilateral_coeffs)
            tf.add_to_collection('output', output)

        return output

    @classmethod
    def _coefficients(cls, input_tensor, params, is_training):
        """Predict the packed bilateral grid of affine coefficients.

        Reads 'luma_bins', 'channel_multiplier', 'spatial_bin',
        'net_input_size' and 'batch_norm' from `params`. The returned 6-D
        tensor is also added to the 'packed_coefficients' collection.
        """
        bs = input_tensor.get_shape().as_list()[0]
        gd = params['luma_bins']
        cm = params['channel_multiplier']
        spatial_bin = params['spatial_bin']

        # -----------------------------------------------------------------------
        with tf.variable_scope('splat'):
            n_ds_layers = int(np.log2(params['net_input_size']/spatial_bin))

            current_layer = input_tensor
            for i in range(n_ds_layers):
                # don't normalize first layer
                use_bn = params['batch_norm'] if i > 0 else False
                current_layer = conv(current_layer, cm*(2**i)*gd, 3, stride=2,
                                     batch_norm=use_bn, is_training=is_training,
                                     scope='conv{}'.format(i+1))

            splat_features = current_layer
        # -----------------------------------------------------------------------

        # -----------------------------------------------------------------------
        with tf.variable_scope('global'):
            # The number of stride-2 layers is fixed at two; the original
            # also computed int(np.log2(spatial_bin/4)) here but never used
            # it.
            current_layer = splat_features
            for i in range(2):
                current_layer = conv(current_layer, 8*cm*gd, 3, stride=2,
                                     batch_norm=params['batch_norm'],
                                     is_training=is_training,
                                     scope="conv{}".format(i+1))
            _, lh, lw, lc = current_layer.get_shape().as_list()
            current_layer = tf.reshape(current_layer, [bs, lh*lw*lc])

            current_layer = fc(current_layer, 32*cm*gd,
                               batch_norm=params['batch_norm'],
                               is_training=is_training,
                               scope="fc1")
            current_layer = fc(current_layer, 16*cm*gd,
                               batch_norm=params['batch_norm'],
                               is_training=is_training,
                               scope="fc2")
            # don't normalize before fusion
            current_layer = fc(current_layer, 8*cm*gd, activation_fn=None,
                               scope="fc3")
            global_features = current_layer
        # -----------------------------------------------------------------------

        # -----------------------------------------------------------------------
        with tf.variable_scope('local'):
            current_layer = splat_features
            current_layer = conv(current_layer, 8*cm*gd, 3,
                                 batch_norm=params['batch_norm'],
                                 is_training=is_training,
                                 scope='conv1')
            # don't normalize before fusion
            current_layer = conv(current_layer, 8*cm*gd, 3, activation_fn=None,
                                 use_bias=False, scope='conv2')
            grid_features = current_layer
        # -----------------------------------------------------------------------

        # -----------------------------------------------------------------------
        with tf.name_scope('fusion'):
            fusion_grid = grid_features
            # Reshaped to [bs, 1, 1, C] so it broadcasts over the grid.
            fusion_global = tf.reshape(global_features, [bs, 1, 1, 8*cm*gd])
            fusion = tf.nn.relu(fusion_grid+fusion_global)
        # -----------------------------------------------------------------------

        # -----------------------------------------------------------------------
        with tf.variable_scope('prediction'):
            current_layer = fusion
            current_layer = conv(current_layer, gd*cls.n_out()*cls.n_in(), 1,
                                 activation_fn=None, scope='conv1')

            with tf.name_scope('unroll_grid'):
                # Split the channel axis in two steps, each stacking the
                # pieces on a new trailing axis (4-D -> 5-D -> 6-D).
                current_layer = tf.stack(
                    tf.split(current_layer, cls.n_out()*cls.n_in(), axis=3),
                    axis=4)
                current_layer = tf.stack(
                    tf.split(current_layer, cls.n_in(), axis=4), axis=5)
            tf.add_to_collection('packed_coefficients', current_layer)
        # -----------------------------------------------------------------------

        return current_layer

    @classmethod
    def _guide(cls, input_tensor, params, is_training):
        """Build the guide map from the full-resolution input.

        Applies a learned color matrix plus bias, a per-channel curve built
        from ReLU segments, a 1x1 channel-mixing convolution and a clip to
        [0, 1]. `params` and `is_training` are not used by this variant.
        """
        npts = 16  # number of control points for the curve
        nchans = input_tensor.get_shape().as_list()[-1]

        guidemap = input_tensor

        # Color space change
        idtity = (np.identity(nchans, dtype=np.float32)
                  + np.random.randn(1).astype(np.float32)*1e-4)
        ccm = tf.get_variable('ccm', dtype=tf.float32, initializer=idtity)
        with tf.name_scope('ccm'):
            ccm_bias = tf.get_variable(
                'ccm_bias', shape=[nchans,], dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))

            guidemap = tf.matmul(tf.reshape(input_tensor, [-1, nchans]), ccm)
            guidemap = tf.nn.bias_add(guidemap, ccm_bias, name='ccm_bias_add')

            guidemap = tf.reshape(guidemap, tf.shape(input_tensor))

        # Per-channel curve
        with tf.name_scope('curve'):
            shifts_ = np.linspace(0, 1, npts, endpoint=False, dtype=np.float32)
            shifts_ = shifts_[np.newaxis, np.newaxis, np.newaxis, :]
            shifts_ = np.tile(shifts_, (1, 1, nchans, 1))

            guidemap = tf.expand_dims(guidemap, 4)
            shifts = tf.get_variable('shifts', dtype=tf.float32,
                                     initializer=shifts_)

            # Only the first segment starts with a non-zero slope.
            slopes_ = np.zeros([1, 1, 1, nchans, npts], dtype=np.float32)
            slopes_[:, :, :, :, 0] = 1.0
            slopes = tf.get_variable('slopes', dtype=tf.float32,
                                     initializer=slopes_)

            guidemap = tf.reduce_sum(slopes*tf.nn.relu(guidemap-shifts),
                                     reduction_indices=[4])

        guidemap = tf.contrib.layers.convolution2d(
            inputs=guidemap,
            num_outputs=1, kernel_size=1,
            weights_initializer=tf.constant_initializer(1.0/nchans),
            biases_initializer=tf.constant_initializer(0),
            activation_fn=None,
            variables_collections={'weights': [tf.GraphKeys.WEIGHTS],
                                   'biases': [tf.GraphKeys.BIASES]},
            outputs_collections=[tf.GraphKeys.ACTIVATIONS],
            scope='channel_mixing')

        guidemap = tf.clip_by_value(guidemap, 0, 1)
        guidemap = tf.squeeze(guidemap, squeeze_dims=[3,])

        return guidemap

    @classmethod
    def _output(cls, im, guide, coeffs):
        """Slice `coeffs` along `guide` and apply them to `im` on /gpu:0."""
        with tf.device('/gpu:0'):
            out = bilateral_slice_apply(coeffs, guide, im, has_offset=True,
                                        name='slice')
        return out


class HDRNetPointwiseNNGuide(HDRNetCurves):
    """Replaces the pointwise curves in the guide by a pointwise neural net.
    """
    @classmethod
    def _guide(cls, input_tensor, params, is_training):
        """Build the guide map with two 1x1 convolutions.

        The first layer has `params['guide_complexity']` features and batch
        norm; the second outputs one sigmoid-activated channel.
        """
        n_guide_feats = params['guide_complexity']
        guidemap = conv(input_tensor, n_guide_feats, 1,
                        batch_norm=True, is_training=is_training,
                        scope='conv1')
        guidemap = conv(guidemap, 1, 1, activation_fn=tf.nn.sigmoid,
                        scope='conv2')
        guidemap = tf.squeeze(guidemap, squeeze_dims=[3,])
        return guidemap


class HDRNetGaussianPyrNN(HDRNetPointwiseNNGuide):
    """Replace input to the affine model by a pyramid
    """
    @classmethod
    def n_scales(cls):
        """Number of pyramid levels."""
        return 3

    @classmethod
    def n_out(cls):
        """Three output channels per pyramid level."""
        return 3*cls.n_scales()

    @classmethod
    def n_in(cls):
        """Number of affine-model inputs, as 3+1."""
        return 3+1

    @classmethod
    def inference(cls, lowres_input, fullres_input, params,
                  is_training=False):
        """Build the multiscale inference graph and return the output tensor.

        Same flow as the base class, except that the guide and the output
        are computed per pyramid level; every level is added to the
        'multiscale' and 'guide' collections.
        """
        with tf.variable_scope('coefficients'):
            bilateral_coeffs = cls._coefficients(lowres_input, params,
                                                 is_training)
            tf.add_to_collection('bilateral_coefficients', bilateral_coeffs)

        with tf.variable_scope('multiscale'):
            multiscale = cls._multiscale_input(fullres_input)
            for m in multiscale:
                tf.add_to_collection('multiscale', m)

        with tf.variable_scope('guide'):
            guide = cls._guide(multiscale, params, is_training)
            for g in guide:
                tf.add_to_collection('guide', g)

        with tf.variable_scope('output'):
            output = cls._output(multiscale, guide, bilateral_coeffs)
            tf.add_to_collection('output', output)

        return output

    @classmethod
    def _multiscale_input(cls, fullres_input):
        """Return `n_scales` levels, each half the size of the previous one.

        The first entry is `fullres_input` itself; the others are bilinear
        downsamplings of the level before them.
        """
        sz = tf.shape(fullres_input)[1:3]

        current_level = fullres_input
        lvls = [current_level]
        for _ in range(cls.n_scales()-1):
            # Floor division keeps the size an int32 tensor; `/` is true
            # division on Python 3 and would produce a float size.
            sz = sz // 2
            current_level = tf.image.resize_images(
                current_level, sz, tf.image.ResizeMethod.BILINEAR,
                align_corners=True)
            lvls.append(current_level)
        return lvls

    @classmethod
    def _guide(cls, multiscale, params, is_training):
        """Build one pointwise-NN guide per level, each in its own scope."""
        guide_lvls = []
        for il, lvl in enumerate(multiscale):
            with tf.variable_scope('level_{}'.format(il)):
                guide_lvl = HDRNetPointwiseNNGuide._guide(lvl, params,
                                                          is_training)
                guide_lvls.append(guide_lvl)
        return guide_lvls

    @classmethod
    def _output(cls, lvls, guide_lvls, coeffs):
        """Apply the per-level coefficients and sum the levels coarse to fine.

        Levels are visited in reverse list order (smallest first). The
        running result is upsampled to each level's size before that level's
        output is added.
        """
        # zip() is an iterator on Python 3 and cannot be reversed directly.
        pairs = list(zip(lvls, guide_lvls))
        for il, (lvl, guide_lvl) in enumerate(reversed(pairs)):
            # Three consecutive output channels belong to each level.
            c = coeffs[:, :, :, :, il*3:(il+1)*3, :]
            out_lvl = HDRNetPointwiseNNGuide._output(lvl, guide_lvl, c)

            if il == 0:
                current = out_lvl
            else:
                sz = tf.shape(out_lvl)[1:3]
                current = tf.image.resize_images(
                    current, sz, tf.image.ResizeMethod.BILINEAR,
                    align_corners=True)
                current = tf.add(current, out_lvl)

        return current


# -----------------------------------------------------------------------------
# /scripts/freeze_graph.py:
# -----------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8
# Copyright 2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Modifications Copyright 2018 Fei Cheng
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Freeze graph weights; use to optimize runtime."""

import argparse
import logging
import numpy as np
import os
import tensorflow as tf
from tensorflow.python.tools import freeze_graph
from tensorflow.core.framework import graph_pb2

import hdrnet.utils as utils
import hdrnet.models as models

logging.basicConfig(format="[%(process)d] %(levelname)s %(filename)s:%(lineno)s | %(message)s")
log = logging.getLogger("train")
log.setLevel(logging.INFO)


def save(data, filepath):
    """Write the raw bytes of the array `data` to `filepath`."""
    log.info("Saving {}".format(filepath))
    with open(filepath, 'wb') as fid:
        fid.write(data.tobytes())


def _dump_curves_guide(sess, graph, out_dir):
    """Dump the HDRNetCurves guide parameters as raw float32 files."""
    names = ['ccm', 'ccm_bias', 'shifts', 'slopes',
             'channel_mixing/weights', 'channel_mixing/biases']
    tensors = [graph.get_tensor_by_name('inference/guide/{}:0'.format(n))
               for n in names]
    ccm_, ccm_bias_, shifts_, slopes_, mixing_weights_, mixing_bias_ = \
        sess.run(tensors)

    shifts_ = np.squeeze(shifts_).astype(np.float32)
    slopes_ = np.squeeze(slopes_).astype(np.float32)
    mix_matrix_dump = np.append(
        np.squeeze(mixing_weights_), mixing_bias_[0]).astype(np.float32)
    # Append the bias as an extra row of the color matrix.
    ccm34_ = np.vstack((ccm_, ccm_bias_[np.newaxis, :]))

    save(ccm34_.T, os.path.join(out_dir, 'guide_ccm_f32_3x4.bin'))
    save(shifts_.T, os.path.join(out_dir, 'guide_shifts_f32_16x3.bin'))
    save(slopes_.T, os.path.join(out_dir, 'guide_slopes_f32_16x3.bin'))
    save(mix_matrix_dump,
         os.path.join(out_dir, 'guide_mix_matrix_f32_1x4.bin'))


def _dump_pointwise_guide(sess, graph, scope, conv1_path, conv2_path):
    """Dump a pointwise-NN guide under `scope`, folding batch norm into conv1."""
    names = ['conv1/weights',
             'conv1/BatchNorm/beta',
             'conv1/BatchNorm/moving_mean',
             'conv1/BatchNorm/moving_variance',
             'conv1/BatchNorm/batchnorm/add/y',
             'conv2/weights',
             'conv2/biases']
    tensors = [graph.get_tensor_by_name('{}/{}:0'.format(scope, n))
               for n in names]
    conv1w_, conv1b_, conv1mu_, conv1sigma_, conv1eps_, conv2w_, conv2b_ = \
        sess.run(tensors)

    # Fold the normalization into the conv1 weights and bias.
    conv1b_ -= conv1mu_ / np.sqrt((conv1sigma_ + conv1eps_))
    conv1w_ = conv1w_ / np.sqrt((conv1sigma_ + conv1eps_))

    conv1w_ = np.squeeze(conv1w_.astype(np.float32))
    conv1b_ = np.squeeze(conv1b_.astype(np.float32))
    conv1b_ = conv1b_[np.newaxis, :]

    conv2w_ = np.squeeze(conv2w_.astype(np.float32))
    conv2b_ = np.squeeze(conv2b_.astype(np.float32))

    conv2 = np.append(conv2w_, conv2b_)
    conv1 = np.vstack([conv1w_, conv1b_])

    save(conv1.T, conv1_path)
    save(conv2, conv2_path)


def main(args):
    """Freeze the latest checkpoint in `args.checkpoint_dir`.

    Rebuilds the inference graph of the model named in the checkpoint,
    writes `test_graph.pbtxt` and `frozen_graph.pb` (with the guide map as
    output node) into the checkpoint directory, and dumps the guide
    parameters as raw binary files. Logs an error and returns if no
    checkpoint or no matching model is found.
    """
    # Read model parameters
    checkpoint_path = tf.train.latest_checkpoint(args.checkpoint_dir)
    if checkpoint_path is None:
        log.error('Could not find a checkpoint in {}'.format(args.checkpoint_dir))
        return
    metapath = ".".join([checkpoint_path, "meta"])
    log.info("Loading {}".format(metapath))
    tf.train.import_meta_graph(metapath)
    with tf.Session() as sess:
        model_params = utils.get_model_params(sess)

    model_name = model_params['model_name']
    if not hasattr(models, model_name):
        log.error("Model {} does not exist".format(model_name))
        return
    mdl = getattr(models, model_name)

    # Instantiate new evaluation graph
    tf.reset_default_graph()
    sz = model_params['net_input_size']

    log.info("Model {}".format(model_name))

    #
    # identify the input and output tensors to export
    # the part of graph you'd like to freeze
    #
    fullres_input = tf.placeholder(tf.float32, (1, None, None, 3),
                                   name='fullres_input')
    input_tensor = tf.placeholder(tf.float32, (1, sz, sz, 3),
                                  name='lowres_input')
    with tf.variable_scope('inference'):
        mdl.inference(input_tensor, fullres_input, model_params,
                      is_training=False)

    # Export the guide map (separate graphs for deploying models on android).
    # The original had one branch per model family, but both were identical.
    # NOTE(review): the reshape is placed outside the 'inference' scope so
    # the output node is named 'guide' -- confirm against the consumers of
    # the frozen graph.
    output_tensor = tf.get_collection('guide')[0]
    output_tensor = tf.reshape(output_tensor, [-1], name='guide')
    log.info("Output shape {}".format(output_tensor.get_shape()))
    saver = tf.train.Saver()

    log.info("Restoring weights from {}".format(checkpoint_path))
    test_graph_name = "test_graph.pbtxt"
    with tf.Session() as sess:
        saver.restore(sess, checkpoint_path)
        tf.train.write_graph(sess.graph, args.checkpoint_dir, test_graph_name)

        input_graph_path = os.path.join(args.checkpoint_dir, test_graph_name)
        output_graph_path = os.path.join(args.checkpoint_dir, "frozen_graph.pb")
        input_saver_def_path = ""
        input_binary = False
        output_node_names = output_tensor.name.split(":")[0]
        restore_op_name = "save/restore_all"
        filename_tensor_name = "save/Const:0"
        clear_devices = False

        log.info("Freezing to {}".format(output_graph_path))
        freeze_graph.freeze_graph(input_graph_path, input_saver_def_path,
                                  input_binary, checkpoint_path, output_node_names,
                                  restore_op_name, filename_tensor_name,
                                  output_graph_path, clear_devices, "")
        log.info('input tensor: {} {}'.format(input_tensor.name, input_tensor.shape))
        log.info('output tensor: {} {}'.format(output_tensor.name, output_tensor.shape))

        # Dump guide parameters. This must stay inside the session block
        # because the helpers call sess.run().
        g = tf.get_default_graph()
        out_dir = args.checkpoint_dir
        if model_name == 'HDRNetCurves':
            _dump_curves_guide(sess, g, out_dir)
        elif model_name == "HDRNetGaussianPyrNN":
            for lvl in range(3):
                _dump_pointwise_guide(
                    sess, g, 'inference/guide/level_{}'.format(lvl),
                    os.path.join(out_dir, 'guide_level{}_conv1.bin'.format(lvl)),
                    os.path.join(out_dir, 'guide_level{}_conv2.bin'.format(lvl)))
        elif model_name == "HDRNetPointwiseNNGuide":
            # Exact match: the original `in` test also accepted any
            # substring of the class name.
            _dump_pointwise_guide(
                sess, g, 'inference/guide',
                os.path.join(out_dir, 'guide_conv1.bin'),
                os.path.join(out_dir, 'guide_conv2.bin'))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('checkpoint_dir', default=None, help='')

    args = parser.parse_args()
    main(args)

# -----------------------------------------------------------------------------
# /hdrnet/ops/bilateral_slice.cc:
# -----------------------------------------------------------------------------
# Head of the next file in the dump (C++, continues past this chunk); kept
# here as comments so the content of this line is not lost.
#
# // Copyright 2016 Google Inc.
# //
# // Licensed under the Apache License, Version 2.0 (the "License");
# // you may not use this file except in compliance with the License.
# // You may obtain a copy of the License at
# //
# //     http://www.apache.org/licenses/LICENSE-2.0
# //
# // Unless required by applicable law or agreed to in writing, software
# // distributed under the License is distributed on an "AS IS" BASIS,
# // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# // See the License for the specific language governing permissions and
# // limitations under the License.
#
# #include "tensorflow/core/framework/op.h"
# #include "tensorflow/core/framework/op_kernel.h"
#
# using namespace tensorflow;
#
# typedef Eigen::ThreadPoolDevice CPUDevice;
# typedef Eigen::GpuDevice GPUDevice;
#
# // -- OPS REGISTRAION ---------------------------------------------------------
# REGISTER_OP("BilateralSlice")
#     .Input("in: float")
#     .Input("guide: float")
#     .Output("out: float")
#     .Doc(R"doc(
# Slices input in in the location defined by guide, to produce output.
30 | )doc"); 31 | 32 | REGISTER_OP("BilateralSliceGrad") 33 | .Input("in: float") 34 | .Input("guide: float") 35 | .Input("backprop: float") 36 | .Output("grid_grad: float") 37 | .Output("guide_grad: float"); 38 | 39 | REGISTER_OP("BilateralSliceApply") 40 | .Input("grid: float") 41 | .Input("guide: float") 42 | .Input("input: float") 43 | .Attr("has_offset: bool") 44 | .Output("out: float") 45 | .Doc(R"doc( 46 | Slices input in in the location defined by guide and apply it, to produce output. 47 | )doc"); 48 | 49 | REGISTER_OP("BilateralSliceApplyGrad") 50 | .Input("grid: float") 51 | .Input("guide: float") 52 | .Input("input: float") 53 | .Input("backprop: float") 54 | .Attr("has_offset: bool") 55 | .Output("grid_grad: float") 56 | .Output("guide_grad: float") 57 | .Output("input_grad: float"); 58 | // ---------------------------------------------------------------------------- 59 | 60 | // -- KERNEL LAUNCHERS -------------------------------------------------------- 61 | bool BilateralSliceKernelLauncher( 62 | const GPUDevice& d, 63 | int bs, int gh, int gw, int gd, int chans, 64 | int h, int w, 65 | const float* const grid, const float* const guide, float* const out); 66 | 67 | bool BilateralSliceGradKernelLauncher( 68 | const GPUDevice& d, 69 | const float* const grid, const int64* grid_size, 70 | const float* const guide, const int64* guide_size, 71 | const float* const backprop, 72 | float* const grid_grad, float* const guide_grad); 73 | 74 | bool BilateralSliceApplyKernelLauncher( 75 | const GPUDevice& d, 76 | int bs, int gh, int gw, int gd, 77 | int input_chans, int output_chans, bool has_offset, 78 | int h, int w, 79 | const float* const grid, const float* const guide, const float* const input, 80 | float* const out); 81 | 82 | bool BilateralSliceApplyGradKernelLauncher( 83 | const GPUDevice& d, 84 | const float* const grid, const int64* grid_size, 85 | const float* const guide, const int64* guide_size, 86 | const float* const input, const int64* 
input_size, 87 | const float* const backprop, 88 | bool has_offset, 89 | float* const grid_grad, float* const guide_grad, float* const input_grad); 90 | // ---------------------------------------------------------------------------- 91 | 92 | 93 | // ---------------------------------------------------------------------------- 94 | class BilateralSliceOp : public OpKernel { 95 | public: 96 | explicit BilateralSliceOp(OpKernelConstruction* context) : OpKernel(context) {} 97 | 98 | void Compute(OpKernelContext* context) override { 99 | // Grab the inputs 100 | const Tensor& bilateral_grid = context->input(0); 101 | const Tensor& guide = context->input(1); 102 | 103 | OP_REQUIRES( 104 | context, bilateral_grid.dims() == 5, 105 | errors::InvalidArgument( 106 | R"msg(Input grid should be 5D (batch, height, width, depth, nchannels))msg")); 107 | OP_REQUIRES( 108 | context, guide.dims() == 3, 109 | errors::InvalidArgument( 110 | R"msg(Guide image should be 3D (batch, height, width))msg")); 111 | 112 | // Get shape of output tensor 113 | TensorShape shape; 114 | shape.AddDim(guide.dim_size(0)); // Batch size 115 | shape.AddDim(guide.dim_size(1)); // height 116 | shape.AddDim(guide.dim_size(2)); // width 117 | shape.AddDim(bilateral_grid.dim_size(4)); // channels 118 | 119 | // Allocate output tensor 120 | Tensor* output_tensor = NULL; 121 | OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output_tensor)); 122 | 123 | auto output = output_tensor->flat(); 124 | 125 | const int64 *grid_size = bilateral_grid.shape().dim_sizes().data(); 126 | const int64 *guide_size = guide.shape().dim_sizes().data(); 127 | 128 | int h = guide.dim_size(1); 129 | int w = guide.dim_size(2); 130 | int bs = bilateral_grid.dim_size(0); 131 | int gh = bilateral_grid.dim_size(1); 132 | int gw = bilateral_grid.dim_size(2); 133 | int gd = bilateral_grid.dim_size(3); 134 | int chans = bilateral_grid.dim_size(4); 135 | 136 | // Call the cuda kernel launcher 137 | if (!context->status().ok()) { 
138 | return; 139 | } 140 | 141 | bool status = BilateralSliceKernelLauncher( 142 | context->eigen_device(), 143 | bs, gh, gw, gd, chans, 144 | h, w, 145 | bilateral_grid.flat().data(), guide.flat().data(), 146 | output.data()); 147 | 148 | if (!status) { 149 | context->SetStatus( 150 | errors::Internal("Failed launch BilateralSliceKernel.")); 151 | } 152 | } 153 | }; 154 | 155 | 156 | class BilateralSliceGradOp : public OpKernel { 157 | public: 158 | explicit BilateralSliceGradOp(OpKernelConstruction* context) : OpKernel(context) {} 159 | 160 | void Compute(OpKernelContext* context) override { 161 | // Grab the inputs 162 | const Tensor& bilateral_grid = context->input(0); 163 | const Tensor& guide = context->input(1); 164 | const Tensor& backprop = context->input(2); 165 | 166 | OP_REQUIRES( 167 | context, bilateral_grid.dims() == 5, 168 | errors::InvalidArgument( 169 | R"msg(Input grid should be 5D (batch, height, width, depth, nchannels))msg")); 170 | OP_REQUIRES( 171 | context, guide.dims() == 3, 172 | errors::InvalidArgument( 173 | R"msg(Guide image should be 3D (batch, height, width))msg")); 174 | OP_REQUIRES( 175 | context, backprop.dims() == 4, 176 | errors::InvalidArgument( 177 | R"msg(Backprop should be 4D (batch, height, width, nchannels))msg")); 178 | 179 | // Get shape of output tensor 180 | TensorShape grid_shape = bilateral_grid.shape(); 181 | TensorShape guide_shape = guide.shape(); 182 | 183 | // Allocate output tensor 184 | Tensor* grid_grad = NULL; 185 | OP_REQUIRES_OK(context, context->allocate_output(0, grid_shape, 186 | &grid_grad)); 187 | Tensor* guide_grad = NULL; 188 | OP_REQUIRES_OK(context, context->allocate_output(1, guide_shape, 189 | &guide_grad)); 190 | 191 | const int64 *grid_size = bilateral_grid.shape().dim_sizes().data(); 192 | const int64 *guide_size = guide.shape().dim_sizes().data(); 193 | 194 | auto grid_grad_array = grid_grad->template flat(); 195 | auto guide_grad_array = guide_grad->template flat(); 196 | 197 | // Call the 
cuda kernel launcher 198 | bool status = BilateralSliceGradKernelLauncher( 199 | context->eigen_device(), 200 | bilateral_grid.flat().data(), grid_size, 201 | guide.flat().data(), guide_size, 202 | backprop.flat().data(), 203 | grid_grad_array.data(), guide_grad_array.data()); 204 | 205 | if (!status) { 206 | context->SetStatus( 207 | errors::Internal("Failed launch BilateralSliceGradKernel.")); 208 | } 209 | } 210 | }; 211 | 212 | 213 | class BilateralSliceApplyOp : public OpKernel { 214 | private: 215 | bool has_offset; 216 | 217 | public: 218 | explicit BilateralSliceApplyOp(OpKernelConstruction* context) : OpKernel(context) { 219 | OP_REQUIRES_OK(context, context->GetAttr("has_offset", &has_offset)); 220 | } 221 | 222 | void Compute(OpKernelContext* context) override { 223 | // Grab the inputs 224 | const Tensor& bilateral_grid = context->input(0); 225 | const Tensor& guide = context->input(1); 226 | const Tensor& input = context->input(2); 227 | 228 | // Check tensor dims 229 | OP_REQUIRES( 230 | context, bilateral_grid.dims() == 5, 231 | errors::InvalidArgument( 232 | R"msg(Input grid should be 5D (batch, height, width, depth, nchannels))msg")); 233 | OP_REQUIRES( 234 | context, guide.dims() == 3, 235 | errors::InvalidArgument( 236 | R"msg(Guide image should be 3D (batch, height, width))msg")); 237 | OP_REQUIRES( 238 | context, input.dims() == 4, 239 | errors::InvalidArgument( 240 | R"msg(Guide image should be 4D (batch, height, width, nchannels))msg")); 241 | 242 | // Sizes 243 | const int64 *grid_size = bilateral_grid.shape().dim_sizes().data(); 244 | const int64 *guide_size = guide.shape().dim_sizes().data(); 245 | int h = guide.dim_size(1); 246 | int w = guide.dim_size(2); 247 | int bs = bilateral_grid.dim_size(0); 248 | int gh = bilateral_grid.dim_size(1); 249 | int gw = bilateral_grid.dim_size(2); 250 | int gd = bilateral_grid.dim_size(3); 251 | int coeffs_chans = bilateral_grid.dim_size(4); 252 | int input_chans = input.dim_size(3); 253 | 254 | 
OP_REQUIRES( 255 | context, input.dim_size(0) == guide.dim_size(0) && input.dim_size(1) == h && input.dim_size(2) == w, 256 | errors::InvalidArgument( 257 | R"msg(Input and guide size should match.)msg")); 258 | OP_REQUIRES( 259 | context, guide.dim_size(0) == bs, 260 | errors::InvalidArgument( 261 | R"msg(Batch sizes should match.)msg")); 262 | 263 | int output_chans = 0; 264 | if (has_offset) { 265 | OP_REQUIRES( 266 | context, coeffs_chans % (input_chans+1) == 0, 267 | errors::InvalidArgument( 268 | R"msg(Slicing with affine offset, coefficients grid should have n_out*(n_in+1) channels.)msg")); 269 | output_chans = coeffs_chans / (input_chans+1); 270 | } else { 271 | OP_REQUIRES( 272 | context, coeffs_chans % input_chans == 0, 273 | errors::InvalidArgument( 274 | R"msg(Slicing without affine offset, coefficients grid should have n_out*n_in channels.)msg")); 275 | output_chans = coeffs_chans / input_chans; 276 | } 277 | 278 | // Allocate output tensor 279 | TensorShape out_shape; 280 | out_shape.AddDim(bs); 281 | out_shape.AddDim(h); 282 | out_shape.AddDim(w); 283 | out_shape.AddDim(output_chans); 284 | Tensor* output_tensor = NULL; 285 | OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output_tensor)); 286 | 287 | // Call the cuda kernel launcher 288 | auto output = output_tensor->flat(); 289 | bool status = BilateralSliceApplyKernelLauncher( 290 | context->eigen_device(), 291 | bs, gh, gw, gd, 292 | input_chans, output_chans, has_offset, 293 | h, w, 294 | bilateral_grid.flat().data(), guide.flat().data(), input.flat().data(), 295 | output.data()); 296 | 297 | if (!status) { 298 | context->SetStatus( 299 | errors::Internal("Failed to launch BilateralSliceApplyKernel.")); 300 | } 301 | } 302 | }; 303 | 304 | class BilateralSliceApplyGradOp : public OpKernel { 305 | private: 306 | bool has_offset; 307 | 308 | public: 309 | explicit BilateralSliceApplyGradOp(OpKernelConstruction* context) : OpKernel(context) { 310 | OP_REQUIRES_OK(context, 
context->GetAttr("has_offset", &has_offset)); 311 | } 312 | 313 | void Compute(OpKernelContext* context) override { 314 | // Grab the inputs 315 | const Tensor& bilateral_grid = context->input(0); 316 | const Tensor& guide = context->input(1); 317 | const Tensor& input = context->input(2); 318 | const Tensor& backprop = context->input(3); 319 | 320 | OP_REQUIRES( 321 | context, bilateral_grid.dims() == 5, 322 | errors::InvalidArgument( 323 | R"msg(Input grid should be 5D (batch, height, width, depth, nchannels))msg")); 324 | OP_REQUIRES( 325 | context, guide.dims() == 3, 326 | errors::InvalidArgument( 327 | R"msg(Guide image should be 3D (batch, height, width))msg")); 328 | OP_REQUIRES( 329 | context, input.dims() == 4, 330 | errors::InvalidArgument( 331 | R"msg(Input image should be 4D (batch, height, width, nchannels))msg")); 332 | OP_REQUIRES( 333 | context, backprop.dims() == 4, 334 | errors::InvalidArgument( 335 | R"msg(Backprop should be 4D (batch, height, width, nchannels))msg")); 336 | 337 | // Get shape of output tensor 338 | TensorShape grid_shape = bilateral_grid.shape(); 339 | TensorShape guide_shape = guide.shape(); 340 | TensorShape input_shape = input.shape(); 341 | 342 | // Allocate output tensor 343 | Tensor* grid_grad = NULL; 344 | OP_REQUIRES_OK(context, context->allocate_output(0, grid_shape, 345 | &grid_grad)); 346 | Tensor* guide_grad = NULL; 347 | OP_REQUIRES_OK(context, context->allocate_output(1, guide_shape, 348 | &guide_grad)); 349 | Tensor* input_grad = NULL; 350 | OP_REQUIRES_OK(context, context->allocate_output(2, input_shape, 351 | &input_grad)); 352 | 353 | int64 grid_size[5]{bilateral_grid.dim_size(0), 354 | bilateral_grid.dim_size(1), 355 | bilateral_grid.dim_size(2), 356 | bilateral_grid.dim_size(3), 357 | bilateral_grid.dim_size(4)}; 358 | int64 guide_size[3]{guide.dim_size(0), 359 | guide.dim_size(1), 360 | guide.dim_size(2)}; 361 | int64 input_size[4]{input.dim_size(0), 362 | input.dim_size(1), 363 | input.dim_size(2), 364 | 
input.dim_size(3)}; 365 | 366 | auto grid_grad_array = grid_grad->template flat(); 367 | auto guide_grad_array = guide_grad->template flat(); 368 | auto input_grad_array = input_grad->template flat(); 369 | 370 | // Call the cuda kernel launcher 371 | bool status = BilateralSliceApplyGradKernelLauncher( 372 | context->eigen_device(), 373 | bilateral_grid.flat().data(), grid_size, 374 | guide.flat().data(), guide_size, 375 | input.flat().data(), input_size, 376 | backprop.flat().data(), has_offset, 377 | grid_grad_array.data(), guide_grad_array.data(), input_grad_array.data()); 378 | 379 | if (!status) { 380 | context->SetStatus( 381 | errors::Internal("Failed launch BilateralSliceApplyGradKernel.")); 382 | } 383 | } 384 | }; 385 | // ---------------------------------------------------------------------------- 386 | 387 | // -- KERNEL REGISTRATION ----------------------------------------------------- 388 | REGISTER_KERNEL_BUILDER(Name("BilateralSlice").Device(DEVICE_GPU), BilateralSliceOp); 389 | REGISTER_KERNEL_BUILDER(Name("BilateralSliceGrad").Device(DEVICE_GPU), BilateralSliceGradOp); 390 | REGISTER_KERNEL_BUILDER(Name("BilateralSliceApply").Device(DEVICE_GPU), BilateralSliceApplyOp); 391 | REGISTER_KERNEL_BUILDER(Name("BilateralSliceApplyGrad").Device(DEVICE_GPU), BilateralSliceApplyGradOp); 392 | // ---------------------------------------------------------------------------- 393 | -------------------------------------------------------------------------------- /hdrnet/ops/bilateral_slice.cu.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#if GOOGLE_CUDA

#define EIGEN_USE_GPU

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/util/cuda_kernel_helper.h"

#include "math.h"

// NOTE(review): the header name was lost in extraction (bare "#include");
// <iostream> is presumed -- confirm against the upstream file.
#include <iostream>

using namespace tensorflow;

typedef Eigen::GpuDevice GPUDevice;

// Smooth stand-in for |x|: sqrt(x*x + eps), which stays differentiable at 0.
__device__ float diff_abs(float x) {
  float eps = 1e-8;
  return sqrt(x*x+eps);
}

// Derivative of diff_abs with respect to x.
__device__ float d_diff_abs(float x) {
  float eps = 1e-8;
  return x/sqrt(x*x+eps);
}

// Tent-shaped interpolation weight along the depth axis:
// max(1 - diff_abs(x), 0).
__device__ float weight_z(float x) {
  float abx = diff_abs(x);
  return max(1.0f-abx, 0.0f);
}

// Slope of the depth weight: 0 where weight_z is clamped (diff_abs(x) > 1),
// d_diff_abs(x) otherwise. Callers pass x = zz + 0.5 - gz, so this is the
// derivative of weight_z(zz + 0.5 - gz) with respect to gz (the two minus
// signs cancel), not with respect to x.
__device__ float d_weight_z(float x) {
  float abx = diff_abs(x);
  if(abx > 1.0f) {
    return 0.0f;
    // return abx;
  } else {
    return d_diff_abs(x);
  }
}

// Forward slicing kernel. Each idx in [0, nthreads) is one output element,
// decomposed as (b, y, x, c) with c varying fastest. The value is a weighted
// sum of the 2x2x2 grid cells around (gx, gy, gz); cell indices are clamped
// to the grid bounds.
//
//   grid  : indexed as c + chans*(z + gd*(x + gw*(y + gh*b)))
//   guide : indexed as x + w*(y + h*b), scaled by gd to get the depth coord
//   out   : written at idx
__global__ void BilateralSliceKernel(
    int64 nthreads,
    const float* grid, const float* guide,
    const int bs, const int h, const int w, const int chans,
    const int gh, const int gw, const int gd,
    float* out)
{
  // - Samples centered at 0.5.
  // - Repeating boundary conditions

  CUDA_1D_KERNEL_LOOP(idx, nthreads) {
    int c = idx % chans;
    int x = (idx / chans) % w;
    int y = (idx / (chans*w)) % h;
    int b = (idx / (chans*w*h));

    // Pixel centre mapped into grid coordinates.
    float gx = (x+0.5f)*gw/(1.0f*w);
    float gy = (y+0.5f)*gh/(1.0f*h);
    float gz = guide[x + w*(y + h*b)]*gd;

    // Lower corner of the 2x2x2 interpolation neighbourhood.
    int fx = static_cast<int>(floor(gx-0.5f));
    int fy = static_cast<int>(floor(gy-0.5f));
    int fz = static_cast<int>(floor(gz-0.5f));

    // Grid strides
    int sz = chans;
    int sx = chans*gd;
    int sy = chans*gd*gw;
    int sb = chans*gd*gw*gh;

    float value = 0.0f;
    for (int xx = fx; xx < fx+2; ++xx) {
      int x_ = max(min(xx, gw-1), 0);
      float wx = max(1.0f-abs(xx+0.5-gx), 0.0f);
      for (int yy = fy; yy < fy+2; ++yy)
      {
        int y_ = max(min(yy, gh-1), 0);
        float wy = max(1.0f-abs(yy+0.5-gy), 0.0f);
        for (int zz = fz; zz < fz+2; ++zz)
        {
          int z_ = max(min(zz, gd-1), 0);
          float wz = weight_z(zz+0.5-gz);
          int grid_idx = c + sz*z_ + sx*x_ + sy*y_ + sb*b;
          value += grid[grid_idx]*wx*wy*wz;
        }
      }
    }
    out[idx] = value;
  }
}

// Gradient of the slice with respect to the grid. Each idx is one grid
// element (b, gy, gx, gz, c), c fastest. The kernel gathers backprop from the
// pixel window [left_x, right_x) x [left_y, right_y) around the cell; pixel
// coordinates outside the image are mirrored back in before indexing, while
// the spatial weights use the unmirrored coordinate.
//
//   backprop : indexed as c + chans*(x + w*(y + h*b))
//   out      : written at idx
__global__ void BilateralSliceGridGradKernel(
    int64 nthreads,
    const float* grid, const float* guide, const float* backprop,
    const int bs, const int h, const int w, const int chans,
    const int gh, const int gw, const int gd,
    float* out)
{
  CUDA_1D_KERNEL_LOOP(idx, nthreads) {
    int c = idx % chans;
    int gz = (idx / chans) % gd;
    int gx = (idx / (chans*gd)) % gw;
    int gy = (idx / (chans*gd*gw)) % gh;
    int b = (idx / (chans*gd*gw*gh));

    float scale_w = w*1.0/gw;
    float scale_h = h*1.0/gh;

    int left_x = static_cast<int>(floor(scale_w*(gx+0.5-1)));
    int right_x = static_cast<int>(ceil(scale_w*(gx+0.5+1)));
    int left_y = static_cast<int>(floor(scale_h*(gy+0.5-1)));
    int right_y = static_cast<int>(ceil(scale_h*(gy+0.5+1)));

    // Strides in backprop
    int sx = chans;
    int sy = chans*w;
    int sb = chans*w*h;

    float value = 0.0f;
    for (int x = left_x; x < right_x; ++x)
    {
      int x_ = x;

      // mirror boundary
      if (x_ < 0) x_ = -x_-1;
      if (x_ >= w) x_ = 2*w-1-x_;

      // x_ = max(min(x_, w-1), 0);
      float gx2 = (x+0.5f)/scale_w;
      float wx = max(1.0f-abs(gx+0.5-gx2), 0.0f);

      for (int y = left_y; y < right_y; ++y)
      {
        int y_ = y;

        // mirror boundary
        if (y_ < 0) y_ = -y_-1;
        if (y_ >= h) y_ = 2*h-1-y_;

        // y_ = max(min(y_, h-1), 0);
        float gy2 = (y+0.5f)/scale_h;
        float wy = max(1.0f-abs(gy+0.5-gy2), 0.0f);

        int guide_idx = x_ + w*y_ + h*w*b;
        float gz2 = guide[guide_idx]*gd;
        // float wz = max(1.0f-diff_abs(gz+0.5f - gz2), 0.0f);
        float wz = weight_z(gz+0.5f-gz2);
        // Guide values beyond the first/last depth cell get full weight on
        // that cell, mirroring the index clamping done in the forward pass.
        if ((gz==0 && gz2<0.5f) || (gz==gd-1 && gz2>gd-0.5f)) {
          wz = 1.0f;
        }

        int back_idx = c + sx*x_ + sy*y_ + sb*b;
        value += wz*wx*wy*backprop[back_idx];
      }
    }
    out[idx] = value;
  }
}

// Gradient of the slice with respect to the guide. Each idx is one guide
// pixel (b, y, x), x fastest. For every channel the kernel re-runs the 2x2x2
// interpolation with the depth weight replaced by gd*d_weight_z (the factor
// gd comes from gz = guide*gd), multiplies by that channel's backprop, and
// sums over channels.
__global__ void BilateralSliceGuideGradKernel(
    int64 nthreads,
    const float* grid, const float* guide, const float* backprop,
    const int bs, const int h, const int w, const int chans,
    const int gh, const int gw, const int gd,
    float* out)
{
  CUDA_1D_KERNEL_LOOP(idx, nthreads) {
    int x = idx % w;
    int y = (idx / w) % h;
    int b = (idx / (w*h));

    float gx = (x+0.5f)*gw/(1.0f*w);
    float gy = (y+0.5f)*gh/(1.0f*h);
    float gz = guide[x + w*(y + h*b)]*gd;

    int fx = static_cast<int>(floor(gx-0.5f));
    int fy = static_cast<int>(floor(gy-0.5f));
    int fz = static_cast<int>(floor(gz-0.5f));

    // Grid strides
    int sz = chans;
    int sx = chans*gd;
    int sy = chans*gd*gw;
    int sb = chans*gd*gw*gh;

    float value = 0.0f;
    for (int c = 0; c < chans; ++c) {
      float chan_val = 0.0f;
      for (int xx = fx; xx < fx+2; ++xx) {
        int x_ = max(min(xx, gw-1), 0);
        float wx = max(1.0f-abs(xx+0.5-gx), 0.0f);
        for (int yy = fy; yy < fy+2; ++yy)
        {
          int y_ = max(min(yy, gh-1), 0);
          float wy = max(1.0f-abs(yy+0.5-gy), 0.0f);
          for (int zz = fz; zz < fz+2; ++zz)
          {
            int z_ = max(min(zz, gd-1), 0);
            float dwz = gd*d_weight_z(zz+0.5-gz);

            int grid_idx = c + sz*z_ + sx*x_ + sy*y_ + sb*b;
            chan_val += grid[grid_idx]*wx*wy*dwz;
          }
        }
      }
      chan_val *= backprop[c + chans*(x + w*(y + h*b))];
      value += chan_val;
    }
    out[idx] = value;
  }
}

// Forward declaration: the launcher below is defined before the kernel body,
// so the kernel must be declared before it is launched.
__global__ void BilateralSliceApplyKernel(
    int64 nthreads,
    const float* grid, const float* guide, const float* input,
    const int bs, const int h, const int w,
    const int gh, const int gw, const int gd,
    const int input_chans, const int output_chans, const bool has_offset,
    float* out);

// Launches BilateralSliceApplyKernel over bs*h*w*output_chans elements.
// Nothing is launched when that count is not positive. Returns d.ok().
bool BilateralSliceApplyKernelLauncher(
    const GPUDevice& d,
    int bs, int gh, int gw, int gd,
    int input_chans, int output_chans, bool has_offset,
    int h, int w,
    const float* const grid, const float* const guide, const float* const input,
    float* const out)
{
  int total_count = bs*h*w*output_chans;
  if (total_count > 0) {
    CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
    // NOTE(review): the launch configuration was lost in extraction ("<<>>");
    // restored to the usual TensorFlow form -- confirm against upstream.
    BilateralSliceApplyKernel<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
        total_count, grid, guide, input,
        bs, h, w, gh, gw, gd, input_chans, output_chans, has_offset,
        out);
  }

  return d.ok();
}

// Fused slice-and-apply kernel. Each idx is one output element
// (b, y, x, out_c), out_c fastest. For every coefficient of that output
// channel the grid is interpolated as in BilateralSliceKernel; the first
// input_chans coefficients multiply the matching input channel and, when
// has_offset is set, one extra coefficient is added as a bias.
//
//   grid channel layout : coeff_stride*out_c + in_c
//   input               : indexed as in_c + input_chans*(x + w*(y + h*b))
__global__ void BilateralSliceApplyKernel(
    int64 nthreads,
    const float* grid, const float* guide, const float* input,
    const int bs, const int h, const int w,
    const int gh, const int gw, const int gd,
    const int input_chans, const int output_chans, const bool has_offset,
    float* out)
{
  // - Samples centered at 0.5.
  // - Repeating boundary conditions

  int grid_chans = input_chans*output_chans;
  int coeff_stride = input_chans;
  if(has_offset) {
    grid_chans += output_chans;
    coeff_stride += 1;
  }

  CUDA_1D_KERNEL_LOOP(idx, nthreads) {
    int out_c = idx % output_chans;
    int x = (idx / output_chans) % w;
    int y = (idx / (output_chans*w)) % h;
    int b = (idx / (output_chans*w*h));

    float gx = (x+0.5f)*gw/(1.0f*w);
    float gy = (y+0.5f)*gh/(1.0f*h);
    float gz = guide[x + w*(y + h*b)]*gd;

    int fx = static_cast<int>(floor(gx-0.5f));
    int fy = static_cast<int>(floor(gy-0.5f));
    int fz = static_cast<int>(floor(gz-0.5f));


    // Grid strides
    int sz = grid_chans;
    int sx = grid_chans*gd;
    int sy = grid_chans*gd*gw;
    int sb = grid_chans*gd*gw*gh;

    float value = 0.0f;
    for (int in_c = 0; in_c < coeff_stride; ++in_c) {
      float coeff_sample = 0.0f;
      for (int xx = fx; xx < fx+2; ++xx) {
        int x_ = max(min(xx, gw-1), 0);
        float wx = max(1.0f-abs(xx+0.5-gx), 0.0f);
        for (int yy = fy; yy < fy+2; ++yy)
        {
          int y_ = max(min(yy, gh-1), 0);
          float wy = max(1.0f-abs(yy+0.5-gy), 0.0f);
          for (int zz = fz; zz < fz+2; ++zz)
          {
            int z_ = max(min(zz, gd-1), 0);
            float wz = weight_z(zz+0.5-gz);
            int grid_idx = (coeff_stride*out_c + in_c) + sz*z_ + sx*x_ + sy*y_ + sb*b;
            coeff_sample += grid[grid_idx]*wx*wy*wz;
          }
        }
      } // Grid trilinear interpolation
      if(in_c < input_chans) {
        int input_idx = in_c + input_chans*(x + w*(y + h*b));
        value += coeff_sample*input[input_idx];
      } else { // Offset term
        value += coeff_sample;
      }
    }
    out[idx] = value;
  }
}


__global__ void BilateralSliceApplyGridGradKernel(
    int64 nthreads,
    const float* grid, const float* guide, const float* input, const float* backprop,
const int bs, const int h, const int w, 318 | const int gh, const int gw, const int gd, 319 | const int input_chans, const int output_chans, const bool has_offset, 320 | float* out) 321 | { 322 | int grid_chans = input_chans*output_chans; 323 | int coeff_stride = input_chans; 324 | if(has_offset) { 325 | grid_chans += output_chans; 326 | coeff_stride += 1; 327 | } 328 | 329 | CUDA_1D_KERNEL_LOOP(idx, nthreads) { 330 | int c = idx % grid_chans; 331 | int gz = (idx / grid_chans) % gd; 332 | int gx = (idx / (grid_chans*gd)) % gw; 333 | int gy = (idx / (grid_chans*gd*gw)) % gh; 334 | int b = (idx / (grid_chans*gd*gw*gh)); 335 | 336 | float scale_w = w*1.0/gw; 337 | float scale_h = h*1.0/gh; 338 | 339 | int left_x = static_cast(floor(scale_w*(gx+0.5-1))); 340 | int right_x = static_cast(ceil(scale_w*(gx+0.5+1))); 341 | int left_y = static_cast(floor(scale_h*(gy+0.5-1))); 342 | int right_y = static_cast(ceil(scale_h*(gy+0.5+1))); 343 | 344 | // Strides in the output 345 | int sx = output_chans; 346 | int sy = output_chans*w; 347 | int sb = output_chans*w*h; 348 | 349 | // Strides in the input 350 | int isx = input_chans; 351 | int isy = input_chans*w; 352 | int isb = input_chans*w*h; 353 | 354 | int out_c = c / coeff_stride; 355 | int in_c = c % coeff_stride; 356 | 357 | float value = 0.0f; 358 | for (int x = left_x; x < right_x; ++x) 359 | { 360 | int x_ = x; 361 | 362 | // mirror boundary 363 | if (x_ < 0) x_ = -x_-1; 364 | if (x_ >= w) x_ = 2*w-1-x_; 365 | 366 | float gx2 = (x+0.5f)/scale_w; 367 | float wx = max(1.0f-abs(gx+0.5-gx2), 0.0f); 368 | 369 | for (int y = left_y; y < right_y; ++y) 370 | { 371 | int y_ = y; 372 | 373 | // mirror boundary 374 | if (y_ < 0) y_ = -y_-1; 375 | if (y_ >= h) y_ = 2*h-1-y_; 376 | 377 | float gy2 = (y+0.5f)/scale_h; 378 | float wy = max(1.0f-abs(gy+0.5-gy2), 0.0f); 379 | 380 | int guide_idx = x_ + w*y_ + h*w*b; 381 | float gz2 = guide[guide_idx]*gd; 382 | float wz = weight_z(gz+0.5f-gz2); 383 | if ((gz==0 && gz2<0.5f) || (gz==gd-1 && 
gz2>gd-0.5f)) { 384 | wz = 1.0f; 385 | } 386 | 387 | int back_idx = out_c + sx*x_ + sy*y_ + sb*b; 388 | if (in_c < input_chans) { 389 | int input_idx = in_c + isx*x_ + isy*y_ + isb*b; 390 | value += wz*wx*wy*backprop[back_idx]*input[input_idx]; 391 | } else { // offset term 392 | value += wz*wx*wy*backprop[back_idx]; 393 | } 394 | } 395 | } 396 | out[idx] = value; 397 | } 398 | } 399 | 400 | 401 | __global__ void BilateralSliceApplyGuideGradKernel( 402 | int64 nthreads, 403 | const float* grid, const float* guide, const float* input, const float* backprop, 404 | const int bs, const int h, const int w, 405 | const int gh, const int gw, const int gd, 406 | const int input_chans, const int output_chans, const bool has_offset, 407 | float* out) 408 | { 409 | 410 | int grid_chans = input_chans*output_chans; 411 | int coeff_stride = input_chans; 412 | if(has_offset) { 413 | grid_chans += output_chans; 414 | coeff_stride += 1; 415 | } 416 | 417 | CUDA_1D_KERNEL_LOOP(idx, nthreads) { 418 | int x = idx % w; 419 | int y = (idx / w) % h; 420 | int b = (idx / (w*h)); 421 | 422 | float gx = (x+0.5f)*gw/(1.0f*w); 423 | float gy = (y+0.5f)*gh/(1.0f*h); 424 | float gz = guide[x + w*(y + h*b)]*gd; 425 | 426 | int fx = static_cast(floor(gx-0.5f)); 427 | int fy = static_cast(floor(gy-0.5f)); 428 | int fz = static_cast(floor(gz-0.5f)); 429 | 430 | // Grid stride 431 | int sz = grid_chans; 432 | int sx = grid_chans*gd; 433 | int sy = grid_chans*gd*gw; 434 | int sb = grid_chans*gd*gw*gh; 435 | 436 | float out_sum = 0.0f; 437 | for (int out_c = 0; out_c < output_chans; ++out_c) { 438 | 439 | float in_sum = 0.0f; 440 | for (int in_c = 0; in_c < coeff_stride; ++in_c) { 441 | 442 | float grid_sum = 0.0f; 443 | for (int xx = fx; xx < fx+2; ++xx) { 444 | int x_ = max(min(xx, gw-1), 0); 445 | float wx = max(1.0f-abs(xx+0.5-gx), 0.0f); 446 | for (int yy = fy; yy < fy+2; ++yy) 447 | { 448 | int y_ = max(min(yy, gh-1), 0); 449 | float wy = max(1.0f-abs(yy+0.5-gy), 0.0f); 450 | for (int zz = fz; 
zz < fz+2; ++zz) 451 | { 452 | int z_ = max(min(zz, gd-1), 0); 453 | float dwz = gd*d_weight_z(zz+0.5-gz); 454 | 455 | int grid_idx = (coeff_stride*out_c + in_c) + sz*z_ + sx*x_ + sy*y_ + sb*b; 456 | grid_sum += grid[grid_idx]*wx*wy*dwz; 457 | } // z 458 | } // y 459 | } // x, grid trilinear interp 460 | 461 | if(in_c < input_chans) { 462 | in_sum += grid_sum*input[in_c + input_chans*(x + w*(y + h*b))]; 463 | } else { // offset term 464 | in_sum += grid_sum; 465 | } 466 | } // in_c 467 | 468 | out_sum += in_sum*backprop[out_c + output_chans*(x + w*(y + h*b))]; 469 | } // out_c 470 | 471 | out[idx] = out_sum; 472 | } 473 | } 474 | 475 | 476 | __global__ void BilateralSliceApplyInputGradKernel( 477 | int64 nthreads, 478 | const float* grid, const float* guide, const float* input, const float* backprop, 479 | const int bs, const int h, const int w, 480 | const int gh, const int gw, const int gd, 481 | const int input_chans, const int output_chans, const bool has_offset, 482 | float* out) 483 | { 484 | int grid_chans = input_chans*output_chans; 485 | int coeff_stride = input_chans; 486 | if(has_offset) { 487 | grid_chans += output_chans; 488 | coeff_stride += 1; 489 | } 490 | 491 | CUDA_1D_KERNEL_LOOP(idx, nthreads) { 492 | int in_c = idx % input_chans; 493 | int x = (idx / input_chans) % w; 494 | int y = (idx / (input_chans*w)) % h; 495 | int b = (idx / (input_chans*w*h)); 496 | 497 | float gx = (x+0.5f)*gw/(1.0f*w); 498 | float gy = (y+0.5f)*gh/(1.0f*h); 499 | float gz = guide[x + w*(y + h*b)]*gd; 500 | 501 | int fx = static_cast(floor(gx-0.5f)); 502 | int fy = static_cast(floor(gy-0.5f)); 503 | int fz = static_cast(floor(gz-0.5f)); 504 | 505 | // Grid stride 506 | int sz = grid_chans; 507 | int sx = grid_chans*gd; 508 | int sy = grid_chans*gd*gw; 509 | int sb = grid_chans*gd*gw*gh; 510 | 511 | float value = 0.0f; 512 | for (int out_c = 0; out_c < output_chans; ++out_c) { 513 | float chan_val = 0.0f; 514 | for (int xx = fx; xx < fx+2; ++xx) { 515 | int x_ = 
max(min(xx, gw-1), 0); 516 | float wx = max(1.0f-abs(xx+0.5-gx), 0.0f); 517 | for (int yy = fy; yy < fy+2; ++yy) 518 | { 519 | int y_ = max(min(yy, gh-1), 0); 520 | float wy = max(1.0f-abs(yy+0.5-gy), 0.0f); 521 | for (int zz = fz; zz < fz+2; ++zz) 522 | { 523 | 524 | int z_ = max(min(zz, gd-1), 0); 525 | 526 | float wz = weight_z(zz+0.5-gz); 527 | 528 | int grid_idx = (coeff_stride*out_c + in_c) + sz*z_ + sx*x_ + sy*y_ + sb*b; 529 | chan_val += grid[grid_idx]*wx*wy*wz; 530 | } // z 531 | } // y 532 | } // x, grid trilinear interp 533 | 534 | value += chan_val*backprop[out_c + output_chans*(x + w*(y + h*b))]; 535 | } // out_c 536 | out[idx] = value; 537 | } 538 | } 539 | 540 | 541 | // -- KERNEL LAUNCHERS --------------------------------------------------------- 542 | bool BilateralSliceKernelLauncher( 543 | const GPUDevice& d, 544 | int bs, int gh, int gw, int gd, int chans, 545 | int h, int w, 546 | const float* const grid, const float* const guide, float* const out) 547 | { 548 | int total_count = bs*h*w*chans; 549 | if (total_count > 0) { 550 | CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d); 551 | BilateralSliceKernel<<>>( 552 | total_count, grid, guide, 553 | bs, h, w, chans, gh, gw, gd, 554 | out); 555 | } 556 | 557 | return d.ok(); 558 | } 559 | 560 | bool BilateralSliceGradKernelLauncher( 561 | const GPUDevice& d, 562 | const float* grid, const int64* grid_size, 563 | const float* guide, const int64* guide_size, 564 | const float* backprop, 565 | float* grid_grad, float* guide_grad) 566 | { 567 | int64 bs = grid_size[0]; 568 | int64 gh = grid_size[1]; 569 | int64 gw = grid_size[2]; 570 | int64 gd = grid_size[3]; 571 | int64 chans = grid_size[4]; 572 | 573 | int64 h = guide_size[1]; 574 | int64 w = guide_size[2]; 575 | 576 | int64 grid_count = bs*gh*gw*gd*chans; 577 | if (grid_count > 0) { 578 | CudaLaunchConfig config = GetCudaLaunchConfig(grid_count, d); 579 | BilateralSliceGridGradKernel<<>>( 580 | grid_count, grid, guide, backprop, 581 | 
bs, h, w, chans, gh, gw, gd, 582 | grid_grad); 583 | } 584 | 585 | int64 guide_count = bs*h*w; 586 | if (guide_count > 0) { 587 | CudaLaunchConfig config = GetCudaLaunchConfig(guide_count, d); 588 | BilateralSliceGuideGradKernel<<>>( 589 | guide_count, grid, guide, backprop, 590 | bs, h, w, chans, gh, gw, gd, 591 | guide_grad); 592 | } 593 | 594 | return d.ok(); 595 | } 596 | 597 | 598 | 599 | 600 | bool BilateralSliceApplyGradKernelLauncher( 601 | const GPUDevice& d, 602 | const float* grid, const int64* grid_size, 603 | const float* guide, const int64* guide_size, 604 | const float* input, const int64* input_size, 605 | const float* backprop, 606 | bool has_offset, 607 | float* grid_grad, float* guide_grad, float* input_grad) 608 | { 609 | int64 gh = grid_size[1]; 610 | int64 gw = grid_size[2]; 611 | int64 gd = grid_size[3]; 612 | int64 coeff_chans = grid_size[4]; 613 | int64 bs = guide_size[0]; 614 | int64 h = guide_size[1]; 615 | int64 w = guide_size[2]; 616 | int64 input_chans = input_size[3]; 617 | 618 | int64 output_chans = 0; 619 | if (has_offset) { 620 | output_chans = coeff_chans/(input_chans+1); 621 | } else { 622 | output_chans = coeff_chans/input_chans; 623 | } 624 | 625 | 626 | int64 grid_count = bs*gh*gw*gd*coeff_chans; 627 | if (grid_count > 0) { 628 | CudaLaunchConfig config = GetCudaLaunchConfig(grid_count, d); 629 | BilateralSliceApplyGridGradKernel<<>>( 630 | grid_count, grid, guide, input, backprop, 631 | bs, h, w, gh, gw, gd, 632 | input_chans, output_chans, has_offset, 633 | grid_grad); 634 | } 635 | 636 | int64 guide_count = bs*h*w; 637 | if (guide_count > 0) { 638 | CudaLaunchConfig config = GetCudaLaunchConfig(guide_count, d); 639 | BilateralSliceApplyGuideGradKernel<<>>( 640 | guide_count, grid, guide, input, backprop, 641 | bs, h, w, gh, gw, gd, 642 | input_chans, output_chans, has_offset, 643 | guide_grad); 644 | } 645 | 646 | int64 input_count = bs*h*w*input_chans; 647 | if (input_count > 0) { 648 | CudaLaunchConfig config = 
GetCudaLaunchConfig(input_count, d); 649 | BilateralSliceApplyInputGradKernel<<>>( 650 | input_count, grid, guide, input, backprop, 651 | bs, h, w, gh, gw, gd, 652 | input_chans, output_chans, has_offset, 653 | input_grad); 654 | } 655 | 656 | return d.ok(); 657 | } 658 | 659 | #endif 660 | --------------------------------------------------------------------------------