├── models ├── __init__.py ├── stgru.py ├── dilation.py ├── flownet1.py ├── lrr.py └── flownet2.py ├── misc ├── compile.sh ├── cityscapes_labels.pckl ├── bilinear_warping.cu.cc └── bilinear_warping.cc ├── LICENSE ├── config.py ├── .gitignore ├── README.md ├── evaluate.py └── train.py /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /misc/compile.sh: -------------------------------------------------------------------------------- 1 | # compile the bilinear warping operator 2 | 3 | TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 4 | 5 | nvcc -std=c++11 -c -o bilinear_warping.cu.o bilinear_warping.cu.cc \ 6 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC --expt-relaxed-constexpr 7 | 8 | g++ -std=c++11 -shared -o bilinear_warping.so bilinear_warping.cc \ 9 | bilinear_warping.cu.o -I $TF_INC -fPIC -L /usr/local/cuda/lib64/ -lcudart 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 David Nilsson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import os 2 | # See "https://www.cityscapes-dataset.com/downloads/" for the zip files referenced below 3 | 4 | # Where "gtFine_trainvaltest.zip" is unpacked 5 | cityscapes_dir = '' 6 | 7 | # Where "leftImg8bit_sequence_trainvaltest.zip" is unpacked. May be the same path as above. 8 | cityscapes_video_dir = '' 9 | 10 | # Where "https://github.com/mcordts/cityscapesScripts" is unpacked 11 | cityscapes_scripts_root = os.path.join(cityscapes_dir, 'scripts') 12 | 13 | example_im = os.path.join(cityscapes_dir, 'gtFine', 'train', 'aachen', 'aachen_000000_000019_gtFine_labelIds.png') 14 | assert os.path.isfile(example_im), "The CityScapes root directory is incorrect. Could not find %s" % (example_im) 15 | 16 | example_im = os.path.join(cityscapes_video_dir, 'leftImg8bit_sequence', 'train', 'aachen', 'aachen_000000_000000_leftImg8bit.png') 17 | assert os.path.isfile(example_im), "The CityScapes video root directory is incorrect. 
Could not find %s" % (example_im) 18 | 19 | file = os.path.join(cityscapes_scripts_root, 'evaluation', 'evalPixelLevelSemanticLabeling.py') 20 | assert os.path.isfile(file), "Could not find the evaluation script %s" % file -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | 
.ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /misc/cityscapes_labels.pckl: -------------------------------------------------------------------------------- 1 | (lp0 2 | (dp1 3 | I0 4 | I255 5 | sI1 6 | I255 7 | sI2 8 | I255 9 | sI3 10 | I255 11 | sI4 12 | I255 13 | sI5 14 | I255 15 | sI6 16 | I255 17 | sI7 18 | I0 19 | sI8 20 | I1 21 | sI9 22 | I255 23 | sI10 24 | I255 25 | sI11 26 | I2 27 | sI12 28 | I3 29 | sI13 30 | I4 31 | sI14 32 | I255 33 | sI15 34 | I255 35 | sI16 36 | I255 37 | sI17 38 | I5 39 | sI18 40 | I255 41 | sI19 42 | I6 43 | sI20 44 | I7 45 | sI21 46 | I8 47 | sI22 48 | I9 49 | sI23 50 | I10 51 | sI24 52 | I11 53 | sI25 54 | I12 55 | sI26 56 | I13 57 | sI27 58 | I14 59 | sI28 60 | I15 61 | sI29 62 | I255 63 | sI30 64 | I255 65 | sI31 66 | I16 67 | sI32 68 | I17 69 | sI33 70 | I18 71 | sI-1 72 | I-1 73 | sa(dp2 74 | I0 75 | S'unlabeled' 76 | p3 77 | sI1 78 | S'ego vehicle' 79 | p4 80 | sI2 81 | S'rectification border' 82 | p5 83 | sI3 84 | S'out of roi' 85 | p6 86 | sI4 87 | S'static' 88 | p7 89 | sI5 90 | S'dynamic' 91 | p8 92 | sI6 93 | S'ground' 94 | p9 95 | sI7 96 | S'road' 97 | p10 98 | sI8 99 | S'sidewalk' 100 | p11 101 | sI9 102 | S'parking' 103 | p12 104 | sI10 105 | S'rail track' 106 | p13 107 | sI11 108 | S'building' 109 | p14 110 | sI12 111 | S'wall' 112 | p15 113 | sI13 114 | S'fence' 115 | p16 116 | sI14 117 | S'guard rail' 118 | p17 119 | sI15 120 | S'bridge' 121 | p18 122 | sI16 123 | S'tunnel' 124 | p19 125 | sI17 126 | S'pole' 127 | p20 128 | sI18 129 | S'polegroup' 130 | p21 131 | sI19 132 | S'traffic light' 133 | p22 134 | sI20 135 | S'traffic sign' 136 | p23 137 | sI21 138 | S'vegetation' 139 | p24 140 | sI22 141 | S'terrain' 142 | p25 143 | sI23 144 | S'sky' 145 | p26 146 | sI24 147 | S'person' 148 | p27 149 | sI25 150 | S'rider' 151 | p28 152 | sI26 153 | S'car' 154 | p29 155 | sI27 156 | 
S'truck' 157 | p30 158 | sI28 159 | S'bus' 160 | p31 161 | sI29 162 | S'caravan' 163 | p32 164 | sI30 165 | S'trailer' 166 | p33 167 | sI31 168 | S'train' 169 | p34 170 | sI32 171 | S'motorcycle' 172 | p35 173 | sI33 174 | S'bicycle' 175 | p36 176 | sI-1 177 | S'license plate' 178 | p37 179 | sa. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Semantic Video Segmentation by Gated Recurrent Flow Propagation 2 | This repo contains the code for the CVPR 2018 paper "Semantic Video Segmentation by Gated Recurrent Flow Propagation" by David Nilsson and Cristian Sminchisescu. [[pdf]](http://openaccess.thecvf.com/content_cvpr_2018/papers/Nilsson_Semantic_Video_Segmentation_CVPR_2018_paper.pdf) 3 | 4 | ### Setup 5 | 6 | Check config.py. Download all data from the cityscapes dataset and change the paths in config.py. Check that you can run python config.py without any errors. 7 | 8 | Run misc/compile.sh to compile the bilinear warping operator. Change the include directory on line 9 if you get errors related to libcudart. 9 | 10 | Download all pretrained models from [here](https://drive.google.com/open?id=1eGy7JcX1ptzxwQ6thEd2R_ix4VehLRQL) and unpack them under ./checkpoints/. For instance, the file ./checkpoints/flownet1.index should exist. 
11 | 12 | ### Evaluate a Pre-Trained Model 13 | 14 | Evaluate the GRFP(LRR-4x, FlowNet2) setup on the validation set by running: 15 | ``` 16 | python evaluate.py --static lrr --flow flownet2 17 | ``` 18 | 19 | Evaluate GRFP(Dilation10, FlowNet2) for various numbers of frames, as in Tables 3 and 4 in the paper: 20 | ``` 21 | python evaluate.py --static dilation --flow flownet2 --frames 1 22 | python evaluate.py --static dilation --flow flownet2 --frames 5 23 | ``` 24 | 25 | The values in Table 9 can be reproduced by running the following: 26 | ``` 27 | python evaluate.py --static lrr --flow flownet2 28 | python evaluate.py --static lrr --flow flownet1 29 | python evaluate.py --static lrr --flow farneback 30 | ``` 31 | 32 | ### Training 33 | 34 | Train and evaluate a model with the following commands: 35 | ``` 36 | python train.py --static lrr --flow flownet2 37 | python evaluate.py --static lrr --flow flownet2 --ckpt lrr_flownet2_it10000 38 | ``` 39 | This should match the performance of the pre-trained LRR model above. See the ./checkpoints directory where parameters are saved during the training procedure. Only LRR is supported at the moment. 40 | 41 | ### Citation 42 | If you use the code in your own research, please cite 43 | ``` 44 | @InProceedings{Nilsson_2018_CVPR, 45 | author = {Nilsson, David and Sminchisescu, Cristian}, 46 | title = {Semantic Video Segmentation by Gated Recurrent Flow Propagation}, 47 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 48 | month = {June}, 49 | year = {2018} 50 | } 51 | ``` 52 | 53 | Depending on the setup you use, consider also citing [PSP](https://github.com/hszhao/PSPNet), [LRR](https://github.com/golnazghiasi/LRR), [Dilation](https://github.com/fyu/dilation), [FlowNet1](https://lmb.informatik.uni-freiburg.de/Publications/2015/DFIB15/), [FlowNet2](https://github.com/lmb-freiburg/flownet2) or [Farnebäck](https://link.springer.com/chapter/10.1007/3-540-45103-X_50). 
// misc/bilinear_warping.cu.cc
//
// CUDA kernels and host launchers for the BilinearWarping op and its gradient.
//
// All tensors are addressed as flat float arrays in N x H x W x C order:
// element (n, h, w, c) of the data lives at
//     ((n*height + h)*width + w)*channels + c
// and the flow has two components per pixel at
//     ((n*height + h)*width + w)*2 + {0, 1}
// where component 0 is added to the row index h and component 1 to the
// column index w.
#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

// Block size shared by every launch in this file.
static const int kThreadsPerBlock = 1024;

// Number of blocks needed so that one thread exists per element.
static inline int NumBlocks(const int count) {
  return (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
}

// Writes 0 to the first N floats of `in` (one thread per element).
__global__ void set_zero(const int N, float* in) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  if (index < N) {
    in[index] = 0.f;
  }
}

// Forward pass: one thread per output element (n, h, w, c).
// out(n, h, w, c) is the bilinear interpolation of `in` at the displaced
// position (h + flow_i, w + flow_j). Taps that fall outside the image
// contribute nothing (implicit zero padding).
__global__ void kernel_warping_bilinear_forward(const int N, int channels, int width,
    int height, const float* in, const float* flow, float* out) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  if (index >= N) {
    return;
  }

  // Decompose the flat index into (n, h, w, c).
  int c = index % channels;
  int w = (index / channels) % width;
  int h = (index / channels / width) % height;
  int n = index / channels / width / height;

  const int flow_base = ((n*height + h)*width + w)*2;
  const float flow_i = flow[flow_base + 0];
  const float flow_j = flow[flow_base + 1];

  // Real-valued sampling position in the source image.
  const float src_i = h + flow_i;
  const float src_j = w + flow_j;

  // The two integer rows/columns that bracket the sampling position.
  const int s_low = ceilf(src_i - 1);
  const int s_high = s_low + 1;
  const int t_low = ceilf(src_j - 1);
  const int t_high = t_low + 1;

  // Accumulate in a register and store once instead of repeatedly
  // read-modify-writing global memory.
  float acc = 0.f;
  for (int s = s_low; s <= s_high; s++) {
    for (int t = t_low; t <= t_high; t++) {
      if (s < 0 || s >= height || t < 0 || t >= width)
        continue;

      const float di = src_i - s;
      const float dj = src_j - t;

      // fabsf / float literals keep the arithmetic in single precision and
      // match the weights used by the backward kernel.
      const float weight = (1.f - fabsf(di)) * (1.f - fabsf(dj));
      acc += weight * in[((n*height + s)*width + t)*channels + c];
    }
  }
  out[index] = acc;
}

// Backward pass: one thread per element (n, h, w, c) of grad_y.
// Scatters grad_y into grad_x at the (up to four) source taps used by the
// forward pass and accumulates the derivative with respect to both flow
// components into grad_flow. grad_x and grad_flow must be zeroed before this
// kernel runs; several threads may hit the same address, hence atomicAdd.
__global__ void kernel_warping_bilinear_backward(const int N, int channels, int width,
    int height, const float* grad_y, const float* input,
    const float* flow, float* grad_x, float* grad_flow) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  if (index >= N) {
    return;
  }

  int c = index % channels;
  int w = (index / channels) % width;
  int h = (index / channels / width) % height;
  int n = index / channels / width / height;

  const int flow_base = ((n*height + h)*width + w)*2;
  const float flow_i = flow[flow_base + 0];
  const float flow_j = flow[flow_base + 1];

  const float src_i = h + flow_i;
  const float src_j = w + flow_j;

  const int s_low = ceilf(src_i - 1);
  const int s_high = s_low + 1;
  const int t_low = ceilf(src_j - 1);
  const int t_high = t_low + 1;

  // ((n*height + h)*width + w)*channels + c is exactly `index`.
  const float g = grad_y[index];

  for (int s = s_low; s <= s_high; s++) {
    for (int t = t_low; t <= t_high; t++) {
      if (s < 0 || s >= height || t < 0 || t >= width)
        continue;

      const float di = src_i - s;
      const float dj = src_j - t;
      const float wi = 1.f - fabsf(di);
      const float wj = 1.f - fabsf(dj);
      const int src_index = ((n*height + s)*width + t)*channels + c;

      // Gradient with respect to the warped data.
      atomicAdd(&grad_x[src_index], g * wi * wj);

      // Gradient with respect to the flow: derivative of the bilinear
      // weight in each direction, times the sampled input value.
      const float src_value = input[src_index];

      const float weight_0 = (dj > 0) ? -wi : wi;
      atomicAdd(&grad_flow[flow_base + 1], g * src_value * weight_0);

      const float weight_1 = (di > 0) ? -wj : wj;
      atomicAdd(&grad_flow[flow_base + 0], g * src_value * weight_1);
    }
  }
}

// Host entry point for the forward op. `count` is the total number of
// elements in input/out (N*H*W*C).
// NOTE(review): kernels are launched without an explicit stream argument;
// confirm this is the intended stream for the calling framework.
void BilinearWarpingLauncher(const float* input, const float* flow, float* out,
    const int count, const int channels, const int height, const int width) {
  if (count <= 0) {
    return;  // nothing to do; a zero-block launch is not a valid configuration
  }

  kernel_warping_bilinear_forward<<<NumBlocks(count), kThreadsPerBlock>>>(
      count, channels, width, height, input, flow, out);
}

// Host entry point for the gradient op. Zeroes grad_x (count elements) and
// grad_flow (count / channels * 2 elements) and then runs the backward kernel.
void BilinearWarpingGradLauncher(const float* grad_y, const float* input,
    const float* flow, float* grad_x, float* grad_flow,
    const int count, const int channels, const int height, const int width) {
  if (count <= 0) {
    return;  // also guards the division by `channels` below
  }

  const int flow_count = count / channels * 2;

  set_zero<<<NumBlocks(count), kThreadsPerBlock>>>(count, grad_x);
  set_zero<<<NumBlocks(flow_count), kThreadsPerBlock>>>(flow_count, grad_flow);

  kernel_warping_bilinear_backward<<<NumBlocks(count), kThreadsPerBlock>>>(
      count, channels, width, height, grad_y, input, flow, grad_x, grad_flow);
}

#endif
-(1-fabs(dj)) : (1-fabs(dj)); 86 | atomicAdd(&grad_flow[((n*height + h)*width + w)*2 + 0], 87 | grad_y[((n*height + h)*width + w)*channels + c]* 88 | input[((n*height + s)*width + t)*channels + c]* 89 | weight_1); 90 | } 91 | } 92 | } 93 | } 94 | } 95 | 96 | void BilinearWarpingLauncher(const float* input, const float* flow, float* out, 97 | const int count, const int channels, const int height, const int width) { 98 | 99 | const int kThreadsPerBlock = 1024; 100 | kernel_warping_bilinear_forward<<<(count + kThreadsPerBlock - 1)/kThreadsPerBlock, 101 | kThreadsPerBlock>>>(count, channels, width, height, 102 | input, flow, out); 103 | } 104 | 105 | void BilinearWarpingGradLauncher(const float* grad_y, const float* input, 106 | const float* flow, float* grad_x, float* grad_flow, 107 | const int count, const int channels, const int height, const int width) { 108 | 109 | const int kThreadsPerBlock = 1024; 110 | set_zero<<<(count + kThreadsPerBlock - 1) / kThreadsPerBlock, 111 | kThreadsPerBlock>>>(count, grad_x); 112 | 113 | set_zero<<<(count / channels * 2 + kThreadsPerBlock - 1) / kThreadsPerBlock, 114 | kThreadsPerBlock>>>(count / channels * 2, grad_flow); 115 | 116 | 117 | kernel_warping_bilinear_backward<<<(count + kThreadsPerBlock - 1) / kThreadsPerBlock, 118 | kThreadsPerBlock>>>(count, channels, width, 119 | height, grad_y, input, flow, grad_x, grad_flow); 120 | } 121 | 122 | 123 | #endif -------------------------------------------------------------------------------- /misc/bilinear_warping.cc: -------------------------------------------------------------------------------- 1 | #include "tensorflow/core/framework/op.h" 2 | #include "tensorflow/core/framework/op_kernel.h" 3 | 4 | using namespace tensorflow; 5 | 6 | REGISTER_OP("BilinearWarping") 7 | .Input("x: float") 8 | .Input("flow: float") 9 | .Output("y: float"); 10 | 11 | REGISTER_OP("BilinearWarpingGrad") 12 | .Input("grad_y: float") 13 | .Input("x: float") 14 | .Input("flow: float") 15 | .Output("grad_x: 
float") 16 | .Output("grad_flow: float"); 17 | 18 | void BilinearWarpingLauncher(const float* input, const float* flow, float* out, 19 | const int count, const int channels, const int height, const int width); 20 | 21 | class BilinearWarpingGPUOp : public OpKernel { 22 | public: 23 | explicit BilinearWarpingGPUOp(OpKernelConstruction* context) : OpKernel(context) {} 24 | 25 | void Compute(OpKernelContext* context) override { 26 | const Tensor& input_tensor = context->input(0); 27 | auto input = input_tensor.flat(); 28 | 29 | const Tensor& flow_tensor = context->input(1); 30 | auto flow = flow_tensor.flat(); 31 | 32 | OP_REQUIRES(context, input_tensor.dims() == 4, errors::InvalidArgument("input dim != 4")); 33 | OP_REQUIRES(context, flow_tensor.dims() == 4, errors::InvalidArgument("flow dim != 4")); 34 | 35 | Tensor* output_tensor = NULL; 36 | OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), &output_tensor)); 37 | auto output = output_tensor->flat(); 38 | 39 | const int input_dims = input_tensor.dims(); 40 | OP_REQUIRES(context, input_dims == 4, errors::InvalidArgument("input dim != 4")); 41 | OP_REQUIRES(context, flow_tensor.dims() == 4, errors::InvalidArgument("flow dim != 4")); 42 | OP_REQUIRES(context, flow_tensor.dim_size(0) == input_tensor.dim_size(0), errors::InvalidArgument("flow dim 0 != input dim 0")); 43 | OP_REQUIRES(context, flow_tensor.dim_size(1) == input_tensor.dim_size(1), errors::InvalidArgument("flow dim 1 != input dim 1")); 44 | OP_REQUIRES(context, flow_tensor.dim_size(2) == input_tensor.dim_size(2), errors::InvalidArgument("flow dim 2 != input dim 2")); 45 | OP_REQUIRES(context, flow_tensor.dim_size(3) == 2, errors::InvalidArgument("Flow dim 3 != 2")); 46 | 47 | const int count = input_tensor.NumElements(); 48 | const int channels = input_tensor.dim_size(3); 49 | const int height = input_tensor.dim_size(1); 50 | const int width = input_tensor.dim_size(2); 51 | BilinearWarpingLauncher(input.data(), flow.data(), 
output.data(), count, channels, height, width); 52 | } 53 | }; 54 | 55 | void BilinearWarpingGradLauncher(const float* grad_y, const float* input, 56 | const float* flow, float* grad_x, float* grad_flow, 57 | const int count, const int channels, const int height, const int width); 58 | 59 | class BilinearWarpingGradGPUOp : public OpKernel { 60 | public: 61 | explicit BilinearWarpingGradGPUOp(OpKernelConstruction* context) : OpKernel(context) {} 62 | 63 | void Compute(OpKernelContext* context) override { 64 | const Tensor& grad_y_tensor = context->input(0); 65 | auto grad_y = grad_y_tensor.flat(); 66 | 67 | const Tensor& input_tensor = context->input(1); 68 | auto input = input_tensor.flat(); 69 | 70 | const Tensor& flow_tensor = context->input(2); 71 | auto flow = flow_tensor.flat(); 72 | 73 | OP_REQUIRES(context, input_tensor.dims() == 4, errors::InvalidArgument("input dim != 4")); 74 | OP_REQUIRES(context, flow_tensor.dims() == 4, errors::InvalidArgument("flow dim != 4")); 75 | 76 | Tensor* grad_x_tensor = NULL; 77 | OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), &grad_x_tensor)); 78 | auto grad_x = grad_x_tensor->flat(); 79 | 80 | Tensor* grad_flow_tensor = NULL; 81 | OP_REQUIRES_OK(context, context->allocate_output(1, flow_tensor.shape(), &grad_flow_tensor)); 82 | auto grad_flow = grad_flow_tensor->flat(); 83 | 84 | const int input_dims = input_tensor.dims(); 85 | OP_REQUIRES(context, input_dims == 4, errors::InvalidArgument("input dim != 4")); 86 | OP_REQUIRES(context, flow_tensor.dims() == 4, errors::InvalidArgument("flow dim != 4")); 87 | OP_REQUIRES(context, flow_tensor.dim_size(0) == input_tensor.dim_size(0), errors::InvalidArgument("flow dim 0 != input dim 0")); 88 | OP_REQUIRES(context, flow_tensor.dim_size(1) == input_tensor.dim_size(1), errors::InvalidArgument("flow dim 1 != input dim 1")); 89 | OP_REQUIRES(context, flow_tensor.dim_size(2) == input_tensor.dim_size(2), errors::InvalidArgument("flow dim 2 != input dim 2")); 90 
# models/stgru.py
"""Spatio-temporal gated recurrent unit (STGRU) built on a bilinear warping op."""
import numpy as np
import tensorflow as tf


class STGRU:
    """Convolutional GRU that propagates per-pixel class scores between frames.

    The previous hidden state is warped to the current frame with the supplied
    bilinear warping module and combined with the current frame's scores
    through learned reset (r) and update (z) gates.
    """

    def __init__(self, tensor_size, conv_sizes, bilinear_warping_module):
        """Create the trainable variables of the cell.

        Args:
            tensor_size: (channels, height, width), e.g. 19 x 512 x 512.
            conv_sizes: (conv_height, conv_width) of the gate kernels, e.g. 5 x 5.
            bilinear_warping_module: module exposing ``bilinear_warping(x, flow)``.
        """
        self.bilinear_warping_module = bilinear_warping_module
        channels, height, width = tensor_size
        conv_height, conv_width = conv_sizes

        self.channels, self.height, self.width = channels, height, width
        self.conv_height, self.conv_width = conv_height, conv_width

        # Identity convolution: a 1 at the kernel centre on every k -> k channel.
        # Floor division is required: array indices must be integers, and
        # plain "/" yields a float on Python 3.
        center_i, center_j = conv_height // 2, conv_width // 2
        identity = np.zeros((conv_height, conv_width, channels, channels))
        for k in range(channels):
            identity[center_i, center_j, k, k] = 1.
        identity_map = tf.constant(identity, dtype=tf.float32)

        # identity + noise was needed for some variables to train the model
        self.weights = {
            'ir': tf.Variable(tf.random_normal([conv_height, conv_width, 3, 1], stddev=0.001), name="W_ir"),
            'xh': tf.Variable(6.*identity_map + tf.random_normal([conv_height, conv_width, channels, channels], stddev=0.01), name="W_xh"),
            'hh': tf.Variable(6.*identity_map + tf.random_normal([conv_height, conv_width, channels, channels], stddev=0.01), name="W_hh"),
            'xz': tf.Variable(tf.random_normal([conv_height, conv_width, channels, 1], stddev=0.01), name="W_xz"),
            'hz': tf.Variable(tf.random_normal([conv_height, conv_width, channels, 1], stddev=0.01), name="W_hz"),
            'lambda': tf.Variable(tf.constant(2., dtype=tf.float32), name="lambda"),
            'bias_r': tf.Variable(tf.zeros([1], dtype=tf.float32), name="bias_r"),
            'bias_z': tf.Variable(tf.zeros([channels], dtype=tf.float32), name="bias_z"),
        }

    def get_one_step_predictor(self):
        """Build the graph for a single recurrent step (batch size 1).

        Returns:
            Tuple ``(input_images_tensor, input_flow, input_segmentation,
            prev_h, new_h, prediction)``. The first four are placeholders:
            the two stacked frames (index 0 = previous, index 1 = current),
            the flow, the current frame's static scores and the previous
            hidden state. ``new_h`` is the updated hidden state and
            ``prediction`` its argmax over the channel axis.
        """
        input_images_tensor = tf.placeholder('float', [2, 1, self.height, self.width, 3], name="gru_input_images")
        prev_image, cur_image = tf.unstack(input_images_tensor, num=2)

        input_flow = tf.placeholder('float', [1, self.height, self.width, 2], name="gru_input_flows")

        input_segmentation = tf.placeholder('float', [1, self.height, self.width, self.channels], name="gru_input_unaries")

        prev_h = tf.placeholder('float', [1, self.height, self.width, self.channels])

        new_h = self.get_GRU_cell(cur_image, prev_image,
                                  input_flow, prev_h, input_segmentation)

        prediction = tf.argmax(new_h, 3)
        return input_images_tensor, input_flow, input_segmentation, prev_h, new_h, prediction

    def get_optimizer(self, N_steps):
        """Build the training graph for a sequence unrolled over ``N_steps`` frames.

        The hidden state is initialised with the scores of the first frame and
        the loss is evaluated on the last frame only.

        Returns:
            Tuple ``(opt, loss, prediction, learning_rate, input_images_tensor,
            input_flow_tensor, input_segmentation_tensor, targets)`` where
            ``opt`` is the Adam minimisation op and the last four entries
            (plus ``learning_rate``) are placeholders to feed.
        """
        input_images_tensor = tf.placeholder('float', [N_steps, 1, self.height, self.width, 3], name="gru_input_images")
        input_images = tf.unstack(input_images_tensor, num=N_steps)

        input_flow_tensor = tf.placeholder('float', [N_steps-1, 1, self.height, self.width, 2], name="gru_input_flows")
        input_flow = tf.unstack(input_flow_tensor, num=N_steps-1)

        input_segmentation_tensor = tf.placeholder('float', [N_steps, 1, self.height, self.width, self.channels], name="gru_input_unaries")
        input_segmentation = tf.unstack(input_segmentation_tensor, num=N_steps)

        outputs = [input_segmentation[0]]
        for t in range(1, N_steps):
            h = self.get_GRU_cell(input_images[t], input_images[t-1],
                                  input_flow[t-1], outputs[-1], input_segmentation[t])
            outputs.append(h)

        # the loss is tricky to implement since softmaxloss requires [i,j] matrix
        # with j ranging over the classes
        # the image has to be manipulated to fit
        scores = tf.reshape(outputs[-1], [self.height*self.width, self.channels])
        prediction = tf.argmax(scores, 1)
        prediction = tf.reshape(prediction, [self.height, self.width])

        targets = tf.placeholder('int64', [self.height, self.width])
        targets_r = tf.reshape(targets, [self.height*self.width])
        idx = targets_r < self.channels  # classes are 0,1,...,c-1 with 255 being unknown
        loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=tf.boolean_mask(scores, idx), labels=tf.boolean_mask(targets_r, idx)))

        learning_rate = tf.placeholder('float', [])
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.95, beta2=0.99, epsilon=1e-8)

        opt = opt.minimize(loss)
        return opt, loss, prediction, learning_rate, \
            input_images_tensor, input_flow_tensor, input_segmentation_tensor, targets

    def get_GRU_cell(self, input_image, prev_image, flow_input, h_prev, unary_input):
        """Build one STGRU update and return the new hidden state tensor.

        Args:
            input_image: current frame.
            prev_image: previous frame.
            flow_input: flow passed to the warping op together with the
                previous frame / hidden state.
            h_prev: previous hidden state (class scores).
            unary_input: static class scores of the current frame.
        """
        # apply softmax to h_prev and unary_input, then centre them by
        # subtracting the uniform probability 1/channels
        uniform = 1. / self.channels
        h_prev = self.softmax_last_dim(h_prev) - uniform
        unary_input = self.softmax_last_dim(unary_input) - uniform

        # photometric error between the current frame and the warped previous frame
        I_diff = input_image - self.bilinear_warping_module.bilinear_warping(prev_image, flow_input)

        # candidate state
        h_prev_warped = self.bilinear_warping_module.bilinear_warping(h_prev, flow_input)

        # reset gate: close to 1 where the warped frame matches the current one
        r = 1. - tf.tanh(tf.abs(tf.nn.conv2d(I_diff, self.weights['ir'], [1,1,1,1], padding='SAME')
                                + self.weights['bias_r']))

        h_prev_reset = h_prev_warped * r

        h_tilde = tf.nn.conv2d(unary_input, self.weights['xh'], [1,1,1,1], padding='SAME') \
            + tf.nn.conv2d(h_prev_reset, self.weights['hh'], [1,1,1,1], padding='SAME')

        # weighting
        z = tf.sigmoid(
            tf.nn.conv2d(unary_input, self.weights['xz'], [1,1,1,1], padding='SAME')
            + tf.nn.conv2d(h_prev_reset, self.weights['hz'], [1,1,1,1], padding='SAME')
            + self.weights['bias_z']
        )

        h = self.weights['lambda']*(1 - z)*h_prev_reset + z*h_tilde

        return h

    def softmax_last_dim(self, x):
        """Apply softmax to a 4D tensor along the last dimension."""
        S = tf.shape(x)
        # flatten to [-1, channels], normalise each row, restore the shape
        y = tf.reshape(x, [-1, S[4-1]])
        y = tf.nn.softmax(y)
        y = tf.reshape(y, S)
        return y
# evaluate.py
"""Evaluate GRFP on the Cityscapes validation set and run the official scorer."""
import argparse, glob, os, cv2, sys, pickle
import numpy as np
import tensorflow as tf
import config as cfg
from models.stgru import STGRU
from models.lrr import LRR
from models.dilation import dilation10network
from models.flownet2 import Flownet2
from models.flownet1 import Flownet1
from tensorflow.python.framework import ops

sys.path.insert(0, os.path.join(cfg.cityscapes_scripts_root, 'evaluation'))
import evalPixelLevelSemanticLabeling

bilinear_warping_module = tf.load_op_library('./misc/bilinear_warping.so')


@ops.RegisterGradient("BilinearWarping")
def _BilinearWarping(op, grad):
    """Gradient of BilinearWarping, delegated to the BilinearWarpingGrad op."""
    return bilinear_warping_module.bilinear_warping_grad(grad, op.inputs[0], op.inputs[1])


def evaluate(args):
    """Segment every annotated validation frame and score the results.

    For each ground-truth label image, the ``args.frames`` frames ending at
    the annotated frame are processed in order: the static network produces
    per-frame scores, optical flow is computed between consecutive frames and
    the STGRU propagates the hidden state. The prediction for the annotated
    frame is written (as Cityscapes label ids) to ``<cityscapes_dir>/results``
    and the official evaluation script is run afterwards.

    Args:
        args: parsed command line with ``static``, ``flow``, ``frames``
            and ``ckpt`` attributes.
    """
    data_split = 'val'
    nbr_classes = 19
    im_size = [1024, 2048]
    image_mean = [72.39,82.91,73.16] # the mean is automatically subtracted in some modules e.g. flownet2, so be careful

    # Binary mode is required for pickle on Python 3 and is harmless on Python 2.
    # NOTE: pickle must only be used on trusted files; this one ships with the repo.
    with open('misc/cityscapes_labels.pckl', 'rb') as f:
        cs_id2trainid, cs_id2name = pickle.load(f)

    assert args.static in ['dilation', 'lrr'], "Only dilation and LRR are supported for now."

    if args.flow == 'flownet2':
        with tf.variable_scope('flow'):
            flow_network = Flownet2(bilinear_warping_module)
            flow_img0 = tf.placeholder(tf.float32)
            flow_img1 = tf.placeholder(tf.float32)
            flow_tensor = flow_network(flow_img0, flow_img1, flip=True)
    elif args.flow == 'flownet1':
        with tf.variable_scope('flow'):
            flow_network = Flownet1()
            flow_img0 = tf.placeholder(tf.float32)
            flow_img1 = tf.placeholder(tf.float32)
            flow_tensor = flow_network.get_output_tensor(flow_img0, flow_img1, im_size)

    RNN = STGRU([nbr_classes, im_size[0], im_size[1]], [7, 7], bilinear_warping_module)

    input_images_tensor, input_flow, \
        input_segmentation, prev_h, new_h, \
        prediction = RNN.get_one_step_predictor()

    if args.static == 'lrr':
        static_input = tf.placeholder(tf.float32)
        static_network = LRR()
        static_output = static_network(static_input)
    elif args.static == 'dilation':
        static_input = tf.placeholder(tf.float32)
        static_network = dilation10network()
        static_output = static_network.get_output_tensor(static_input, im_size)

    # Flow-network variables live under the 'flow/' scope and are restored
    # from their own checkpoint.
    saver = tf.train.Saver([k for k in tf.global_variables() if not k.name.startswith('flow/')])
    if args.flow in ['flownet1', 'flownet2']:
        saver_fn = tf.train.Saver([k for k in tf.global_variables() if k.name.startswith('flow/')])

    # cv2.imwrite fails silently when the target directory is missing.
    results_dir = os.path.join(cfg.cityscapes_dir, 'results')
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)

    with tf.Session() as sess:
        if args.ckpt != '':
            saver.restore(sess, './checkpoints/%s' % (args.ckpt))
        else:
            if args.static == 'lrr':
                saver.restore(sess, './checkpoints/lrr_grfp')
            elif args.static == 'dilation':
                saver.restore(sess, './checkpoints/dilation_grfp')

        if args.flow == 'flownet1':
            saver_fn.restore(sess, './checkpoints/flownet1')
        elif args.flow == 'flownet2':
            saver_fn.restore(sess, './checkpoints/flownet2')

        L = glob.glob(os.path.join(cfg.cityscapes_dir, 'gtFine', data_split, "*", "*labelIds.png"))
        for (progress_counter, im_path) in enumerate(L):
            # file names look like <city>_<seq>_<frame>_gtFine_labelIds.png
            parts = os.path.basename(im_path).split('_')
            city, seq, frame = parts[0], parts[1], parts[2]

            print("Processing sequence %d/%d" % (progress_counter+1, len(L)))
            for dt in range(-args.frames + 1, 1):
                first_frame = dt == -args.frames + 1
                t = int(frame) + dt

                frame_path = os.path.join(cfg.cityscapes_video_dir, 'leftImg8bit_sequence', data_split,
                        city, ("%s_%s_%06d_leftImg8bit.png" % (city, seq, t)))
                im = cv2.imread(frame_path, 1)
                if im is None:
                    # cv2.imread returns None instead of raising on failure
                    raise IOError("Could not read frame %s" % frame_path)
                im = im.astype(np.float32)[np.newaxis,...]

                # Compute optical flow
                if not first_frame:
                    if args.flow == 'flownet2':
                        flow = sess.run(flow_tensor, feed_dict={flow_img0: im, flow_img1: last_im})
                    elif args.flow == 'flownet1':
                        flow = sess.run(flow_tensor, feed_dict={flow_img0: im, flow_img1: last_im})
                        flow = flow[...,(1, 0)]
                    elif args.flow == 'farneback':
                        im_gray = cv2.cvtColor(im[0], cv2.COLOR_BGR2GRAY)
                        last_im_gray = cv2.cvtColor(last_im[0], cv2.COLOR_BGR2GRAY)

                        flow = cv2.calcOpticalFlowFarneback(im_gray, last_im_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
                        flow = flow[...,(1, 0)]
                        flow = flow[np.newaxis,...]

                # Static segmentation
                if args.static == 'dilation':
                    # augment a 186x186 border around the image and subtract the mean
                    im_aug = cv2.copyMakeBorder(im[0], 186, 186, 186, 186, cv2.BORDER_REFLECT_101)
                    im_aug = im_aug - image_mean
                    im_aug = im_aug[np.newaxis,...]

                    x = sess.run(static_output, feed_dict={static_input: im_aug})
                elif args.static == 'lrr':
                    x = sess.run(static_output, feed_dict={static_input: im})

                if first_frame:
                    # the hidden state is simply the static segmentation for the first frame
                    h = x
                    pred = np.argmax(h, axis=3)
                else:
                    inputs = {
                        input_images_tensor: np.stack([last_im, im]),
                        input_flow: flow,
                        input_segmentation: x,
                        prev_h: h
                    }
                    # GRFP
                    h, pred = sess.run([new_h, prediction], feed_dict=inputs)

                last_im = im

            # save it: map train ids back to the Cityscapes label ids
            S = pred[0]
            S_new = S.copy()
            for (idx, train_idx) in cs_id2trainid.items():
                S_new[S == train_idx] = idx

            output_path = '%s_%s_%s.png' % (city, seq, frame)
            cv2.imwrite(os.path.join(results_dir, output_path), S_new)

    # Evaluate using the official CityScapes code
    evalPixelLevelSemanticLabeling.main([])


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Evluate GRFP on the CityScapes validation set.')

    # Only the values that evaluate() actually supports are accepted.
    parser.add_argument('--static', help='Which static network to use.', required=True,
                        choices=['dilation', 'lrr'])
    parser.add_argument('--flow', help='Which optical flow method to use.', required=True,
                        choices=['flownet1', 'flownet2', 'farneback'])
    parser.add_argument('--frames', type=int, help='Number of frames to use.', default=5, required=False)
    parser.add_argument('--ckpt', help='Which checkpoint file to load from. Specify relative to the ./checkpoints/ directory.', default='', required=False)

    args = parser.parse_args()

    if not 1 <= args.frames <= 20:
        parser.error("The number of frames must be between 1 and 20.")

    evaluate(args)
Specify relative to the ./checkpoints/ directory.', default='', required=False) 156 | 157 | args = parser.parse_args() 158 | 159 | assert args.flow in ['flownet1', 'flownet2', 'farneback'], "Unknown flow method %s." % args.flow 160 | assert args.static in ['dilation', 'dilation_grfp', 'lrr', 'lrr_grfp'], "Unknown static method %s." % args.static 161 | assert args.frames >= 1 and args.frames <= 20, "The number of frames must be between 1 and 20." 162 | 163 | evaluate(args) -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse, glob, os, cv2, sys, pickle, random 2 | import numpy as np 3 | import tensorflow as tf 4 | import config as cfg 5 | from models.stgru import STGRU 6 | from models.lrr import LRR 7 | from models.dilation import dilation10network 8 | from models.flownet2 import Flownet2 9 | from models.flownet1 import Flownet1 10 | from tensorflow.python.framework import ops 11 | 12 | bilinear_warping_module = tf.load_op_library('./misc/bilinear_warping.so') 13 | @ops.RegisterGradient("BilinearWarping") 14 | def _BilinearWarping(op, grad): 15 | return bilinear_warping_module.bilinear_warping_grad(grad, op.inputs[0], op.inputs[1]) 16 | 17 | class DataLoader(): 18 | def __init__(self, im_size, nbr_frames): 19 | self.im_size = im_size 20 | self.dataset_size = [1024, 2048] 21 | self.nbr_frames = nbr_frames 22 | self.L = glob.glob(os.path.join(cfg.cityscapes_dir, 'gtFine', 'train', "*", "*labelTrainIds.png")) 23 | random.shuffle(self.L) 24 | self.idx = 0 25 | 26 | def get_next_sequence(self): 27 | H, W = self.dataset_size 28 | h, w = self.im_size 29 | 30 | offset = [np.random.randint(H - h), 31 | np.random.randint(W - w)] 32 | i0, j0 = offset 33 | i1, j1 = i0 + h, j0 + w 34 | 35 | im_path = self.L[self.idx % len(self.L)] 36 | self.idx += 1 37 | 38 | parts = im_path.split('/')[-1].split('_') 39 | city, seq, frame = parts[0], 
def train(args):
    """Train the GRFP recurrent unit (STGRU) on CityScapes training sequences.

    Every iteration loads one cropped frame sequence, computes optical flow
    between consecutive frames, segments every frame with the static network
    and runs one optimizer step of the STGRU with the ground truth returned by
    the data loader as target.  After ``t0_dilation_net`` iterations the
    gradient of the GRU loss with respect to the static segmentations is also
    pushed back into the static network in a separate pass.

    Args:
        args: parsed command-line arguments.  The attributes read here are
            ``static`` ('lrr' or 'dilation'), ``flow`` ('flownet1',
            'flownet2' or 'farneback') and ``frames`` (sequence length).
    """
    nbr_classes = 19

    # learning rates for the GRU and the static segmentation networks, respectively
    learning_rate = 2e-5
    static_learning_rate = 2e-12

    # The total number of iterations and when the static network should start being refined
    nbr_iterations = 10000
    t0_dilation_net = 5000

    # How many of the most recent frames are used when refining the static network
    nbr_refined_frames = 3

    im_size = [512, 512]
    image_mean = [72.39,82.91,73.16] # the mean is automatically subtracted in some modules e.g. flownet2, so be careful

    # Pickle data is binary: open in 'rb' (works on Python 2 and 3) and let the
    # context manager close the file even if unpickling fails.
    with open('misc/cityscapes_labels.pckl', 'rb') as f:
        cs_id2trainid, cs_id2name = pickle.load(f)

    assert args.static in ['dilation', 'lrr'], "Only dilation and LRR are supported for now."

    if args.flow == 'flownet2':
        with tf.variable_scope('flow'):
            flow_network = Flownet2(bilinear_warping_module)
            flow_img0 = tf.placeholder(tf.float32)
            flow_img1 = tf.placeholder(tf.float32)
            flow_tensor = flow_network(flow_img0, flow_img1, flip=True)
    elif args.flow == 'flownet1':
        with tf.variable_scope('flow'):
            flow_network = Flownet1()
            flow_img0 = tf.placeholder(tf.float32)
            flow_img1 = tf.placeholder(tf.float32)
            flow_tensor = flow_network.get_output_tensor(flow_img0, flow_img1, im_size)

    RNN = STGRU([nbr_classes, im_size[0], im_size[1]], [7, 7], bilinear_warping_module)

    gru_opt, gru_loss, gru_prediction, gru_learning_rate, \
      gru_input_images_tensor, gru_input_flow_tensor, \
      gru_input_segmentation_tensor, gru_targets = RNN.get_optimizer(args.frames)
    # Gradient of the GRU loss w.r.t. the static segmentations; it is fed to
    # the static network's optimizer in the second training stage.
    unary_grad_op = tf.gradients(gru_loss, gru_input_segmentation_tensor)

    if args.static == 'lrr':
        static_input = tf.placeholder(tf.float32)
        static_network = LRR()
        static_output = static_network(static_input)
    elif args.static == 'dilation':
        static_input = tf.placeholder(tf.float32)
        static_network = dilation10network()
        static_output = static_network.get_output_tensor(static_input, im_size)

    # Both static networks expose get_optimizer(x, y, learning_rate); building
    # it for either choice keeps unary_opt/unary_dLdy defined in the
    # refinement stage below.
    unary_opt, unary_dLdy = static_network.get_optimizer(static_input, static_output, static_learning_rate)

    data_loader = DataLoader(im_size, args.frames)

    loss_history = np.zeros(nbr_iterations)
    loss_history_smoothed = np.zeros(nbr_iterations)

    # Flow variables are restored/saved separately from everything else.
    vars_trainable = [k for k in tf.trainable_variables() if not k.name.startswith('flow/')]
    vars_static = [k for k in vars_trainable if not k in RNN.weights.values()]
    loader_static = tf.train.Saver(vars_static)
    saver = tf.train.Saver(vars_trainable)

    if args.flow in ['flownet1', 'flownet2']:
        saver_fn = tf.train.Saver([k for k in tf.trainable_variables() if k.name.startswith('flow/')])

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        if args.static == 'lrr':
            loader_static.restore(sess, './checkpoints/lrr_pretrained')
        elif args.static == 'dilation':
            assert False, "Pretrained dilation model will soon be released."
            saver.restore(sess, './checkpoints/dilation_grfp')

        if args.flow == 'flownet1':
            saver_fn.restore(sess, './checkpoints/flownet1')
        elif args.flow == 'flownet2':
            saver_fn.restore(sess, './checkpoints/flownet2')

        for training_it in range(nbr_iterations):
            images, ground_truth = data_loader.get_next_sequence()

            # Optical flow
            optflow = []
            for frame in range(1, args.frames):
                im, last_im = images[frame], images[frame-1]
                if args.flow == 'flownet2':
                    flow = sess.run(flow_tensor, feed_dict={flow_img0: im, flow_img1: last_im})
                elif args.flow == 'flownet1':
                    flow = sess.run(flow_tensor, feed_dict={flow_img0: im, flow_img1: last_im})
                    flow = flow[...,(1, 0)]
                elif args.flow == 'farneback':
                    im_gray = cv2.cvtColor(im[0], cv2.COLOR_BGR2GRAY)
                    last_im_gray = cv2.cvtColor(last_im[0], cv2.COLOR_BGR2GRAY)

                    flow = cv2.calcOpticalFlowFarneback(im_gray, last_im_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
                    flow = flow[...,(1, 0)]
                    flow = flow[np.newaxis,...]
                optflow.append(flow)

            # Static segmentation
            static_segm = []
            # Exactly what was fed to the static network for each frame, so
            # the refinement pass can replay the same input.
            static_inputs = []
            for frame in range(args.frames):
                im = images[frame]
                if args.static == 'dilation':
                    # augment a 186x186 border around the image and subtract the mean
                    im_aug = cv2.copyMakeBorder(im[0], 186, 186, 186, 186, cv2.BORDER_REFLECT_101)
                    im_aug = im_aug - image_mean
                    im_aug = im_aug[np.newaxis,...]

                    net_input = im_aug
                elif args.static == 'lrr':
                    net_input = im
                x = sess.run(static_output, feed_dict={static_input: net_input})
                static_inputs.append(net_input)
                static_segm.append(x)

            # GRFP
            rnn_input = {
                gru_learning_rate: learning_rate,
                gru_input_images_tensor: np.stack(images),
                gru_input_flow_tensor: np.stack(optflow),
                gru_input_segmentation_tensor: np.stack(static_segm),
                gru_targets: ground_truth,
            }

            _, loss, pred, unary_grads = sess.run([gru_opt, gru_loss,
                gru_prediction, unary_grad_op], feed_dict=rnn_input)
            loss_history[training_it] = loss

            # Plain running mean while there is little history, exponential
            # moving average afterwards.
            if training_it < 300:
                loss_history_smoothed[training_it] = np.mean(loss_history[0:training_it+1])
            else:
                loss_history_smoothed[training_it] = 0.997*loss_history_smoothed[training_it-1] + 0.003*loss

            # Refine the static network?
            # The reason that a two-stage training routine is used
            # is because there is not enough GPU memory (with a 12 GB Titan X)
            # to do it in one pass.
            if training_it+1 > t0_dilation_net:
                # Clamp at 0: with fewer than nbr_refined_frames frames a
                # negative start index would wrap around (or go out of range).
                first_refined = max(0, len(images) - nbr_refined_frames)
                for k in range(first_refined, len(images)):
                    g = unary_grads[0][k]
                    _ = sess.run([unary_opt], feed_dict={
                      static_input: static_inputs[k],
                      unary_dLdy: g
                    })

            if training_it > 0 and (training_it+1) % 1000 == 0:
                saver.save(sess, './checkpoints/%s_%s_it%d' % (args.static, args.flow, training_it+1))

            if (training_it+1) % 200 == 0:
                print("Iteration %d/%d: Loss %.3f" % (training_it+1, nbr_iterations, loss_history_smoothed[training_it]))
218 | 219 | train(args) -------------------------------------------------------------------------------- /models/dilation.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class dilation10network: 4 | def __init__(self, dropout_keeprate = 1.0): 5 | # 6 | self.dropout_keeprate = dropout_keeprate 7 | self.mean = [72.39,82.91,73.16] 8 | 9 | self.weights = { 10 | 'conv1_1': tf.Variable(tf.zeros([3, 3, 3, 64], dtype=tf.float32), name='conv1_1'), 11 | 'conv1_2': tf.Variable(tf.zeros([3, 3, 64, 64], dtype=tf.float32), name='conv1_2'), 12 | 13 | 'conv2_1': tf.Variable(tf.zeros([3, 3, 64, 128], dtype=tf.float32), name='conv2_1'), 14 | 'conv2_2': tf.Variable(tf.zeros([3, 3, 128, 128], dtype=tf.float32), name='conv2_2'), 15 | 16 | 'conv3_1': tf.Variable(tf.zeros([3, 3, 128, 256], dtype=tf.float32), name='conv3_1'), 17 | 'conv3_2': tf.Variable(tf.zeros([3, 3, 256, 256], dtype=tf.float32), name='conv3_2'), 18 | 'conv3_3': tf.Variable(tf.zeros([3, 3, 256, 256], dtype=tf.float32), name='conv3_3'), 19 | 20 | 'conv4_1': tf.Variable(tf.zeros([3, 3, 256, 512], dtype=tf.float32), name='conv4_1'), 21 | 'conv4_2': tf.Variable(tf.zeros([3, 3, 512, 512], dtype=tf.float32), name='conv4_2'), 22 | 'conv4_3': tf.Variable(tf.zeros([3, 3, 512, 512], dtype=tf.float32), name='conv4_3'), 23 | 24 | 'conv5_1': tf.Variable(tf.zeros([3, 3, 512, 512], dtype=tf.float32), name='conv5_1'), 25 | 'conv5_2': tf.Variable(tf.zeros([3, 3, 512, 512], dtype=tf.float32), name='conv5_2'), 26 | 'conv5_3': tf.Variable(tf.zeros([3, 3, 512, 512], dtype=tf.float32), name='conv5_3'), 27 | 28 | 'fc6': tf.Variable(tf.zeros([7, 7, 512, 4096], dtype=tf.float32), name='fc6'), 29 | 'fc7': tf.Variable(tf.zeros([1, 1, 4096, 4096], dtype=tf.float32), name='fc7'), 30 | 'final': tf.Variable(tf.zeros([1, 1, 4096, 19], dtype=tf.float32), name='final'), 31 | 32 | 'ctx_conv1_1': tf.Variable(tf.zeros([3, 3, 19, 19], dtype=tf.float32), name='ctx_conv1_1'), 33 | 
'ctx_conv1_2': tf.Variable(tf.zeros([3, 3, 19, 19], dtype=tf.float32), name='ctx_conv1_1'), 34 | 'ctx_conv2_1': tf.Variable(tf.zeros([3, 3, 19, 19], dtype=tf.float32), name='ctx_conv1_1'), 35 | 'ctx_conv3_1': tf.Variable(tf.zeros([3, 3, 19, 19], dtype=tf.float32), name='ctx_conv1_1'), 36 | 'ctx_conv4_1': tf.Variable(tf.zeros([3, 3, 19, 19], dtype=tf.float32), name='ctx_conv1_1'), 37 | 'ctx_conv5_1': tf.Variable(tf.zeros([3, 3, 19, 19], dtype=tf.float32), name='ctx_conv1_1'), 38 | 'ctx_conv6_1': tf.Variable(tf.zeros([3, 3, 19, 19], dtype=tf.float32), name='ctx_conv1_1'), 39 | 'ctx_conv7_1': tf.Variable(tf.zeros([3, 3, 19, 19], dtype=tf.float32), name='ctx_conv1_1'), 40 | 'ctx_fc1': tf.Variable(tf.zeros([3, 3, 19, 19], dtype=tf.float32), name='ctx_conv1_1'), 41 | 'ctx_final': tf.Variable(tf.zeros([1, 1, 19, 19], dtype=tf.float32), name='ctx_conv1_1'), 42 | 'ctx_upsample': tf.Variable(tf.zeros([16, 16, 19, 19], dtype=tf.float32), name='ctx_conv1_1'), 43 | } 44 | self.biases = { 45 | 'conv1_1': tf.Variable(tf.zeros([64], dtype=tf.float32), name='conv1_1_b'), 46 | 'conv1_2': tf.Variable(tf.zeros([64], dtype=tf.float32), name='conv1_2_b'), 47 | 48 | 'conv2_1': tf.Variable(tf.zeros([128], dtype=tf.float32), name='conv2_1_b'), 49 | 'conv2_2': tf.Variable(tf.zeros([128], dtype=tf.float32), name='conv2_2_b'), 50 | 51 | 'conv3_1': tf.Variable(tf.zeros([256], dtype=tf.float32), name='conv3_1_b'), 52 | 'conv3_2': tf.Variable(tf.zeros([256], dtype=tf.float32), name='conv3_2_b'), 53 | 'conv3_3': tf.Variable(tf.zeros([256], dtype=tf.float32), name='conv3_3_b'), 54 | 55 | 'conv4_1': tf.Variable(tf.zeros([512], dtype=tf.float32), name='conv4_1_b'), 56 | 'conv4_2': tf.Variable(tf.zeros([512], dtype=tf.float32), name='conv4_2_b'), 57 | 'conv4_3': tf.Variable(tf.zeros([512], dtype=tf.float32), name='conv4_3_b'), 58 | 59 | 'conv5_1': tf.Variable(tf.zeros([512], dtype=tf.float32), name='conv5_1_b'), 60 | 'conv5_2': tf.Variable(tf.zeros([512], dtype=tf.float32), name='conv5_2_b'), 61 | 
'conv5_3': tf.Variable(tf.zeros([512], dtype=tf.float32), name='conv5_3_b'), 62 | 63 | 'fc6': tf.Variable(tf.zeros([4096], dtype=tf.float32), name='fc6_b'), 64 | 'fc7': tf.Variable(tf.zeros([4096], dtype=tf.float32), name='fc7_b'), 65 | 'final': tf.Variable(tf.zeros([19], dtype=tf.float32), name='final_b'), 66 | 67 | 'ctx_conv1_1': tf.Variable(tf.zeros([19], dtype=tf.float32), name='ctx_conv1_1_b'), 68 | 'ctx_conv1_2': tf.Variable(tf.zeros([19], dtype=tf.float32), name='ctx_conv1_1_b'), 69 | 'ctx_conv2_1': tf.Variable(tf.zeros([19], dtype=tf.float32), name='ctx_conv1_1_b'), 70 | 'ctx_conv3_1': tf.Variable(tf.zeros([19], dtype=tf.float32), name='ctx_conv1_1_b'), 71 | 'ctx_conv4_1': tf.Variable(tf.zeros([19], dtype=tf.float32), name='ctx_conv1_1_b'), 72 | 'ctx_conv5_1': tf.Variable(tf.zeros([19], dtype=tf.float32), name='ctx_conv1_1_b'), 73 | 'ctx_conv6_1': tf.Variable(tf.zeros([19], dtype=tf.float32), name='ctx_conv1_1_b'), 74 | 'ctx_conv7_1': tf.Variable(tf.zeros([19], dtype=tf.float32), name='ctx_conv1_1_b'), 75 | 'ctx_fc1': tf.Variable(tf.zeros([19], dtype=tf.float32), name='ctx_conv1_1_b'), 76 | 'ctx_final': tf.Variable(tf.zeros([19], dtype=tf.float32), name='ctx_conv1_1_b'), 77 | } 78 | 79 | def get_output_tensor(self, x, out_size): 80 | # returns output tensor 81 | output_shape = [1, out_size[0], out_size[1], 19] 82 | 83 | conv1_1 = tf.nn.relu(tf.nn.conv2d(x, self.weights['conv1_1'], strides=[1,1,1,1], padding="VALID") + self.biases['conv1_1']) 84 | conv1_2 = tf.nn.relu(tf.nn.conv2d(conv1_1, self.weights['conv1_2'], strides=[1,1,1,1], padding="VALID") + self.biases['conv1_2']) 85 | conv1_2 = tf.nn.max_pool(conv1_2, [1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID") 86 | 87 | conv2_1 = tf.nn.relu(tf.nn.conv2d(conv1_2, self.weights['conv2_1'], strides=[1,1,1,1], padding="VALID") + self.biases['conv2_1']) 88 | conv2_2 = tf.nn.relu(tf.nn.conv2d(conv2_1, self.weights['conv2_2'], strides=[1,1,1,1], padding="VALID") + self.biases['conv2_2']) 89 | conv2_2 = 
tf.nn.max_pool(conv2_2, [1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID") 90 | 91 | conv3_1 = tf.nn.relu(tf.nn.conv2d(conv2_2, self.weights['conv3_1'], strides=[1,1,1,1], padding="VALID") + self.biases['conv3_1']) 92 | conv3_2 = tf.nn.relu(tf.nn.conv2d(conv3_1, self.weights['conv3_2'], strides=[1,1,1,1], padding="VALID") + self.biases['conv3_2']) 93 | conv3_3 = tf.nn.relu(tf.nn.conv2d(conv3_2, self.weights['conv3_3'], strides=[1,1,1,1], padding="VALID") + self.biases['conv3_3']) 94 | conv3_3 = tf.nn.max_pool(conv3_3, [1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID") 95 | 96 | conv4_1 = tf.nn.relu(tf.nn.conv2d(conv3_3, self.weights['conv4_1'], strides=[1,1,1,1], padding="VALID") + self.biases['conv4_1']) 97 | conv4_2 = tf.nn.relu(tf.nn.conv2d(conv4_1, self.weights['conv4_2'], strides=[1,1,1,1], padding="VALID") + self.biases['conv4_2']) 98 | conv4_3 = tf.nn.relu(tf.nn.conv2d(conv4_2, self.weights['conv4_3'], strides=[1,1,1,1], padding="VALID") + self.biases['conv4_3']) 99 | # not pooling, instead dilations in the folling ops 100 | 101 | conv5_1 = tf.nn.relu(tf.nn.atrous_conv2d(conv4_3, self.weights['conv5_1'], padding="VALID", rate=2) + self.biases['conv5_1']) 102 | conv5_2 = tf.nn.relu(tf.nn.atrous_conv2d(conv5_1, self.weights['conv5_2'], padding="VALID", rate=2) + self.biases['conv5_2']) 103 | conv5_3 = tf.nn.relu(tf.nn.atrous_conv2d(conv5_2, self.weights['conv5_3'], padding="VALID", rate=2) + self.biases['conv5_3']) 104 | 105 | fc6 = tf.nn.relu(tf.nn.atrous_conv2d(conv5_3, self.weights['fc6'], padding="VALID", rate=4) + self.biases['fc6']) 106 | fc6 = tf.nn.dropout(fc6, self.dropout_keeprate) 107 | fc7 = tf.nn.relu(tf.nn.atrous_conv2d(fc6, self.weights['fc7'], padding="VALID", rate=4) + self.biases['fc7']) 108 | fc7 = tf.nn.dropout(fc7, self.dropout_keeprate) 109 | final = tf.nn.atrous_conv2d(fc7, self.weights['final'], padding="VALID", rate=4) + self.biases['final'] 110 | 111 | ctx_conv1_1 = tf.nn.relu(tf.nn.conv2d(final, self.weights['ctx_conv1_1'], 
strides=[1,1,1,1], padding="SAME") + self.biases['ctx_conv1_1']) 112 | ctx_conv1_2 = tf.nn.relu(tf.nn.conv2d(ctx_conv1_1, self.weights['ctx_conv1_2'], strides=[1,1,1,1], padding="SAME") + self.biases['ctx_conv1_2']) 113 | ctx_conv2_1 = tf.nn.relu(tf.nn.atrous_conv2d(ctx_conv1_2, self.weights['ctx_conv2_1'], padding="SAME", rate=2) + self.biases['ctx_conv2_1']) 114 | ctx_conv3_1 = tf.nn.relu(tf.nn.atrous_conv2d(ctx_conv2_1, self.weights['ctx_conv3_1'], padding="SAME", rate=4) + self.biases['ctx_conv3_1']) 115 | ctx_conv4_1 = tf.nn.relu(tf.nn.atrous_conv2d(ctx_conv3_1, self.weights['ctx_conv4_1'], padding="SAME", rate=8) + self.biases['ctx_conv4_1']) 116 | ctx_conv5_1 = tf.nn.relu(tf.nn.atrous_conv2d(ctx_conv4_1, self.weights['ctx_conv5_1'], padding="SAME", rate=16) + self.biases['ctx_conv5_1']) 117 | ctx_conv6_1 = tf.nn.relu(tf.nn.atrous_conv2d(ctx_conv5_1, self.weights['ctx_conv6_1'], padding="SAME", rate=32) + self.biases['ctx_conv6_1']) 118 | ctx_conv7_1 = tf.nn.relu(tf.nn.atrous_conv2d(ctx_conv6_1, self.weights['ctx_conv7_1'], padding="SAME", rate=64) + self.biases['ctx_conv7_1']) 119 | 120 | ctx_fc1 = tf.nn.relu(tf.nn.conv2d(ctx_conv7_1, self.weights['ctx_fc1'], strides=[1,1,1,1], padding="SAME") + self.biases['ctx_fc1']) 121 | ctx_final = tf.nn.conv2d(ctx_fc1, self.weights['ctx_final'], strides=[1,1,1,1], padding="SAME") + self.biases['ctx_final'] 122 | ctx_upsample = tf.nn.conv2d_transpose(ctx_final, self.weights['ctx_upsample'], output_shape=output_shape, strides=[1,8,8,1]) 123 | 124 | return ctx_upsample 125 | 126 | def get_optimizer(self, x, y, learning_rate): 127 | # optimize wrt the ctx_* variables 128 | dLdy = tf.placeholder('float') 129 | 130 | # the correct values will backpropagate to ctx_upsample 131 | loss = tf.reduce_sum(dLdy * y) 132 | 133 | #opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.95, beta2=0.99, epsilon=1e-8) 134 | opt = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9) 135 | opt = 
opt.minimize(loss, 136 | [v for k, v in self.weights.iteritems() if (k[0:4] == 'ctx_' or k in ['fc6', 'fc7', 'final'])].extend( 137 | [v for k, v in self.biases.iteritems() if (k[0:4] == 'ctx_' or k in ['fc6', 'fc7', 'final'])])) 138 | 139 | return opt, dLdy -------------------------------------------------------------------------------- /models/flownet1.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import glob 3 | import numpy as np 4 | 5 | class Flownet1: 6 | def __init__(self): 7 | self.weights = { 8 | 'conv1': tf.Variable(tf.zeros([7, 7, 6, 64], dtype=tf.float32), name='conv1_w'), 9 | 'conv2': tf.Variable(tf.zeros([5, 5, 64, 128], dtype=tf.float32), name='conv2_w'), 10 | 'conv3_1': tf.Variable(tf.zeros([3, 3, 256, 256], dtype=tf.float32), name='conv3_1_w'), 11 | 'conv3': tf.Variable(tf.zeros([5, 5, 128, 256], dtype=tf.float32), name='conv3_w'), 12 | 'conv4': tf.Variable(tf.zeros([3, 3, 256, 512], dtype=tf.float32), name='conv4_w'), 13 | 'conv4_1': tf.Variable(tf.zeros([3, 3, 512, 512], dtype=tf.float32), name='conv4_1_w'), 14 | 'conv5': tf.Variable(tf.zeros([3, 3, 512, 512], dtype=tf.float32), name='conv5_w'), 15 | 'conv5_1': tf.Variable(tf.zeros([3, 3, 512, 512], dtype=tf.float32), name='conv5_1_w'), 16 | 'conv6': tf.Variable(tf.zeros([3, 3, 512, 1024], dtype=tf.float32), name='conv6_w'), 17 | 'conv6_1': tf.Variable(tf.zeros([3, 3, 1024, 1024], dtype=tf.float32), name='conv6_1_w'), 18 | 19 | 'Convolution1': tf.Variable(tf.zeros([3, 3, 1024, 2], dtype=tf.float32), name='Convolution1_w'), 20 | 'Convolution2': tf.Variable(tf.zeros([3, 3, 1026, 2], dtype=tf.float32), name='Convolution2_w'), 21 | 'Convolution3': tf.Variable(tf.zeros([3, 3, 770, 2], dtype=tf.float32), name='Convolution3_w'), 22 | 'Convolution4': tf.Variable(tf.zeros([3, 3, 386, 2], dtype=tf.float32), name='Convolution4_w'), 23 | 'Convolution5': tf.Variable(tf.zeros([3, 3, 194, 2], dtype=tf.float32), name='Convolution5_w'), 24 | 
'Convolution6': tf.Variable(tf.zeros([1, 1, 2, 2], dtype=tf.float32), name='Convolution6_w'), 25 | 26 | 'deconv2': tf.Variable(tf.zeros([4, 4, 64, 386], dtype=tf.float32), name='deconv2_w'), 27 | 'deconv3': tf.Variable(tf.zeros([4, 4, 128, 770], dtype=tf.float32), name='deconv3_w'), 28 | 'deconv4': tf.Variable(tf.zeros([4, 4, 256, 1026], dtype=tf.float32), name='deconv4_w'), 29 | 'deconv5': tf.Variable(tf.zeros([4, 4, 512, 1024], dtype=tf.float32), name='deconv5_w'), 30 | 31 | 'upsample_flow3to2': tf.Variable(tf.zeros([4, 4, 2, 2], dtype=tf.float32), name='upsample_flow3to2_w'), 32 | 'upsample_flow4to3': tf.Variable(tf.zeros([4, 4, 2, 2], dtype=tf.float32), name='upsample_flow4to3_w'), 33 | 'upsample_flow5to4': tf.Variable(tf.zeros([4, 4, 2, 2], dtype=tf.float32), name='upsample_flow5to4_w'), 34 | 'upsample_flow6to5': tf.Variable(tf.zeros([4, 4, 2, 2], dtype=tf.float32), name='upsample_flow6to5_w'), 35 | } 36 | 37 | self.bias = { 38 | 'conv1': tf.Variable(tf.zeros([64], dtype=tf.float32), name='conv1_b'), 39 | 'conv2': tf.Variable(tf.zeros([128], dtype=tf.float32), name='conv2_b'), 40 | 'conv3_1': tf.Variable(tf.zeros([256], dtype=tf.float32), name='conv3_1_b'), 41 | 'conv3': tf.Variable(tf.zeros([256], dtype=tf.float32), name='conv3_b'), 42 | 'conv4': tf.Variable(tf.zeros([512], dtype=tf.float32), name='conv4_b'), 43 | 'conv4_1': tf.Variable(tf.zeros([512], dtype=tf.float32), name='conv4_1_b'), 44 | 'conv5': tf.Variable(tf.zeros([512], dtype=tf.float32), name='conv5_b'), 45 | 'conv5_1': tf.Variable(tf.zeros([512], dtype=tf.float32), name='conv5_1_b'), 46 | 'conv6': tf.Variable(tf.zeros([1024], dtype=tf.float32), name='conv6_b'), 47 | 'conv6_1': tf.Variable(tf.zeros([1024], dtype=tf.float32), name='conv6_1_b'), 48 | 49 | 'Convolution1': tf.Variable(tf.zeros([2], dtype=tf.float32), name='Convolution1_b'), 50 | 'Convolution2': tf.Variable(tf.zeros([2], dtype=tf.float32), name='Convolution2_b'), 51 | 'Convolution3': tf.Variable(tf.zeros([2], dtype=tf.float32), 
name='Convolution3_b'), 52 | 'Convolution4': tf.Variable(tf.zeros([2], dtype=tf.float32), name='Convolution4_b'), 53 | 'Convolution5': tf.Variable(tf.zeros([2], dtype=tf.float32), name='Convolution5_b'), 54 | 'Convolution6': tf.Variable(tf.zeros([2], dtype=tf.float32), name='Convolution6_b'), 55 | 56 | 'deconv2': tf.Variable(tf.zeros([64], dtype=tf.float32), name='deconv2_b'), 57 | 'deconv3': tf.Variable(tf.zeros([128], dtype=tf.float32), name='deconv3_b'), 58 | 'deconv4': tf.Variable(tf.zeros([256], dtype=tf.float32), name='deconv4_b'), 59 | 'deconv5': tf.Variable(tf.zeros([512], dtype=tf.float32), name='deconv5_b'), 60 | 61 | 'upsample_flow3to2': tf.Variable(tf.zeros([2], dtype=tf.float32), name='upsample_flow3to2_b'), 62 | 'upsample_flow4to3': tf.Variable(tf.zeros([2], dtype=tf.float32), name='upsample_flow4to3_b'), 63 | 'upsample_flow5to4': tf.Variable(tf.zeros([2], dtype=tf.float32), name='upsample_flow5to4_b'), 64 | 'upsample_flow6to5': tf.Variable(tf.zeros([2], dtype=tf.float32), name='upsample_flow6to5_b'), 65 | } 66 | 67 | def get_output_tensor(self, img0, img1, out_size): 68 | rescaling = 0.0039216 69 | mean = np.array([0.411451, 0.432060, 0.450141]) 70 | 71 | img0_aug = (img0 * rescaling) - mean 72 | img1_aug = (img1 * rescaling) - mean 73 | img0_nomean_resize = img0_aug 74 | img1_nomean_resize = img1_aug 75 | input_ = tf.concat(axis=3, values=[img0_nomean_resize, img1_nomean_resize]) 76 | self.input_ = input_ 77 | 78 | conv1 = tf.pad(input_, [[0,0], [3,3], [3,3], [0,0]]) 79 | conv1 = tf.nn.conv2d(conv1, self.weights['conv1'], strides=[1,2,2,1], padding="VALID") + self.bias['conv1'] 80 | conv1 = tf.maximum(conv1, 0.1*conv1) # leaky relu with negative slope 0.1 81 | self.conv1 = conv1 82 | 83 | conv2 = tf.pad(conv1, [[0,0], [2,2], [2,2], [0,0]]) 84 | conv2 = tf.nn.conv2d(conv2, self.weights['conv2'], strides=[1,2,2,1], padding="VALID") + self.bias['conv2'] 85 | conv2 = tf.maximum(conv2, 0.1*conv2) # leaky relu with negative slope 0.1 86 | self.conv2 = 
conv2 87 | 88 | conv3 = tf.pad(conv2, [[0,0], [2,2], [2,2], [0,0]]) 89 | conv3 = tf.nn.conv2d(conv3, self.weights['conv3'], strides=[1,2,2,1], padding="VALID") + self.bias['conv3'] 90 | conv3 = tf.maximum(conv3, 0.1*conv3) # leaky relu with negative slope 0.1 91 | self.conv3 = conv3 92 | 93 | conv3_1 = tf.nn.conv2d(conv3, self.weights['conv3_1'], strides=[1,1,1,1], padding="SAME") + self.bias['conv3_1'] 94 | conv3_1 = tf.maximum(conv3_1, 0.1*conv3_1) # leaky relu with negative slope 0.1 95 | self.conv3_1 = conv3_1 96 | 97 | conv4 = tf.pad(conv3_1, [[0,0], [1,1], [1,1], [0,0]]) 98 | conv4 = tf.nn.conv2d(conv4, self.weights['conv4'], strides=[1,2,2,1], padding="VALID") + self.bias['conv4'] 99 | conv4 = tf.maximum(conv4, 0.1*conv4) # leaky relu with negative slope 0.1 100 | self.conv4 = conv4 101 | 102 | conv4_1 = tf.nn.conv2d(conv4, self.weights['conv4_1'], strides=[1,1,1,1], padding="SAME") + self.bias['conv4_1'] 103 | conv4_1 = tf.maximum(conv4_1, 0.1*conv4_1) # leaky relu with negative slope 0.1 104 | self.conv4_1 = conv4_1 105 | 106 | conv5 = tf.pad(conv4_1, [[0,0], [1,1], [1,1], [0,0]]) 107 | conv5 = tf.nn.conv2d(conv5, self.weights['conv5'], strides=[1,2,2,1], padding="VALID") + self.bias['conv5'] 108 | conv5 = tf.maximum(conv5, 0.1*conv5) # leaky relu with negative slope 0.1 109 | self.conv5 = conv5 110 | 111 | conv5_1 = tf.nn.conv2d(conv5, self.weights['conv5_1'], strides=[1,1,1,1], padding="SAME") + self.bias['conv5_1'] 112 | conv5_1 = tf.maximum(conv5_1, 0.1*conv5_1) # leaky relu with negative slope 0.1 113 | self.conv5_1 = conv5_1 114 | 115 | conv6 = tf.pad(conv5_1, [[0,0], [1,1], [1,1], [0,0]]) 116 | conv6 = tf.nn.conv2d(conv6, self.weights['conv6'], strides=[1,2,2,1], padding="VALID") + self.bias['conv6'] 117 | conv6 = tf.maximum(conv6, 0.1*conv6) # leaky relu with negative slope 0.1 118 | self.conv6 = conv6 119 | 120 | conv6_1 = tf.nn.conv2d(conv6, self.weights['conv6_1'], strides=[1,1,1,1], padding="SAME") + self.bias['conv6_1'] 121 | conv6_1 = 
tf.maximum(conv6_1, 0.1*conv6_1) # leaky relu with negative slope 0.1 122 | self.conv6_1 = conv6_1 123 | 124 | predict_flow6 = tf.pad(conv6_1, [[0,0], [1,1], [1,1], [0,0]]) 125 | predict_flow6 = tf.nn.conv2d(predict_flow6, self.weights['Convolution1'], strides=[1,1,1,1], padding="VALID") + self.bias['Convolution1'] 126 | self.predict_flow6 = predict_flow6 127 | 128 | 129 | deconv5 = tf.nn.conv2d_transpose(conv6_1, self.weights['deconv5'], output_shape=[1, out_size[0]/32, out_size[1]/32, 512], strides=[1,2,2,1]) + self.bias['deconv5'] 130 | deconv5 = tf.maximum(deconv5, 0.1*deconv5) # leaky relu with negative slope 0.1 131 | self.deconv5 = deconv5 132 | 133 | upsampled_flow6_to_5 = tf.nn.conv2d_transpose(predict_flow6, self.weights['upsample_flow6to5'], output_shape=[1, out_size[0]/32, out_size[1]/32, 2], strides=[1,2,2,1]) + self.bias['upsample_flow6to5'] 134 | self.upsampled_flow6_to_5 = upsampled_flow6_to_5 135 | 136 | concat5 = tf.concat(axis=3, values=[conv5_1, deconv5, upsampled_flow6_to_5]) 137 | self.concat5 = concat5 138 | 139 | predict_flow5 = tf.pad(concat5, [[0,0], [1,1], [1,1], [0,0]]) 140 | predict_flow5 = tf.nn.conv2d(predict_flow5, self.weights['Convolution2'], strides=[1,1,1,1], padding="VALID") + self.bias['Convolution2'] 141 | self.predict_flow5 = predict_flow5 142 | 143 | deconv4 = tf.nn.conv2d_transpose(concat5, self.weights['deconv4'], output_shape=[1, out_size[0]/16, out_size[1]/16, 256], strides=[1,2,2,1]) + self.bias['deconv4'] 144 | deconv4 = tf.maximum(deconv4, 0.1*deconv4) # leaky relu with negative slope 0.1 145 | self.deconv4 = deconv4 146 | 147 | upsampled_flow5_to_4 = tf.nn.conv2d_transpose(predict_flow5, self.weights['upsample_flow5to4'], output_shape=[1, out_size[0]/16, out_size[1]/16, 2], strides=[1,2,2,1]) + self.bias['upsample_flow5to4'] 148 | self.upsampled_flow5_to_4 = upsampled_flow5_to_4 149 | 150 | concat4 = tf.concat(axis=3, values=[conv4_1, deconv4, upsampled_flow5_to_4]) 151 | self.concat4 = concat4 152 | 153 | 
predict_flow4 = tf.nn.conv2d(concat4, self.weights['Convolution3'], strides=[1,1,1,1], padding="SAME") + self.bias['Convolution3'] 154 | self.predict_flow4 = predict_flow4 155 | 156 | deconv3 = tf.nn.conv2d_transpose(concat4, self.weights['deconv3'], output_shape=[1, out_size[0]/8, out_size[1]/8, 128], strides=[1,2,2,1]) + self.bias['deconv3'] 157 | deconv3 = tf.maximum(deconv3, 0.1*deconv3) # leaky relu with negative slope 0.1 158 | self.deconv3 = deconv3 159 | 160 | upsampled_flow4_to_3 = tf.nn.conv2d_transpose(predict_flow4, self.weights['upsample_flow4to3'], output_shape=[1, out_size[0]/8, out_size[1]/8, 2], strides=[1,2,2,1]) + self.bias['upsample_flow4to3'] 161 | self.upsampled_flow4_to_3 = upsampled_flow4_to_3 162 | 163 | concat3 = tf.concat(axis=3, values=[conv3_1, deconv3, upsampled_flow4_to_3]) 164 | self.concat3 = concat3 165 | 166 | predict_flow3 = tf.nn.conv2d(concat3, self.weights['Convolution4'], strides=[1,1,1,1], padding="SAME") + self.bias['Convolution4'] 167 | self.predict_flow3 = predict_flow3 168 | 169 | deconv2 = tf.nn.conv2d_transpose(concat3, self.weights['deconv2'], output_shape=[1, out_size[0]/4, out_size[1]/4, 64], strides=[1,2,2,1]) + self.bias['deconv2'] 170 | deconv2 = tf.maximum(deconv2, 0.1*deconv2) # leaky relu with negative slope 0.1 171 | self.deconv2 = deconv2 172 | 173 | upsampled_flow3_to_2 = tf.nn.conv2d_transpose(predict_flow3, self.weights['upsample_flow3to2'], output_shape=[1, out_size[0]/4, out_size[1]/4, 2], strides=[1,2,2,1]) + self.bias['upsample_flow3to2'] 174 | self.upsampled_flow3_to_2 = upsampled_flow3_to_2 175 | 176 | 177 | concat2 = tf.concat(axis=3, values=[conv2, deconv2, upsampled_flow3_to_2]) 178 | self.concat2 = concat2 179 | 180 | predict_flow2 = tf.nn.conv2d(concat2, self.weights['Convolution5'], strides=[1,1,1,1], padding="SAME") + self.bias['Convolution5'] 181 | self.predict_flow2 = predict_flow2 182 | 183 | blob44 = predict_flow2 * 20.0 184 | self.blob44 = blob44 185 | 186 | predict_flow_resize = 
class LRR:
    """Graph builder for the LRR segmentation network (TensorFlow 1.x).

    All parameters are created once in ``__init__`` through
    ``tf.get_variable`` using the names and shapes listed by
    ``all_variables``.  Calling the instance on an image batch returns the
    finest prediction (``'prediction_4x'``); ``get_blobs`` exposes every
    intermediate tensor by name.
    """

    # The grouped deconvolutions below are unrolled over this many output
    # channels; each channel is reconstructed from BASES_PER_CLASS
    # coefficient channels.
    NUM_CLASSES = 19
    BASES_PER_CLASS = 10

    # Convolution layers of the five VGG stages; every stage ends in a
    # 2x2 max-pool.  Weight names are '<layer>f' (filter) / '<layer>b' (bias).
    _VGG_STAGES = (
        ('conv1_1', 'conv1_2'),
        ('conv2_1', 'conv2_2'),
        ('conv3_1', 'conv3_2', 'conv3_3'),
        ('conv4_1', 'conv4_2', 'conv4_3'),
        ('conv5_1', 'conv5_2', 'conv5_3'),
    )

    def __init__(self):
        """Create one TensorFlow variable per entry of ``all_variables``."""
        self.weights = dict()

        for key, shape in self.all_variables():
            self.weights[key] = tf.get_variable(key, shape=shape)

    def __call__(self, im):
        """Return the ``'prediction_4x'`` tensor computed for image batch ``im``."""
        return self.get_blobs(im)['prediction_4x']

    def _trainable_variables(self):
        """Return the weights that ``get_optimizer`` is allowed to update.

        Variables whose name contains ``'_bil_'``, ``'deconv'`` or ``'bases'``
        are excluded, i.e. kept fixed during training.
        """
        frozen_markers = ('_bil_', 'deconv', 'bases')
        # dict.items() works on both Python 2 and 3 (iteritems() is 2-only).
        return [var for name, var in self.weights.items()
                if not any(marker in name for marker in frozen_markers)]

    def get_optimizer(self, x, y, learning_rate):
        """Build a momentum-SGD step driven by an externally supplied gradient.

        Args:
            x: unused; kept for interface compatibility.
            y: network output tensor that the gradient refers to.
            learning_rate: learning rate for the momentum optimizer.

        Returns:
            ``(train_op, dLdy)`` where ``dLdy`` is a float placeholder that
            must be fed with the gradient of the real loss w.r.t. ``y``.
        """
        dLdy = tf.placeholder('float')

        # d(sum(dLdy * y))/dy == dLdy, so the fed values are exactly what
        # gets backpropagated into y.
        loss = tf.reduce_sum(dLdy * y)

        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
        train_op = optimizer.minimize(loss, var_list=self._trainable_variables())

        return train_op, dLdy

    def _conv(self, x, filter_key, bias_key):
        """Stride-1 'SAME' convolution of ``x`` plus bias, both looked up in ``self.weights``."""
        return tf.nn.conv2d(x, self.weights[filter_key], [1, 1, 1, 1], padding='SAME') + self.weights[bias_key]

    def _grouped_deconv(self, x, filter_key, group_size, stride, batch_size, out_size):
        """Emulate a grouped transposed convolution with NUM_CLASSES groups.

        TensorFlow's ``conv2d_transpose`` has no ``groups`` parameter, so the
        input and the filter are sliced into ``group_size``-channel groups,
        each group is deconvolved to a single output channel, and the results
        are concatenated along the channel axis.

        Args:
            x: input tensor with ``NUM_CLASSES * group_size`` channels.
            filter_key: name of the filter in ``self.weights``.
            group_size: number of input channels per group.
            stride: spatial upsampling factor.
            batch_size: scalar int32 tensor with the batch size.
            out_size: ``(height, width)`` of the output as int32 tensors.
        """
        kernel = self.weights[filter_key]
        out_height, out_width = out_size
        channels = []
        for k in range(self.NUM_CLASSES):
            begin = [0, 0, 0, group_size * k]
            extent = [-1, -1, -1, group_size]
            channels.append(tf.nn.conv2d_transpose(
                tf.slice(x, begin, extent),
                tf.slice(kernel, begin, extent),
                output_shape=[batch_size, out_height, out_width, 1],
                strides=[1, stride, stride, 1]))
        return tf.concat(channels, axis=3)

    def _bases_deconv(self, coef, filter_key, batch_size, out_size):
        """Reconstruct per-class scores from basis coefficients (x4 upsampling)."""
        return self._grouped_deconv(coef, filter_key, self.BASES_PER_CLASS, 4, batch_size, out_size)

    def _upsample_x2(self, prediction, filter_key, batch_size, out_size):
        """Upsample every class channel of ``prediction`` independently by 2."""
        return self._grouped_deconv(prediction, filter_key, 1, 2, batch_size, out_size)

    def _boundary_mask(self, blobs, level, logits):
        """Store and return the boundary mask derived from ``logits``.

        The mask is ``max_pool(p) + max_pool(-p)`` over a 17x17 window, i.e.
        local maximum minus local minimum of the softmax probabilities ``p``.
        Intermediate tensors are stored in ``blobs`` under the keys
        ``prob_<level>``, ``prob_<level>_dilate``, ``neg_prob_<level>``,
        ``neg_prob_<level>_dilate`` and ``bound_mask<level>``.
        """
        window = [1, 17, 17, 1]
        unit_stride = [1, 1, 1, 1]
        blobs['prob_%s' % level] = tf.nn.softmax(logits)
        blobs['prob_%s_dilate' % level] = tf.nn.max_pool(
            blobs['prob_%s' % level], window, unit_stride, padding='SAME')
        blobs['neg_prob_%s' % level] = -blobs['prob_%s' % level]
        blobs['neg_prob_%s_dilate' % level] = tf.nn.max_pool(
            blobs['neg_prob_%s' % level], window, unit_stride, padding='SAME')
        blobs['bound_mask%s' % level] = (
            blobs['prob_%s_dilate' % level] + blobs['neg_prob_%s_dilate' % level])
        return blobs['bound_mask%s' % level]

    def get_blobs(self, im, dropout_keeprate=1):
        """Build the full network on ``im`` and return every named tensor.

        Args:
            im: 4-D image batch; axis 3 is split into 3 channels, so it is
                expected to hold 3 colour channels (original comment: a BGR
                image without the mean subtracted).
            dropout_keeprate: ``keep_prob`` for the two dropout layers.

        Returns:
            dict mapping blob names (``'x1'`` ... ``'x37'``, ``'coef_*'``,
            ``'prediction_*'``, ``'prob_*'``, ``'bound_mask*'`` ...) to tensors.
        """
        blobs = dict()

        # Reverse the channel order and subtract the per-channel mean.
        im = tf.concat(tf.split(im, 3, axis=3)[::-1], axis=3)
        im = im - np.array([73.1652, 82.9206, 72.4080])

        batch_size = tf.to_int32(tf.shape(im)[0])
        width = tf.to_int32(tf.shape(im)[2])
        height = tf.to_int32(tf.shape(im)[1])

        # Spatial size rounded up to the next multiple of 32 (five 2x2 pools).
        divisor = 32.
        adapted_width = tf.to_int32(tf.ceil(tf.to_float(width) / divisor) * divisor)
        adapted_height = tf.to_int32(tf.ceil(tf.to_float(height) / divisor) * divisor)

        # Integer (floor) division keeps the shapes int32 on Python 3 as well;
        # plain '/' would turn them into float tensors there.
        size_div8 = (adapted_height // 8, adapted_width // 8)
        size_div4 = (adapted_height // 4, adapted_width // 4)
        size_div2 = (adapted_height // 2, adapted_width // 2)
        size_full = (adapted_height, adapted_width)

        # ---- VGG trunk: blobs x1 ... x37 ------------------------------------
        counter = [0]

        def record(tensor):
            """Store ``tensor`` under the next 'x<N>' key and hand it back."""
            counter[0] += 1
            blobs['x%d' % counter[0]] = tensor
            return tensor

        x = im
        for stage in self._VGG_STAGES:
            for layer in stage:
                x = record(self._conv(x, layer + 'f', layer + 'b'))
                x = record(tf.nn.relu(x))
            x = record(tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME'))

        for layer in ('fc6', 'fc7'):
            x = record(self._conv(x, layer + 'f', layer + 'b'))
            x = record(tf.nn.relu(x))
            x = record(tf.nn.dropout(x, keep_prob=dropout_keeprate))

        ### end of VGG

        # ---- coarsest prediction (from x37) ---------------------------------
        blobs['coef_32x'] = self._conv(blobs['x37'], 'bases_coef_32xf', 'bases_coef_32xb')
        blobs['prediction_32x'] = self._bases_deconv(
            blobs['coef_32x'], 'deconv_32xf', batch_size, size_div8)

        blobs['dil_seg32x_coef'] = self._conv(blobs['x37'], 'dil_seg32x_coeff', 'dil_seg32x_coefb')
        blobs['dil_seg32x'] = self._bases_deconv(
            blobs['dil_seg32x_coef'], 'dil_seg_deconv_32xf', batch_size, size_div8
        ) + self.weights['dil_mask_deconv32sb']

        blobs['ero_seg32x_coef'] = self._conv(blobs['x37'], 'ero_mask32s_coeff', 'ero_seg32x_coefb')
        blobs['ero_seg32x'] = self._bases_deconv(
            blobs['ero_seg32x_coef'], 'ero_seg_deconv_32xf', batch_size, size_div8
        ) + self.weights['ero_mask_deconv32sb']

        # ---- refinement with x30 features -----------------------------------
        blobs['coef_16x'] = self._conv(blobs['x30'], 'bases_coef_16sf', 'bases_coef_16xb')
        blobs['prediction_16x_add'] = self._bases_deconv(
            blobs['coef_16x'], 'deconv_16xf', batch_size, size_div4)

        blobs['prediction_32x_bil_x2'] = self._upsample_x2(
            blobs['prediction_32x'], 'dec_prediction_32x_bil_x2f', batch_size, size_div4)

        self._boundary_mask(blobs, '32x', blobs['prediction_32x_bil_x2'])

        # The finer-level correction is only applied where the mask is non-zero.
        blobs['pred_16x_aft_DP'] = blobs['bound_mask32x'] * blobs['prediction_16x_add']

        # ---- refinement with x23 features -----------------------------------
        blobs['coef_8x'] = self._conv(blobs['x23'], 'bases_coef_8xf', 'bases_coef_8xb')
        blobs['prediction_8x_add'] = self._bases_deconv(
            blobs['coef_8x'], 'deconv_8xf', batch_size, size_div2)

        blobs['prediction_16x'] = blobs['prediction_32x_bil_x2'] + blobs['pred_16x_aft_DP']

        blobs['prediction_16x_bil_x2'] = self._upsample_x2(
            blobs['prediction_16x'], 'dec_prediction_16x_bil_x2f', batch_size, size_div2)

        self._boundary_mask(blobs, '16x', blobs['prediction_16x_bil_x2'])

        blobs['pred_8x_aft_DP'] = blobs['bound_mask16x'] * blobs['prediction_8x_add']

        blobs['prediction_8x'] = blobs['prediction_16x_bil_x2'] + blobs['pred_8x_aft_DP']

        # ---- refinement with x16 features (adapted input resolution) --------
        blobs['coef_4x'] = self._conv(blobs['x16'], 'bases_coef_4xf', 'bases_coef_4xb')
        blobs['prediction_4x_add'] = self._bases_deconv(
            blobs['coef_4x'], 'deconv_4xf', batch_size, size_full)

        blobs['prediction_8x_bil_x2'] = self._upsample_x2(
            blobs['prediction_8x'], 'dec_prediction_8x_bil_x2f', batch_size, size_full)

        self._boundary_mask(blobs, '8x', blobs['prediction_8x_bil_x2'])

        blobs['pred_4x_aft_DP'] = blobs['bound_mask8x'] * blobs['prediction_4x_add']

        blobs['prediction_4x'] = blobs['prediction_8x_bil_x2'] + blobs['pred_4x_aft_DP']

        return blobs

    # Hacky, but it works: the shapes are hard-coded here instead of being
    # read from numpy files in __init__ as was done previously.
    def all_variables(self):
        """Return ``(name, shape)`` for every variable of the network."""
        return [('deconv_16xf', (8, 8, 1, 190)),
                ('conv3_1b', (256,)),
                ('bases_coef_32xb', (190,)),
                ('dil_seg32x_coeff', (5, 5, 4096, 190)),
                ('conv3_2f', (3, 3, 256, 256)),
                ('fc6f', (7, 7, 512, 4096)),
                ('conv5_2b', (512,)),
                ('conv4_2f', (3, 3, 512, 512)),
                ('conv5_3f', (3, 3, 512, 512)),
                ('conv4_3f', (3, 3, 512, 512)),
                ('bases_coef_16xb', (190,)),
                ('dil_seg32x_coefb', (190,)),
                ('deconv_32xf', (8, 8, 1, 190)),
                ('bases_coef_16sf', (5, 5, 512, 190)),
                ('ero_mask32s_coeff', (5, 5, 4096, 190)),
                ('conv4_1b', (512,)),
                ('bases_coef_32xf', (5, 5, 4096, 190)),
                ('conv2_2b', (128,)),
                ('conv1_2b', (64,)),
                ('conv3_3f', (3, 3, 256, 256)),
                ('dec_prediction_8x_bil_x2f', (4, 4, 1, 19)),
                ('conv3_2b', (256,)),
                ('bases_coef_8xb', (190,)),
                ('fc7f', (1, 1, 4096, 4096)),
                ('ero_seg_deconv_32xf', (8, 8, 1, 190)),
                ('conv2_1b', (128,)),
                ('conv1_2f', (3, 3, 64, 64)),
                ('fc6b', (4096,)),
                ('conv4_2b', (512,)),
                ('ero_seg32x_coefb', (190,)),
                ('bases_coef_8xf', (5, 5, 512, 190)),
                ('conv5_2f', (3, 3, 512, 512)),
                ('conv1_1b', (64,)),
                ('conv5_1f', (3, 3, 512, 512)),
                ('bases_coef_4xb', (190,)),
                ('conv3_1f', (3, 3, 128, 256)),
                ('dec_prediction_16x_bil_x2f', (4, 4, 1, 19)),
                ('conv3_3b', (256,)),
                ('conv2_1f', (3, 3, 64, 128)),
                ('deconv_4xf', (8, 8, 1, 190)),
                ('dil_mask_deconv32sb', (19,)),
                ('conv5_3b', (512,)),
                ('fc7b', (4096,)),
                ('conv2_2f', (3, 3, 128, 128)),
                ('conv4_3b', (512,)),
                ('conv5_1b', (512,)),
                ('bases_coef_4xf', (5, 5, 256, 190)),
                ('conv4_1f', (3, 3, 256, 512)),
                ('dec_prediction_32x_bil_x2f', (4, 4, 1, 19)),
                ('deconv_8xf', (8, 8, 1, 190)),
                ('conv1_1f', (3, 3, 3, 64)),
                ('dil_seg_deconv_32xf', (8, 8, 1, 190)),
                ('ero_mask_deconv32sb', (19,))]
class Flownet2:
    """Graph builder for the FlowNet2 optical-flow network (TensorFlow 1.x).

    Parameters are created in ``__init__`` from ``self.all_variables()``;
    ``get_blobs`` builds the network and exposes intermediate tensors by name.
    """

    # get_optimizer only updates weights whose name starts with one of the
    # prefixes and contains none of the excluded substrings.
    _TRAINABLE_PREFIXES = ('net3_', 'netsd_', 'fuse_')
    _FROZEN_MARKERS = ('upsample', 'deconv')

    def __init__(self, bilinear_warping_module):
        """Create the variables and remember the custom warping op module.

        Args:
            bilinear_warping_module: object exposing ``bilinear_warping(x, flow)``
                (used by ``warp``).
        """
        self.weights = dict()

        for key, shape in self.all_variables():
            self.weights[key] = tf.get_variable(key, shape=shape)

        self.bilinear_warping_module = bilinear_warping_module

    def leaky_relu(self, x, s):
        """Leaky ReLU ``max(x, s*x)``; ``s`` must lie strictly between 0 and 1."""
        assert 0 < s < 1, "Wrong s"
        return tf.maximum(x, s*x)

    def warp(self, x, flow):
        """Warp ``x`` with ``flow`` using the bilinear warping op.

        The two flow channels are swapped before being handed to the op.
        """
        swapped = tf.stack([flow[:, :, :, 1], flow[:, :, :, 0]], axis=3)
        return self.bilinear_warping_module.bilinear_warping(x, swapped)

    # flip true  -> [:,:,:,0] y axis downwards
    #               [:,:,:,1] x axis
    #               as in matrix indexing
    #
    # flip false -> 0->x, 1->y
    def __call__(self, im0, im1, flip=True):
        """Return the final flow between ``im0`` and ``im1``.

        Args:
            im0, im1: image batches passed on to ``get_blobs``.
            flip: when true, swap the two flow channels so that channel 0 is
                the y (row) component and channel 1 the x (column) component.
        """
        f = self.get_blobs(im0, im1)['predict_flow_final']
        if flip:
            f = tf.stack([f[:, :, :, 1], f[:, :, :, 0]], axis=3)
        return f

    def _trainable_variables(self):
        """Return the weights that ``get_optimizer`` is allowed to update."""
        # dict.items() works on both Python 2 and 3 (iteritems() is 2-only).
        return [var for name, var in self.weights.items()
                if name.startswith(self._TRAINABLE_PREFIXES)
                and not any(marker in name for marker in self._FROZEN_MARKERS)]

    def get_optimizer(self, flow, target, learning_rate=1e-4):
        """Build an Adam step driven by an externally supplied gradient.

        Args:
            flow: network output tensor.
            target: tensor holding the gradient of the real loss w.r.t.
                ``flow`` (not a regression target).
            learning_rate: Adam learning rate.

        Returns:
            ``(train_op, loss)`` where ``loss = sum(flow * target)``.
        """
        # d(sum(flow * target))/d(flow) == target, so `target` is what gets
        # backpropagated into the network.
        loss = tf.reduce_sum(flow * target)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.95, beta2=0.99, epsilon=1e-8)
        train_op = optimizer.minimize(loss, var_list=self._trainable_variables())
        return train_op, loss

    # If I run the network with large images (1024x2048) it crashes due to memory
    # constraints on a 12Gb titan X.
    # See https://github.com/tensorflow/tensorflow/issues/5816#issuecomment-268710077
    # for a possible explanation. I fix it by adding run_after in the section with
    # the correlation layer so that 441 large tensors are not allocated at the same time
    def run_after(self, a_tensor, b_tensor):
        """Force a to run after b"""
        ge.reroute.add_control_inputs(a_tensor.op, [b_tensor.op])

    # without epsilon I get nan-errors when I backpropagate
    def l2_norm(self, x):
        """Return the L2 norm of ``x`` over axis 3, keeping that axis.

        The squared norm is clamped to at least 1e-5 before the square root.
        """
        squared_norm = tf.reduce_sum(x**2, axis=3, keep_dims=True)
        return tf.sqrt(tf.maximum(1e-5, squared_norm))
self.weights['conv1_w'], strides=[1,2,2,1], padding="VALID") + self.weights['conv1_b'] 87 | blobs['conv1a'] = self.leaky_relu(blobs['conv1a'], 0.1) 88 | 89 | blobs['conv1b'] = tf.pad(blobs['img1_nomean_resize'], [[0,0], [3,3], [3,3], [0,0]]) 90 | blobs['conv1b'] = tf.nn.conv2d(blobs['conv1b'], self.weights['conv1_w'], strides=[1,2,2,1], padding="VALID") + self.weights['conv1_b'] 91 | blobs['conv1b'] = self.leaky_relu(blobs['conv1b'], 0.1) 92 | 93 | blobs['conv2a'] = tf.pad(blobs['conv1a'], [[0,0], [2,2], [2,2], [0,0]]) 94 | blobs['conv2a'] = tf.nn.conv2d(blobs['conv2a'], self.weights['conv2_w'], strides=[1,2,2,1], padding="VALID") + self.weights['conv2_b'] 95 | blobs['conv2a'] = self.leaky_relu(blobs['conv2a'], 0.1) 96 | 97 | blobs['conv2b'] = tf.pad(blobs['conv1b'], [[0,0], [2,2], [2,2], [0,0]]) 98 | blobs['conv2b'] = tf.nn.conv2d(blobs['conv2b'], self.weights['conv2_w'], strides=[1,2,2,1], padding="VALID") + self.weights['conv2_b'] 99 | blobs['conv2b'] = self.leaky_relu(blobs['conv2b'], 0.1) 100 | 101 | blobs['conv3a'] = tf.pad(blobs['conv2a'], [[0,0], [2,2], [2,2], [0,0]]) 102 | blobs['conv3a'] = tf.nn.conv2d(blobs['conv3a'], self.weights['conv3_w'], strides=[1,2,2,1], padding="VALID") + self.weights['conv3_b'] 103 | blobs['conv3a'] = self.leaky_relu(blobs['conv3a'], 0.1) 104 | 105 | blobs['conv3b'] = tf.pad(blobs['conv2b'], [[0,0], [2,2], [2,2], [0,0]]) 106 | blobs['conv3b'] = tf.nn.conv2d(blobs['conv3b'], self.weights['conv3_w'], strides=[1,2,2,1], padding="VALID") + self.weights['conv3_b'] 107 | blobs['conv3b'] = self.leaky_relu(blobs['conv3b'], 0.1) 108 | 109 | # this might be considered a bit hacky 110 | tmp = [] 111 | x1_l = [] 112 | x2_l = [] 113 | for di in range(-20, 21, 2): 114 | for dj in range(-20, 21, 2): 115 | x1 = tf.pad(blobs['conv3a'], [[0,0], [20,20], [20,20], [0,0]]) 116 | x2 = tf.pad(blobs['conv3b'], [[0,0], [20-di,20+di], [20-dj,20+dj], [0,0]]) 117 | x1_l.append(x1) 118 | x2_l.append(x2) 119 | c = tf.nn.conv2d(x1*x2, tf.ones([1, 1, 256, 
1])/256., strides=[1,1,1,1], padding='VALID') 120 | tmp.append(c[:,20:-20,20:-20,:]) 121 | for i in range(len(tmp)-1): 122 | #self.run_after(tmp[i], tmp[i+1]) 123 | self.run_after(x1_l[i], tmp[i+1]) 124 | self.run_after(x2_l[i], tmp[i+1]) 125 | blobs['corr'] = tf.concat(tmp, axis=3) 126 | blobs['corr'] = self.leaky_relu(blobs['corr'], 0.1) 127 | 128 | blobs['conv_redir'] = tf.nn.conv2d(blobs['conv3a'], self.weights['conv_redir_w'], strides=[1,1,1,1], padding="VALID") + self.weights['conv_redir_b'] 129 | blobs['conv_redir'] = self.leaky_relu(blobs['conv_redir'], 0.1) 130 | 131 | blobs['blob16'] = tf.concat([blobs['conv_redir'], blobs['corr']], axis=3) 132 | 133 | blobs['conv3_1'] = tf.nn.conv2d(blobs['blob16'], self.weights['conv3_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['conv3_1_b'] 134 | blobs['conv3_1'] = self.leaky_relu(blobs['conv3_1'], 0.1) 135 | 136 | blobs['conv4'] = tf.pad(blobs['conv3_1'], [[0,0], [1,1], [1,1], [0,0]]) 137 | blobs['conv4'] = tf.nn.conv2d(blobs['conv4'], self.weights['conv4_w'], strides=[1,2,2,1], padding="VALID") + self.weights['conv4_b'] 138 | blobs['conv4'] = self.leaky_relu(blobs['conv4'], 0.1) 139 | 140 | blobs['conv4_1'] = tf.nn.conv2d(blobs['conv4'], self.weights['conv4_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['conv4_1_b'] 141 | blobs['conv4_1'] = self.leaky_relu(blobs['conv4_1'], 0.1) 142 | 143 | blobs['conv5'] = tf.pad(blobs['conv4_1'], [[0,0], [1,1], [1,1], [0,0]]) 144 | blobs['conv5'] = tf.nn.conv2d(blobs['conv5'], self.weights['conv5_w'], strides=[1,2,2,1], padding="VALID") + self.weights['conv5_b'] 145 | blobs['conv5'] = self.leaky_relu(blobs['conv5'], 0.1) 146 | 147 | blobs['conv5_1'] = tf.nn.conv2d(blobs['conv5'], self.weights['conv5_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['conv5_1_b'] 148 | blobs['conv5_1'] = self.leaky_relu(blobs['conv5_1'], 0.1) 149 | 150 | blobs['conv6'] = tf.pad(blobs['conv5_1'], [[0,0], [1,1], [1,1], [0,0]]) 151 | blobs['conv6'] = 
tf.nn.conv2d(blobs['conv6'], self.weights['conv6_w'], strides=[1,2,2,1], padding="VALID") + self.weights['conv6_b'] 152 | blobs['conv6'] = self.leaky_relu(blobs['conv6'], 0.1) 153 | 154 | blobs['conv6_1'] = tf.nn.conv2d(blobs['conv6'], self.weights['conv6_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['conv6_1_b'] 155 | blobs['conv6_1'] = self.leaky_relu(blobs['conv6_1'], 0.1) 156 | 157 | blobs['predict_flow6'] = tf.nn.conv2d(blobs['conv6_1'], self.weights['Convolution1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['Convolution1_b'] 158 | 159 | blobs['deconv5'] = tf.nn.conv2d_transpose(blobs['conv6_1'], self.weights['deconv5_w'], output_shape=[batch_size, ADAPTED_HEIGHT/32, ADAPTED_WIDTH/32, 512], strides=[1,2,2,1]) + self.weights['deconv5_b'] 160 | blobs['deconv5'] = self.leaky_relu(blobs['deconv5'], 0.1) 161 | 162 | blobs['upsampled_flow6_to_5'] = tf.nn.conv2d_transpose(blobs['predict_flow6'], self.weights['upsample_flow6to5_w'], output_shape=[batch_size, ADAPTED_HEIGHT/32, ADAPTED_WIDTH/32, 2], strides=[1,2,2,1]) + self.weights['upsample_flow6to5_b'] 163 | 164 | blobs['concat5'] = tf.concat([blobs['conv5_1'], blobs['deconv5'], blobs['upsampled_flow6_to_5']], axis=3) 165 | 166 | blobs['predict_flow5'] = tf.pad(blobs['concat5'], [[0,0], [1,1], [1,1], [0,0]]) 167 | blobs['predict_flow5'] = tf.nn.conv2d(blobs['predict_flow5'], self.weights['Convolution2_w'], strides=[1,1,1,1], padding="VALID") + self.weights['Convolution2_b'] 168 | 169 | blobs['deconv4'] = tf.nn.conv2d_transpose(blobs['concat5'], self.weights['deconv4_w'], output_shape=[batch_size, ADAPTED_HEIGHT/16, ADAPTED_WIDTH/16, 256], strides=[1,2,2,1]) + self.weights['deconv4_b'] 170 | blobs['deconv4'] = self.leaky_relu(blobs['deconv4'], 0.1) 171 | 172 | blobs['upsampled_flow5_to_4'] = tf.nn.conv2d_transpose(blobs['predict_flow5'], self.weights['upsample_flow5to4_w'], output_shape=[batch_size, ADAPTED_HEIGHT/16, ADAPTED_WIDTH/16, 2], strides=[1,2,2,1]) + self.weights['upsample_flow5to4_b'] 
173 | 174 | blobs['concat4'] = tf.concat([blobs['conv4_1'], blobs['deconv4'], blobs['upsampled_flow5_to_4']], axis=3) 175 | 176 | blobs['predict_flow4'] = tf.nn.conv2d(blobs['concat4'], self.weights['Convolution3_w'], strides=[1,1,1,1], padding="SAME") + self.weights['Convolution3_b'] 177 | 178 | blobs['deconv3'] = tf.nn.conv2d_transpose(blobs['concat4'], self.weights['deconv3_w'], output_shape=[batch_size, ADAPTED_HEIGHT/8, ADAPTED_WIDTH/8, 128], strides=[1,2,2,1]) + self.weights['deconv3_b'] 179 | blobs['deconv3'] = self.leaky_relu(blobs['deconv3'], 0.1) 180 | 181 | blobs['upsampled_flow4_to_3'] = tf.nn.conv2d_transpose(blobs['predict_flow4'], self.weights['upsample_flow4to3_w'], output_shape=[batch_size, ADAPTED_HEIGHT/8, ADAPTED_WIDTH/8, 2], strides=[1,2,2,1]) + self.weights['upsample_flow4to3_b'] 182 | 183 | blobs['concat3'] = tf.concat([blobs['conv3_1'], blobs['deconv3'], blobs['upsampled_flow4_to_3']], axis=3) 184 | 185 | blobs['predict_flow3'] = tf.nn.conv2d(blobs['concat3'], self.weights['Convolution4_w'], strides=[1,1,1,1], padding="SAME") + self.weights['Convolution4_b'] 186 | 187 | blobs['deconv2'] = tf.nn.conv2d_transpose(blobs['concat3'], self.weights['deconv2_w'], output_shape=[batch_size, ADAPTED_HEIGHT/4, ADAPTED_WIDTH/4, 64], strides=[1,2,2,1]) + self.weights['deconv2_b'] 188 | blobs['deconv2'] = self.leaky_relu(blobs['deconv2'], 0.1) 189 | 190 | blobs['upsampled_flow3_to_2'] = tf.nn.conv2d_transpose(blobs['predict_flow3'], self.weights['upsample_flow3to2_w'], output_shape=[batch_size, ADAPTED_HEIGHT/4, ADAPTED_WIDTH/4, 2], strides=[1,2,2,1]) + self.weights['upsample_flow3to2_b'] 191 | 192 | blobs['concat2'] = tf.concat([blobs['conv2a'], blobs['deconv2'], blobs['upsampled_flow3_to_2']], axis=3) 193 | 194 | blobs['predict_flow2'] = tf.nn.conv2d(blobs['concat2'], self.weights['Convolution5_w'], strides=[1,1,1,1], padding="SAME") + self.weights['Convolution5_b'] 195 | 196 | blobs['blob41'] = blobs['predict_flow2'] * 20. 
197 | 198 | blobs['blob42'] = tf.image.resize_bilinear(blobs['blob41'], size=[ADAPTED_HEIGHT, ADAPTED_WIDTH], align_corners=True) 199 | 200 | blobs['blob43'] = self.warp(blobs['img1_nomean_resize'], blobs['blob42']) 201 | 202 | blobs['blob44'] = blobs['img0_nomean_resize'] - blobs['blob43'] 203 | 204 | #blobs['blob45'] = tf.sqrt(1e-8+tf.reduce_sum(blobs['blob44']**2, axis=3, keep_dims=True)) 205 | blobs['blob45'] = self.l2_norm(blobs['blob44']) 206 | 207 | blobs['blob46'] = 0.05*blobs['blob42'] 208 | 209 | blobs['blob47'] = tf.concat([blobs['img0_nomean_resize'], blobs['img1_nomean_resize'], blobs['blob43'], blobs['blob46'], blobs['blob45']], axis=3) 210 | #################################################################################### 211 | #################################################################################### 212 | #################################################################################### 213 | ###################### END OF THE FIRST BRANCH ##################################### 214 | #################################################################################### 215 | #################################################################################### 216 | #################################################################################### 217 | 218 | 219 | 220 | blobs['blob48'] = tf.pad(blobs['blob47'], [[0,0], [3,3], [3,3], [0,0]]) 221 | blobs['blob48'] = tf.nn.conv2d(blobs['blob48'], self.weights['net2_conv1_w'], strides=[1,2,2,1], padding="VALID") + self.weights['net2_conv1_b'] 222 | blobs['blob48'] = self.leaky_relu(blobs['blob48'], 0.1) 223 | 224 | blobs['blob49'] = tf.pad(blobs['blob48'], [[0,0], [2,2], [2, 2], [0,0]]) 225 | blobs['blob49'] = tf.nn.conv2d(blobs['blob49'], self.weights['net2_conv2_w'], strides=[1,2,2,1], padding="VALID") + self.weights['net2_conv2_b'] 226 | blobs['blob49'] = self.leaky_relu(blobs['blob49'], 0.1) 227 | 228 | blobs['blob50'] = tf.pad(blobs['blob49'], [[0,0], [2,2], [2,2], [0,0]]) 229 | 
blobs['blob50'] = tf.nn.conv2d(blobs['blob50'], self.weights['net2_conv3_w'], strides=[1,2,2,1], padding="VALID") + self.weights['net2_conv3_b'] 230 | blobs['blob50'] = self.leaky_relu(blobs['blob50'], 0.1) 231 | 232 | blobs['blob51'] = tf.nn.conv2d(blobs['blob50'], self.weights['net2_conv3_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net2_conv3_1_b'] 233 | blobs['blob51'] = self.leaky_relu(blobs['blob51'], 0.1) 234 | 235 | blobs['blob52'] = tf.pad(blobs['blob51'], [[0,0], [1,1], [1,1], [0,0]]) 236 | blobs['blob52'] = tf.nn.conv2d(blobs['blob52'], self.weights['net2_conv4_w'], strides=[1,2,2,1], padding="VALID") + self.weights['net2_conv4_b'] 237 | blobs['blob52'] = self.leaky_relu(blobs['blob52'], 0.1) 238 | 239 | blobs['blob53'] = tf.nn.conv2d(blobs['blob52'], self.weights['net2_conv4_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net2_conv4_1_b'] 240 | blobs['blob53'] = self.leaky_relu(blobs['blob53'], 0.1) 241 | 242 | blobs['blob54'] = tf.pad(blobs['blob53'], [[0,0], [1,1], [1,1], [0,0]]) 243 | blobs['blob54'] = tf.nn.conv2d(blobs['blob54'], self.weights['net2_conv5_w'], strides=[1,2,2,1], padding="VALID") + self.weights['net2_conv5_b'] 244 | blobs['blob54'] = self.leaky_relu(blobs['blob54'], 0.1) 245 | 246 | blobs['blob55'] = tf.nn.conv2d(blobs['blob54'], self.weights['net2_conv5_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net2_conv5_1_b'] 247 | blobs['blob55'] = self.leaky_relu(blobs['blob55'], 0.1) 248 | 249 | blobs['blob56'] = tf.pad(blobs['blob55'], [[0,0], [1,1], [1,1], [0,0]]) 250 | blobs['blob56'] = tf.nn.conv2d(blobs['blob56'], self.weights['net2_conv6_w'], strides=[1,2,2,1], padding="VALID") + self.weights['net2_conv6_b'] 251 | blobs['blob56'] = self.leaky_relu(blobs['blob56'], 0.1) 252 | 253 | blobs['blob57'] = tf.nn.conv2d(blobs['blob56'], self.weights['net2_conv6_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net2_conv6_1_b'] 254 | blobs['blob57'] = self.leaky_relu(blobs['blob57'], 0.1) 255 | 256 | 
blobs['blob58'] = tf.nn.conv2d(blobs['blob57'], self.weights['net2_predict_conv6_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net2_predict_conv6_b'] 257 | 258 | blobs['blob59'] = tf.nn.conv2d_transpose(blobs['blob57'], self.weights['net2_deconv5_w'], output_shape=[batch_size, ADAPTED_HEIGHT/32, ADAPTED_WIDTH/32, 512], strides=[1,2,2,1]) + self.weights['net2_deconv5_b'] 259 | blobs['blob59'] = self.leaky_relu(blobs['blob59'], 0.1) 260 | 261 | blobs['blob60'] = tf.nn.conv2d_transpose(blobs['predict_flow6'], self.weights['net2_net2_upsample_flow6to5_w'], output_shape=[batch_size, ADAPTED_HEIGHT/32, ADAPTED_WIDTH/32, 2], strides=[1,2,2,1]) + self.weights['net2_net2_upsample_flow6to5_b'] 262 | 263 | blobs['blob61'] = tf.concat([blobs['blob55'], blobs['blob59'], blobs['blob60']], axis=3) 264 | 265 | blobs['blob62'] = tf.nn.conv2d(blobs['blob61'], self.weights['net2_predict_conv5_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net2_predict_conv5_b'] 266 | 267 | blobs['blob63'] = tf.nn.conv2d_transpose(blobs['blob61'], self.weights['net2_deconv4_w'], output_shape=[batch_size, ADAPTED_HEIGHT/16, ADAPTED_WIDTH/16, 256], strides=[1,2,2,1]) + self.weights['net2_deconv4_b'] 268 | blobs['blob63'] = self.leaky_relu(blobs['blob63'], 0.1) 269 | 270 | blobs['blob64'] = tf.nn.conv2d_transpose(blobs['blob62'], self.weights['net2_net2_upsample_flow5to4_w'], output_shape=[batch_size, ADAPTED_HEIGHT/16, ADAPTED_WIDTH/16, 2], strides=[1,2,2,1]) + self.weights['net2_net2_upsample_flow5to4_b'] 271 | 272 | blobs['blob65'] = tf.concat([blobs['blob53'], blobs['blob63'], blobs['blob64']], axis=3) 273 | 274 | blobs['blob66'] = tf.nn.conv2d(blobs['blob65'], self.weights['net2_predict_conv4_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net2_predict_conv4_b'] 275 | 276 | blobs['blob67'] = tf.nn.conv2d_transpose(blobs['blob65'], self.weights['net2_deconv3_w'], output_shape=[batch_size, ADAPTED_HEIGHT/8, ADAPTED_WIDTH/8, 128], strides=[1,2,2,1]) + 
self.weights['net2_deconv3_b'] 277 | blobs['blob67'] = self.leaky_relu(blobs['blob67'], 0.1) 278 | 279 | blobs['blob68'] = tf.nn.conv2d_transpose(blobs['blob66'], self.weights['net2_net2_upsample_flow4to3_w'], output_shape=[batch_size, ADAPTED_HEIGHT/8, ADAPTED_WIDTH/8, 2], strides=[1,2,2,1]) + self.weights['net2_net2_upsample_flow4to3_b'] 280 | 281 | blobs['blob69'] = tf.concat([blobs['blob51'], blobs['blob67'], blobs['blob68']], axis=3) 282 | 283 | blobs['blob70'] = tf.nn.conv2d(blobs['blob69'], self.weights['net2_predict_conv3_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net2_predict_conv3_b'] 284 | 285 | blobs['blob71'] = tf.nn.conv2d_transpose(blobs['blob69'], self.weights['net2_deconv2_w'], output_shape=[batch_size, ADAPTED_HEIGHT/4, ADAPTED_WIDTH/4, 64], strides=[1,2,2,1]) + self.weights['net2_deconv2_b'] 286 | blobs['blob71'] = self.leaky_relu(blobs['blob71'], 0.1) 287 | 288 | blobs['blob72'] = tf.nn.conv2d_transpose(blobs['blob70'], self.weights['net2_net2_upsample_flow3to2_w'], output_shape=[batch_size, ADAPTED_HEIGHT/4, ADAPTED_WIDTH/4, 2], strides=[1,2,2,1]) + self.weights['net2_net2_upsample_flow3to2_b'] 289 | 290 | blobs['blob73'] = tf.concat([blobs['blob49'], blobs['blob71'], blobs['blob72']], axis=3) 291 | 292 | blobs['blob74'] = tf.nn.conv2d(blobs['blob73'], self.weights['net2_predict_conv2_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net2_predict_conv2_b'] 293 | 294 | blobs['blob75'] = blobs['blob74'] * 20. 
295 | 296 | blobs['blob76'] = tf.image.resize_bilinear(blobs['blob75'], size=[ADAPTED_HEIGHT, ADAPTED_WIDTH], align_corners=True) 297 | 298 | blobs['blob77'] = self.warp(blobs['img1_nomean_resize'], blobs['blob76']) 299 | 300 | blobs['blob78'] = blobs['img0_nomean_resize'] - blobs['blob77'] 301 | 302 | #blobs['blob79'] = tf.sqrt(1e-8+tf.reduce_sum(blobs['blob78']**2, axis=3, keep_dims=True)) 303 | blobs['blob79'] = self.l2_norm(blobs['blob78']) 304 | 305 | blobs['blob80'] = 0.05*blobs['blob76'] 306 | 307 | blobs['blob81'] = tf.concat([blobs['img0_nomean_resize'], blobs['img1_nomean_resize'], blobs['blob77'], blobs['blob80'], blobs['blob79']], axis=3) 308 | 309 | #################################################################################### 310 | #################################################################################### 311 | #################################################################################### 312 | ###################### END OF THE SECOND BRANCH #################################### 313 | #################################################################################### 314 | #################################################################################### 315 | #################################################################################### 316 | 317 | 318 | blobs['blob82'] = tf.pad(blobs['blob81'], [[0,0], [3,3], [3,3], [0,0]]) 319 | blobs['blob82'] = tf.nn.conv2d(blobs['blob82'], self.weights['net3_conv1_w'], strides=[1,2,2,1], padding="VALID") + self.weights['net3_conv1_b'] 320 | blobs['blob82'] = self.leaky_relu(blobs['blob82'], 0.1) 321 | 322 | blobs['blob83'] = tf.pad(blobs['blob82'], [[0,0], [2,2], [2, 2], [0,0]]) 323 | blobs['blob83'] = tf.nn.conv2d(blobs['blob83'], self.weights['net3_conv2_w'], strides=[1,2,2,1], padding="VALID") + self.weights['net3_conv2_b'] 324 | blobs['blob83'] = self.leaky_relu(blobs['blob83'], 0.1) 325 | 326 | blobs['blob84'] = tf.pad(blobs['blob83'], [[0,0], [2,2], [2,2], [0,0]]) 327 | 
blobs['blob84'] = tf.nn.conv2d(blobs['blob84'], self.weights['net3_conv3_w'], strides=[1,2,2,1], padding="VALID") + self.weights['net3_conv3_b'] 328 | blobs['blob84'] = self.leaky_relu(blobs['blob84'], 0.1) 329 | 330 | blobs['blob85'] = tf.nn.conv2d(blobs['blob84'], self.weights['net3_conv3_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net3_conv3_1_b'] 331 | blobs['blob85'] = self.leaky_relu(blobs['blob85'], 0.1) 332 | 333 | blobs['blob86'] = tf.pad(blobs['blob85'], [[0,0], [1,1], [1,1], [0,0]]) 334 | blobs['blob86'] = tf.nn.conv2d(blobs['blob86'], self.weights['net3_conv4_w'], strides=[1,2,2,1], padding="VALID") + self.weights['net3_conv4_b'] 335 | blobs['blob86'] = self.leaky_relu(blobs['blob86'], 0.1) 336 | 337 | blobs['blob87'] = tf.nn.conv2d(blobs['blob86'], self.weights['net3_conv4_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net3_conv4_1_b'] 338 | blobs['blob87'] = self.leaky_relu(blobs['blob87'], 0.1) 339 | 340 | blobs['blob88'] = tf.pad(blobs['blob87'], [[0,0], [1,1], [1,1], [0,0]]) 341 | blobs['blob88'] = tf.nn.conv2d(blobs['blob88'], self.weights['net3_conv5_w'], strides=[1,2,2,1], padding="VALID") + self.weights['net3_conv5_b'] 342 | blobs['blob88'] = self.leaky_relu(blobs['blob88'], 0.1) 343 | 344 | blobs['blob89'] = tf.nn.conv2d(blobs['blob88'], self.weights['net3_conv5_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net3_conv5_1_b'] 345 | blobs['blob89'] = self.leaky_relu(blobs['blob89'], 0.1) 346 | 347 | blobs['blob90'] = tf.pad(blobs['blob89'], [[0,0], [1,1], [1,1], [0,0]]) 348 | blobs['blob90'] = tf.nn.conv2d(blobs['blob90'], self.weights['net3_conv6_w'], strides=[1,2,2,1], padding="VALID") + self.weights['net3_conv6_b'] 349 | blobs['blob90'] = self.leaky_relu(blobs['blob90'], 0.1) 350 | 351 | blobs['blob91'] = tf.nn.conv2d(blobs['blob90'], self.weights['net3_conv6_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net3_conv6_1_b'] 352 | blobs['blob91'] = self.leaky_relu(blobs['blob91'], 0.1) 353 | 354 | 
blobs['blob92'] = tf.nn.conv2d(blobs['blob91'], self.weights['net3_predict_conv6_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net3_predict_conv6_b'] 355 | 356 | blobs['blob93'] = tf.nn.conv2d_transpose(blobs['blob91'], self.weights['net3_deconv5_w'], output_shape=[batch_size, ADAPTED_HEIGHT/32, ADAPTED_WIDTH/32, 512], strides=[1,2,2,1]) + self.weights['net3_deconv5_b'] 357 | blobs['blob93'] = self.leaky_relu(blobs['blob93'], 0.1) 358 | 359 | blobs['blob94'] = tf.nn.conv2d_transpose(blobs['blob92'], self.weights['net3_net3_upsample_flow6to5_w'], output_shape=[batch_size, ADAPTED_HEIGHT/32, ADAPTED_WIDTH/32, 2], strides=[1,2,2,1]) + self.weights['net3_net3_upsample_flow6to5_b'] 360 | 361 | blobs['blob95'] = tf.concat([blobs['blob89'], blobs['blob93'], blobs['blob94']], axis=3) 362 | 363 | blobs['blob96'] = tf.nn.conv2d(blobs['blob95'], self.weights['net3_predict_conv5_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net3_predict_conv5_b'] 364 | 365 | blobs['blob97'] = tf.nn.conv2d_transpose(blobs['blob95'], self.weights['net3_deconv4_w'], output_shape=[batch_size, ADAPTED_HEIGHT/16, ADAPTED_WIDTH/16, 256], strides=[1,2,2,1]) + self.weights['net3_deconv4_b'] 366 | blobs['blob97'] = self.leaky_relu(blobs['blob97'], 0.1) 367 | 368 | blobs['blob98'] = tf.nn.conv2d_transpose(blobs['blob96'], self.weights['net3_net3_upsample_flow5to4_w'], output_shape=[batch_size, ADAPTED_HEIGHT/16, ADAPTED_WIDTH/16, 2], strides=[1,2,2,1]) + self.weights['net3_net3_upsample_flow5to4_b'] 369 | 370 | blobs['blob99'] = tf.concat([blobs['blob87'], blobs['blob97'], blobs['blob98']], axis=3) 371 | 372 | blobs['blob100'] = tf.nn.conv2d(blobs['blob99'], self.weights['net3_predict_conv4_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net3_predict_conv4_b'] 373 | 374 | blobs['blob101'] = tf.nn.conv2d_transpose(blobs['blob99'], self.weights['net3_deconv3_w'], output_shape=[batch_size, ADAPTED_HEIGHT/8, ADAPTED_WIDTH/8, 128], strides=[1,2,2,1]) + 
self.weights['net3_deconv3_b'] 375 | blobs['blob101'] = self.leaky_relu(blobs['blob101'], 0.1) 376 | 377 | blobs['blob102'] = tf.nn.conv2d_transpose(blobs['blob100'], self.weights['net3_net3_upsample_flow4to3_w'], output_shape=[batch_size, ADAPTED_HEIGHT/8, ADAPTED_WIDTH/8, 2], strides=[1,2,2,1]) + self.weights['net3_net3_upsample_flow4to3_b'] 378 | 379 | blobs['blob103'] = tf.concat([blobs['blob85'], blobs['blob101'], blobs['blob102']], axis=3) 380 | 381 | blobs['blob104'] = tf.nn.conv2d(blobs['blob103'], self.weights['net3_predict_conv3_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net3_predict_conv3_b'] 382 | 383 | blobs['blob105'] = tf.nn.conv2d_transpose(blobs['blob103'], self.weights['net3_deconv2_w'], output_shape=[batch_size, ADAPTED_HEIGHT/4, ADAPTED_WIDTH/4, 64], strides=[1,2,2,1]) + self.weights['net3_deconv2_b'] 384 | blobs['blob105'] = self.leaky_relu(blobs['blob105'], 0.1) 385 | 386 | blobs['blob106'] = tf.nn.conv2d_transpose(blobs['blob104'], self.weights['net3_net3_upsample_flow3to2_w'], output_shape=[batch_size, ADAPTED_HEIGHT/4, ADAPTED_WIDTH/4, 2], strides=[1,2,2,1]) + self.weights['net3_net3_upsample_flow3to2_b'] 387 | 388 | blobs['blob107'] = tf.concat([blobs['blob83'], blobs['blob105'], blobs['blob106']], axis=3) 389 | 390 | blobs['blob108'] = tf.nn.conv2d(blobs['blob107'], self.weights['net3_predict_conv2_w'], strides=[1,1,1,1], padding="SAME") + self.weights['net3_predict_conv2_b'] 391 | 392 | blobs['blob109'] = blobs['blob108'] * 20. 
393 | 394 | #################################################################################### 395 | #################################################################################### 396 | #################################################################################### 397 | ###################### END OF THE THIRD BRANCH #################################### 398 | #################################################################################### 399 | #################################################################################### 400 | #################################################################################### 401 | 402 | blobs['blob110'] = tf.concat([blobs['img0_nomean_resize'], blobs['img1_nomean_resize']], axis=3) 403 | #self.run_after(blobs['blob110'], blobs['blob109']) 404 | 405 | blobs['blob111'] = tf.nn.conv2d(blobs['blob110'], self.weights['netsd_conv0_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_conv0_b'] 406 | blobs['blob111'] = self.leaky_relu(blobs['blob111'], 0.1) 407 | 408 | blobs['blob112'] = tf.pad(blobs['blob111'], [[0,0], [1,1], [1,1], [0,0]]) 409 | blobs['blob112'] = tf.nn.conv2d(blobs['blob112'], self.weights['netsd_conv1_w'], strides=[1,2,2,1], padding="VALID") + self.weights['netsd_conv1_b'] 410 | blobs['blob112'] = self.leaky_relu(blobs['blob112'], 0.1) 411 | 412 | blobs['blob113'] = tf.nn.conv2d(blobs['blob112'], self.weights['netsd_conv1_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_conv1_1_b'] 413 | blobs['blob113'] = self.leaky_relu(blobs['blob113'], 0.1) 414 | 415 | blobs['blob114'] = tf.pad(blobs['blob113'], [[0,0], [1,1], [1,1], [0,0]]) 416 | blobs['blob114'] = tf.nn.conv2d(blobs['blob114'], self.weights['netsd_conv2_w'], strides=[1,2,2,1], padding="VALID") + self.weights['netsd_conv2_b'] 417 | blobs['blob114'] = self.leaky_relu(blobs['blob114'], 0.1) 418 | 419 | blobs['blob115'] = tf.nn.conv2d(blobs['blob114'], self.weights['netsd_conv2_1_w'], strides=[1,1,1,1], 
padding="SAME") + self.weights['netsd_conv2_1_b'] 420 | blobs['blob115'] = self.leaky_relu(blobs['blob115'], 0.1) 421 | 422 | blobs['blob116'] = tf.pad(blobs['blob115'], [[0,0], [1,1], [1,1], [0,0]]) 423 | blobs['blob116'] = tf.nn.conv2d(blobs['blob116'], self.weights['netsd_conv3_w'], strides=[1,2,2,1], padding="VALID") + self.weights['netsd_conv3_b'] 424 | blobs['blob116'] = self.leaky_relu(blobs['blob116'], 0.1) 425 | 426 | blobs['blob117'] = tf.nn.conv2d(blobs['blob116'], self.weights['netsd_conv3_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_conv3_1_b'] 427 | blobs['blob117'] = self.leaky_relu(blobs['blob117'], 0.1) 428 | 429 | blobs['blob118'] = tf.pad(blobs['blob117'], [[0,0], [1,1], [1,1], [0,0]]) 430 | blobs['blob118'] = tf.nn.conv2d(blobs['blob118'], self.weights['netsd_conv4_w'], strides=[1,2,2,1], padding="VALID") + self.weights['netsd_conv4_b'] 431 | blobs['blob118'] = self.leaky_relu(blobs['blob118'], 0.1) 432 | 433 | blobs['blob119'] = tf.nn.conv2d(blobs['blob118'], self.weights['netsd_conv4_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_conv4_1_b'] 434 | blobs['blob119'] = self.leaky_relu(blobs['blob119'], 0.1) 435 | 436 | blobs['blob120'] = tf.pad(blobs['blob119'], [[0,0], [1,1], [1,1], [0,0]]) 437 | blobs['blob120'] = tf.nn.conv2d(blobs['blob120'], self.weights['netsd_conv5_w'], strides=[1,2,2,1], padding="VALID") + self.weights['netsd_conv5_b'] 438 | blobs['blob120'] = self.leaky_relu(blobs['blob120'], 0.1) 439 | 440 | blobs['blob121'] = tf.nn.conv2d(blobs['blob120'], self.weights['netsd_conv5_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_conv5_1_b'] 441 | blobs['blob121'] = self.leaky_relu(blobs['blob121'], 0.1) 442 | 443 | blobs['blob122'] = tf.pad(blobs['blob121'], [[0,0], [1,1], [1,1], [0,0]]) 444 | blobs['blob122'] = tf.nn.conv2d(blobs['blob122'], self.weights['netsd_conv6_w'], strides=[1,2,2,1], padding="VALID") + self.weights['netsd_conv6_b'] 445 | blobs['blob122'] = 
self.leaky_relu(blobs['blob122'], 0.1) 446 | 447 | blobs['blob123'] = tf.nn.conv2d(blobs['blob122'], self.weights['netsd_conv6_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_conv6_1_b'] 448 | blobs['blob123'] = self.leaky_relu(blobs['blob123'], 0.1) 449 | 450 | blobs['blob124'] = tf.nn.conv2d(blobs['blob123'], self.weights['netsd_Convolution1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_Convolution1_b'] 451 | 452 | blobs['blob125'] = tf.nn.conv2d_transpose(blobs['blob123'], self.weights['netsd_deconv5_w'], output_shape=[batch_size, ADAPTED_HEIGHT/32, ADAPTED_WIDTH/32, 512], strides=[1,2,2,1]) + self.weights['netsd_deconv5_b'] 453 | blobs['blob125'] = self.leaky_relu(blobs['blob125'], 0.1) 454 | 455 | blobs['blob126'] = tf.nn.conv2d_transpose(blobs['blob124'], self.weights['netsd_upsample_flow6to5_w'], output_shape=[batch_size, ADAPTED_HEIGHT/32, ADAPTED_WIDTH/32, 2], strides=[1,2,2,1]) + self.weights['netsd_upsample_flow6to5_b'] 456 | 457 | blobs['blob127'] = tf.concat([blobs['blob121'], blobs['blob125'], blobs['blob126']], axis=3) 458 | 459 | blobs['blob128'] = tf.nn.conv2d(blobs['blob127'], self.weights['netsd_interconv5_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_interconv5_b'] 460 | 461 | blobs['blob129'] = tf.nn.conv2d(blobs['blob128'], self.weights['netsd_Convolution2_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_Convolution2_b'] 462 | 463 | blobs['blob130'] = tf.nn.conv2d_transpose(blobs['blob127'], self.weights['netsd_deconv4_w'], output_shape=[batch_size, ADAPTED_HEIGHT/16, ADAPTED_WIDTH/16, 256], strides=[1,2,2,1]) + self.weights['netsd_deconv4_b'] 464 | blobs['blob130'] = self.leaky_relu(blobs['blob130'], 0.1) 465 | 466 | blobs['blob131'] = tf.nn.conv2d_transpose(blobs['blob129'], self.weights['netsd_upsample_flow5to4_w'], output_shape=[batch_size, ADAPTED_HEIGHT/16, ADAPTED_WIDTH/16, 2], strides=[1,2,2,1]) + self.weights['netsd_upsample_flow5to4_b'] 467 | 468 | blobs['blob132'] = 
tf.concat([blobs['blob119'], blobs['blob130'], blobs['blob131']], axis=3) 469 | 470 | blobs['blob133'] = tf.nn.conv2d(blobs['blob132'], self.weights['netsd_interconv4_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_interconv4_b'] 471 | 472 | blobs['blob134'] = tf.nn.conv2d(blobs['blob133'], self.weights['netsd_Convolution3_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_Convolution3_b'] 473 | 474 | blobs['blob135'] = tf.nn.conv2d_transpose(blobs['blob132'], self.weights['netsd_deconv3_w'], output_shape=[batch_size, ADAPTED_HEIGHT/8, ADAPTED_WIDTH/8, 128], strides=[1,2,2,1]) + self.weights['netsd_deconv3_b'] 475 | blobs['blob135'] = self.leaky_relu(blobs['blob135'], 0.1) 476 | 477 | blobs['blob136'] = tf.nn.conv2d_transpose(blobs['blob134'], self.weights['netsd_upsample_flow4to3_w'], output_shape=[batch_size, ADAPTED_HEIGHT/8, ADAPTED_WIDTH/8, 2], strides=[1,2,2,1]) + self.weights['netsd_upsample_flow4to3_b'] 478 | 479 | blobs['blob137'] = tf.concat([blobs['blob117'], blobs['blob135'], blobs['blob136']], axis=3) 480 | 481 | blobs['blob138'] = tf.nn.conv2d(blobs['blob137'], self.weights['netsd_interconv3_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_interconv3_b'] 482 | 483 | blobs['blob139'] = tf.nn.conv2d(blobs['blob138'], self.weights['netsd_Convolution4_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_Convolution4_b'] 484 | 485 | blobs['blob140'] = tf.nn.conv2d_transpose(blobs['blob137'], self.weights['netsd_deconv2_w'], output_shape=[batch_size, ADAPTED_HEIGHT/4, ADAPTED_WIDTH/4, 64], strides=[1,2,2,1]) + self.weights['netsd_deconv2_b'] 486 | blobs['blob140'] = self.leaky_relu(blobs['blob140'], 0.1) 487 | 488 | blobs['blob141'] = tf.nn.conv2d_transpose(blobs['blob139'], self.weights['netsd_upsample_flow3to2_w'], output_shape=[batch_size, ADAPTED_HEIGHT/4, ADAPTED_WIDTH/4, 2], strides=[1,2,2,1]) + self.weights['netsd_upsample_flow3to2_b'] 489 | 490 | blobs['blob142'] = tf.concat([blobs['blob115'], 
blobs['blob140'], blobs['blob141']], axis=3) 491 | 492 | blobs['blob143'] = tf.nn.conv2d(blobs['blob142'], self.weights['netsd_interconv2_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_interconv2_b'] 493 | 494 | blobs['blob144'] = tf.nn.conv2d(blobs['blob143'], self.weights['netsd_Convolution5_w'], strides=[1,1,1,1], padding="SAME") + self.weights['netsd_Convolution5_b'] 495 | 496 | blobs['blob145'] = 0.05*blobs['blob144'] 497 | 498 | blobs['blob146'] = tf.image.resize_nearest_neighbor(blobs['blob145'], size=[ADAPTED_HEIGHT, ADAPTED_WIDTH], align_corners=False) 499 | 500 | blobs['blob147'] = tf.image.resize_nearest_neighbor(blobs['blob109'], size=[ADAPTED_HEIGHT, ADAPTED_WIDTH], align_corners=False) 501 | 502 | #blobs['blob148'] = tf.sqrt(1e-8+tf.reduce_sum(blobs['blob146']**2, axis=3, keep_dims=True)) 503 | blobs['blob148'] = self.l2_norm(blobs['blob146']) 504 | 505 | #blobs['blob149'] = tf.sqrt(1e-8+tf.reduce_sum(blobs['blob147']**2, axis=3, keep_dims=True)) 506 | blobs['blob149'] = self.l2_norm(blobs['blob147']) 507 | 508 | blobs['blob150'] = self.warp(blobs['img1_nomean_resize'], blobs['blob146']) 509 | 510 | blobs['blob151'] = blobs['img0_nomean_resize'] - blobs['blob150'] 511 | 512 | #blobs['blob152'] = tf.sqrt(1e-8+tf.reduce_sum(blobs['blob151']**2, axis=3, keep_dims=True)) 513 | blobs['blob152'] = self.l2_norm(blobs['blob151']) 514 | 515 | blobs['blob153'] = self.warp(blobs['img1_nomean_resize'], blobs['blob147']) 516 | 517 | blobs['blob154'] = blobs['img0_nomean_resize'] - blobs['blob153'] 518 | 519 | #blobs['blob155'] = tf.sqrt(1e-8+tf.reduce_sum(blobs['blob154']**2, axis=3, keep_dims=True)) 520 | blobs['blob155'] = self.l2_norm(blobs['blob154']) 521 | 522 | blobs['blob156'] = tf.concat([blobs['img0_nomean_resize'], blobs['blob146'], blobs['blob147'], blobs['blob148'], blobs['blob149'], blobs['blob152'], blobs['blob155']], axis=3) 523 | 524 | blobs['blob157'] = tf.nn.conv2d(blobs['blob156'], self.weights['fuse_conv0_w'], strides=[1,1,1,1], 
padding="SAME") + self.weights['fuse_conv0_b'] 525 | blobs['blob157'] = self.leaky_relu(blobs['blob157'], 0.1) 526 | 527 | blobs['blob158'] = tf.pad(blobs['blob157'], [[0,0], [1,1], [1,1], [0,0]]) 528 | blobs['blob158'] = tf.nn.conv2d(blobs['blob158'], self.weights['fuse_conv1_w'], strides=[1,2,2,1], padding="VALID") + self.weights['fuse_conv1_b'] 529 | blobs['blob158'] = self.leaky_relu(blobs['blob158'], 0.1) 530 | 531 | blobs['blob159'] = tf.nn.conv2d(blobs['blob158'], self.weights['fuse_conv1_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['fuse_conv1_1_b'] 532 | blobs['blob159'] = self.leaky_relu(blobs['blob159'], 0.1) 533 | 534 | blobs['blob160'] = tf.pad(blobs['blob159'], [[0,0], [1,1], [1,1], [0,0]]) 535 | blobs['blob160'] = tf.nn.conv2d(blobs['blob160'], self.weights['fuse_conv2_w'], strides=[1,2,2,1], padding="VALID") + self.weights['fuse_conv2_b'] 536 | blobs['blob160'] = self.leaky_relu(blobs['blob160'], 0.1) 537 | 538 | blobs['blob161'] = tf.nn.conv2d(blobs['blob160'], self.weights['fuse_conv2_1_w'], strides=[1,1,1,1], padding="SAME") + self.weights['fuse_conv2_1_b'] 539 | blobs['blob161'] = self.leaky_relu(blobs['blob161'], 0.1) 540 | 541 | blobs['blob162'] = tf.nn.conv2d(blobs['blob161'], self.weights['fuse__Convolution5_w'], strides=[1,1,1,1], padding="SAME") + self.weights['fuse__Convolution5_b'] 542 | 543 | blobs['blob163'] = tf.nn.conv2d_transpose(blobs['blob161'], self.weights['fuse_deconv1_w'], output_shape=[batch_size, ADAPTED_HEIGHT/2, ADAPTED_WIDTH/2, 32], strides=[1,2,2,1]) + self.weights['fuse_deconv1_b'] 544 | blobs['blob163'] = self.leaky_relu(blobs['blob163'], 0.1) 545 | 546 | blobs['blob164'] = tf.nn.conv2d_transpose(blobs['blob162'], self.weights['fuse_upsample_flow2to1_w'], output_shape=[batch_size, ADAPTED_HEIGHT/2, ADAPTED_WIDTH/2, 2], strides=[1,2,2,1]) + self.weights['fuse_upsample_flow2to1_b'] 547 | 548 | blobs['blob165'] = tf.concat([blobs['blob159'], blobs['blob163'], blobs['blob164']], axis=3) 549 | 550 | 
def all_variables(self):
    """Return the hard-coded table of variable specifications.

    Each entry is a ``(name, dims)`` pair: ``name`` is a string and ``dims``
    is a tuple of ints. The entries are returned in a fixed order, as a
    fresh list on every call.

    NOTE(review): ``dims`` is presumably the shape of the variable stored
    under ``self.weights[name]`` -- confirm against the code that creates or
    loads the weights.
    """
    specs = [
        ('netsd_deconv5_w', (4, 4, 512, 1024)),
        ('netsd_conv1_b', (64,)),
        ('netsd_upsample_flow5to4_w', (4, 4, 2, 2)),
        ('conv2_b', (128,)),
        ('fuse__Convolution5_w', (3, 3, 128, 2)),
        ('netsd_conv4_1_w', (3, 3, 512, 512)),
        ('netsd_interconv3_w', (3, 3, 386, 128)),
        ('netsd_deconv4_w', (4, 4, 256, 1026)),
        ('deconv4_b', (256,)),
        ('fuse_interconv0_w', (3, 3, 82, 16)),
        ('netsd_Convolution2_b', (2,)),
        ('net3_conv4_b', (512,)),
        ('net3_conv3_b', (256,)),
        ('net3_predict_conv2_w', (3, 3, 194, 2)),
        ('net3_predict_conv3_b', (2,)),
        ('conv6_1_w', (3, 3, 1024, 1024)),
        ('fuse_upsample_flow2to1_b', (2,)),
        ('Convolution1_w', (3, 3, 1024, 2)),
        ('net3_deconv3_w', (4, 4, 128, 770)),
        ('net2_deconv3_b', (128,)),
        ('fuse_conv1_w', (3, 3, 64, 64)),
        ('conv5_w', (3, 3, 512, 512)),
        ('Convolution4_w', (3, 3, 386, 2)),
        ('fuse_conv0_b', (64,)),
        ('net2_conv3_w', (5, 5, 128, 256)),
        ('upsample_flow4to3_b', (2,)),
        ('netsd_conv4_1_b', (512,)),
        ('fuse_upsample_flow2to1_w', (4, 4, 2, 2)),
        ('netsd_conv4_b', (512,)),
        ('net2_net2_upsample_flow3to2_b', (2,)),
        ('net3_predict_conv4_b', (2,)),
        ('fuse_upsample_flow1to0_b', (2,)),
        ('conv4_1_w', (3, 3, 512, 512)),
        ('deconv2_b', (64,)),
        ('net2_conv4_1_w', (3, 3, 512, 512)),
        ('net3_deconv4_w', (4, 4, 256, 1026)),
        ('net2_deconv5_b', (512,)),
        ('netsd_deconv5_b', (512,)),
        ('net2_deconv2_b', (64,)),
        ('net3_conv2_b', (128,)),
        ('conv_redir_w', (1, 1, 256, 32)),
        ('fuse_conv1_1_b', (128,)),
        ('net2_deconv5_w', (4, 4, 512, 1024)),
        ('net2_conv5_b', (512,)),
        ('net2_conv4_w', (3, 3, 256, 512)),
        ('net2_predict_conv6_w', (3, 3, 1024, 2)),
        ('netsd_conv5_b', (512,)),
        ('deconv4_w', (4, 4, 256, 1026)),
        ('net2_net2_upsample_flow4to3_b', (2,)),
        ('fuse__Convolution6_w', (3, 3, 32, 2)),
        ('net3_deconv2_w', (4, 4, 64, 386)),
        ('net2_conv6_1_w', (3, 3, 1024, 1024)),
        ('netsd_conv0_b', (64,)),
        ('netsd_conv5_1_w', (3, 3, 512, 512)),
        ('net2_conv6_1_b', (1024,)),
        ('net3_conv2_w', (5, 5, 64, 128)),
        ('net3_predict_conv6_w', (3, 3, 1024, 2)),
        ('net3_conv4_1_b', (512,)),
        ('net3_net3_upsample_flow4to3_w', (4, 4, 2, 2)),
        ('net2_deconv2_w', (4, 4, 64, 386)),
        ('deconv3_b', (128,)),
        ('netsd_interconv5_b', (512,)),
        ('net2_conv3_1_w', (3, 3, 256, 256)),
        ('netsd_interconv4_w', (3, 3, 770, 256)),
        ('net3_deconv3_b', (128,)),
        ('fuse_conv0_w', (3, 3, 11, 64)),
        ('net3_predict_conv6_b', (2,)),
        ('fuse_upsample_flow1to0_w', (4, 4, 2, 2)),
        ('netsd_deconv3_b', (128,)),
        ('net3_predict_conv5_w', (3, 3, 1026, 2)),
        ('netsd_conv5_w', (3, 3, 512, 512)),
        ('netsd_interconv5_w', (3, 3, 1026, 512)),
        ('netsd_Convolution3_w', (3, 3, 256, 2)),
        ('net2_predict_conv4_w', (3, 3, 770, 2)),
        ('deconv2_w', (4, 4, 64, 386)),
        ('net3_predict_conv5_b', (2,)),
        ('fuse__Convolution5_b', (2,)),
        ('fuse__Convolution7_w', (3, 3, 16, 2)),
        ('net2_net2_upsample_flow6to5_w', (4, 4, 2, 2)),
        ('netsd_conv3_b', (256,)),
        ('net3_conv6_w', (3, 3, 512, 1024)),
        ('net3_conv1_b', (64,)),
        ('netsd_Convolution4_b', (2,)),
        ('net3_conv3_w', (5, 5, 128, 256)),
        ('netsd_conv0_w', (3, 3, 6, 64)),
        ('net2_conv4_b', (512,)),
        ('net2_predict_conv3_w', (3, 3, 386, 2)),
        ('net3_net3_upsample_flow3to2_w', (4, 4, 2, 2)),
        ('fuse_conv1_1_w', (3, 3, 64, 128)),
        ('deconv5_b', (512,)),
        ('fuse__Convolution7_b', (2,)),
        ('net3_conv6_1_w', (3, 3, 1024, 1024)),
        ('net3_net3_upsample_flow5to4_w', (4, 4, 2, 2)),
        ('net3_conv4_w', (3, 3, 256, 512)),
        ('upsample_flow5to4_w', (4, 4, 2, 2)),
        ('conv4_1_b', (512,)),
        ('img0s_aug_b', (320, 448, 3, 1)),
        ('conv5_1_b', (512,)),
        ('net3_conv4_1_w', (3, 3, 512, 512)),
        ('upsample_flow5to4_b', (2,)),
        ('net3_conv3_1_b', (256,)),
        ('Convolution1_b', (2,)),
        ('upsample_flow4to3_w', (4, 4, 2, 2)),
        ('conv5_1_w', (3, 3, 512, 512)),
        ('conv3_1_b', (256,)),
        ('conv3_w', (5, 5, 128, 256)),
        ('net2_conv2_b', (128,)),
        ('net3_net3_upsample_flow6to5_w', (4, 4, 2, 2)),
        ('upsample_flow3to2_b', (2,)),
        ('netsd_Convolution5_w', (3, 3, 64, 2)),
        ('netsd_interconv2_w', (3, 3, 194, 64)),
        ('net2_predict_conv6_b', (2,)),
        ('net2_deconv4_w', (4, 4, 256, 1026)),
        ('scale_conv1_b', (2,)),
        ('net2_net2_upsample_flow5to4_w', (4, 4, 2, 2)),
        ('netsd_conv2_b', (128,)),
        ('netsd_conv2_1_b', (128,)),
        ('netsd_upsample_flow6to5_w', (4, 4, 2, 2)),
        ('net2_predict_conv5_b', (2,)),
        ('net3_conv6_1_b', (1024,)),
        ('netsd_conv6_w', (3, 3, 512, 1024)),
        ('Convolution4_b', (2,)),
        ('net2_predict_conv4_b', (2,)),
        ('fuse_deconv1_b', (32,)),
        ('conv3_1_w', (3, 3, 473, 256)),
        ('net3_deconv2_b', (64,)),
        ('netsd_conv6_b', (1024,)),
        ('net2_conv5_1_w', (3, 3, 512, 512)),
        ('net3_conv5_1_w', (3, 3, 512, 512)),
        ('deconv5_w', (4, 4, 512, 1024)),
        ('fuse_conv2_b', (128,)),
        ('netsd_conv1_1_b', (128,)),
        ('netsd_upsample_flow6to5_b', (2,)),
        ('Convolution5_w', (3, 3, 194, 2)),
        ('scale_conv1_w', (1, 1, 2, 2)),
        ('net2_net2_upsample_flow5to4_b', (2,)),
        ('conv6_1_b', (1024,)),
        ('fuse_conv2_1_b', (128,)),
        ('netsd_Convolution5_b', (2,)),
        ('netsd_conv3_1_b', (256,)),
        ('conv2_w', (5, 5, 64, 128)),
        ('fuse_conv2_w', (3, 3, 128, 128)),
        ('net2_conv2_w', (5, 5, 64, 128)),
        ('conv3_b', (256,)),
        ('net3_deconv5_w', (4, 4, 512, 1024)),
        ('img1s_aug_w', (1, 1, 1, 1)),
        ('netsd_conv2_w', (3, 3, 128, 128)),
        ('conv6_w', (3, 3, 512, 1024)),
        ('netsd_conv4_w', (3, 3, 256, 512)),
        ('net2_conv1_w', (7, 7, 12, 64)),
        ('netsd_Convolution1_w', (3, 3, 1024, 2)),
        ('netsd_conv1_w', (3, 3, 64, 64)),
        ('netsd_deconv4_b', (256,)),
        ('conv4_w', (3, 3, 256, 512)),
        ('conv5_b', (512,)),
        ('net3_deconv5_b', (512,)),
        ('netsd_interconv3_b', (128,)),
        ('net3_conv3_1_w', (3, 3, 256, 256)),
        ('net2_predict_conv5_w', (3, 3, 1026, 2)),
        ('Convolution3_b', (2,)),
        ('netsd_conv5_1_b', (512,)),
        ('netsd_interconv4_b', (256,)),
        ('conv4_b', (512,)),
        ('net3_net3_upsample_flow6to5_b', (2,)),
        ('Convolution5_b', (2,)),
        ('fuse_conv2_1_w', (3, 3, 128, 128)),
        ('net3_net3_upsample_flow4to3_b', (2,)),
        ('conv1_w', (7, 7, 3, 64)),
        ('upsample_flow6to5_b', (2,)),
        ('conv6_b', (1024,)),
        ('netsd_upsample_flow3to2_w', (4, 4, 2, 2)),
        ('net2_deconv3_w', (4, 4, 128, 770)),
        ('netsd_conv2_1_w', (3, 3, 128, 128)),
        ('netsd_Convolution3_b', (2,)),
        ('netsd_upsample_flow4to3_w', (4, 4, 2, 2)),
        ('fuse_interconv1_w', (3, 3, 162, 32)),
        ('netsd_upsample_flow4to3_b', (2,)),
        ('netsd_conv3_1_w', (3, 3, 256, 256)),
        ('netsd_deconv3_w', (4, 4, 128, 770)),
        ('net3_conv5_b', (512,)),
        ('net3_conv5_1_b', (512,)),
        ('net2_net2_upsample_flow4to3_w', (4, 4, 2, 2)),
        ('net2_net2_upsample_flow3to2_w', (4, 4, 2, 2)),
        ('net2_conv3_b', (256,)),
        ('netsd_conv6_1_w', (3, 3, 1024, 1024)),
        ('fuse_deconv0_b', (16,)),
        ('net2_predict_conv2_w', (3, 3, 194, 2)),
        ('net2_conv1_b', (64,)),
        ('net2_conv6_b', (1024,)),
        ('net3_predict_conv2_b', (2,)),
        ('net2_conv4_1_b', (512,)),
        ('netsd_Convolution4_w', (3, 3, 128, 2)),
        ('deconv3_w', (4, 4, 128, 770)),
        ('fuse_deconv1_w', (4, 4, 32, 128)),
        ('netsd_Convolution2_w', (3, 3, 512, 2)),
        ('netsd_Convolution1_b', (2,)),
        ('net2_conv3_1_b', (256,)),
        ('fuse_conv1_b', (64,)),
        ('net2_deconv4_b', (256,)),
        ('net3_predict_conv4_w', (3, 3, 770, 2)),
        ('Convolution3_w', (3, 3, 770, 2)),
        ('netsd_upsample_flow3to2_b', (2,)),
        ('net3_net3_upsample_flow3to2_b', (2,)),
        ('fuse_interconv0_b', (16,)),
        ('Convolution2_w', (3, 3, 1026, 2)),
        ('net2_conv6_w', (3, 3, 512, 1024)),
        ('netsd_conv3_w', (3, 3, 128, 256)),
        ('netsd_upsample_flow5to4_b', (2,)),
        ('net3_predict_conv3_w', (3, 3, 386, 2)),
        ('conv_redir_b', (32,)),
        ('net2_conv5_1_b', (512,)),
        ('upsample_flow6to5_w', (4, 4, 2, 2)),
        ('net2_net2_upsample_flow6to5_b', (2,)),
        ('net3_conv6_b', (1024,)),
        ('fuse__Convolution6_b', (2,)),
        ('Convolution2_b', (2,)),
        ('upsample_flow3to2_w', (4, 4, 2, 2)),
        ('net3_conv1_w', (7, 7, 12, 64)),
        ('fuse_deconv0_w', (4, 4, 16, 162)),
        ('img0s_aug_w', (1, 1, 1, 1)),
        ('netsd_conv1_1_w', (3, 3, 64, 128)),
        ('netsd_deconv2_b', (64,)),
        ('net2_conv5_w', (3, 3, 512, 512)),
        ('fuse_interconv1_b', (32,)),
        ('netsd_conv6_1_b', (1024,)),
        ('netsd_interconv2_b', (64,)),
        ('img1s_aug_b', (320, 448, 3, 1)),
        ('netsd_deconv2_w', (4, 4, 64, 386)),
        ('net2_predict_conv3_b', (2,)),
        ('net2_predict_conv2_b', (2,)),
        ('net3_deconv4_b', (256,)),
        ('net3_net3_upsample_flow5to4_b', (2,)),
        ('conv1_b', (64,)),
        ('net3_conv5_w', (3, 3, 512, 512)),
    ]
    return specs