├── Codes
    ├── flownet2
    │   ├── __init__.py
    │   ├── src
    │   │   ├── __init__.py
    │   │   ├── ops
    │   │   │   ├── build
    │   │   │   │   └── .gitkeep
    │   │   │   ├── downsample
    │   │   │   │   ├── downsample_kernel.h
    │   │   │   │   ├── downsample_op.cc
    │   │   │   │   ├── downsample_kernel.cc
    │   │   │   │   └── downsample_kernel_gpu.cu.cc
    │   │   │   ├── correlation
    │   │   │   │   ├── pad.h
    │   │   │   │   ├── pad.cu.cc
    │   │   │   │   ├── correlation_op.cc
    │   │   │   │   ├── correlation_kernel.h
    │   │   │   │   ├── correlation_kernel.cc
    │   │   │   │   ├── correlation_kernel.cu.cc
    │   │   │   │   └── correlation_grad_kernel.cc
    │   │   │   ├── flow_warp
    │   │   │   │   ├── flow_warp_op.cc
    │   │   │   │   ├── flow_warp.h
    │   │   │   │   ├── flow_warp.cc
    │   │   │   │   ├── flow_warp_grad.cc
    │   │   │   │   ├── flow_warp.cu.cc
    │   │   │   │   └── flow_warp_grad.cu.cc
    │   │   │   └── preprocessing
    │   │   │   │   ├── kernels
    │   │   │   │       ├── flow_augmentation.h
    │   │   │   │       ├── data_augmentation.h
    │   │   │   │       ├── flow_augmentation_gpu.cu.cc
    │   │   │   │       ├── flow_augmentation.cc
    │   │   │   │       └── augmentation_base.h
    │   │   │   │   └── preprocessing.cc
    │   │   ├── flownet2
    │   │   │   ├── __init__.py
    │   │   │   ├── train.py
    │   │   │   ├── test.py
    │   │   │   └── flownet2.py
    │   │   ├── flownet_c
    │   │   │   ├── __init__.py
    │   │   │   ├── train.py
    │   │   │   ├── test.py
    │   │   │   └── flownet_c.py
    │   │   ├── flownet_cs
    │   │   │   ├── __init__.py
    │   │   │   ├── train.py
    │   │   │   ├── test.py
    │   │   │   └── flownet_cs.py
    │   │   ├── flownet_css
    │   │   │   ├── __init__.py
    │   │   │   ├── train.py
    │   │   │   ├── test.py
    │   │   │   └── flownet_css.py
    │   │   ├── flownet_s
    │   │   │   ├── __init__.py
    │   │   │   ├── train.py
    │   │   │   ├── test.py
    │   │   │   └── flownet_s.py
    │   │   ├── flownet_sd
    │   │   │   ├── __init__.py
    │   │   │   ├── train.py
    │   │   │   ├── test.py
    │   │   │   └── flownet_sd.py
    │   │   ├── downsample.py
    │   │   ├── training_schedules.py
    │   │   ├── flow_warp.py
    │   │   ├── correlation.py
    │   │   ├── utils.py
    │   │   ├── dataset_configs.py
    │   │   └── net.py
    │   ├── .gitignore
    │   ├── LICENSE
    │   ├── corr.py
    │   ├── Makefile
    │   └── test.py
    ├── requirements.txt
    ├── checkpoints
    │   └── download_pretrains.sh
    ├── models.py
    ├── unet.py
    ├── loss_functions.py
    ├── training_hyper_params
    │   └── hyper_params.ini
    ├── inference.py
    ├── utils.py
    ├── constant.py
    └── train.py
├── assets
    ├── images.JPG
    ├── scalars.JPG
    └── architecture.JPG
├── .gitignore
├── Data
    ├── ped1.sh
    ├── ped2.sh
    ├── avenue.sh
    └── shanghaitech.sh
└── README.md


/Codes/flownet2/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/build/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet2/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_c/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_cs/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_css/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_s/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_sd/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/assets/images.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StevenLiuWen/ano_pred_cvpr2018/HEAD/assets/images.JPG


--------------------------------------------------------------------------------
/assets/scalars.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StevenLiuWen/ano_pred_cvpr2018/HEAD/assets/scalars.JPG


--------------------------------------------------------------------------------
/assets/architecture.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StevenLiuWen/ano_pred_cvpr2018/HEAD/assets/architecture.JPG


--------------------------------------------------------------------------------
/Codes/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.14.1scipy==1.0.0matplotlib==2.1.2tensorflow==1.4.1tensorflow-gpu==1.4.1Pillow==5.0.0pypng==0.0.18scikit_learn==0.19.1opencv-python==3.2.0.6


--------------------------------------------------------------------------------
/Codes/flownet2/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__/
 2 | *.py[cod]
 3 | *$py.class
 4 | *.o
 5 | *.so
 6 | *.so.dSYM
 7 | checkpoints/
 8 | !checkpoints/download.sh
 9 | !checkpoints/README.md
10 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | *.pyc
3 | Codes/checkpoints/avenue.*
4 | Codes/checkpoints/ped1.*
5 | Codes/checkpoints/ped2.*
6 | Codes/checkpoints/shanghaitech.*
7 | Codes/checkpoints/flownet-SD.*
8 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/downsample.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | _downsample = tf.load_op_library(
4 |     tf.resource_loader.get_path_to_datafile("./ops/build/downsample.so"))
5 | 
6 | 
7 | def downsample(tensor, size):
8 |     return _downsample.downsample(tensor, size)
9 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/training_schedules.py:
--------------------------------------------------------------------------------
 1 | LONG_SCHEDULE = {
 2 |     'step_values': [400000, 600000, 800000, 1000000],
 3 |     'learning_rates': [0.0001, 0.00005, 0.000025, 0.0000125, 0.00000625],
 4 |     'momentum': 0.9,
 5 |     'momentum2': 0.999,
 6 |     'weight_decay': 0.0004,
 7 |     'max_iter': 1200000,
 8 | }
 9 | 
10 | FINETUNE_SCHEDULE = {
11 |     # TODO: Finetune schedule
12 | }
13 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flow_warp.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | _flow_warp_ops = tf.load_op_library(
 4 |     tf.resource_loader.get_path_to_datafile("./ops/build/flow_warp.so"))
 5 | 
 6 | 
 7 | def flow_warp(image, flow):
 8 |     return _flow_warp_ops.flow_warp(image, flow)
 9 | 
10 | 
11 | @tf.RegisterGradient("FlowWarp")
12 | def _flow_warp_grad(flow_warp_op, gradients):
13 |     return _flow_warp_ops.flow_warp_grad(flow_warp_op.inputs[0],
14 |                                          flow_warp_op.inputs[1],
15 |                                          gradients)
16 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_c/train.py:
--------------------------------------------------------------------------------
 1 | from ..dataloader import load_batch
 2 | from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
 3 | from ..training_schedules import LONG_SCHEDULE
 4 | from .flownet_c import FlowNetC
 5 | 
 6 | # Create a new network
 7 | net = FlowNetC()
 8 | 
 9 | # Load a batch of data
10 | input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
11 | 
12 | # Train on the data
13 | net.train(
14 |     log_dir='./logs/flownet_c',
15 |     training_schedule=LONG_SCHEDULE,
16 |     input_a=input_a,
17 |     input_b=input_b,
18 |     flow=flow
19 | )
20 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_s/train.py:
--------------------------------------------------------------------------------
 1 | from ..dataloader import load_batch
 2 | from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
 3 | from ..training_schedules import LONG_SCHEDULE
 4 | from .flownet_s import FlowNetS
 5 | 
 6 | # Create a new network
 7 | net = FlowNetS()
 8 | 
 9 | # Load a batch of data
10 | input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
11 | 
12 | # Train on the data
13 | net.train(
14 |     log_dir='./logs/flownet_s_sample',
15 |     training_schedule=LONG_SCHEDULE,
16 |     input_a=input_a,
17 |     input_b=input_b,
18 |     flow=flow
19 | )
20 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_sd/train.py:
--------------------------------------------------------------------------------
 1 | from ..dataloader import load_batch
 2 | from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
 3 | from ..training_schedules import LONG_SCHEDULE
 4 | from .flownet_sd import FlowNetSD
 5 | 
 6 | # Create a new network
 7 | net = FlowNetSD()
 8 | 
 9 | # Load a batch of data
10 | input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
11 | 
12 | # Train on the data
13 | net.train(
14 |     log_dir='./logs/flownet_sd_sample',
15 |     training_schedule=LONG_SCHEDULE,
16 |     input_a=input_a,
17 |     input_b=input_b,
18 |     flow=flow
19 | )
20 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/downsample/downsample_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef FLOWNET_DOWNSAMPLE_H_
 2 | #define FLOWNET_DOWNSAMPLE_H_
 3 | 
 4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 5 | #include "tensorflow/core/framework/tensor_types.h"
 6 | #include "tensorflow/core/platform/types.h"
 7 | 
 8 | namespace tensorflow {
 9 | 
10 | typedef Eigen::GpuDevice GPUDevice;
11 | 
12 | bool Downsample(const GPUDevice& device,
13 |                 typename TTypes<float, 4>::ConstTensor input,
14 |                 typename TTypes<float, 4>::Tensor output);
15 | 
16 | }  // end namespace tensorflow
17 | 
18 | #endif  // FLOWNET_DOWNSAMPLE_H_
19 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/correlation/pad.h:
--------------------------------------------------------------------------------
 1 | #ifndef FLOWNET_PAD_H_
 2 | #define FLOWNET_PAD_H_
 3 | 
 4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 5 | 
 6 | namespace tensorflow {
 7 | typedef Eigen::GpuDevice GPUDevice;
 8 | 
 9 | void Pad(const GPUDevice& device,
10 |          const float     *input,
11 |          int              batch_size,
12 |          int              input_height,
13 |          int              input_width,
14 |          int              input_channels,
15 |          int              output_height,
16 |          int              output_width,
17 |          float           *output);
18 | } // end namespace tensorflow
19 | 
20 | #endif // ifndef FLOWNET_PAD_H_
21 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_cs/train.py:
--------------------------------------------------------------------------------
 1 | from ..dataloader import load_batch
 2 | from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
 3 | from ..training_schedules import LONG_SCHEDULE
 4 | from .flownet_cs import FlowNetCS
 5 | 
 6 | # Create a new network
 7 | net = FlowNetCS()
 8 | 
 9 | # Load a batch of data
10 | input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
11 | 
12 | # Train on the data
13 | net.train(
14 |     log_dir='./logs/flownet_cs',
15 |     training_schedule=LONG_SCHEDULE,
16 |     input_a=input_a,
17 |     input_b=input_b,
18 |     flow=flow,
19 |     # Load trained weights for C part of network
20 |     checkpoints={'./checkpoints/FlowNetC/flownet-C.ckpt-0': ('FlowNetCS/FlowNetC', 'FlowNetCS')}
21 | )
22 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_css/train.py:
--------------------------------------------------------------------------------
 1 | from ..dataloader import load_batch
 2 | from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
 3 | from ..training_schedules import LONG_SCHEDULE
 4 | from .flownet_css import FlowNetCSS
 5 | 
 6 | # Create a new network
 7 | net = FlowNetCSS()
 8 | 
 9 | # Load a batch of data
10 | input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
11 | 
12 | # Train on the data
13 | net.train(
14 |     log_dir='./logs/flownet_css',
15 |     training_schedule=LONG_SCHEDULE,
16 |     input_a=input_a,
17 |     input_b=input_b,
18 |     flow=flow,
19 |     # Load trained weights for CS part of network
20 |     checkpoints={
21 |         './checkpoints/FlowNetCS/flownet-CS.ckpt-0': ('FlowNetCSS/FlowNetCS', 'FlowNetCSS')}
22 | )
23 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/flow_warp/flow_warp_op.cc:
--------------------------------------------------------------------------------
 1 | #include "tensorflow/core/framework/common_shape_fns.h"
 2 | #include "tensorflow/core/framework/op.h"
 3 | #include "tensorflow/core/framework/shape_inference.h"
 4 | 
 5 | namespace tensorflow {
 6 | REGISTER_OP("FlowWarp")
 7 | .Input("image: float32")
 8 | .Input("flow: float32")
 9 | .Output("output: float32")
10 | .SetShapeFn(::tensorflow::shape_inference::UnchangedShape);
11 | 
12 | REGISTER_OP("FlowWarpGrad")
13 | .Input("image: float32")
14 | .Input("flow: float32")
15 | .Input("gradient: float32")
16 | .Output("image_grad: float32")
17 | .Output("flow_grad: float32")
18 | .SetShapeFn([](shape_inference::InferenceContext *c) {
19 |     c->set_output(0, c->input(0));
20 |     c->set_output(1, c->input(1));
21 |     return Status::OK();
22 |   });
23 | } // namespace tensorflow
24 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.h:
--------------------------------------------------------------------------------
 1 | #ifndef FLOWNET_FLOW_AUG_H_
 2 | #define FLOWNET_FLOW_AUG_H_
 3 | 
 4 | // See docs in ../ops/image_ops.cc.
 5 | 
 6 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 7 | #include "tensorflow/core/framework/tensor_types.h"
 8 | #include "tensorflow/core/platform/types.h"
 9 | 
10 | namespace tensorflow {
11 | template<class Device>
12 | void FillFlowAugmentation(const Device& device,
13 |                           typename TTypes<float, 4>::Tensor output,
14 |                           typename TTypes<float, 4>::ConstTensor flows,
15 |                           typename TTypes<float, 2>::ConstTensor transforms_from_a,
16 |                           typename TTypes<float, 2>::ConstTensor transforms_from_b);
17 | } // end namespace tensorflow
18 | 
19 | #endif  // FLOWNET_FLOW_AUG_H_
20 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet2/train.py:
--------------------------------------------------------------------------------
 1 | from ..dataloader import load_batch
 2 | from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
 3 | from ..training_schedules import LONG_SCHEDULE
 4 | from .flownet2 import FlowNet2
 5 | 
 6 | # Create a new network
 7 | net = FlowNet2()
 8 | 
 9 | # Load a batch of data
10 | input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
11 | 
12 | # Train on the data
13 | net.train(
14 |     log_dir='./logs/flownet_2',
15 |     training_schedule=LONG_SCHEDULE,
16 |     input_a=input_a,
17 |     input_b=input_b,
18 |     flow=flow,
19 |     # Load trained weights for CSS and SD parts of network
20 |     checkpoints={
21 |         './checkpoints/FlowNetCSS-ft-sd/flownet-CSS-ft-sd.ckpt-0': ('FlowNet2/FlowNetCSS', 'FlowNet2'),
22 |         './checkpoints/FlowNetSD/flownet-SD.ckpt-0': ('FlowNet2/FlowNetSD', 'FlowNet2')
23 |     }
24 | )
25 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.h:
--------------------------------------------------------------------------------
 1 | #ifndef FLOWNET_DATA_AUGMENTATION_H_
 2 | #define FLOWNET_DATA_AUGMENTATION_H_
 3 | 
 4 | #include "tensorflow/core/framework/op_kernel.h"
 5 | 
 6 | namespace tensorflow {
 7 | template<class Device>
 8 | void Augment(OpKernelContext *context,
 9 |              const Device   & d,
10 |              const int        batch_size,
11 |              const int        channels,
12 |              const int        src_width,
13 |              const int        src_height,
14 |              const int        src_count,
15 |              const int        out_width,
16 |              const int        out_height,
17 |              const float     *src_data,
18 |              float           *out_data,
19 |              const float     *transMats,
20 |              float           *chromatic_coeffs);
21 | } // namespace tensorflow
22 | #endif // FLOWNET_DATA_AUGMENTATION_H_
23 | 


--------------------------------------------------------------------------------
/Data/ped1.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "Downloading UCSD-Ped1 dataset....."
 4 | 
 5 | wget "https://ofhz9a.bn.files.1drv.com/y4mP5HrUYe3m0KnhIA3KbOaqlFEKpvCmqepz-C9UDoIUgO4i0WuW9Dm-J-98qYXivCdniC-_mYHq9r4t25im6XogBz-INqqktYE2Rc38vkKKwM1iFZ_uWxoGon4QniumO2gNLscP9N9wNw6fWD8GqIYqOUVe_UO9svbF0RpeRpAbSe82uHJ9qqmN2q-mZ9prbrScwsolPEv_IxprXqgjG5Plw/ped1.tar.gz?download&psid=1"
 6 | mv "ped1.tar.gz?download&psid=1" ped1.tar.gz
 7 | tar -xvf ped1.tar.gz
 8 | rm ped1.tar.gz
 9 | 
10 | echo "Download UCSD-Ped1 successfully..."
11 | 
12 | echo "If encounters any ERRORS(I guess the download link in shell script is not permanent),
13 | please manually download ped1.tar.gz from https://onedrive.live.com/?authkey=%21AMqh2fTSemfrokE&id=3705E349C336415F%215109&cid=3705E349C336415F
14 | and run the following commands:
15 | 
16 | tar -xvf ped1.tar.gz
17 | rm ped1.tar.gz
18 | 
19 | make sure the ped1 dataset is under the director of Data.
20 | "
21 | 
22 | 


--------------------------------------------------------------------------------
/Data/ped2.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "Downloading UCSD-Ped2 dataset....."
 4 | 
 5 | wget "https://ofhz9a.bn.files.1drv.com/y4mFzDLdy1ZKsJawXtABkPGQsYhoZzeVYofrv5cKtvNS85CyUJcqwL0-P_PnzNvwrfEkIlQA9mQhld7CK9ohIa_lFvovPgNOZ3Z7BVnY-0sKA97Bv3OrnSU2Vkh9fl5ceDBo8PuCVoc_XHJN03Zj-v8q31cswu9RliBzujx_mLW4PxPi0cxui2j_n9xFp-S1Px_6H5a4_SGQBr_8EP8qsz3fA/ped2.tar.gz?download&psid=1"
 6 | mv "ped2.tar.gz?download&psid=1"    ped2.tar.gz
 7 | tar -xvf ped2.tar.gz
 8 | rm ped2.tar.gz
 9 | 
10 | echo "Download UCSD-Ped2 successfully..."
11 | 
12 | echo "If encounters any ERRORS(I guess the download link in shell script is not permanent),
13 | please manually download ped2.tar.gz from https://onedrive.live.com/?authkey=%21AMqh2fTSemfrokE&id=3705E349C336415F%215109&cid=3705E349C336415F
14 | and run the following commands:
15 | 
16 | tar -xvf ped2.tar.gz
17 | rm ped2.tar.gz
18 | 
19 | make sure the ped2 dataset is under the director of Data.
20 | "
21 | 


--------------------------------------------------------------------------------
/Data/avenue.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "Downloading CUHK-Avenue dataset....."
 4 | 
 5 | wget "https://ofhz9a.bn.files.1drv.com/y4m4fpDJfxvClUUg4yfbH22DpWmnN8smMTSoK0tPyEB2VUQmsD0oUkURguUYhQABYcDkdXvpseAe2G4gxjdnssPWERMbyGA8z6tk-pU6V4fNvRjZBdH3P6joeAEbOPXcK0ZhQCRqDVROdbZQ0vMZjoXiRf2Kvs_o175MW1xLKvfOmIMcw3ZhtF6iOmvIvMfmP8RcZJNbp8CSOwySQgONpkODQ/avenue.tar.gz?download&psid=1"
 6 | mv "avenue.tar.gz?download&psid=1" avenue.tar.gz
 7 | tar -xvf avenue.tar.gz
 8 | rm avenue.tar.gz
 9 | 
10 | echo "Download CUHK-Avenue successfully..."
11 | 
12 | echo "If encounters any ERRORS(I guess the download link in shell script is not permanent),
13 | please manually download avenue.tar.gz from https://onedrive.live.com/?authkey=%21AMqh2fTSemfrokE&id=3705E349C336415F%215109&cid=3705E349C336415F
14 | and run the following commands:
15 | 
16 | tar -xvf avenue.tar.gz
17 | rm avenue.tar.gz
18 | 
19 | make sure the avenue dataset is under the director of Data.
20 | "
21 | 


--------------------------------------------------------------------------------
/Data/shanghaitech.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | echo "download ShanghaiTech-Campus dataset....."
 3 | 
 4 | wget "https://ofhz9a.bn.files.1drv.com/y4mZ-bxF_FckWxjvJKGdcIkCr4PZOK3JQIbVqcv_1IE8QnAvQzlCnIqAWiWI6l_NVpBcfizN_6EOYc01NMPCiEj_YCFOyBVK1ZjczoTHClYXry87x5DrzcimwVXttkPtHBytzj43XKWFoOIFyZqpJDUL5o5GoZnfp5g3i1tthSsuIy4YnMMOup1tebJ8jb_Kqb09kksykw2YE-C-0pD5ovsVQ/shanghaitech.tar.gz?download&psid=1"
 5 | mv "shanghaitech.tar.gz?download&psid=1"    shanghaitech.tar.gz
 6 | tar -xvf shanghaitech.tar.gz
 7 | rm shanghaitech.tar.gz
 8 | 
 9 | echo "download ShanghaiTech-Campus successfully..."
10 | 
11 | echo "If encounters any ERRORS(I guess the download link in shell script is not permanent),
12 | please manually download shanghaitech.tar.gz from https://onedrive.live.com/?authkey=%21AMqh2fTSemfrokE&id=3705E349C336415F%215109&cid=3705E349C336415F
13 | and run the following commands:
14 | 
15 | tar -xvf shanghaitech.tar.gz
16 | rm shanghaitech.tar.gz
17 | 
18 | make sure the shanghaitech dataset is under the director of Data.
19 | "


--------------------------------------------------------------------------------
/Codes/checkpoints/download_pretrains.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "Downloading trained models on ped1, ped2 and avenue datasets ....."
 4 | 
 5 | wget "https://ofhz9a.bn.files.1drv.com/y4m5lC_SnkDiTcKjKEiue7uKKHX_jM7LojjlsjpurNHC8gkOj0MjgqdKrj6YJwLNFMAb649j07rheaBeS-B8JmYwGc3wy6Zb7T0ICYBzz9PdheTGxHWGsLCxJ7MpaA4Rj6V0KmtAyoUYbdeNQVWEAPAZtVn1ikrdslLVVvKB1doyWRaTnIKjCiXIybbXG-6VtZ4uw10H_PrBFTEq6cBeqr2CQ/pretrains.tar.gz?download&psid=1"
 6 | mv "pretrains.tar.gz?download&psid=1" pretrains.tar.gz
 7 | tar -xvf pretrains.tar.gz
 8 | rm pretrains.tar.gz
 9 | 
10 | echo "Download pretrains successfully..."
11 | 
12 | echo "If encounters any ERRORS(I guess the download link in shell script is not permanent),
13 | please manually download pretrains.tar.gz from https://onedrive.live.com/?authkey=%21AMqh2fTSemfrokE&id=3705E349C336415F%215109&cid=3705E349C336415F
14 | and run the following commands:
15 | 
16 | tar -xvf pretrains.tar.gz
17 | rm pretrains.tar.gz
18 | 
19 | make sure the pretrains is under the director of Codes/checkpoints.
20 | "


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/downsample/downsample_op.cc:
--------------------------------------------------------------------------------
 1 | #include "tensorflow/core/framework/common_shape_fns.h"
 2 | #include "tensorflow/core/framework/op.h"
 3 | #include "tensorflow/core/framework/shape_inference.h"
 4 | 
 5 | namespace tensorflow {
 6 | 
 7 | using shape_inference::InferenceContext;
 8 | using shape_inference::ShapeHandle;
 9 | using shape_inference::DimensionHandle;
10 | 
11 | Status SetOutputToSizedImage(InferenceContext* c) {
12 |   ShapeHandle input;
13 |   TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
14 |   DimensionHandle batch = c->Dim(input, 0);
15 |   DimensionHandle depth = c->Dim(input, 3);
16 |   std::vector<int32> size_;
17 |   c->GetAttr("size", &size_);
18 |   DimensionHandle height = c->MakeDim(size_[0]);
19 |   DimensionHandle width  = c->MakeDim(size_[1]);
20 |   c->set_output(0, c->MakeShape({batch, height, width, depth}));
21 |   return Status::OK();
22 | }
23 | 
24 | REGISTER_OP("Downsample")
25 |     .Input("input: float32")
26 |     .Attr("size: list(int) >= 2")
27 |     .Output("output: float32")
28 |     .SetShapeFn(SetOutputToSizedImage);
29 | 
30 | }  // namespace tensorflow
31 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/flow_warp/flow_warp.h:
--------------------------------------------------------------------------------
 1 | #ifndef FLOWNET_FLOWWARP_H_
 2 | #define FLOWNET_FLOWWARP_H_
 3 | 
 4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 5 | #include "tensorflow/core/framework/tensor_types.h"
 6 | #include "tensorflow/core/platform/types.h"
 7 | 
 8 | #define FW_THREADS 32
 9 | #define FW_TILE_X FW_THREADS
10 | #define FW_TILE_C FW_THREADS
11 | 
12 | namespace tensorflow {
13 | typedef Eigen::GpuDevice GPUDevice;
14 | 
15 | void FlowWarp(const GPUDevice& device,
16 |               typename TTypes<float, 4>::ConstTensor input,
17 |               typename TTypes<float, 4>::ConstTensor flow,
18 |               typename TTypes<float, 4>::Tensor output);
19 | 
20 | void FlowWarpGrad(const GPUDevice& device,
21 |                   typename TTypes<float, 4>::ConstTensor image,
22 |                   typename TTypes<float, 4>::ConstTensor flow,
23 |                   typename TTypes<float, 4>::ConstTensor gradient,
24 |                   typename TTypes<float, 4>::Tensor image_grad,
25 |                   typename TTypes<float, 4>::Tensor flow_grad);
26 | } // end namespace tensorflow
27 | 
28 | #endif  // FLOWNET_FLOWWARP_H_
29 | 


--------------------------------------------------------------------------------
/Codes/flownet2/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Sam Pepose
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet2/test.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | from ..net import Mode
 4 | from .flownet2 import FlowNet2
 5 | 
 6 | FLAGS = None
 7 | 
 8 | 
 9 | def main():
10 |     # Create a new network
11 |     net = FlowNet2(mode=Mode.TEST)
12 | 
13 |     # Train on the data
14 |     net.test(
15 |         checkpoint='./checkpoints/FlowNet2/flownet-2.ckpt-0',
16 |         input_a_path=FLAGS.input_a,
17 |         input_b_path=FLAGS.input_b,
18 |         out_path=FLAGS.out,
19 |     )
20 | 
21 | 
22 | if __name__ == '__main__':
23 |     parser = argparse.ArgumentParser()
24 |     parser.add_argument(
25 |         '--input_a',
26 |         type=str,
27 |         required=True,
28 |         help='Path to first image'
29 |     )
30 |     parser.add_argument(
31 |         '--input_b',
32 |         type=str,
33 |         required=True,
34 |         help='Path to second image'
35 |     )
36 |     parser.add_argument(
37 |         '--out',
38 |         type=str,
39 |         required=True,
40 |         help='Path to output flow result'
41 |     )
42 |     FLAGS = parser.parse_args()
43 | 
44 |     # Verify arguments are valid
45 |     if not os.path.exists(FLAGS.input_a):
46 |         raise ValueError('image_a path must exist')
47 |     if not os.path.exists(FLAGS.input_b):
48 |         raise ValueError('image_b path must exist')
49 |     if not os.path.isdir(FLAGS.out):
50 |         raise ValueError('out directory must exist')
51 |     main()
52 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_c/test.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | from ..net import Mode
 4 | from .flownet_c import FlowNetC
 5 | 
 6 | FLAGS = None
 7 | 
 8 | 
 9 | def main():
10 |     # Create a new network
11 |     net = FlowNetC(mode=Mode.TEST)
12 | 
13 |     # Train on the data
14 |     net.test(
15 |         checkpoint='./checkpoints/FlowNetC/flownet-C.ckpt-0',
16 |         input_a_path=FLAGS.input_a,
17 |         input_b_path=FLAGS.input_b,
18 |         out_path=FLAGS.out,
19 |     )
20 | 
21 | 
22 | if __name__ == '__main__':
23 |     parser = argparse.ArgumentParser()
24 |     parser.add_argument(
25 |         '--input_a',
26 |         type=str,
27 |         required=True,
28 |         help='Path to first image'
29 |     )
30 |     parser.add_argument(
31 |         '--input_b',
32 |         type=str,
33 |         required=True,
34 |         help='Path to second image'
35 |     )
36 |     parser.add_argument(
37 |         '--out',
38 |         type=str,
39 |         required=True,
40 |         help='Path to output flow result'
41 |     )
42 |     FLAGS = parser.parse_args()
43 | 
44 |     # Verify arguments are valid
45 |     if not os.path.exists(FLAGS.input_a):
46 |         raise ValueError('image_a path must exist')
47 |     if not os.path.exists(FLAGS.input_b):
48 |         raise ValueError('image_b path must exist')
49 |     if not os.path.isdir(FLAGS.out):
50 |         raise ValueError('out directory must exist')
51 |     main()
52 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_s/test.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | from ..net import Mode
 4 | from .flownet_s import FlowNetS
 5 | 
 6 | FLAGS = None
 7 | 
 8 | 
 9 | def main():
10 |     # Create a new network
11 |     net = FlowNetS(mode=Mode.TEST)
12 | 
13 |     # Train on the data
14 |     net.test(
15 |         checkpoint='./checkpoints/FlowNetS/flownet-S.ckpt-0',
16 |         input_a_path=FLAGS.input_a,
17 |         input_b_path=FLAGS.input_b,
18 |         out_path=FLAGS.out,
19 |     )
20 | 
21 | 
22 | if __name__ == '__main__':
23 |     parser = argparse.ArgumentParser()
24 |     parser.add_argument(
25 |         '--input_a',
26 |         type=str,
27 |         required=True,
28 |         help='Path to first image'
29 |     )
30 |     parser.add_argument(
31 |         '--input_b',
32 |         type=str,
33 |         required=True,
34 |         help='Path to second image'
35 |     )
36 |     parser.add_argument(
37 |         '--out',
38 |         type=str,
39 |         required=True,
40 |         help='Path to output flow result'
41 |     )
42 |     FLAGS = parser.parse_args()
43 | 
44 |     # Verify arguments are valid
45 |     if not os.path.exists(FLAGS.input_a):
46 |         raise ValueError('image_a path must exist')
47 |     if not os.path.exists(FLAGS.input_b):
48 |         raise ValueError('image_b path must exist')
49 |     if not os.path.isdir(FLAGS.out):
50 |         raise ValueError('out directory must exist')
51 |     main()
52 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_cs/test.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | from ..net import Mode
 4 | from .flownet_cs import FlowNetCS
 5 | 
 6 | FLAGS = None
 7 | 
 8 | 
 9 | def main():
10 |     # Create a new network
11 |     net = FlowNetCS(mode=Mode.TEST)
12 | 
13 |     # Train on the data
14 |     net.test(
15 |         checkpoint='./checkpoints/FlowNetCS/flownet-CS.ckpt-0',
16 |         input_a_path=FLAGS.input_a,
17 |         input_b_path=FLAGS.input_b,
18 |         out_path=FLAGS.out,
19 |     )
20 | 
21 | 
22 | if __name__ == '__main__':
23 |     parser = argparse.ArgumentParser()
24 |     parser.add_argument(
25 |         '--input_a',
26 |         type=str,
27 |         required=True,
28 |         help='Path to first image'
29 |     )
30 |     parser.add_argument(
31 |         '--input_b',
32 |         type=str,
33 |         required=True,
34 |         help='Path to second image'
35 |     )
36 |     parser.add_argument(
37 |         '--out',
38 |         type=str,
39 |         required=True,
40 |         help='Path to output flow result'
41 |     )
42 |     FLAGS = parser.parse_args()
43 | 
44 |     # Verify arguments are valid
45 |     if not os.path.exists(FLAGS.input_a):
46 |         raise ValueError('image_a path must exist')
47 |     if not os.path.exists(FLAGS.input_b):
48 |         raise ValueError('image_b path must exist')
49 |     if not os.path.isdir(FLAGS.out):
50 |         raise ValueError('out directory must exist')
51 |     main()
52 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_sd/test.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | from ..net import Mode
 4 | from .flownet_sd import FlowNetSD
 5 | 
 6 | FLAGS = None
 7 | 
 8 | 
 9 | def main():
10 |     # Create a new network
11 |     net = FlowNetSD(mode=Mode.TEST)
12 | 
13 |     # Train on the data
14 |     net.test(
15 |         checkpoint='./checkpoints/FlowNetSD/flownet-SD.ckpt-0',
16 |         input_a_path=FLAGS.input_a,
17 |         input_b_path=FLAGS.input_b,
18 |         out_path=FLAGS.out,
19 |     )
20 | 
21 | 
22 | if __name__ == '__main__':
23 |     parser = argparse.ArgumentParser()
24 |     parser.add_argument(
25 |         '--input_a',
26 |         type=str,
27 |         required=True,
28 |         help='Path to first image'
29 |     )
30 |     parser.add_argument(
31 |         '--input_b',
32 |         type=str,
33 |         required=True,
34 |         help='Path to second image'
35 |     )
36 |     parser.add_argument(
37 |         '--out',
38 |         type=str,
39 |         required=True,
40 |         help='Path to output flow result'
41 |     )
42 |     FLAGS = parser.parse_args()
43 | 
44 |     # Verify arguments are valid
45 |     if not os.path.exists(FLAGS.input_a):
46 |         raise ValueError('image_a path must exist')
47 |     if not os.path.exists(FLAGS.input_b):
48 |         raise ValueError('image_b path must exist')
49 |     if not os.path.isdir(FLAGS.out):
50 |         raise ValueError('out directory must exist')
51 |     main()
52 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_css/test.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | from ..net import Mode
 4 | from .flownet_css import FlowNetCSS
 5 | 
 6 | FLAGS = None
 7 | 
 8 | 
 9 | def main():
10 |     # Create a new network
11 |     net = FlowNetCSS(mode=Mode.TEST)
12 | 
13 |     # Train on the data
14 |     net.test(
15 |         checkpoint='./checkpoints/FlowNetCSS/flownet-CSS.ckpt-0',
16 |         input_a_path=FLAGS.input_a,
17 |         input_b_path=FLAGS.input_b,
18 |         out_path=FLAGS.out,
19 |     )
20 | 
21 | 
22 | if __name__ == '__main__':
23 |     parser = argparse.ArgumentParser()
24 |     parser.add_argument(
25 |         '--input_a',
26 |         type=str,
27 |         required=True,
28 |         help='Path to first image'
29 |     )
30 |     parser.add_argument(
31 |         '--input_b',
32 |         type=str,
33 |         required=True,
34 |         help='Path to second image'
35 |     )
36 |     parser.add_argument(
37 |         '--out',
38 |         type=str,
39 |         required=True,
40 |         help='Path to output flow result'
41 |     )
42 |     FLAGS = parser.parse_args()
43 | 
44 |     # Verify arguments are valid
45 |     if not os.path.exists(FLAGS.input_a):
46 |         raise ValueError('image_a path must exist')
47 |     if not os.path.exists(FLAGS.input_b):
48 |         raise ValueError('image_b path must exist')
49 |     if not os.path.isdir(FLAGS.out):
50 |         raise ValueError('out directory must exist')
51 |     main()
52 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/correlation.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | _correlation_ops = tf.load_op_library(
 4 |     tf.resource_loader.get_path_to_datafile("./ops/build/correlation.so"))
 5 | 
 6 | 
 7 | def correlation(input_a, input_b, kernel_size, max_displacement, stride_1, stride_2, padding):
 8 |     return _correlation_ops.correlation(input_a,
 9 |                                         input_b,
10 |                                         kernel_size,
11 |                                         max_displacement,
12 |                                         stride_1,
13 |                                         stride_2,
14 |                                         padding)
15 | 
16 | 
17 | @tf.RegisterGradient("Correlation")
18 | def _correlation_grad(corr_op, gradients):
19 |     kernel_size = corr_op.get_attr("kernel_size")
20 |     max_displacement = corr_op.get_attr("max_displacement")
21 |     stride_1 = corr_op.get_attr("stride_1")
22 |     stride_2 = corr_op.get_attr("stride_2")
23 |     pad = corr_op.get_attr("pad")
24 | 
25 |     corr_grads = _correlation_ops.correlation_grad(gradients,
26 |                                                    corr_op.inputs[0],
27 |                                                    corr_op.inputs[1],
28 |                                                    kernel_size,
29 |                                                    max_displacement,
30 |                                                    stride_1,
31 |                                                    stride_2,
32 |                                                    pad)
33 | 
34 |     # Return the gradients with respect to input_a and input_b
35 |     return corr_grads.backprops_a, corr_grads.backprops_b
36 | 


--------------------------------------------------------------------------------
/Codes/flownet2/corr.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | import math
 4 | 
 5 | BATCH_SIZE = 8
 6 | HEIGHT = 30
 7 | WIDTH = 60
 8 | CHANNELS = 3
 9 | 
10 | NEIGHBORHOOD_SIZE = 41
11 | MAX_DISPLACEMENT = int(math.ceil(NEIGHBORHOOD_SIZE / 2.0))
12 | STRIDE_2 = 2
13 | 
14 | assert(STRIDE_2 <= NEIGHBORHOOD_SIZE)
15 | 
16 | # Define two feature maps
17 | fmA = tf.ones((BATCH_SIZE, HEIGHT, WIDTH, CHANNELS), dtype=tf.int32)
18 | fmB = tf.convert_to_tensor(np.random.randint(5, size=(BATCH_SIZE, HEIGHT, WIDTH, CHANNELS)), dtype=tf.int32)
19 | 
20 | depth = int(math.floor((2.0 * MAX_DISPLACEMENT + 1) / STRIDE_2) ** 2)
21 | 
22 | print('Output should be size:', (BATCH_SIZE, HEIGHT, WIDTH, depth))
23 | print('Striding at values: ', [e for e in range(-MAX_DISPLACEMENT + 1, MAX_DISPLACEMENT, STRIDE_2)])
24 | 
25 | def main():
26 |     out = []
27 |     for i in range(-MAX_DISPLACEMENT + 1, MAX_DISPLACEMENT, STRIDE_2): # height
28 |         for j in range(-MAX_DISPLACEMENT + 1, MAX_DISPLACEMENT, STRIDE_2): # width
29 |             padded_a = tf.pad(fmA, [[0,0], [0, abs(i)], [0, abs(j)], [0, 0]])
30 |             padded_b = tf.pad(fmB, [[0, 0], [abs(i), 0], [abs(j), 0], [0, 0]])
31 |             m = padded_a * padded_b
32 | 
33 |             height_start_idx = 0 if i <= 0 else i
34 |             height_end_idx = height_start_idx + HEIGHT
35 |             width_start_idx = 0 if j <= 0 else j
36 |             width_end_idx = width_start_idx + WIDTH
37 |             cut = m[:, height_start_idx:height_end_idx, width_start_idx:width_end_idx, :]
38 | 
39 |             final = tf.reduce_sum(cut, 3)
40 |             out.append(final)
41 |     corr = tf.stack(out, 3)
42 |     print('Output size: ', corr.shape)
43 | 
44 | 
45 | main()
46 | 


--------------------------------------------------------------------------------
/Codes/models.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | import unet
 4 | import pix2pix
 5 | 
 6 | from flownet2.src.flowlib import flow_to_image
 7 | from flownet2.src.flownet_sd.flownet_sd import FlowNetSD  # Ok
 8 | from flownet2.src.training_schedules import LONG_SCHEDULE
 9 | from flownet2.src.net import Mode
10 | 
11 | 
12 | slim = tf.contrib.slim
13 | 
14 | 
15 | def generator(inputs, layers, features_root=64, filter_size=3, pool_size=2, output_channel=3):
16 |     return unet.unet(inputs, layers, features_root, filter_size, pool_size, output_channel)
17 | 
18 | 
19 | def discriminator(inputs, num_filers=(128, 256, 512, 512)):
20 |     logits, end_points = pix2pix.pix2pix_discriminator(inputs, num_filers)
21 |     return logits, end_points['predictions']
22 | 
23 | 
24 | def flownet(input_a, input_b, height, width, reuse=None):
25 |     net = FlowNetSD(mode=Mode.TEST)
26 |     # train preds flow
27 |     input_a = (input_a + 1.0) / 2.0     # flownet receives image with color space in [0, 1]
28 |     input_b = (input_b + 1.0) / 2.0     # flownet receives image with color space in [0, 1]
29 |     # input size is 384 x 512
30 |     input_a = tf.image.resize_images(input_a, [height, width])
31 |     input_b = tf.image.resize_images(input_b, [height, width])
32 |     flows = net.model(
33 |         inputs={'input_a': input_a, 'input_b': input_b},
34 |         training_schedule=LONG_SCHEDULE,
35 |         trainable=False, reuse=reuse
36 |     )
37 |     return flows['flow']
38 | 
39 | 
40 | def initialize_flownet(sess, checkpoint):
41 |     flownet_vars = slim.get_variables_to_restore(include=['FlowNetSD'])
42 |     flownet_saver = tf.train.Saver(flownet_vars)
43 |     print('FlownetSD restore from {}!'.format(checkpoint))
44 |     flownet_saver.restore(sess, checkpoint)
45 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/downsample/downsample_kernel.cc:
--------------------------------------------------------------------------------
 1 | #define EIGEN_USE_THREADS
 2 | 
 3 | #include "downsample_kernel.h"
 4 | #include "tensorflow/core/framework/op_kernel.h"
 5 | #include "tensorflow/core/framework/register_types.h"
 6 | #include "tensorflow/core/framework/types.h"
 7 | #include "tensorflow/core/platform/types.h"
 8 | 
 9 | namespace tensorflow {
10 | 
11 | typedef Eigen::GpuDevice GPUDevice;
12 | 
13 | template <typename Device>
14 | class DownsampleKernel : public OpKernel {
15 |  public:
16 |   explicit DownsampleKernel(OpKernelConstruction* ctx) : OpKernel(ctx) {
17 |     // Get the size [height, width] tensor and verify its dimensions
18 |     OP_REQUIRES_OK(ctx, ctx->GetAttr("size", &size_));
19 |     OP_REQUIRES(ctx, size_.size() == 2, errors::InvalidArgument("size must be 2 dimensions"));
20 |   }
21 | 
22 |   void Compute(OpKernelContext* ctx) override {
23 |     // Get the input images and transforms and verify their dimensions
24 |     const Tensor& input_t = ctx->input(0);
25 |     OP_REQUIRES(ctx, input_t.dims() == 4,
26 |                 errors::InvalidArgument("Input images must have rank 4"));
27 | 
28 |     // Allocate the memory for the output
29 |     Tensor* output_t;
30 |     OP_REQUIRES_OK(ctx, ctx->allocate_output(
31 |         0, TensorShape({input_t.dim_size(0), size_[0], size_[1], input_t.dim_size(3)}), &output_t));
32 | 
33 |     // Perform flow augmentation
34 |     auto input = input_t.tensor<float, 4>();
35 |     auto output = output_t->tensor<float, 4>();
36 | 
37 |     Downsample(ctx->eigen_gpu_device(), input, output);
38 |   }
39 | 
40 |   private:
41 |     std::vector<int32> size_;
42 | };
43 | 
44 | REGISTER_KERNEL_BUILDER(Name("Downsample")
45 |                           .Device(DEVICE_GPU),
46 |                       DownsampleKernel<GPUDevice>)
47 | }  // end namespace tensorflow
48 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_cs/flownet_cs.py:
--------------------------------------------------------------------------------
 1 | from ..net import Net, Mode
 2 | from ..flownet_c.flownet_c import FlowNetC
 3 | from ..flownet_s.flownet_s import FlowNetS
 4 | from ..flow_warp import flow_warp
 5 | import tensorflow as tf
 6 | 
 7 | 
 8 | class FlowNetCS(Net):
 9 | 
10 |     def __init__(self, mode=Mode.TRAIN, debug=False):
11 |         self.net_c = FlowNetC(mode, debug)
12 |         self.net_s = FlowNetS(mode, debug)
13 |         super(FlowNetCS, self).__init__(mode=mode, debug=debug)
14 | 
15 |     def model(self, inputs, training_schedule, trainable=True):
16 |         with tf.variable_scope('FlowNetCS'):
17 |             # Forward pass through FlowNetC with weights frozen
18 |             net_c_predictions = self.net_c.model(inputs, training_schedule, trainable=True)
19 | 
20 |             # Perform flow warping (to move image B closer to image A based on flow prediction)
21 |             warped = flow_warp(inputs['input_b'], net_c_predictions['flow'])
22 | 
23 |             # Compute brightness error: sqrt(sum (input_a - warped)^2 over channels)
24 |             brightness_error = inputs['input_a'] - warped
25 |             brightness_error = tf.square(brightness_error)
26 |             brightness_error = tf.reduce_sum(brightness_error, keep_dims=True, axis=3)
27 |             brightness_error = tf.sqrt(brightness_error)
28 | 
29 |             # Gather all inputs to FlowNetS
30 |             inputs_to_s = {
31 |                 'input_a': inputs['input_a'],
32 |                 'input_b': inputs['input_b'],
33 |                 'warped': warped,
34 |                 'flow': net_c_predictions['flow'] * 0.05,
35 |                 'brightness_error': brightness_error,
36 |             }
37 | 
38 |             return self.net_s.model(inputs_to_s, training_schedule, trainable=trainable)
39 | 
40 |     def loss(self, flow, predictions):
41 |         return self.net_s.loss(flow, predictions)
42 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_css/flownet_css.py:
--------------------------------------------------------------------------------
 1 | from ..net import Net, Mode
 2 | from ..flownet_cs.flownet_cs import FlowNetCS
 3 | from ..flownet_s.flownet_s import FlowNetS
 4 | from ..flow_warp import flow_warp
 5 | import tensorflow as tf
 6 | 
 7 | 
 8 | class FlowNetCSS(Net):
 9 | 
10 |     def __init__(self, mode=Mode.TRAIN, debug=False):
11 |         self.net_cs = FlowNetCS(mode, debug)
12 |         self.net_s = FlowNetS(mode, debug)
13 |         super(FlowNetCSS, self).__init__(mode=mode, debug=debug)
14 | 
15 |     def model(self, inputs, training_schedule, trainable=True):
16 |         with tf.variable_scope('FlowNetCSS'):
17 |             # Forward pass through FlowNetCS with weights frozen
18 |             net_cs_predictions = self.net_cs.model(inputs, training_schedule, trainable=True)
19 | 
20 |             # Perform flow warping (to move image B closer to image A based on flow prediction)
21 |             warped = flow_warp(inputs['input_b'], net_cs_predictions['flow'])
22 | 
23 |             # Compute brightness error: sqrt(sum (input_a - warped)^2 over channels)
24 |             brightness_error = inputs['input_a'] - warped
25 |             brightness_error = tf.square(brightness_error)
26 |             brightness_error = tf.reduce_sum(brightness_error, keep_dims=True, axis=3)
27 |             brightness_error = tf.sqrt(brightness_error)
28 | 
29 |             # Gather all inputs to FlowNetS
30 |             inputs_to_s = {
31 |                 'input_a': inputs['input_a'],
32 |                 'input_b': inputs['input_b'],
33 |                 'warped': warped,
34 |                 'flow': net_cs_predictions['flow'] * 0.05,
35 |                 'brightness_error': brightness_error,
36 |             }
37 | 
38 |             return self.net_s.model(inputs_to_s, training_schedule, trainable=trainable)
39 | 
40 |     def loss(self, flow, predictions):
41 |         return self.net_s.loss(flow, predictions)
42 | 


--------------------------------------------------------------------------------
/Codes/unet.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from tensorflow.contrib.layers import conv2d, max_pool2d, conv2d_transpose
 3 | 
 4 | 
 5 | def unet(inputs, layers, features_root=64, filter_size=3, pool_size=2, output_channel=1):
 6 |     """
 7 |     :param inputs: input tensor, shape[None, height, width, channel]
 8 |     :param layers: number of layers
 9 |     :param features_root: number of features in the first layer
10 |     :param filter_size: size of each conv layer
11 |     :param pool_size:  size of each max pooling layer
12 |     :param output_channel:  number of channel for output tensor
13 |     :return: a tensor, shape[None, height, width, output_channel]
14 |     """
15 | 
16 |     in_node = inputs
17 |     conv = []
18 |     for layer in range(0, layers):
19 |         features = 2**layer*features_root
20 | 
21 |         conv1 = conv2d(inputs=in_node, num_outputs=features, kernel_size=filter_size)
22 |         conv2 = conv2d(inputs=conv1, num_outputs=features, kernel_size=filter_size)
23 |         conv.append(conv2)
24 | 
25 |         if layer < layers - 1:
26 |             in_node = max_pool2d(inputs=conv2, kernel_size=pool_size, padding='SAME')
27 |             # in_node = conv2d(inputs=conv2, num_outputs=features, kernel_size=filter_size, stride=2)
28 | 
29 |     in_node = conv[-1]
30 | 
31 |     for layer in range(layers-2, -1, -1):
32 |         features = 2**(layer+1)*features_root
33 | 
34 |         h_deconv = conv2d_transpose(inputs=in_node, num_outputs=features//2, kernel_size=pool_size, stride=pool_size)
35 |         h_deconv_concat = tf.concat([conv[layer], h_deconv], axis=3)
36 | 
37 |         conv1 = conv2d(inputs=h_deconv_concat, num_outputs=features//2, kernel_size=filter_size)
38 |         in_node = conv2d(inputs=conv1, num_outputs=features//2, kernel_size=filter_size)
39 | 
40 |     output = conv2d(inputs=in_node, num_outputs=output_channel, kernel_size=filter_size, activation_fn=None)
41 |     output = tf.tanh(output)
42 |     return output
43 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/utils.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | # Thanks, https://github.com/tensorflow/tensorflow/issues/4079
 5 | def LeakyReLU(x, leak=0.1, name="lrelu"):
 6 |     with tf.variable_scope(name):
 7 |         f1 = 0.5 * (1.0 + leak)
 8 |         f2 = 0.5 * (1.0 - leak)
 9 |         return f1 * x + f2 * abs(x)
10 | 
11 | 
12 | def average_endpoint_error(labels, predictions):
13 |     """
14 |     Given labels and predictions of size (N, H, W, 2), calculates average endpoint error:
15 |         sqrt[sum_across_channels{(X - Y)^2}]
16 |     """
17 |     num_samples = predictions.shape.as_list()[0]
18 |     with tf.name_scope(None, "average_endpoint_error", (predictions, labels)) as scope:
19 |         predictions = tf.to_float(predictions)
20 |         labels = tf.to_float(labels)
21 |         predictions.get_shape().assert_is_compatible_with(labels.get_shape())
22 | 
23 |         squared_difference = tf.square(tf.subtract(predictions, labels))
24 |         # sum across channels: sum[(X - Y)^2] -> N, H, W, 1
25 |         loss = tf.reduce_sum(squared_difference, 3, keep_dims=True)
26 |         loss = tf.sqrt(loss)
27 |         return tf.reduce_sum(loss) / num_samples
28 | 
29 | 
30 | def pad(tensor, num=1):
31 |     """
32 |     Pads the given tensor along the height and width dimensions with `num` 0s on each side
33 |     """
34 |     return tf.pad(tensor, [[0, 0], [num, num], [num, num], [0, 0]], "CONSTANT")
35 | 
36 | 
37 | def antipad(tensor, num=1):
38 |     """
39 |     Performs a crop. "padding" for a deconvolutional layer (conv2d tranpose) removes
40 |     padding from the output rather than adding it to the input.
41 |     """
42 |     batch, h, w, c = tensor.get_shape().as_list()
43 |     # print(batch, h, w, c)
44 |     # print(type(batch), type(h), type(w), type(c))
45 |     # return tf.slice(tensor, begin=[0, num, num, 0], size=[batch, h - 2 * num, w - 2 * num, c])
46 |     return tensor[:, num: num + h - 2 * num, num: num + w - 2 * num, :]
47 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/flow_warp/flow_warp.cc:
--------------------------------------------------------------------------------
 1 | #define EIGEN_USE_THREADS
 2 | 
 3 | #include "flow_warp.h"
 4 | #include "tensorflow/core/framework/op_kernel.h"
 5 | #include "tensorflow/core/framework/register_types.h"
 6 | #include "tensorflow/core/framework/types.h"
 7 | #include "tensorflow/core/platform/types.h"
 8 | 
 9 | namespace tensorflow {
10 | typedef Eigen::GpuDevice GPUDevice;
11 | 
12 | template<typename Device>
13 | class FlowWarpKernel : public OpKernel {
14 |   public:
15 |     explicit FlowWarpKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {}
16 | 
17 |     void Compute(OpKernelContext *ctx) override {
18 |       // Get the input image and flow and verify dimensions
19 |       const Tensor& input_t = ctx->input(0);
20 |       const Tensor& flow_t  = ctx->input(1);
21 | 
22 |       OP_REQUIRES(ctx, input_t.dims() == 4,
23 |                   errors::InvalidArgument("Input image must have rank 4"));
24 |       OP_REQUIRES(ctx, flow_t.dims() == 4,
25 |                   errors::InvalidArgument("Input flow must have rank 4"));
26 |       OP_REQUIRES(ctx,
27 |                   input_t.dim_size(0) == flow_t.dim_size(0) && input_t.dim_size(
28 |                     1) == flow_t.dim_size(1) && input_t.dim_size(2) == flow_t.dim_size(2),
29 |                   errors::InvalidArgument(
30 |                     "Input image and flow must have same N x H x W dimensions"));
31 | 
32 |       // Allocate the memory for the output
33 |       Tensor *output_t;
34 |       OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input_t.shape(), &output_t));
35 | 
36 |       // Perform flow augmentation
37 |       auto input  = input_t.tensor<float, 4>();
38 |       auto flow   = flow_t.tensor<float, 4>();
39 |       auto output = output_t->tensor<float, 4>();
40 | 
41 |       FlowWarp(ctx->eigen_gpu_device(), input, flow, output);
42 |     }
43 | };
44 | 
45 | REGISTER_KERNEL_BUILDER(Name("FlowWarp")
46 |                         .Device(DEVICE_GPU),
47 |                         FlowWarpKernel<GPUDevice>)
48 | } // end namespace tensorflow
49 | 


--------------------------------------------------------------------------------
/Codes/loss_functions.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | 
 4 | 
 5 | def flow_loss(gen_flows, gt_flows):
 6 |     print(gen_flows['flow'])
 7 |     return tf.reduce_mean(tf.abs(gen_flows['flow'] - gt_flows['flow']))
 8 | 
 9 | 
10 | def intensity_loss(gen_frames, gt_frames, l_num):
11 |     """
12 |     Calculates the sum of lp losses between the predicted and ground truth frames.
13 | 
14 |     @param gen_frames: The predicted frames at each scale.
15 |     @param gt_frames: The ground truth frames at each scale
16 |     @param l_num: 1 or 2 for l1 and l2 loss, respectively).
17 | 
18 |     @return: The lp loss.
19 |     """
20 |     return tf.reduce_mean(tf.abs((gen_frames - gt_frames) ** l_num))
21 | 
22 | 
23 | def gradient_loss(gen_frames, gt_frames, alpha):
24 |     """
25 |     Calculates the sum of GDL losses between the predicted and ground truth frames.
26 | 
27 |     @param gen_frames: The predicted frames at each scale.
28 |     @param gt_frames: The ground truth frames at each scale
29 |     @param alpha: The power to which each gradient term is raised.
30 | 
31 |     @return: The GDL loss.
32 |     """
33 |     # calculate the loss for each scale
34 |     # create filters [-1, 1] and [[1],[-1]] for diffing to the left and down respectively.
35 | 
36 |     channels = gen_frames.get_shape().as_list()[-1]
37 |     pos = tf.constant(np.identity(channels), dtype=tf.float32)     # 3 x 3
38 |     neg = -1 * pos
39 |     filter_x = tf.expand_dims(tf.stack([neg, pos]), 0)  # [-1, 1]
40 |     filter_y = tf.stack([tf.expand_dims(pos, 0), tf.expand_dims(neg, 0)])  # [[1],[-1]]
41 |     strides = [1, 1, 1, 1]  # stride of (1, 1)
42 |     padding = 'SAME'
43 | 
44 |     gen_dx = tf.abs(tf.nn.conv2d(gen_frames, filter_x, strides, padding=padding))
45 |     gen_dy = tf.abs(tf.nn.conv2d(gen_frames, filter_y, strides, padding=padding))
46 |     gt_dx = tf.abs(tf.nn.conv2d(gt_frames, filter_x, strides, padding=padding))
47 |     gt_dy = tf.abs(tf.nn.conv2d(gt_frames, filter_y, strides, padding=padding))
48 | 
49 |     grad_diff_x = tf.abs(gt_dx - gen_dx)
50 |     grad_diff_y = tf.abs(gt_dy - gen_dy)
51 | 
52 |     # condense into one tensor and avg
53 |     return tf.reduce_mean(grad_diff_x ** alpha + grad_diff_y ** alpha)
54 | 
55 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/correlation/pad.cu.cc:
--------------------------------------------------------------------------------
 1 | #if GOOGLE_CUDA
 2 | 
 3 | #define EIGEN_USE_GPU
 4 | 
 5 | #include <stdio.h>
 6 | #include <iostream>
 7 | 
 8 | #include "pad.h"
 9 | #include "tensorflow/core/util/cuda_kernel_helper.h"
10 | 
11 | namespace tensorflow {
12 | typedef Eigen::GpuDevice GPUDevice;
13 | 
14 | __global__ void PadData(
15 |   const float *in,
16 |   int          in_widthheight,
17 |   int          in_width,
18 |   int          in_height,
19 |   int          out_width,
20 |   int          out_height,
21 |   int          channels,
22 |   int          padding,
23 |   float       *out) {
24 |   int xy = blockIdx.x * blockDim.x + threadIdx.x;
25 | 
26 |   int x  = xy % in_width;
27 |   int y  = xy / in_width;
28 |   int ch = blockIdx.y;
29 |   int n  = blockIdx.z;
30 | 
31 |   if (xy >= in_widthheight) {
32 |     out[((n * out_height + y) * out_width + x) * channels + ch] = 0.0;
33 |     return;
34 |   }
35 | 
36 |   float value = in[((n * in_height + y) * in_width + x) * channels + ch];
37 | 
38 |   __syncthreads();
39 | 
40 |   int xpad = x + padding;
41 |   int ypad = y + padding;
42 | 
43 |   out[((n * out_height + ypad) * out_width + xpad) * channels + ch] = value;
44 | }
45 | 
46 | void Pad(const GPUDevice& device,
47 |          const float     *input,
48 |          int              batch_size,
49 |          int              input_height,
50 |          int              input_width,
51 |          int              input_channels,
52 |          int              output_height,
53 |          int              output_width,
54 |          float           *output) {
55 |   int  in_widthheight    = input_width * input_height;
56 |   int  threads_per_block = 16;
57 |   dim3 totalBlocks((in_widthheight - 1) / threads_per_block + 1, input_channels, batch_size);
58 | 
59 |   cudaMemset(output, 0, batch_size * output_height * output_width * input_channels * sizeof(float));
60 | 
61 |   int padding = (output_height - input_height) / 2;
62 | 
63 |   // LAUNCH KERNEL
64 |   PadData << < totalBlocks, threads_per_block, 0, device.stream() >> > (
65 |     input,
66 |     in_widthheight,
67 |     input_width,
68 |     input_height,
69 |     output_width,
70 |     output_height,
71 |     input_channels,
72 |     padding,
73 |     output);
74 | }
75 | }
76 | #endif // if GOOGLE_CUDA
77 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cc:
--------------------------------------------------------------------------------
 1 | #define EIGEN_USE_THREADS
 2 | 
 3 | #include "flow_warp.h"
 4 | #include "tensorflow/core/framework/op_kernel.h"
 5 | #include "tensorflow/core/framework/register_types.h"
 6 | #include "tensorflow/core/framework/types.h"
 7 | #include "tensorflow/core/platform/types.h"
 8 | 
 9 | namespace tensorflow {
10 | typedef Eigen::GpuDevice GPUDevice;
11 | 
12 | template<typename Device>
13 | class FlowWarpGradKernel : public OpKernel {
14 |   public:
15 |     explicit FlowWarpGradKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {}
16 | 
17 |     void Compute(OpKernelContext *ctx) override {
18 |       // Get the input image and flow and verify dimensions
19 |       const Tensor& image_t = ctx->input(0);
20 |       const Tensor& flow_t  = ctx->input(1);
21 |       const Tensor& grad_t  = ctx->input(2);
22 | 
23 |       OP_REQUIRES(ctx, image_t.dims() == 4,
24 |                   errors::InvalidArgument("Input image must have rank 4"));
25 |       OP_REQUIRES(ctx, flow_t.dims() == 4,
26 |                   errors::InvalidArgument("Input flow must have rank 4"));
27 |       OP_REQUIRES(ctx,
28 |                   image_t.dim_size(0) == flow_t.dim_size(0) && image_t.dim_size(
29 |                     1) == flow_t.dim_size(1) && image_t.dim_size(2) == flow_t.dim_size(2),
30 |                   errors::InvalidArgument(
31 |                     "Input image and flow must have same N x H x W dimensions"));
32 | 
33 |       // Allocate the memory for the output
34 |       Tensor *image_grad_t;
35 |       Tensor *flow_grad_t;
36 |       OP_REQUIRES_OK(ctx, ctx->allocate_output(0, image_t.shape(), &image_grad_t));
37 |       OP_REQUIRES_OK(ctx, ctx->allocate_output(0, flow_t.shape(), &flow_grad_t));
38 | 
39 |       auto image      = image_t.tensor<float, 4>();
40 |       auto flow       = flow_t.tensor<float, 4>();
41 |       auto gradient   = grad_t.tensor<float, 4>();
42 |       auto image_grad = image_grad_t->tensor<float, 4>();
43 |       auto flow_grad  = flow_grad_t->tensor<float, 4>();
44 | 
45 |       FlowWarpGrad(ctx->eigen_gpu_device(),
46 |                    image,
47 |                    flow,
48 |                    gradient,
49 |                    image_grad,
50 |                    flow_grad);
51 |     }
52 | };
53 | 
54 | REGISTER_KERNEL_BUILDER(Name("FlowWarpGrad")
55 |                         .Device(DEVICE_GPU),
56 |                         FlowWarpGradKernel<GPUDevice>)
57 | } // end namespace tensorflow
58 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/correlation/correlation_op.cc:
--------------------------------------------------------------------------------
 1 | #include "tensorflow/core/framework/common_shape_fns.h"
 2 | #include "tensorflow/core/framework/op.h"
 3 | #include "tensorflow/core/framework/shape_inference.h"
 4 | 
 5 | namespace tensorflow {
 6 | using shape_inference::InferenceContext;
 7 | using shape_inference::ShapeHandle;
 8 | 
 9 | Status SetOutput(InferenceContext *c) {
10 |   ShapeHandle input_a, input_b, input;
11 | 
12 |   // Get shapes of both inputs and verify they are rank 4
13 |   TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_a));
14 |   TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 4, &input_b));
15 | 
16 |   // Verify inputs are same dimensions
17 |   TF_RETURN_IF_ERROR(c->Merge(input_a, input_b, &input));
18 | 
19 |   // Get the attributes
20 |   int kernel_size, max_displacement, stride_1, stride_2, pad;
21 |   TF_RETURN_IF_ERROR(c->GetAttr("kernel_size", &kernel_size));
22 |   TF_RETURN_IF_ERROR(c->GetAttr("max_displacement", &max_displacement));
23 |   TF_RETURN_IF_ERROR(c->GetAttr("stride_1", &stride_1));
24 |   TF_RETURN_IF_ERROR(c->GetAttr("stride_2", &stride_2));
25 |   TF_RETURN_IF_ERROR(c->GetAttr("pad", &pad));
26 | 
27 |   // Get dimensions of input (already padded)
28 |   int64 batch         = c->Value(c->Dim(input, 0));
29 |   int64 input_height  = c->Value(c->Dim(input, 1));
30 |   int64 input_width   = c->Value(c->Dim(input, 2));
31 |   int64 padded_height = input_height + 2 * pad;
32 |   int64 padded_width  = input_width + 2 * pad;
33 | 
34 |   // The size of unreachable border region on each side
35 |   int kernel_radius = (kernel_size - 1) / 2;
36 |   int border_size   = max_displacement + kernel_radius;
37 | 
38 |   // Calculate the output dimensions
39 |   int64 output_height = (int64)ceil((float)(padded_height - border_size * 2) / (float)stride_1);
40 |   int64 output_width  = (int64)ceil((float)(padded_width - border_size * 2) / (float)stride_1);
41 | 
42 |   // TODO: Verify output size >= 1
43 | 
44 |   int   neighborhood_grid_radius = max_displacement / stride_2;
45 |   int   neighborhood_grid_width  = neighborhood_grid_radius * 2 + 1;
46 |   int64 output_channels          = neighborhood_grid_width * neighborhood_grid_width;
47 | 
48 |   // Set output shape
49 |   c->set_output(0, c->MakeShape({ batch, output_height, output_width, output_channels }));
50 |   return Status::OK();
51 | }
52 | 
53 | REGISTER_OP("Correlation")
54 | .Input("input_a: float32")
55 | .Input("input_b: float32")
56 | .Attr("kernel_size: int")
57 | .Attr("max_displacement: int")
58 | .Attr("stride_1: int")
59 | .Attr("stride_2: int")
60 | .Attr("pad: int")
61 | .Output("output: float32")
62 | .SetShapeFn(SetOutput);
63 | 
64 | REGISTER_OP("CorrelationGrad")
65 | .Input("gradients: float32")
66 | .Input("input_a: float32")
67 | .Input("input_b: float32")
68 | .Attr("kernel_size: int")
69 | .Attr("max_displacement: int")
70 | .Attr("stride_1: int")
71 | .Attr("stride_2: int")
72 | .Attr("pad: int")
73 | .Output("backprops_a: float32")
74 | .Output("backprops_b: float32")
75 | .SetShapeFn([](InferenceContext *c) {
76 |     // Output gradients should be the same dimensions as the inputs
77 |     ShapeHandle out;
78 |     TF_RETURN_IF_ERROR(c->Merge(c->input(1), c->input(2), &out));
79 |     c->set_output(0, out);
80 |     c->set_output(1, out);
81 |     return Status::OK();
82 |   });
83 | } // namespace tensorflow
84 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/correlation/correlation_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef FLOWNET_CORRELATION_H_
 2 | #define FLOWNET_CORRELATION_H_
 3 | 
 4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 5 | #include "tensorflow/core/framework/tensor_types.h"
 6 | #include "tensorflow/core/platform/types.h"
 7 | 
 8 | namespace tensorflow {
 9 | typedef Eigen::GpuDevice GPUDevice;
10 | 
11 | void Correlation(const GPUDevice& device,
12 |                  const float     *input_a,
13 |                  const float     *input_b,
14 |                  const int        batch_size,
15 |                  const int        out_height,
16 |                  const int        out_width,
17 |                  const int        out_channels,
18 |                  const int        out_count,
19 |                  const int        in_height_padded,
20 |                  const int        in_width_padded,
21 |                  const int        in_channels,
22 |                  int              max_displacement,
23 |                  int              neighborhood_grid_radius,
24 |                  int              neighborhood_grid_width,
25 |                  int              kernel_radius,
26 |                  int              kernel_size,
27 |                  int              stride_1,
28 |                  int              stride_2,
29 |                  float           *output);
30 | 
31 | 
32 | void CorrelationGradA(const GPUDevice& device,
33 |                       const int        batch_size,
34 |                       const int        out_width,
35 |                       const int        out_height,
36 |                       const int        out_channels,
37 |                       const int        max_displacement,
38 |                       const int        neighborhood_grid_radius,
39 |                       const int        neighborhood_grid_width,
40 |                       const int        kernel_radius,
41 |                       const int        stride_1,
42 |                       const int        stride_2,
43 |                       const int        in_width,
44 |                       const int        in_height,
45 |                       const int        padded_in_width,
46 |                       const int        padded_in_height,
47 |                       const int        in_channels,
48 |                       const int        in_count_per_sample,
49 |                       const int        pad,
50 |                       const float     *input_b,
51 |                       const float     *gradient,
52 |                       float           *output_a_gradient);
53 | 
54 | void CorrelationGradB(const GPUDevice& device,
55 |                       const int        batch_size,
56 |                       const int        out_width,
57 |                       const int        out_height,
58 |                       const int        out_channels,
59 |                       const int        max_displacement,
60 |                       const int        neighborhood_grid_radius,
61 |                       const int        neighborhood_grid_width,
62 |                       const int        kernel_radius,
63 |                       const int        stride_1,
64 |                       const int        stride_2,
65 |                       const int        in_width,
66 |                       const int        in_height,
67 |                       const int        padded_in_width,
68 |                       const int        padded_in_height,
69 |                       const int        in_channels,
70 |                       const int        in_count_per_sample,
71 |                       const int        pad,
72 |                       const float     *input_a,
73 |                       const float     *gradient,
74 |                       float           *output_b_gradient);
75 | } // end namespace tensorflow
76 | 
77 | #endif  // FLOWNET_CORRELATION_H_
78 | 


--------------------------------------------------------------------------------
/Codes/training_hyper_params/hyper_params.ini:
--------------------------------------------------------------------------------
  1 | [ped2]
  2 | # for lp loss. e.g, 1 or 2 for l1 and l2 loss, respectively)
  3 | L_NUM = 2
  4 | # the power to which each gradient term is raised in GDL loss
  5 | ALPHA_NUM = 1
  6 | # the percentage of the adversarial loss to use in the combined loss
  7 | LAM_ADV = 0.05
  8 | # the percentage of the lp loss to use in the combined loss
  9 | LAM_LP = 1
 10 | # the percentage of the GDL loss to use in the combined loss
 11 | LAM_GDL = 1
 12 | # the percentage of the different frame loss
 13 | LAM_FLOW = 2
 14 | 
 15 | # For gray scale video, such as Ped2 and Ped1, learning rate of G and D star from 1e-4 and 1e-5, respectively.
 16 | LRATE_G = [0.0001, 0.00001]
 17 | LRATE_G_BOUNDARIES = [7000]
 18 | 
 19 | LRATE_D = [0.00001, 0.000001]
 20 | LRATE_D_BOUNDARIES = [7000]
 21 | 
 22 | [ped1]
 23 | # for lp loss. e.g, 1 or 2 for l1 and l2 loss, respectively)
 24 | L_NUM = 2
 25 | # the power to which each gradient term is raised in GDL loss
 26 | ALPHA_NUM = 1
 27 | # the percentage of the adversarial loss to use in the combined loss
 28 | LAM_ADV = 0.05
 29 | # the percentage of the lp loss to use in the combined loss
 30 | LAM_LP = 1
 31 | # the percentage of the GDL loss to use in the combined loss
 32 | LAM_GDL = 1
 33 | # the percentage of the different frame loss, LAM_FLOW = 2 is also ok, but LAM_FLOW = 0.01 is slightly better.
 34 | LAM_FLOW = 0.01
 35 | 
 36 | # For gray scale video, such as Ped2 and Ped1, learning rate of G and D star from 1e-4 and 1e-5, respectively.
 37 | LRATE_G = [0.0001, 0.00001]
 38 | LRATE_G_BOUNDARIES = [40000]
 39 | 
 40 | LRATE_D = [0.00001, 0.000001]
 41 | LRATE_D_BOUNDARIES = [40000]
 42 | 
 43 | 
 44 | [avenue]
 45 | # for lp loss. e.g, 1 or 2 for l1 and l2 loss, respectively)
 46 | L_NUM = 2
 47 | # the power to which each gradient term is raised in GDL loss
 48 | ALPHA_NUM = 1
 49 | # the percentage of the adversarial loss to use in the combined loss
 50 | LAM_ADV = 0.05
 51 | # the percentage of the lp loss to use in the combined loss,
 52 | # we found in smaller lp is slightly better in avenue, but not too much difference
 53 | # LAM_LP = 1 is 84.9, LAM_LP = 0.001 may arrive to 85.1
 54 | LAM_LP = 0.001
 55 | # the percentage of the GDL loss to use in the combined loss
 56 | LAM_GDL = 1
 57 | # the percentage of the different frame loss
 58 | LAM_FLOW = 2
 59 | 
 60 | # For rgb color scale video, such as Ped2 and Ped1, learning rate of G and D star from 2e-4 and 2e-5, respectively.
 61 | LRATE_G = [0.0002, 0.00002]
 62 | LRATE_G_BOUNDARIES = [100000]
 63 | 
 64 | LRATE_D = [0.00002, 0.000002]
 65 | LRATE_D_BOUNDARIES = [100000]
 66 | 
 67 | 
 68 | [shanghaitech]
 69 | # for lp loss. e.g, 1 or 2 for l1 and l2 loss, respectively)
 70 | L_NUM = 2
 71 | # the power to which each gradient term is raised in GDL loss
 72 | ALPHA_NUM = 1
 73 | # the percentage of the adversarial loss to use in the combined loss
 74 | LAM_ADV = 0.05
 75 | # the percentage of the lp loss to use in the combined loss
 76 | LAM_LP = 1
 77 | # the percentage of the GDL loss to use in the combined loss
 78 | LAM_GDL = 1
 79 | # the percentage of the different frame loss
 80 | LAM_FLOW = 2
 81 | 
 82 | # For rgb color scale video, such as Ped2 and Ped1, learning rate of G and D star from 2e-4 and 2e-5, respectively.
 83 | LRATE_G = [0.0002, 0.00002]
 84 | LRATE_G_BOUNDARIES = [50000]
 85 | 
 86 | LRATE_D = [0.00002, 0.000002]
 87 | LRATE_D_BOUNDARIES = [50000]
 88 | 
 89 | 
 90 | [toydata]
 91 | # for lp loss. e.g, 1 or 2 for l1 and l2 loss, respectively)
 92 | L_NUM = 2
 93 | # the power to which each gradient term is raised in GDL loss
 94 | ALPHA_NUM = 1
 95 | # the percentage of the adversarial loss to use in the combined loss
 96 | LAM_ADV = 0.05
 97 | # the percentage of the lp loss to use in the combined loss
 98 | LAM_LP = 1
 99 | # the percentage of the GDL loss to use in the combined loss
100 | LAM_GDL = 1
101 | # the percentage of the different frame loss
102 | LAM_FLOW = 2
103 | 
104 | LRATE_G = [0.0002, 0.00002]
105 | LRATE_G_BOUNDARIES = [5000]
106 | 
107 | LRATE_D = [0.00002, 0.000002]
108 | LRATE_D_BOUNDARIES = [5000]
109 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/preprocessing/preprocessing.cc:
--------------------------------------------------------------------------------
 1 | #include "tensorflow/core/framework/common_shape_fns.h"
 2 | #include "tensorflow/core/framework/op.h"
 3 | #include "tensorflow/core/framework/shape_inference.h"
 4 | 
 5 | namespace tensorflow {
 6 | using shape_inference::InferenceContext;
 7 | using shape_inference::ShapeHandle;
 8 | using shape_inference::DimensionHandle;
 9 | 
10 | Status SetOutputToSizedImage(InferenceContext *c) {
11 |   ShapeHandle input;
12 | 
13 |   TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
14 |   DimensionHandle batch    = c->Dim(input, 0);
15 |   DimensionHandle    depth = c->Dim(input, 3);
16 |   std::vector<int32> crop_;
17 |   c->GetAttr("crop", &crop_);
18 |   DimensionHandle height = c->MakeDim(crop_[0]);
19 |   DimensionHandle width  = c->MakeDim(crop_[1]);
20 |   c->set_output(0, c->MakeShape({ batch, height, width, depth }));
21 |   return Status::OK();
22 | }
23 | 
24 | REGISTER_OP("DataAugmentation")
25 | .Input("image_a: float32")
26 | .Input("image_b: float32")
27 | .Input("global_step: int64")
28 | .Attr("crop: list(int) >= 2")
29 | .Attr("params_a_name: list(string)")
30 | .Attr("params_a_rand_type: list(string)")
31 | .Attr("params_a_exp: list(bool)")
32 | .Attr("params_a_mean: list(float)")
33 | .Attr("params_a_spread: list(float)")
34 | .Attr("params_a_prob: list(float)")
35 | .Attr("params_a_coeff_schedule: list(float)")
36 | .Attr("params_b_name: list(string)")
37 | .Attr("params_b_rand_type: list(string)")
38 | .Attr("params_b_exp: list(bool)")
39 | .Attr("params_b_mean: list(float)")
40 | .Attr("params_b_spread: list(float)")
41 | .Attr("params_b_prob: list(float)")
42 | .Attr("params_b_coeff_schedule: list(float)")
43 | .Output("aug_image_a: float32")
44 | .Output("aug_image_b: float32")
45 | .Output("transforms_from_a: float32")
46 | .Output("transforms_from_b: float32")
47 | .SetShapeFn([](InferenceContext *c) {
48 |     // Verify input A and input B both have 4 dimensions
49 |     ShapeHandle input_shape_a, input_shape_b;
50 |     TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape_a));
51 |     TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 4, &input_shape_b));
52 | 
53 |     // TODO: Verify params vectors all have the same length
54 | 
55 |     // TODO: Move this out of here and into Compute
56 |     // Verify input A and input B are the same shape
57 |     DimensionHandle batch_size, unused;
58 |     TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 0),
59 |                                     c->Value(c->Dim(input_shape_b, 0)),
60 |                                     &batch_size));
61 |     TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 1),
62 |                                     c->Value(c->Dim(input_shape_b, 1)), &unused));
63 |     TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 2),
64 |                                     c->Value(c->Dim(input_shape_b, 2)), &unused));
65 |     TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 3),
66 |                                     c->Value(c->Dim(input_shape_b, 3)), &unused));
67 | 
68 |     // Get cropping dimensions
69 |     std::vector<int32>crop_;
70 |     TF_RETURN_IF_ERROR(c->GetAttr("crop", &crop_));
71 | 
72 |     // Reshape input shape to cropped shape
73 |     TF_RETURN_IF_ERROR(c->ReplaceDim(input_shape_a, 1, c->MakeDim(crop_[0]),
74 |                                      &input_shape_a));
75 |     TF_RETURN_IF_ERROR(c->ReplaceDim(input_shape_a, 2, c->MakeDim(crop_[1]),
76 |                                      &input_shape_a));
77 | 
78 |     // Set output images shapes
79 |     c->set_output(0, input_shape_a);
80 |     c->set_output(1, input_shape_a);
81 | 
82 |     // Set output spatial transforms shapes
83 |     c->set_output(2, c->MakeShape({ batch_size, 6 }));
84 |     c->set_output(3, c->MakeShape({ batch_size, 6 }));
85 | 
86 |     return Status::OK();
87 |   });
88 | 
89 | REGISTER_OP("FlowAugmentation")
90 | .Input("flows: float32")
91 | .Input("transforms_from_a: float32")
92 | .Input("transforms_from_b: float32")
93 | .Attr("crop: list(int) >= 2")
94 | .Output("transformed_flows: float32")
95 | .SetShapeFn(SetOutputToSizedImage);
96 | } // namespace tensorflow
97 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/flow_warp/flow_warp.cu.cc:
--------------------------------------------------------------------------------
  1 | #if GOOGLE_CUDA
  2 | 
  3 | #define EIGEN_USE_GPU
  4 | 
  5 | #include <stdio.h>
  6 | #include <iostream>
  7 | 
  8 | #include "flow_warp.h"
  9 | #include "tensorflow/core/framework/register_types.h"
 10 | #include "tensorflow/core/framework/types.h"
 11 | #include "tensorflow/core/framework/tensor_types.h"
 12 | #include "tensorflow/core/platform/types.h"
 13 | #include "tensorflow/core/util/cuda_kernel_helper.h"
 14 | 
 15 | #define RA_TILE 32
 16 | #define RA_ROWS 8
 17 | 
 18 | namespace tensorflow {
 19 | typedef Eigen::GpuDevice GPUDevice;
 20 | 
 21 | __global__ void FlowWarpKernel(
 22 |   const float *image,
 23 |   const float *flow,
 24 |   float       *warped,
 25 |   const int    batch_size,
 26 |   const int    channels,
 27 |   const int    cblocks,
 28 |   const int    width,
 29 |   const int    wblocks,
 30 |   const int    height,
 31 |   const int    width_height) {
 32 |   int y = blockIdx.y;
 33 |   int n = blockIdx.z;
 34 | 
 35 |   __shared__ float x2_buf[FW_TILE_X], y2_buf[FW_TILE_X];
 36 |   __shared__ float buffer[FW_TILE_C][FW_TILE_X + 1];
 37 | 
 38 |   int x;
 39 |   int c;
 40 | 
 41 |   x = blockIdx.x * FW_TILE_X + threadIdx.x;
 42 | 
 43 |   if ((threadIdx.y == 0) && (x < width)) {
 44 |     const int idx = ((n * height + y) * width + x) * 2;
 45 |     x2_buf[threadIdx.x] = float(x) + flow[idx];
 46 |     y2_buf[threadIdx.x] = float(y) + flow[idx + 1];
 47 |   }
 48 | 
 49 |   __syncthreads();
 50 | 
 51 |   float x2 = x2_buf[threadIdx.y];
 52 |   float y2 = y2_buf[threadIdx.y];
 53 | 
 54 |   int ix2_L = int(x2);
 55 |   int iy2_T = int(y2);
 56 |   int ix2_R = min(ix2_L + 1, width - 1);
 57 |   int iy2_B = min(iy2_T + 1, height - 1);
 58 | 
 59 |   int off_TL = ((n * height + iy2_T) * width + ix2_L) * channels;
 60 |   int off_TR = ((n * height + iy2_T) * width + ix2_R) * channels;
 61 |   int off_BL = ((n * height + iy2_B) * width + ix2_L) * channels;
 62 |   int off_BR = ((n * height + iy2_B) * width + ix2_R) * channels;
 63 | 
 64 |   float alpha   = x2 - ix2_L;
 65 |   float beta    = y2 - iy2_T;
 66 |   float coeffTL = (1 - alpha) * (1 - beta);
 67 |   float coeffTR = alpha * (1 - beta);
 68 |   float coeffBL = (1 - alpha) * beta;
 69 |   float coeffBR = alpha * beta;
 70 | 
 71 |   for (int cb = 0; cb < cblocks; cb++) {
 72 |     __syncthreads();
 73 | 
 74 |     buffer[threadIdx.y][threadIdx.x] = 0.0;
 75 | 
 76 |     __syncthreads();
 77 | 
 78 |     c = cb * FW_TILE_C + threadIdx.x;
 79 | 
 80 |     if ((x2 >= 0) && (y2 >= 0) && (x2 < width) && (y2 < height) && (c < channels)) {
 81 |       buffer[threadIdx.y][threadIdx.x] = // buffer [x][c]
 82 |                                          coeffTL * image[off_TL + c] +
 83 |                                          coeffTR * image[off_TR + c] +
 84 |                                          coeffBL * image[off_BL + c] +
 85 |                                          coeffBR * image[off_BR + c];
 86 |     }
 87 | 
 88 |     __syncthreads();
 89 | 
 90 |     c = cb * FW_TILE_C + threadIdx.y;
 91 |     x = blockIdx.x * FW_TILE_X + threadIdx.x;
 92 | 
 93 |     if ((c < channels) && (x < width)) {
 94 |       warped[((n * height + y) * width + x) * channels + c] = buffer[threadIdx.x][threadIdx.y];
 95 |     }
 96 |   }
 97 | }
 98 | 
 99 | void FlowWarp(const GPUDevice& device,
100 |               typename TTypes<float, 4>::ConstTensor input,
101 |               typename TTypes<float, 4>::ConstTensor flow,
102 |               typename TTypes<float, 4>::Tensor output) {
103 |   const int batch_size = input.dimension(0);
104 |   const int height     = input.dimension(1);
105 |   const int width      = input.dimension(2);
106 |   const int channels   = input.dimension(3);
107 | 
108 |   const int width_height = width * height;
109 |   int  wblocks           = ((width - 1) / FW_TILE_X + 1);
110 |   int  cblocks           = ((channels - 1) / FW_TILE_C + 1);
111 |   dim3 warpThreads(FW_TILE_X, FW_TILE_C);
112 |   dim3 warpBlocks(wblocks, height, batch_size);
113 | 
114 |   cudaMemset(output.data(), 0, batch_size * height * width * 2 * sizeof(float));
115 | 
116 |   FlowWarpKernel << < warpBlocks, warpThreads, 0, device.stream() >> > (
117 |     input.data(),
118 |     flow.data(),
119 |     output.data(),
120 |     batch_size,
121 |     channels,
122 |     cblocks,
123 |     width,
124 |     wblocks,
125 |     height,
126 |     width_height);
127 | }
128 | } // end namespace tensorflow
129 | 
130 | #endif  // GOOGLE_CUDA
131 | 


--------------------------------------------------------------------------------
/Codes/flownet2/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile
 2 | 
 3 | TF_INC = `python -c "import tensorflow; print(tensorflow.sysconfig.get_include())"`
 4 | 
 5 | ifndef CUDA_HOME
 6 |     CUDA_HOME := /usr/local/cuda
 7 | endif
 8 | 
 9 | CC        = gcc -O2 -pthread
10 | CXX       = g++
11 | GPUCC     = nvcc
12 | CFLAGS    = -std=c++11 -I$(TF_INC) -I"$(CUDA_HOME)/include" -DGOOGLE_CUDA=1
13 | GPUCFLAGS = -c
14 | LFLAGS    = -pthread -shared -fPIC
15 | GPULFLAGS = -x cu -Xcompiler -fPIC
16 | CGPUFLAGS = -L$(CUDA_HOME)/lib -L$(CUDA_HOME)/lib64 -lcudart
17 | 
18 | OUT_DIR   = src/ops/build
19 | PREPROCESSING_SRC = "src/ops/preprocessing/preprocessing.cc" "src/ops/preprocessing/kernels/flow_augmentation.cc" "src/ops/preprocessing/kernels/augmentation_base.cc" "src/ops/preprocessing/kernels/data_augmentation.cc"
20 | GPU_SRC_DATA_AUG  	= src/ops/preprocessing/kernels/data_augmentation.cu.cc
21 | GPU_SRC_FLOW     	= src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc
22 | GPU_PROD_DATA_AUG 	= $(OUT_DIR)/data_augmentation.o
23 | GPU_PROD_FLOW    	= $(OUT_DIR)/flow_augmentation_gpu.o
24 | PREPROCESSING_PROD	= $(OUT_DIR)/preprocessing.so
25 | 
26 | DOWNSAMPLE_SRC = "src/ops/downsample/downsample_kernel.cc" "src/ops/downsample/downsample_op.cc"
27 | GPU_SRC_DOWNSAMPLE  = src/ops/downsample/downsample_kernel_gpu.cu.cc
28 | GPU_PROD_DOWNSAMPLE = $(OUT_DIR)/downsample_kernel_gpu.o
29 | DOWNSAMPLE_PROD 	= $(OUT_DIR)/downsample.so
30 | 
31 | CORRELATION_SRC = "src/ops/correlation/correlation_kernel.cc" "src/ops/correlation/correlation_grad_kernel.cc" "src/ops/correlation/correlation_op.cc"
32 | GPU_SRC_CORRELATION  = src/ops/correlation/correlation_kernel.cu.cc
33 | GPU_SRC_CORRELATION_GRAD  = src/ops/correlation/correlation_grad_kernel.cu.cc
34 | GPU_SRC_PAD = src/ops/correlation/pad.cu.cc
35 | GPU_PROD_CORRELATION = $(OUT_DIR)/correlation_kernel_gpu.o
36 | GPU_PROD_CORRELATION_GRAD = $(OUT_DIR)/correlation_grad_kernel_gpu.o
37 | GPU_PROD_PAD = $(OUT_DIR)/correlation_pad_gpu.o
38 | CORRELATION_PROD 	= $(OUT_DIR)/correlation.so
39 | 
40 | FLOWWARP_SRC = "src/ops/flow_warp/flow_warp_op.cc" "src/ops/flow_warp/flow_warp.cc" "src/ops/flow_warp/flow_warp_grad.cc"
41 | GPU_SRC_FLOWWARP = "src/ops/flow_warp/flow_warp.cu.cc"
42 | GPU_SRC_FLOWWARP_GRAD = "src/ops/flow_warp/flow_warp_grad.cu.cc"
43 | GPU_PROD_FLOWWARP = "$(OUT_DIR)/flow_warp_gpu.o"
44 | GPU_PROD_FLOWWARP_GRAD = "$(OUT_DIR)/flow_warp_grad_gpu.o"
45 | FLOWWARP_PROD = "$(OUT_DIR)/flow_warp.so"
46 | 
47 | ifeq ($(OS),Windows_NT)
48 |     detected_OS := Windows
49 | else
50 |     detected_OS := $(shell sh -c 'uname -s 2>/dev/null || echo not')
51 | endif
52 | ifeq ($(detected_OS),Darwin)  # Mac OS X
53 | 	CGPUFLAGS += -undefined dynamic_lookup
54 | endif
55 | ifeq ($(detected_OS),Linux)
56 | 	CFLAGS += -D_MWAITXINTRIN_H_INCLUDED -D_FORCE_INLINES -D__STRICT_ANSI__ -D_GLIBCXX_USE_CXX11_ABI=0
57 | endif
58 | 
59 | all: preprocessing downsample correlation flowwarp
60 | 
61 | preprocessing:
62 | 	$(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_DATA_AUG) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_DATA_AUG)
63 | 	$(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_FLOW) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_FLOW)
64 | 	$(CXX) -g $(CFLAGS)  $(PREPROCESSING_SRC) $(GPU_PROD_DATA_AUG) $(GPU_PROD_FLOW) $(LFLAGS) $(CGPUFLAGS) -o $(PREPROCESSING_PROD)
65 | 
66 | downsample:
67 | 	$(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_DOWNSAMPLE) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_DOWNSAMPLE)
68 | 	$(CXX) -g $(CFLAGS)  $(DOWNSAMPLE_SRC) $(GPU_PROD_DOWNSAMPLE) $(LFLAGS) $(CGPUFLAGS) -o $(DOWNSAMPLE_PROD)
69 | 
70 | correlation:
71 | 	$(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_CORRELATION) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_CORRELATION)
72 | 	$(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_CORRELATION_GRAD) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_CORRELATION_GRAD)
73 | 	$(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_PAD) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_PAD)
74 | 	$(CXX) -g $(CFLAGS)  $(CORRELATION_SRC) $(GPU_PROD_CORRELATION) $(GPU_PROD_CORRELATION_GRAD) $(GPU_PROD_PAD) $(LFLAGS) $(CGPUFLAGS) -o $(CORRELATION_PROD)
75 | 
76 | flowwarp:
77 | 	$(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_FLOWWARP) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_FLOWWARP)
78 | 	$(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_FLOWWARP_GRAD) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_FLOWWARP_GRAD)
79 | 	$(CXX) -g $(CFLAGS)  $(FLOWWARP_SRC) $(GPU_PROD_FLOWWARP) $(GPU_PROD_FLOWWARP_GRAD) $(LFLAGS) $(CGPUFLAGS) -o $(FLOWWARP_PROD)
80 | 
81 | clean:
82 | 	rm -f $(PREPROCESSING_PROD) $(GPU_PROD_FLOW) $(GPU_PROD_DATA_AUG) $(DOWNSAMPLE_PROD) $(GPU_PROD_DOWNSAMPLE)
83 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc:
--------------------------------------------------------------------------------
 1 | #if GOOGLE_CUDA
 2 | 
 3 | #define EIGEN_USE_GPU
 4 | 
 5 | #include <stdio.h>
 6 | #include <iostream>
 7 | 
 8 | #include "flow_augmentation.h"
 9 | #include "tensorflow/core/framework/register_types.h"
10 | #include "tensorflow/core/framework/types.h"
11 | #include "tensorflow/core/framework/tensor_types.h"
12 | #include "tensorflow/core/platform/types.h"
13 | #include "tensorflow/core/util/cuda_kernel_helper.h"
14 | 
15 | namespace tensorflow {
16 | typedef Eigen::GpuDevice GPUDevice;
17 | 
18 | inline __device__ __host__ int clamp(int f, int a, int b) {
19 |   return max(a, min(f, b));
20 | }
21 | 
22 | __global__ void FillFlowAugmentationKernel(
23 |   const int32 nthreads,
24 |   const float *flow_ptr,
25 |   const float *transforms_from_a,
26 |   const float *inv_transforms_from_b,
27 |   const int src_total_count, const int src_height, const int src_width,
28 |   const int batch_size, const int out_height,
29 |   const int out_width, float *output_ptr) {
30 |   CUDA_1D_KERNEL_LOOP(index, nthreads) {
31 |     const float x = (float)(index % out_width);
32 |     const float y = (float)((index / out_width) % out_height);
33 |     const int   n = (index / out_width / out_height);
34 | 
35 |     const int transformIdx = n * 6;
36 | 
37 |     // Apply transformation matrix applied to second image
38 |     const float xpos1 = x * transforms_from_a[transformIdx + 0]
39 |                         + y * transforms_from_a[transformIdx + 1]
40 |                         + transforms_from_a[transformIdx + 2];
41 |     const float ypos1 = x * transforms_from_a[transformIdx + 3]
42 |                         + y * transforms_from_a[transformIdx + 4]
43 |                         + transforms_from_a[transformIdx + 5];
44 | 
45 |     // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w)
46 |     // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
47 |     const int srcXIdx =
48 |       ((n * src_height + (int)(ypos1 + 0.5)) * src_width + (int)(xpos1 + 0.5)) *
49 |       2 + 0;
50 |     const int srcYIdx = srcXIdx + 1;
51 | 
52 |     const float xpos2 = xpos1 + flow_ptr[clamp(srcXIdx, 0, src_total_count - 1)];
53 |     const float ypos2 = ypos1 + flow_ptr[clamp(srcYIdx, 0, src_total_count - 1)];
54 | 
55 |     // Apply inverse of the transformation matrix applied to first image
56 |     const float xpos3 = xpos2 * inv_transforms_from_b[transformIdx + 0]
57 |                         + ypos2 * inv_transforms_from_b[transformIdx + 1]
58 |                         + inv_transforms_from_b[transformIdx + 2];
59 |     const float ypos3 = xpos2 * inv_transforms_from_b[transformIdx + 3]
60 |                         + ypos2 * inv_transforms_from_b[transformIdx + 4]
61 |                         + inv_transforms_from_b[transformIdx + 5];
62 | 
63 |     output_ptr[((n * out_height + (int)y) * out_width + (int)x) * 2 + 0] = xpos3 -
64 |                                                                            x;
65 |     output_ptr[((n * out_height + (int)y) * out_width + (int)x) * 2 + 1] = ypos3 -
66 |                                                                            y;
67 |   }
68 | }
69 | 
70 | template<>
71 | void FillFlowAugmentation(const GPUDevice& device,
72 |                           typename TTypes<float, 4>::Tensor output,
73 |                           typename TTypes<float, 4>::ConstTensor flows,
74 |                           typename TTypes<const float, 2>::ConstTensor transforms_from_a,
75 |                           typename TTypes<const float, 2>::ConstTensor transforms_from_b) {
76 |   const int batch_size      = output.dimension(0);
77 |   const int out_height      = output.dimension(1);
78 |   const int out_width       = output.dimension(2);
79 |   const int depth           = 2;
80 |   const int total_count     = batch_size * out_height * out_width * depth;
81 |   const int src_total_count = flows.dimension(0) * flows.dimension(1) *
82 |                               flows.dimension(2) * flows.dimension(3);
83 | 
84 |   CudaLaunchConfig config = GetCudaLaunchConfig(total_count / 2, device);
85 | 
86 |   FillFlowAugmentationKernel << < config.block_count, config.thread_per_block, 0,
87 |     device.stream() >> > (
88 |     total_count / 2, flows.data(), transforms_from_a.data(),
89 |     transforms_from_b.data(),
90 |     src_total_count, flows.dimension(1), flows.dimension(2), batch_size,
91 |     out_height, out_width, output.data());
92 | }
93 | } // end namespace tensorflow
94 | 
95 | #endif  // GOOGLE_CUDA
96 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/downsample/downsample_kernel_gpu.cu.cc:
--------------------------------------------------------------------------------
  1 | #if GOOGLE_CUDA
  2 | 
  3 | #define EIGEN_USE_GPU
  4 | 
  5 | #include <stdio.h>
  6 | #include <iostream>
  7 | 
  8 | #include "downsample_kernel.h"
  9 | #include "tensorflow/core/framework/register_types.h"
 10 | #include "tensorflow/core/framework/types.h"
 11 | #include "tensorflow/core/framework/tensor_types.h"
 12 | #include "tensorflow/core/platform/types.h"
 13 | #include "tensorflow/core/util/cuda_kernel_helper.h"
 14 | 
 15 | #define CUDART_NAN_F            __int_as_float(0x7fffffff)
 16 | 
 17 | namespace tensorflow {
 18 | 
 19 | typedef Eigen::GpuDevice GPUDevice;
 20 | 
 21 | __global__ void DownsampleKernel(
 22 |     const int32 nthreads,
 23 |     const float* input_ptr,
 24 |     float* output_ptr,
 25 |     const int in_width,
 26 |     const int in_height,
 27 |     const int out_width,
 28 |     const int out_height,
 29 |     const int channels,
 30 |     const float width_scale,
 31 |     const float height_scale,
 32 |     const int wradius,
 33 |     const int hradius) {
 34 |         CUDA_1D_KERNEL_LOOP(index, nthreads) {
 35 |             const int c = index % channels;
 36 |             const int destx = (index / channels) % out_width;
 37 |             const int desty = (index / channels / out_width) % out_height;
 38 |             const int n = (index / channels / out_width) / out_height;
 39 | 
 40 |             const float srcx = ((float)destx / (float)(out_width - 1)) * (float)(in_width - 1);
 41 |             const float srcy = ((float)desty / (float)(out_height - 1)) * (float)(in_height - 1);
 42 | 
 43 |             const int isrcx = round(srcx);
 44 |             const int isrcy = round(srcy);
 45 | 
 46 |             float accum_value = 0;
 47 |             float accum_weight = 0;
 48 |             float accum_nan = 0;
 49 | 
 50 |             for (int dy = -hradius; dy <= hradius; dy++) {
 51 |                 int yoff = isrcy + dy;
 52 |                 //
 53 |                 for (int dx = -wradius; dx <= wradius; dx++) {
 54 |                     int xoff = isrcx + dx;
 55 | 
 56 |                     if (xoff >= 0 && yoff >= 0 && xoff < in_width && yoff < in_height) {
 57 |                         int idx = ((n * in_height + yoff) * in_width + xoff) * channels + c;
 58 |                         float sample = input_ptr[idx];
 59 |                         float weight = fmaxf(0.0f, 1.0f - (fabsf((float)xoff - srcx) / width_scale))
 60 |                                        * fmaxf(0.0f, 1.0f - (fabsf((float)yoff - srcy) / height_scale));
 61 |                         if (sample != sample) { // isnan
 62 |                             accum_nan += weight;
 63 |                             sample = 0;
 64 |                             weight = 0;
 65 |                         }
 66 |                         accum_value += sample * weight;
 67 |                         accum_weight += weight;
 68 |                     }
 69 |                 }
 70 |             }
 71 | 
 72 |             if (accum_nan / accum_weight > 0.5) {
 73 |                 output_ptr[index] = CUDART_NAN_F;
 74 |             } else {
 75 |                 output_ptr[index] = accum_value / accum_weight;
 76 |             }
 77 |         }
 78 | }
 79 | 
 80 | bool Downsample(const GPUDevice& device,
 81 |                 typename TTypes<float, 4>::ConstTensor input,
 82 |                 typename TTypes<float, 4>::Tensor output) {
 83 |     const int batch_size = output.dimension(0);
 84 |     const int out_height = output.dimension(1);
 85 |     const int out_width = output.dimension(2);
 86 |     const int out_channels = output.dimension(3);
 87 |     const int total_count = batch_size * out_height * out_width * out_channels;
 88 | 
 89 |     const int in_height = input.dimension(1);
 90 |     const int in_width = input.dimension(2);
 91 | 
 92 |     const float width_scale = (float)(in_width - 1) / (float)(out_width - 1);
 93 |     const float height_scale = (float)(in_height - 1) / (float)(out_height - 1);
 94 | 
 95 |     const int wradius = ceil(width_scale);
 96 |     const int hradius = ceil(height_scale);
 97 | 
 98 |     CudaLaunchConfig config = GetCudaLaunchConfig(total_count, device);
 99 |     DownsampleKernel<<<config.block_count, config.thread_per_block, 0,
100 |                         device.stream()>>>(total_count, input.data(), output.data(),
101 |                         in_width, in_height, out_width, out_height, out_channels,
102 |                         width_scale, height_scale, wradius, hradius);
103 |     return device.ok();
104 | }
105 | 
106 | }  // end namespace tensorflow
107 | 
108 | #endif  // GOOGLE_CUDA
109 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cu.cc:
--------------------------------------------------------------------------------
  1 | #if GOOGLE_CUDA
  2 | 
  3 | #define EIGEN_USE_GPU
  4 | 
  5 | #include "flow_warp.h"
  6 | 
  7 | namespace tensorflow {
  8 | typedef Eigen::GpuDevice GPUDevice;
  9 | 
 10 | __global__ void FlowWarpGradKernel(
 11 |   const float *image,
 12 |   float       *image_grad,
 13 |   const float *flow,
 14 |   float       *flow_grad,
 15 |   const float *gradient,
 16 |   int          batch_size,
 17 |   int          channels,
 18 |   int          cblocks,
 19 |   int          width,
 20 |   int          wblocks,
 21 |   int          height,
 22 |   int          widthheight) {
 23 |   int x = blockIdx.x * FW_TILE_X + threadIdx.x;
 24 | 
 25 |   if (x >= width) return;
 26 | 
 27 |   int y = blockIdx.y;
 28 |   int n = blockIdx.z;
 29 | 
 30 |   const int flow_idx = ((n * height + y) * width + x) * 2;
 31 |   float     x2       = float(x) + flow[flow_idx];
 32 |   float     y2       = float(y) + flow[flow_idx + 1];
 33 | 
 34 |   if ((x2 >= 0.f) && (y2 >= 0.f) && (x2 < width) && (y2 < height)) {
 35 |     int ix2_L = int(x2);
 36 |     int iy2_T = int(y2);
 37 |     int ix2_R = min(ix2_L + 1, width - 1);
 38 |     int iy2_B = min(iy2_T + 1, height - 1);
 39 | 
 40 |     float alpha = x2 - ix2_L;
 41 |     float beta  = y2 - iy2_T;
 42 | 
 43 |     for (int c = 0; c < channels; c++) {
 44 |       float warped_diff_value = gradient[((n * height + y) * width + x) * channels + c];
 45 |       atomicAdd(&image_grad[((n * height + iy2_T) * width + ix2_L) * channels + c],
 46 |                 warped_diff_value * (1 - alpha) * (1 - beta));
 47 |       atomicAdd(&image_grad[((n * height + iy2_T) * width + ix2_R) * channels + c],
 48 |                 warped_diff_value * alpha * (1 - beta));
 49 |       atomicAdd(&image_grad[((n * height + iy2_B) * width + ix2_L) * channels + c],
 50 |                 warped_diff_value * (1 - alpha) * beta);
 51 |       atomicAdd(&image_grad[((n * height + iy2_B) * width + ix2_R) * channels + c],
 52 |                 warped_diff_value * alpha * beta);
 53 |     }
 54 | 
 55 |     float gamma    = iy2_B - y2;
 56 |     float bot_diff = 0;
 57 | 
 58 |     for (int c = 0; c < channels; c++) {
 59 |       int   ch_off = (n * channels + c) * height;
 60 |       float temp   = 0;
 61 |       temp += gamma *
 62 |               (image[((n * height + iy2_T) * width + ix2_R) * channels + c] -
 63 |                image[((n * height + iy2_T) * width + ix2_L) * channels + c]);
 64 |       temp += (1 - gamma) *
 65 |               (image[((n * height + iy2_B) * width + ix2_R) * channels + c] -
 66 |                image[((n * height + iy2_B) * width + ix2_L) * channels + c]);
 67 | 
 68 |       bot_diff += gradient[((n * height + y) * width + x) * channels + c] * temp;
 69 |     }
 70 |     flow_grad[((n * height + y) * width + x) * 2] = bot_diff;
 71 | 
 72 |     gamma    = ix2_R - x2;
 73 |     bot_diff = 0;
 74 | 
 75 |     for (int c = 0; c < channels; c++) {
 76 |       float temp = 0;
 77 |       temp += gamma *
 78 |               (image[((n * height + iy2_B) * width + ix2_L) * channels + c] -
 79 |                image[((n * height + iy2_T) * width + ix2_L) * channels + c]);
 80 |       temp += (1 - gamma) *
 81 |               (image[((n * height + iy2_B) * width + ix2_R) * channels + c] -
 82 |                image[((n * height + iy2_T) * width + ix2_R) * channels + c]);
 83 | 
 84 |       bot_diff += gradient[((n * height + y) * width + x) * channels + c] * temp;
 85 |     }
 86 |     flow_grad[((n * height + y) * width + x) * 2 + 1] = bot_diff;
 87 |   }
 88 | }
 89 | 
 90 | void FlowWarpGrad(const GPUDevice& device,
 91 |                   typename TTypes<float, 4>::ConstTensor image,
 92 |                   typename TTypes<float, 4>::ConstTensor flow,
 93 |                   typename TTypes<float, 4>::ConstTensor gradient,
 94 |                   typename TTypes<float, 4>::Tensor image_grad,
 95 |                   typename TTypes<float, 4>::Tensor flow_grad) {
 96 |   const int batch_size   = image.dimension(0);
 97 |   const int height       = image.dimension(1);
 98 |   const int width        = image.dimension(2);
 99 |   const int channels     = image.dimension(3);
100 |   const int width_height = width * height;
101 | 
102 |   int  wblocks = ((width - 1) / FW_TILE_X + 1);
103 |   int  cblocks = ((channels - 1) / FW_TILE_C + 1);
104 |   dim3 warpThreads(FW_TILE_X, 1);
105 |   dim3 warpBlocks(wblocks, height, batch_size);
106 | 
107 |   cudaMemset(image_grad.data(), 0, batch_size * height * width * channels * sizeof(float));
108 |   cudaMemset(flow_grad.data(),  0, batch_size * height * width * 2 * sizeof(float));
109 | 
110 |   FlowWarpGradKernel << < warpBlocks, warpThreads, 0, device.stream() >> > (
111 |     image.data(),
112 |     image_grad.data(),
113 |     flow.data(),
114 |     flow_grad.data(),
115 |     gradient.data(),
116 |     batch_size,
117 |     channels,
118 |     cblocks,
119 |     width,
120 |     wblocks,
121 |     height,
122 |     width_height);
123 | }
124 | } // end namespace tensorflow
125 | 
126 | #endif  // GOOGLE_CUDA
127 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/dataset_configs.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Add dataset configurations here. Each dataset must have the following structure:
  3 | 
  4 | NAME = {
  5 |     IMAGE_HEIGHT: int,
  6 |     IMAGE_WIDTH: int,
  7 |     ITEMS_TO_DESCRIPTIONS: {
  8 |         'image_a': 'A 3-channel image.',
  9 |         'image_b': 'A 3-channel image.',
 10 |         'flow': 'A 2-channel optical flow field',
 11 |     },
 12 |     SIZES: {
 13 |         'train': int,
 14 |         'validate': int,    (optional)
 15 |         ...
 16 |     },
 17 |     BATCH_SIZE: int,
 18 |     PATHS: {
 19 |         'train': '',
 20 |         'validate': '', (optional)
 21 |         ...
 22 |     }
 23 | }
 24 | """
 25 | 
 26 | """
 27 | note that one step = one batch of data processed, ~not~ an entire epoch
 28 | 'coeff_schedule_param': {
 29 |     'half_life': 50000,         after this many steps, the value will be i + (f - i)/2
 30 |     'initial_coeff': 0.5,       initial value
 31 |     'final_coeff': 1,           final value
 32 | },
 33 | """
 34 | 
 35 | FLYING_CHAIRS_DATASET_CONFIG = {
 36 |     'IMAGE_HEIGHT': 384,
 37 |     'IMAGE_WIDTH': 512,
 38 |     'ITEMS_TO_DESCRIPTIONS': {
 39 |         'image_a': 'A 3-channel image.',
 40 |         'image_b': 'A 3-channel image.',
 41 |         'flow': 'A 2-channel optical flow field',
 42 |     },
 43 |     'SIZES': {
 44 |         'train': 22232,
 45 |         'validate': 640,
 46 |         'sample': 8,
 47 |     },
 48 |     'BATCH_SIZE': 8,
 49 |     'PATHS': {
 50 |         'train': './data/tfrecords/fc_train.tfrecords',
 51 |         'validate': './data/tfrecords/fc_val.tfrecords',
 52 |         'sample': './data/tfrecords/fc_sample.tfrecords',
 53 |     },
 54 |     'PREPROCESS': {
 55 |         'scale': False,
 56 |         'crop_height': 320,
 57 |         'crop_width': 448,
 58 |         'image_a': {
 59 |             'translate': {
 60 |                 'rand_type': "uniform_bernoulli",
 61 |                 'exp': False,
 62 |                 'mean': 0,
 63 |                 'spread': 0.4,
 64 |                 'prob': 1.0,
 65 |             },
 66 |             'rotate': {
 67 |                 'rand_type': "uniform_bernoulli",
 68 |                 'exp': False,
 69 |                 'mean': 0,
 70 |                 'spread': 0.4,
 71 |                 'prob': 1.0,
 72 |             },
 73 |             'zoom': {
 74 |                 'rand_type': "uniform_bernoulli",
 75 |                 'exp': True,
 76 |                 'mean': 0.2,
 77 |                 'spread': 0.4,
 78 |                 'prob': 1.0,
 79 |             },
 80 |             'squeeze': {
 81 |                 'rand_type': "uniform_bernoulli",
 82 |                 'exp': True,
 83 |                 'mean': 0,
 84 |                 'spread': 0.3,
 85 |                 'prob': 1.0,
 86 |             },
 87 |             'noise': {
 88 |                 'rand_type': "uniform_bernoulli",
 89 |                 'exp': False,
 90 |                 'mean': 0.03,
 91 |                 'spread': 0.03,
 92 |                 'prob': 1.0,
 93 |             },
 94 |         },
 95 |         # All preprocessing to image A will be applied to image B in addition to the following.
 96 |         'image_b': {
 97 |             'translate': {
 98 |                 'rand_type': "gaussian_bernoulli",
 99 |                 'exp': False,
100 |                 'mean': 0,
101 |                 'spread': 0.03,
102 |                 'prob': 1.0,
103 |             },
104 |             'rotate': {
105 |                 'rand_type': "gaussian_bernoulli",
106 |                 'exp': False,
107 |                 'mean': 0,
108 |                 'spread': 0.03,
109 |                 'prob': 1.0,
110 |             },
111 |             'zoom': {
112 |                 'rand_type': "gaussian_bernoulli",
113 |                 'exp': True,
114 |                 'mean': 0,
115 |                 'spread': 0.03,
116 |                 'prob': 1.0,
117 |             },
118 |             'gamma': {
119 |                 'rand_type': "gaussian_bernoulli",
120 |                 'exp': True,
121 |                 'mean': 0,
122 |                 'spread': 0.02,
123 |                 'prob': 1.0,
124 |             },
125 |             'brightness': {
126 |                 'rand_type': "gaussian_bernoulli",
127 |                 'exp': False,
128 |                 'mean': 0,
129 |                 'spread': 0.02,
130 |                 'prob': 1.0,
131 |             },
132 |             'contrast': {
133 |                 'rand_type': "gaussian_bernoulli",
134 |                 'exp': True,
135 |                 'mean': 0,
136 |                 'spread': 0.02,
137 |                 'prob': 1.0,
138 |             },
139 |             'color': {
140 |                 'rand_type': "gaussian_bernoulli",
141 |                 'exp': True,
142 |                 'mean': 0,
143 |                 'spread': 0.02,
144 |                 'prob': 1.0,
145 |             },
146 |             'coeff_schedule_param': {
147 |                 'half_life': 50000,
148 |                 'initial_coeff': 0.5,
149 |                 'final_coeff': 1,
150 |             },
151 |         }
152 |     },
153 | }
154 | 


--------------------------------------------------------------------------------
/Codes/inference.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import os
  3 | import time
  4 | import numpy as np
  5 | import pickle
  6 | 
  7 | 
  8 | from models import generator
  9 | from utils import DataLoader, load, save, psnr_error
 10 | from constant import const
 11 | import evaluate
 12 | 
 13 | 
 14 | slim = tf.contrib.slim
 15 | 
 16 | os.environ['CUDA_DEVICES_ORDER'] = "PCI_BUS_ID"
 17 | os.environ['CUDA_VISIBLE_DEVICES'] = const.GPU
 18 | 
 19 | dataset_name = const.DATASET
 20 | test_folder = const.TEST_FOLDER
 21 | 
 22 | num_his = const.NUM_HIS
 23 | height, width = 256, 256
 24 | 
 25 | snapshot_dir = const.SNAPSHOT_DIR
 26 | psnr_dir = const.PSNR_DIR
 27 | evaluate_name = const.EVALUATE
 28 | evaluate.set_data_dir("/p300/datasets")
 29 | print(evaluate.DATA_DIR)
 30 | print(const)
 31 | 
 32 | 
 33 | # define dataset
 34 | with tf.name_scope('dataset'):
 35 |     test_video_clips_tensor = tf.placeholder(shape=[1, height, width, 3 * (num_his + 1)],
 36 |                                              dtype=tf.float32)
 37 |     test_inputs = test_video_clips_tensor[..., 0:num_his*3]
 38 |     test_gt = test_video_clips_tensor[..., -3:]
 39 |     print('test inputs = {}'.format(test_inputs))
 40 |     print('test prediction gt = {}'.format(test_gt))
 41 | 
 42 | # define testing generator function and
 43 | # in testing, only generator networks, there is no discriminator networks and flownet.
 44 | with tf.variable_scope('generator', reuse=None):
 45 |     print('testing = {}'.format(tf.get_variable_scope().name))
 46 |     test_outputs = generator(test_inputs, layers=4, output_channel=3)
 47 |     test_psnr_error = psnr_error(gen_frames=test_outputs, gt_frames=test_gt)
 48 | 
 49 | 
 50 | config = tf.ConfigProto()
 51 | config.gpu_options.allow_growth = True
 52 | with tf.Session(config=config) as sess:
 53 |     # dataset
 54 |     data_loader = DataLoader(test_folder, height, width)
 55 | 
 56 |     # initialize weights
 57 |     sess.run(tf.global_variables_initializer())
 58 |     print('Init global successfully!')
 59 | 
 60 |     # tf saver
 61 |     saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=None)
 62 | 
 63 |     restore_var = [v for v in tf.global_variables()]
 64 |     loader = tf.train.Saver(var_list=restore_var)
 65 | 
 66 |     def inference_func(ckpt, dataset_name, evaluate_name):
 67 |         load(loader, sess, ckpt)
 68 | 
 69 |         psnr_records = []
 70 |         videos_info = data_loader.videos
 71 |         num_videos = len(videos_info.keys())
 72 |         total = 0
 73 |         timestamp = time.time()
 74 | 
 75 |         for video_name, video in videos_info.items():
 76 |             length = video['length']
 77 |             total += length
 78 |             psnrs = np.empty(shape=(length,), dtype=np.float32)
 79 | 
 80 |             for i in range(num_his, length):
 81 |                 video_clip = data_loader.get_video_clips(video_name, i - num_his, i + 1)
 82 |                 psnr = sess.run(test_psnr_error,
 83 |                                 feed_dict={test_video_clips_tensor: video_clip[np.newaxis, ...]})
 84 |                 psnrs[i] = psnr
 85 | 
 86 |                 print('video = {} / {}, i = {} / {}, psnr = {:.6f}'.format(
 87 |                     video_name, num_videos, i, length, psnr))
 88 | 
 89 |             psnrs[0:num_his] = psnrs[num_his]
 90 |             psnr_records.append(psnrs)
 91 | 
 92 |         result_dict = {'dataset': dataset_name, 'psnr': psnr_records, 'flow': [], 'names': [], 'diff_mask': []}
 93 | 
 94 |         used_time = time.time() - timestamp
 95 |         print('total time = {}, fps = {}'.format(used_time, total / used_time))
 96 | 
 97 |         # TODO specify what's the actual name of ckpt.
 98 |         pickle_path = os.path.join(psnr_dir, os.path.split(ckpt)[-1])
 99 |         with open(pickle_path, 'wb') as writer:
100 |             pickle.dump(result_dict, writer, pickle.HIGHEST_PROTOCOL)
101 | 
102 |         results = evaluate.evaluate(evaluate_name, pickle_path)
103 |         print(results)
104 | 
105 | 
106 |     if os.path.isdir(snapshot_dir):
107 |         def check_ckpt_valid(ckpt_name):
108 |             is_valid = False
109 |             ckpt = ''
110 |             if ckpt_name.startswith('model.ckpt-'):
111 |                 ckpt_name_splits = ckpt_name.split('.')
112 |                 ckpt = str(ckpt_name_splits[0]) + '.' + str(ckpt_name_splits[1])
113 |                 ckpt_path = os.path.join(snapshot_dir, ckpt)
114 |                 if os.path.exists(ckpt_path + '.index') and os.path.exists(ckpt_path + '.meta') and \
115 |                         os.path.exists(ckpt_path + '.data-00000-of-00001'):
116 |                     is_valid = True
117 | 
118 |             return is_valid, ckpt
119 | 
120 |         def scan_psnr_folder():
121 |             tested_ckpt_in_psnr_sets = set()
122 |             for test_psnr in os.listdir(psnr_dir):
123 |                 tested_ckpt_in_psnr_sets.add(test_psnr)
124 |             return tested_ckpt_in_psnr_sets
125 | 
126 |         def scan_model_folder():
127 |             saved_models = set()
128 |             for ckpt_name in os.listdir(snapshot_dir):
129 |                 is_valid, ckpt = check_ckpt_valid(ckpt_name)
130 |                 if is_valid:
131 |                     saved_models.add(ckpt)
132 |             return saved_models
133 | 
134 |         tested_ckpt_sets = scan_psnr_folder()
135 |         while True:
136 |             all_model_ckpts = scan_model_folder()
137 |             new_model_ckpts = all_model_ckpts - tested_ckpt_sets
138 | 
139 |             for ckpt_name in new_model_ckpts:
140 |                 # inference
141 |                 ckpt = os.path.join(snapshot_dir, ckpt_name)
142 |                 inference_func(ckpt, dataset_name, evaluate_name)
143 | 
144 |                 tested_ckpt_sets.add(ckpt_name)
145 | 
146 |             print('waiting for models...')
147 |             evaluate.evaluate('compute_auc', psnr_dir)
148 |             time.sleep(60)
149 |     else:
150 |         inference_func(snapshot_dir, dataset_name, evaluate_name)
151 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/correlation/correlation_kernel.cc:
--------------------------------------------------------------------------------
  1 | #define EIGEN_USE_THREADS
  2 | 
  3 | #include <utility>
  4 | 
  5 | #include "correlation_kernel.h"
  6 | #include "pad.h"
  7 | 
  8 | #include "tensorflow/core/framework/op.h"
  9 | #include "tensorflow/core/framework/op_kernel.h"
 10 | 
 11 | namespace tensorflow {
 12 | template<typename Device>
 13 | class CorrelationKernel : public OpKernel {
 14 |   public:
 15 |     explicit CorrelationKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {
 16 |       // Get the attributes
 17 |       OP_REQUIRES_OK(ctx, ctx->GetAttr("kernel_size", &kernel_size));
 18 |       OP_REQUIRES_OK(ctx, ctx->GetAttr("max_displacement", &max_displacement));
 19 |       OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_1", &stride_1));
 20 |       OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_2", &stride_2));
 21 |       OP_REQUIRES_OK(ctx, ctx->GetAttr("pad", &pad));
 22 | 
 23 |       OP_REQUIRES(ctx, kernel_size % 2 != 0, errors::InvalidArgument("kernel_size must be odd"));
 24 |     }
 25 | 
 26 |     void Compute(OpKernelContext *ctx) override {
 27 |       // Get the input images and transforms and verify their dimensions
 28 |       const Tensor& input_a_t = ctx->input(0);
 29 |       const Tensor& input_b_t = ctx->input(1);
 30 | 
 31 |       OP_REQUIRES(ctx, input_a_t.dims() == 4, errors::InvalidArgument("input_a must have rank 4"));
 32 |       OP_REQUIRES(ctx, input_b_t.dims() == 4, errors::InvalidArgument("input_b must have rank 4"));
 33 | 
 34 |       // Get dimensions of input (already padded)
 35 |       int batch_size     = input_a_t.dim_size(0);
 36 |       int input_height   = input_a_t.dim_size(1);
 37 |       int input_width    = input_a_t.dim_size(2);
 38 |       int input_channels = input_a_t.dim_size(3);
 39 |       int padded_height  = input_height + 2 * pad;
 40 |       int padded_width   = input_width + 2 * pad;
 41 | 
 42 |       // The size of unreachable border region on each side
 43 |       int kernel_radius = (kernel_size - 1) / 2;
 44 |       int border_size   = max_displacement + kernel_radius;
 45 | 
 46 |       // Calculate the output dimensions
 47 |       int output_height = ceil((float)(padded_height - border_size * 2) / (float)stride_1);
 48 |       int output_width  = ceil((float)(padded_width - border_size * 2) / (float)stride_1);
 49 | 
 50 |       OP_REQUIRES(ctx, output_height >= 1,
 51 |                   errors::InvalidArgument("Neighborhood and kernel don't fit in input height."));
 52 |       OP_REQUIRES(ctx, output_width >= 1,
 53 |                   errors::InvalidArgument("Neighborhood and kernel don't fit in input width."));
 54 | 
 55 |       int neighborhood_grid_radius = max_displacement / stride_2;
 56 |       int neighborhood_grid_width  = neighborhood_grid_radius * 2 + 1;
 57 |       int output_channels          = neighborhood_grid_width * neighborhood_grid_width;
 58 | 
 59 |       // Allocate the memory for the output
 60 |       Tensor *output_t;
 61 |       OP_REQUIRES_OK(ctx, ctx->allocate_output(
 62 |                        0,
 63 |                        TensorShape({ batch_size, output_height, output_width, output_channels }),
 64 |                        &output_t));
 65 | 
 66 |       // Get the tensors
 67 |       auto input_a = input_a_t.tensor<float, 4>();
 68 |       auto input_b = input_b_t.tensor<float, 4>();
 69 |       auto output  = output_t->tensor<float, 4>();
 70 | 
 71 |       // Create temporary tensors for padded inputs
 72 |       Tensor padded_input_a_t, padded_input_b_t;
 73 |       OP_REQUIRES_OK(ctx,
 74 |                      ctx->allocate_temp(DataTypeToEnum<float>::value,
 75 |                                         TensorShape({ batch_size, padded_height, padded_width, input_channels }),
 76 |                                         &padded_input_a_t));
 77 |       OP_REQUIRES_OK(ctx,
 78 |                      ctx->allocate_temp(DataTypeToEnum<float>::value,
 79 |                                         TensorShape({ batch_size, padded_height, padded_width, input_channels }),
 80 |                                         &padded_input_b_t));
 81 |       auto padded_input_a = padded_input_a_t.tensor<float, 4>();
 82 |       auto padded_input_b = padded_input_b_t.tensor<float, 4>();
 83 | 
 84 |       // Pad the inputs
 85 |       Pad(ctx->eigen_device<Device>(),
 86 |           input_a.data(),
 87 |           batch_size,
 88 |           input_height,
 89 |           input_width,
 90 |           input_channels,
 91 |           padded_height,
 92 |           padded_width,
 93 |           padded_input_a.data());
 94 |       Pad(ctx->eigen_device<Device>(),
 95 |           input_b.data(),
 96 |           batch_size,
 97 |           input_height,
 98 |           input_width,
 99 |           input_channels,
100 |           padded_height,
101 |           padded_width,
102 |           padded_input_b.data());
103 | 
104 |       // Perform cross correlation
105 |       Correlation(ctx->eigen_device<Device>(),
106 |                   padded_input_a.data(),
107 |                   padded_input_b.data(),
108 |                   batch_size,
109 |                   output_height,
110 |                   output_width,
111 |                   output_channels,
112 |                   output_height * output_width * output_channels,
113 |                   padded_height,
114 |                   padded_width,
115 |                   input_channels,
116 |                   max_displacement,
117 |                   neighborhood_grid_radius,
118 |                   neighborhood_grid_width,
119 |                   kernel_radius,
120 |                   kernel_size,
121 |                   stride_1,
122 |                   stride_2,
123 |                   output.data());
124 |     }
125 | 
126 |   private:
127 |     int kernel_size;
128 |     int max_displacement;
129 |     int stride_1;
130 |     int stride_2;
131 |     int pad;
132 | };
133 | 
134 | REGISTER_KERNEL_BUILDER(Name("Correlation")
135 |                         .Device(DEVICE_GPU),
136 |                         CorrelationKernel<GPUDevice>)
137 | } // end namespace tensorflow
138 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.cc:
--------------------------------------------------------------------------------
  1 | #define EIGEN_USE_THREADS
  2 | 
  3 | #include "flow_augmentation.h"
  4 | #include "tensorflow/core/framework/op_kernel.h"
  5 | #include "tensorflow/core/framework/register_types.h"
  6 | #include "tensorflow/core/framework/types.h"
  7 | #include "tensorflow/core/platform/types.h"
  8 | 
  9 | namespace tensorflow {
 10 | typedef Eigen::ThreadPoolDevice CPUDevice;
 11 | typedef Eigen::GpuDevice        GPUDevice;
 12 | 
 13 | inline int clamp(int f, int a, int b) {
 14 |   return std::max(a, std::min(f, b));
 15 | }
 16 | 
 17 | template<>
 18 | void FillFlowAugmentation(const CPUDevice& device,
 19 |                           typename TTypes<float, 4>::Tensor output,
 20 |                           typename TTypes<float, 4>::ConstTensor flows,
 21 |                           typename TTypes<float, 2>::ConstTensor transforms_from_a,
 22 |                           typename TTypes<float, 2>::ConstTensor transforms_from_b) {
 23 |   const int batch_size      = output.dimension(0);
 24 |   const int out_height      = output.dimension(1);
 25 |   const int out_width       = output.dimension(2);
 26 |   const int src_height      = flows.dimension(1);
 27 |   const int src_width       = flows.dimension(2);
 28 |   const int src_total_count = flows.dimension(0) * flows.dimension(1) *
 29 |                               flows.dimension(2) * flows.dimension(3);
 30 |   float *output_ptr     = output.data();
 31 |   const float *flow_ptr = flows.data();
 32 | 
 33 |   for (int n = 0; n < batch_size; n++) {
 34 |     const float *transMatA = transforms_from_a.data() + n * 6;
 35 |     const float *transMatB = transforms_from_b.data() + n * 6;
 36 | 
 37 |     for (int y = 0; y < out_height; y++) {
 38 |       int outputIdxOffset = (n * out_height + y) * out_width;
 39 | 
 40 |       for (int x = 0; x < out_width; x++) {
 41 |         // Apply transformation matrix applied to first image
 42 |         const float xpos1 = x * transMatA[0] + y * transMatA[1] + transMatA[2];
 43 |         const float ypos1 = x * transMatA[3] + y * transMatA[4] + transMatA[5];
 44 | 
 45 |         const int srcXIdx =
 46 |           ((n * src_height + (int)(ypos1 + 0.5)) * src_width + (int)(xpos1 + 0.5)) * 2 + 0;
 47 |         const int srcYIdx = srcXIdx + 1;
 48 | 
 49 |         const float xpos2 = xpos1 + flow_ptr[clamp(srcXIdx, 0, src_total_count - 1)];
 50 |         const float ypos2 = ypos1 + flow_ptr[clamp(srcYIdx, 0, src_total_count - 1)];
 51 | 
 52 |         // Apply inverse of the transformation matrix applied to second image
 53 |         const float xpos3 = xpos2 * transMatB[0] + ypos2 * transMatB[1] + transMatB[2];
 54 |         const float ypos3 = xpos2 * transMatB[3] + ypos2 * transMatB[4] + transMatB[5];
 55 | 
 56 |         output_ptr[(outputIdxOffset + x) * 2 + 0] = xpos3 - (float)x;
 57 |         output_ptr[(outputIdxOffset + x) * 2 + 1] = ypos3 - (float)y;
 58 |       }
 59 |     }
 60 |   }
 61 | }
 62 | 
 63 | template<typename Device>
 64 | class FlowAugmentation : public OpKernel {
 65 |   public:
 66 |     explicit FlowAugmentation(OpKernelConstruction *ctx) : OpKernel(ctx) {
 67 |       // Get the crop [height, width] tensor and verify its dimensions
 68 |       OP_REQUIRES_OK(ctx, ctx->GetAttr("crop", &crop_));
 69 |       OP_REQUIRES(ctx, crop_.size() == 2,
 70 |                   errors::InvalidArgument("crop must be 2 dimensions"));
 71 |     }
 72 | 
 73 |     void Compute(OpKernelContext *ctx) override {
 74 |       // Get the input images and transforms and verify their dimensions
 75 |       const Tensor& flows_t             = ctx->input(0);
 76 |       const Tensor& transforms_from_a_t = ctx->input(1);
 77 |       const Tensor& transforms_from_b_t = ctx->input(2);
 78 | 
 79 |       OP_REQUIRES(ctx, flows_t.dims() == 4,
 80 |                   errors::InvalidArgument("Input images must have rank 4"));
 81 |       OP_REQUIRES(ctx,
 82 |                   (TensorShapeUtils::IsMatrix(transforms_from_a_t.shape()) &&
 83 |                    transforms_from_a_t.dim_size(0) ==
 84 |                    flows_t.dim_size(0) &&
 85 |                    transforms_from_a_t.dim_size(1) == 6),
 86 |                   errors::InvalidArgument(
 87 |                     "Input transforms_from_a should be num_images x 6"));
 88 |       OP_REQUIRES(ctx,
 89 |                   (TensorShapeUtils::IsMatrix(transforms_from_b_t.shape()) &&
 90 |                    transforms_from_b_t.dim_size(0) ==
 91 |                    flows_t.dim_size(0) &&
 92 |                    transforms_from_b_t.dim_size(1) == 6),
 93 |                   errors::InvalidArgument(
 94 |                     "Input transforms_from_b should be num_images x 6"));
 95 | 
 96 |       // Allocate the memory for the output
 97 |       Tensor *output_t;
 98 |       OP_REQUIRES_OK(ctx, ctx->allocate_output(
 99 |                        0,
100 |                        TensorShape({ flows_t.dim_size(0), crop_[0], crop_[1],
101 |                                      flows_t.dim_size(3) }), &output_t));
102 | 
103 |       // Perform flow augmentation
104 |       auto flows             = flows_t.tensor<float, 4>();
105 |       auto transforms_from_a = transforms_from_a_t.tensor<float, 2>();
106 |       auto transforms_from_b = transforms_from_b_t.tensor<float, 2>();
107 |       auto output            = output_t->tensor<float, 4>();
108 | 
109 |       FillFlowAugmentation(ctx->eigen_device<Device>(),
110 |                            output,
111 |                            flows,
112 |                            transforms_from_a,
113 |                            transforms_from_b);
114 |     }
115 | 
116 |   private:
117 |     std::vector<int32>crop_;
118 | };
119 | 
120 | REGISTER_KERNEL_BUILDER(Name("FlowAugmentation")
121 |                         .Device(DEVICE_CPU),
122 |                         FlowAugmentation<CPUDevice>)
123 | 
124 | #if GOOGLE_CUDA
125 | REGISTER_KERNEL_BUILDER(Name("FlowAugmentation")
126 |                         .Device(DEVICE_GPU),
127 |                         FlowAugmentation<GPUDevice>)
128 | #endif // GOOGLE_CUDA
129 | } // end namespace tensorflow
130 | 


--------------------------------------------------------------------------------
/Codes/flownet2/test.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import tensorflow as tf
  3 | import numpy as np
  4 | from scipy.misc import imread
  5 | import matplotlib
  6 | from src.flowlib import read_flow, flow_to_image
  7 | matplotlib.use('TKAgg')
  8 | import matplotlib.pyplot as plt
  9 | 
 10 | _preprocessing_ops = tf.load_op_library(
 11 |     tf.resource_loader.get_path_to_datafile("./src/ops/build/preprocessing.so"))
 12 | 
 13 | 
 14 | def display(img, c):
 15 |     plt.subplot(int('22' + str(c + 1)))
 16 |     plt.imshow(img[0, :, :, :])
 17 | 
 18 | 
 19 | def main():
 20 |     """
 21 | .Input("image_a: float32")
 22 | .Input("image_b: float32")
 23 | .Attr("crop: list(int) >= 2")
 24 | .Attr("params_a_name: list(string)")
 25 | .Attr("params_a_rand_type: list(string)")
 26 | .Attr("params_a_exp: list(bool)")
 27 | .Attr("params_a_mean: list(float32)")
 28 | .Attr("params_a_spread: list(float32)")
 29 | .Attr("params_a_prob: list(float32)")
 30 | .Attr("params_b_name: list(string)")
 31 | .Attr("params_b_rand_type: list(string)")
 32 | .Attr("params_b_exp: list(bool)")
 33 | .Attr("params_b_mean: list(float32)")
 34 | .Attr("params_b_spread: list(float32)")
 35 | .Attr("params_b_prob: list(float32)")
 36 | .Output("aug_image_a: float32")
 37 | .Output("aug_image_b: float32")
 38 | .Output("spatial_transform_a: float32")
 39 | .Output("inv_spatial_transform_b: float32")
 40 |     """
 41 | 
 42 |     crop = [364, 492]
 43 |     params_a_name = ['translate_x', 'translate_y']
 44 |     params_a_rand_type = ['uniform_bernoulli', 'uniform_bernoulli']
 45 |     params_a_exp = [False, False]
 46 |     params_a_mean = [0.0, 0.0]
 47 |     params_a_spread = [0.4, 0.4]
 48 |     params_a_prob = [1.0, 1.0]
 49 |     params_b_name = []
 50 |     params_b_rand_type = []
 51 |     params_b_exp = []
 52 |     params_b_mean = []
 53 |     params_b_spread = []
 54 |     params_b_prob = []
 55 | 
 56 |     with tf.Session() as sess:
 57 |         with tf.device('/gpu:0'):
 58 |             image_a = imread('./img0.ppm') / 255.0
 59 |             image_b = imread('./img1.ppm') / 255.0
 60 |             flow = read_flow('./flow.flo')
 61 | 
 62 |             image_a_tf = tf.expand_dims(tf.to_float(tf.constant(image_a, dtype=tf.float64)), 0)
 63 |             image_b_tf = tf.expand_dims(tf.to_float(tf.constant(image_b, dtype=tf.float64)), 0)
 64 | 
 65 |             preprocess = _preprocessing_ops.data_augmentation(image_a_tf,
 66 |                                                               image_b_tf,
 67 |                                                               crop,
 68 |                                                               params_a_name,
 69 |                                                               params_a_rand_type,
 70 |                                                               params_a_exp,
 71 |                                                               params_a_mean,
 72 |                                                               params_a_spread,
 73 |                                                               params_a_prob,
 74 |                                                               params_b_name,
 75 |                                                               params_b_rand_type,
 76 |                                                               params_b_exp,
 77 |                                                               params_b_mean,
 78 |                                                               params_b_spread,
 79 |                                                               params_b_prob)
 80 | 
 81 |             out = sess.run(preprocess)
 82 |             trans = out.spatial_transform_a
 83 |             inv_trans = out.inv_spatial_transform_b
 84 | 
 85 |             print(trans.shape)
 86 |             print(inv_trans.shape)
 87 | 
 88 |             flow_tf = tf.expand_dims(tf.to_float(tf.constant(flow)), 0)
 89 |             aug_flow_tf = _preprocessing_ops.flow_augmentation(flow_tf, trans, inv_trans, crop)
 90 | 
 91 |             aug_flow = sess.run(aug_flow_tf)[0, :, :, :]
 92 | 
 93 |             # Plot img0, img0aug
 94 |             plt.subplot(321)
 95 |             plt.imshow(image_a)
 96 |             plt.subplot(322)
 97 |             plt.imshow(out.aug_image_a[0, :, :, :])
 98 | 
 99 |             # Plot img1, img1aug
100 |             plt.subplot(323)
101 |             plt.imshow(image_b)
102 |             plt.subplot(324)
103 |             plt.imshow(out.aug_image_b[0, :, :, :])
104 | 
105 |             # Plot flow, flowaug
106 |             plt.subplot(325)
107 |             plt.imshow(flow_to_image(flow))
108 |             plt.subplot(326)
109 |             plt.imshow(flow_to_image(aug_flow))
110 | 
111 |             plt.show()
112 | 
113 |             # image_b_aug = sess.run(image_b_tf)
114 |             #
115 |             # display(np.expand_dims(image_a, 0), 0)
116 |             # display(np.expand_dims(image_b, 0), 1)
117 |             # display(image_a_aug, 2)
118 |             # display(image_b_aug, 3)
119 |             # plt.show()
120 | 
121 |             # o = _preprocessing_ops.flow_augmentation(flow, trans, inv_t, [4, 8])
122 |             # print n[:, :, :]
123 |             # print n[0, 0, 1], n[0, 0, 0]
124 |             # print n[1, 0, 1], n[1, 0, 0]
125 |             # print n[2, 0, 1], n[2, 0, 0]
126 |             # print '---'
127 |             # print sess.run(o)
128 | 
129 |             """# Goes along width first!!
130 |             // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w)
131 |             // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
132 | 
133 |             H=5, W=10, K=2
134 |             n=0, h=1, w=5, k=0
135 | 
136 |             (2 * 10)                + c
137 | 
138 |             30      49                  n[0, 1, 5, 0]"""
139 | 
140 | 
141 | print(os.getpid())
142 | input("Press Enter to continue...")
143 | main()
144 | 
145 | # Last index is channel!!
146 | 
147 | #   K
148 | 
149 | # value 13 should be at [0, 2, 7, 1] aka batch=0, height=1, width=0, channel=0. it is at index=20.
150 | #
151 | # items = {
152 | #     'N': [0, 0],
153 | #     'H': [5, 2],
154 | #     'W': [10, 7],
155 | #     'K': [2, 1],
156 | # }
157 | #
158 | # for (i1, v1) in items.iteritems():
159 | #     for (i2, v2) in items.iteritems():
160 | #         for (i3, v3) in items.iteritems():
161 | #             for (i4, v4) in items.iteritems():
162 | #                 if ((v1[1] * v2[0] + v2[1]) * v3[0] + v3[1]) * v4[0] + v4[1] == 55:
163 | #                     print 'found it: ', i1, i2, i3, i4
164 | 


--------------------------------------------------------------------------------
/Codes/utils.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | from collections import OrderedDict
  4 | import os
  5 | import glob
  6 | import cv2
  7 | 
  8 | 
  9 | rng = np.random.RandomState(2017)
 10 | 
 11 | 
 12 | def np_load_frame(filename, resize_height, resize_width):
 13 |     """
 14 |     Load image path and convert it to numpy.ndarray. Notes that the color channels are BGR and the color space
 15 |     is normalized from [0, 255] to [-1, 1].
 16 | 
 17 |     :param filename: the full path of image
 18 |     :param resize_height: resized height
 19 |     :param resize_width: resized width
 20 |     :return: numpy.ndarray
 21 |     """
 22 |     image_decoded = cv2.imread(filename)
 23 |     image_resized = cv2.resize(image_decoded, (resize_width, resize_height))
 24 |     image_resized = image_resized.astype(dtype=np.float32)
 25 |     image_resized = (image_resized / 127.5) - 1.0
 26 |     return image_resized
 27 | 
 28 | 
 29 | class DataLoader(object):
 30 |     def __init__(self, video_folder, resize_height=256, resize_width=256):
 31 |         self.dir = video_folder
 32 |         self.videos = OrderedDict()
 33 |         self._resize_height = resize_height
 34 |         self._resize_width = resize_width
 35 |         self.setup()
 36 | 
 37 |     def __call__(self, batch_size, time_steps, num_pred=1):
 38 |         video_info_list = list(self.videos.values())
 39 |         num_videos = len(video_info_list)
 40 | 
 41 |         clip_length = time_steps + num_pred
 42 |         resize_height, resize_width = self._resize_height, self._resize_width
 43 | 
 44 |         def video_clip_generator():
 45 |             v_id = -1
 46 |             while True:
 47 |                 v_id = (v_id + 1) % num_videos
 48 | 
 49 |                 video_info = video_info_list[v_id]
 50 |                 start = rng.randint(0, video_info['length'] - clip_length)
 51 |                 video_clip = []
 52 |                 for frame_id in range(start, start + clip_length):
 53 |                     video_clip.append(np_load_frame(video_info['frame'][frame_id], resize_height, resize_width))
 54 |                 video_clip = np.concatenate(video_clip, axis=2)
 55 | 
 56 |                 yield video_clip
 57 | 
 58 |         # video clip paths
 59 |         dataset = tf.data.Dataset.from_generator(generator=video_clip_generator,
 60 |                                                  output_types=tf.float32,
 61 |                                                  output_shapes=[resize_height, resize_width, clip_length * 3])
 62 |         print('generator dataset, {}'.format(dataset))
 63 |         dataset = dataset.prefetch(buffer_size=1000)
 64 |         dataset = dataset.shuffle(buffer_size=1000).batch(batch_size)
 65 |         print('epoch dataset, {}'.format(dataset))
 66 | 
 67 |         return dataset
 68 | 
 69 |     def __getitem__(self, video_name):
 70 |         assert video_name in self.videos.keys(), 'video = {} is not in {}!'.format(video_name, self.videos.keys())
 71 |         return self.videos[video_name]
 72 | 
 73 |     def setup(self):
 74 |         videos = glob.glob(os.path.join(self.dir, '*'))
 75 |         for video in sorted(videos):
 76 |             video_name = video.split('/')[-1]
 77 |             self.videos[video_name] = {}
 78 |             self.videos[video_name]['path'] = video
 79 |             self.videos[video_name]['frame'] = glob.glob(os.path.join(video, '*.jpg'))
 80 |             self.videos[video_name]['frame'].sort()
 81 |             self.videos[video_name]['length'] = len(self.videos[video_name]['frame'])
 82 | 
 83 |     def get_video_clips(self, video, start, end):
 84 |         # assert video in self.videos, 'video = {} must in {}!'.format(video, self.videos.keys())
 85 |         # assert start >= 0, 'start = {} must >=0!'.format(start)
 86 |         # assert end <= self.videos[video]['length'], 'end = {} must <= {}'.format(video, self.videos[video]['length'])
 87 | 
 88 |         batch = []
 89 |         for i in range(start, end):
 90 |             image = np_load_frame(self.videos[video]['frame'][i], self._resize_height, self._resize_width)
 91 |             batch.append(image)
 92 | 
 93 |         return np.concatenate(batch, axis=2)
 94 | 
 95 | 
 96 | def log10(t):
 97 |     """
 98 |     Calculates the base-10 log of each element in t.
 99 | 
100 |     @param t: The tensor from which to calculate the base-10 log.
101 | 
102 |     @return: A tensor with the base-10 log of each element in t.
103 |     """
104 | 
105 |     numerator = tf.log(t)
106 |     denominator = tf.log(tf.constant(10, dtype=numerator.dtype))
107 |     return numerator / denominator
108 | 
109 | 
110 | def psnr_error(gen_frames, gt_frames):
111 |     """
112 |     Computes the Peak Signal to Noise Ratio error between the generated images and the ground
113 |     truth images.
114 | 
115 |     @param gen_frames: A tensor of shape [batch_size, height, width, 3]. The frames generated by the
116 |                        generator model.
117 |     @param gt_frames: A tensor of shape [batch_size, height, width, 3]. The ground-truth frames for
118 |                       each frame in gen_frames.
119 | 
120 |     @return: A scalar tensor. The mean Peak Signal to Noise Ratio error over each frame in the
121 |              batch.
122 |     """
123 |     shape = tf.shape(gen_frames)
124 |     num_pixels = tf.to_float(shape[1] * shape[2] * shape[3])
125 |     gt_frames = (gt_frames + 1.0) / 2.0
126 |     gen_frames = (gen_frames + 1.0) / 2.0
127 |     square_diff = tf.square(gt_frames - gen_frames)
128 | 
129 |     batch_errors = 10 * log10(1 / ((1 / num_pixels) * tf.reduce_sum(square_diff, [1, 2, 3])))
130 |     return tf.reduce_mean(batch_errors)
131 | 
132 | 
133 | def diff_mask(gen_frames, gt_frames, min_value=-1, max_value=1):
134 |     # normalize to [0, 1]
135 |     delta = max_value - min_value
136 |     gen_frames = (gen_frames - min_value) / delta
137 |     gt_frames = (gt_frames - min_value) / delta
138 | 
139 |     gen_gray_frames = tf.image.rgb_to_grayscale(gen_frames)
140 |     gt_gray_frames = tf.image.rgb_to_grayscale(gt_frames)
141 | 
142 |     diff = tf.abs(gen_gray_frames - gt_gray_frames)
143 |     return diff
144 | 
145 | 
146 | def load(saver, sess, ckpt_path):
147 |     saver.restore(sess, ckpt_path)
148 |     print("Restored model parameters from {}".format(ckpt_path))
149 | 
150 | 
151 | def save(saver, sess, logdir, step):
152 |     model_name = 'model.ckpt'
153 |     checkpoint_path = os.path.join(logdir, model_name)
154 |     if not os.path.exists(logdir):
155 |         os.makedirs(logdir)
156 |     saver.save(sess, checkpoint_path, global_step=step)
157 |     print('The checkpoint has been created.')
158 | 
159 | 
160 | 
161 | 
162 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/correlation/correlation_kernel.cu.cc:
--------------------------------------------------------------------------------
  1 | #if GOOGLE_CUDA
  2 | 
  3 | #define EIGEN_USE_GPU
  4 | 
  5 | #define WARPS_PER_BLOCK 1
  6 | #define THREADS_PER_WARP 32
  7 | 
  8 | #include <stdio.h>
  9 | #include <iostream>
 10 | 
 11 | #include "correlation_kernel.h"
 12 | #include "tensorflow/core/framework/register_types.h"
 13 | #include "tensorflow/core/framework/types.h"
 14 | #include "tensorflow/core/framework/tensor_types.h"
 15 | #include "tensorflow/core/platform/types.h"
 16 | #include "tensorflow/core/util/cuda_kernel_helper.h"
 17 | 
 18 | namespace tensorflow {
 19 | typedef Eigen::GpuDevice GPUDevice;
 20 | 
 21 | __global__ void CorrelateData(int          batch_size,
 22 |                               int          out_width,
 23 |                               int          out_height,
 24 |                               int          out_channels,
 25 |                               int          out_count,
 26 |                               int          max_displacement,
 27 |                               int          neighborhood_grid_radius,
 28 |                               int          neighborhood_grid_width,
 29 |                               int          kernel_radius,
 30 |                               int          kernel_size,
 31 |                               int          stride_1,
 32 |                               int          stride_2,
 33 |                               int          in_width_padded,
 34 |                               int          in_height_padded,
 35 |                               int          in_channels,
 36 |                               const float *input_a,
 37 |                               const float *input_b,
 38 |                               float       *output) {
 39 |   extern __shared__ char patch_data_char[];
 40 | 
 41 |   float *patch_data = (float *)patch_data_char;
 42 | 
 43 |   // First (upper left) position of kernel upper-left corner in current center
 44 |   // position of neighborhood in image 1
 45 |   int x1     = blockIdx.x * stride_1 + max_displacement;
 46 |   int y1     = blockIdx.y * stride_1 + max_displacement;
 47 |   int item   = blockIdx.z;
 48 |   int ch_off = threadIdx.x;
 49 | 
 50 |   // Load 3D patch into shared shared memory
 51 |   // HEIGHT
 52 |   for (int j = 0; j < kernel_size; j++) {
 53 |     // WIDTH
 54 |     for (int i = 0; i < kernel_size; i++) {
 55 |       int ji_off = ((j * kernel_size) + i) * in_channels;
 56 | 
 57 |       // CHANNELS
 58 |       for (int ch = ch_off; ch < in_channels; ch += (WARPS_PER_BLOCK * THREADS_PER_WARP)) {
 59 |         int idx1 = ((item * in_height_padded + y1 + j) * in_width_padded + x1 + i) *
 60 |                    in_channels + ch;
 61 |         int idxPatchData = ji_off + ch;
 62 |         patch_data[idxPatchData] = input_a[idx1];
 63 |       }
 64 |     }
 65 |   }
 66 | 
 67 |   __syncthreads();
 68 | 
 69 |   __shared__ float sum[WARPS_PER_BLOCK * THREADS_PER_WARP];
 70 | 
 71 |   // Compute correlation
 72 |   for (int out_channel = 0; out_channel < out_channels; out_channel++) {
 73 |     sum[ch_off] = 0;
 74 | 
 75 |     int s2o = (out_channel % neighborhood_grid_width - neighborhood_grid_radius) * stride_2;
 76 |     int s2p = (out_channel / neighborhood_grid_width - neighborhood_grid_radius) * stride_2;
 77 |     int x2  = x1 + s2o;
 78 |     int y2  = y1 + s2p;
 79 | 
 80 |     // HEIGHT
 81 |     for (int j = 0; j < kernel_size; j++) {
 82 |       // WIDTH
 83 |       for (int i = 0; i < kernel_size; i++) {
 84 |         int ji_off = ((j * kernel_size) + i) * in_channels;
 85 | 
 86 |         // CHANNELS
 87 |         for (int ch = ch_off; ch < in_channels; ch += (WARPS_PER_BLOCK * THREADS_PER_WARP)) {
 88 |           int idxPatchData = ji_off + ch;
 89 |           int idx2         = ((item * in_height_padded + y2 + j) * in_width_padded + x2 + i) *
 90 |                              in_channels + ch;
 91 | 
 92 |           sum[ch_off] += patch_data[idxPatchData] * input_b[idx2];
 93 |         }
 94 |       }
 95 |     }
 96 | 
 97 |     __syncthreads();
 98 | 
 99 |     if (ch_off == 0) {
100 |       float total_sum = 0;
101 | 
102 |       for (int idx = 0; idx < WARPS_PER_BLOCK * THREADS_PER_WARP; idx++) {
103 |         total_sum += sum[idx];
104 |       }
105 |       const int sumelems = kernel_size * kernel_size * in_channels;
106 |       const int index    = (blockIdx.y * out_width + blockIdx.x) * out_channels + out_channel;
107 | 
108 |       /* from Caffe:   const int index    = ((out_channel * out_height +
109 |          blockIdx.y) * out_width) + blockIdx.x; */
110 |       output[index + item * out_count] = total_sum / (float)sumelems;
111 | 
112 |       // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w)
113 |       // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
114 |       // n = 0
115 |       // caffe: ((k * H + h) * W + w)  +   n * K * H * W
116 |       // tf: (h * W + w) * K + k       +   n * H * W * K
117 |     }
118 |   }
119 | }
120 | 
121 | void Correlation(const GPUDevice& device,
122 |                  const float     *input_a,
123 |                  const float     *input_b,
124 |                  const int        batch_size,
125 |                  const int        out_height,
126 |                  const int        out_width,
127 |                  const int        out_channels,
128 |                  const int        out_count,
129 |                  const int        in_height_padded,
130 |                  const int        in_width_padded,
131 |                  const int        in_channels,
132 |                  int              max_displacement,
133 |                  int              neighborhood_grid_radius,
134 |                  int              neighborhood_grid_width,
135 |                  int              kernel_radius,
136 |                  int              kernel_size,
137 |                  int              stride_1,
138 |                  int              stride_2,
139 |                  float           *output) {
140 |   dim3 totalBlocksCorr(out_width, out_height, batch_size);
141 |   dim3 threadsPerBlock(THREADS_PER_WARP *WARPS_PER_BLOCK);
142 |   const int shared_memory_per_block = (kernel_size * kernel_size) * in_channels;
143 | 
144 |   CorrelateData << < totalBlocksCorr, threadsPerBlock, shared_memory_per_block * sizeof(float),
145 |     device.stream() >> > (
146 |     batch_size, out_width, out_height, out_channels, out_count,
147 |     max_displacement, neighborhood_grid_radius, neighborhood_grid_width, kernel_radius,
148 |     kernel_size, stride_1, stride_2, in_width_padded, in_height_padded, in_channels,
149 |     input_a, input_b, output);
150 | }
151 | } // end namespace tensorflow
152 | 
153 | #endif  // GOOGLE_CUDA
154 | 


--------------------------------------------------------------------------------
/Codes/constant.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import argparse
  3 | import configparser
  4 | 
  5 | 
  6 | def get_dir(directory):
  7 |     """
  8 |     get the directory, if no such directory, then make it.
  9 | 
 10 |     @param directory: The new directory.
 11 |     """
 12 | 
 13 |     if not os.path.exists(directory):
 14 |         os.makedirs(directory)
 15 | 
 16 |     return directory
 17 | 
 18 | 
 19 | def parser_args():
 20 |     parser = argparse.ArgumentParser(description='Options to run the network.')
 21 |     parser.add_argument('-g', '--gpu', type=str, default='0',
 22 |                         help='the device id of gpu.')
 23 |     parser.add_argument('-i', '--iters', type=int, default=1,
 24 |                         help='set the number of iterations, default is 1')
 25 |     parser.add_argument('-b', '--batch', type=int, default=4,
 26 |                         help='set the batch size, default is 4.')
 27 |     parser.add_argument('--num_his', type=int, default=4,
 28 |                         help='set the time steps, default is 4.')
 29 | 
 30 |     parser.add_argument('-d', '--dataset', type=str,
 31 |                         help='the name of dataset.')
 32 |     parser.add_argument('--train_folder', type=str, default='',
 33 |                         help='set the training folder path.')
 34 |     parser.add_argument('--test_folder', type=str, default='',
 35 |                         help='set the testing folder path.')
 36 | 
 37 |     parser.add_argument('--config', type=str, default='training_hyper_params/hyper_params.ini',
 38 |                         help='the path of training_hyper_params, default is training_hyper_params/hyper_params.ini')
 39 | 
 40 |     parser.add_argument('--snapshot_dir', type=str, default='',
 41 |                         help='if it is folder, then it is the directory to save models, '
 42 |                              'if it is a specific model.ckpt-xxx, then the system will load it for testing.')
 43 |     parser.add_argument('--summary_dir', type=str, default='', help='the directory to save summaries.')
 44 |     parser.add_argument('--psnr_dir', type=str, default='', help='the directory to save psnrs results in testing.')
 45 | 
 46 |     parser.add_argument('--evaluate', type=str, default='compute_auc',
 47 |                         help='the evaluation metric, default is compute_auc')
 48 | 
 49 |     return parser.parse_args()
 50 | 
 51 | 
 52 | class Const(object):
 53 |     class ConstError(TypeError):
 54 |         pass
 55 | 
 56 |     class ConstCaseError(ConstError):
 57 |         pass
 58 | 
 59 |     def __setattr__(self, name, value):
 60 |         if name in self.__dict__:
 61 |             raise self.ConstError("Can't change const.{}".format(name))
 62 |         if not name.isupper():
 63 |             raise self.ConstCaseError('const name {} is not all uppercase'.format(name))
 64 | 
 65 |         self.__dict__[name] = value
 66 | 
 67 |     def __str__(self):
 68 |         _str = '<================ Constants information ================>\n'
 69 |         for name, value in self.__dict__.items():
 70 |             print(name, value)
 71 |             _str += '\t{}\t{}\n'.format(name, value)
 72 | 
 73 |         return _str
 74 | 
 75 | 
 76 | args = parser_args()
 77 | const = Const()
 78 | 
 79 | # inputs constants
 80 | const.DATASET = args.dataset
 81 | const.TRAIN_FOLDER = args.train_folder
 82 | const.TEST_FOLDER = args.test_folder
 83 | 
 84 | const.GPU = args.gpu
 85 | 
 86 | const.BATCH_SIZE = args.batch
 87 | const.NUM_HIS = args.num_his
 88 | const.ITERATIONS = args.iters
 89 | 
 90 | const.EVALUATE = args.evaluate
 91 | 
 92 | # network constants
 93 | const.HEIGHT = 256
 94 | const.WIDTH = 256
 95 | const.FLOWNET_CHECKPOINT = 'checkpoints/pretrains/flownet-SD.ckpt-0'
 96 | const.FLOW_HEIGHT = 384
 97 | const.FLOW_WIDTH = 512
 98 | 
 99 | # set training hyper-parameters of different datasets
100 | config = configparser.ConfigParser()
101 | assert config.read(args.config)
102 | 
103 | # for lp loss. e.g, 1 or 2 for l1 and l2 loss, respectively)
104 | const.L_NUM = config.getint(const.DATASET, 'L_NUM')
105 | # the power to which each gradient term is raised in GDL loss
106 | const.ALPHA_NUM = config.getint(const.DATASET, 'ALPHA_NUM')
107 | # the percentage of the adversarial loss to use in the combined loss
108 | const.LAM_ADV = config.getfloat(const.DATASET, 'LAM_ADV')
109 | # the percentage of the lp loss to use in the combined loss
110 | const.LAM_LP = config.getfloat(const.DATASET, 'LAM_LP')
111 | # the percentage of the GDL loss to use in the combined loss
112 | const.LAM_GDL = config.getfloat(const.DATASET, 'LAM_GDL')
113 | # the percentage of the different frame loss
114 | const.LAM_FLOW = config.getfloat(const.DATASET, 'LAM_FLOW')
115 | 
116 | # Learning rate of generator
117 | const.LRATE_G = eval(config.get(const.DATASET, 'LRATE_G'))
118 | const.LRATE_G_BOUNDARIES = eval(config.get(const.DATASET, 'LRATE_G_BOUNDARIES'))
119 | 
120 | # Learning rate of discriminator
121 | const.LRATE_D = eval(config.get(const.DATASET, 'LRATE_D'))
122 | const.LRATE_D_BOUNDARIES = eval(config.get(const.DATASET, 'LRATE_D_BOUNDARIES'))
123 | 
124 | 
125 | const.SAVE_DIR = '{dataset}_l_{L_NUM}_alpha_{ALPHA_NUM}_lp_{LAM_LP}_' \
126 |                  'adv_{LAM_ADV}_gdl_{LAM_GDL}_flow_{LAM_FLOW}'.format(dataset=const.DATASET,
127 |                                                                       L_NUM=const.L_NUM,
128 |                                                                       ALPHA_NUM=const.ALPHA_NUM,
129 |                                                                       LAM_LP=const.LAM_LP, LAM_ADV=const.LAM_ADV,
130 |                                                                       LAM_GDL=const.LAM_GDL, LAM_FLOW=const.LAM_FLOW)
131 | 
132 | if args.snapshot_dir:
133 |     # if the snapshot_dir is model.ckpt-xxx, which means it is the single model for testing.
134 |     if os.path.exists(args.snapshot_dir + '.meta') or os.path.exists(args.snapshot_dir + '.data-00000-of-00001') or \
135 |             os.path.exists(args.snapshot_dir + '.index'):
136 |         const.SNAPSHOT_DIR = args.snapshot_dir
137 |         print(const.SNAPSHOT_DIR)
138 |     else:
139 |         const.SNAPSHOT_DIR = get_dir(os.path.join('checkpoints', const.SAVE_DIR + '_' + args.snapshot_dir))
140 | else:
141 |     const.SNAPSHOT_DIR = get_dir(os.path.join('checkpoints', const.SAVE_DIR))
142 | 
143 | if args.summary_dir:
144 |     const.SUMMARY_DIR = get_dir(os.path.join('summary', const.SAVE_DIR + '_' + args.summary_dir))
145 | else:
146 |     const.SUMMARY_DIR = get_dir(os.path.join('summary', const.SAVE_DIR))
147 | 
148 | if args.psnr_dir:
149 |     const.PSNR_DIR = get_dir(os.path.join('psnrs', const.SAVE_DIR + '_' + args.psnr_dir))
150 | else:
151 |     const.PSNR_DIR = get_dir(os.path.join('psnrs', const.SAVE_DIR))
152 | 
153 | 
154 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Future Frame Prediction for Anomaly Detection -- A New Baseline
  2 | This repo is the official open-source of [Future Frame Prediction for Anomaly Detection -- A New Baseline, CVPR 2018](https://arxiv.org/pdf/1712.09867.pdf) by Wen Liu, Weixin Luo, Dongze Lian and Shenghua Gao. 
  3 | A **demo** is shown in *https://www.youtube.com/watch?v=M--wv-Y_h0A*. 
  4 | ![scalars_tensorboard](assets/architecture.JPG)
  5 | 
  6 | It is implemented in TensorFlow. Please follow the instructions to run the code.
  7 | 
  8 | ## 1. Installation (Anaconda with python3.6 installation is recommended)
  9 | * Install 3rd-package dependencies of Python (listed in requirements.txt)
 10 | ```
 11 | numpy==1.14.1
 12 | scipy==1.0.0
 13 | matplotlib==2.1.2
 14 | tensorflow-gpu==1.4.1
 15 | tensorflow==1.4.1
 16 | Pillow==5.0.0
 17 | pypng==0.0.18
 18 | scikit_learn==0.19.1
 19 | opencv-python==3.2.0.6
 20 | ```
 21 | 
 22 | ```shell
 23 | pip install -r requirements.txt
 24 | 
 25 | pip install tensorflow-gpu==1.4.1
 26 | ```
 27 | * Other libraries
 28 | ```code
 29 | CUDA 8.0
 30 | Cudnn 6.0
 31 | Ubuntu 14.04 or 16.04, Centos 7 and other distributions.
 32 | ```
 33 | ## 2. Download datasets
 34 | Please manually download all datasets from [ped1.tar.gz, ped2.tar.gz, avenue.tar.gz and shanghaitech.tar.gz](https://1drv.ms/f/s!AjjUqiJZsj8whLt1Y8O-sOW8gWs1-A?e=fJGmAk)
 35 | and tar each tar.gz file, and move them into **Data** folder.
 36 | 
 37 | You can also download data from BaiduYun(https://pan.baidu.com/s/1j0TEt-2Dw3kcfdX-LCF0YQ) i9b3 
 38 | 
 39 | ## 3. Testing on saved models
 40 | * Download the trained models (There are the pretrained FlowNet and the trained models of the papers, such as ped1, ped2 and avenue).
 41 | Please manually download pretrained models of avenue, ped1, ped2, flownet from [pretrains.tar.gz](https://1drv.ms/f/s!AjjUqiJZsj8whLt1Y8O-sOW8gWs1-A?e=fJGmAk)
 42 | and tar -xvf pretrains.tar.gz, and move pretrains into **Codes/checkpoints** folder. **[ShanghaiTech pre-trained models](https://onedrive.live.com/?authkey=%21AMlRwbaoQ0sAgqU&id=303FB25922AAD438%217383&cid=303FB25922AAD438)**
 43 | 
 44 | * Running the sript (as ped2 and avenue datasets for examples) and cd into **Codes** folder at first.
 45 | ```shell
 46 | python inference.py  --dataset  ped2    \
 47 |                     --test_folder  ../Data/ped2/testing/frames      \
 48 |                     --gpu  1    \
 49 |                     --snapshot_dir    checkpoints/pretrains/ped2
 50 | ```
 51 | 
 52 | ```shell
 53 | python inference.py  --dataset  avenue    \
 54 |                     --test_folder  ../Data/avenue/testing/frames      \
 55 |                     --gpu  1    \
 56 |                     --snapshot_dir    checkpoints/pretrains/avenue
 57 | ```
 58 | 
 59 | 
 60 | ## 4. Training from scratch (here we use ped2 and avenue datasets for examples)
 61 | * Download the pretrained FlowNet at first and see above-mentioned step 3.1 
 62 | * Set hyper-parameters
 63 | The default hyper-parameters, such as $\lambda_{init}$, $\lambda_{gd}$, $\lambda_{op}$, $\lambda_{adv}$ and the learning rate of G, as well as D, are all initialized in **training_hyper_params/hyper_params.ini**. 
 64 | * Running script (as ped2 or avenue for instances) and cd into **Codes** folder at first.
 65 | ```shell
 66 | python train.py  --dataset  ped2    \
 67 |                  --train_folder  ../Data/ped2/training/frames     \
 68 |                  --test_folder  ../Data/ped2/testing/frames       \
 69 |                  --gpu  0       \
 70 |                  --iters    80000
 71 | ```
 72 | * Model selection while training
 73 | To do model selection, a popular way is to test the saved models after a number of iterations or epochs (Since there is no validation set provided on above all datasets, and to compare the performance with other methods, we just choose the best model on testing set). Here, we can use another GPU to listen to the **snapshot_dir** folder. When a new model.cpkt.xxx has arrived, then load the model and test. Finally, we choose the best model. Following is the script.
 74 | ```shell
 75 | python inference.py  --dataset  ped2    \
 76 |                      --test_folder  ../Data/ped2/testing/frames       \
 77 |                      --gpu  1
 78 | ```
 79 | Run **python train.py -h** to know more about the flag options or see the detials in **constant.py**.
 80 | ```shell
 81 | Options to run the network.
 82 | 
 83 | optional arguments:
 84 |   -h, --help            show this help message and exit
 85 |   -g GPU, --gpu GPU    the device id of gpu.
 86 |   -i ITERS, --iters ITERS
 87 |                         set the number of iterations, default is 1
 88 |   -b BATCH, --batch BATCH
 89 |                         set the batch size, default is 4.
 90 |   --num_his NUM_HIS    set the time steps, default is 4.
 91 |   -d DATASET, --dataset DATASET
 92 |                         the name of dataset.
 93 |   --train_folder TRAIN_FOLDER
 94 |                         set the training folder path.
 95 |   --test_folder TEST_FOLDER
 96 |                         set the testing folder path.
 97 |   --config CONFIG      the path of training_hyper_params, default is
 98 |                         training_hyper_params/hyper_params.ini
 99 |   --snapshot_dir SNAPSHOT_DIR
100 |                         if it is folder, then it is the directory to save
101 |                         models, if it is a specific model.ckpt-xxx, then the
102 |                         system will load it for testing.
103 |   --summary_dir SUMMARY_DIR
104 |                         the directory to save summaries.
105 |   --psnr_dir PSNR_DIR  the directory to save psnrs results in testing.
106 |   --evaluate EVALUATE  the evaluation metric, default is compute_auc
107 | ```
108 | * (Option) Tensorboard visualization
109 | ```shell
110 | tensorboard    --logdir=./summary    --port=10086
111 | ```
112 | Open the browser and type **https://ip:10086**. Following is the screenshot of Avenue on Tensorboard.
113 | ![scalars_tensorboard](assets/scalars.JPG)
114 | 
115 | ![images_tensorboard](assets/images.JPG)
116 | Since the models are trained in BGR image color channels, the visualized images in Tensorboard look different from RGB channels.
117 | In the demo, we change the output images from BGR to RGB.
118 | 
119 | ## Notes
120 | The flow loss (temporal loss) module is based on [a TensorFlow implementation of FlowNet2](https://github.com/sampepose/flownet2-tf). Thanks for their nice work.
121 | ## Citation
122 | If you find this useful, please cite our work as follows:
123 | ```code
124 | @INPROCEEDINGS{liu2018ano_pred, 
125 | 	author={W. Liu and W. Luo, D. Lian and S. Gao}, 
126 | 	booktitle={2018 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 
127 | 	title={Future Frame Prediction for Anomaly Detection -- A New Baseline}, 
128 | 	year={2018}
129 | }
130 | ```
131 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cc:
--------------------------------------------------------------------------------
  1 | #define EIGEN_USE_THREADS
  2 | 
  3 | #include "correlation_kernel.h"
  4 | #include "pad.h"
  5 | 
  6 | #include "tensorflow/core/framework/op_kernel.h"
  7 | #include "tensorflow/core/framework/register_types.h"
  8 | #include "tensorflow/core/framework/types.h"
  9 | #include "tensorflow/core/platform/types.h"
 10 | 
 11 | namespace tensorflow {
 12 | typedef Eigen::GpuDevice GPUDevice;
 13 | 
 14 | template<typename Device>
 15 | class CorrelationGradKernel : public OpKernel {
 16 |   public:
 17 |     explicit CorrelationGradKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {
 18 |       // Get the attributes
 19 |       OP_REQUIRES_OK(ctx, ctx->GetAttr("kernel_size", &kernel_size));
 20 |       OP_REQUIRES_OK(ctx, ctx->GetAttr("max_displacement", &max_displacement));
 21 |       OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_1", &stride_1));
 22 |       OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_2", &stride_2));
 23 |       OP_REQUIRES_OK(ctx, ctx->GetAttr("pad", &pad));
 24 | 
 25 |       OP_REQUIRES(ctx, kernel_size % 2 != 0, errors::InvalidArgument("kernel_size must be odd"));
 26 |     }
 27 | 
 28 |     void Compute(OpKernelContext *ctx) override {
 29 |       // Get the input images and verify their dimensions
 30 |       const Tensor& gradients_t = ctx->input(0);
 31 |       const Tensor& input_a_t   = ctx->input(1);
 32 |       const Tensor& input_b_t   = ctx->input(2);
 33 | 
 34 |       OP_REQUIRES(ctx, input_a_t.dims() == 4, errors::InvalidArgument("input_a must have rank 4"));
 35 |       OP_REQUIRES(ctx, input_b_t.dims() == 4, errors::InvalidArgument("input_b must have rank 4"));
 36 | 
 37 |       // Get dimensions of input
 38 |       const int batch_size          = input_a_t.dim_size(0);
 39 |       const int in_height           = input_a_t.dim_size(1);
 40 |       const int in_width            = input_a_t.dim_size(2);
 41 |       const int in_channels         = input_a_t.dim_size(3);
 42 |       const int in_count_per_sample = in_height * in_width * in_channels;
 43 |       const int padded_height       = in_height + 2 * pad;
 44 |       const int padded_width        = in_width + 2 * pad;
 45 | 
 46 |       // The size of unreachable border region on each side
 47 |       const int kernel_radius = (kernel_size - 1) / 2;
 48 |       const int border_size   = max_displacement + kernel_radius;
 49 | 
 50 |       // Calculate the output dimensions
 51 |       const int out_height = ceil((float)(padded_height - border_size * 2) / (float)stride_1);
 52 |       const int out_width  = ceil((float)(padded_width - border_size * 2) / (float)stride_1);
 53 | 
 54 |       const int neighborhood_grid_radius = max_displacement / stride_2;
 55 |       const int neighborhood_grid_width  = neighborhood_grid_radius * 2 + 1;
 56 |       const int out_channels             = neighborhood_grid_width * neighborhood_grid_width;
 57 | 
 58 |       // Allocate the memory for the outputs
 59 |       Tensor *output_a_gradient_t;
 60 |       OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input_a_t.shape(), &output_a_gradient_t));
 61 |       Tensor *output_b_gradient_t;
 62 |       OP_REQUIRES_OK(ctx, ctx->allocate_output(1, input_b_t.shape(), &output_b_gradient_t));
 63 | 
 64 |       // Get the tensors
 65 |       auto gradients         = gradients_t.tensor<float, 4>();
 66 |       auto input_a           = input_a_t.tensor<float, 4>();
 67 |       auto input_b           = input_b_t.tensor<float, 4>();
 68 |       auto output_a_gradient = output_a_gradient_t->tensor<float, 4>();
 69 |       auto output_b_gradient = output_b_gradient_t->tensor<float, 4>();
 70 | 
 71 |       // Create temporary tensors for padded inputs
 72 |       Tensor padded_input_a_t, padded_input_b_t;
 73 |       OP_REQUIRES_OK(ctx,
 74 |                      ctx->allocate_temp(DataTypeToEnum<float>::value,
 75 |                                         TensorShape({ batch_size, padded_height, padded_width, in_channels }),
 76 |                                         &padded_input_a_t));
 77 |       OP_REQUIRES_OK(ctx,
 78 |                      ctx->allocate_temp(DataTypeToEnum<float>::value,
 79 |                                         TensorShape({ batch_size, padded_height, padded_width, in_channels }),
 80 |                                         &padded_input_b_t));
 81 |       auto padded_input_a = padded_input_a_t.tensor<float, 4>();
 82 |       auto padded_input_b = padded_input_b_t.tensor<float, 4>();
 83 | 
 84 |       // Pad the inputs
 85 |       Pad(ctx->eigen_device<Device>(),
 86 |           input_a.data(),
 87 |           batch_size,
 88 |           in_height,
 89 |           in_width,
 90 |           in_channels,
 91 |           padded_height,
 92 |           padded_width,
 93 |           padded_input_a.data());
 94 |       Pad(ctx->eigen_device<Device>(),
 95 |           input_b.data(),
 96 |           batch_size,
 97 |           in_height,
 98 |           in_width,
 99 |           in_channels,
100 |           padded_height,
101 |           padded_width,
102 |           padded_input_b.data());
103 | 
104 |       CorrelationGradA(ctx->eigen_gpu_device(),
105 |                        batch_size,
106 |                        out_width,
107 |                        out_height,
108 |                        out_channels,
109 |                        max_displacement,
110 |                        neighborhood_grid_radius,
111 |                        neighborhood_grid_width,
112 |                        kernel_radius,
113 |                        stride_1,
114 |                        stride_2,
115 |                        in_width,
116 |                        in_height,
117 |                        padded_width,
118 |                        padded_height,
119 |                        in_channels,
120 |                        in_count_per_sample,
121 |                        pad,
122 |                        padded_input_b.data(),
123 |                        gradients.data(),
124 |                        output_a_gradient.data());
125 | 
126 |       CorrelationGradB(ctx->eigen_gpu_device(),
127 |                        batch_size,
128 |                        out_width,
129 |                        out_height,
130 |                        out_channels,
131 |                        max_displacement,
132 |                        neighborhood_grid_radius,
133 |                        neighborhood_grid_width,
134 |                        kernel_radius,
135 |                        stride_1,
136 |                        stride_2,
137 |                        in_width,
138 |                        in_height,
139 |                        padded_width,
140 |                        padded_height,
141 |                        in_channels,
142 |                        in_count_per_sample,
143 |                        pad,
144 |                        padded_input_a.data(),
145 |                        gradients.data(),
146 |                        output_b_gradient.data());
147 |     }
148 | 
149 |   private:
150 |     int kernel_size;
151 |     int max_displacement;
152 |     int stride_1;
153 |     int stride_2;
154 |     int pad;
155 | };
156 | 
157 | REGISTER_KERNEL_BUILDER(Name("CorrelationGrad")
158 |                         .Device(DEVICE_GPU),
159 |                         CorrelationGradKernel<GPUDevice>)
160 | } // end namespace tensorflow
161 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet2/flownet2.py:
--------------------------------------------------------------------------------
  1 | from ..net import Net, Mode
  2 | from ..flownet_css.flownet_css import FlowNetCSS
  3 | from ..flownet_sd.flownet_sd import FlowNetSD
  4 | from ..flow_warp import flow_warp
  5 | from ..utils import LeakyReLU, average_endpoint_error, pad, antipad
  6 | from ..downsample import downsample
  7 | import tensorflow as tf
  8 | slim = tf.contrib.slim
  9 | 
 10 | 
 11 | class FlowNet2(Net):
 12 | 
 13 |     def __init__(self, mode=Mode.TRAIN, debug=False):
 14 |         self.net_css = FlowNetCSS(mode, debug)
 15 |         self.net_sd = FlowNetSD(mode, debug)
 16 |         super(FlowNet2, self).__init__(mode=mode, debug=debug)
 17 | 
 18 |     def model(self, inputs, training_schedule, trainable=True):
 19 |         _, height, width, _ = inputs['input_a'].shape.as_list()
 20 |         with tf.variable_scope('FlowNet2'):
 21 |             # Forward pass through FlowNetCSS and FlowNetSD with weights frozen
 22 |             net_css_predictions = self.net_css.model(inputs, training_schedule, trainable=True)
 23 |             net_sd_predictions = self.net_sd.model(inputs, training_schedule, trainable=True)
 24 | 
 25 |             def ChannelNorm(tensor):
 26 |                 sq = tf.square(tensor)
 27 |                 r_sum = tf.reduce_sum(sq, keep_dims=True, axis=3)
 28 |                 return tf.sqrt(r_sum)
 29 | 
 30 |             sd_flow_norm = ChannelNorm(net_sd_predictions['flow'])
 31 |             css_flow_norm = ChannelNorm(net_css_predictions['flow'])
 32 | 
 33 |             flow_warp_sd = flow_warp(inputs['input_b'], net_sd_predictions['flow'])
 34 |             img_diff_sd = inputs['input_a'] - flow_warp_sd
 35 |             img_diff_sd_norm = ChannelNorm(img_diff_sd)
 36 | 
 37 |             flow_warp_css = flow_warp(inputs['input_b'], net_css_predictions['flow'])
 38 |             img_diff_css = inputs['input_a'] - flow_warp_css
 39 |             img_diff_css_norm = ChannelNorm(img_diff_css)
 40 | 
 41 |             input_to_fusion = tf.concat([inputs['input_a'],
 42 |                                          net_sd_predictions['flow'],
 43 |                                          net_css_predictions['flow'],
 44 |                                          sd_flow_norm,
 45 |                                          css_flow_norm,
 46 |                                          img_diff_sd_norm,
 47 |                                          img_diff_css_norm], axis=3)
 48 | 
 49 |             # Fusion Network
 50 |             with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
 51 |                                 # Only backprop this network if trainable
 52 |                                 trainable=trainable,
 53 |                                 # He (aka MSRA) weight initialization
 54 |                                 weights_initializer=slim.variance_scaling_initializer(),
 55 |                                 activation_fn=LeakyReLU,
 56 |                                 # We will do our own padding to match the original Caffe code
 57 |                                 padding='VALID'):
 58 | 
 59 |                 weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay'])
 60 |                 with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer):
 61 |                     fuse_conv0 = slim.conv2d(pad(input_to_fusion), 64, 3, scope='fuse_conv0')
 62 |                     fuse_conv1 = slim.conv2d(pad(fuse_conv0), 64, 3, stride=2, scope='fuse_conv1')
 63 |                     fuse_conv1_1 = slim.conv2d(pad(fuse_conv1), 128, 3, scope='fuse_conv1_1')
 64 |                     fuse_conv2 = slim.conv2d(pad(fuse_conv1_1), 128, 3,
 65 |                                              stride=2, scope='fuse_conv2')
 66 |                     fuse_conv2_1 = slim.conv2d(pad(fuse_conv2), 128, 3, scope='fuse_conv2_1')
 67 | 
 68 |                     predict_flow2 = slim.conv2d(pad(fuse_conv2_1), 2, 3,
 69 |                                                 scope='predict_flow2',
 70 |                                                 activation_fn=None)
 71 |                     fuse_deconv1 = antipad(slim.conv2d_transpose(fuse_conv2_1, 32, 4,
 72 |                                                                  stride=2,
 73 |                                                                  scope='fuse_deconv1'))
 74 |                     fuse_upsample_flow2to1 = antipad(slim.conv2d_transpose(predict_flow2, 2, 4,
 75 |                                                                            stride=2,
 76 |                                                                            scope='fuse_upsample_flow2to1',
 77 |                                                                            activation_fn=None))
 78 |                     concat1 = tf.concat([fuse_conv1_1, fuse_deconv1,
 79 |                                          fuse_upsample_flow2to1], axis=3)
 80 |                     fuse_interconv1 = slim.conv2d(pad(concat1), 32, 3,
 81 |                                                   activation_fn=None, scope='fuse_interconv1')
 82 | 
 83 |                     predict_flow1 = slim.conv2d(pad(fuse_interconv1), 2, 3,
 84 |                                                 scope='predict_flow1',
 85 |                                                 activation_fn=None)
 86 |                     fuse_deconv0 = antipad(slim.conv2d_transpose(concat1, 16, 4,
 87 |                                                                  stride=2,
 88 |                                                                  scope='fuse_deconv0'))
 89 |                     fuse_upsample_flow1to0 = antipad(slim.conv2d_transpose(predict_flow1, 2, 4,
 90 |                                                                            stride=2,
 91 |                                                                            scope='fuse_upsample_flow1to0',
 92 |                                                                            activation_fn=None))
 93 |                     concat0 = tf.concat([fuse_conv0, fuse_deconv0, fuse_upsample_flow1to0], axis=3)
 94 |                     fuse_interconv0 = slim.conv2d(pad(concat0), 16, 3,
 95 |                                                   activation_fn=None, scope='fuse_interconv0')
 96 | 
 97 |                     predict_flow0 = slim.conv2d(pad(fuse_interconv0), 2,
 98 |                                                 3, activation_fn=None, scope='predict_flow0')
 99 | 
100 |                     flow = tf.image.resize_bilinear(
101 |                         predict_flow0, tf.stack([height, width]), align_corners=True)
102 |                     print(predict_flow0)
103 |                     print(flow)
104 |                     return {
105 |                         'predict_flow0': predict_flow0,
106 |                         'flow': flow,
107 |                     }
108 | 
109 |     def loss(self, flow, predictions):
110 |         # L2 loss between predict_flow0, true flow (weighted w/ 0.005)
111 |         predict_flow0 = predictions['predict_flow0']
112 |         size = [predict_flow0.shape[1], predict_flow0.shape[2]]
113 |         downsampled_flow0 = downsample(flow, size)
114 |         loss = average_endpoint_error(downsampled_flow0, predict_flow0)
115 |         tf.losses.add_loss(loss)
116 | 
117 |         # Return the 'total' loss: loss fns + regularization terms defined in the model
118 |         return tf.losses.get_total_loss()
119 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/net.py:
--------------------------------------------------------------------------------
  1 | import abc
  2 | from enum import Enum
  3 | import os
  4 | import tensorflow as tf
  5 | from .flowlib import flow_to_image, write_flow
  6 | import numpy as np
  7 | # from scipy.misc import imread, imsave, imresize
  8 | import cv2
  9 | import uuid
 10 | from .training_schedules import LONG_SCHEDULE
 11 | slim = tf.contrib.slim
 12 | 
 13 | os.environ['CUDA_DEVICES_ORDER'] = "PCI_BUS_ID"
 14 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 15 | 
 16 | 
 17 | class Mode(Enum):
 18 |     TRAIN = 1
 19 |     TEST = 2
 20 | 
 21 | 
 22 | class Net(object):
 23 |     __metaclass__ = abc.ABCMeta
 24 | 
 25 |     def __init__(self, mode=Mode.TRAIN, debug=False):
 26 |         self.global_step = slim.get_or_create_global_step()
 27 |         self.mode = mode
 28 |         self.debug = debug
 29 | 
 30 |     @abc.abstractmethod
 31 |     def model(self, inputs, training_schedule, trainable=True):
 32 |         """
 33 |         Defines the model and returns a tuple of Tensors needed for calculating the loss.
 34 |         """
 35 |         return
 36 | 
 37 |     @abc.abstractmethod
 38 |     def loss(self, **kwargs):
 39 |         """
 40 |         Accepts prediction Tensors from the output of `model`.
 41 |         Returns a single Tensor representing the total loss of the model.
 42 |         """
 43 |         return
 44 |     """
 45 |      python -m src.flownet_sd.test --input_a /home/liuwen/ssd/videogan/Save_2017_05_31/Images/ped1_adv/Evaluate/model.ckpt-100000/01/gen_6.png \
 46 |                                  --input_b /home/liuwen/ssd/videogan/Save_2017_05_31/Images/ped1_adv/Evaluate/model.ckpt-100000/01/gen_7.png \
 47 |                                  --out  ./
 48 |       python -m src.flownet_sd.test --input_a 006.png  --input_b  007.png     --out ./
 49 |       python -m src.flownet_sd.test --input_a /home/liuwen/ssd/videogan/ped1/frames/testing/01/006.jpg \
 50 |                              --input_b /home/liuwen/ssd/videogan/ped1/frames/testing/01/007.jpg \
 51 |                              --out  ./
 52 |     """
 53 |     def test(self, checkpoint, input_a_path, input_b_path, out_path, save_image=True, save_flo=False):
 54 |         input_a = cv2.imread(input_a_path)
 55 |         input_b = cv2.imread(input_b_path)
 56 | 
 57 |         input_a = cv2.resize(input_a, (512, 384))
 58 |         input_b = cv2.resize(input_b, (512, 384))
 59 |         print(input_a.shape, input_b.shape)
 60 | 
 61 |         # Convert from RGB -> BGR
 62 |         # input_a = input_a[..., [2, 1, 0]]
 63 |         # input_b = input_b[..., [2, 1, 0]]
 64 | 
 65 |         # Scale from [0, 255] -> [0.0, 1.0] if needed
 66 |         if input_a.max() > 1.0:
 67 |             input_a = input_a / 255.0
 68 |         if input_b.max() > 1.0:
 69 |             input_b = input_b / 255.0
 70 | 
 71 |         # TODO: This is a hack, we should get rid of this
 72 |         training_schedule = LONG_SCHEDULE
 73 | 
 74 |         inputs = {
 75 |             'input_a': tf.expand_dims(tf.constant(input_a, dtype=tf.float32), 0),
 76 |             'input_b': tf.expand_dims(tf.constant(input_b, dtype=tf.float32), 0),
 77 |         }
 78 |         predictions = self.model(inputs, training_schedule)
 79 |         pred_flow = predictions['flow']
 80 | 
 81 |         saver = tf.train.Saver()
 82 | 
 83 |         config = tf.ConfigProto()
 84 |         config.gpu_options.allow_growth = True
 85 |         with tf.Session(config=config) as sess:
 86 |             saver.restore(sess, checkpoint)
 87 |             pred_flow = sess.run(pred_flow)[0, :, :, :]
 88 | 
 89 |             np.save('temporal_ped1', pred_flow)
 90 | 
 91 |             unique_name = 'flow-' + str(uuid.uuid4())
 92 |             if save_image:
 93 |                 flow_img = flow_to_image(pred_flow)
 94 |                 full_out_path = os.path.join(out_path, unique_name + '.png')
 95 |                 cv2.imwrite(full_out_path, flow_img)
 96 | 
 97 |             if save_flo:
 98 |                 full_out_path = os.path.join(out_path, unique_name + '.flo')
 99 |                 write_flow(pred_flow, full_out_path)
100 | 
101 |     def train(self, log_dir, training_schedule, input_a, input_b, flow, checkpoints=None):
102 |         tf.summary.image("image_a", input_a, max_outputs=2)
103 |         tf.summary.image("image_b", input_b, max_outputs=2)
104 | 
105 |         self.learning_rate = tf.train.piecewise_constant(
106 |             self.global_step,
107 |             [tf.cast(v, tf.int64) for v in training_schedule['step_values']],
108 |             training_schedule['learning_rates'])
109 | 
110 |         optimizer = tf.train.AdamOptimizer(
111 |             self.learning_rate,
112 |             training_schedule['momentum'],
113 |             training_schedule['momentum2'])
114 | 
115 |         inputs = {
116 |             'input_a': input_a,
117 |             'input_b': input_b,
118 |         }
119 |         predictions = self.model(inputs, training_schedule)
120 |         total_loss = self.loss(flow, predictions)
121 |         tf.summary.scalar('loss', total_loss)
122 | 
123 |         if checkpoints:
124 |             for (checkpoint_path, (scope, new_scope)) in checkpoints.iteritems():
125 |                 variables_to_restore = slim.get_variables(scope=scope)
126 |                 renamed_variables = {
127 |                     var.op.name.split(new_scope + '/')[1]: var
128 |                     for var in variables_to_restore
129 |                 }
130 |                 restorer = tf.train.Saver(renamed_variables)
131 |                 with tf.Session() as sess:
132 |                     restorer.restore(sess, checkpoint_path)
133 | 
134 |         # Show the generated flow in TensorBoard
135 |         if 'flow' in predictions:
136 |             pred_flow_0 = predictions['flow'][0, :, :, :]
137 |             pred_flow_0 = tf.py_func(flow_to_image, [pred_flow_0], tf.uint8)
138 |             pred_flow_1 = predictions['flow'][1, :, :, :]
139 |             pred_flow_1 = tf.py_func(flow_to_image, [pred_flow_1], tf.uint8)
140 |             pred_flow_img = tf.stack([pred_flow_0, pred_flow_1], 0)
141 |             tf.summary.image('pred_flow', pred_flow_img, max_outputs=2)
142 | 
143 |         true_flow_0 = flow[0, :, :, :]
144 |         true_flow_0 = tf.py_func(flow_to_image, [true_flow_0], tf.uint8)
145 |         true_flow_1 = flow[1, :, :, :]
146 |         true_flow_1 = tf.py_func(flow_to_image, [true_flow_1], tf.uint8)
147 |         true_flow_img = tf.stack([true_flow_0, true_flow_1], 0)
148 |         tf.summary.image('true_flow', true_flow_img, max_outputs=2)
149 | 
150 |         train_op = slim.learning.create_train_op(
151 |             total_loss,
152 |             optimizer,
153 |             summarize_gradients=True)
154 | 
155 |         if self.debug:
156 |             with tf.Session() as sess:
157 |                 sess.run(tf.global_variables_initializer())
158 |                 tf.train.start_queue_runners(sess)
159 |                 slim.learning.train_step(
160 |                     sess,
161 |                     train_op,
162 |                     self.global_step,
163 |                     {
164 |                         'should_trace': tf.constant(1),
165 |                         'should_log': tf.constant(1),
166 |                         'logdir': log_dir + '/debug',
167 |                     }
168 |                 )
169 |         else:
170 |             slim.learning.train(
171 |                 train_op,
172 |                 log_dir,
173 |                 # session_config=tf.ConfigProto(allow_soft_placement=True),
174 |                 global_step=self.global_step,
175 |                 save_summaries_secs=60,
176 |                 number_of_steps=training_schedule['max_iter']
177 |             )
178 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.h:
--------------------------------------------------------------------------------
  1 | #ifndef AUGMENTATION_LAYER_BASE_H_
  2 | #define AUGMENTATION_LAYER_BASE_H_
  3 | 
  4 | #include "tensorflow/core/framework/tensor_types.h"
  5 | 
  6 | #include <iostream>
  7 | #include <string>
  8 | #include <vector>
  9 | 
 10 | namespace tensorflow {
 11 | template<typename T>
 12 | class OptionalType {
 13 |   public:
 14 |     OptionalType(const T default_value) : default_value(default_value), has_value(false) {}
 15 | 
 16 |     operator bool() const {
 17 |       return has_value;
 18 |     }
 19 | 
 20 |     OptionalType& operator=(T val) {
 21 |       has_value = true;
 22 |       value     = val;
 23 |       return *this;
 24 |     }
 25 | 
 26 |     const T operator()() const {
 27 |       return has_value ? value : default_value;
 28 |     }
 29 | 
 30 |     void clear() {
 31 |       has_value = false;
 32 |     }
 33 | 
 34 |     const T get_default() {
 35 |       return default_value;
 36 |     }
 37 | 
 38 |   private:
 39 |     T value;
 40 |     bool has_value;
 41 |     const T default_value;
 42 | };
 43 | 
 44 | class AugmentationCoeff {
 45 |   public:
 46 |     // Spatial Types
 47 |     OptionalType<float>dx;
 48 |     OptionalType<float>dy;
 49 |     OptionalType<float>angle;
 50 |     OptionalType<float>zoom_x;
 51 |     OptionalType<float>zoom_y;
 52 | 
 53 |     // Chromatic Types
 54 |     OptionalType<float>gamma;
 55 |     OptionalType<float>brightness;
 56 |     OptionalType<float>contrast;
 57 |     OptionalType<float>color1;
 58 |     OptionalType<float>color2;
 59 |     OptionalType<float>color3;
 60 | 
 61 |     AugmentationCoeff() : dx(0.0), dy(0.0), angle(0.0), zoom_x(1.0), zoom_y(1.0), gamma(1.0),
 62 |       brightness(0.0), contrast(1.0), color1(1.0), color2(1.0), color3(1.0) {}
 63 | 
 64 |     AugmentationCoeff(const AugmentationCoeff& coeff) : AugmentationCoeff() {
 65 |       replace_with(coeff);
 66 |     }
 67 | 
 68 |     void clear();
 69 | 
 70 |     void combine_with(const AugmentationCoeff& coeff);
 71 | 
 72 |     void replace_with(const AugmentationCoeff& coeff);
 73 | };
 74 | 
 75 | typedef struct AugmentationParam {
 76 |   std::string rand_type;
 77 |   bool        should_exp;
 78 |   float       mean;
 79 |   float       spread;
 80 |   float       prob;
 81 | } AugmentationParam;
 82 | 
 83 | class AugmentationParams {
 84 |   public:
 85 |     int crop_height;
 86 |     int crop_width;
 87 | 
 88 |     // Spatial options
 89 |     OptionalType<struct AugmentationParam>translate;
 90 |     OptionalType<struct AugmentationParam>rotate;
 91 |     OptionalType<struct AugmentationParam>zoom;
 92 |     OptionalType<struct AugmentationParam>squeeze;
 93 | 
 94 |     // Chromatic options
 95 |     OptionalType<struct AugmentationParam>gamma;
 96 |     OptionalType<struct AugmentationParam>brightness;
 97 |     OptionalType<struct AugmentationParam>contrast;
 98 |     OptionalType<struct AugmentationParam>color;
 99 | 
100 |     inline AugmentationParams(int                     crop_height,
101 |                               int                     crop_width,
102 |                               std::vector<std::string>params_name,
103 |                               std::vector<std::string>params_rand_type,
104 |                               std::vector<bool>       params_exp,
105 |                               std::vector<float>      params_mean,
106 |                               std::vector<float>      params_spread,
107 |                               std::vector<float>      params_prob) :
108 |       crop_height(crop_height),
109 |       crop_width(crop_width),
110 |       translate(AugmentationParam()),
111 |       rotate(AugmentationParam()),
112 |       zoom(AugmentationParam()),
113 |       squeeze(AugmentationParam()),
114 |       gamma(AugmentationParam()),
115 |       brightness(AugmentationParam()),
116 |       contrast(AugmentationParam()),
117 |       color(AugmentationParam()) {
118 |       for (int i = 0; i < params_name.size(); i++) {
119 |         const std::string name      = params_name[i];
120 |         const std::string rand_type = params_rand_type[i];
121 |         const bool  should_exp      = params_exp[i];
122 |         const float mean            = params_mean[i];
123 |         const float spread          = params_spread[i];
124 |         const float prob            = params_prob[i];
125 | 
126 |         struct AugmentationParam param = { rand_type, should_exp, mean, spread, prob };
127 | 
128 |         if (name == "translate") {
129 |           this->translate = param;
130 |         } else if (name == "rotate") {
131 |           this->rotate = param;
132 |         } else if (name == "zoom") {
133 |           this->zoom = param;
134 |         }  else if (name == "squeeze") {
135 |           this->squeeze = param;
136 |         } else if (name == "noise") {
137 |           // NoOp: We handle noise on the Python side
138 |         } else if (name == "gamma") {
139 |           this->gamma = param;
140 |         } else if (name == "brightness") {
141 |           this->brightness = param;
142 |         } else if (name == "contrast") {
143 |           this->contrast = param;
144 |         } else if (name == "color") {
145 |           this->color = param;
146 |         } else {
147 |           std::cout << "Ignoring unknown augmentation parameter: " << name << std::endl;
148 |         }
149 |       }
150 |     }
151 | 
152 |     bool should_do_spatial_transform() {
153 |       return this->translate || this->rotate || this->zoom || this->squeeze;
154 |     }
155 | 
156 |     bool should_do_chromatic_transform() {
157 |       return this->gamma || this->brightness || this->contrast || this->color;
158 |     }
159 | };
160 | 
161 | class AugmentationLayerBase {
162 |   public:
163 |     class TransMat {
164 |       /**
165 |        * Translation matrix class for spatial augmentation
166 |        * | 0 1 2 |
167 |        * | 3 4 5 |
168 |        */
169 | 
170 |       public:
171 |         float t0, t1, t2;
172 |         float t3, t4, t5;
173 | 
174 | 
175 |         void fromCoeff(AugmentationCoeff *coeff,
176 |                        int                out_width,
177 |                        int                out_height,
178 |                        int                src_width,
179 |                        int                src_height);
180 | 
181 |         void     fromTensor(const float *tensor_data);
182 | 
183 |         TransMat inverse();
184 | 
185 |         void     leftMultiply(float u0,
186 |                               float u1,
187 |                               float u2,
188 |                               float u3,
189 |                               float u4,
190 |                               float u5);
191 | 
192 |         void toIdentity();
193 |     };
194 | 
195 |     // TODO: Class ChromaticCoeffs
196 | 
197 |     static float rng_generate(const AugmentationParam& param,
198 |                               float                    discount_coeff,
199 |                               const float              default_value);
200 | 
201 |     static void clear_spatial_coeffs(AugmentationCoeff& coeff);
202 |     static void generate_chromatic_coeffs(float                     discount_coeff,
203 |                                           const AugmentationParams& aug,
204 |                                           AugmentationCoeff       & coeff);
205 |     static void generate_spatial_coeffs(float                     discount_coeff,
206 |                                         const AugmentationParams& aug,
207 |                                         AugmentationCoeff       & coeff);
208 |     static void generate_valid_spatial_coeffs(float                     discount_coeff,
209 |                                               const AugmentationParams& aug,
210 |                                               AugmentationCoeff       & coeff,
211 |                                               int                       src_width,
212 |                                               int                       src_height,
213 |                                               int                       out_width,
214 |                                               int                       out_height);
215 | 
216 |     static void copy_chromatic_coeffs_to_tensor(const std::vector<AugmentationCoeff>& coeff_arr,
217 |                                                 typename TTypes<float, 2>::Tensor& out);
218 |     static void copy_spatial_coeffs_to_tensor(const std::vector<AugmentationCoeff>& coeff_arr,
219 |                                               const int out_width,
220 |                                               const int out_height,
221 |                                               const int src_width,
222 |                                               const int src_height,
223 |                                               typename TTypes<float, 2>::Tensor& out,
224 |                                               const bool invert = false);
225 | };
226 | } // namespace tensorflow
227 | 
228 | #endif // AUGMENTATION_LAYER_BASE_H_
229 | 


--------------------------------------------------------------------------------
/Codes/train.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import os
  3 | 
  4 | from models import generator, discriminator, flownet, initialize_flownet
  5 | from loss_functions import intensity_loss, gradient_loss
  6 | from utils import DataLoader, load, save, psnr_error
  7 | from constant import const
  8 | 
  9 | 
 10 | os.environ['CUDA_DEVICES_ORDER'] = "PCI_BUS_ID"
 11 | os.environ['CUDA_VISIBLE_DEVICES'] = const.GPU
 12 | 
 13 | dataset_name = const.DATASET
 14 | train_folder = const.TRAIN_FOLDER
 15 | test_folder = const.TEST_FOLDER
 16 | 
 17 | batch_size = const.BATCH_SIZE
 18 | iterations = const.ITERATIONS
 19 | num_his = const.NUM_HIS
 20 | height, width = 256, 256
 21 | flow_height, flow_width = const.FLOW_HEIGHT, const.FLOW_WIDTH
 22 | 
 23 | l_num = const.L_NUM
 24 | alpha_num = const.ALPHA_NUM
 25 | lam_lp = const.LAM_LP
 26 | lam_gdl = const.LAM_GDL
 27 | lam_adv = const.LAM_ADV
 28 | lam_flow = const.LAM_FLOW
 29 | adversarial = (lam_adv != 0)
 30 | 
 31 | summary_dir = const.SUMMARY_DIR
 32 | snapshot_dir = const.SNAPSHOT_DIR
 33 | 
 34 | 
 35 | print(const)
 36 | 
 37 | # define dataset
 38 | with tf.name_scope('dataset'):
 39 |     train_loader = DataLoader(train_folder, resize_height=height, resize_width=width)
 40 |     train_dataset = train_loader(batch_size=batch_size, time_steps=num_his, num_pred=1)
 41 | 
 42 |     train_it = train_dataset.make_one_shot_iterator()
 43 |     train_videos_clips_tensor = train_it.get_next()
 44 |     train_videos_clips_tensor.set_shape([batch_size, height, width, 3*(num_his + 1)])
 45 | 
 46 |     train_inputs = train_videos_clips_tensor[..., 0:num_his*3]
 47 |     train_gt = train_videos_clips_tensor[..., -3:]
 48 | 
 49 |     print('train inputs = {}'.format(train_inputs))
 50 |     print('train prediction gt = {}'.format(train_gt))
 51 | 
 52 |     test_loader = DataLoader(test_folder, resize_height=height, resize_width=width)
 53 |     test_dataset = test_loader(batch_size=batch_size, time_steps=num_his, num_pred=1)
 54 |     test_it = test_dataset.make_one_shot_iterator()
 55 |     test_videos_clips_tensor = test_it.get_next()
 56 |     test_videos_clips_tensor.set_shape([batch_size, height, width, 3*(num_his + 1)])
 57 | 
 58 |     test_inputs = test_videos_clips_tensor[..., 0:num_his*3]
 59 |     test_gt = test_videos_clips_tensor[..., -3:]
 60 | 
 61 |     print('test inputs = {}'.format(test_inputs))
 62 |     print('test prediction gt = {}'.format(test_gt))
 63 | 
 64 | # define training generator function
 65 | with tf.variable_scope('generator', reuse=None):
 66 |     print('training = {}'.format(tf.get_variable_scope().name))
 67 |     train_outputs = generator(train_inputs, layers=4, output_channel=3)
 68 |     train_psnr_error = psnr_error(gen_frames=train_outputs, gt_frames=train_gt)
 69 | 
 70 | # define testing generator function
 71 | with tf.variable_scope('generator', reuse=True):
 72 |     print('testing = {}'.format(tf.get_variable_scope().name))
 73 |     test_outputs = generator(test_inputs, layers=4, output_channel=3)
 74 |     test_psnr_error = psnr_error(gen_frames=test_outputs, gt_frames=test_gt)
 75 | 
 76 | 
 77 | # define intensity loss
 78 | if lam_lp != 0:
 79 |     lp_loss = intensity_loss(gen_frames=train_outputs, gt_frames=train_gt, l_num=l_num)
 80 | else:
 81 |     lp_loss = tf.constant(0.0, dtype=tf.float32)
 82 | 
 83 | 
 84 | # define gdl loss
 85 | if lam_gdl != 0:
 86 |     gdl_loss = gradient_loss(gen_frames=train_outputs, gt_frames=train_gt, alpha=alpha_num)
 87 | else:
 88 |     gdl_loss = tf.constant(0.0, dtype=tf.float32)
 89 | 
 90 | 
 91 | # define flow loss
 92 | if lam_flow != 0:
 93 |     train_gt_flow = flownet(input_a=train_inputs[..., -3:], input_b=train_gt,
 94 |                             height=flow_height, width=flow_width, reuse=None)
 95 |     train_pred_flow = flownet(input_a=train_inputs[..., -3:], input_b=train_outputs,
 96 |                               height=flow_height, width=flow_width, reuse=True)
 97 |     flow_loss = tf.reduce_mean(tf.abs(train_gt_flow - train_pred_flow))
 98 | else:
 99 |     flow_loss = tf.constant(0.0, dtype=tf.float32)
100 | 
101 | 
102 | # define adversarial loss
103 | if adversarial:
104 |     with tf.variable_scope('discriminator', reuse=None):
105 |         real_logits, real_outputs = discriminator(inputs=train_gt)
106 |     with tf.variable_scope('discriminator', reuse=True):
107 |         fake_logits, fake_outputs = discriminator(inputs=train_outputs)
108 | 
109 |     print('real_outputs = {}'.format(real_outputs))
110 |     print('fake_outputs = {}'.format(fake_outputs))
111 | 
112 |     adv_loss = tf.reduce_mean(tf.square(fake_outputs - 1) / 2)
113 |     dis_loss = tf.reduce_mean(tf.square(real_outputs - 1) / 2) + tf.reduce_mean(tf.square(fake_outputs) / 2)
114 | else:
115 |     adv_loss = tf.constant(0.0, dtype=tf.float32)
116 |     dis_loss = tf.constant(0.0, dtype=tf.float32)
117 | 
118 | 
119 | with tf.name_scope('training'):
120 |     g_loss = tf.add_n([lp_loss * lam_lp, gdl_loss * lam_gdl, adv_loss * lam_adv, flow_loss * lam_flow], name='g_loss')
121 | 
122 |     g_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='g_step')
123 |     g_lrate = tf.train.piecewise_constant(g_step, boundaries=const.LRATE_G_BOUNDARIES, values=const.LRATE_G)
124 |     g_optimizer = tf.train.AdamOptimizer(learning_rate=g_lrate, name='g_optimizer')
125 |     g_vars = tf.get_collection(key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
126 | 
127 |     g_train_op = g_optimizer.minimize(g_loss, global_step=g_step, var_list=g_vars, name='g_train_op')
128 | 
129 |     if adversarial:
130 |         # training discriminator
131 |         d_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='d_step')
132 |         d_lrate = tf.train.piecewise_constant(d_step, boundaries=const.LRATE_D_BOUNDARIES, values=const.LRATE_D)
133 |         d_optimizer = tf.train.AdamOptimizer(learning_rate=d_lrate, name='g_optimizer')
134 |         d_vars = tf.get_collection(key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')
135 | 
136 |         d_train_op = d_optimizer.minimize(dis_loss, global_step=d_step, var_list=d_vars, name='d_optimizer')
137 |     else:
138 |         d_step = None
139 |         d_lrate = None
140 |         d_train_op = None
141 | 
142 | # add all to summaries
143 | tf.summary.scalar(tensor=train_psnr_error, name='train_psnr_error')
144 | tf.summary.scalar(tensor=test_psnr_error, name='test_psnr_error')
145 | tf.summary.scalar(tensor=g_loss, name='g_loss')
146 | tf.summary.scalar(tensor=adv_loss, name='adv_loss')
147 | tf.summary.scalar(tensor=dis_loss, name='dis_loss')
148 | tf.summary.image(tensor=train_outputs, name='train_outputs')
149 | tf.summary.image(tensor=train_gt, name='train_gt')
150 | tf.summary.image(tensor=test_outputs, name='test_outputs')
151 | tf.summary.image(tensor=test_gt, name='test_gt')
152 | summary_op = tf.summary.merge_all()
153 | 
154 | config = tf.ConfigProto()
155 | config.gpu_options.allow_growth = True
156 | with tf.Session(config=config) as sess:
157 |     # summaries
158 |     summary_writer = tf.summary.FileWriter(summary_dir, graph=sess.graph)
159 | 
160 |     # initialize weights
161 |     sess.run(tf.global_variables_initializer())
162 |     print('Init successfully!')
163 | 
164 |     if lam_flow != 0:
165 |         # initialize flownet
166 |         initialize_flownet(sess, const.FLOWNET_CHECKPOINT)
167 | 
168 |     # tf saver
169 |     saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=None)
170 |     restore_var = [v for v in tf.global_variables()]
171 |     loader = tf.train.Saver(var_list=restore_var)
172 |     if os.path.isdir(snapshot_dir):
173 |         ckpt = tf.train.get_checkpoint_state(snapshot_dir)
174 |         if ckpt and ckpt.model_checkpoint_path:
175 |             load(loader, sess, ckpt.model_checkpoint_path)
176 |         else:
177 |             print('No checkpoint file found.')
178 |     else:
179 |         load(loader, sess, snapshot_dir)
180 | 
181 |     _step, _loss, _summaries = 0, None, None
182 |     while _step < iterations:
183 |         try:
184 |             if adversarial:
185 |                 print('Training discriminator...')
186 |                 _, _d_lr, _d_step, _dis_loss = sess.run([d_train_op, d_lrate, d_step, dis_loss])
187 |             else:
188 |                 _d_step = 0
189 |                 _d_lr = 0
190 |                 _dis_loss = 0
191 | 
192 |             print('Training generator...')
193 |             _, _g_lr, _step, _lp_loss, _gdl_loss, _adv_loss, _flow_loss, _g_loss, _train_psnr, _summaries = sess.run(
194 |                 [g_train_op, g_lrate, g_step, lp_loss, gdl_loss, adv_loss, flow_loss, g_loss, train_psnr_error, summary_op])
195 | 
196 |             if _step % 10 == 0:
197 |                 print('DiscriminatorModel: Step {} | Global Loss: {:.6f}, lr = {:.6f}'.format(_d_step, _dis_loss, _d_lr))
198 |                 print('GeneratorModel : Step {}, lr = {:.6f}'.format(_step, _g_lr))
199 |                 print('                 Global      Loss : ', _g_loss)
200 |                 print('                 intensity   Loss : ({:.4f} * {:.4f} = {:.4f})'.format(_lp_loss, lam_lp, _lp_loss * lam_lp))
201 |                 print('                 gradient    Loss : ({:.4f} * {:.4f} = {:.4f})'.format( _gdl_loss, lam_gdl, _gdl_loss * lam_gdl))
202 |                 print('                 adversarial Loss : ({:.4f} * {:.4f} = {:.4f})'.format(_adv_loss, lam_adv, _adv_loss * lam_adv))
203 |                 print('                 flownet     Loss : ({:.4f} * {:.4f} = {:.4f})'.format(_flow_loss, lam_flow, _flow_loss * lam_flow))
204 |                 print('                 PSNR  Error      : ', _train_psnr)
205 |             if _step % 100 == 0:
206 |                 summary_writer.add_summary(_summaries, global_step=_step)
207 |                 print('Save summaries...')
208 | 
209 |             if _step % 1000 == 0:
210 |                 save(saver, sess, snapshot_dir, _step)
211 | 
212 |         except tf.errors.OutOfRangeError:
213 |             print('Finish successfully!')
214 |             save(saver, sess, snapshot_dir, _step)
215 |             break
216 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_s/flownet_s.py:
--------------------------------------------------------------------------------
  1 | from ..net import Net, Mode
  2 | from ..utils import LeakyReLU, average_endpoint_error, pad, antipad
  3 | from ..downsample import downsample
  4 | import math
  5 | import tensorflow as tf
  6 | slim = tf.contrib.slim
  7 | 
  8 | 
  9 | class FlowNetS(Net):
 10 | 
 11 |     def __init__(self, mode=Mode.TRAIN, debug=False):
 12 |         super(FlowNetS, self).__init__(mode=mode, debug=debug)
 13 | 
 14 |     def model(self, inputs, training_schedule, trainable=True):
 15 |         _, height, width, _ = inputs['input_a'].shape.as_list()
 16 |         stacked = False
 17 |         with tf.variable_scope('FlowNetS'):
 18 |             if 'warped' in inputs and 'flow' in inputs and 'brightness_error' in inputs:
 19 |                 stacked = True
 20 |                 concat_inputs = tf.concat([inputs['input_a'],
 21 |                                            inputs['input_b'],
 22 |                                            inputs['warped'],
 23 |                                            inputs['flow'],
 24 |                                            inputs['brightness_error']], axis=3)
 25 |             else:
 26 |                 concat_inputs = tf.concat([inputs['input_a'], inputs['input_b']], axis=3)
 27 |             with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
 28 |                                 # Only backprop this network if trainable
 29 |                                 trainable=trainable,
 30 |                                 # He (aka MSRA) weight initialization
 31 |                                 weights_initializer=slim.variance_scaling_initializer(),
 32 |                                 activation_fn=LeakyReLU,
 33 |                                 # We will do our own padding to match the original Caffe code
 34 |                                 padding='VALID'):
 35 | 
 36 |                 weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay'])
 37 |                 with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer):
 38 |                     with slim.arg_scope([slim.conv2d], stride=2):
 39 |                         conv_1 = slim.conv2d(pad(concat_inputs, 3), 64, 7, scope='conv1')
 40 |                         conv_2 = slim.conv2d(pad(conv_1, 2), 128, 5, scope='conv2')
 41 |                         conv_3 = slim.conv2d(pad(conv_2, 2), 256, 5, scope='conv3')
 42 | 
 43 |                     conv3_1 = slim.conv2d(pad(conv_3), 256, 3, scope='conv3_1')
 44 |                     with slim.arg_scope([slim.conv2d], num_outputs=512, kernel_size=3):
 45 |                         conv4 = slim.conv2d(pad(conv3_1), stride=2, scope='conv4')
 46 |                         conv4_1 = slim.conv2d(pad(conv4), scope='conv4_1')
 47 |                         conv5 = slim.conv2d(pad(conv4_1), stride=2, scope='conv5')
 48 |                         conv5_1 = slim.conv2d(pad(conv5), scope='conv5_1')
 49 |                     conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope='conv6')
 50 |                     conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1')
 51 | 
 52 |                     """ START: Refinement Network """
 53 |                     with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None):
 54 |                         predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3,
 55 |                                                     scope='predict_flow6',
 56 |                                                     activation_fn=None)
 57 |                         deconv5 = antipad(slim.conv2d_transpose(conv6_1, 512, 4,
 58 |                                                                 stride=2,
 59 |                                                                 scope='deconv5'))
 60 |                         upsample_flow6to5 = antipad(slim.conv2d_transpose(predict_flow6, 2, 4,
 61 |                                                                           stride=2,
 62 |                                                                           scope='upsample_flow6to5',
 63 |                                                                           activation_fn=None))
 64 |                         concat5 = tf.concat([conv5_1, deconv5, upsample_flow6to5], axis=3)
 65 | 
 66 |                         predict_flow5 = slim.conv2d(pad(concat5), 2, 3,
 67 |                                                     scope='predict_flow5',
 68 |                                                     activation_fn=None)
 69 |                         deconv4 = antipad(slim.conv2d_transpose(concat5, 256, 4,
 70 |                                                                 stride=2,
 71 |                                                                 scope='deconv4'))
 72 |                         upsample_flow5to4 = antipad(slim.conv2d_transpose(predict_flow5, 2, 4,
 73 |                                                                           stride=2,
 74 |                                                                           scope='upsample_flow5to4',
 75 |                                                                           activation_fn=None))
 76 |                         concat4 = tf.concat([conv4_1, deconv4, upsample_flow5to4], axis=3)
 77 | 
 78 |                         predict_flow4 = slim.conv2d(pad(concat4), 2, 3,
 79 |                                                     scope='predict_flow4',
 80 |                                                     activation_fn=None)
 81 |                         deconv3 = antipad(slim.conv2d_transpose(concat4, 128, 4,
 82 |                                                                 stride=2,
 83 |                                                                 scope='deconv3'))
 84 |                         upsample_flow4to3 = antipad(slim.conv2d_transpose(predict_flow4, 2, 4,
 85 |                                                                           stride=2,
 86 |                                                                           scope='upsample_flow4to3',
 87 |                                                                           activation_fn=None))
 88 |                         concat3 = tf.concat([conv3_1, deconv3, upsample_flow4to3], axis=3)
 89 | 
 90 |                         predict_flow3 = slim.conv2d(pad(concat3), 2, 3,
 91 |                                                     scope='predict_flow3',
 92 |                                                     activation_fn=None)
 93 |                         deconv2 = antipad(slim.conv2d_transpose(concat3, 64, 4,
 94 |                                                                 stride=2,
 95 |                                                                 scope='deconv2'))
 96 |                         upsample_flow3to2 = antipad(slim.conv2d_transpose(predict_flow3, 2, 4,
 97 |                                                                           stride=2,
 98 |                                                                           scope='upsample_flow3to2',
 99 |                                                                           activation_fn=None))
100 |                         concat2 = tf.concat([conv_2, deconv2, upsample_flow3to2], axis=3)
101 | 
102 |                         predict_flow2 = slim.conv2d(pad(concat2), 2, 3,
103 |                                                     scope='predict_flow2',
104 |                                                     activation_fn=None)
105 |                     """ END: Refinement Network """
106 | 
107 |                     flow = predict_flow2 * 20.0
108 |                     # TODO: Look at Accum (train) or Resample (deploy) to see if we need to do something different
109 |                     flow = tf.image.resize_bilinear(flow,
110 |                                                     tf.stack([height, width]),
111 |                                                     align_corners=True)
112 | 
113 |                     return {
114 |                         'predict_flow6': predict_flow6,
115 |                         'predict_flow5': predict_flow5,
116 |                         'predict_flow4': predict_flow4,
117 |                         'predict_flow3': predict_flow3,
118 |                         'predict_flow2': predict_flow2,
119 |                         'flow': flow,
120 |                     }
121 | 
122 |     def loss(self, flow, predictions):
123 |         flow = flow * 0.05
124 | 
125 |         losses = []
126 |         INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(flow.shape[2].value)
127 | 
128 |         # L2 loss between predict_flow6, blob23 (weighted w/ 0.32)
129 |         predict_flow6 = predictions['predict_flow6']
130 |         size = [predict_flow6.shape[1], predict_flow6.shape[2]]
131 |         downsampled_flow6 = downsample(flow, size)
132 |         losses.append(average_endpoint_error(downsampled_flow6, predict_flow6))
133 | 
134 |         # L2 loss between predict_flow5, blob28 (weighted w/ 0.08)
135 |         predict_flow5 = predictions['predict_flow5']
136 |         size = [predict_flow5.shape[1], predict_flow5.shape[2]]
137 |         downsampled_flow5 = downsample(flow, size)
138 |         losses.append(average_endpoint_error(downsampled_flow5, predict_flow5))
139 | 
140 |         # L2 loss between predict_flow4, blob33 (weighted w/ 0.02)
141 |         predict_flow4 = predictions['predict_flow4']
142 |         size = [predict_flow4.shape[1], predict_flow4.shape[2]]
143 |         downsampled_flow4 = downsample(flow, size)
144 |         losses.append(average_endpoint_error(downsampled_flow4, predict_flow4))
145 | 
146 |         # L2 loss between predict_flow3, blob38 (weighted w/ 0.01)
147 |         predict_flow3 = predictions['predict_flow3']
148 |         size = [predict_flow3.shape[1], predict_flow3.shape[2]]
149 |         downsampled_flow3 = downsample(flow, size)
150 |         losses.append(average_endpoint_error(downsampled_flow3, predict_flow3))
151 | 
152 |         # L2 loss between predict_flow2, blob43 (weighted w/ 0.005)
153 |         predict_flow2 = predictions['predict_flow2']
154 |         size = [predict_flow2.shape[1], predict_flow2.shape[2]]
155 |         downsampled_flow2 = downsample(flow, size)
156 |         losses.append(average_endpoint_error(downsampled_flow2, predict_flow2))
157 | 
158 |         loss = tf.losses.compute_weighted_loss(losses, [0.32, 0.08, 0.02, 0.01, 0.005])
159 | 
160 |         # Return the 'total' loss: loss fns + regularization terms defined in the model
161 |         return tf.losses.get_total_loss()
162 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_c/flownet_c.py:
--------------------------------------------------------------------------------
  1 | from ..net import Net, Mode
  2 | from ..utils import LeakyReLU, average_endpoint_error, pad, antipad
  3 | from ..correlation import correlation
  4 | from ..downsample import downsample
  5 | import math
  6 | import tensorflow as tf
  7 | slim = tf.contrib.slim
  8 | 
  9 | 
 10 | class FlowNetC(Net):
 11 | 
 12 |     def __init__(self, mode=Mode.TRAIN, debug=False):
 13 |         super(FlowNetC, self).__init__(mode=mode, debug=debug)
 14 | 
 15 |     def model(self, inputs, training_schedule, trainable=True):
 16 |         _, height, width, _ = inputs['input_a'].shape.as_list()
 17 |         with tf.variable_scope('FlowNetC'):
 18 |             with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
 19 |                                 # Only backprop this network if trainable
 20 |                                 trainable=trainable,
 21 |                                 # He (aka MSRA) weight initialization
 22 |                                 weights_initializer=slim.variance_scaling_initializer(),
 23 |                                 activation_fn=LeakyReLU,
 24 |                                 # We will do our own padding to match the original Caffe code
 25 |                                 padding='VALID'):
 26 | 
 27 |                 weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay'])
 28 |                 with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer):
 29 |                     with slim.arg_scope([slim.conv2d], stride=2):
 30 |                         conv_a_1 = slim.conv2d(pad(inputs['input_a'], 3), 64, 7, scope='conv1')
 31 |                         conv_a_2 = slim.conv2d(pad(conv_a_1, 2), 128, 5, scope='conv2')
 32 |                         conv_a_3 = slim.conv2d(pad(conv_a_2, 2), 256, 5, scope='conv3')
 33 | 
 34 |                         conv_b_1 = slim.conv2d(pad(inputs['input_b'], 3),
 35 |                                                64, 7, scope='conv1', reuse=True)
 36 |                         conv_b_2 = slim.conv2d(pad(conv_b_1, 2), 128, 5, scope='conv2', reuse=True)
 37 |                         conv_b_3 = slim.conv2d(pad(conv_b_2, 2), 256, 5, scope='conv3', reuse=True)
 38 | 
 39 |                         # Compute cross correlation with leaky relu activation
 40 |                         cc = correlation(conv_a_3, conv_b_3, 1, 20, 1, 2, 20)
 41 |                         cc_relu = LeakyReLU(cc)
 42 | 
 43 |                     # Combine cross correlation results with convolution of feature map A
 44 |                     netA_conv = slim.conv2d(conv_a_3, 32, 1, scope='conv_redir')
 45 |                     # Concatenate along the channels axis
 46 |                     net = tf.concat([netA_conv, cc_relu], axis=3)
 47 | 
 48 |                     conv3_1 = slim.conv2d(pad(net), 256, 3, scope='conv3_1')
 49 |                     with slim.arg_scope([slim.conv2d], num_outputs=512, kernel_size=3):
 50 |                         conv4 = slim.conv2d(pad(conv3_1), stride=2, scope='conv4')
 51 |                         conv4_1 = slim.conv2d(pad(conv4), scope='conv4_1')
 52 |                         conv5 = slim.conv2d(pad(conv4_1), stride=2, scope='conv5')
 53 |                         conv5_1 = slim.conv2d(pad(conv5), scope='conv5_1')
 54 |                     conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope='conv6')
 55 |                     conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1')
 56 | 
 57 |                     """ START: Refinement Network """
 58 |                     with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None):
 59 |                         predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3,
 60 |                                                     scope='predict_flow6',
 61 |                                                     activation_fn=None)
 62 | 
 63 |                         deconv5 = antipad(slim.conv2d_transpose(conv6_1, 512, 4,
 64 |                                                                 stride=2,
 65 |                                                                 scope='deconv5'))
 66 |                         upsample_flow6to5 = antipad(slim.conv2d_transpose(predict_flow6, 2, 4,
 67 |                                                                           stride=2,
 68 |                                                                           scope='upsample_flow6to5',
 69 |                                                                           activation_fn=None))
 70 |                         concat5 = tf.concat([conv5_1, deconv5, upsample_flow6to5], axis=3)
 71 | 
 72 |                         predict_flow5 = slim.conv2d(pad(concat5), 2, 3,
 73 |                                                     scope='predict_flow5',
 74 |                                                     activation_fn=None)
 75 |                         deconv4 = antipad(slim.conv2d_transpose(concat5, 256, 4,
 76 |                                                                 stride=2,
 77 |                                                                 scope='deconv4'))
 78 |                         upsample_flow5to4 = antipad(slim.conv2d_transpose(predict_flow5, 2, 4,
 79 |                                                                           stride=2,
 80 |                                                                           scope='upsample_flow5to4',
 81 |                                                                           activation_fn=None))
 82 |                         concat4 = tf.concat([conv4_1, deconv4, upsample_flow5to4], axis=3)
 83 | 
 84 |                         predict_flow4 = slim.conv2d(pad(concat4), 2, 3,
 85 |                                                     scope='predict_flow4',
 86 |                                                     activation_fn=None)
 87 |                         deconv3 = antipad(slim.conv2d_transpose(concat4, 128, 4,
 88 |                                                                 stride=2,
 89 |                                                                 scope='deconv3'))
 90 |                         upsample_flow4to3 = antipad(slim.conv2d_transpose(predict_flow4, 2, 4,
 91 |                                                                           stride=2,
 92 |                                                                           scope='upsample_flow4to3',
 93 |                                                                           activation_fn=None))
 94 |                         concat3 = tf.concat([conv3_1, deconv3, upsample_flow4to3], axis=3)
 95 | 
 96 |                         predict_flow3 = slim.conv2d(pad(concat3), 2, 3,
 97 |                                                     scope='predict_flow3',
 98 |                                                     activation_fn=None)
 99 |                         deconv2 = antipad(slim.conv2d_transpose(concat3, 64, 4,
100 |                                                                 stride=2,
101 |                                                                 scope='deconv2'))
102 |                         upsample_flow3to2 = antipad(slim.conv2d_transpose(predict_flow3, 2, 4,
103 |                                                                           stride=2,
104 |                                                                           scope='upsample_flow3to2',
105 |                                                                           activation_fn=None))
106 |                         concat2 = tf.concat([conv_a_2, deconv2, upsample_flow3to2], axis=3)
107 | 
108 |                         predict_flow2 = slim.conv2d(pad(concat2), 2, 3,
109 |                                                     scope='predict_flow2',
110 |                                                     activation_fn=None)
111 |                     """ END: Refinement Network """
112 | 
113 |                     flow = predict_flow2 * 20.0
114 |                     # TODO: Look at Accum (train) or Resample (deploy) to see if we need to do something different
115 |                     flow = tf.image.resize_bilinear(flow,
116 |                                                     tf.stack([height, width]),
117 |                                                     align_corners=True)
118 | 
119 |                     return {
120 |                         'predict_flow6': predict_flow6,
121 |                         'predict_flow5': predict_flow5,
122 |                         'predict_flow4': predict_flow4,
123 |                         'predict_flow3': predict_flow3,
124 |                         'predict_flow2': predict_flow2,
125 |                         'flow': flow,
126 |                     }
127 | 
128 |     def loss(self, flow, predictions):
129 |         flow = flow * 0.05
130 | 
131 |         losses = []
132 |         INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(flow.shape[2].value)
133 | 
134 |         # L2 loss between predict_flow6, blob23 (weighted w/ 0.32)
135 |         predict_flow6 = predictions['predict_flow6']
136 |         size = [predict_flow6.shape[1], predict_flow6.shape[2]]
137 |         downsampled_flow6 = downsample(flow, size)
138 |         losses.append(average_endpoint_error(downsampled_flow6, predict_flow6))
139 | 
140 |         # L2 loss between predict_flow5, blob28 (weighted w/ 0.08)
141 |         predict_flow5 = predictions['predict_flow5']
142 |         size = [predict_flow5.shape[1], predict_flow5.shape[2]]
143 |         downsampled_flow5 = downsample(flow, size)
144 |         losses.append(average_endpoint_error(downsampled_flow5, predict_flow5))
145 | 
146 |         # L2 loss between predict_flow4, blob33 (weighted w/ 0.02)
147 |         predict_flow4 = predictions['predict_flow4']
148 |         size = [predict_flow4.shape[1], predict_flow4.shape[2]]
149 |         downsampled_flow4 = downsample(flow, size)
150 |         losses.append(average_endpoint_error(downsampled_flow4, predict_flow4))
151 | 
152 |         # L2 loss between predict_flow3, blob38 (weighted w/ 0.01)
153 |         predict_flow3 = predictions['predict_flow3']
154 |         size = [predict_flow3.shape[1], predict_flow3.shape[2]]
155 |         downsampled_flow3 = downsample(flow, size)
156 |         losses.append(average_endpoint_error(downsampled_flow3, predict_flow3))
157 | 
158 |         # L2 loss between predict_flow2, blob43 (weighted w/ 0.005)
159 |         predict_flow2 = predictions['predict_flow2']
160 |         size = [predict_flow2.shape[1], predict_flow2.shape[2]]
161 |         downsampled_flow2 = downsample(flow, size)
162 |         losses.append(average_endpoint_error(downsampled_flow2, predict_flow2))
163 | 
164 |         loss = tf.losses.compute_weighted_loss(losses, [0.32, 0.08, 0.02, 0.01, 0.005])
165 | 
166 |         # Return the 'total' loss: loss fns + regularization terms defined in the model
167 |         return tf.losses.get_total_loss()
168 | 


--------------------------------------------------------------------------------
/Codes/flownet2/src/flownet_sd/flownet_sd.py:
--------------------------------------------------------------------------------
  1 | from ..net import Net, Mode
  2 | from ..utils import LeakyReLU, average_endpoint_error, pad, antipad
  3 | # from ..downsample import downsample
  4 | import math
  5 | import tensorflow as tf
  6 | slim = tf.contrib.slim
  7 | 
  8 | 
  9 | class FlowNetSD(Net):
 10 | 
 11 |     def __init__(self, mode=Mode.TRAIN, debug=False):
 12 |         super(FlowNetSD, self).__init__(mode=mode, debug=debug)
 13 | 
 14 |     def model(self, inputs, training_schedule, trainable=True, reuse=None):
 15 |         _, height, width, _ = inputs['input_a'].shape.as_list()
 16 |         with tf.variable_scope('FlowNetSD', reuse=reuse):
 17 |             concat_inputs = tf.concat([inputs['input_a'], inputs['input_b']], axis=3)
 18 |             with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
 19 |                                 # Only backprop this network if trainable
 20 |                                 trainable=trainable,
 21 |                                 # He (aka MSRA) weight initialization
 22 |                                 weights_initializer=slim.variance_scaling_initializer(),
 23 |                                 activation_fn=LeakyReLU,
 24 |                                 # We will do our own padding to match the original Caffe code
 25 |                                 padding='VALID'):
 26 | 
 27 |                 weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay'])
 28 |                 with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer):
 29 |                     conv0 = slim.conv2d(pad(concat_inputs), 64, 3, scope='conv0')
 30 |                     conv1 = slim.conv2d(pad(conv0), 64, 3, stride=2, scope='conv1')
 31 |                     conv1_1 = slim.conv2d(pad(conv1), 128, 3, scope='conv1_1')
 32 |                     conv2 = slim.conv2d(pad(conv1_1), 128, 3, stride=2, scope='conv2')
 33 |                     conv2_1 = slim.conv2d(pad(conv2), 128, 3, scope='conv2_1')
 34 |                     conv3 = slim.conv2d(pad(conv2_1), 256, 3, stride=2, scope='conv3')
 35 |                     conv3_1 = slim.conv2d(pad(conv3), 256, 3, scope='conv3_1')
 36 |                     conv4 = slim.conv2d(pad(conv3_1), 512, 3, stride=2, scope='conv4')
 37 |                     conv4_1 = slim.conv2d(pad(conv4), 512, 3, scope='conv4_1')
 38 |                     conv5 = slim.conv2d(pad(conv4_1), 512, 3, stride=2, scope='conv5')
 39 |                     conv5_1 = slim.conv2d(pad(conv5), 512, 3, scope='conv5_1')
 40 |                     conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope='conv6')
 41 |                     conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1')
 42 | 
 43 |                     """ START: Refinement Network """
 44 |                     with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None):
 45 |                         predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3,
 46 |                                                     scope='predict_flow6',
 47 |                                                     activation_fn=None)
 48 |                         deconv5 = antipad(slim.conv2d_transpose(conv6_1, 512, 4,
 49 |                                                                 stride=2,
 50 |                                                                 scope='deconv5'))
 51 |                         upsample_flow6to5 = antipad(slim.conv2d_transpose(predict_flow6, 2, 4,
 52 |                                                                           stride=2,
 53 |                                                                           scope='upsample_flow6to5',
 54 |                                                                           activation_fn=None))
 55 |                         concat5 = tf.concat([conv5_1, deconv5, upsample_flow6to5], axis=3)
 56 |                         interconv5 = slim.conv2d(pad(concat5), 512, 3,
 57 |                                                  activation_fn=None, scope='interconv5')
 58 | 
 59 |                         predict_flow5 = slim.conv2d(pad(interconv5), 2, 3,
 60 |                                                     scope='predict_flow5',
 61 |                                                     activation_fn=None)
 62 |                         deconv4 = antipad(slim.conv2d_transpose(concat5, 256, 4,
 63 |                                                                 stride=2,
 64 |                                                                 scope='deconv4'))
 65 |                         upsample_flow5to4 = antipad(slim.conv2d_transpose(predict_flow5, 2, 4,
 66 |                                                                           stride=2,
 67 |                                                                           scope='upsample_flow5to4',
 68 |                                                                           activation_fn=None))
 69 |                         concat4 = tf.concat([conv4_1, deconv4, upsample_flow5to4], axis=3)
 70 |                         interconv4 = slim.conv2d(pad(concat4), 256, 3,
 71 |                                                  activation_fn=None, scope='interconv4')
 72 | 
 73 |                         predict_flow4 = slim.conv2d(pad(interconv4), 2, 3,
 74 |                                                     scope='predict_flow4',
 75 |                                                     activation_fn=None)
 76 |                         deconv3 = antipad(slim.conv2d_transpose(concat4, 128, 4,
 77 |                                                                 stride=2,
 78 |                                                                 scope='deconv3'))
 79 |                         upsample_flow4to3 = antipad(slim.conv2d_transpose(predict_flow4, 2, 4,
 80 |                                                                           stride=2,
 81 |                                                                           scope='upsample_flow4to3',
 82 |                                                                           activation_fn=None))
 83 |                         concat3 = tf.concat([conv3_1, deconv3, upsample_flow4to3], axis=3)
 84 |                         interconv3 = slim.conv2d(pad(concat3), 128, 3,
 85 |                                                  activation_fn=None, scope='interconv3')
 86 | 
 87 |                         predict_flow3 = slim.conv2d(pad(interconv3), 2, 3,
 88 |                                                     scope='predict_flow3',
 89 |                                                     activation_fn=None)
 90 |                         deconv2 = antipad(slim.conv2d_transpose(concat3, 64, 4,
 91 |                                                                 stride=2,
 92 |                                                                 scope='deconv2'))
 93 |                         upsample_flow3to2 = antipad(slim.conv2d_transpose(predict_flow3, 2, 4,
 94 |                                                                           stride=2,
 95 |                                                                           scope='upsample_flow3to2',
 96 |                                                                           activation_fn=None))
 97 |                         concat2 = tf.concat([conv2, deconv2, upsample_flow3to2], axis=3)
 98 |                         interconv2 = slim.conv2d(pad(concat2), 64, 3,
 99 |                                                  activation_fn=None, scope='interconv2')
100 | 
101 |                         predict_flow2 = slim.conv2d(pad(interconv2), 2, 3,
102 |                                                     scope='predict_flow2',
103 |                                                     activation_fn=None)
104 |                     """ END: Refinement Network """
105 | 
106 |                     flow = predict_flow2 * 0.05
107 |                     # TODO: Look at Accum (train) or Resample (deploy) to see if we need to do something different
108 |                     flow = tf.image.resize_bilinear(flow,
109 |                                                     tf.stack([height, width]),
110 |                                                     align_corners=True)
111 | 
112 |                     return {
113 |                         'predict_flow6': predict_flow6,
114 |                         'predict_flow5': predict_flow5,
115 |                         'predict_flow4': predict_flow4,
116 |                         'predict_flow3': predict_flow3,
117 |                         'predict_flow2': predict_flow2,
118 |                         'flow': flow,
119 |                     }
120 | 
121 |     # def loss(self, flow, predictions):
122 |     #     flow = flow * 20.0
123 |     #
124 |     #     losses = []
125 |     #     INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(flow.shape[2].value)
126 |     #
127 |     #     # L2 loss between predict_flow6, blob23 (weighted w/ 0.32)
128 |     #     predict_flow6 = predictions['predict_flow6']
129 |     #     size = [predict_flow6.shape[1], predict_flow6.shape[2]]
130 |     #     downsampled_flow6 = downsample(flow, size)
131 |     #     losses.append(average_endpoint_error(downsampled_flow6, predict_flow6))
132 |     #
133 |     #     # L2 loss between predict_flow5, blob28 (weighted w/ 0.08)
134 |     #     predict_flow5 = predictions['predict_flow5']
135 |     #     size = [predict_flow5.shape[1], predict_flow5.shape[2]]
136 |     #     downsampled_flow5 = downsample(flow, size)
137 |     #     losses.append(average_endpoint_error(downsampled_flow5, predict_flow5))
138 |     #
139 |     #     # L2 loss between predict_flow4, blob33 (weighted w/ 0.02)
140 |     #     predict_flow4 = predictions['predict_flow4']
141 |     #     size = [predict_flow4.shape[1], predict_flow4.shape[2]]
142 |     #     downsampled_flow4 = downsample(flow, size)
143 |     #     losses.append(average_endpoint_error(downsampled_flow4, predict_flow4))
144 |     #
145 |     #     # L2 loss between predict_flow3, blob38 (weighted w/ 0.01)
146 |     #     predict_flow3 = predictions['predict_flow3']
147 |     #     size = [predict_flow3.shape[1], predict_flow3.shape[2]]
148 |     #     downsampled_flow3 = downsample(flow, size)
149 |     #     losses.append(average_endpoint_error(downsampled_flow3, predict_flow3))
150 |     #
151 |     #     # L2 loss between predict_flow2, blob43 (weighted w/ 0.005)
152 |     #     predict_flow2 = predictions['predict_flow2']
153 |     #     size = [predict_flow2.shape[1], predict_flow2.shape[2]]
154 |     #     downsampled_flow2 = downsample(flow, size)
155 |     #     losses.append(average_endpoint_error(downsampled_flow2, predict_flow2))
156 |     #
157 |     #     loss = tf.losses.compute_weighted_loss(losses, [0.32, 0.08, 0.02, 0.01, 0.005])
158 |     #
159 |     #     # Return the 'total' loss: loss fns + regularization terms defined in the model
160 |     #     return tf.losses.get_total_loss()
161 | 


--------------------------------------------------------------------------------