├── Makefile
├── README.md
├── data
├── rub1.raw
└── rub2.raw
├── examples
├── insect.png
└── optical_flow_example.png
├── settings.xml
└── src
├── cuda_operations
├── 2d
│ ├── cuda_operation_add_2d.cpp
│ ├── cuda_operation_add_2d.h
│ ├── cuda_operation_convolution_2d.cpp
│ ├── cuda_operation_convolution_2d.h
│ ├── cuda_operation_median_2d.cpp
│ ├── cuda_operation_median_2d.h
│ ├── cuda_operation_registration_2d.cpp
│ ├── cuda_operation_registration_2d.h
│ ├── cuda_operation_resample_2d.cpp
│ ├── cuda_operation_resample_2d.h
│ ├── cuda_operation_solve_2d.cpp
│ └── cuda_operation_solve_2d.h
├── cuda_operation_base.cpp
└── cuda_operation_base.h
├── data_types
├── data2d.cpp
├── data2d.h
├── data3d.cpp
├── data3d.h
├── data_structs.h
├── operation_parameters.cpp
└── operation_parameters.h
├── kernels
├── add_2d.cu
├── convolution_2d.cu
├── median_2d.cu
├── registration_2d.cu
├── resample_2d.cu
└── solve_2d.cu
├── main.cpp
├── optical_flow
├── optical_flow_2d.cpp
├── optical_flow_2d.h
├── optical_flow_base_2d.cpp
└── optical_flow_base_2d.h
└── utils
├── common_utils.cpp
├── common_utils.h
├── cuda_utils.cpp
├── cuda_utils.h
├── io_utils.cpp
├── io_utils.h
├── settings.cpp
├── settings.h
├── tinystr.cpp
├── tinystr.h
├── tinyxml.cpp
├── tinyxml.h
├── tinyxmlerror.cpp
└── tinyxmlparser.cpp
/Makefile:
--------------------------------------------------------------------------------
1 | BASEDIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
2 | INC-DIR := "-I$(BASEDIR)"
3 |
4 | CC := g++
5 | CFLAGS := -c -std=c++11 -O3 -funroll-all-loops -Wno-deprecated
6 | CUDA-FLAGS = -ptx -std=c++11
7 |
8 | # Build host: "rtx" selects the private CUDA 10.0 toolkit, any other value the system nvcc.
9 | MACHINE := rtx
10 |
11 | ifeq ($(MACHINE),rtx)
12 | # anka-rtx
13 | TARGET := cuda-flow2d-rtx
14 | CUDA-TOP = /home/ws/fe0968/local/cuda-10.0
15 | CUDA = $(CUDA-TOP)/bin/nvcc
16 | CUDA-INC-DIR = -I$(CUDA-TOP)/include
17 | CUDA-LIB-DIR = -L$(CUDA-TOP)/lib64 -lcudart -lcuda
18 | else
19 | # ankaimage-concert
20 | TARGET := cuda-flow2d-concert
21 | CUDA = /usr/bin/nvcc
22 | CUDA-INC-DIR = -I/usr/include
23 | CUDA-LIB-DIR = -L/usr/lib/x86_64-linux-gnu -lcudart -lcuda
24 | endif
25 |
26 | # Every kernel found in src/kernels is compiled to kernels/<name>.ptx; the host code
27 | # appends "/kernels/<name>.ptx" to the path obtained from Utils::GetExecutablePath().
28 | KERNEL-DIR := $(BASEDIR)/kernels
29 | CUDASOURCES := $(wildcard $(BASEDIR)/src/kernels/*.cu)
30 | CUDAOBJECTS := $(patsubst $(BASEDIR)/src/kernels/%.cu,$(KERNEL-DIR)/%.ptx,$(CUDASOURCES))
31 |
32 | # $(wildcard ...): collect the .cpp files of src/ and of its first two sub-directory levels
33 | SRCS := $(wildcard $(BASEDIR)/src/*.cpp \
34 | $(BASEDIR)/src/*/*/*.cpp \
35 | $(BASEDIR)/src/*/*.cpp)
36 | # $(patsubst %.cpp,%.o,$(SRCS)): substitute all ".cpp" file name strings to ".o" file name strings
37 | OBJS := $(patsubst %.cpp,%.o,$(SRCS))
38 |
39 | # Remove a half-written output when its recipe fails.
40 | .DELETE_ON_ERROR:
41 |
42 | .PHONY: all cuda clean
43 |
44 | all: $(TARGET)
45 |
46 | # The PTX files are order-only: they are brought up to date with the executable but never force a relink.
47 | $(TARGET): $(OBJS) | $(CUDAOBJECTS)
48 | 	$(CC) $(OBJS) $(CUDA-LIB-DIR) -o $@
49 |
50 | # Compile only (CFLAGS carries -c); linker options are not needed at this step.
51 | %.o: %.cpp
52 | 	$(CC) $(CFLAGS) $(INC-DIR) $(CUDA-INC-DIR) $< -o $@
53 |
54 | # Convenience alias: build all PTX modules.
55 | cuda: $(CUDAOBJECTS)
56 |
57 | # One rule for every kernel; the output directory is created on demand.
58 | $(KERNEL-DIR)/%.ptx: $(BASEDIR)/src/kernels/%.cu | $(KERNEL-DIR)
59 | 	$(CUDA) $(CUDA-FLAGS) $(INC-DIR) $(CUDA-INC-DIR) $< -o $@
60 |
61 | $(KERNEL-DIR):
62 | 	mkdir -p $@
63 |
64 | # Remove the executable, the object files and the generated PTX modules.
65 | clean:
66 | 	$(RM) $(TARGET) $(OBJS) $(CUDAOBJECTS)
67 |
68 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GPU-based 2D Optical flow using NVIDIA CUDA
2 |
3 | This project computes optical flow using *variational methods*, which determine the unknown displacement field as the minimizer
4 | of an energy functional.
5 |
6 | In general, such energy-based formulations are composed of two
7 | parts: a *data term* which assumes constancy of specific image features, and a *smoothness term* which regularizes the spatial variation of the flow field.
8 |
9 | ## Features
10 |
11 | * **Computational model guarantees**:
12 | * a unique solution (global optimal solution)
13 | * stability of the algorithm (no critical dependence on parameters)
14 | * **High quality**:
15 | * dense flow results (one displacement for each pixel)
16 | * allows large displacements
17 | * different types of motion (translation, rotation, local elastic transformation)
18 | * sub-pixel accuracy
19 | * **Robustness**:
20 | * under noise
21 | * under varying illumination (brightness changes)
22 | * with respect to artifacts
23 |
24 | ## Examples
25 |
26 | 
27 |
28 | **Figure**: **Left**: Head of a feeding cockroach *Periplaneta americana* imaged by fast X-ray radiography, together with the computed flow field, which captures the movements of the insect during the chewing process. **Right**: Flow dynamics of liquid droplets in a fuel spray. Color coding: color represents the flow direction and its brightness represents the flow magnitude.
29 |
30 | ## Model
31 |
32 | * Brightness constancy data term [1]
33 | * Gradient constancy data term [2]
34 | * Data term based on higher-order derivatives [3]
35 | * Robust modeling of data term [4]
36 | * Flow-driven smoothness [3]
37 | * Coarse-to-fine flow estimation [2]
38 | * Intermediate flow median filtering [5]
39 |
40 |
41 | ## References
42 |
43 | * [1] B. Horn and B. Schunck. *Determining optical flow*. Artificial Intelligence, 17:185-203, 1981.
44 | * [2] T. Brox, A. Bruhn, N. Papenberg, and J. Weickert. *High accuracy optic flow estimation based on a theory for warping*. In T. Pajdla and J. Matas, editors, Computer Vision, ECCV 2004, volume 3024 of Lecture Notes in Computer Science, pages 25-36. Springer, Berlin, 2004.
45 | * [3] N. Papenberg, A. Bruhn, T. Brox, S. Didas, and J. Weickert. *Highly accurate optic flow computation with theoretically justified warping*. Int. J. Comput. Vision, 67(2):141-158, April 2006.
46 | * [4] M. J. Black and P. Anandan. *The robust estimation of multiple motions: Parametric and piecewise-smooth flow fields*. Computer Vision and Image Understanding, 63(1):75-104, 1996.
47 | * [5] D. Sun, S. Roth, and M. J. Black. *A quantitative analysis of current practices in optical flow estimation and the principles behind them*. International Journal of Computer Vision, 106(2):115-137, 2014.
48 |
--------------------------------------------------------------------------------
/data/rub1.raw:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/data/rub1.raw
--------------------------------------------------------------------------------
/data/rub2.raw:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/data/rub2.raw
--------------------------------------------------------------------------------
/examples/insect.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/examples/insect.png
--------------------------------------------------------------------------------
/examples/optical_flow_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/examples/optical_flow_example.png
--------------------------------------------------------------------------------
/settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
27 |
28 |
--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_add_2d.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "cuda_operation_add_2d.h"
23 |
24 | #include
25 |
26 | #include "src/data_types/data_structs.h"
27 |
28 | #include
29 |
30 | #include "src/utils/common_utils.h"
31 | #include "src/utils/cuda_utils.h"
32 |
33 | /* Passes the operation name "CUDA Add 2D" to the CudaOperationBase constructor. */
34 | CudaOperationAdd2D::CudaOperationAdd2D() : CudaOperationBase("CUDA Add 2D")
35 | {
36 | }
37 | /* Loads kernels/add_2d.ptx, resolves the "add_2d" kernel and uploads container_size; returns true only if every step succeeds. */
38 | bool CudaOperationAdd2D::Initialize(const OperationParameters* params)
39 | {
40 |     initialized_ = false;
41 |
42 |     if (!params) {
43 |         std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName());
44 |         return initialized_;
45 |     }
46 |
47 |     DataSize3 container_size;
48 |     GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_);
49 |
50 |     /* Bounded append: a long executable path must not overflow exec_path. */
51 |     char exec_path[256];
52 |     Utils::GetExecutablePath(exec_path, 256);
53 |     std::strncat(exec_path, "/kernels/add_2d.ptx", sizeof(exec_path) - std::strlen(exec_path) - 1);
54 |
55 |     if (CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) { /* a true result is treated as failure throughout this file */
56 |         return initialized_;
57 |     }
58 |
59 |     /* Resolve the kernel, then copy container_size into the module's "container_size" symbol. */
60 |     size_t const_size = 0;
61 |     initialized_ =
62 |         !CheckCudaError(cuModuleGetFunction(&cuf_add_, cu_module_, "add_2d")) &&
63 |         !CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size")) &&
64 |         const_size == sizeof(container_size) &&
65 |         !CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)));
66 |
67 |     if (!initialized_) {
68 |         /* Any failure after a successful load releases the module again. */
69 |         CheckCudaError(cuModuleUnload(cu_module_));
70 |         cu_module_ = nullptr;
71 |     }
72 |     return initialized_;
73 | }
74 | /* Launches the "add_2d" kernel with operand_0, operand_1 and the width/height taken from data_size. */
75 | void CudaOperationAdd2D::Execute(OperationParameters& params)
76 | {
77 |     if (!IsInitialized()) {
78 |         return;
79 |     }
80 |
81 |     CUdeviceptr operand_0 = 0;
82 |     CUdeviceptr operand_1 = 0;
83 |     DataSize3 data_size;
84 |
85 |     GET_PARAM_OR_RETURN(params, CUdeviceptr, operand_0, "operand_0");
86 |     GET_PARAM_OR_RETURN(params, CUdeviceptr, operand_1, "operand_1");
87 |     GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size");
88 |     /* Round the grid up so that the blocks cover the full width x height extent. */
89 |     dim3 block_dim = { 16, 8 };
90 |     dim3 grid_dim = { static_cast<unsigned int>((data_size.width + block_dim.x - 1) / block_dim.x),
91 |                       static_cast<unsigned int>((data_size.height + block_dim.y - 1) / block_dim.y) };
92 |
93 |     void* args[4] = {
94 |         &operand_0,
95 |         &operand_1,
96 |         &data_size.width,
97 |         &data_size.height };
98 |
99 |     CheckCudaError(cuLaunchKernel(cuf_add_,
100 |                                   grid_dim.x, grid_dim.y, grid_dim.z,
101 |                                   block_dim.x, block_dim.y, block_dim.z,
102 |                                   0,     /* no dynamic shared memory */
103 |                                   NULL,  /* stream */
104 |                                   args,
105 |                                   NULL));
106 | }
--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_add_2d.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_ADD_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_ADD_H_
24 |
25 | #include
26 |
27 | #include "src/cuda_operations/cuda_operation_base.h"
28 | #include "src/data_types/operation_parameters.h"
29 | /* CUDA operation wrapping the "add_2d" kernel loaded from kernels/add_2d.ptx. */
30 | class CudaOperationAdd2D : public CudaOperationBase {
31 | public:
32 |     CudaOperationAdd2D();
33 |
34 |     bool Initialize(const OperationParameters* params = nullptr) override;
35 |     void Execute(OperationParameters& params) override;
36 |
37 | private:
38 |     CUfunction cuf_add_;  // handle of the "add_2d" kernel, resolved in Initialize()
39 | };
40 |
41 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_ADD_H_
42 |
--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_convolution_2d.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "cuda_operation_convolution_2d.h"
23 |
24 | #include
25 | #include
26 | #include
27 |
28 | #include
29 | #include
30 | #include
31 |
32 | #include "src/data_types/data_structs.h"
33 | #include "src/utils/common_utils.h"
34 | #include "src/utils/cuda_utils.h"
35 |
36 | using namespace std;
37 |
38 | /* Passes the operation name "CUDA Convolution 2D" to the CudaOperationBase constructor. */
39 | CudaOperationConvolution2D::CudaOperationConvolution2D() : CudaOperationBase("CUDA Convolution 2D")
40 | {
41 | }
42 | /* Loads kernels/convolution_2d.ptx, resolves both convolution kernels and uploads container_size; true only if every step succeeds. */
43 | bool CudaOperationConvolution2D::Initialize(const OperationParameters* params)
44 | {
45 |     initialized_ = false;
46 |
47 |     if (!params) {
48 |         std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName());
49 |         return initialized_;
50 |     }
51 |
52 |     DataSize3 container_size;
53 |     GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_);
54 |
55 |     dev_container_size_ = container_size;
56 |
57 |     /* Bounded append: a long executable path must not overflow exec_path. */
58 |     char exec_path[256];
59 |     Utils::GetExecutablePath(exec_path, 256);
60 |     std::strncat(exec_path, "/kernels/convolution_2d.ptx", sizeof(exec_path) - std::strlen(exec_path) - 1);
61 |
62 |     if (CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) { /* a true result is treated as failure throughout this file */
63 |         return initialized_;
64 |     }
65 |
66 |     /* Resolve both kernels, then copy container_size into the module's "container_size" symbol. */
67 |     size_t const_size = 0;
68 |     initialized_ =
69 |         !CheckCudaError(cuModuleGetFunction(&cuf_convolution_rows_, cu_module_, "convolutionRowsKernel")) &&
70 |         !CheckCudaError(cuModuleGetFunction(&cuf_convolution_cols_, cu_module_, "convolutionColumnsKernel")) &&
71 |         !CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size")) &&
72 |         const_size == sizeof(container_size) &&
73 |         !CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)));
74 |
75 |     if (!initialized_) {
76 |         /* Any failure after a successful load releases the module again. */
77 |         CheckCudaError(cuModuleUnload(cu_module_));
78 |         cu_module_ = nullptr;
79 |     }
80 |     return initialized_;
81 | }
82 | /* Builds a normalized 1D Gaussian mask of radius (size_t)(precision * sigma / pixel_size) in kernel_. */
83 | void CudaOperationConvolution2D::ComputeGaussianKernel(float sigma, size_t precision, float pixel_size)
84 | {
85 |     kernel_radius_ = (size_t)(precision * sigma / pixel_size);
86 |     kernel_length_ = 2 * kernel_radius_ + 1;
87 |
88 |     /* Execute() recomputes the mask on every call: release the previous buffer first. */
89 |     delete[] kernel_;
90 |     kernel_ = nullptr;
91 |     kernel_ = new float[kernel_length_];
92 |
93 |     const int r = static_cast<int>(kernel_radius_);
94 |
95 |     /* Sample the Gaussian at the offsets -r..r, spaced pixel_size apart. */
96 |     for (int i = -r; i <= r; i++) {
97 |         float val = 1.0 / (sigma * std::sqrt(2.0 * 3.1415926)) * std::exp(-(i * i * pixel_size * pixel_size) / (2.0 * sigma * sigma));
98 |         kernel_[i + r] = val;
99 |     }
100 |
101 |     // Normalize convolution mask vector
102 |     float sum = 0.0;
103 |
104 |     for (unsigned int i = 0; i < kernel_length_; i++) {
105 |         sum = sum + kernel_[i];
106 |     }
107 |
108 |     for (unsigned int i = 0; i < kernel_length_; i++) {
109 |         kernel_[i] = kernel_[i] / sum;
110 |     }
111 |
112 | }
113 |
114 | void CudaOperationConvolution2D::PrintConvolutionKernel()
115 | {
116 | if (kernel_ == nullptr)
117 | std::printf("Error: Convolution kernel is not initialized.\n");
118 | else {
119 | std::printf("Convolution kernel (radius = %d)\n", kernel_radius_);
120 |
121 | for (unsigned int i = 0; i < kernel_length_; i++) {
122 | std::printf("%.4f ", kernel_[i]);
123 | }
124 | std::printf("\n\n");
125 |
126 | }
127 |
128 | }
129 |
130 |
131 | /* Gaussian-filters dev_input into dev_output (rows pass into dev_temp, then columns pass); aborts if the mask cannot be uploaded. */
132 | void CudaOperationConvolution2D::Execute(OperationParameters& params)
133 | {
134 |     if (!IsInitialized()) {
135 |         return;
136 |     }
137 |
138 |     CUdeviceptr dev_input = 0;
139 |     CUdeviceptr dev_output = 0;
140 |     CUdeviceptr dev_temp = 0;
141 |     DataSize3 data_size;
142 |     float gaussian_sigma;
143 |
144 |     GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_input, "dev_input");
145 |     GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_output, "dev_output");
146 |     GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_temp, "dev_temp");
147 |     GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size");
148 |     GET_PARAM_OR_RETURN(params, float, gaussian_sigma, "gaussian_sigma");
149 |
150 |     if (dev_input == dev_output) {
151 |         std::printf("Operation '%s': Error. Input buffer cannot serve as output buffer.", GetName());
152 |         return;
153 |     }
154 |
155 |     // TODO: Make computation of kernel once for all images
156 |     ComputeGaussianKernel(gaussian_sigma, 3, 1.0);
157 |
158 |     /* Get the pointer to the constant memory and make sure the mask fits before copying it */
159 |     size_t const_size = 0;
160 |     if (CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "c_Kernel"))) {
161 |         return;
162 |     }
163 |     if (kernel_length_ * sizeof(float) > const_size) {
164 |         std::printf("Operation '%s': Error. Convolution kernel (%zu values) does not fit into the device buffer.\n", GetName(), kernel_length_);
165 |         return;
166 |     }
167 |     if (CheckCudaError(cuMemcpyHtoD(dev_constants_, kernel_, kernel_length_ * sizeof(float)))) {
168 |         return;
169 |     }
170 |
171 |     const int pitch = static_cast<int>(dev_container_size_.pitch / sizeof(float));
172 |     /* 1. Process in horizontal direction (rows) */
173 |     ResampleRows(dev_input, dev_temp, data_size, pitch);
174 |     /* 2. Process in vertical direction (columns) */
175 |     ResampleColumns(dev_temp, dev_output, data_size, pitch);
176 | }
177 | /* Launches the row-convolution kernel from input to output; returns without launching if the shared memory it needs exceeds the device limit. */
178 | void CudaOperationConvolution2D::ResampleRows(CUdeviceptr input, CUdeviceptr output, DataSize3& data_size, size_t pitch) const
179 | {
180 |     int kernel_radius = static_cast<int>(kernel_radius_);
181 |     /* A block spans row_block_dim.x * rows_results_steps columns; the shared buffer adds rows_halo_steps tiles per side. */
182 |     unsigned int rows_results_steps = 4;
183 |     unsigned int rows_halo_steps = 1;
184 |
185 |     dim3 row_block_dim = { 16, 4 };
186 |     dim3 row_grid_dim = { static_cast<unsigned int>((data_size.width + (row_block_dim.x * rows_results_steps) - 1) / (row_block_dim.x * rows_results_steps)),
187 |                           static_cast<unsigned int>((data_size.height + row_block_dim.y - 1) / row_block_dim.y) };
188 |
189 |     /* Calculate the needed shared memory size */
190 |     int shared_memory_size = 0; /* stays 0 if the query fails, so the check below rejects the launch */
191 |     CUdevice cu_device;
192 |     CheckCudaError(cuDeviceGet(&cu_device, 0));
193 |     CheckCudaError(cuDeviceGetAttribute(&shared_memory_size, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, cu_device));
194 |
195 |     int needed_shared_memory_size =
196 |         ((rows_results_steps + 2*rows_halo_steps)*row_block_dim.x) * (row_block_dim.y) * sizeof(float);
197 |
198 |     //std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size);
199 |
200 |     if (needed_shared_memory_size > shared_memory_size) {
201 |         std::printf("<%s>: Error shared memory allocation. Reduce the thread block size.\n", GetName());
202 |         std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size);
203 |         return;
204 |     }
205 |
206 |     /*std::printf("Starting Convolution kernel: Grid: %d x %d, Blocks %d x %d \n", row_grid_dim.x,
207 |     row_grid_dim.y,
208 |     row_block_dim.x,
209 |     row_block_dim.y);*/
210 |
211 |     /* Kernel arguments in launch order: output, input, width, height, pitch, kernel radius. */
212 |     void* args[6] = {
213 |         &output,
214 |         &input,
215 |         &data_size.width,
216 |         &data_size.height,
217 |         &pitch,
218 |         &kernel_radius };
219 |
220 |     CheckCudaError(cuLaunchKernel(cuf_convolution_rows_,
221 |                                   row_grid_dim.x, row_grid_dim.y, row_grid_dim.z,
222 |                                   row_block_dim.x, row_block_dim.y, row_block_dim.z,
223 |                                   needed_shared_memory_size,
224 |                                   NULL,
225 |                                   args,
226 |                                   NULL));
227 | }
228 | /* Launches the column-convolution kernel from input to output; returns without launching if the shared memory it needs exceeds the device limit. */
229 | void CudaOperationConvolution2D::ResampleColumns(CUdeviceptr input, CUdeviceptr output, DataSize3& data_size, size_t pitch) const
230 | {
231 |     int kernel_radius = static_cast<int>(kernel_radius_);
232 |     /* A block spans col_block_dim.y * cols_results_steps rows; the shared buffer adds cols_halo_steps tiles per side. */
233 |     unsigned int cols_results_steps = 4;
234 |     unsigned int cols_halo_steps = 1;
235 |
236 |     dim3 col_block_dim = { 4, 16 };
237 |     dim3 col_grid_dim = { static_cast<unsigned int>((data_size.width + col_block_dim.x - 1) / col_block_dim.x),
238 |                           static_cast<unsigned int>((data_size.height + (col_block_dim.y * cols_results_steps) - 1) / (col_block_dim.y * cols_results_steps)) };
239 |
240 |     int shared_memory_size = 0; /* stays 0 if the query fails, so the check below rejects the launch */
241 |     CUdevice cu_device;
242 |     CheckCudaError(cuDeviceGet(&cu_device, 0));
243 |     CheckCudaError(cuDeviceGetAttribute(&shared_memory_size, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, cu_device));
244 |
245 |     int needed_shared_memory_size =
246 |         (col_block_dim.x * ((cols_results_steps + 2*cols_halo_steps)*col_block_dim.y + 0)) * sizeof(float);
247 |
248 |     //std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size);
249 |
250 |     if (needed_shared_memory_size > shared_memory_size) {
251 |         std::printf("<%s>: Error shared memory allocation. Reduce the thread block size.\n", GetName());
252 |         std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size);
253 |         return;
254 |     }
255 |
256 |     /* std::printf("Starting Convolution kernel: Grid: %d x %d, Blocks %d x %d \n", col_grid_dim.x,
257 |     col_grid_dim.y,
258 |     col_block_dim.x,
259 |     col_block_dim.y);
260 |     */
261 |
262 |     /* Kernel arguments in launch order: output, input, width, height, pitch, kernel radius. */
263 |     void* args[6] = {
264 |         &output,
265 |         &input,
266 |         &data_size.width,
267 |         &data_size.height,
268 |         &pitch,
269 |         &kernel_radius };
270 |
271 |     CheckCudaError(cuLaunchKernel(cuf_convolution_cols_,
272 |                                   col_grid_dim.x, col_grid_dim.y, col_grid_dim.z,
273 |                                   col_block_dim.x, col_block_dim.y, col_block_dim.z,
274 |                                   needed_shared_memory_size,
275 |                                   NULL,
276 |                                   args,
277 |                                   NULL));
278 | }
279 |
280 |
281 |
282 | /* Releases the host-side convolution mask allocated by ComputeGaussianKernel(). */
283 | CudaOperationConvolution2D::~CudaOperationConvolution2D()
284 | {
285 |     /* kernel_ comes from new float[], so it needs delete[]; deleting a null pointer is a no-op. */
286 |     delete[] kernel_;
287 |     kernel_ = nullptr;
288 | }
--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_convolution_2d.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_CONVOLUTION_2D_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_CONVOLUTION_2D_H_
24 |
25 | #include
26 |
27 | #include "src/cuda_operations/cuda_operation_base.h"
28 | #include "src/data_types/data_structs.h"
29 | /* CUDA operation running a separable (rows, then columns) Gaussian convolution from kernels/convolution_2d.ptx. */
30 | class CudaOperationConvolution2D : public CudaOperationBase {
31 | public:
32 |     CudaOperationConvolution2D();
33 |     ~CudaOperationConvolution2D();
34 |
35 |     bool Initialize(const OperationParameters* params = nullptr) override;
36 |     void Execute(OperationParameters& params) override;
37 |     void PrintConvolutionKernel();
38 |
39 | private:
40 |     void ComputeGaussianKernel(float sigma, size_t precision, float pixel_size);
41 |
42 |     void ResampleRows(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, size_t pitch) const;
43 |     void ResampleColumns(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, size_t pitch) const;
44 |
45 |     CUfunction cuf_convolution_rows_;  // "convolutionRowsKernel", resolved in Initialize()
46 |     CUfunction cuf_convolution_cols_;  // "convolutionColumnsKernel", resolved in Initialize()
47 |
48 |     DataSize3 dev_container_size_;     // copy of the "container_size" init parameter
49 |
50 |     // Host-side mask written by ComputeGaussianKernel(): 2 * radius + 1 coefficients.
51 |     float* kernel_ = nullptr;
52 |     size_t kernel_length_;
53 |     size_t kernel_radius_;
54 | };
55 |
56 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_CONVOLUTION_2D_H_
57 |
--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_median_2d.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "cuda_operation_median_2d.h"
23 |
24 | #include
25 |
26 | #include
27 | #include
28 |
29 | #include "src/data_types/data_structs.h"
30 | #include "src/utils/common_utils.h"
31 | #include "src/utils/cuda_utils.h"
32 |
33 | /* Passes the operation name "CUDA Median 2D" to the CudaOperationBase constructor. */
34 | CudaOperationMedian2D::CudaOperationMedian2D() : CudaOperationBase("CUDA Median 2D")
35 | {
36 | }
37 | /* Loads kernels/median_2d.ptx, resolves the "median_2d" kernel and uploads container_size; returns true only if every step succeeds. */
38 | bool CudaOperationMedian2D::Initialize(const OperationParameters* params)
39 | {
40 |     initialized_ = false;
41 |
42 |     if (!params) {
43 |         std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName());
44 |         return initialized_;
45 |     }
46 |
47 |     DataSize3 container_size;
48 |     GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_);
49 |
50 |     dev_container_size_ = container_size;
51 |
52 |     /* Bounded append: a long executable path must not overflow exec_path. */
53 |     char exec_path[256];
54 |     Utils::GetExecutablePath(exec_path, 256);
55 |     std::strncat(exec_path, "/kernels/median_2d.ptx", sizeof(exec_path) - std::strlen(exec_path) - 1);
56 |
57 |     if (CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) { /* a true result is treated as failure throughout this file */
58 |         return initialized_;
59 |     }
60 |
61 |     /* Resolve the kernel, then copy container_size into the module's "container_size" symbol. */
62 |     size_t const_size = 0;
63 |     initialized_ =
64 |         !CheckCudaError(cuModuleGetFunction(&cuf_median_, cu_module_, "median_2d")) &&
65 |         !CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size")) &&
66 |         const_size == sizeof(container_size) &&
67 |         !CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)));
68 |
69 |     if (!initialized_) {
70 |         /* Any failure after a successful load releases the module again. */
71 |         CheckCudaError(cuModuleUnload(cu_module_));
72 |         cu_module_ = nullptr;
73 |     }
74 |     return initialized_;
75 | }
76 |
77 | void CudaOperationMedian2D::Execute(OperationParameters& params)
78 | {
79 | if (!IsInitialized()) {
80 | return;
81 | }
82 |
83 | CUdeviceptr dev_input;
84 | CUdeviceptr dev_output;
85 |
86 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_input, "dev_input");
87 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_output, "dev_output");
88 |
89 | DataSize3 data_size;
90 | size_t radius;
91 | GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size");
92 | GET_PARAM_OR_RETURN(params, size_t, radius, "radius");
93 |
94 | if (dev_input == dev_output) {
95 | std::printf("Operation '%s': Error. Input buffer cannot serve as output buffer.", GetName());
96 | return;
97 | }
98 |
99 | /* If the radius equals 1 skip the filtering and copy data from the input to the output */
100 | if (radius == 1) {
101 | CheckCudaError(cuMemcpyDtoD(dev_output, dev_input,
102 | dev_container_size_.pitch * dev_container_size_.height));
103 | return;
104 | }
105 |
106 | if (radius % 2 == 0) {
107 | std::printf("Warning. Median raduis is even (%d), decresaing by 1...\n", radius);
108 | radius -= 1;
109 | }
110 |
111 | if (radius >= 3 && radius <= 7) {
112 | /* Be careful with the thread block size. Thread block size in each dimension
113 | should be greater than radius / 2, because radius / 2 threads are used to
114 | load halo around the thread block. */
115 | dim3 block_dim = { 8, 8};
116 |
117 | dim3 grid_dim ={ static_cast((data_size.width + block_dim.x - 1) / block_dim.x),
118 | static_cast((data_size.height + block_dim.y - 1) / block_dim.y)};
119 |
120 |
121 | /* Calculate the needed shared memory size */
122 | int shared_memory_size;
123 | CUdevice cu_device;
124 | CheckCudaError(cuDeviceGet(&cu_device, 0));
125 | CheckCudaError(cuDeviceGetAttribute(&shared_memory_size, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, cu_device));
126 |
127 | int radius_2 = radius / 2;
128 | int needed_shared_memory_size =
129 | (block_dim.x + 2 * radius_2) * (block_dim.y + 2 * radius_2) * sizeof(float);
130 |
131 | if (needed_shared_memory_size > shared_memory_size) {
132 | std::printf("<%s>: Error shared memory allocation. Reduce the thread block size.\n", GetName());
133 | std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size);
134 | return;
135 | }
136 |
137 |
138 | void* args[5] = {
139 | &dev_input,
140 | &data_size.width,
141 | &data_size.height,
142 | &radius,
143 | &dev_output};
144 |
145 | CheckCudaError(cuLaunchKernel(cuf_median_,
146 | grid_dim.x, grid_dim.y, grid_dim.z,
147 | block_dim.x, block_dim.y, block_dim.z,
148 | needed_shared_memory_size,
149 | NULL,
150 | args,
151 | NULL));
152 | } else {
153 | std::printf("Error. Wrong median raduis (%d). Supported values: 3, 5, 7\n", radius);
154 | }
155 | }
--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_median_2d.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_MEDIAN_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_MEDIAN_H_
24 |
25 | #include
26 |
27 | #include "src/cuda_operations/cuda_operation_base.h"
28 | #include "src/data_types/data_structs.h"
29 | /* CUDA operation wrapping the "median_2d" kernel loaded from kernels/median_2d.ptx. */
30 | class CudaOperationMedian2D : public CudaOperationBase {
31 | public:
32 |     CudaOperationMedian2D();
33 |
34 |     bool Initialize(const OperationParameters* params = nullptr) override;
35 |     void Execute(OperationParameters& params) override;
36 |
37 | private:
38 |     CUfunction cuf_median_;         // handle of the "median_2d" kernel, resolved in Initialize()
39 |
40 |     DataSize3 dev_container_size_;  // copy of the "container_size" init parameter
41 | };
42 |
43 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_MEDIAN_H_
44 |
--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_registration_2d.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "cuda_operation_registration_2d.h"
23 |
24 | #include
25 | #include
26 | #include
27 |
28 | #include "src/data_types/data_structs.h"
29 | #include "src/utils/common_utils.h"
30 | #include "src/utils/cuda_utils.h"
31 |
32 | CudaOperationRegistration2D::CudaOperationRegistration2D()
33 | : CudaOperationBase("CUDA Registration 2D")
34 | {
35 | }
36 |
37 | bool CudaOperationRegistration2D::Initialize(const OperationParameters* params)
38 | {
39 | initialized_ = false;
40 |
41 | if (!params) {
42 | std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName());
43 | return initialized_;
44 | }
45 |
46 | DataSize3 container_size;
47 | GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_);
48 |
49 | char exec_path[256];
50 | Utils::GetExecutablePath(exec_path, 256);
51 | std::strcat(exec_path, "/kernels/registration_2d.ptx");
52 |
53 | if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) {
54 | if (!CheckCudaError(cuModuleGetFunction(&cuf_registration_, cu_module_, "registration_2d"))) {
55 | size_t const_size;
56 |
57 | /* Get the pointer to the constant memory and copy data */
58 | if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) {
59 | if (const_size == sizeof(container_size)) {
60 | if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) {
61 | initialized_ = true;
62 | }
63 | }
64 | }
65 |
66 | } else {
67 | CheckCudaError(cuModuleUnload(cu_module_));
68 | cu_module_ = nullptr;
69 | }
70 | }
71 | return initialized_;
72 | }
73 |
74 | void CudaOperationRegistration2D::Execute(OperationParameters& params)
75 | {
76 | if (!IsInitialized()) {
77 | return;
78 | }
79 |
80 | CUdeviceptr dev_frame_0;
81 | CUdeviceptr dev_frame_1;
82 | CUdeviceptr dev_flow_u;
83 | CUdeviceptr dev_flow_v;
84 | CUdeviceptr dev_output;
85 |
86 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_frame_0, "dev_frame_0");
87 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_frame_1, "dev_frame_1");
88 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_flow_u, "dev_flow_u");
89 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_flow_v, "dev_flow_v");
90 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_output, "dev_output");
91 |
92 | float hx;
93 | float hy;
94 | DataSize3 data_size;
95 |
96 | GET_PARAM_OR_RETURN(params, float, hx, "hx");
97 | GET_PARAM_OR_RETURN(params, float, hy, "hy");
98 | GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size");
99 |
100 | if (dev_frame_1 == dev_output) {
101 | std::printf("Operation '%s': Error. Input buffer cannot serve as output buffer.", GetName());
102 | return;
103 | }
104 |
105 | dim3 block_dim = { 16, 8, 1 };
106 | dim3 grid_dim = { static_cast((data_size.width + block_dim.x - 1) / block_dim.x),
107 | static_cast((data_size.height + block_dim.y - 1) / block_dim.y),
108 | 1 };
109 |
110 | void* args[12] = {
111 | &dev_frame_0,
112 | &dev_frame_1,
113 | &dev_flow_u,
114 | &dev_flow_v,
115 | &data_size.width,
116 | &data_size.height,
117 | &hx,
118 | &hy,
119 | &dev_output};
120 |
121 | CheckCudaError(cuLaunchKernel(cuf_registration_,
122 | grid_dim.x, grid_dim.y, grid_dim.z,
123 | block_dim.x, block_dim.y, block_dim.z,
124 | 0,
125 | NULL,
126 | args,
127 | NULL));
128 | }
--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_registration_2d.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_REGISTRATION_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_REGISTRATION_H_
24 |
25 | #include
26 |
27 | #include "src/cuda_operations/cuda_operation_base.h"
28 |
29 | class CudaOperationRegistration2D : public CudaOperationBase {
30 | private:
31 | CUfunction cuf_registration_;
32 |
33 | public:
34 | CudaOperationRegistration2D();
35 |
36 | bool Initialize(const OperationParameters* params = nullptr) override;
37 | void Execute(OperationParameters& params) override;
38 | };
39 |
40 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_REGISTRATION_H_
41 |
--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_resample_2d.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "cuda_operation_resample_2d.h"
23 |
24 | #include
25 |
26 | #include
27 |
28 | #include "src/data_types/data_structs.h"
29 | #include "src/utils/common_utils.h"
30 | #include "src/utils/cuda_utils.h"
31 |
32 | CudaOperationResample2D::CudaOperationResample2D()
33 | : CudaOperationBase("CUDA Resample 2D")
34 | {
35 | }
36 |
37 | bool CudaOperationResample2D::Initialize(const OperationParameters* params)
38 | {
39 | initialized_ = false;
40 |
41 | if (!params) {
42 | std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName());
43 | return initialized_;
44 | }
45 |
46 | DataSize3 container_size;
47 | GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_);
48 |
49 |
50 | char exec_path[256];
51 | Utils::GetExecutablePath(exec_path, 256);
52 | std::strcat(exec_path, "/kernels/resample_2d.ptx");
53 |
54 | if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) {
55 | if (!CheckCudaError(cuModuleGetFunction(&cuf_resample_x_, cu_module_, "resample_x")) &&
56 | !CheckCudaError(cuModuleGetFunction(&cuf_resample_y_, cu_module_, "resample_y"))) {
57 | size_t const_size;
58 |
59 | /* Get the pointer to the constant memory and copy data */
60 | if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) {
61 | if (const_size == sizeof(container_size)) {
62 | if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) {
63 | initialized_ = true;
64 | }
65 | }
66 | }
67 |
68 | } else {
69 | CheckCudaError(cuModuleUnload(cu_module_));
70 | cu_module_ = nullptr;
71 | }
72 | }
73 | return initialized_;
74 | }
75 |
76 | void CudaOperationResample2D::Execute(OperationParameters& params)
77 | {
78 | if (!IsInitialized()) {
79 | return;
80 | }
81 | CUdeviceptr dev_input = 0;
82 | CUdeviceptr dev_output = 0;
83 | CUdeviceptr dev_temp = 0;
84 |
85 | DataSize3 data_size;
86 | DataSize3 resample_size;
87 |
88 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_input, "dev_input");
89 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_output, "dev_output");
90 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_temp, "dev_temp");
91 | GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size");
92 | GET_PARAM_OR_RETURN(params, DataSize3, resample_size, "resample_size");
93 |
94 | if (dev_input == dev_output) {
95 | std::printf("Operation '%s': Error. Input buffer cannot serve as output buffer.", GetName());
96 | return;
97 | }
98 |
99 | DataSize3 output_size = data_size;
100 | output_size.width = resample_size.width;
101 | ResampleX(dev_input, dev_temp, data_size, output_size);
102 |
103 | data_size.width = output_size.width;
104 | output_size.height = resample_size.height;
105 | ResampleY(dev_temp, dev_output, data_size, output_size);
106 |
107 | }
108 |
109 | void CudaOperationResample2D::ResampleX(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, DataSize3& output_size) const
110 | {
111 | dim3 block_dim = { 16, 8};
112 | dim3 grid_dim = { static_cast((output_size.width + block_dim.x - 1) / block_dim.x),
113 | static_cast((output_size.height + block_dim.y - 1) / block_dim.y)
114 | };
115 |
116 | void* args[5] = { &input,
117 | &output,
118 | &output_size.width,
119 | &output_size.height,
120 | &input_size.width };
121 |
122 |
123 | CheckCudaError(cuLaunchKernel(cuf_resample_x_,
124 | grid_dim.x, grid_dim.y, grid_dim.z,
125 | block_dim.x, block_dim.y, block_dim.z,
126 | 0,
127 | NULL,
128 | args,
129 | NULL));
130 | }
131 |
132 | void CudaOperationResample2D::ResampleY(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, DataSize3& output_size) const
133 | {
134 | dim3 block_dim = { 16, 8};
135 | dim3 grid_dim = { static_cast((output_size.width + block_dim.x - 1) / block_dim.x),
136 | static_cast((output_size.height + block_dim.y - 1) / block_dim.y)
137 | };
138 |
139 | void* args[5] = { &input,
140 | &output,
141 | &output_size.width,
142 | &output_size.height,
143 | &input_size.height };
144 |
145 | CheckCudaError(cuLaunchKernel(cuf_resample_y_,
146 | grid_dim.x, grid_dim.y, grid_dim.z,
147 | block_dim.x, block_dim.y, block_dim.z,
148 | 0,
149 | NULL,
150 | args,
151 | NULL));
152 | }
153 |
154 |
--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_resample_2d.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_RESAMPLE_2D_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_RESAMPLE_2D_H_
24 |
25 | #include
26 |
27 | #include "src/cuda_operations/cuda_operation_base.h"
28 | #include "src/data_types/data_structs.h"
29 |
30 | class CudaOperationResample2D : public CudaOperationBase {
31 | private:
32 | CUfunction cuf_resample_x_;
33 | CUfunction cuf_resample_y_;
34 |
35 | void ResampleX(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, DataSize3& output_size) const;
36 | void ResampleY(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, DataSize3& output_size) const;
37 |
38 |
39 | public:
40 | CudaOperationResample2D();
41 |
42 | bool Initialize(const OperationParameters* params = nullptr) override;
43 | void Execute(OperationParameters& params) override;
44 | };
45 |
46 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_RESAMPLE_2D_H_
47 |
--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_solve_2d.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "cuda_operation_solve_2d.h"
23 |
24 | #include
25 | #include
26 | #include
27 | #include
28 |
29 | #include "src/data_types/data_structs.h"
30 | #include "src/utils/common_utils.h"
31 | #include "src/utils/cuda_utils.h"
32 |
33 |
34 | CudaOperationSolve2D::CudaOperationSolve2D()
35 | : CudaOperationBase("CUDA Solve 2D")
36 | {
37 | }
38 |
39 | bool CudaOperationSolve2D::Initialize(const OperationParameters* params)
40 | {
41 | initialized_ = false;
42 |
43 | if (!params) {
44 | std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName());
45 | return initialized_;
46 | }
47 |
48 | DataSize3 container_size;
49 | DataConstancy data_constancy = DataConstancy::LogDerivatives;
50 |
51 | GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_);
52 | GET_PARAM_OR_RETURN_VALUE(*params, DataConstancy, data_constancy, "data_constancy", initialized_);
53 |
54 | dev_container_size_ = container_size;
55 |
56 | char exec_path[256];
57 | Utils::GetExecutablePath(exec_path, 256);
58 | std::strcat(exec_path, "/kernels/solve_2d.ptx");
59 |
60 | if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) {
61 |
62 | std::string solve_function_name;
63 |
64 |
65 | switch (data_constancy)
66 | {
67 | case DataConstancy::Grey:
68 | solve_function_name = "solve_2d";
69 | break;
70 |
71 | case DataConstancy::Gradient:
72 | solve_function_name = "solve_2d_grad";
73 | break;
74 |
75 | case DataConstancy::LogDerivatives:
76 | solve_function_name = "solve_2d_log";
77 | break;
78 |
79 | default:
80 | solve_function_name = "solve_2d";
81 | break;
82 | }
83 |
84 | if (!CheckCudaError(cuModuleGetFunction(&cuf_compute_phi_ksi_, cu_module_, "compute_phi_ksi")) &&
85 | !CheckCudaError(cuModuleGetFunction(&cuf_solve_, cu_module_, solve_function_name.c_str()))
86 | ) {
87 | size_t const_size;
88 |
89 | /* Get the pointer to the constant memory and copy data */
90 | if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) {
91 | if (const_size == sizeof(container_size)) {
92 | if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) {
93 | initialized_ = true;
94 | }
95 | }
96 | }
97 |
98 | } else {
99 | CheckCudaError(cuModuleUnload(cu_module_));
100 | cu_module_ = nullptr;
101 | }
102 | }
103 | return initialized_;
104 | }
105 |
106 | void CudaOperationSolve2D::Execute(OperationParameters& params)
107 | {
108 | if (!IsInitialized()) {
109 | return;
110 | }
111 |
112 | CUdeviceptr dev_frame_0;
113 | CUdeviceptr dev_frame_1;
114 | CUdeviceptr dev_flow_u;
115 | CUdeviceptr dev_flow_v;
116 | CUdeviceptr dev_phi;
117 | CUdeviceptr dev_ksi;
118 |
119 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_frame_0, "dev_frame_0");
120 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_frame_1, "dev_frame_1");
121 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_flow_u, "dev_flow_u");
122 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_flow_v, "dev_flow_v");
123 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_phi, "dev_phi");
124 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_ksi, "dev_ksi");
125 |
126 | /* Use references to guaranty correct pointer values
127 | outside this operation after using std::swap() */
128 | CUdeviceptr* dev_flow_du_ptr;
129 | CUdeviceptr* dev_flow_dv_ptr;
130 | CUdeviceptr* dev_temp_du_ptr;
131 | CUdeviceptr* dev_temp_dv_ptr;
132 |
133 | GET_PARAM_PTR_OR_RETURN(params, CUdeviceptr, dev_flow_du_ptr, "dev_flow_du");
134 | GET_PARAM_PTR_OR_RETURN(params, CUdeviceptr, dev_flow_dv_ptr, "dev_flow_dv");
135 | GET_PARAM_PTR_OR_RETURN(params, CUdeviceptr, dev_temp_du_ptr, "dev_temp_du");
136 | GET_PARAM_PTR_OR_RETURN(params, CUdeviceptr, dev_temp_dv_ptr, "dev_temp_dv");
137 |
138 |
139 | CUdeviceptr& dev_flow_du = *dev_flow_du_ptr;
140 | CUdeviceptr& dev_flow_dv = *dev_flow_dv_ptr;
141 | CUdeviceptr& dev_temp_du = *dev_temp_du_ptr;
142 | CUdeviceptr& dev_temp_dv = *dev_temp_dv_ptr;
143 |
144 | size_t outer_iterations_count;
145 | size_t inner_iterations_count;
146 | float equation_alpha;
147 | float equation_smoothness;
148 | float equation_data;
149 | float hx;
150 | float hy;
151 | DataSize3 data_size;
152 |
153 | GET_PARAM_OR_RETURN(params, size_t, outer_iterations_count, "outer_iterations_count");
154 | GET_PARAM_OR_RETURN(params, size_t, inner_iterations_count, "inner_iterations_count");
155 | GET_PARAM_OR_RETURN(params, float, equation_alpha, "equation_alpha");
156 | GET_PARAM_OR_RETURN(params, float, equation_smoothness, "equation_smoothness");
157 | GET_PARAM_OR_RETURN(params, float, equation_data, "equation_data");
158 | GET_PARAM_OR_RETURN(params, float, hx, "hx");
159 | GET_PARAM_OR_RETURN(params, float, hy, "hy");
160 | GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size");
161 |
162 |
163 | /* Run solver kernels */
164 | dim3 block_dim = { 16, 8};
165 |
166 | int shared_memory_size;
167 | CUdevice cu_device;
168 | CheckCudaError(cuDeviceGet(&cu_device, 0));
169 | CheckCudaError(cuDeviceGetAttribute(&shared_memory_size, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, cu_device));
170 |
171 | /* We need 6 extended blocks in the shared memory */
172 | int number_shared_blocks = 6;
173 | int needed_shared_memory_size =
174 | (block_dim.x + 2) * (block_dim.y + 2) * sizeof(float) * number_shared_blocks;
175 |
176 | int number_shared_blocks_solve = 8;
177 |
178 | DataConstancy data_constancy;
179 | GET_PARAM_OR_RETURN(params, DataConstancy, data_constancy, "data_constancy");
180 |
181 | switch (data_constancy)
182 | {
183 | case DataConstancy::Grey:
184 | number_shared_blocks_solve = 8;
185 | break;
186 |
187 | case DataConstancy::Gradient:
188 | number_shared_blocks_solve = 11;
189 | break;
190 |
191 | case DataConstancy::LogDerivatives:
192 | number_shared_blocks_solve = 11;
193 | break;
194 |
195 | default:
196 | number_shared_blocks_solve = 8;
197 | break;
198 | }
199 |
200 |
201 |
202 | int needed_shared_memory_size_solve =
203 | (block_dim.x + 2) * (block_dim.y + 2) * sizeof(float) * number_shared_blocks_solve;
204 |
205 | if (needed_shared_memory_size > shared_memory_size || needed_shared_memory_size_solve > shared_memory_size) {
206 | std::printf("<%s>: Error shared memory allocation. Reduce thread block size.\n", GetName());
207 | std::printf("Shared memory: %d Needed: %d | %d\n", shared_memory_size, needed_shared_memory_size, needed_shared_memory_size_solve);
208 | return;
209 | }
210 |
211 | /* Mesure execution time */
212 | size_t total_iterations_count = outer_iterations_count * inner_iterations_count;
213 |
214 | CUevent cu_event_start;
215 | CUevent cu_event_stop;
216 |
217 | CheckCudaError(cuEventCreate(&cu_event_start, CU_EVENT_DEFAULT));
218 | CheckCudaError(cuEventCreate(&cu_event_stop, CU_EVENT_DEFAULT));
219 |
220 | CheckCudaError(cuEventRecord(cu_event_start, NULL));
221 |
222 | /* Display computation status */
223 | if (!this->silent) {
224 | Utils::PrintProgressBar(0.f);
225 | std::printf(" % 3.0f%%", 0.f);
226 | }
227 |
228 | /* Initialize flow increment buffers with 0 */
229 | CheckCudaError(cuMemsetD2D8(dev_flow_du, dev_container_size_.pitch, 0, data_size.width * sizeof(float),
230 | dev_container_size_.height));
231 | CheckCudaError(cuMemsetD2D8(dev_flow_dv, dev_container_size_.pitch, 0, data_size.width * sizeof(float),
232 | dev_container_size_.height));
233 |
234 |
235 | dim3 grid_dim = { static_cast((data_size.width + block_dim.x - 1) / block_dim.x),
236 | static_cast((data_size.height + block_dim.y - 1) / block_dim.y)};
237 |
238 | for (size_t i = 0; i < outer_iterations_count; ++i) {
239 | void* args[14] = {
240 | &dev_frame_0,
241 | &dev_frame_1,
242 | &dev_flow_u,
243 | &dev_flow_v,
244 | &dev_flow_du,
245 | &dev_flow_dv,
246 | &data_size.width,
247 | &data_size.height,
248 | &hx,
249 | &hy,
250 | &equation_smoothness,
251 | &equation_data,
252 | &dev_phi,
253 | &dev_ksi };
254 |
255 | CheckCudaError(cuLaunchKernel(cuf_compute_phi_ksi_,
256 | grid_dim.x, grid_dim.y, grid_dim.z,
257 | block_dim.x, block_dim.y, block_dim.z,
258 | needed_shared_memory_size,
259 | NULL,
260 | args,
261 | NULL));
262 |
263 | for (size_t j = 0; j < inner_iterations_count; ++j) {
264 | void* args[15] = {
265 | &dev_frame_0,
266 | &dev_frame_1,
267 | &dev_flow_u,
268 | &dev_flow_v,
269 | &dev_flow_du,
270 | &dev_flow_dv,
271 | &dev_phi,
272 | &dev_ksi,
273 | &data_size.width,
274 | &data_size.height,
275 | &hx,
276 | &hy,
277 | &equation_alpha,
278 | &dev_temp_du,
279 | &dev_temp_dv};
280 |
281 | CheckCudaError(cuLaunchKernel(cuf_solve_,
282 | grid_dim.x, grid_dim.y, grid_dim.z,
283 | block_dim.x, block_dim.y, block_dim.z,
284 | needed_shared_memory_size_solve,
285 | NULL,
286 | args,
287 | NULL));
288 | std::swap(dev_flow_du, dev_temp_du);
289 | std::swap(dev_flow_dv, dev_temp_dv);
290 |
291 | CheckCudaError(cuStreamSynchronize(NULL));
292 |
293 | /* Display computation status */
294 | if (!this->silent) {
295 | float complete = (i * inner_iterations_count + j) / static_cast(total_iterations_count);
296 | Utils::PrintProgressBar(complete);
297 | std::printf(" % 3.0f%%", complete * 100);
298 | }
299 | }
300 | }
301 | /* Estimate GPU computation time */
302 | CheckCudaError(cuEventRecord(cu_event_stop, NULL));
303 | CheckCudaError(cuEventSynchronize(cu_event_stop));
304 |
305 | float elapsed_time;
306 | CheckCudaError(cuEventElapsedTime(&elapsed_time, cu_event_start, cu_event_stop));
307 |
308 | if (!this->silent) {
309 | Utils::PrintProgressBar(1.f);
310 | std::printf(" %8.4fs\n", elapsed_time / 1000.);
311 | }
312 |
313 | CheckCudaError(cuEventDestroy(cu_event_start));
314 | CheckCudaError(cuEventDestroy(cu_event_stop));
315 | }
--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_solve_2d.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_SOLVE_2D_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_SOLVE_2D_H_
24 |
25 | #include
26 |
27 | #include "src/cuda_operations/cuda_operation_base.h"
28 | #include "src/data_types/data_structs.h"
29 |
30 | class CudaOperationSolve2D : public CudaOperationBase {
31 | private:
32 | CUfunction cuf_compute_phi_ksi_;
33 | CUfunction cuf_solve_;
34 |
35 | DataSize3 dev_container_size_;
36 |
37 | public:
38 | CudaOperationSolve2D();
39 |
40 | bool Initialize(const OperationParameters* params = nullptr) override;
41 | void Execute(OperationParameters& params) override;
42 |
43 | bool silent = false;
44 | };
45 |
46 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_SOLVE_2D_H_
47 |
--------------------------------------------------------------------------------
/src/cuda_operations/cuda_operation_base.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "cuda_operation_base.h"
23 |
24 | #include
25 | #include
26 |
27 | #include "src/utils/cuda_utils.h"
28 |
29 | CudaOperationBase::CudaOperationBase(const char* name)
30 | : name_(name)
31 | {
32 | }
33 |
34 | const char* CudaOperationBase::GetName() const
35 | {
36 | return name_;
37 | };
38 |
39 | bool CudaOperationBase::IsInitialized() const
40 | {
41 | if (!initialized_) {
42 | std::printf("Error: Operation '%s' was not initialized.\n", name_);
43 | }
44 | return initialized_;
45 | }
46 |
47 | void CudaOperationBase::Execute(OperationParameters& params)
48 | {
49 | std::printf("Warning: '%s' Execute() was not defined.\n", name_);
50 | }
51 |
52 | void CudaOperationBase::Destroy()
53 | {
54 | if (cu_module_) {
55 | CheckCudaError(cuModuleUnload(cu_module_));
56 | cu_module_ = nullptr;
57 | }
58 | initialized_ = false;
59 | }
60 |
61 | CudaOperationBase::~CudaOperationBase()
62 | {
63 | if (initialized_) {
64 | Destroy();
65 | }
66 | }
67 |
68 |
--------------------------------------------------------------------------------
/src/cuda_operations/cuda_operation_base.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_CUDA_OPERATION_BASE_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_CUDA_OPERATION_BASE_H_
24 |
25 | #include
26 |
27 | #include "src/data_types/operation_parameters.h"
28 |
29 | class CudaOperationBase {
30 | private:
31 | const char* name_ = nullptr;
32 |
33 | protected:
34 | CUmodule cu_module_ = nullptr;
35 |
36 | CUdeviceptr dev_constants_ = 0;
37 |
38 | bool initialized_ = false;
39 |
40 | CudaOperationBase(const char* name);
41 |
42 | bool IsInitialized() const;
43 |
44 | public:
45 | const char* GetName() const;
46 |
47 | virtual bool Initialize(const OperationParameters* params = nullptr) = 0;
48 | virtual void Execute(OperationParameters& params);
49 | virtual void Destroy();
50 |
51 | virtual ~CudaOperationBase();
52 |
53 | };
54 |
55 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_CUDA_OPERATION_BASE_H_
56 |
--------------------------------------------------------------------------------
/src/data_types/data2d.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "data2d.h"
23 |
24 | #include
25 | #include
26 | #include
27 | #include
28 |
29 | #include
30 |
31 | #include "src/utils/cuda_utils.h"
32 |
33 |
34 | //#define ALLOCATE_PINNED_MEMORY
35 |
36 | Data2D::Data2D()
37 | : width_(0), height_(0)
38 | {}
39 |
40 | Data2D::Data2D(size_t width, size_t height)
41 | : width_(width), height_(height)
42 | {
43 | data_ = static_cast(AllocateMemory(width_, height_));
44 | }
45 |
46 | void Data2D::Swap(Data2D& data2d)
47 | {
48 | if (this->width_ == data2d.width_ &&
49 | this->height_ == data2d.height_) {
50 | std::swap(this->data_, data2d.data_);
51 | } else {
52 | std::printf("Error. Cannot swap two Data2D objects (wrong dimensions).\n");
53 | }
54 | }
55 |
56 | void* Data2D::AllocateMemory(size_t width, size_t height)
57 | {
58 | void* ptr = nullptr;
59 |
60 | #ifdef ALLOCATE_PINNED_MEMORY
61 | if (CheckCudaError(cuMemAllocHost(&ptr, width * height * sizeof(float)))) {
62 | Invalidate();
63 | return nullptr;
64 | }
65 | #else
66 | try {
67 | ptr = new float[width * height];
68 | }
69 | catch (std::bad_alloc& ba) {
70 | Invalidate();
71 | std::printf("Error. Cannot allocate memory on the host. (%s)\n", ba.what());
72 | }
73 | #endif
74 |
75 | return ptr;
76 | }
77 |
78 | void Data2D::FreeMemory(void* pointer)
79 | {
80 | #ifdef ALLOCATE_PINNED_MEMORY
81 | if (pointer)
82 | CheckCudaError(cuMemFreeHost(pointer));
83 | #else
84 | if (pointer) {
85 | delete[] pointer;
86 | pointer = nullptr;
87 | }
88 | #endif
89 | }
90 |
91 | void Data2D::ZeroData()
92 | {
93 | if (data_) {
94 | std::memset(data_, 0, width_ * height_ * sizeof(float));
95 | }
96 | }
97 |
98 | bool Data2D::ReadRAWFromFileU8(const char* filename, size_t width, size_t height)
99 | {
100 | bool loaded = false;
101 | std::FILE *file = std::fopen(filename, "rb");
102 | if (file) {
103 | this->Invalidate();
104 |
105 | width_ = width;
106 | height_ = height;
107 | data_ = static_cast(AllocateMemory(width_, height_));
108 |
109 | if (data_) {
110 | unsigned char *dataU8 = new unsigned char[width_];
111 |
112 | for (size_t y = 0; y < height_; ++y) {
113 | size_t readed = std::fread(dataU8, sizeof(unsigned char), width_, file);
114 |
115 | if (readed == width_) {
116 | for (size_t x = 0; x < width_; ++x) {
117 | data_[Index(x, y)] = static_cast(dataU8[x]);
118 | }
119 | } else {
120 | goto loop_break;
121 | }
122 | }
123 |
124 |
125 | if (std::fread(dataU8, sizeof(unsigned char), width_, file) == 0) {
126 | loaded = true;
127 | } else {
128 |
129 | loop_break:
130 | std::printf("Error reading RAW data from file '%s': wrong dimensions.", filename);
131 | this->Invalidate();
132 | }
133 |
134 | delete[] dataU8;
135 | std::fclose(file);
136 | }
137 | } else {
138 | std::printf("Cannot open file '%s'.\n", filename);
139 | }
140 | return loaded;
141 | }
142 |
143 | bool Data2D::ReadRAWFromFileF32(const char* filename, size_t width, size_t height)
144 | {
145 | bool loaded = false;
146 | std::FILE *file = std::fopen(filename, "rb");
147 | if (file) {
148 | this->Invalidate();
149 |
150 | width_ = width;
151 | height_ = height;
152 | data_ = static_cast(AllocateMemory(width_, height_));
153 |
154 | if (data_) {
155 |
156 | for (size_t y = 0; y < height_; ++y) {
157 | size_t readed = std::fread(&data_[Index(0, y)], sizeof(float), width_, file);
158 | if (readed != width_) {
159 | goto loop_break;
160 | }
161 | }
162 |
163 |
164 | if (std::fread(data_, sizeof(unsigned char), width_, file) == 0) {
165 | loaded = true;
166 | } else {
167 | loop_break:
168 | std::printf("Error reading RAW data from file '%s': wrong dimensions.", filename);
169 | this->Invalidate();
170 | }
171 |
172 | std::fclose(file);
173 | }
174 | } else {
175 | std::printf("Cannot open file '%s'.\n", filename);
176 | }
177 | return loaded;
178 | }
179 |
180 | bool Data2D::WriteRAWToFileU8(const char* filename)
181 | {
182 | std::FILE *file = std::fopen(filename, "wb");
183 | if (file) {
184 | unsigned char *dataU8 = new unsigned char[width_];
185 |
186 |
187 | for (size_t y = 0; y < height_; ++y) {
188 | for (size_t x = 0; x < width_; ++x) {
189 | float dataF32 = std::min(255.f, std::max(0.f, data_[Index(x, y)]));
190 | dataU8[x] = static_cast(dataF32);
191 | }
192 | size_t written = std::fwrite(dataU8, sizeof(unsigned char), width_, file);
193 | if (written != width_) {
194 | std::printf("Error writing RAW data to file '%s'.", filename);
195 | delete[] dataU8;
196 | std::fclose(file);
197 | return false;
198 | }
199 | }
200 |
201 | delete[] dataU8;
202 | std::fclose(file);
203 | return true;
204 | } else {
205 | std::printf("Cannot open file '%s'.\n", filename);
206 | return false;
207 | }
208 | }
209 |
210 | bool Data2D::WriteRAWToFileF32(const char* filename)
211 | {
212 | std::FILE *file = std::fopen(filename, "wb");
213 | if (file) {
214 | unsigned char *dataU8 = new unsigned char[width_];
215 |
216 | for (size_t y = 0; y < height_; ++y) {
217 | size_t written = std::fwrite(&data_[Index(0, y)], sizeof(float), width_, file);
218 | if (written != width_) {
219 | std::printf("Error writing RAW data to file '%s'.", filename);
220 | std::fclose(file);
221 | return false;
222 | }
223 | }
224 |
225 | std::fclose(file);
226 | return true;
227 | } else {
228 | std::printf("Cannot open file '%s'.\n", filename);
229 | return false;
230 | }
231 | }
232 |
233 | //bool Data3D::WriteFlowToFileVTK(const char* filename, const Data3D& flow_u, const Data3D& flow_v, const Data3D& flow_w)
234 | //{
235 | // std::FILE *file = std::fopen(filename, "wb");
236 | // if (file) {
237 | // std::fprintf(file, "# vtk DataFile Version 2.0\n");
238 | // std::fprintf(file, "3D Vector field computed by GpuFlow3D\n");
239 | // std::fprintf(file, "BINARY\n");
240 | // std::fprintf(file, "DATASET STRUCTURED_POINTS\n");
241 | // std::fprintf(file, "DIMENSIONS %d %d %d\n", flow_u.width_, flow_u.height_, flow_u.depth_);
242 | // std::fprintf(file, "ORIGIN 0 0 0\n");
243 | // std::fprintf(file, "SPACING 1 1 1\n");
244 | // std::fprintf(file, "POINT_DATA %d\n", flow_u.width_ * flow_u.height_ * flow_u.depth_);
245 | // std::fprintf(file, "VECTORS vectors float\n");
246 | //
247 | // for (size_t z = 0; z < flow_u.depth_; ++z) {
248 | // for (size_t y = 0; y < flow_u.height_; ++y) {
249 | // for (size_t x = 0; x < flow_u.width_; ++x) {
250 | // std::fwrite(&flow_u.data_[(z * flow_u.height_ + y) * flow_u.width_ + x], sizeof(float), 1, file);
251 | // std::fwrite(&flow_v.data_[(z * flow_v.height_ + y) * flow_v.width_ + x], sizeof(float), 1, file);
252 | // std::fwrite(&flow_w.data_[(z * flow_w.height_ + y) * flow_w.width_ + x], sizeof(float), 1, file);
253 | // }
254 | // }
255 | // }
256 | //
257 | // std::fclose(file);
258 | // return true;
259 | // } else {
260 | // std::printf("Cannot open file '%s'.\n", filename);
261 | // return false;
262 | // }
263 | //}
264 |
265 | void Data2D::Invalidate()
266 | {
267 | width_ = 0;
268 | height_ = 0;
269 | FreeMemory(data_);
270 | }
271 |
272 | Data2D::~Data2D()
273 | {
274 | FreeMemory(data_);
275 | }
--------------------------------------------------------------------------------
/src/data_types/data2d.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_DATA_TYPES_DATA2D_H_
23 | #define GPUFLOW3D_DATA_TYPES_DATA2D_H_
24 |
25 | #include
26 |
/**
 * Owning container for a 2D image of floats stored row by row
 * (element (x, y) lives at index y * width + x).
 *
 * NOTE(review): the class owns a raw buffer and releases it in the
 * destructor, but copy construction and copy assignment are the implicit
 * ones; copying an instance would make two objects release the same buffer.
 * Pass instances by reference.
 */
class Data2D
{
private:
    float *data_ = nullptr;
    size_t width_ = 0;
    size_t height_ = 0;

    /** Linear buffer index of element (x, y). */
    inline size_t Index(size_t x, size_t y) const {
        return y * width_ + x;
    }

    /** Zeroes the dimensions and releases the buffer. */
    void Invalidate();

    void* AllocateMemory(size_t width, size_t height);
    void FreeMemory(void* pointer);

public:
    Data2D();
    Data2D(size_t width, size_t height);

    /** Number of columns. */
    inline size_t Width() const { return width_; }
    /** Number of rows. */
    inline size_t Height() const { return height_; }
    /** Raw pointer to the first sample (nullptr when no buffer is attached). */
    inline float* DataPtr() { return data_; }
    /** Mutable reference to sample (x, y); no bounds checking is performed. */
    inline float& Data(size_t x, size_t y) { return data_[Index(x, y)]; }

    /** Exchanges buffers with another object of identical dimensions. */
    void Swap(Data2D& data2d);

    /** Fills the buffer with zero bytes. */
    void ZeroData();

    bool ReadRAWFromFileU8(const char* filename, size_t width, size_t height);
    bool ReadRAWFromFileF32(const char* filename, size_t width, size_t height);

    bool WriteRAWToFileU8(const char* filename);
    bool WriteRAWToFileF32(const char* filename);

    //static bool WriteFlowToFileVTK(const char* filename, const Data3D& flow_u, const Data3D& flow_v, const Data3D& flow_w);

    ~Data2D();
};
66 |
67 |
68 |
69 | #endif // !GPUFLOW3D_DATA_TYPES_DATA2D_H_
--------------------------------------------------------------------------------
/src/data_types/data3d.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "src/data_types/data3d.h"
23 |
24 | #include
25 | #include
26 | #include
27 | #include
28 |
29 | #include
30 |
31 | #include "src/utils/cuda_utils.h"
32 |
33 |
34 |
35 | //#define ALLOCATE_PINNED_MEMORY
36 |
37 | Data3D::Data3D()
38 | : width_(0), height_(0), depth_(0)
39 | {}
40 |
41 | Data3D::Data3D(size_t width, size_t height, size_t depth)
42 | : width_(width), height_(height), depth_(depth)
43 | {
44 | data_ = static_cast(AllocateMemory(width_, height_, depth_));
45 | }
46 |
47 | void Data3D::Swap(Data3D& data3d)
48 | {
49 | if (this->width_ == data3d.width_ &&
50 | this->height_ == data3d.height_ &&
51 | this->depth_ == data3d.depth_) {
52 | std::swap(this->data_, data3d.data_);
53 | } else {
54 | std::printf("Error. Cannot swap two Data3D objects (wrong dimensions).\n");
55 | }
56 | }
57 |
58 | void* Data3D::AllocateMemory(size_t width, size_t height, size_t depth)
59 | {
60 | void* ptr = nullptr;
61 |
62 | #ifdef ALLOCATE_PINNED_MEMORY
63 | if (CheckCudaError(cuMemAllocHost(&ptr, width * height * depth * sizeof(float)))) {
64 | Invalidate();
65 | return nullptr;
66 | }
67 | #else
68 | try {
69 | ptr = new float[width * height * depth];
70 | }
71 | catch (std::bad_alloc& ba) {
72 | Invalidate();
73 | std::printf("Error. Cannot allocate memory on the host. (%s)\n", ba.what());
74 | }
75 | #endif
76 |
77 | return ptr;
78 | }
79 |
80 | void Data3D::FreeMemory(void* pointer)
81 | {
82 | #ifdef ALLOCATE_PINNED_MEMORY
83 | if (pointer)
84 | CheckCudaError(cuMemFreeHost(pointer));
85 | #else
86 | if (pointer) {
87 | delete[] pointer;
88 | pointer = nullptr;
89 | }
90 | #endif
91 | }
92 |
93 | void Data3D::ZeroData()
94 | {
95 | if (data_) {
96 | std::memset(data_, 0, width_ * height_ * depth_ * sizeof(float));
97 | }
98 | }
99 |
100 | bool Data3D::ReadRAWFromFileU8(const char* filename, size_t width, size_t height, size_t depth)
101 | {
102 | bool loaded = false;
103 | std::FILE *file = std::fopen(filename, "rb");
104 | if (file) {
105 | this->Invalidate();
106 |
107 | width_ = width;
108 | height_ = height;
109 | depth_ = depth;
110 | data_ = static_cast(AllocateMemory(width_, height_, depth_));
111 |
112 | if (data_) {
113 | unsigned char *dataU8 = new unsigned char[width_];
114 |
115 | for (size_t z = 0; z < depth_; ++z) {
116 | for (size_t y = 0; y < height_; ++y) {
117 | size_t readed = std::fread(dataU8, sizeof(unsigned char), width_, file);
118 |
119 | if (readed == width_) {
120 | for (size_t x = 0; x < width_; ++x) {
121 | data_[Index(x, y, z)] = static_cast(dataU8[x]);
122 | }
123 | } else {
124 | goto loop_break;
125 | }
126 | }
127 | }
128 |
129 | if (std::fread(dataU8, sizeof(unsigned char), width_, file) == 0) {
130 | loaded = true;
131 | } else {
132 |
133 | loop_break:
134 | std::printf("Error reading RAW data from file '%s': wrong dimensions.", filename);
135 | this->Invalidate();
136 | }
137 |
138 | delete[] dataU8;
139 | std::fclose(file);
140 | }
141 | } else {
142 | std::printf("Cannot open file '%s'.\n", filename);
143 | }
144 | return loaded;
145 | }
146 |
147 | bool Data3D::ReadRAWFromFileF32(const char* filename, size_t width, size_t height, size_t depth)
148 | {
149 | bool loaded = false;
150 | std::FILE *file = std::fopen(filename, "rb");
151 | if (file) {
152 | this->Invalidate();
153 |
154 | width_ = width;
155 | height_ = height;
156 | depth_ = depth;
157 | data_ = static_cast(AllocateMemory(width_, height_, depth_));
158 |
159 | if (data_) {
160 | for (size_t z = 0; z < depth_; ++z) {
161 | for (size_t y = 0; y < height_; ++y) {
162 | size_t readed = std::fread(&data_[Index(0, y, z)], sizeof(float), width_, file);
163 | if (readed != width_) {
164 | goto loop_break;
165 | }
166 | }
167 | }
168 |
169 | if (std::fread(data_, sizeof(unsigned char), width_, file) == 0) {
170 | loaded = true;
171 | } else {
172 | loop_break:
173 | std::printf("Error reading RAW data from file '%s': wrong dimensions.", filename);
174 | this->Invalidate();
175 | }
176 |
177 | std::fclose(file);
178 | }
179 | } else {
180 | std::printf("Cannot open file '%s'.\n", filename);
181 | }
182 | return loaded;
183 | }
184 |
185 | bool Data3D::WriteRAWToFileU8(const char* filename)
186 | {
187 | std::FILE *file = std::fopen(filename, "wb");
188 | if (file) {
189 | unsigned char *dataU8 = new unsigned char[width_];
190 |
191 | for (size_t z = 0; z < depth_; ++z) {
192 | for (size_t y = 0; y < height_; ++y) {
193 | for (size_t x = 0; x < width_; ++x) {
194 | float dataF32 = std::min(255.f, std::max(0.f, data_[Index(x, y, z)]));
195 | dataU8[x] = static_cast(dataF32);
196 | }
197 | size_t written = std::fwrite(dataU8, sizeof(unsigned char), width_, file);
198 | if (written != width_) {
199 | std::printf("Error writing RAW data to file '%s'.", filename);
200 | delete[] dataU8;
201 | std::fclose(file);
202 | return false;
203 | }
204 | }
205 | }
206 | delete[] dataU8;
207 | std::fclose(file);
208 | return true;
209 | } else {
210 | std::printf("Cannot open file '%s'.\n", filename);
211 | return false;
212 | }
213 | }
214 |
215 | bool Data3D::WriteRAWToFileF32(const char* filename)
216 | {
217 | std::FILE *file = std::fopen(filename, "wb");
218 | if (file) {
219 | unsigned char *dataU8 = new unsigned char[width_];
220 |
221 | for (size_t z = 0; z < depth_; ++z) {
222 | for (size_t y = 0; y < height_; ++y) {
223 | size_t written = std::fwrite(&data_[Index(0, y, z)], sizeof(float), width_, file);
224 | if (written != width_) {
225 | std::printf("Error writing RAW data to file '%s'.", filename);
226 | std::fclose(file);
227 | return false;
228 | }
229 | }
230 | }
231 | std::fclose(file);
232 | return true;
233 | } else {
234 | std::printf("Cannot open file '%s'.\n", filename);
235 | return false;
236 | }
237 | }
238 |
239 | bool Data3D::WriteFlowToFileVTK(const char* filename, const Data3D& flow_u, const Data3D& flow_v, const Data3D& flow_w)
240 | {
241 | std::FILE *file = std::fopen(filename, "wb");
242 | if (file) {
243 | std::fprintf(file, "# vtk DataFile Version 2.0\n");
244 | std::fprintf(file, "3D Vector field computed by GpuFlow3D\n");
245 | std::fprintf(file, "BINARY\n");
246 | std::fprintf(file, "DATASET STRUCTURED_POINTS\n");
247 | std::fprintf(file, "DIMENSIONS %d %d %d\n", flow_u.width_, flow_u.height_, flow_u.depth_);
248 | std::fprintf(file, "ORIGIN 0 0 0\n");
249 | std::fprintf(file, "SPACING 1 1 1\n");
250 | std::fprintf(file, "POINT_DATA %d\n", flow_u.width_ * flow_u.height_ * flow_u.depth_);
251 | std::fprintf(file, "VECTORS vectors float\n");
252 |
253 | for (size_t z = 0; z < flow_u.depth_; ++z) {
254 | for (size_t y = 0; y < flow_u.height_; ++y) {
255 | for (size_t x = 0; x < flow_u.width_; ++x) {
256 | std::fwrite(&flow_u.data_[(z * flow_u.height_ + y) * flow_u.width_ + x], sizeof(float), 1, file);
257 | std::fwrite(&flow_v.data_[(z * flow_v.height_ + y) * flow_v.width_ + x], sizeof(float), 1, file);
258 | std::fwrite(&flow_w.data_[(z * flow_w.height_ + y) * flow_w.width_ + x], sizeof(float), 1, file);
259 | }
260 | }
261 | }
262 |
263 | std::fclose(file);
264 | return true;
265 | } else {
266 | std::printf("Cannot open file '%s'.\n", filename);
267 | return false;
268 | }
269 | }
270 |
271 | void Data3D::Invalidate()
272 | {
273 | width_ = 0;
274 | height_ = 0;
275 | depth_ = 0;
276 | FreeMemory(data_);
277 | }
278 |
279 | Data3D::~Data3D()
280 | {
281 | FreeMemory(data_);
282 | }
--------------------------------------------------------------------------------
/src/data_types/data3d.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_DATA_TYPES_DATA3D_H_
23 | #define GPUFLOW3D_DATA_TYPES_DATA3D_H_
24 |
25 | #include
26 |
/**
 * Owning container for a 3D volume of floats stored slice by slice and row
 * by row (element (x, y, z) lives at index (z * height + y) * width + x).
 *
 * NOTE(review): the class owns a raw buffer and releases it in the
 * destructor, but copy construction and copy assignment are the implicit
 * ones; copying an instance would make two objects release the same buffer.
 * Pass instances by reference.
 */
class Data3D
{
private:
    float *data_ = nullptr;
    size_t width_ = 0;
    size_t height_ = 0;
    size_t depth_ = 0;

    /** Linear buffer index of element (x, y, z). */
    inline size_t Index(size_t x, size_t y, size_t z) const {
        return (z * height_ + y) * width_ + x;
    }

    /** Zeroes the dimensions and releases the buffer. */
    void Invalidate();

    void* AllocateMemory(size_t width, size_t height, size_t depth);
    void FreeMemory(void* pointer);

public:
    Data3D();
    Data3D(size_t width, size_t height, size_t depth);

    /** Number of samples along x. */
    inline size_t Width() const { return width_; }
    /** Number of samples along y. */
    inline size_t Height() const { return height_; }
    /** Number of samples along z. */
    inline size_t Depth() const { return depth_; }
    /** Raw pointer to the first sample (nullptr when no buffer is attached). */
    inline float* DataPtr() { return data_; }
    /** Mutable reference to sample (x, y, z); no bounds checking is performed. */
    inline float& Data(size_t x, size_t y, size_t z) { return data_[Index(x, y, z)]; }

    /** Exchanges buffers with another object of identical dimensions. */
    void Swap(Data3D& data3d);

    /** Fills the buffer with zero bytes. */
    void ZeroData();

    bool ReadRAWFromFileU8(const char* filename, size_t width, size_t height, size_t depth);
    bool ReadRAWFromFileF32(const char* filename, size_t width, size_t height, size_t depth);

    bool WriteRAWToFileU8(const char* filename);
    bool WriteRAWToFileF32(const char* filename);

    static bool WriteFlowToFileVTK(const char* filename, const Data3D& flow_u, const Data3D& flow_v, const Data3D& flow_w);

    ~Data3D();
};
68 |
69 | #endif // !GPUFLOW3D_DATA_TYPES_DATA3D_H_
--------------------------------------------------------------------------------
/src/data_types/data_structs.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_DATA_TYPES_DATA_STRUCTS_H_
23 | #define GPUFLOW3D_DATA_TYPES_DATA_STRUCTS_H_
24 |
/** Selectable processing methods. */
enum class Methods
{
    OpticalFlow,
    Correlation
};
26 |
/** Selectable data-constancy models. */
enum class DataConstancy
{
    Grey,
    Gradient,
    LogDerivatives
};
28 |
29 |
30 |
/**
 * Extent of a 2D container as seen by the CUDA kernels.
 *
 * The kernels in this project divide `pitch` by sizeof(float) when they
 * compute an element index, i.e. they treat it as a row stride in bytes.
 */
struct DataSize3
{
    size_t width;    // number of columns
    size_t height;   // number of rows
    size_t pitch;    // row stride
};
36 |
37 |
/** Minimum, maximum and average of a set of float values. */
struct Stat3
{
    float min;   // smallest value
    float max;   // largest value
    float avg;   // average value
};
43 |
44 | #endif // !GPUFLOW3D_DATA_TYPES_DATA_STRUCTS_H_
45 |
--------------------------------------------------------------------------------
/src/data_types/operation_parameters.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "src/data_types/operation_parameters.h"
23 |
24 | OperationParameters::OperationParameters()
25 | {
26 | }
27 |
28 | bool OperationParameters::PushValuePtr(std::string key, void* value_ptr)
29 | {
30 | if (!map_.count(key)) {
31 | map_.insert({ key, value_ptr });
32 | return true;
33 | }
34 | return false;
35 | }
36 |
37 | void* OperationParameters::GetValuePtr(std::string key) const
38 | {
39 | if (map_.count(key)) {
40 | return map_.at(key);
41 | }
42 | return nullptr;
43 | }
44 |
45 | void OperationParameters::Clear()
46 | {
47 | map_.clear();
48 | }
--------------------------------------------------------------------------------
/src/data_types/operation_parameters.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_DATA_TYPES_OPERATION_PARAMETERS_H_
23 | #define GPUFLOW3D_DATA_TYPES_OPERATION_PARAMETERS_H_
24 |
25 | #include
26 | #include
27 |
/**
 * String-keyed registry of untyped value pointers.
 *
 * The registry stores the pointers only; it does not own or copy what they
 * point to.
 */
class OperationParameters {
private:
    // Key -> value pointer. The template arguments follow from the member
    // functions, which insert (std::string, void*) pairs and return void*.
    std::unordered_map<std::string, void*> map_;

public:
    OperationParameters();

    /** Adds an entry; returns false if the key is already present. */
    bool PushValuePtr(std::string key, void* value_ptr);
    /** Returns the pointer stored under the key, or nullptr if absent. */
    void* GetValuePtr(std::string key) const;
    /** Removes all entries. */
    void Clear();
};
39 |
40 | #endif // !GPUFLOW3D_DATA_TYPES_OPERATION_PARAMETERS_H_
41 |
--------------------------------------------------------------------------------
/src/kernels/add_2d.cu:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include
23 | #include
24 |
25 | #include "src/data_types/data_structs.h"
26 |
27 | //#define IND(X, Y, Z) (((Z) * container_size.height + (Y)) * (container_size.pitch / sizeof(float)) + (X))
28 | #define IND(X, Y) ((Y) * (container_size.pitch / sizeof(float)) + (X))
29 |
30 |
31 | __constant__ DataSize3 container_size;
32 |
/**
 * Element-wise in-place addition: operand_0 += operand_1.
 *
 * Each thread handles the element at its global (x, y) position; threads
 * outside the width x height region do nothing. Rows are addressed through
 * the IND macro, which uses the pitch stored in the constant
 * `container_size`.
 *
 * @param operand_0  Destination and first summand.
 * @param operand_1  Second summand.
 * @param width      Number of columns to process.
 * @param height     Number of rows to process.
 */
extern "C" __global__ void add_2d(
    float* operand_0,
    const float* operand_1,
    size_t width,
    size_t height)
{
    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;

    // Threads that fall outside the image have nothing to do.
    if (x >= width || y >= height) {
        return;
    }

    const size_t offset = IND(x, y);
    operand_0[offset] += operand_1[offset];
}
47 |
--------------------------------------------------------------------------------
/src/kernels/convolution_2d.cu:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | /*
23 | * Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
24 | *
25 | * Please refer to the NVIDIA end user license agreement (EULA) associated
26 | * with this source code for terms and conditions that govern your use of
27 | * this software. Any use, reproduction, disclosure, or distribution of
28 | * this software and related documentation outside the terms of the EULA
29 | * is strictly prohibited.
30 | *
31 | */
32 |
33 | #include
34 | //#include
35 | #include
36 | #include
37 | #include
38 |
39 | #define __CUDACC__
40 |
41 | #include
42 | #include
43 |
44 | #include "src/data_types/data_structs.h"
45 |
46 | using namespace std;
47 |
48 |
49 | #define MAX_KERNEL_LENGTH 51
50 |
51 | __constant__ DataSize3 container_size;
52 |
53 |
54 |
55 | ////////////////////////////////////////////////////////////////////////////////
56 | // Convolution kernel storage
57 | ////////////////////////////////////////////////////////////////////////////////
58 | __constant__ float c_Kernel[MAX_KERNEL_LENGTH];
59 |
60 | //extern "C" void setConvolutionKernel(float *h_Kernel, unsigned int kernel_length)
61 | //{
62 | // cudaMemcpyToSymbol(c_Kernel, h_Kernel, kernel_length * sizeof(float));
63 | //}
64 |
65 |
66 | ////////////////////////////////////////////////////////////////////////////////
67 | // Row convolution filter
68 | ////////////////////////////////////////////////////////////////////////////////
69 | #define ROWS_BLOCKDIM_X 16
70 | #define ROWS_BLOCKDIM_Y 4
71 | #define ROWS_RESULT_STEPS 4
72 | #define ROWS_HALO_STEPS 1
73 |
/**
 * Horizontal pass of a separable convolution.
 *
 * Every thread block stages ROWS_BLOCKDIM_Y image rows in shared memory.
 * Each staged row holds ROWS_RESULT_STEPS chunks of ROWS_BLOCKDIM_X samples
 * plus ROWS_HALO_STEPS chunks on either side. After a barrier each thread
 * produces up to ROWS_RESULT_STEPS output samples from the coefficients in
 * the constant array c_Kernel. Samples left of column 0 or at/after column
 * imageW are staged as 0.
 *
 * NOTE(review): nothing here checks the row against imageH (the parameter
 * is unused), so the launch grid and the allocation must cover every
 * launched row - confirm on the host side.
 * NOTE(review): the filter taps reach kernel_radius samples into the halo;
 * this looks valid only while kernel_radius <= ROWS_HALO_STEPS *
 * ROWS_BLOCKDIM_X - confirm the caller enforces it.
 *
 * @param d_Dst          Output image.
 * @param d_Src          Input image.
 * @param imageW         Image width in samples.
 * @param imageH         Image height in samples (not used by this kernel).
 * @param pitch          Row stride, in float elements (it is added directly
 *                       to the float pointers).
 * @param kernel_radius  Filter radius; 2 * kernel_radius + 1 coefficients
 *                       are read from c_Kernel.
 */
extern "C" __global__ void convolutionRowsKernel(
    float *d_Dst,
    const float *d_Src,
    int imageW,
    int imageH,
    int pitch,
    int kernel_radius
)
{
    // One staged row per threadIdx.y: [left halo | result chunks | right halo].
    __shared__ float tile[ROWS_BLOCKDIM_Y][(ROWS_RESULT_STEPS + 2 * ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X];

    // Position of this thread's first staged sample (left halo edge).
    const int baseX = (blockIdx.x * ROWS_RESULT_STEPS - ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X + threadIdx.x;
    const int baseY = blockIdx.y * ROWS_BLOCKDIM_Y + threadIdx.y;

    const int origin = baseY * pitch + baseX;
    const float *src = d_Src + origin;
    float *dst = d_Dst + origin;

    float *staged = tile[threadIdx.y];

    // Stage the result chunks; columns at or beyond imageW are zero-filled so
    // that arbitrary image widths are supported.
    #pragma unroll
    for (int step = 0; step < ROWS_RESULT_STEPS; step++)
    {
        const int slot = (step + ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X;
        const size_t global_x = blockIdx.x * ROWS_RESULT_STEPS * ROWS_BLOCKDIM_X + ROWS_BLOCKDIM_X * step + threadIdx.x;

        if (global_x < imageW) {
            staged[threadIdx.x + slot] = src[slot];
        } else {
            staged[threadIdx.x + slot] = 0;
        }
    }

    // Stage the left halo; samples left of column 0 are zero-filled.
    #pragma unroll
    for (int chunk = 0; chunk < ROWS_HALO_STEPS; chunk++)
    {
        const int slot = chunk * ROWS_BLOCKDIM_X;

        if (baseX >= -slot) {
            staged[threadIdx.x + slot] = src[slot];
        } else {
            staged[threadIdx.x + slot] = 0;
        }
    }

    // Stage the right halo; samples at or beyond imageW are zero-filled.
    #pragma unroll
    for (int chunk = ROWS_HALO_STEPS + ROWS_RESULT_STEPS; chunk < ROWS_HALO_STEPS + ROWS_RESULT_STEPS + ROWS_HALO_STEPS; chunk++)
    {
        const int slot = chunk * ROWS_BLOCKDIM_X;

        if (imageW - baseX > slot) {
            staged[threadIdx.x + slot] = src[slot];
        } else {
            staged[threadIdx.x + slot] = 0;
        }
    }

    // Every staged sample must be visible to the whole block before filtering.
    __syncthreads();

    // Compute and store the results.
    #pragma unroll
    for (int step = 0; step < ROWS_RESULT_STEPS; step++)
    {
        const int slot = (step + ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X;
        const size_t global_x = blockIdx.x * ROWS_RESULT_STEPS * ROWS_BLOCKDIM_X + ROWS_BLOCKDIM_X * step + threadIdx.x;

        // Later steps only move further right, so the thread is finished.
        if (global_x >= imageW)
            return;

        const int center = threadIdx.x + slot;
        float acc = 0;

        #pragma unroll
        for (int tap = -kernel_radius; tap <= kernel_radius; tap++)
        {
            acc += c_Kernel[kernel_radius - tap] * staged[center + tap];
        }

        dst[slot] = acc;
    }
}
169 |
170 |
171 |
172 |
173 | ////////////////////////////////////////////////////////////////////////////////
174 | // Column convolution filter
175 | ////////////////////////////////////////////////////////////////////////////////
176 | #define COLUMNS_BLOCKDIM_X 4
177 | #define COLUMNS_BLOCKDIM_Y 16
178 | #define COLUMNS_RESULT_STEPS 4
179 | #define COLUMNS_HALO_STEPS 1
180 |
/**
 * Vertical pass of a separable convolution.
 *
 * Every thread block stages COLUMNS_BLOCKDIM_X image columns in shared
 * memory. Each staged column holds COLUMNS_RESULT_STEPS chunks of
 * COLUMNS_BLOCKDIM_Y samples plus COLUMNS_HALO_STEPS chunks above and below.
 * After a barrier each thread produces up to COLUMNS_RESULT_STEPS output
 * samples from the coefficients in the constant array c_Kernel. Samples
 * above row 0 or at/after row imageH are staged as 0.
 *
 * NOTE(review): nothing here checks the column against imageW (the
 * parameter is unused), so the launch grid and the pitch must cover every
 * launched column - confirm on the host side.
 * NOTE(review): the filter taps reach kernel_radius samples into the halo;
 * this looks valid only while kernel_radius <= COLUMNS_HALO_STEPS *
 * COLUMNS_BLOCKDIM_Y - confirm the caller enforces it.
 *
 * @param d_Dst          Output image.
 * @param d_Src          Input image.
 * @param imageW         Image width in samples (not used by this kernel).
 * @param imageH         Image height in samples.
 * @param pitch          Row stride, in float elements (it is added directly
 *                       to the float pointers).
 * @param kernel_radius  Filter radius; 2 * kernel_radius + 1 coefficients
 *                       are read from c_Kernel.
 */
extern "C" __global__ void convolutionColumnsKernel(
    float *d_Dst,
    const float *d_Src,
    int imageW,
    int imageH,
    int pitch,
    int kernel_radius
)
{
    // One staged column per threadIdx.x: [upper halo | result chunks | lower halo].
    __shared__ float tile[COLUMNS_BLOCKDIM_X][(COLUMNS_RESULT_STEPS + 2 * COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y + 0];

    // Position of this thread's first staged sample (upper halo edge).
    const int baseX = blockIdx.x * COLUMNS_BLOCKDIM_X + threadIdx.x;
    const int baseY = (blockIdx.y * COLUMNS_RESULT_STEPS - COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y + threadIdx.y;

    const int origin = baseY * pitch + baseX;
    const float *src = d_Src + origin;
    float *dst = d_Dst + origin;

    float *staged = tile[threadIdx.x];

    // Stage the result chunks; rows at or beyond imageH are zero-filled so
    // that arbitrary image heights are supported.
    #pragma unroll
    for (int step = 0; step < COLUMNS_RESULT_STEPS; step++)
    {
        const int slot = (step + COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y;
        const size_t global_y = blockIdx.y * COLUMNS_RESULT_STEPS * COLUMNS_BLOCKDIM_Y + COLUMNS_BLOCKDIM_Y * step + threadIdx.y;

        if (global_y < imageH) {
            staged[threadIdx.y + slot] = src[slot * pitch];
        } else {
            staged[threadIdx.y + slot] = 0;
        }
    }

    // Stage the upper halo; samples above row 0 are zero-filled.
    #pragma unroll
    for (int chunk = 0; chunk < COLUMNS_HALO_STEPS; chunk++)
    {
        const int slot = chunk * COLUMNS_BLOCKDIM_Y;

        if (baseY >= -slot) {
            staged[threadIdx.y + slot] = src[slot * pitch];
        } else {
            staged[threadIdx.y + slot] = 0;
        }
    }

    // Stage the lower halo; samples at or beyond imageH are zero-filled.
    #pragma unroll
    for (int chunk = COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS; chunk < COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS + COLUMNS_HALO_STEPS; chunk++)
    {
        const int slot = chunk * COLUMNS_BLOCKDIM_Y;

        if (imageH - baseY > slot) {
            staged[threadIdx.y + slot] = src[slot * pitch];
        } else {
            staged[threadIdx.y + slot] = 0;
        }
    }

    // Every staged sample must be visible to the whole block before filtering.
    __syncthreads();

    // Compute and store the results.
    #pragma unroll
    for (int step = 0; step < COLUMNS_RESULT_STEPS; step++)
    {
        const int slot = (step + COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y;
        const size_t global_y = blockIdx.y * COLUMNS_RESULT_STEPS * COLUMNS_BLOCKDIM_Y + COLUMNS_BLOCKDIM_Y * step + threadIdx.y;

        // Later steps only move further down, so the thread is finished.
        if (global_y >= imageH)
            return;

        const int center = threadIdx.y + slot;
        float acc = 0;

        #pragma unroll
        for (int tap = -kernel_radius; tap <= kernel_radius; tap++)
        {
            acc += c_Kernel[kernel_radius - tap] * staged[center + tap];
        }

        dst[slot * pitch] = acc;
    }
}
262 |
263 |
264 |
--------------------------------------------------------------------------------
/src/kernels/median_2d.cu:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include
23 |
24 | #define __CUDACC__
25 |
26 | #include
27 | #include
28 |
29 | #include "src/data_types/data_structs.h"
30 |
31 |
32 | #define IND(X, Y) ((Y) * (container_size.pitch / sizeof(float)) + (X))
33 | #define SIND(X, Y) ((((Y) + radius_2)) * shared_block_size.x + ((X) + radius_2))
34 |
35 | __constant__ DataSize3 container_size;
36 |
37 |
38 | extern __shared__ float shared[];
39 |
/**
 * Sorts a float array in ascending order, in place, using bubble sort.
 *
 * @param buffer  Array to sort; must hold at least `length` elements.
 * @param length  Number of elements to sort. Arrays with fewer than two
 *                elements are left untouched.
 */
__device__ void bubbleSort(float* buffer, size_t length)
{
    /* `length - 1` is evaluated in unsigned arithmetic: without this guard an
       empty array would wrap around to the maximum size_t value and the loops
       would walk far out of bounds. */
    if (length < 2) {
        return;
    }

    for (size_t i = 0; i + 1 < length; i++) {
        /* After pass i the largest i + 1 values already sit at the end, so the
           inner sweep stops before them (same bound as k < length - i - 1). */
        for (size_t k = 0; k + i + 1 < length; k++) {
            if (buffer[k] > buffer[k + 1]) {
                float a = buffer[k];
                buffer[k] = buffer[k + 1];
                buffer[k + 1] = a;
            }
        }
    }
}
52 |
/**
 * Sorts the first `size` floats of `window` in ascending order, in place,
 * using insertion sort.
 *
 * @param window  Array to sort; must hold at least `size` elements.
 * @param size    Number of elements to sort.
 */
__device__ void insertionSort(float* window, size_t size)
{
    for (int pos = 0; pos < size; ++pos) {
        /* Element that is being inserted into the sorted prefix [0, pos). */
        const float key = window[pos];

        /* Shift every larger element of the prefix one slot to the right. */
        int slot = pos;
        while (slot > 0 && key < window[slot - 1]) {
            window[slot] = window[slot - 1];
            --slot;
        }

        window[slot] = key;
    }
}
65 |
66 | // \test Experimental. Shows worse performance. TODO: Clarify
67 | __device__ void partialSelection(float* window, size_t size)
68 | {
69 | for (unsigned int j=0; j<(size*size+1)/2; ++j)
70 | {
71 | // Find position of minimum element
72 | int min_index=j;
73 | for (unsigned int l=j+1; l= 0 ? offset : -offset;
117 | shared[SIND(-radius_2 + threadIdx.x, threadIdx.y)] = input[IND(global_x_l, global_y)];
118 | }
119 |
120 | /* Right slice */
121 | if (threadIdx.x > blockDim.x - 1 - radius_2) {
122 | int index = blockDim.x - threadIdx.x;
123 | int offset = blockDim.x *(blockIdx.x + 1) + radius_2 - index;
124 | size_t global_x_r = offset < width ? offset : 2 * width - offset - 2;
125 | shared[SIND(radius_2 + threadIdx.x, threadIdx.y)] = input[IND(global_x_r, global_y)];
126 | }
127 |
128 | /* Upper slice */
129 | if (threadIdx.y < radius_2) {
130 | int offset = blockDim.y * blockIdx.y - radius_2 + threadIdx.y;
131 | size_t global_y_u = offset >= 0 ? offset : -offset;
132 | shared[SIND(threadIdx.x, -radius_2 + threadIdx.y)] = input[IND(global_x, global_y_u)];
133 | }
134 |
135 | /* Bottom slice */
136 | if (threadIdx.y > blockDim.y - 1 - radius_2) {
137 | int index = blockDim.y - threadIdx.y;
138 | int offset = blockDim.y *(blockIdx.y + 1) + radius_2 - index;
139 | size_t global_y_b = offset < height ? offset : 2 * height - offset - 2;
140 | shared[SIND(threadIdx.x, radius_2 + threadIdx.y)] = input[IND(global_x, global_y_b)];
141 | }
142 |
143 |
144 | /* 12 edges */
145 | {
146 | //int global_x_e;
147 | //int global_y_e;
148 | //int global_z_e;
149 |
150 | ///* 4 along X */
151 | //if (threadIdx.y < radius_2 && threadIdx.z < radius_2) {
152 | // /* Front upper */
153 | // global_y_e = blockDim.y * blockIdx.y - radius_2 + threadIdx.y;
154 | // global_y_e = global_y_e > 0 ? global_y_e : -global_y_e;
155 | // shared[SIND(threadIdx.x, threadIdx.y - radius_2)] =
156 | // input[IND(global_x, global_y_e)];
157 |
158 | // /* Rear upper */
159 | // global_z_e = blockDim.z *(blockIdx.z + 1) + threadIdx.z;
160 | // global_z_e = global_z_e < depth ? global_z_e : 2 * depth - global_z_e - 2;
161 | // shared[SIND(threadIdx.x, threadIdx.y - radius_2, blockDim.z + threadIdx.z)] =
162 | // input[IND(global_x, global_y_e, global_z_e)];
163 |
164 | // /* Rear bottom */
165 | // global_y_e = blockDim.y *(blockIdx.y + 1) + threadIdx.y;
166 | // global_y_e = global_y_e < height ? global_y_e : 2 * height - global_y_e - 2;
167 | // shared[SIND(threadIdx.x, blockDim.y + threadIdx.y, blockDim.z + threadIdx.z)] =
168 | // input[IND(global_x, global_y_e, global_z_e)];
169 |
170 | // /* Front bottom */
171 | // global_z_e = blockDim.z * blockIdx.z - radius_2 + threadIdx.z;
172 | // global_z_e = global_z_e > 0 ? global_z_e : -global_z_e;
173 | // shared[SIND(threadIdx.x, blockDim.y + threadIdx.y, threadIdx.z - radius_2)] =
174 | // input[IND(global_x, global_y_e, global_z_e)];
175 | //}
176 |
177 | ///* 4 along Y */
178 | //if (threadIdx.x < radius_2 && threadIdx.z < radius_2) {
179 | // /* Front left */
180 | // global_x_e = blockDim.x * blockIdx.x - radius_2 + threadIdx.x;
181 | // global_x_e = global_x_e > 0 ? global_x_e : -global_x_e;
182 | // global_z_e = blockDim.z * blockIdx.z - radius_2 + threadIdx.z;
183 | // global_z_e = global_z_e > 0 ? global_z_e : -global_z_e;
184 | // shared[SIND(threadIdx.x - radius_2, threadIdx.y, threadIdx.z - radius_2)] =
185 | // input[IND(global_x_e, global_y, global_z_e)];
186 |
187 | // /* Rear left */
188 | // global_z_e = blockDim.z *(blockIdx.z + 1) + threadIdx.z;
189 | // global_z_e = global_z_e < depth ? global_z_e : 2 * depth - global_z_e - 2;
190 | // shared[SIND(threadIdx.x - radius_2, threadIdx.y, blockDim.z + threadIdx.z)] =
191 | // input[IND(global_x_e, global_y, global_z_e)];
192 |
193 | // /* Rear right */
194 | // global_x_e = blockDim.x *(blockIdx.x + 1) + threadIdx.x;
195 | // global_x_e = global_x_e < width ? global_x_e : 2 * width - global_x_e - 2;
196 | // shared[SIND(blockDim.x + threadIdx.x, threadIdx.y, blockDim.z + threadIdx.z)] =
197 | // input[IND(global_x_e, global_y, global_z_e)];
198 |
199 | // /* Front right */
200 | // global_z_e = blockDim.z * blockIdx.z - radius_2 + threadIdx.z;
201 | // global_z_e = global_z_e > 0 ? global_z_e : -global_z_e;
202 | // shared[SIND(blockDim.x + threadIdx.x, threadIdx.y, threadIdx.z - radius_2)] =
203 | // input[IND(global_x_e, global_y, global_z_e)];
204 | //}
205 |
206 | ///* 4 along Z */
207 | //if (threadIdx.x < radius_2 && threadIdx.y < radius_2) {
208 | // /* Upper left */
209 | // global_x_e = blockDim.x * blockIdx.x - radius_2 + threadIdx.x;
210 | // global_x_e = global_x_e > 0 ? global_x_e : -global_x_e;
211 | // global_y_e = blockDim.y * blockIdx.y - radius_2 + threadIdx.y;
212 | // global_y_e = global_y_e > 0 ? global_y_e : -global_y_e;
213 | // shared[SIND(threadIdx.x - radius_2, threadIdx.y - radius_2, threadIdx.z)] =
214 | // input[IND(global_x_e, global_y_e, global_z)];
215 |
216 | // /* Upper right */
217 | // global_x_e = blockDim.x *(blockIdx.x + 1) + threadIdx.x;
218 | // global_x_e = global_x_e < width ? global_x_e : 2 * width - global_x_e - 2;
219 | // shared[SIND(blockDim.x + threadIdx.x, threadIdx.y - radius_2, threadIdx.z)] =
220 | // input[IND(global_x_e, global_y_e, global_z)];
221 |
222 | // /* Bottom right */
223 | // global_y_e = blockDim.y *(blockIdx.y + 1) + threadIdx.y;
224 | // global_y_e = global_y_e < height ? global_y_e : 2 * height - global_y_e - 2;
225 | // shared[SIND(blockDim.x + threadIdx.x, blockDim.y + threadIdx.y, threadIdx.z)] =
226 | // input[IND(global_x_e, global_y_e, global_z)];
227 |
228 | // /* Bottom left */
229 | // global_x_e = blockDim.x * blockIdx.x - radius_2 + threadIdx.x;
230 | // global_x_e = global_x_e > 0 ? global_x_e : -global_x_e;
231 | // shared[SIND(threadIdx.x - radius_2, blockDim.y + threadIdx.y, threadIdx.z)] =
232 | // input[IND(global_x_e, global_y_e, global_z)];
233 | //}
234 | }
235 |
236 | /* 4 corners */
237 | {
238 | int global_x_c;
239 | int global_y_c;
240 |
241 | if (threadIdx.x < radius_2 && threadIdx.y < radius_2) {
242 |
243 | global_x_c = blockDim.x * blockIdx.x - radius_2 + threadIdx.x;
244 | global_x_c = global_x_c > 0 ? global_x_c : -global_x_c;
245 |
246 | global_y_c = blockDim.y * blockIdx.y - radius_2 + threadIdx.y;
247 | global_y_c = global_y_c > 0 ? global_y_c : -global_y_c;
248 |
249 | /* Front upper left */
250 | shared[SIND(threadIdx.x - radius_2,threadIdx.y - radius_2)] =
251 | input[IND(global_x_c, global_y_c)];
252 |
253 | /* Front upper right */
254 | global_x_c = blockDim.x *(blockIdx.x + 1) + threadIdx.x;
255 | global_x_c = global_x_c < width ? global_x_c : 2 * width - global_x_c - 2;
256 | shared[SIND(blockDim.x + threadIdx.x, threadIdx.y - radius_2)] =
257 | input[IND(global_x_c, global_y_c)];
258 |
259 | /* Front bottom right */
260 | global_y_c = blockDim.y *(blockIdx.y + 1) + threadIdx.y;
261 | global_y_c = global_y_c < height ? global_y_c : 2 * height - global_y_c - 2;
262 | shared[SIND(blockDim.x + threadIdx.x, blockDim.y + threadIdx.y)] =
263 | input[IND(global_x_c, global_y_c)];
264 |
265 | /* Front bottom left */
266 | global_x_c = blockDim.x * blockIdx.x - radius_2 + threadIdx.x;
267 | global_x_c = global_x_c > 0 ? global_x_c : -global_x_c;
268 | shared[SIND(threadIdx.x - radius_2, blockDim.y + threadIdx.y)] =
269 | input[IND(global_x_c, global_y_c)];
270 |
271 | }
272 | }
273 |
274 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< //
275 | // -------------------------------------------------------- //
276 | __syncthreads();
277 | // -------------------------------------------------------- //
278 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< //
279 |
280 | if (global_id.x < width && global_id.y < height) {
281 | float buffer[49]; /* Max supported radius is 7, we have to store 7*7 values. */
282 | for (size_t iy = 0; iy < radius; ++iy) {
283 | for (size_t ix = 0; ix < radius; ++ix) {
284 | size_t lx = threadIdx.x - ix + radius_2;
285 | size_t ly = threadIdx.y - iy + radius_2;
286 | buffer[iy * radius + ix] = shared[SIND(lx, ly)];
287 | }
288 | }
289 |
290 |
291 | size_t length = radius * radius;
292 |
293 | //bubbleSort(buffer, length);
294 | insertionSort(buffer, length);
295 | //partialSelection(buffer, radius);
296 |
297 | output[IND(global_id.x, global_id.y)] = buffer[length / 2];
298 | }
299 | }
--------------------------------------------------------------------------------
/src/kernels/registration_2d.cu:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include
23 |
24 | #define __CUDACC__
25 | #include
26 |
27 | #include "src/data_types/data_structs.h"
28 |
29 | //#define IND(X, Y, Z) (((Z) * container_size.height + (Y)) * (container_size.pitch / sizeof(float)) + (X))
30 | #define IND(X, Y) ((Y) * (container_size.pitch / sizeof(float)) + (X))
31 |
32 | __constant__ DataSize3 container_size;
33 |
/**
 * Warps frame_1 towards frame_0 using the flow field (flow_u, flow_v).
 *
 * For every pixel inside width x height the flow, divided by the grid
 * spacing (hx, hy), is added to the pixel position and frame_1 is sampled
 * there with bilinear interpolation. If the displaced position leaves the
 * image or is NaN, the pixel of frame_0 is copied instead.
 *
 * All buffers are addressed through IND(), i.e. with the row stride taken
 * from container_size.pitch.
 */
extern "C" __global__ void registration_2d(
    const float* frame_0,
    const float* frame_1,
    const float* flow_u,
    const float* flow_v,
    size_t width,
    size_t height,
    float hx,
    float hy,
    float* output)
{
    dim3 global_id(blockDim.x * blockIdx.x + threadIdx.x,
                   blockDim.y * blockIdx.y + threadIdx.y);

    /* Threads that fall outside the image have nothing to do. */
    if (global_id.x >= width || global_id.y >= height) {
        return;
    }

    const size_t pixel = IND(global_id.x, global_id.y);

    /* Position in frame_1 this pixel is displaced to, in pixel units. */
    float x_f = global_id.x + (flow_u[pixel] * (1.f / hx));
    float y_f = global_id.y + (flow_v[pixel] * (1.f / hy));

    const bool unusable =
        (x_f < 0.) || (x_f > width - 1) ||
        (y_f < 0.) || (y_f > height - 1) ||
        isnan(x_f) || isnan(y_f);

    if (unusable) {
        /* No valid sample in frame_1: fall back to the reference frame. */
        output[pixel] = frame_0[pixel];
        return;
    }

    /* Upper-left neighbour and the fractional offsets from it. */
    const int x = (int) floorf(x_f);
    const int y = (int) floorf(y_f);
    const float delta_x = x_f - (float) x;
    const float delta_y = y_f - (float) y;

    /* Right/lower neighbours, clamped to the last column/row. */
    const int x_1 = min(int(width - 1), x + 1);
    const int y_1 = min(int(height - 1), y + 1);

    const float inv_x = 1.f - delta_x;
    const float inv_y = 1.f - delta_y;

    /* Bilinear interpolation of the four neighbours. */
    output[pixel] =
        inv_x   * inv_y   * frame_1[IND(x  , y  )] +
        delta_x * inv_y   * frame_1[IND(x_1, y  )] +
        inv_x   * delta_y * frame_1[IND(x  , y_1)] +
        delta_x * delta_y * frame_1[IND(x_1, y_1)];
}
--------------------------------------------------------------------------------
/src/kernels/resample_2d.cu:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include
23 |
24 | #define __CUDACC__
25 | #include
26 |
27 | #include "src/data_types/data_structs.h"
28 |
29 | //#define IND(X, Y, Z) (((Z) * container_size.height + (Y)) * (container_size.pitch / sizeof(float)) + (X))
30 | #define IND(X, Y) ((Y) * (container_size.pitch / sizeof(float)) + (X))
31 |
32 | __constant__ DataSize3 container_size;
33 |
/**
 * Resamples an image along the x axis with an area-weighted filter.
 *
 * Output pixel x covers the input interval [x * delta, (x + 1) * delta) with
 * delta = in_width / out_width. Every input pixel touched by that interval
 * contributes in proportion to the covered fraction, and the sum is scaled
 * by out_width / in_width.
 *
 * @param input       Source image, addressed through IND() (pitched rows).
 * @param output      Destination image, addressed through IND().
 * @param out_width   Width of the resampled image.
 * @param out_height  Height of the resampled image (rows are not resampled).
 * @param in_width    Width of the source image.
 */
extern "C" __global__ void resample_x(
    const float* input,
    float* output,
    size_t out_width,
    size_t out_height,
    size_t in_width)
{
    dim3 globalID(blockDim.x * blockIdx.x + threadIdx.x,
                  blockDim.y * blockIdx.y + threadIdx.y);

    if (globalID.x < out_width && globalID.y < out_height) {
        /* Size of one output pixel in input pixels, and its inverse. */
        float delta = in_width / static_cast<float>(out_width);
        float normalization = out_width / static_cast<float>(in_width);

        float left_f = globalID.x * delta;
        float right_f = (globalID.x + 1) * delta;

        /* Integer range of input pixels touched by [left_f, right_f),
           clamped so that it never reads past the last input column. */
        int left_i = static_cast<int>(floor(left_f));
        int right_i = min(static_cast<int>(in_width), static_cast<int>(ceil(right_f)));

        const int cells = right_i - left_i;

        float value = 0.f;

        for (int j = 0; j < cells; j++) {
            float frac = 1.f;

            /* left boundary */
            if (j == 0) {
                frac = static_cast<float>(left_i + 1) - left_f;
            }
            /* right boundary */
            if (j == cells - 1) {
                frac = right_f - static_cast<float>(left_i + j);
            }
            /* if the left and right boundaries are in the same cell */
            if (cells == 1) {
                frac = delta;
            }
            value += input[IND(left_i + j, globalID.y)] * frac;
        }
        output[IND(globalID.x, globalID.y)] = value * normalization;
    }
}
76 |
/**
 * Resamples an image along the y axis with an area-weighted filter.
 *
 * Output pixel y covers the input interval [y * delta, (y + 1) * delta) with
 * delta = in_height / out_height. Every input pixel touched by that interval
 * contributes in proportion to the covered fraction, and the sum is scaled
 * by out_height / in_height.
 *
 * @param input       Source image, addressed through IND() (pitched rows).
 * @param output      Destination image, addressed through IND().
 * @param out_width   Width of the resampled image (columns are not resampled).
 * @param out_height  Height of the resampled image.
 * @param in_height   Height of the source image.
 */
extern "C" __global__ void resample_y(
    const float* input,
    float* output,
    size_t out_width,
    size_t out_height,
    size_t in_height)
{
    dim3 globalID(blockDim.x * blockIdx.x + threadIdx.x,
                  blockDim.y * blockIdx.y + threadIdx.y);

    if (globalID.x < out_width && globalID.y < out_height) {
        /* Size of one output pixel in input pixels, and its inverse. */
        float delta = in_height / static_cast<float>(out_height);
        float normalization = out_height / static_cast<float>(in_height);

        float left_f = globalID.y * delta;
        float right_f = (globalID.y + 1) * delta;

        /* Integer range of input pixels touched by [left_f, right_f),
           clamped so that it never reads past the last input row. */
        int left_i = static_cast<int>(floor(left_f));
        int right_i = min(static_cast<int>(in_height), static_cast<int>(ceil(right_f)));

        const int cells = right_i - left_i;

        float value = 0.f;

        for (int j = 0; j < cells; j++) {
            float frac = 1.f;

            /* left boundary */
            if (j == 0) {
                frac = static_cast<float>(left_i + 1) - left_f;
            }
            /* right boundary */
            if (j == cells - 1) {
                frac = right_f - static_cast<float>(left_i + j);
            }
            /* if the left and right boundaries are in the same cell */
            if (cells == 1) {
                frac = delta;
            }
            value += input[IND(globalID.x, left_i + j)] * frac;
        }
        output[IND(globalID.x, globalID.y)] = value * normalization;
    }
}
119 |
120 | //extern "C" __global__ void resample_z(
121 | // const float* input,
122 | // float* output,
123 | // size_t out_width,
124 | // size_t out_height,
125 | // size_t out_depth,
126 | // size_t in_depth)
127 | //{
128 | // dim3 globalID(blockDim.x * blockIdx.x + threadIdx.x,
129 | // blockDim.y * blockIdx.y + threadIdx.y,
130 | // blockDim.z * blockIdx.z + threadIdx.z);
131 | //
132 | // if (globalID.x < out_width && globalID.y < out_height && globalID.z < out_depth) {
133 | // float delta = in_depth / static_cast(out_depth);
134 | // float normalization = out_depth / static_cast(in_depth);
135 | //
136 | // float left_f = globalID.z * delta;
137 | // float right_f = (globalID.z + 1) * delta;
138 | //
139 | // int left_i = static_cast(floor(left_f));
140 | // int right_i = min(in_depth, static_cast(ceil(right_f)));
141 | //
142 | // float value = 0.f;
143 | //
144 | // for (int j = 0; j < (right_i - left_i); j++) {
145 | // float frac = 1.f;
146 | //
147 | // /* left boundary */
148 | // if (j == 0) {
149 | // frac = static_cast(left_i + 1) - left_f;
150 | // }
151 | // /* right boundary */
152 | // if (j == (right_i - left_i) - 1) {
153 | // frac = right_f - static_cast(left_i + j);
154 | // }
155 | // /* if the left and right boundaries are in the same cell */
156 | // if ((right_i - left_i) == 1) {
157 | // frac = delta;
158 | // }
159 | // value += input[IND(globalID.x, globalID.y, left_i + j)] * frac;
160 | // }
161 | // output[IND(globalID.x, globalID.y, globalID.z)] = value * normalization;
162 | // }
163 | //}
164 |
165 | //extern "C" __global__ void resample_x_debug(
166 | // const float* input,
167 | // float* output,
168 | // size_t width,
169 | // size_t height,
170 | // size_t depth,
171 | // size_t resample_width)
172 | //{
173 | // dim3 globalID(blockDim.x * blockIdx.x + threadIdx.x,
174 | // blockDim.y * blockIdx.y + threadIdx.y,
175 | // blockDim.z * blockIdx.z + threadIdx.z);
176 | //
177 | // if (globalID.y < height && globalID.z < depth) {
178 | // for (size_t x = 0; x < width; x += 4) {
179 | // *((float4*)(&(output[IND(x, globalID.y, globalID.z)]))) =
180 | // *((const float4*)(&(input[IND(x, globalID.y, globalID.z)])));
181 | // }
182 | // for (size_t x = 0; x < width; x++) {
183 | // output[IND(x, globalID.y, globalID.z)] = input[IND(x, globalID.y, globalID.z)];
184 | // }
185 | // }
186 | //}
--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include
23 | #include
24 | #include
25 | #include
26 |
27 | #include
28 |
29 | #include "src/data_types/data2d.h"
30 | #include "src/data_types/data_structs.h"
31 | #include "src/data_types/operation_parameters.h"
32 | #include "src/utils/cuda_utils.h"
33 | #include "src/utils/io_utils.h"
34 | #include "src/utils/settings.h"
35 |
36 | #include "src/optical_flow/optical_flow_2d.h"
37 |
38 | using namespace OpticFlow;
39 |
40 |
41 | const bool key_press = true;
42 | const bool use_visualization = false;
43 | const bool silent_mode = true;
44 |
45 |
46 | int main(int argc, char** argv)
47 | {
48 |
49 |
50 | /* Initialize CUDA */
51 | CUcontext cu_context;
52 | if (!InitCudaContextWithFirstAvailableDevice(&cu_context)) {
53 | return 1;
54 | }
55 |
56 | std::printf("//----------------------------------------------------------------------//\n");
57 | std::printf("// 2D Optical flow using NVIDIA CUDA. Version 0.5.0 //\n");
58 | std::printf("// //\n");
59 | std::printf("// Karlsruhe Institute of Technology. 2015 - 2018 //\n");
60 | std::printf("//----------------------------------------------------------------------//\n");
61 |
62 |
63 |
64 | /* Dataset variables */
65 | size_t width = 584; //584;
66 | size_t height = 388; ////388;
67 |
68 |
69 | /* Optical flow variables */
70 | size_t warp_levels_count = 50;
71 | float warp_scale_factor = 0.9f;
72 | size_t outer_iterations_count = 40;
73 | size_t inner_iterations_count = 5;
74 | float equation_alpha = 35.0f; // 3.5f;
75 | float equation_smoothness = 0.001f;
76 | float equation_data = 0.001f;
77 | size_t median_radius = 5;
78 | float gaussian_sigma = 1.5f;
79 |
80 | DataConstancy data_constancy = DataConstancy::Grey;
81 |
82 | string file_name1 = "rub1.raw";
83 | string file_name2 = "rub2.raw";
84 | string input_path = "./data/";
85 | string output_path = "./data/output/";
86 | string counter = "";
87 |
88 |
89 | /* Optical flow computation class */
90 | OpticalFlow2D optical_flow;
91 |
92 |
93 | Data2D frame_0;
94 | Data2D frame_1;
95 |
96 | /* Read settings */
97 | string settingsPath = "settings.xml";
98 |
99 | /*
100 | Usage:
101 |
102 | 1. cuda-flow2d. settings.xml in the current directory will be used for settings
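103 | 2. cuda-flow2d <settings_file>
104 | 3. cuda-flow2d <file_name1> <file_name2> <width> <height> <counter> <output_path> [<equation_alpha> <gaussian_sigma>]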
105 | */
106 |
107 | if (argc == 6 || argc == 7 || argc == 9) {
108 |
109 | file_name1 = argv[1];
110 | file_name2 = argv[2];
111 |
112 | output_path = string(argv[6]);
113 |
114 | width = atoi(argv[3]);
115 | height = atoi(argv[4]);
116 |
117 | if (argc == 7)
118 | counter = argv[5];
119 |
120 | if (argc == 9){
121 | equation_alpha = atof(argv[7]);
122 | gaussian_sigma = atof(argv[8]);
123 | counter = "alpha"+ string(argv[7])+"_sigma"+ string(argv[8])+"_";
124 | }
125 | }
126 | else if (argc < 3) {
127 | string settingsFile = (argc==1)? settingsPath : string(argv[1]);
128 |
129 | // Create Settings class
130 | cout<<"Reading settings: "<. Otherwise settings.xml in the current directory is used"<.
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "src/optical_flow/optical_flow_2d.h"
23 |
24 | #include
25 | #include
26 | #include
27 | #include
28 | #include
29 |
30 | //#include "../viewflow3d/src/visualization.h"
31 | #include "src/utils/common_utils.h"
32 | #include "src/utils/cuda_utils.h"
33 | #include "src/data_types/data_structs.h"
34 |
35 | using namespace std;
36 |
37 | OpticalFlow2D::OpticalFlow2D()
38 | : OpticalFlowBase2D("Optical Flow 2D Single GPU")
39 | {
40 | cuda_operations_.push_front(&cuop_add_);
41 | cuda_operations_.push_front(&cuop_convolution_);
42 | cuda_operations_.push_front(&cuop_median_);
43 | cuda_operations_.push_front(&cuop_register_);
44 | cuda_operations_.push_front(&cuop_resample_);
45 | cuda_operations_.push_front(&cuop_solve_);
46 | }
47 |
48 | bool OpticalFlow2D::Initialize(const DataSize3& data_size, DataConstancy data_constancy)
49 | {
50 | data_constancy_ = data_constancy;
51 | dev_container_size_ = data_size;
52 | dev_container_size_.pitch = 0;
53 |
54 | initialized_ = InitCudaMemory() && InitCudaOperations();
55 | return initialized_;
56 | }
57 |
58 | bool OpticalFlow2D::InitCudaOperations()
59 | {
60 | if (dev_container_size_.pitch == 0) {
61 | std::printf("Initialization failed. Device pitch is 0.\n");
62 | return false;
63 | }
64 |
65 | std::printf("Initialization of cuda operations...\n");
66 |
67 | OperationParameters op;
68 | op.PushValuePtr("container_size", &dev_container_size_);
69 | op.PushValuePtr("data_constancy", &data_constancy_);
70 |
71 | for (CudaOperationBase* cuop : cuda_operations_) {
72 | std::printf("%-18s: ", cuop->GetName());
73 | bool result = cuop->Initialize(&op);
74 | if (result) {
75 | std::printf("OK\n");
76 | } else {
77 | Destroy();
78 | return false;
79 | }
80 | }
81 | return true;
82 | }
83 |
84 | bool OpticalFlow2D::InitCudaMemory()
85 | {
86 | std::printf("Allocating memory on the device...\n");
87 | /* Check available memory on the cuda device */
88 | size_t free_memory;
89 | size_t total_memory;
90 |
91 | CheckCudaError(cuMemGetInfo(&free_memory, &total_memory));
92 |
93 | std::printf("Available\t:\t%.0fMB / %.0fMB\n",
94 | free_memory / static_cast(1024 * 1024),
95 | total_memory / static_cast(1024 * 1024));
96 |
97 | /* Estimate needed memory (approx.)*/
98 | int alignment = 512;
99 | CUdevice cu_device;
100 | CheckCudaError(cuCtxGetDevice(&cu_device));
101 | CheckCudaError(cuDeviceGetAttribute(&alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, cu_device));
102 | alignment /= sizeof(float);
103 |
104 | size_t pitch = ((dev_container_size_.width % alignment == 0) ?
105 | (dev_container_size_.width) :
106 | (dev_container_size_.width + alignment - (dev_container_size_.width % alignment))) *
107 | sizeof(float);
108 |
109 | size_t needed_memory = pitch * dev_container_size_.height * dev_containers_count_;
110 | std::printf("Needed (approx.):\t%.0fMB\n", needed_memory / static_cast(1024 * 1024));
111 |
112 | size_t allocated_memory = 0;
113 | if (needed_memory < free_memory) {
114 | for (size_t i = 0; i < dev_containers_count_; ++i) {
115 | CUdeviceptr dev_container;
116 | bool error = CheckCudaError(cuMemAllocPitch(&dev_container,
117 | &pitch,
118 | dev_container_size_.width * sizeof(float),
119 | dev_container_size_.height * 1,
120 | sizeof(float)));
121 |
122 | /* Pitch should be the same for all containers */
123 | if (error || (i != 0 && dev_container_size_.pitch != pitch)) {
124 | std::printf("Error during device memory allocation.");
125 | Destroy();
126 | return false;
127 | } else {
128 | size_t container_size;
129 | CheckCudaError(cuMemGetAddressRange(NULL, &container_size, dev_container));
130 | allocated_memory += container_size;
131 |
132 | cuda_memory_ptrs_.push(dev_container);
133 | dev_container_size_.pitch = pitch;
134 | }
135 | }
136 | std::printf("Allocated\t:\t%.0fMB\n", allocated_memory / static_cast(1024 * 1024));
137 | return true;
138 | }
139 | return false;
140 | }
141 |
142 | void OpticalFlow2D::ComputeFlow(Data2D& frame_0, Data2D& frame_1, Data2D& flow_u, Data2D& flow_v, OperationParameters& params)
143 | {
144 | if (!IsInitialized()) {
145 | return;
146 | }
147 |
148 | /* Optical flow algorithm's parameters */
149 | size_t warp_levels_count;
150 | float warp_scale_factor;
151 | size_t outer_iterations_count;
152 | size_t inner_iterations_count;
153 | float equation_alpha;
154 | float equation_smoothness;
155 | float equation_data;
156 | size_t median_radius;
157 | float gaussian_sigma;
158 |
159 | /* Lambda function for correct working of GET_PARAM_OR_RETURN */
160 | GET_PARAM_OR_RETURN(params, size_t, warp_levels_count, "warp_levels_count");
161 | GET_PARAM_OR_RETURN(params, float, warp_scale_factor, "warp_scale_factor");
162 | GET_PARAM_OR_RETURN(params, size_t, outer_iterations_count, "outer_iterations_count");
163 | GET_PARAM_OR_RETURN(params, size_t, inner_iterations_count, "inner_iterations_count");
164 | GET_PARAM_OR_RETURN(params, float, equation_alpha, "equation_alpha");
165 | GET_PARAM_OR_RETURN(params, float, equation_smoothness, "equation_smoothness");
166 | GET_PARAM_OR_RETURN(params, float, equation_data, "equation_data");
167 | GET_PARAM_OR_RETURN(params, size_t, median_radius, "median_radius");
168 | GET_PARAM_OR_RETURN(params, float, gaussian_sigma, "gaussian_sigma");
169 |
170 | std::printf("\nStarting optical flow computation...\n");
171 |
172 | /* Create CUDA event for the time measure */
173 | CUevent cu_event_start;
174 | CUevent cu_event_stop;
175 |
176 | CheckCudaError(cuEventCreate(&cu_event_start, CU_EVENT_DEFAULT));
177 | CheckCudaError(cuEventCreate(&cu_event_stop, CU_EVENT_DEFAULT));
178 |
179 | CheckCudaError(cuEventRecord(cu_event_start, NULL));
180 |
181 | /* Auxiliary variables */
182 | float hx; // spacing in x-direction (current resol.)
183 | float hy; // spacing in y-direction (current resol.)
184 | DataSize3 original_data_size = { frame_0.Width(), frame_0.Height(), 0 };
185 | DataSize3 current_data_size = { 0 };
186 | DataSize3 prev_data_size = { 0 };
187 |
188 | size_t max_warp_level = GetMaxWarpLevel(original_data_size.width, original_data_size.height, warp_scale_factor);
189 | int current_warp_level = std::min(warp_levels_count, max_warp_level) - 1;
190 |
191 | OperationParameters op;
192 |
193 | /* Copy input data to the device */
194 | CUdeviceptr dev_frame_0 = cuda_memory_ptrs_.top();
195 | cuda_memory_ptrs_.pop();
196 | CUdeviceptr dev_frame_1 = cuda_memory_ptrs_.top();
197 | cuda_memory_ptrs_.pop();
198 | CUdeviceptr dev_frame_0_res = cuda_memory_ptrs_.top();
199 | cuda_memory_ptrs_.pop();
200 | CUdeviceptr dev_frame_1_res_br = cuda_memory_ptrs_.top();
201 | cuda_memory_ptrs_.pop();
202 |
203 | CUdeviceptr dev_flow_u = cuda_memory_ptrs_.top();
204 | cuda_memory_ptrs_.pop();
205 | CUdeviceptr dev_flow_v = cuda_memory_ptrs_.top();
206 | cuda_memory_ptrs_.pop();
207 |
208 | CUdeviceptr dev_flow_du = cuda_memory_ptrs_.top();
209 | cuda_memory_ptrs_.pop();
210 | CUdeviceptr dev_flow_dv = cuda_memory_ptrs_.top();
211 | cuda_memory_ptrs_.pop();
212 |
213 |
214 | CopyData2DtoDevice(frame_0, dev_frame_0, dev_container_size_.height, dev_container_size_.pitch);
215 | CopyData2DtoDevice(frame_1, dev_frame_1, dev_container_size_.height, dev_container_size_.pitch);
216 |
217 | /* Gaussian blurring */
218 | if (gaussian_sigma > 0.0) {
219 |
220 | CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
221 | cuda_memory_ptrs_.pop();
222 |
223 | op.Clear();
224 | op.PushValuePtr("dev_input", &dev_frame_0);
225 | op.PushValuePtr("dev_output", &dev_flow_u);
226 | op.PushValuePtr("dev_temp", &dev_temp);
227 | op.PushValuePtr("data_size", &original_data_size);
228 | op.PushValuePtr("gaussian_sigma", &gaussian_sigma);
229 | cuop_convolution_.Execute(op);
230 |
231 | //CheckCudaError(cuStreamSynchronize(NULL));
232 | //std::this_thread::sleep_for(std::chrono::milliseconds(500));
233 |
234 |
235 | op.Clear();
236 | op.PushValuePtr("dev_input", &dev_frame_1);
237 | op.PushValuePtr("dev_output", &dev_flow_v);
238 | op.PushValuePtr("dev_temp", &dev_temp);
239 | op.PushValuePtr("data_size", &original_data_size);
240 | op.PushValuePtr("gaussian_sigma", &gaussian_sigma);
241 | cuop_convolution_.Execute(op);
242 |
243 | std::swap(dev_frame_0, dev_flow_u);
244 | std::swap(dev_frame_1, dev_flow_v);
245 |
246 | cuda_memory_ptrs_.push(dev_temp);
247 |
248 | // Test: Save smoothed image
249 | /* CopyData2DFromDevice(dev_frame_0, frame_0, dev_container_size_.height, dev_container_size_.pitch);
250 | CopyData2DFromDevice(dev_frame_1, frame_1, dev_container_size_.height, dev_container_size_.pitch);
251 |
252 | std::string filename =
253 | "-" + std::to_string(dev_container_size_.width) +
254 | "-" + std::to_string(dev_container_size_.height) + ".raw";
255 |
256 | frame_0.WriteRAWToFileF32(std::string("./data/output/test1_smooth" + filename).c_str());
257 |
258 | frame_1.WriteRAWToFileF32(std::string("./data/output/test2_smooth" + filename).c_str());*/
259 |
260 | }
261 |
262 |
263 |
264 | /* ---------------------------------------------------- */
265 | /* Main loop */
266 | /* ---------------------------------------------------- */
267 | while (current_warp_level >= 0) {
268 | float scale = std::pow(warp_scale_factor, static_cast(current_warp_level));
269 | current_data_size.width = static_cast(std::ceil(original_data_size.width * scale));
270 | current_data_size.height = static_cast(std::ceil(original_data_size.height * scale));
271 | hx = original_data_size.width / static_cast(current_data_size.width);
272 | hy = original_data_size.height / static_cast(current_data_size.height);
273 |
274 | if (!this->silent)
275 | std::printf("Solve level %2d (%4d x%4d) \n", current_warp_level, current_data_size.width, current_data_size.height);
276 |
277 |
278 | /* Data resampling */
279 | {
280 | if (current_warp_level == 0) {
281 | std::swap(dev_frame_0, dev_frame_0_res);
282 | std::swap(dev_frame_1, dev_frame_1_res_br);
283 | } else {
284 | CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
285 | cuda_memory_ptrs_.pop();
286 |
287 | op.Clear();
288 | op.PushValuePtr("dev_input", &dev_frame_0);
289 | op.PushValuePtr("dev_output", &dev_frame_0_res);
290 | op.PushValuePtr("dev_temp", &dev_temp);
291 | op.PushValuePtr("data_size", &original_data_size);
292 | op.PushValuePtr("resample_size", ¤t_data_size);
293 | cuop_resample_.Execute(op);
294 |
295 | op.Clear();
296 | op.PushValuePtr("dev_input", &dev_frame_1);
297 | op.PushValuePtr("dev_output", &dev_frame_1_res_br);
298 | op.PushValuePtr("dev_temp", &dev_temp);
299 | op.PushValuePtr("data_size", &original_data_size);
300 | op.PushValuePtr("resample_size", ¤t_data_size);
301 | cuop_resample_.Execute(op);
302 |
303 | cuda_memory_ptrs_.push(dev_temp);
304 | }
305 | }
306 |
307 | /* Flow field resampling */
308 | {
309 | if (prev_data_size.width == 0) {
310 | CheckCudaError(cuMemsetD2D8(dev_flow_u, dev_container_size_.pitch, 0, dev_container_size_.width * sizeof(float),
311 | dev_container_size_.height * 1));
312 | CheckCudaError(cuMemsetD2D8(dev_flow_v, dev_container_size_.pitch, 0, dev_container_size_.width * sizeof(float),
313 | dev_container_size_.height * 1));
314 |
315 | } else {
316 | CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
317 | cuda_memory_ptrs_.pop();
318 |
319 | op.Clear();
320 | op.PushValuePtr("dev_input", &dev_flow_u);
321 | op.PushValuePtr("dev_output", &dev_flow_du);
322 | op.PushValuePtr("dev_temp", &dev_temp);
323 | op.PushValuePtr("data_size", &prev_data_size);
324 | op.PushValuePtr("resample_size", ¤t_data_size);
325 | cuop_resample_.Execute(op);
326 |
327 | op.Clear();
328 | op.PushValuePtr("dev_input", &dev_flow_v);
329 | op.PushValuePtr("dev_output", &dev_flow_dv);
330 | op.PushValuePtr("dev_temp", &dev_temp);
331 | op.PushValuePtr("data_size", &prev_data_size);
332 | op.PushValuePtr("resample_size", ¤t_data_size);
333 | cuop_resample_.Execute(op);
334 |
335 |
336 | std::swap(dev_flow_u, dev_flow_du);
337 | std::swap(dev_flow_v, dev_flow_dv);
338 |
339 | cuda_memory_ptrs_.push(dev_temp);
340 | }
341 | }
342 |
343 | /* Backward registration */
344 | {
345 | CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
346 | cuda_memory_ptrs_.pop();
347 |
348 | op.Clear();
349 | op.PushValuePtr("dev_frame_0", &dev_frame_0_res);
350 | op.PushValuePtr("dev_frame_1", &dev_frame_1_res_br);
351 | op.PushValuePtr("dev_flow_u", &dev_flow_u);
352 | op.PushValuePtr("dev_flow_v", &dev_flow_v);
353 | op.PushValuePtr("dev_output", &dev_temp);
354 | op.PushValuePtr("data_size", ¤t_data_size);
355 | op.PushValuePtr("hx", &hx);
356 | op.PushValuePtr("hy", &hy);
357 |
358 | cuop_register_.Execute(op);
359 |
360 | std::swap(dev_frame_1_res_br, dev_temp);
361 |
362 | cuda_memory_ptrs_.push(dev_temp);
363 | }
364 |
365 | /* Difference problem solver */
366 | {
367 | CUdeviceptr dev_phi = cuda_memory_ptrs_.top();
368 | cuda_memory_ptrs_.pop();
369 | CUdeviceptr dev_ksi = cuda_memory_ptrs_.top();
370 | cuda_memory_ptrs_.pop();
371 | CUdeviceptr dev_temp_du = cuda_memory_ptrs_.top();
372 | cuda_memory_ptrs_.pop();
373 | CUdeviceptr dev_temp_dv = cuda_memory_ptrs_.top();
374 | cuda_memory_ptrs_.pop();
375 |
376 | op.Clear();
377 | op.PushValuePtr("dev_frame_0", &dev_frame_0_res);
378 | op.PushValuePtr("dev_frame_1", &dev_frame_1_res_br);
379 | op.PushValuePtr("dev_flow_u", &dev_flow_u);
380 | op.PushValuePtr("dev_flow_v", &dev_flow_v);
381 | op.PushValuePtr("dev_flow_du", &dev_flow_du);
382 | op.PushValuePtr("dev_flow_dv", &dev_flow_dv);
383 | op.PushValuePtr("dev_phi", &dev_phi);
384 | op.PushValuePtr("dev_ksi", &dev_ksi);
385 | op.PushValuePtr("dev_temp_du", &dev_temp_du);
386 | op.PushValuePtr("dev_temp_dv", &dev_temp_dv);
387 |
388 | op.PushValuePtr("data_constancy", &data_constancy_);
389 | op.PushValuePtr("outer_iterations_count", &outer_iterations_count);
390 | op.PushValuePtr("inner_iterations_count", &inner_iterations_count);
391 | op.PushValuePtr("equation_alpha", &equation_alpha);
392 | op.PushValuePtr("equation_smoothness", &equation_smoothness);
393 | op.PushValuePtr("equation_data", &equation_data);
394 | op.PushValuePtr("data_size", ¤t_data_size);
395 | op.PushValuePtr("hx", &hx);
396 | op.PushValuePtr("hy", &hy);
397 |
398 |
399 | cuop_solve_.silent = silent;
400 | cuop_solve_.Execute(op);
401 |
402 | cuda_memory_ptrs_.push(dev_phi);
403 | cuda_memory_ptrs_.push(dev_ksi);
404 | cuda_memory_ptrs_.push(dev_temp_du);
405 | cuda_memory_ptrs_.push(dev_temp_dv);
406 | }
407 |
408 | /* Add the solved flow increment to the global flow */
409 | {
410 | op.Clear();
411 | op.PushValuePtr("operand_0", &dev_flow_u);
412 | op.PushValuePtr("operand_1", &dev_flow_du);
413 | op.PushValuePtr("data_size", ¤t_data_size);
414 | cuop_add_.Execute(op);
415 |
416 | op.Clear();
417 | op.PushValuePtr("operand_0", &dev_flow_v);
418 | op.PushValuePtr("operand_1", &dev_flow_dv);
419 | op.PushValuePtr("data_size", ¤t_data_size);
420 | cuop_add_.Execute(op);
421 |
422 | }
423 |
424 | prev_data_size = current_data_size;
425 | --current_warp_level;
426 |
427 | /* Flow field median filtering */
428 | {
429 | CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
430 | cuda_memory_ptrs_.pop();
431 |
432 | op.Clear();
433 | op.PushValuePtr("dev_input", &dev_flow_u);
434 | op.PushValuePtr("dev_output", &dev_temp);
435 | op.PushValuePtr("data_size", ¤t_data_size);
436 | op.PushValuePtr("radius", &median_radius);
437 | cuop_median_.Execute(op);
438 | std::swap(dev_flow_u, dev_temp);
439 |
440 | op.Clear();
441 | op.PushValuePtr("dev_input", &dev_flow_v);
442 | op.PushValuePtr("dev_output", &dev_temp);
443 | op.PushValuePtr("data_size", ¤t_data_size);
444 | op.PushValuePtr("radius", &median_radius);
445 | cuop_median_.Execute(op);
446 | std::swap(dev_flow_v, dev_temp);
447 |
448 | cuda_memory_ptrs_.push(dev_temp);
449 | }
450 |
451 |
452 |
453 | /* Get data for visualization */
454 | //{
455 | // Visualization& visualization = Visualization::GetInstance();
456 |
457 | // if (visualization.IsInitialized()) {
458 | // CUdeviceptr dev_temp_u = cuda_memory_ptrs_.top();
459 | // cuda_memory_ptrs_.pop();
460 | // CUdeviceptr dev_temp_v = cuda_memory_ptrs_.top();
461 | // cuda_memory_ptrs_.pop();
462 | // CUdeviceptr dev_temp_w = cuda_memory_ptrs_.top();
463 | // cuda_memory_ptrs_.pop();
464 | // CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
465 | // cuda_memory_ptrs_.pop();
466 |
467 | // /* Resample the flow field to original size */
468 | // op.Clear();
469 | // op.PushValuePtr("dev_input", &dev_flow_u);
470 | // op.PushValuePtr("dev_output", &dev_temp_u);
471 | // op.PushValuePtr("dev_temp", &dev_temp);
472 | // op.PushValuePtr("data_size", ¤t_data_size);
473 | // op.PushValuePtr("resample_size", &original_data_size);
474 | // cuop_resample_.Execute(op);
475 |
476 | // op.Clear();
477 | // op.PushValuePtr("dev_input", &dev_flow_v);
478 | // op.PushValuePtr("dev_output", &dev_temp_v);
479 | // op.PushValuePtr("dev_temp", &dev_temp);
480 | // op.PushValuePtr("data_size", ¤t_data_size);
481 | // op.PushValuePtr("resample_size", &original_data_size);
482 | // cuop_resample_.Execute(op);
483 |
484 |
485 | // /* Read output data back from the device */
486 | // CopyData2DFromDevice(dev_temp_u, flow_u, dev_container_size_.height, dev_container_size_.pitch);
487 | // CopyData2DFromDevice(dev_temp_v, flow_v, dev_container_size_.height, dev_container_size_.pitch);
488 |
489 |
490 | // /* Update 3D texture */
491 | // visualization.Load3DFlowTextureFromData3DUVW(flow_u, flow_v, NULL);
492 |
493 | // cuda_memory_ptrs_.push(dev_temp);
494 | // cuda_memory_ptrs_.push(dev_temp_u);
495 | // cuda_memory_ptrs_.push(dev_temp_v);
496 | // cuda_memory_ptrs_.push(dev_temp_w);
497 |
498 | // visualization.DoNextStep();
499 | // }
500 | //}
501 |
502 | }
503 | /* ---------------------------------------------------- */
504 | /* Main loop END */
505 | /* ---------------------------------------------------- */
506 |
507 | /* DEBUG Apply computed flow to the input data */
508 | if (false) {
509 | /* CopyData2DtoDevice(frame_0, dev_frame_0, dev_container_size_.height, dev_container_size_.pitch);
510 | CopyData2DtoDevice(frame_1, dev_frame_1, dev_container_size_.height, dev_container_size_.pitch);
511 |
512 | CheckCudaError(cuStreamSynchronize(NULL));
513 |
514 | CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
515 | cuda_memory_ptrs_.pop();
516 |
517 | hx = 1.f;
518 | hy = 1.f;
519 |
520 | op.Clear();
521 | op.PushValuePtr("dev_frame_0", &dev_frame_0);
522 | op.PushValuePtr("dev_frame_1", &dev_frame_1);
523 | op.PushValuePtr("dev_flow_u", &dev_flow_u);
524 | op.PushValuePtr("dev_flow_v", &dev_flow_v);
525 | op.PushValuePtr("dev_output", &dev_temp);
526 | op.PushValuePtr("data_size", &original_data_size);
527 | op.PushValuePtr("hx", &hx);
528 | op.PushValuePtr("hy", &hy);
529 |
530 |
531 | cuop_register_.Execute(op);
532 | CheckCudaError(cuStreamSynchronize(NULL));
533 | CopyData2DFromDevice(dev_temp, flow_u, dev_container_size_.height, dev_container_size_.pitch);
534 | CheckCudaError(cuStreamSynchronize(NULL));
535 |
536 | flow_u.WriteRAWToFileU8("./data/output/registrated-180-180-151.raw");
537 | flow_u.WriteRAWToFileF32("./data/output/registrated-180-180-151-f.raw");
538 |
539 |
540 | cuda_memory_ptrs_.push(dev_temp);*/
541 | }
542 |
543 | /* Read output data back from the device */
544 | CopyData2DFromDevice(dev_flow_u, flow_u, dev_container_size_.height, dev_container_size_.pitch);
545 | CopyData2DFromDevice(dev_flow_v, flow_v, dev_container_size_.height, dev_container_size_.pitch);
546 |
547 | /* Estimate GPU computation time */
548 | CheckCudaError(cuEventRecord(cu_event_stop, NULL));
549 | CheckCudaError(cuEventSynchronize(cu_event_stop));
550 |
551 | float elapsed_time;
552 | CheckCudaError(cuEventElapsedTime(&elapsed_time, cu_event_start, cu_event_stop));
553 |
554 | std::printf("Total GPU computation time: % 4.4fs\n", elapsed_time / 1000.);
555 |
556 | CheckCudaError(cuEventDestroy(cu_event_start));
557 | CheckCudaError(cuEventDestroy(cu_event_stop));
558 |
559 | /* Return all memory pointers to stack */
560 | cuda_memory_ptrs_.push(dev_frame_0);
561 | cuda_memory_ptrs_.push(dev_frame_1);
562 | cuda_memory_ptrs_.push(dev_frame_0_res);
563 | cuda_memory_ptrs_.push(dev_frame_1_res_br);
564 | cuda_memory_ptrs_.push(dev_flow_u);
565 | cuda_memory_ptrs_.push(dev_flow_v);
566 | cuda_memory_ptrs_.push(dev_flow_du);
567 | cuda_memory_ptrs_.push(dev_flow_dv);
568 |
569 | }
570 |
571 | void OpticalFlow2D::Destroy()
572 | {
573 | for (CudaOperationBase* cuop : cuda_operations_) {
574 | cuop->Destroy();
575 | }
576 |
577 | size_t freed_containers_count = 0;
578 | while (!cuda_memory_ptrs_.empty()) {
579 | CUdeviceptr dev_container = cuda_memory_ptrs_.top();
580 | CheckCudaError(cuMemFree(dev_container));
581 |
582 | ++freed_containers_count;
583 | cuda_memory_ptrs_.pop();
584 | }
585 | if (freed_containers_count && freed_containers_count != dev_containers_count_) {
586 | std::printf("Warning. Not all device memory allocations were freed.\n");
587 | }
588 |
589 | initialized_ = false;
590 | }
591 |
592 | OpticalFlow2D::~OpticalFlow2D()
593 | {
594 | Destroy();
595 | }
--------------------------------------------------------------------------------
/src/optical_flow/optical_flow_2d.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_2D_H_
23 | #define GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_2D_H_
24 |
25 | #include
26 | #include
27 |
28 | #include
29 |
30 | #include "src/cuda_operations/cuda_operation_base.h"
31 | #include "src/cuda_operations/2d/cuda_operation_add_2d.h"
32 | #include "src/cuda_operations/2d/cuda_operation_median_2d.h"
33 | #include "src/cuda_operations/2d/cuda_operation_convolution_2d.h"
34 | #include "src/cuda_operations/2d/cuda_operation_registration_2d.h"
35 | #include "src/cuda_operations/2d/cuda_operation_resample_2d.h"
36 | #include "src/cuda_operations/2d/cuda_operation_solve_2d.h"
37 |
38 | #include "src/data_types/operation_parameters.h"
39 | #include "src/data_types/data2d.h"
40 |
41 | #include "src/optical_flow/optical_flow_base_2d.h"
42 |
43 | class OpticalFlow2D : public OpticalFlowBase2D{
44 | private:
45 | const size_t dev_containers_count_ = 12;
46 | DataSize3 dev_container_size_;
47 |
48 | std::forward_list cuda_operations_;
49 | std::stack cuda_memory_ptrs_;
50 |
51 | CudaOperationAdd2D cuop_add_;
52 | CudaOperationConvolution2D cuop_convolution_;
53 | CudaOperationMedian2D cuop_median_;
54 | CudaOperationRegistration2D cuop_register_;
55 | CudaOperationResample2D cuop_resample_;
56 | CudaOperationSolve2D cuop_solve_;
57 |
58 | bool InitCudaOperations();
59 | bool InitCudaMemory();
60 |
61 | public:
62 | OpticalFlow2D();
63 |
64 | bool Initialize(const DataSize3& data_size, DataConstancy data_constancy = DataConstancy::Grey) override;
65 | void ComputeFlow(Data2D& frame_0, Data2D& frame_1, Data2D& flow_u, Data2D& flow_v, OperationParameters& params) override;
66 | void Destroy() override;
67 |
68 | bool silent = false;
69 |
70 | ~OpticalFlow2D() override;
71 | };
72 |
73 |
74 | #endif // !GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_2D_H_
75 |
--------------------------------------------------------------------------------
/src/optical_flow/optical_flow_base_2d.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "src/optical_flow/optical_flow_base_2d.h"
23 |
24 | #include
25 |
26 | OpticalFlowBase2D::OpticalFlowBase2D(const char* name)
27 | : name_(name)
28 | {
29 | }
30 |
31 | const char* OpticalFlowBase2D::GetName() const
32 | {
33 | return name_;
34 | }
35 |
36 | size_t OpticalFlowBase2D::GetMaxWarpLevel(size_t width, size_t height, float scale_factor) const
37 | {
38 | /* Compute maximum number of warping levels for given image size and warping reduction factor */
39 | size_t r_width = 1;
40 | size_t r_height = 1;
41 | size_t level_counter = 1;
42 |
43 | while (scale_factor < 1.f) {
44 | float scale = std::pow(scale_factor, static_cast(level_counter));
45 | r_width = static_cast(std::ceil(width * scale));
46 | r_height = static_cast(std::ceil(height * scale));
47 |
48 | if (r_width < 4 || r_height < 4 ) {
49 | break;
50 | }
51 | ++level_counter;
52 | }
53 |
54 | if (r_width == 1 || r_height == 1) {
55 | --level_counter;
56 | }
57 |
58 | return level_counter;
59 | }
60 |
61 | bool OpticalFlowBase2D::IsInitialized() const
62 | {
63 | if (!initialized_) {
64 | std::printf("Error: '%s' was not initialized.\n", name_);
65 | }
66 | return initialized_;
67 | }
68 |
69 | void OpticalFlowBase2D::ComputeFlow(Data2D& frame_0, Data2D& frame_1, Data2D& flow_u, Data2D& flow_v, OperationParameters& params)
70 | {
71 | std::printf("Warning: '%s' ComputeFlow() was not defined.\n", name_);
72 |
73 | }
74 |
75 | void OpticalFlowBase2D::Destroy()
76 | {
77 | initialized_ = false;
78 | }
79 |
80 | OpticalFlowBase2D::~OpticalFlowBase2D()
81 | {
82 | if (initialized_) {
83 | Destroy();
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/src/optical_flow/optical_flow_base_2d.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_BASE_2D_H_
23 | #define GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_BASE_2D_H_
24 |
25 | #include "src/data_types/data2d.h"
26 | #include "src/data_types/data_structs.h"
27 | #include "src/data_types/operation_parameters.h"
28 |
29 | class OpticalFlowBase2D {
30 | private:
31 | const char* name_ = nullptr;
32 |
33 | protected:
34 | bool initialized_ = false;
35 |
36 | DataConstancy data_constancy_;
37 |
38 | OpticalFlowBase2D(const char* name);
39 |
40 | size_t GetMaxWarpLevel(size_t width, size_t height, float scale_factor) const;
41 |
42 | bool IsInitialized() const;
43 |
44 | public:
45 | const char* GetName() const;
46 |
47 | virtual bool Initialize(const DataSize3& data_size, DataConstancy data_constancy = DataConstancy::Grey) = 0;
48 | virtual void ComputeFlow(Data2D& frame_0, Data2D& frame_1, Data2D& flow_u, Data2D& flow_v, OperationParameters& params);
49 | virtual void Destroy();
50 |
51 | virtual ~OpticalFlowBase2D();
52 | };
53 |
54 |
55 |
56 | #endif // !GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_BASE_2D_H_
57 |
--------------------------------------------------------------------------------
/src/utils/common_utils.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "common_utils.h"
23 |
24 | #include
25 |
26 | #ifdef _WIN32
27 | #include
28 | #endif
29 | #ifdef __gnu_linux__
30 | #include
31 | #endif
32 |
33 | namespace Utils {
34 |
/*
 * Writes the directory that contains the running executable into 'path'.
 *
 * The full path of the executable is queried from the operating system
 * (GetModuleFileName on Windows, the /proc/self/exe link on GNU/Linux) and is
 * cut at its last path separator. The result is always '\0'-terminated inside
 * the first 'size' bytes. If the query fails, or neither _WIN32 nor
 * __gnu_linux__ is defined, 'path' is set to the empty string. A path longer
 * than the buffer is truncated before the last component is removed.
 *
 * path - destination buffer; the call does nothing when it is null
 * size - capacity of 'path' in bytes, including the terminating '\0';
 *        the call does nothing when it is 0
 */
void GetExecutablePath(char* path, size_t size)
{
    if (path == nullptr || size == 0) {
        return;
    }
    /* Start from a valid empty string so every exit path leaves one behind. */
    path[0] = '\0';

#ifdef _WIN32
    if (GetModuleFileName(NULL, path, static_cast<DWORD>(size)) == 0) {
        path[0] = '\0';
        return;
    }
    /* Defensive: make sure a truncated result is terminated as well. */
    path[size - 1] = '\0';
    char* last_char = std::strrchr(path, '\\');
    if (last_char) {
        *last_char = '\0';
    }
#endif
#ifdef __gnu_linux__
    /* readlink() does not append '\0' and returns -1 on error: reserve one
       byte for the terminator and never index with a negative length. */
    ssize_t link_size = readlink("/proc/self/exe", path, size - 1);
    if (link_size < 0) {
        path[0] = '\0';
        return;
    }
    path[link_size] = '\0';
    char* last_char = std::strrchr(path, '/');
    if (last_char) {
        *last_char = '\0';
    }
#endif
}
53 |
/*
 * Redraws a 40-cell text progress bar on the current console line.
 *
 * The bar is written to stdout as "\r[....]": cell i is drawn as '=' when
 * i / 40 is below 'complete' and as a blank otherwise, so 0 gives an empty
 * bar and any value above 39/40 a full one.
 *
 * complete - completed fraction of the work
 */
void PrintProgressBar(float complete)
{
    const size_t width = 40;
    char buffer[80];    /* '[' + 40 cells + ']' + '\0' = 43 bytes are used */
    char* buffer_ptr = buffer;

    *(buffer_ptr++) = '[';
    for (size_t i = 0; i < width; ++i) {
        *(buffer_ptr++) = (i / static_cast<float>(width) < complete) ? '=' : ' ';
    }
    *(buffer_ptr++) = ']';
    *(buffer_ptr++) = '\0';

    printf("\r%s", buffer);
    /* The output has no newline, so flush it or a line-buffered stdout
       would not show the updated bar. */
    fflush(stdout);
}
69 |
70 | } // namespace Utils
--------------------------------------------------------------------------------
/src/utils/common_utils.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_UTILS_COMMON_UTILS_H_
23 | #define GPUFLOW3D_UTILS_COMMON_UTILS_H_
24 |
25 | #include
26 |
namespace Utils {

/* Writes the path of the running executable, with its last path component
   removed, into 'path'; 'size' is the capacity of that buffer. */
void GetExecutablePath(char* path, size_t size);

/* Prints a 40-cell progress bar to stdout, preceded by a carriage return;
   'complete' is the completed fraction of the work. */
void PrintProgressBar(float complete);

} // namespace Utils
33 |
/*
 * Fetches parameter N from container P and copies it into V.
 *
 * P must provide GetValuePtr(name) returning a void*; the pointer is read as
 * a T* and the pointed-to value is assigned to V. When the pointer is null,
 * a "Missing parameter" message naming GetName() is printed and the enclosing
 * function returns. Usable only inside a void function that has GetName() in
 * scope.
 */
#define GET_PARAM_OR_RETURN(P, T, V, N) \
    do { \
        void* v_ptr = (P).GetValuePtr((N)); \
        if (v_ptr) { \
            (V) = *(static_cast<T*>(v_ptr)); \
        } else { \
            std::printf("Operation: '%s'. Missing parameter '%s'.\n", GetName(), (N)); \
            return; \
        } \
    } while (0)
44 |
/*
 * Fetches parameter N from container P and copies it into V.
 *
 * Same as GET_PARAM_OR_RETURN, but for functions with a return value: when
 * P.GetValuePtr(N) is null, the "Missing parameter" message is printed and
 * the enclosing function returns R. Requires GetName() in scope.
 */
#define GET_PARAM_OR_RETURN_VALUE(P, T, V, N, R) \
    do { \
        void* v_ptr = (P).GetValuePtr((N)); \
        if (v_ptr) { \
            (V) = *(static_cast<T*>(v_ptr)); \
        } else { \
            std::printf("Operation: '%s'. Missing parameter '%s'.\n", GetName(), (N)); \
            return (R); \
        } \
    } while (0)
55 |
/*
 * Fetches parameter N from container P as a pointer.
 *
 * The void* returned by P.GetValuePtr(N) is converted to T* and stored in PTR
 * (the value itself is not copied). When the pointer is null, a "Missing
 * parameter" message naming GetName() is printed and the enclosing function
 * returns. Usable only inside a void function that has GetName() in scope.
 */
#define GET_PARAM_PTR_OR_RETURN(P, T, PTR, N) \
    do { \
        void* v_ptr = (P).GetValuePtr((N)); \
        if (v_ptr) { \
            (PTR) = (static_cast<T*>(v_ptr)); \
        } else { \
            std::printf("Operation: '%s'. Missing parameter '%s'.\n", GetName(), (N)); \
            return; \
        } \
    } while (0)
66 |
67 | #endif // !GPUFLOW3D_UTILS_COMMON_UTILS_H_
68 |
--------------------------------------------------------------------------------
/src/utils/cuda_utils.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "cuda_utils.h"
23 |
24 | #include
25 |
26 | bool InitCudaContextWithFirstAvailableDevice(CUcontext* cu_context)
27 | {
28 | if (CheckCudaError(cuInit(0))) {
29 | return false;
30 | }
31 |
32 | int cu_device_count;
33 | if (CheckCudaError(cuDeviceGetCount(&cu_device_count))) {
34 | return false;
35 | }
36 | CUdevice cu_device;
37 |
38 | if (cu_device_count == 0) {
39 | printf("There are no cuda capable devices.");
40 | return false;
41 | }
42 |
43 | if (CheckCudaError(cuDeviceGet(&cu_device, 0))) {
44 | return false;
45 | }
46 |
47 | char cu_device_name[64];
48 | if (CheckCudaError(cuDeviceGetName(cu_device_name, 64, cu_device))) {
49 | return false;
50 | }
51 |
52 | int launch_timeout;
53 | CheckCudaError(cuDeviceGetAttribute(&launch_timeout, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, cu_device));
54 |
55 | printf("CUDA Device: %s. Launch timeout: %s\n", cu_device_name, (launch_timeout ? "Yes" : "No"));
56 |
57 | if (CheckCudaError(cuCtxCreate(cu_context, 0, cu_device))) {
58 | return false;
59 | }
60 |
61 | return true;
62 | }
63 |
64 |
65 |
66 | void CopyData2DtoDevice(Data2D& data2d, CUdeviceptr device_ptr, size_t device_height, size_t device_pitch)
67 | {
68 | CUDA_MEMCPY2D cu_copy2d;
69 | std::memset(&cu_copy2d, 0, sizeof(CUDA_MEMCPY2D));
70 |
71 | cu_copy2d.srcMemoryType = CU_MEMORYTYPE_HOST;
72 | cu_copy2d.srcHost = data2d.DataPtr();
73 | cu_copy2d.srcPitch = data2d.Width() * sizeof(float);
74 |
75 | cu_copy2d.dstMemoryType = CU_MEMORYTYPE_DEVICE;
76 | cu_copy2d.dstDevice = device_ptr;
77 | cu_copy2d.dstPitch = device_pitch;
78 |
79 | cu_copy2d.WidthInBytes = data2d.Width() * sizeof(float);
80 | cu_copy2d.Height = data2d.Height();
81 |
82 | CheckCudaError(cuMemcpy2D(&cu_copy2d));
83 | }
84 |
85 |
86 | void CopyData2DFromDevice(CUdeviceptr device_ptr, Data2D& data2d, size_t device_height, size_t device_pitch)
87 | {
88 | CUDA_MEMCPY2D cu_copy2d;
89 | std::memset(&cu_copy2d, 0, sizeof(CUDA_MEMCPY2D));
90 |
91 | cu_copy2d.srcMemoryType = CU_MEMORYTYPE_DEVICE;
92 | cu_copy2d.srcDevice = device_ptr;
93 | cu_copy2d.srcPitch = device_pitch;
94 |
95 |
96 | cu_copy2d.dstMemoryType = CU_MEMORYTYPE_HOST;
97 | cu_copy2d.dstHost = data2d.DataPtr();
98 | cu_copy2d.dstPitch = data2d.Width() * sizeof(float);
99 |
100 |
101 | cu_copy2d.WidthInBytes = data2d.Width() * sizeof(float);
102 | cu_copy2d.Height = data2d.Height();
103 |
104 | CheckCudaError(cuMemcpy2D(&cu_copy2d));
105 | }
106 |
--------------------------------------------------------------------------------
/src/utils/cuda_utils.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_UTILS_CUDA_UTILS_H_
23 | #define GPUFLOW3D_UTILS_CUDA_UTILS_H_
24 |
25 | #include
26 |
27 | #include
28 |
29 | #include "src/data_types/data2d.h"
30 |
/* Checks the result of a CUDA driver API call: forwards it to
   __CheckCudaError() together with the file and line of the call site.
   Evaluates to true when the call failed. */
#define CheckCudaError(error) __CheckCudaError((error), __FILE__, __LINE__)
32 |
33 | inline bool __CheckCudaError(CUresult error, const char* file, const int line)
34 | {
35 | if (error != CUDA_SUCCESS)
36 | {
37 | const char *error_name = nullptr;
38 | const char *error_string = nullptr;
39 |
40 | #define NEW_CUDA_DRIVER
41 | #ifdef NEW_CUDA_DRIVER
42 | cuGetErrorName(error, &error_name);
43 | cuGetErrorString(error, &error_string);
44 | #endif
45 |
46 | std::fprintf(stderr, "Driver API error = %04d %s\n%s\n from file <%s>, line %i.\n",
47 | error, error_name, error_string, file, line);
48 | return true;
49 | }
50 | return false;
51 | }
52 |
53 | bool InitCudaContextWithFirstAvailableDevice(CUcontext* cu_context);
54 |
55 | void CopyData2DtoDevice(Data2D& data2d, CUdeviceptr device_ptr, size_t device_height, size_t device_pitch);
56 | void CopyData2DFromDevice(CUdeviceptr device_ptr, Data2D& data2d, size_t device_height, size_t device_pitch);
57 |
58 |
59 |
60 | #endif // !GPUFLOW3D_UTILS_CUDA_UTILS_H_
61 |
--------------------------------------------------------------------------------
/src/utils/io_utils.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "io_utils.h"
23 |
24 | #include
25 | #include
26 | #include
27 |
28 | #include "src/data_types/data2d.h"
29 |
30 | using namespace IOUtils;
31 |
32 |
33 |
34 |
35 | void IOUtils::WriteFlowToImageRGB(Data2D& u, Data2D& v, float flowMaxScale, string fileName)
36 | {
37 |
38 | float maxLength = flowMaxScale;
39 |
40 | // Compute scaling factor
41 | float factor = 1.0 / maxLength;
42 |
43 | RGBColor rgb;
44 |
45 | fstream ofile(fileName.c_str(), ios::out | ios::binary);
46 |
47 | if (!ofile.is_open())
48 | {
49 | std::cerr << "Error: cannot save file " << std::endl;
50 | exit(255);
51 | }
52 |
53 | int nx = u.Width();
54 | int ny = u.Height();
55 |
56 | GRAY res;
57 |
58 | ofile<<"P6 \n";
59 | ofile<r = 0;
119 | this->g = 0;
120 | this->b = 0;
121 | }
122 |
123 | RGBColor::RGBColor(int r, int g, int b)
124 | {
125 | this->r = r;
126 | this->g = g;
127 | this->b = b;
128 | }
129 |
130 |
131 | /*****************************************************************************/
132 | /* */
133 | /* Copyright 08/2006 by Dr. Andres Bruhn */
134 | /* Faculty of Mathematics and Computer Science, Saarland University, */
135 | /* Saarbruecken, Germany. */
136 | /* */
137 | /* Modified by Alexey Ershov */
138 | /* */
139 | /*****************************************************************************/
140 | RGBColor IOUtils::ConvertToRGB(float x, float y)
141 | {
142 | /* Color-codes the 2D vector (x, y): its direction selects the hue and its length (capped at 1) scales all three channels. */
143 | float Pi; /* pi */
144 | float amp; /* amplitude (magnitude) of (x, y), cut at 1 */
145 | float phi; /* phase (angle) of (x, y) in radians; halved before the color lookup */
146 | float alpha, beta; /* weights for linear interpolation (alpha = 1 - beta) */
147 | /* Returns an RGBColor whose channels have each been passed through ConvertToByte. */
148 | /* rgb keeps its default-constructed value if phi matches none of the segments below (e.g. phi is NaN). */
149 | RGBColor rgb;
150 |
151 | /* if (isUnknownFlow(x, y)) {
152 | x = 0.0;
153 | y = 0.0;
154 | }*/
155 |
156 | /* set pi (acos(0) is pi / 2) */
157 | Pi = 2.0 * acos(0.0);
158 |
159 | /* determine amplitude and phase (cut amp at 1); phi ends up in [0, 2 * Pi] */
160 | amp = sqrt(x * x + y * y);
161 | if (amp > 1) amp = 1;
162 | if (x == 0.0) /* handled separately so that y/x is never evaluated with x == 0 */
163 | if (y >= 0.0) phi = 0.5 * Pi;
164 | else phi = 1.5 * Pi;
165 | else if (x > 0.0) /* right half-plane */
166 | if (y >= 0.0) phi = atan(y/x);
167 | else phi = 2.0 * Pi + atan(y/x); /* atan is negative here: shift into (1.5 * Pi, 2 * Pi] */
168 | else phi = Pi + atan(y/x); /* left half-plane; also reached when x is NaN */
169 | /* halve the angle so the full circle maps onto the [0, Pi] range covered by the six segments below */
170 | phi = phi / 2.0;
171 |
172 | // interpolation between red (0) and blue (0.25 * Pi)
173 | if ((phi >= 0.0) && (phi < 0.125 * Pi)) { /* (255,0,0) red -> (255,0,255) magenta */
174 | beta = phi / (0.125 * Pi);
175 | alpha = 1.0 - beta;
176 | rgb.r = (int)floor(amp * (alpha * 255.0 + beta * 255.0));
177 | rgb.g = (int)floor(amp * (alpha * 0.0 + beta * 0.0));
178 | rgb.b = (int)floor(amp * (alpha * 0.0 + beta * 255.0));
179 | }
180 | if ((phi >= 0.125 * Pi) && (phi < 0.25 * Pi)) { /* (255,0,255) magenta -> (64,64,255) */
181 | beta = (phi-0.125 * Pi) / (0.125 * Pi);
182 | alpha = 1.0 - beta;
183 | rgb.r = (int)floor(amp * (alpha * 255.0 + beta * 64.0));
184 | rgb.g = (int)floor(amp * (alpha * 0.0 + beta * 64.0));
185 | rgb.b = (int)floor(amp * (alpha * 255.0 + beta * 255.0));
186 | }
187 | // interpolation between blue (0.25 * Pi) and green (0.5 * Pi)
188 | if ((phi >= 0.25 * Pi) && (phi < 0.375 * Pi)) { /* (64,64,255) -> (0,255,255) cyan */
189 | beta = (phi - 0.25 * Pi) / (0.125 * Pi);
190 | alpha = 1.0 - beta;
191 | rgb.r = (int)floor(amp * (alpha * 64.0 + beta * 0.0));
192 | rgb.g = (int)floor(amp * (alpha * 64.0 + beta * 255.0));
193 | rgb.b = (int)floor(amp * (alpha * 255.0 + beta * 255.0));
194 | }
195 | if ((phi >= 0.375 * Pi) && (phi < 0.5 * Pi)) { /* (0,255,255) cyan -> (0,255,0) green */
196 | beta = (phi - 0.375 * Pi) / (0.125 * Pi);
197 | alpha = 1.0 - beta;
198 | rgb.r = (int)floor(amp * (alpha * 0.0 + beta * 0.0));
199 | rgb.g = (int)floor(amp * (alpha * 255.0 + beta * 255.0));
200 | rgb.b = (int)floor(amp * (alpha * 255.0 + beta * 0.0));
201 | }
202 | // interpolation between green (0.5 * Pi) and yellow (0.75 * Pi)
203 | if ((phi >= 0.5 * Pi) && (phi < 0.75 * Pi)) { /* (0,255,0) green -> (255,255,0) yellow */
204 | beta = (phi - 0.5 * Pi) / (0.25 * Pi);
205 | alpha = 1.0 - beta;
206 | rgb.r = (int)floor(amp * (alpha * 0.0 + beta * 255.0));
207 | rgb.g = (int)floor(amp * (alpha * 255.0 + beta * 255.0));
208 | rgb.b = (int)floor(amp * (alpha * 0.0 + beta * 0.0));
209 | }
210 | // interpolation between yellow (0.75 * Pi) and red (Pi)
211 | if ((phi >= 0.75 * Pi) && (phi <= Pi)) { /* (255,255,0) yellow -> (255,0,0) red; upper bound is inclusive */
212 | beta = (phi - 0.75 * Pi) / (0.25 * Pi);
213 | alpha = 1.0 - beta;
214 | rgb.r = (int)floor(amp * (alpha * 255.0 + beta * 255.0));
215 | rgb.g = (int)floor(amp * (alpha * 255.0 + beta * 0.0));
216 | rgb.b = (int)floor(amp * (alpha * 0.0 + beta * 0.0));
217 | }
218 |
219 | /* check RGBColor range: pass every channel through ConvertToByte */
220 | rgb.r = ConvertToByte(rgb.r);
221 | rgb.g = ConvertToByte(rgb.g);
222 | rgb.b = ConvertToByte(rgb.b);
223 |
224 | return rgb;
225 | }
226 |
227 |
--------------------------------------------------------------------------------
/src/utils/io_utils.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com).
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_UTILS_IO_UTILS_H_
23 | #define GPUFLOW3D_UTILS_IO_UTILS_H_
24 |
25 | #include "src/data_types/data2d.h"
26 |
27 | #include
28 | #include
29 |
30 |
31 | using namespace std;
32 |
33 | namespace IOUtils {
34 |
35 | typedef unsigned char GRAY;
36 |
37 | // Color triple; each channel is kept as a plain int.
38 | struct RGBColor
39 | {
40 |     RGBColor();                      // default constructor, defined in io_utils.cpp
41 |     RGBColor(int r, int g, int b);   // sets the three channels from the arguments
42 |
43 |     int r;   // red
44 |     int g;   // green
45 |     int b;   // blue
46 | };
47 |
48 |
49 | //------------------------------------------------------------------
50 | // 2D Writing Routines
51 | //------------------------------------------------------------------
52 | void WriteFlowToImageRGB(Data2D& u, Data2D& v, float flowMaxScale, string fileName); // opens fileName for binary output and starts it with a P6 header; exits with code 255 if the file cannot be opened
53 | void WriteMagnitudeToFileF32(Data2D& u, Data2D& v, string fileName); // NOTE(review): presumably stores the flow magnitude as 32-bit floats -- confirm against the definition
54 | // Data conversion routines
55 | // ConvertToRGB color-codes the vector (x, y): direction picks the hue, length (capped at 1) scales the channels.
56 | RGBColor ConvertToRGB(float x, float y);
57 |
58 | inline int ConvertToByte(int num)   // clamps num into the range [0, 255]
59 | {
60 |     return (num <= 0) ? 0 : ((num >= 255) ? 255 : num);
61 | }
62 |
63 | // Clamps `number` into [0, 255] and truncates it to a GRAY byte.
64 | // NaN maps to 0: converting NaN to an integer type is undefined behavior.
65 | inline GRAY ConvertToGray(float number)
66 | {
67 |     // Negated comparison so that NaN (which fails every comparison) lands here too.
68 |     if (!(number >= 0.0))
69 |         return (GRAY)(0.0);
70 |
71 |     if (number > 255.0)
72 |         return (GRAY)(255.0);
73 |
74 |     // 0 <= number <= 255 here: the cast truncates toward zero and cannot overflow.
75 |     return (GRAY)(number);
76 | }
77 |
78 | }
79 |
80 | #endif // !GPUFLOW3D_UTILS_IO_UTILS_H_
81 |
82 |
--------------------------------------------------------------------------------
/src/utils/settings.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com).
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #include "settings.h"
23 |
24 | #include
25 | #include
26 | #include
27 |
28 | using namespace OpticFlow;
29 |
30 |
31 |
32 |
33 | // Fills the solver parameters with hard-coded defaults instead of reading them from XML.
34 | // Only the fields assigned below are written; inputPath, outputPath, fileName1, fileName2,
35 | // press_key, width, height and medianRadius (all set by LoadSettings) are left untouched.
36 | void Settings::LoadSettingsManually()
37 | {
38 |     // Model
39 |     sigma    = 1.2;
40 |     alpha    = 40;
41 |     e_smooth = 0.1;
42 |     e_data   = 0.001;
43 |
44 |     // Solver iterations
45 |     iterInner = 5;
46 |     iterOuter = 20;
47 |
48 |     // Warping
49 |     levels    = 1;
50 |     warpScale = 0.5;
51 | }
52 |
53 | int Settings::LoadSettings(string fileName)
54 | {
55 |
56 | string m_name;
57 |
58 | TiXmlDocument doc(fileName.c_str());
59 |
60 | if (!doc.LoadFile()) {
61 | cout<<"Cannot read settings file: "<Value();
85 |
86 | // save this for later
87 | hRoot=TiXmlHandle(pElem);
88 |
89 | int value;
90 | float dbValue;
91 |
92 |
93 | pElem=hRoot.FirstChild( "Input" ).FirstChild("Path").Element();
94 | this->inputPath = pElem->Attribute("inputPath");
95 | pElem=hRoot.FirstChild( "Output").FirstChild("Path").Element();
96 | this->outputPath = pElem->Attribute("outputPath");
97 |
98 | this->fileName1 = hRoot.FirstChild( "Input" ).FirstChild("Mode").FirstChild("Files").Element()->Attribute("file1");
99 | this->fileName2 = hRoot.FirstChild( "Input" ).FirstChild("Mode").FirstChild("Files").Element()->Attribute("file2");
100 |
101 | hRoot.FirstChild( "Parameters" ).FirstChild("Method").Element()->QueryIntAttribute("key", &value);
102 | this->press_key = value;
103 |
104 | hRoot.FirstChild( "Input" ).FirstChild("Mode").Element()->QueryIntAttribute("Nx", &value);
105 | this->width = value;
106 |
107 | hRoot.FirstChild( "Input" ).FirstChild("Mode").Element()->QueryIntAttribute("Ny", &value);
108 | this->height = value;
109 |
110 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Model").Element()->QueryFloatAttribute("sigma", &dbValue);
111 | this->sigma = dbValue;
112 |
113 |
114 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Iterations").Element()->QueryIntAttribute("inner", &value);
115 | this->iterInner = value;
116 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Iterations").Element()->QueryIntAttribute("outer", &value);
117 | this->iterOuter = value;
118 |
119 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Warping").Element()->QueryIntAttribute("levels", &value);
120 | this->levels= value;
121 |
122 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Warping").Element()->QueryFloatAttribute("scaling", &dbValue);
123 | this->warpScale= dbValue;
124 |
125 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Warping").Element()->QueryIntAttribute("medianRadius", &value);
126 | this->medianRadius = value;
127 |
128 |
129 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Model").Element()->QueryFloatAttribute("alpha", &dbValue);
130 | this->alpha= dbValue;
131 |
132 |
133 |
134 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Model").Element()->QueryFloatAttribute("e_smooth", &dbValue);
135 | this->e_smooth= dbValue;
136 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Model").Element()->QueryFloatAttribute("e_data", &dbValue);
137 | this->e_data= dbValue;
138 |
139 |
140 |
141 | return 0;
142 |
143 |
144 | }
145 |
146 |
147 |
148 |
149 |
--------------------------------------------------------------------------------
/src/utils/settings.h:
--------------------------------------------------------------------------------
1 | /**
2 | * @file 2D Optical flow using NVIDIA CUDA
3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
4 | *
5 | * @date 2015-2018
6 | * @version 0.5.0
7 | *
8 | *
9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com).
18 | * See src/utils/tinyxml.h for details.
19 | *
20 | */
21 |
22 | #ifndef GPUFLOW3D_OPTICAL_FLOW_SETTINGS_H_
23 | #define GPUFLOW3D_OPTICAL_FLOW_SETTINGS_H_
24 |
25 | #include
26 | #include
27 | #include