├── Makefile ├── README.md ├── data ├── rub1.raw └── rub2.raw ├── examples ├── insect.png └── optical_flow_example.png ├── settings.xml └── src ├── cuda_operations ├── 2d │ ├── cuda_operation_add_2d.cpp │ ├── cuda_operation_add_2d.h │ ├── cuda_operation_convolution_2d.cpp │ ├── cuda_operation_convolution_2d.h │ ├── cuda_operation_median_2d.cpp │ ├── cuda_operation_median_2d.h │ ├── cuda_operation_registration_2d.cpp │ ├── cuda_operation_registration_2d.h │ ├── cuda_operation_resample_2d.cpp │ ├── cuda_operation_resample_2d.h │ ├── cuda_operation_solve_2d.cpp │ └── cuda_operation_solve_2d.h ├── cuda_operation_base.cpp └── cuda_operation_base.h ├── data_types ├── data2d.cpp ├── data2d.h ├── data3d.cpp ├── data3d.h ├── data_structs.h ├── operation_parameters.cpp └── operation_parameters.h ├── kernels ├── add_2d.cu ├── convolution_2d.cu ├── median_2d.cu ├── registration_2d.cu ├── resample_2d.cu └── solve_2d.cu ├── main.cpp ├── optical_flow ├── optical_flow_2d.cpp ├── optical_flow_2d.h ├── optical_flow_base_2d.cpp └── optical_flow_base_2d.h └── utils ├── common_utils.cpp ├── common_utils.h ├── cuda_utils.cpp ├── cuda_utils.h ├── io_utils.cpp ├── io_utils.h ├── settings.cpp ├── settings.h ├── tinystr.cpp ├── tinystr.h ├── tinyxml.cpp ├── tinyxml.h ├── tinyxmlerror.cpp └── tinyxmlparser.cpp
/Makefile:
--------------------------------------------------------------------------------
# Builds the host executable ($(TARGET)) from every .cpp under src/ and
# compiles every CUDA kernel under src/kernels/ to PTX in $(BASEDIR)/kernels/,
# which is where the host code looks for "<exe dir>/kernels/<name>.ptx".
#
# Usage:
#   make                  build executable + PTX kernels for MACHINE=rtx
#   make MACHINE=concert  build with the system-wide CUDA toolkit
#   make cuda             build only the PTX kernels
#   make clean            remove everything this Makefile created

# Remove a half-written target if its recipe fails.
.DELETE_ON_ERROR:

# Directory that contains this Makefile (no trailing slash).
BASEDIR := $(patsubst %/,%,$(dir $(realpath $(lastword $(MAKEFILE_LIST)))))
INC-DIR := "-I$(BASEDIR)"

CC := g++
CFLAGS := -c -std=c++11 -O3 -funroll-all-loops -Wno-deprecated
CUDA-FLAGS = -ptx -std=c++11

# Build profile; override on the command line (make MACHINE=...).
MACHINE := rtx

ifeq ($(MACHINE),rtx)
# anka-rtx
TARGET := cuda-flow2d-rtx
CUDA-TOP = /home/ws/fe0968/local/cuda-10.0
CUDA = $(CUDA-TOP)/bin/nvcc
CUDA-INC-DIR = -I$(CUDA-TOP)/include
CUDA-LIB-DIR = -L$(CUDA-TOP)/lib64 -lcudart -lcuda
else
# ankaimage-concert
TARGET := cuda-flow2d-concert
CUDA = /usr/bin/nvcc
CUDA-INC-DIR = -I/usr/include
CUDA-LIB-DIR = -L/usr/lib/x86_64-linux-gnu -lcudart -lcuda
endif

# All host sources: src/*.cpp, src/*/*.cpp and src/*/*/*.cpp.
SRCS := $(wildcard $(BASEDIR)/src/*.cpp \
                   $(BASEDIR)/src/*/*/*.cpp \
                   $(BASEDIR)/src/*/*.cpp)
# One object file next to each source file.
OBJS := $(patsubst %.cpp,%.o,$(SRCS))

# Kernel sources are discovered instead of listed by hand, so a kernel that
# does not exist in the tree can no longer break the build.
KERNEL-DIR := $(BASEDIR)/kernels
CUDASOURCES := $(wildcard $(BASEDIR)/src/kernels/*.cu)
CUDAOBJECTS := $(patsubst $(BASEDIR)/src/kernels/%.cu,$(KERNEL-DIR)/%.ptx,$(CUDASOURCES))

all: $(TARGET)

# The PTX files are loaded at run time, not linked, so they are order-only:
# they get built with the executable but never force a relink.
$(TARGET): $(OBJS) | $(CUDAOBJECTS)
	$(CC) $(OBJS) $(CUDA-LIB-DIR) -o $@

# Compile only ($(CFLAGS) carries -c); linker flags are not needed here.
%.o: %.cpp
	$(CC) $(CFLAGS) $(INC-DIR) $(CUDA-INC-DIR) $< -o $@

# Convenience alias: build every PTX kernel.
cuda: $(CUDAOBJECTS)

$(KERNEL-DIR)/%.ptx: $(BASEDIR)/src/kernels/%.cu | $(KERNEL-DIR)
	$(CUDA) $(CUDA-FLAGS) $(INC-DIR) $(CUDA-INC-DIR) $< -o $@

$(KERNEL-DIR):
	mkdir -p $@

clean:
	$(RM) $(TARGET) $(OBJS) $(CUDAOBJECTS)

.PHONY: all clean cuda
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPU-based 2D Optical flow using NVIDIA CUDA 2 | 3 | Optical flow is computed using *variational methods*, which determine the unknown displacement field as the minimizer 4 | of an energy functional. 5 | 6 | In general, such energy-based formulations are composed of two 7 | parts: a *data term*, which assumes constancy of specific image features, and a *smoothness term*, which regularizes the spatial variation of the flow field. 8 | 9 | ## Features 10 | 11 | * **Computational model guarantees**: 12 | * a unique solution (globally optimal solution) 13 | * stability of the algorithm (no critical dependence on parameters) 14 | * **High quality**: 15 | * dense flow results (one displacement for each pixel) 16 | * allows large displacements 17 | * different types of motion (translation, rotation, local elastic transformation) 18 | * sub-pixel accuracy 19 | * **Robustness**: 20 | * under noise 21 | * under varying illumination (brightness changes) 22 | * with respect to artifacts 23 | 24 | ## Examples 25 | 26 | ![alt text](https://github.com/axruff/cuda-flow2d/raw/master/examples/optical_flow_example.png "Examples") 27 | 28 | **Figure**: **Left**: Head of the feeding cockroach *Periplaneta americana* imaged by fast X-ray radiography and the computed flow field, which captures the movements of the insect during the chewing process. **Right**: Flow dynamics of liquid droplets in a fuel spray. Color coding: color represents direction and its brightness represents flow magnitude. 
29 | 30 | ## Model 31 | 32 | * Brightness constancy data term [1] 33 | * Gradient constancy data term [2] 34 | * Data term based on higher-order derivatives [3] 35 | * Robust modeling of data term [4] 36 | * Flow-driven smoothness [3] 37 | * Coarse-to-fine flow estimation [2] 38 | * Intermediate flow median filtering [5] 39 | 40 | 41 | ## References 42 | 43 | * [1] B. Horn and B. Schunck. *Determining optical flow*. Artificial Intelligence, 17:185-203, 1981. 44 | * [2] T. Brox, A. Bruhn, N. Papenberg, and J. Weickert. *High accuracy optic flow estimation based on a theory for warping*. In T. Pajdla and J. Matas, editors, Computer Vision, ECCV 2004, volume 3024 of Lecture Notes in Computer Science, pages 25-36. Springer, Berlin, 2004. 45 | * [3] N. Papenberg, A. Bruhn, T. Brox, S. Didas, and J. Weickert. *Highly accurate optic flow computation with theoretically justified warping*. Int. J. Comput. Vision, 67(2):141-158, April 2006. 46 | * [4] M. J. Black and P. Anandan. *The robust estimation of multiple motions: Parametric and piecewise-smooth flow fields*. Computer Vision and Image Understanding, 63(1):75-104, 1996. 47 | * [5] D. Sun, S. Roth, and M. J. Black. *A quantitative analysis of current practices in optical flow estimation and the principles behind them*. International Journal of Computer Vision, 106(2):115-137, 2014. 
48 | -------------------------------------------------------------------------------- /data/rub1.raw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/data/rub1.raw -------------------------------------------------------------------------------- /data/rub2.raw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/data/rub2.raw -------------------------------------------------------------------------------- /examples/insect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/examples/insect.png -------------------------------------------------------------------------------- /examples/optical_flow_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/examples/optical_flow_example.png -------------------------------------------------------------------------------- /settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/cuda_operations/2d/cuda_operation_add_2d.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is 
copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #include "cuda_operation_add_2d.h" 23 | 24 | #include 25 | 26 | #include "src/data_types/data_structs.h" 27 | 28 | #include 29 | 30 | #include "src/utils/common_utils.h" 31 | #include "src/utils/cuda_utils.h" 32 | 33 | CudaOperationAdd2D::CudaOperationAdd2D() 34 | : CudaOperationBase("CUDA Add 2D") 35 | { 36 | } 37 | 38 | bool CudaOperationAdd2D::Initialize(const OperationParameters* params) 39 | { 40 | initialized_ = false; 41 | 42 | if (!params) { 43 | std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName()); 44 | return initialized_; 45 | } 46 | 47 | DataSize3 container_size; 48 | GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_); 49 | 50 | char exec_path[256]; 51 | Utils::GetExecutablePath(exec_path, 256); 52 | std::strcat(exec_path, "/kernels/add_2d.ptx"); 53 | 54 | if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) { 55 | if (!CheckCudaError(cuModuleGetFunction(&cuf_add_, cu_module_, "add_2d"))) { 56 | size_t const_size; 57 | 58 | /* Get the pointer to the constant memory and copy data */ 59 | if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) { 60 | if (const_size == sizeof(container_size)) { 61 | if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) { 62 | initialized_ = true; 63 | } 64 | } 65 | } 66 | 67 | } else { 68 | CheckCudaError(cuModuleUnload(cu_module_)); 69 | cu_module_ = nullptr; 70 | } 71 | } 72 | return initialized_; 73 | } 74 | 75 | void 
CudaOperationAdd2D::Execute(OperationParameters& params) 76 | { 77 | if (!IsInitialized()) { 78 | return; 79 | } 80 | 81 | CUdeviceptr operand_0 = 0; 82 | CUdeviceptr operand_1 = 0; 83 | DataSize3 data_size; 84 | 85 | GET_PARAM_OR_RETURN(params, CUdeviceptr, operand_0, "operand_0"); 86 | GET_PARAM_OR_RETURN(params, CUdeviceptr, operand_1, "operand_1"); 87 | GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size"); 88 | 89 | dim3 block_dim = { 16, 8}; 90 | dim3 grid_dim = { static_cast((data_size.width + block_dim.x - 1) / block_dim.x), 91 | static_cast((data_size.height + block_dim.y - 1) / block_dim.y) }; 92 | 93 | void* args[4] = { 94 | &operand_0, 95 | &operand_1, 96 | &data_size.width, 97 | &data_size.height}; 98 | 99 | CheckCudaError(cuLaunchKernel(cuf_add_, 100 | grid_dim.x, grid_dim.y, grid_dim.z, 101 | block_dim.x, block_dim.y, block_dim.z, 102 | 0, 103 | NULL, 104 | args, 105 | NULL)); 106 | } -------------------------------------------------------------------------------- /src/cuda_operations/2d/cuda_operation_add_2d.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_ADD_H_ 23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_ADD_H_ 24 | 25 | #include 26 | 27 | #include "src/cuda_operations/cuda_operation_base.h" 28 | #include "src/data_types/operation_parameters.h" 29 | 30 | class CudaOperationAdd2D : public CudaOperationBase { 31 | private: 32 | CUfunction cuf_add_; 33 | 34 | public: 35 | CudaOperationAdd2D(); 36 | 37 | bool Initialize(const OperationParameters* params = nullptr) override; 38 | void Execute(OperationParameters& params) override; 39 | }; 40 | 41 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_ADD_H_ 42 | -------------------------------------------------------------------------------- /src/cuda_operations/2d/cuda_operation_convolution_2d.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #include "cuda_operation_convolution_2d.h" 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | #include "src/data_types/data_structs.h" 33 | #include "src/utils/common_utils.h" 34 | #include "src/utils/cuda_utils.h" 35 | 36 | using namespace std; 37 | 38 | CudaOperationConvolution2D::CudaOperationConvolution2D() 39 | : CudaOperationBase("CUDA Convolution 2D") 40 | { 41 | } 42 | 43 | bool CudaOperationConvolution2D::Initialize(const OperationParameters* params) 44 | { 45 | initialized_ = false; 46 | 47 | if (!params) { 48 | std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName()); 49 | return initialized_; 50 | } 51 | 52 | DataSize3 container_size; 53 | GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_); 54 | 55 | dev_container_size_ = container_size; 56 | 57 | char exec_path[256]; 58 | Utils::GetExecutablePath(exec_path, 256); 59 | std::strcat(exec_path, "/kernels/convolution_2d.ptx"); 60 | 61 | if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) { 62 | if (!CheckCudaError(cuModuleGetFunction(&cuf_convolution_rows_, cu_module_, "convolutionRowsKernel")) && 63 | !CheckCudaError(cuModuleGetFunction(&cuf_convolution_cols_, cu_module_, "convolutionColumnsKernel"))) { 64 | size_t const_size; 65 | 66 | /* Get the pointer to the constant memory and copy data */ 67 | if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) { 68 | if (const_size == sizeof(container_size)) { 69 | if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) { 70 | initialized_ = true; 71 | } 72 | } 73 | } 74 | 75 | } else { 76 | CheckCudaError(cuModuleUnload(cu_module_)); 77 | cu_module_ = nullptr; 78 | } 79 | } 80 | return initialized_; 81 | } 82 | 83 | void CudaOperationConvolution2D::ComputeGaussianKernel(float sigma, size_t precision, float pixel_size) 84 | { 85 | 
kernel_radius_ = (size_t)(precision * sigma / pixel_size); 86 | 87 | kernel_ = new float[2*kernel_radius_ + 1]; 88 | kernel_length_ = 2*kernel_radius_ + 1; 89 | 90 | int r = static_cast(kernel_radius_); 91 | 92 | for (int i = -r; i <= r; i++) { 93 | float val = 1.0 / (sigma * std::sqrt(2.0 * 3.1415926)) * std::exp(-(i * i * pixel_size * pixel_size) / (2.0 * sigma * sigma)); 94 | kernel_[i+r] = val; 95 | } 96 | 97 | // Normalize convolution mask vector 98 | float sum = 0.0; 99 | 100 | for (unsigned int i = 0; i < kernel_length_; i++) { 101 | sum = sum + kernel_[i]; 102 | } 103 | 104 | for (unsigned int i = 0; i < kernel_length_; i++) { 105 | kernel_[i] = kernel_[i] / sum; 106 | } 107 | 108 | /* for (unsigned int i = 0; i < kernel_length_; i++) { 109 | kernel_[i] = 1.0; 110 | }*/ 111 | 112 | } 113 | 114 | void CudaOperationConvolution2D::PrintConvolutionKernel() 115 | { 116 | if (kernel_ == nullptr) 117 | std::printf("Error: Convolution kernel is not initialized.\n"); 118 | else { 119 | std::printf("Convolution kernel (radius = %d)\n", kernel_radius_); 120 | 121 | for (unsigned int i = 0; i < kernel_length_; i++) { 122 | std::printf("%.4f ", kernel_[i]); 123 | } 124 | std::printf("\n\n"); 125 | 126 | } 127 | 128 | } 129 | 130 | 131 | 132 | void CudaOperationConvolution2D::Execute(OperationParameters& params) 133 | { 134 | if (!IsInitialized()) { 135 | return; 136 | } 137 | 138 | CUdeviceptr dev_input = 0; 139 | CUdeviceptr dev_output = 0; 140 | CUdeviceptr dev_temp = 0; 141 | DataSize3 data_size; 142 | float gaussian_sigma; 143 | 144 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_input, "dev_input"); 145 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_output, "dev_output"); 146 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_temp, "dev_temp"); 147 | GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size"); 148 | GET_PARAM_OR_RETURN(params, float, gaussian_sigma, "gaussian_sigma"); 149 | 150 | 151 | if (dev_input == dev_output) { 152 | std::printf("Operation '%s': 
Error. Input buffer cannot serve as output buffer.", GetName()); 153 | return; 154 | } 155 | 156 | // TODO: Make computation of kernel once for all images 157 | ComputeGaussianKernel(gaussian_sigma, 3, 1.0); 158 | //PrintConvolutionKernel(); 159 | 160 | size_t const_size; 161 | 162 | /* Get the pointer to the constant memory and copy data */ 163 | CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "c_Kernel")); 164 | CheckCudaError(cuMemcpyHtoD(dev_constants_, kernel_, kernel_length_*sizeof(float))); 165 | 166 | int pitch = static_cast(dev_container_size_.pitch / sizeof(float)); 167 | 168 | /* 1. Process in horizontal direction (rows) */ 169 | ResampleRows(dev_input, dev_temp, data_size, pitch); 170 | 171 | 172 | /* 2. Process in vertical direction (columns) */ 173 | ResampleColumns(dev_temp, dev_output, data_size, pitch); 174 | 175 | 176 | } 177 | 178 | void CudaOperationConvolution2D::ResampleRows(CUdeviceptr input, CUdeviceptr output, DataSize3& data_size, size_t pitch) const 179 | { 180 | int kernel_radius = static_cast(kernel_radius_); 181 | 182 | unsigned int rows_results_steps = 4; 183 | unsigned int rows_halo_steps = 1; 184 | 185 | dim3 row_block_dim ={ 16, 4 }; 186 | dim3 row_grid_dim ={ static_cast((data_size.width + (row_block_dim.x * rows_results_steps) - 1) / (row_block_dim.x * rows_results_steps)), 187 | static_cast((data_size.height + row_block_dim.y - 1) / row_block_dim.y) }; 188 | 189 | /* Calculate the needed shared memory size */ 190 | int shared_memory_size; 191 | CUdevice cu_device; 192 | CheckCudaError(cuDeviceGet(&cu_device, 0)); 193 | CheckCudaError(cuDeviceGetAttribute(&shared_memory_size, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, cu_device)); 194 | 195 | int needed_shared_memory_size = 196 | ((rows_results_steps + 2*rows_halo_steps)*row_block_dim.x) * (row_block_dim.y) * sizeof(float); 197 | 198 | //std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size); 199 | 200 | if 
(needed_shared_memory_size > shared_memory_size) { 201 | std::printf("<%s>: Error shared memory allocation. Reduce the thread block size.\n", GetName()); 202 | std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size); 203 | return; 204 | } 205 | 206 | /*std::printf("Starting Convolution kernel: Grid: %d x %d, Blocks %d x %d \n", row_grid_dim.x, 207 | row_grid_dim.y, 208 | row_block_dim.x, 209 | row_block_dim.y);*/ 210 | 211 | 212 | void* args[6] ={ 213 | &output, 214 | &input, 215 | &data_size.width, 216 | &data_size.height, 217 | &pitch, 218 | &kernel_radius }; 219 | 220 | CheckCudaError(cuLaunchKernel(cuf_convolution_rows_, 221 | row_grid_dim.x, row_grid_dim.y, row_grid_dim.z, 222 | row_block_dim.x, row_block_dim.y, row_block_dim.z, 223 | needed_shared_memory_size, 224 | NULL, 225 | args, 226 | NULL)); 227 | } 228 | 229 | void CudaOperationConvolution2D::ResampleColumns(CUdeviceptr input, CUdeviceptr output, DataSize3& data_size, size_t pitch) const 230 | { 231 | int kernel_radius = static_cast(kernel_radius_); 232 | 233 | unsigned int cols_results_steps = 4; 234 | unsigned int cols_halo_steps = 1; 235 | 236 | dim3 col_block_dim ={ 4, 16 }; 237 | dim3 col_grid_dim ={ static_cast((data_size.width + col_block_dim.x - 1) / col_block_dim.x), 238 | static_cast((data_size.height + (col_block_dim.y * cols_results_steps) - 1) / (col_block_dim.y * cols_results_steps)) }; 239 | 240 | int shared_memory_size; 241 | CUdevice cu_device; 242 | CheckCudaError(cuDeviceGet(&cu_device, 0)); 243 | CheckCudaError(cuDeviceGetAttribute(&shared_memory_size, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, cu_device)); 244 | 245 | int needed_shared_memory_size = 246 | (col_block_dim.x * ((cols_results_steps + 2*cols_halo_steps)*col_block_dim.y + 0)) * sizeof(float); 247 | 248 | //std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size); 249 | 250 | if (needed_shared_memory_size > shared_memory_size) { 251 | 
std::printf("<%s>: Error shared memory allocation. Reduce the thread block size.\n", GetName()); 252 | std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size); 253 | return; 254 | } 255 | 256 | /* std::printf("Starting Convolution kernel: Grid: %d x %d, Blocks %d x %d \n", col_grid_dim.x, 257 | col_grid_dim.y, 258 | col_block_dim.x, 259 | col_block_dim.y); 260 | */ 261 | 262 | 263 | void* args[6] ={ 264 | &output, 265 | &input, 266 | &data_size.width, 267 | &data_size.height, 268 | &pitch, 269 | &kernel_radius }; 270 | 271 | CheckCudaError(cuLaunchKernel(cuf_convolution_cols_, 272 | col_grid_dim.x, col_grid_dim.y, col_grid_dim.z, 273 | col_block_dim.x, col_block_dim.y, col_block_dim.z, 274 | needed_shared_memory_size, 275 | NULL, 276 | args, 277 | NULL)); 278 | } 279 | 280 | 281 | 282 | 283 | CudaOperationConvolution2D::~CudaOperationConvolution2D() 284 | { 285 | if (kernel_ != nullptr) 286 | delete kernel_; 287 | 288 | } -------------------------------------------------------------------------------- /src/cuda_operations/2d/cuda_operation_convolution_2d.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_CONVOLUTION_2D_H_ 23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_CONVOLUTION_2D_H_ 24 | 25 | #include 26 | 27 | #include "src/cuda_operations/cuda_operation_base.h" 28 | #include "src/data_types/data_structs.h" 29 | 30 | class CudaOperationConvolution2D : public CudaOperationBase { 31 | private: 32 | CUfunction cuf_convolution_rows_; 33 | CUfunction cuf_convolution_cols_; 34 | 35 | DataSize3 dev_container_size_; 36 | 37 | float* kernel_ = nullptr; 38 | size_t kernel_length_; 39 | size_t kernel_radius_; 40 | 41 | void ComputeGaussianKernel(float sigma, size_t precision, float pixel_size); 42 | 43 | void ResampleRows(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, size_t pitch) const; 44 | void ResampleColumns(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, size_t pitch) const; 45 | 46 | public: 47 | CudaOperationConvolution2D(); 48 | 49 | bool Initialize(const OperationParameters* params = nullptr) override; 50 | void Execute(OperationParameters& params) override; 51 | void PrintConvolutionKernel(); 52 | 53 | ~CudaOperationConvolution2D(); 54 | }; 55 | 56 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_CONVOLUTION_2D_H_ 57 | -------------------------------------------------------------------------------- /src/cuda_operations/2d/cuda_operation_median_2d.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. 
TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #include "cuda_operation_median_2d.h" 23 | 24 | #include 25 | 26 | #include 27 | #include 28 | 29 | #include "src/data_types/data_structs.h" 30 | #include "src/utils/common_utils.h" 31 | #include "src/utils/cuda_utils.h" 32 | 33 | CudaOperationMedian2D::CudaOperationMedian2D() 34 | : CudaOperationBase("CUDA Median 2D") 35 | { 36 | } 37 | 38 | bool CudaOperationMedian2D::Initialize(const OperationParameters* params) 39 | { 40 | initialized_ = false; 41 | 42 | if (!params) { 43 | std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName()); 44 | return initialized_; 45 | } 46 | 47 | DataSize3 container_size; 48 | GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_); 49 | 50 | dev_container_size_ = container_size; 51 | 52 | char exec_path[256]; 53 | Utils::GetExecutablePath(exec_path, 256); 54 | std::strcat(exec_path, "/kernels/median_2d.ptx"); 55 | 56 | if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) { 57 | if (!CheckCudaError(cuModuleGetFunction(&cuf_median_, cu_module_, "median_2d"))) { 58 | size_t const_size; 59 | 60 | /* Get the pointer to the constant memory and copy data */ 61 | if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) { 62 | if (const_size == sizeof(container_size)) { 63 | if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) { 64 | initialized_ = true; 65 | } 66 | } 67 | } 68 | 69 | } else { 70 | CheckCudaError(cuModuleUnload(cu_module_)); 71 | cu_module_ = nullptr; 72 | } 73 | } 74 | return initialized_; 75 | } 76 | 77 | void CudaOperationMedian2D::Execute(OperationParameters& params) 78 | { 79 | if (!IsInitialized()) { 80 | return; 81 | } 82 | 83 | CUdeviceptr dev_input; 84 | CUdeviceptr dev_output; 85 
| 86 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_input, "dev_input"); 87 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_output, "dev_output"); 88 | 89 | DataSize3 data_size; 90 | size_t radius; 91 | GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size"); 92 | GET_PARAM_OR_RETURN(params, size_t, radius, "radius"); 93 | 94 | if (dev_input == dev_output) { 95 | std::printf("Operation '%s': Error. Input buffer cannot serve as output buffer.", GetName()); 96 | return; 97 | } 98 | 99 | /* If the radius equals 1 skip the filtering and copy data from the input to the output */ 100 | if (radius == 1) { 101 | CheckCudaError(cuMemcpyDtoD(dev_output, dev_input, 102 | dev_container_size_.pitch * dev_container_size_.height)); 103 | return; 104 | } 105 | 106 | if (radius % 2 == 0) { 107 | std::printf("Warning. Median raduis is even (%d), decresaing by 1...\n", radius); 108 | radius -= 1; 109 | } 110 | 111 | if (radius >= 3 && radius <= 7) { 112 | /* Be careful with the thread block size. Thread block size in each dimension 113 | should be greater than radius / 2, because radius / 2 threads are used to 114 | load halo around the thread block. */ 115 | dim3 block_dim = { 8, 8}; 116 | 117 | dim3 grid_dim ={ static_cast((data_size.width + block_dim.x - 1) / block_dim.x), 118 | static_cast((data_size.height + block_dim.y - 1) / block_dim.y)}; 119 | 120 | 121 | /* Calculate the needed shared memory size */ 122 | int shared_memory_size; 123 | CUdevice cu_device; 124 | CheckCudaError(cuDeviceGet(&cu_device, 0)); 125 | CheckCudaError(cuDeviceGetAttribute(&shared_memory_size, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, cu_device)); 126 | 127 | int radius_2 = radius / 2; 128 | int needed_shared_memory_size = 129 | (block_dim.x + 2 * radius_2) * (block_dim.y + 2 * radius_2) * sizeof(float); 130 | 131 | if (needed_shared_memory_size > shared_memory_size) { 132 | std::printf("<%s>: Error shared memory allocation. 
Reduce the thread block size.\n", GetName()); 133 | std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size); 134 | return; 135 | } 136 | 137 | 138 | void* args[5] = { 139 | &dev_input, 140 | &data_size.width, 141 | &data_size.height, 142 | &radius, 143 | &dev_output}; 144 | 145 | CheckCudaError(cuLaunchKernel(cuf_median_, 146 | grid_dim.x, grid_dim.y, grid_dim.z, 147 | block_dim.x, block_dim.y, block_dim.z, 148 | needed_shared_memory_size, 149 | NULL, 150 | args, 151 | NULL)); 152 | } else { 153 | std::printf("Error. Wrong median raduis (%d). Supported values: 3, 5, 7\n", radius); 154 | } 155 | } -------------------------------------------------------------------------------- /src/cuda_operations/2d/cuda_operation_median_2d.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_MEDIAN_H_ 23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_MEDIAN_H_ 24 | 25 | #include 26 | 27 | #include "src/cuda_operations/cuda_operation_base.h" 28 | #include "src/data_types/data_structs.h" 29 | 30 | class CudaOperationMedian2D : public CudaOperationBase { 31 | private: 32 | CUfunction cuf_median_; 33 | 34 | DataSize3 dev_container_size_; 35 | 36 | public: 37 | CudaOperationMedian2D(); 38 | 39 | bool Initialize(const OperationParameters* params = nullptr) override; 40 | void Execute(OperationParameters& params) override; 41 | }; 42 | 43 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_MEDIAN_H_ 44 | -------------------------------------------------------------------------------- /src/cuda_operations/2d/cuda_operation_registration_2d.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #include "cuda_operation_registration_2d.h" 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #include "src/data_types/data_structs.h" 29 | #include "src/utils/common_utils.h" 30 | #include "src/utils/cuda_utils.h" 31 | 32 | CudaOperationRegistration2D::CudaOperationRegistration2D() 33 | : CudaOperationBase("CUDA Registration 2D") 34 | { 35 | } 36 | 37 | bool CudaOperationRegistration2D::Initialize(const OperationParameters* params) 38 | { 39 | initialized_ = false; 40 | 41 | if (!params) { 42 | std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName()); 43 | return initialized_; 44 | } 45 | 46 | DataSize3 container_size; 47 | GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_); 48 | 49 | char exec_path[256]; 50 | Utils::GetExecutablePath(exec_path, 256); 51 | std::strcat(exec_path, "/kernels/registration_2d.ptx"); 52 | 53 | if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) { 54 | if (!CheckCudaError(cuModuleGetFunction(&cuf_registration_, cu_module_, "registration_2d"))) { 55 | size_t const_size; 56 | 57 | /* Get the pointer to the constant memory and copy data */ 58 | if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) { 59 | if (const_size == sizeof(container_size)) { 60 | if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) { 61 | initialized_ = true; 62 | } 63 | } 64 | } 65 | 66 | } else { 67 | CheckCudaError(cuModuleUnload(cu_module_)); 68 | cu_module_ = nullptr; 69 | } 70 | } 71 | return initialized_; 72 | } 73 | 74 | void CudaOperationRegistration2D::Execute(OperationParameters& params) 75 | { 76 | if (!IsInitialized()) { 77 | return; 78 | } 79 | 80 | CUdeviceptr dev_frame_0; 81 | CUdeviceptr dev_frame_1; 82 | CUdeviceptr dev_flow_u; 83 | CUdeviceptr dev_flow_v; 84 | CUdeviceptr dev_output; 85 | 86 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_frame_0, 
"dev_frame_0"); 87 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_frame_1, "dev_frame_1"); 88 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_flow_u, "dev_flow_u"); 89 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_flow_v, "dev_flow_v"); 90 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_output, "dev_output"); 91 | 92 | float hx; 93 | float hy; 94 | DataSize3 data_size; 95 | 96 | GET_PARAM_OR_RETURN(params, float, hx, "hx"); 97 | GET_PARAM_OR_RETURN(params, float, hy, "hy"); 98 | GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size"); 99 | 100 | if (dev_frame_1 == dev_output) { 101 | std::printf("Operation '%s': Error. Input buffer cannot serve as output buffer.", GetName()); 102 | return; 103 | } 104 | 105 | dim3 block_dim = { 16, 8, 1 }; 106 | dim3 grid_dim = { static_cast((data_size.width + block_dim.x - 1) / block_dim.x), 107 | static_cast((data_size.height + block_dim.y - 1) / block_dim.y), 108 | 1 }; 109 | 110 | void* args[12] = { 111 | &dev_frame_0, 112 | &dev_frame_1, 113 | &dev_flow_u, 114 | &dev_flow_v, 115 | &data_size.width, 116 | &data_size.height, 117 | &hx, 118 | &hy, 119 | &dev_output}; 120 | 121 | CheckCudaError(cuLaunchKernel(cuf_registration_, 122 | grid_dim.x, grid_dim.y, grid_dim.z, 123 | block_dim.x, block_dim.y, block_dim.z, 124 | 0, 125 | NULL, 126 | args, 127 | NULL)); 128 | } -------------------------------------------------------------------------------- /src/cuda_operations/2d/cuda_operation_registration_2d.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current 
implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_REGISTRATION_H_ 23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_REGISTRATION_H_ 24 | 25 | #include 26 | 27 | #include "src/cuda_operations/cuda_operation_base.h" 28 | 29 | class CudaOperationRegistration2D : public CudaOperationBase { 30 | private: 31 | CUfunction cuf_registration_; 32 | 33 | public: 34 | CudaOperationRegistration2D(); 35 | 36 | bool Initialize(const OperationParameters* params = nullptr) override; 37 | void Execute(OperationParameters& params) override; 38 | }; 39 | 40 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_REGISTRATION_H_ 41 | -------------------------------------------------------------------------------- /src/cuda_operations/2d/cuda_operation_resample_2d.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #include "cuda_operation_resample_2d.h" 23 | 24 | #include 25 | 26 | #include 27 | 28 | #include "src/data_types/data_structs.h" 29 | #include "src/utils/common_utils.h" 30 | #include "src/utils/cuda_utils.h" 31 | 32 | CudaOperationResample2D::CudaOperationResample2D() 33 | : CudaOperationBase("CUDA Resample 2D") 34 | { 35 | } 36 | 37 | bool CudaOperationResample2D::Initialize(const OperationParameters* params) 38 | { 39 | initialized_ = false; 40 | 41 | if (!params) { 42 | std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName()); 43 | return initialized_; 44 | } 45 | 46 | DataSize3 container_size; 47 | GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_); 48 | 49 | 50 | char exec_path[256]; 51 | Utils::GetExecutablePath(exec_path, 256); 52 | std::strcat(exec_path, "/kernels/resample_2d.ptx"); 53 | 54 | if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) { 55 | if (!CheckCudaError(cuModuleGetFunction(&cuf_resample_x_, cu_module_, "resample_x")) && 56 | !CheckCudaError(cuModuleGetFunction(&cuf_resample_y_, cu_module_, "resample_y"))) { 57 | size_t const_size; 58 | 59 | /* Get the pointer to the constant memory and copy data */ 60 | if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) { 61 | if (const_size == sizeof(container_size)) { 62 | if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) { 63 | initialized_ = true; 64 | } 65 | } 66 | } 67 | 68 | } else { 69 | CheckCudaError(cuModuleUnload(cu_module_)); 70 | cu_module_ = nullptr; 71 | } 72 | } 73 | return initialized_; 74 | } 75 | 76 | void CudaOperationResample2D::Execute(OperationParameters& params) 77 | { 78 | if (!IsInitialized()) { 79 | return; 80 | } 81 | CUdeviceptr dev_input = 0; 82 | CUdeviceptr dev_output = 0; 83 | CUdeviceptr dev_temp = 0; 84 | 85 | DataSize3 data_size; 86 | DataSize3 resample_size; 87 | 88 | 
GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_input, "dev_input"); 89 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_output, "dev_output"); 90 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_temp, "dev_temp"); 91 | GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size"); 92 | GET_PARAM_OR_RETURN(params, DataSize3, resample_size, "resample_size"); 93 | 94 | if (dev_input == dev_output) { 95 | std::printf("Operation '%s': Error. Input buffer cannot serve as output buffer.", GetName()); 96 | return; 97 | } 98 | 99 | DataSize3 output_size = data_size; 100 | output_size.width = resample_size.width; 101 | ResampleX(dev_input, dev_temp, data_size, output_size); 102 | 103 | data_size.width = output_size.width; 104 | output_size.height = resample_size.height; 105 | ResampleY(dev_temp, dev_output, data_size, output_size); 106 | 107 | } 108 | 109 | void CudaOperationResample2D::ResampleX(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, DataSize3& output_size) const 110 | { 111 | dim3 block_dim = { 16, 8}; 112 | dim3 grid_dim = { static_cast((output_size.width + block_dim.x - 1) / block_dim.x), 113 | static_cast((output_size.height + block_dim.y - 1) / block_dim.y) 114 | }; 115 | 116 | void* args[5] = { &input, 117 | &output, 118 | &output_size.width, 119 | &output_size.height, 120 | &input_size.width }; 121 | 122 | 123 | CheckCudaError(cuLaunchKernel(cuf_resample_x_, 124 | grid_dim.x, grid_dim.y, grid_dim.z, 125 | block_dim.x, block_dim.y, block_dim.z, 126 | 0, 127 | NULL, 128 | args, 129 | NULL)); 130 | } 131 | 132 | void CudaOperationResample2D::ResampleY(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, DataSize3& output_size) const 133 | { 134 | dim3 block_dim = { 16, 8}; 135 | dim3 grid_dim = { static_cast((output_size.width + block_dim.x - 1) / block_dim.x), 136 | static_cast((output_size.height + block_dim.y - 1) / block_dim.y) 137 | }; 138 | 139 | void* args[5] = { &input, 140 | &output, 141 | &output_size.width, 142 | 
&output_size.height, 143 | &input_size.height }; 144 | 145 | CheckCudaError(cuLaunchKernel(cuf_resample_y_, 146 | grid_dim.x, grid_dim.y, grid_dim.z, 147 | block_dim.x, block_dim.y, block_dim.z, 148 | 0, 149 | NULL, 150 | args, 151 | NULL)); 152 | } 153 | 154 | -------------------------------------------------------------------------------- /src/cuda_operations/2d/cuda_operation_resample_2d.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_RESAMPLE_2D_H_ 23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_RESAMPLE_2D_H_ 24 | 25 | #include 26 | 27 | #include "src/cuda_operations/cuda_operation_base.h" 28 | #include "src/data_types/data_structs.h" 29 | 30 | class CudaOperationResample2D : public CudaOperationBase { 31 | private: 32 | CUfunction cuf_resample_x_; 33 | CUfunction cuf_resample_y_; 34 | 35 | void ResampleX(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, DataSize3& output_size) const; 36 | void ResampleY(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, DataSize3& output_size) const; 37 | 38 | 39 | public: 40 | CudaOperationResample2D(); 41 | 42 | bool Initialize(const OperationParameters* params = nullptr) override; 43 | void Execute(OperationParameters& params) override; 44 | }; 45 | 46 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_RESAMPLE_2D_H_ 47 | -------------------------------------------------------------------------------- /src/cuda_operations/2d/cuda_operation_solve_2d.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #include "cuda_operation_solve_2d.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include "src/data_types/data_structs.h" 30 | #include "src/utils/common_utils.h" 31 | #include "src/utils/cuda_utils.h" 32 | 33 | 34 | CudaOperationSolve2D::CudaOperationSolve2D() 35 | : CudaOperationBase("CUDA Solve 2D") 36 | { 37 | } 38 | 39 | bool CudaOperationSolve2D::Initialize(const OperationParameters* params) 40 | { 41 | initialized_ = false; 42 | 43 | if (!params) { 44 | std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName()); 45 | return initialized_; 46 | } 47 | 48 | DataSize3 container_size; 49 | DataConstancy data_constancy = DataConstancy::LogDerivatives; 50 | 51 | GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_); 52 | GET_PARAM_OR_RETURN_VALUE(*params, DataConstancy, data_constancy, "data_constancy", initialized_); 53 | 54 | dev_container_size_ = container_size; 55 | 56 | char exec_path[256]; 57 | Utils::GetExecutablePath(exec_path, 256); 58 | std::strcat(exec_path, "/kernels/solve_2d.ptx"); 59 | 60 | if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) { 61 | 62 | std::string solve_function_name; 63 | 64 | 65 | switch (data_constancy) 66 | { 67 | case DataConstancy::Grey: 68 | solve_function_name = "solve_2d"; 69 | break; 70 | 71 | case DataConstancy::Gradient: 72 | solve_function_name = "solve_2d_grad"; 73 | break; 74 | 75 | case DataConstancy::LogDerivatives: 76 | solve_function_name = "solve_2d_log"; 77 | break; 78 | 79 | default: 80 | solve_function_name = "solve_2d"; 81 | break; 82 | } 83 | 84 | if (!CheckCudaError(cuModuleGetFunction(&cuf_compute_phi_ksi_, cu_module_, "compute_phi_ksi")) && 85 | !CheckCudaError(cuModuleGetFunction(&cuf_solve_, cu_module_, solve_function_name.c_str())) 86 | ) { 87 | size_t const_size; 88 | 89 | /* Get the pointer to the constant memory and copy data */ 90 | if 
(!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) { 91 | if (const_size == sizeof(container_size)) { 92 | if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) { 93 | initialized_ = true; 94 | } 95 | } 96 | } 97 | 98 | } else { 99 | CheckCudaError(cuModuleUnload(cu_module_)); 100 | cu_module_ = nullptr; 101 | } 102 | } 103 | return initialized_; 104 | } 105 | 106 | void CudaOperationSolve2D::Execute(OperationParameters& params) 107 | { 108 | if (!IsInitialized()) { 109 | return; 110 | } 111 | 112 | CUdeviceptr dev_frame_0; 113 | CUdeviceptr dev_frame_1; 114 | CUdeviceptr dev_flow_u; 115 | CUdeviceptr dev_flow_v; 116 | CUdeviceptr dev_phi; 117 | CUdeviceptr dev_ksi; 118 | 119 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_frame_0, "dev_frame_0"); 120 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_frame_1, "dev_frame_1"); 121 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_flow_u, "dev_flow_u"); 122 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_flow_v, "dev_flow_v"); 123 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_phi, "dev_phi"); 124 | GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_ksi, "dev_ksi"); 125 | 126 | /* Use references to guaranty correct pointer values 127 | outside this operation after using std::swap() */ 128 | CUdeviceptr* dev_flow_du_ptr; 129 | CUdeviceptr* dev_flow_dv_ptr; 130 | CUdeviceptr* dev_temp_du_ptr; 131 | CUdeviceptr* dev_temp_dv_ptr; 132 | 133 | GET_PARAM_PTR_OR_RETURN(params, CUdeviceptr, dev_flow_du_ptr, "dev_flow_du"); 134 | GET_PARAM_PTR_OR_RETURN(params, CUdeviceptr, dev_flow_dv_ptr, "dev_flow_dv"); 135 | GET_PARAM_PTR_OR_RETURN(params, CUdeviceptr, dev_temp_du_ptr, "dev_temp_du"); 136 | GET_PARAM_PTR_OR_RETURN(params, CUdeviceptr, dev_temp_dv_ptr, "dev_temp_dv"); 137 | 138 | 139 | CUdeviceptr& dev_flow_du = *dev_flow_du_ptr; 140 | CUdeviceptr& dev_flow_dv = *dev_flow_dv_ptr; 141 | CUdeviceptr& dev_temp_du = *dev_temp_du_ptr; 142 | CUdeviceptr& 
dev_temp_dv = *dev_temp_dv_ptr; 143 | 144 | size_t outer_iterations_count; 145 | size_t inner_iterations_count; 146 | float equation_alpha; 147 | float equation_smoothness; 148 | float equation_data; 149 | float hx; 150 | float hy; 151 | DataSize3 data_size; 152 | 153 | GET_PARAM_OR_RETURN(params, size_t, outer_iterations_count, "outer_iterations_count"); 154 | GET_PARAM_OR_RETURN(params, size_t, inner_iterations_count, "inner_iterations_count"); 155 | GET_PARAM_OR_RETURN(params, float, equation_alpha, "equation_alpha"); 156 | GET_PARAM_OR_RETURN(params, float, equation_smoothness, "equation_smoothness"); 157 | GET_PARAM_OR_RETURN(params, float, equation_data, "equation_data"); 158 | GET_PARAM_OR_RETURN(params, float, hx, "hx"); 159 | GET_PARAM_OR_RETURN(params, float, hy, "hy"); 160 | GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size"); 161 | 162 | 163 | /* Run solver kernels */ 164 | dim3 block_dim = { 16, 8}; 165 | 166 | int shared_memory_size; 167 | CUdevice cu_device; 168 | CheckCudaError(cuDeviceGet(&cu_device, 0)); 169 | CheckCudaError(cuDeviceGetAttribute(&shared_memory_size, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, cu_device)); 170 | 171 | /* We need 6 extended blocks in the shared memory */ 172 | int number_shared_blocks = 6; 173 | int needed_shared_memory_size = 174 | (block_dim.x + 2) * (block_dim.y + 2) * sizeof(float) * number_shared_blocks; 175 | 176 | int number_shared_blocks_solve = 8; 177 | 178 | DataConstancy data_constancy; 179 | GET_PARAM_OR_RETURN(params, DataConstancy, data_constancy, "data_constancy"); 180 | 181 | switch (data_constancy) 182 | { 183 | case DataConstancy::Grey: 184 | number_shared_blocks_solve = 8; 185 | break; 186 | 187 | case DataConstancy::Gradient: 188 | number_shared_blocks_solve = 11; 189 | break; 190 | 191 | case DataConstancy::LogDerivatives: 192 | number_shared_blocks_solve = 11; 193 | break; 194 | 195 | default: 196 | number_shared_blocks_solve = 8; 197 | break; 198 | } 199 | 200 | 201 | 202 | int 
needed_shared_memory_size_solve = 203 | (block_dim.x + 2) * (block_dim.y + 2) * sizeof(float) * number_shared_blocks_solve; 204 | 205 | if (needed_shared_memory_size > shared_memory_size || needed_shared_memory_size_solve > shared_memory_size) { 206 | std::printf("<%s>: Error shared memory allocation. Reduce thread block size.\n", GetName()); 207 | std::printf("Shared memory: %d Needed: %d | %d\n", shared_memory_size, needed_shared_memory_size, needed_shared_memory_size_solve); 208 | return; 209 | } 210 | 211 | /* Mesure execution time */ 212 | size_t total_iterations_count = outer_iterations_count * inner_iterations_count; 213 | 214 | CUevent cu_event_start; 215 | CUevent cu_event_stop; 216 | 217 | CheckCudaError(cuEventCreate(&cu_event_start, CU_EVENT_DEFAULT)); 218 | CheckCudaError(cuEventCreate(&cu_event_stop, CU_EVENT_DEFAULT)); 219 | 220 | CheckCudaError(cuEventRecord(cu_event_start, NULL)); 221 | 222 | /* Display computation status */ 223 | if (!this->silent) { 224 | Utils::PrintProgressBar(0.f); 225 | std::printf(" % 3.0f%%", 0.f); 226 | } 227 | 228 | /* Initialize flow increment buffers with 0 */ 229 | CheckCudaError(cuMemsetD2D8(dev_flow_du, dev_container_size_.pitch, 0, data_size.width * sizeof(float), 230 | dev_container_size_.height)); 231 | CheckCudaError(cuMemsetD2D8(dev_flow_dv, dev_container_size_.pitch, 0, data_size.width * sizeof(float), 232 | dev_container_size_.height)); 233 | 234 | 235 | dim3 grid_dim = { static_cast((data_size.width + block_dim.x - 1) / block_dim.x), 236 | static_cast((data_size.height + block_dim.y - 1) / block_dim.y)}; 237 | 238 | for (size_t i = 0; i < outer_iterations_count; ++i) { 239 | void* args[14] = { 240 | &dev_frame_0, 241 | &dev_frame_1, 242 | &dev_flow_u, 243 | &dev_flow_v, 244 | &dev_flow_du, 245 | &dev_flow_dv, 246 | &data_size.width, 247 | &data_size.height, 248 | &hx, 249 | &hy, 250 | &equation_smoothness, 251 | &equation_data, 252 | &dev_phi, 253 | &dev_ksi }; 254 | 255 | 
CheckCudaError(cuLaunchKernel(cuf_compute_phi_ksi_, 256 | grid_dim.x, grid_dim.y, grid_dim.z, 257 | block_dim.x, block_dim.y, block_dim.z, 258 | needed_shared_memory_size, 259 | NULL, 260 | args, 261 | NULL)); 262 | 263 | for (size_t j = 0; j < inner_iterations_count; ++j) { 264 | void* args[15] = { 265 | &dev_frame_0, 266 | &dev_frame_1, 267 | &dev_flow_u, 268 | &dev_flow_v, 269 | &dev_flow_du, 270 | &dev_flow_dv, 271 | &dev_phi, 272 | &dev_ksi, 273 | &data_size.width, 274 | &data_size.height, 275 | &hx, 276 | &hy, 277 | &equation_alpha, 278 | &dev_temp_du, 279 | &dev_temp_dv}; 280 | 281 | CheckCudaError(cuLaunchKernel(cuf_solve_, 282 | grid_dim.x, grid_dim.y, grid_dim.z, 283 | block_dim.x, block_dim.y, block_dim.z, 284 | needed_shared_memory_size_solve, 285 | NULL, 286 | args, 287 | NULL)); 288 | std::swap(dev_flow_du, dev_temp_du); 289 | std::swap(dev_flow_dv, dev_temp_dv); 290 | 291 | CheckCudaError(cuStreamSynchronize(NULL)); 292 | 293 | /* Display computation status */ 294 | if (!this->silent) { 295 | float complete = (i * inner_iterations_count + j) / static_cast(total_iterations_count); 296 | Utils::PrintProgressBar(complete); 297 | std::printf(" % 3.0f%%", complete * 100); 298 | } 299 | } 300 | } 301 | /* Estimate GPU computation time */ 302 | CheckCudaError(cuEventRecord(cu_event_stop, NULL)); 303 | CheckCudaError(cuEventSynchronize(cu_event_stop)); 304 | 305 | float elapsed_time; 306 | CheckCudaError(cuEventElapsedTime(&elapsed_time, cu_event_start, cu_event_stop)); 307 | 308 | if (!this->silent) { 309 | Utils::PrintProgressBar(1.f); 310 | std::printf(" %8.4fs\n", elapsed_time / 1000.); 311 | } 312 | 313 | CheckCudaError(cuEventDestroy(cu_event_start)); 314 | CheckCudaError(cuEventDestroy(cu_event_stop)); 315 | } -------------------------------------------------------------------------------- /src/cuda_operations/2d/cuda_operation_solve_2d.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical 
flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_SOLVE_2D_H_ 23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_SOLVE_2D_H_ 24 | 25 | #include 26 | 27 | #include "src/cuda_operations/cuda_operation_base.h" 28 | #include "src/data_types/data_structs.h" 29 | 30 | class CudaOperationSolve2D : public CudaOperationBase { 31 | private: 32 | CUfunction cuf_compute_phi_ksi_; 33 | CUfunction cuf_solve_; 34 | 35 | DataSize3 dev_container_size_; 36 | 37 | public: 38 | CudaOperationSolve2D(); 39 | 40 | bool Initialize(const OperationParameters* params = nullptr) override; 41 | void Execute(OperationParameters& params) override; 42 | 43 | bool silent = false; 44 | }; 45 | 46 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_SOLVE_2D_H_ 47 | -------------------------------------------------------------------------------- /src/cuda_operations/cuda_operation_base.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * 
Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #include "cuda_operation_base.h" 23 | 24 | #include 25 | #include 26 | 27 | #include "src/utils/cuda_utils.h" 28 | 29 | CudaOperationBase::CudaOperationBase(const char* name) 30 | : name_(name) 31 | { 32 | } 33 | 34 | const char* CudaOperationBase::GetName() const 35 | { 36 | return name_; 37 | }; 38 | 39 | bool CudaOperationBase::IsInitialized() const 40 | { 41 | if (!initialized_) { 42 | std::printf("Error: Operation '%s' was not initialized.\n", name_); 43 | } 44 | return initialized_; 45 | } 46 | 47 | void CudaOperationBase::Execute(OperationParameters& params) 48 | { 49 | std::printf("Warning: '%s' Execute() was not defined.\n", name_); 50 | } 51 | 52 | void CudaOperationBase::Destroy() 53 | { 54 | if (cu_module_) { 55 | CheckCudaError(cuModuleUnload(cu_module_)); 56 | cu_module_ = nullptr; 57 | } 58 | initialized_ = false; 59 | } 60 | 61 | CudaOperationBase::~CudaOperationBase() 62 | { 63 | if (initialized_) { 64 | Destroy(); 65 | } 66 | } 67 | 68 | -------------------------------------------------------------------------------- /src/cuda_operations/cuda_operation_base.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following 
licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_CUDA_OPERATION_BASE_H_ 23 | #define GPUFLOW3D_CUDA_OPERATIONS_CUDA_OPERATION_BASE_H_ 24 | 25 | #include 26 | 27 | #include "src/data_types/operation_parameters.h" 28 | 29 | class CudaOperationBase { 30 | private: 31 | const char* name_ = nullptr; 32 | 33 | protected: 34 | CUmodule cu_module_ = nullptr; 35 | 36 | CUdeviceptr dev_constants_ = 0; 37 | 38 | bool initialized_ = false; 39 | 40 | CudaOperationBase(const char* name); 41 | 42 | bool IsInitialized() const; 43 | 44 | public: 45 | const char* GetName() const; 46 | 47 | virtual bool Initialize(const OperationParameters* params = nullptr) = 0; 48 | virtual void Execute(OperationParameters& params); 49 | virtual void Destroy(); 50 | 51 | virtual ~CudaOperationBase(); 52 | 53 | }; 54 | 55 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_CUDA_OPERATION_BASE_H_ 56 | -------------------------------------------------------------------------------- /src/data_types/data2d.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #include "data2d.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | 31 | #include "src/utils/cuda_utils.h" 32 | 33 | 34 | //#define ALLOCATE_PINNED_MEMORY 35 | 36 | Data2D::Data2D() 37 | : width_(0), height_(0) 38 | {} 39 | 40 | Data2D::Data2D(size_t width, size_t height) 41 | : width_(width), height_(height) 42 | { 43 | data_ = static_cast(AllocateMemory(width_, height_)); 44 | } 45 | 46 | void Data2D::Swap(Data2D& data2d) 47 | { 48 | if (this->width_ == data2d.width_ && 49 | this->height_ == data2d.height_) { 50 | std::swap(this->data_, data2d.data_); 51 | } else { 52 | std::printf("Error. Cannot swap two Data2D objects (wrong dimensions).\n"); 53 | } 54 | } 55 | 56 | void* Data2D::AllocateMemory(size_t width, size_t height) 57 | { 58 | void* ptr = nullptr; 59 | 60 | #ifdef ALLOCATE_PINNED_MEMORY 61 | if (CheckCudaError(cuMemAllocHost(&ptr, width * height * sizeof(float)))) { 62 | Invalidate(); 63 | return nullptr; 64 | } 65 | #else 66 | try { 67 | ptr = new float[width * height]; 68 | } 69 | catch (std::bad_alloc& ba) { 70 | Invalidate(); 71 | std::printf("Error. Cannot allocate memory on the host. 
(%s)\n", ba.what()); 72 | } 73 | #endif 74 | 75 | return ptr; 76 | } 77 | 78 | void Data2D::FreeMemory(void* pointer) 79 | { 80 | #ifdef ALLOCATE_PINNED_MEMORY 81 | if (pointer) 82 | CheckCudaError(cuMemFreeHost(pointer)); 83 | #else 84 | if (pointer) { 85 | delete[] pointer; 86 | pointer = nullptr; 87 | } 88 | #endif 89 | } 90 | 91 | void Data2D::ZeroData() 92 | { 93 | if (data_) { 94 | std::memset(data_, 0, width_ * height_ * sizeof(float)); 95 | } 96 | } 97 | 98 | bool Data2D::ReadRAWFromFileU8(const char* filename, size_t width, size_t height) 99 | { 100 | bool loaded = false; 101 | std::FILE *file = std::fopen(filename, "rb"); 102 | if (file) { 103 | this->Invalidate(); 104 | 105 | width_ = width; 106 | height_ = height; 107 | data_ = static_cast(AllocateMemory(width_, height_)); 108 | 109 | if (data_) { 110 | unsigned char *dataU8 = new unsigned char[width_]; 111 | 112 | for (size_t y = 0; y < height_; ++y) { 113 | size_t readed = std::fread(dataU8, sizeof(unsigned char), width_, file); 114 | 115 | if (readed == width_) { 116 | for (size_t x = 0; x < width_; ++x) { 117 | data_[Index(x, y)] = static_cast(dataU8[x]); 118 | } 119 | } else { 120 | goto loop_break; 121 | } 122 | } 123 | 124 | 125 | if (std::fread(dataU8, sizeof(unsigned char), width_, file) == 0) { 126 | loaded = true; 127 | } else { 128 | 129 | loop_break: 130 | std::printf("Error reading RAW data from file '%s': wrong dimensions.", filename); 131 | this->Invalidate(); 132 | } 133 | 134 | delete[] dataU8; 135 | std::fclose(file); 136 | } 137 | } else { 138 | std::printf("Cannot open file '%s'.\n", filename); 139 | } 140 | return loaded; 141 | } 142 | 143 | bool Data2D::ReadRAWFromFileF32(const char* filename, size_t width, size_t height) 144 | { 145 | bool loaded = false; 146 | std::FILE *file = std::fopen(filename, "rb"); 147 | if (file) { 148 | this->Invalidate(); 149 | 150 | width_ = width; 151 | height_ = height; 152 | data_ = static_cast(AllocateMemory(width_, height_)); 153 | 154 | if 
(data_) { 155 | 156 | for (size_t y = 0; y < height_; ++y) { 157 | size_t readed = std::fread(&data_[Index(0, y)], sizeof(float), width_, file); 158 | if (readed != width_) { 159 | goto loop_break; 160 | } 161 | } 162 | 163 | 164 | if (std::fread(data_, sizeof(unsigned char), width_, file) == 0) { 165 | loaded = true; 166 | } else { 167 | loop_break: 168 | std::printf("Error reading RAW data from file '%s': wrong dimensions.", filename); 169 | this->Invalidate(); 170 | } 171 | 172 | std::fclose(file); 173 | } 174 | } else { 175 | std::printf("Cannot open file '%s'.\n", filename); 176 | } 177 | return loaded; 178 | } 179 | 180 | bool Data2D::WriteRAWToFileU8(const char* filename) 181 | { 182 | std::FILE *file = std::fopen(filename, "wb"); 183 | if (file) { 184 | unsigned char *dataU8 = new unsigned char[width_]; 185 | 186 | 187 | for (size_t y = 0; y < height_; ++y) { 188 | for (size_t x = 0; x < width_; ++x) { 189 | float dataF32 = std::min(255.f, std::max(0.f, data_[Index(x, y)])); 190 | dataU8[x] = static_cast(dataF32); 191 | } 192 | size_t written = std::fwrite(dataU8, sizeof(unsigned char), width_, file); 193 | if (written != width_) { 194 | std::printf("Error writing RAW data to file '%s'.", filename); 195 | delete[] dataU8; 196 | std::fclose(file); 197 | return false; 198 | } 199 | } 200 | 201 | delete[] dataU8; 202 | std::fclose(file); 203 | return true; 204 | } else { 205 | std::printf("Cannot open file '%s'.\n", filename); 206 | return false; 207 | } 208 | } 209 | 210 | bool Data2D::WriteRAWToFileF32(const char* filename) 211 | { 212 | std::FILE *file = std::fopen(filename, "wb"); 213 | if (file) { 214 | unsigned char *dataU8 = new unsigned char[width_]; 215 | 216 | for (size_t y = 0; y < height_; ++y) { 217 | size_t written = std::fwrite(&data_[Index(0, y)], sizeof(float), width_, file); 218 | if (written != width_) { 219 | std::printf("Error writing RAW data to file '%s'.", filename); 220 | std::fclose(file); 221 | return false; 222 | } 223 | } 224 | 225 
| std::fclose(file); 226 | return true; 227 | } else { 228 | std::printf("Cannot open file '%s'.\n", filename); 229 | return false; 230 | } 231 | } 232 | 233 | //bool Data3D::WriteFlowToFileVTK(const char* filename, const Data3D& flow_u, const Data3D& flow_v, const Data3D& flow_w) 234 | //{ 235 | // std::FILE *file = std::fopen(filename, "wb"); 236 | // if (file) { 237 | // std::fprintf(file, "# vtk DataFile Version 2.0\n"); 238 | // std::fprintf(file, "3D Vector field computed by GpuFlow3D\n"); 239 | // std::fprintf(file, "BINARY\n"); 240 | // std::fprintf(file, "DATASET STRUCTURED_POINTS\n"); 241 | // std::fprintf(file, "DIMENSIONS %d %d %d\n", flow_u.width_, flow_u.height_, flow_u.depth_); 242 | // std::fprintf(file, "ORIGIN 0 0 0\n"); 243 | // std::fprintf(file, "SPACING 1 1 1\n"); 244 | // std::fprintf(file, "POINT_DATA %d\n", flow_u.width_ * flow_u.height_ * flow_u.depth_); 245 | // std::fprintf(file, "VECTORS vectors float\n"); 246 | // 247 | // for (size_t z = 0; z < flow_u.depth_; ++z) { 248 | // for (size_t y = 0; y < flow_u.height_; ++y) { 249 | // for (size_t x = 0; x < flow_u.width_; ++x) { 250 | // std::fwrite(&flow_u.data_[(z * flow_u.height_ + y) * flow_u.width_ + x], sizeof(float), 1, file); 251 | // std::fwrite(&flow_v.data_[(z * flow_v.height_ + y) * flow_v.width_ + x], sizeof(float), 1, file); 252 | // std::fwrite(&flow_w.data_[(z * flow_w.height_ + y) * flow_w.width_ + x], sizeof(float), 1, file); 253 | // } 254 | // } 255 | // } 256 | // 257 | // std::fclose(file); 258 | // return true; 259 | // } else { 260 | // std::printf("Cannot open file '%s'.\n", filename); 261 | // return false; 262 | // } 263 | //} 264 | 265 | void Data2D::Invalidate() 266 | { 267 | width_ = 0; 268 | height_ = 0; 269 | FreeMemory(data_); 270 | } 271 | 272 | Data2D::~Data2D() 273 | { 274 | FreeMemory(data_); 275 | } -------------------------------------------------------------------------------- /src/data_types/data2d.h: 
-------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_DATA_TYPES_DATA2D_H_ 23 | #define GPUFLOW3D_DATA_TYPES_DATA2D_H_ 24 | 25 | #include 26 | 27 | class Data2D 28 | { 29 | private: 30 | float *data_ = nullptr; 31 | size_t width_; 32 | size_t height_; 33 | 34 | inline size_t Index(size_t x, size_t y) { 35 | return y * width_ + x; 36 | } 37 | 38 | void Invalidate(); 39 | 40 | void* AllocateMemory(size_t width, size_t height); 41 | void FreeMemory(void* pointer); 42 | 43 | public: 44 | Data2D(); 45 | Data2D(size_t width, size_t height); 46 | 47 | inline size_t Width() { return width_; }; 48 | inline size_t Height() { return height_; }; 49 | inline float* DataPtr() { return data_; }; 50 | inline float& Data(size_t x, size_t y) { return data_[Index(x, y)]; }; 51 | 52 | void Swap(Data2D& data2d); 53 | 54 | void ZeroData(); 55 | 56 | bool ReadRAWFromFileU8(const char* filename, size_t width, size_t height); 57 | bool ReadRAWFromFileF32(const char* filename, size_t width, size_t height); 58 | 59 | bool WriteRAWToFileU8(const char* filename); 60 | bool WriteRAWToFileF32(const char* filename); 61 | 62 | //static bool WriteFlowToFileVTK(const char* filename, const Data3D& flow_u, const Data3D& flow_v, const Data3D& flow_w); 63 | 64 | ~Data2D(); 
65 | }; 66 | 67 | 68 | 69 | #endif // !GPUFLOW3D_DATA_TYPES_DATA2D_H_ -------------------------------------------------------------------------------- /src/data_types/data3d.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #include "src/data_types/data3d.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | 31 | #include "src/utils/cuda_utils.h" 32 | 33 | 34 | 35 | //#define ALLOCATE_PINNED_MEMORY 36 | 37 | Data3D::Data3D() 38 | : width_(0), height_(0), depth_(0) 39 | {} 40 | 41 | Data3D::Data3D(size_t width, size_t height, size_t depth) 42 | : width_(width), height_(height), depth_(depth) 43 | { 44 | data_ = static_cast(AllocateMemory(width_, height_, depth_)); 45 | } 46 | 47 | void Data3D::Swap(Data3D& data3d) 48 | { 49 | if (this->width_ == data3d.width_ && 50 | this->height_ == data3d.height_ && 51 | this->depth_ == data3d.depth_) { 52 | std::swap(this->data_, data3d.data_); 53 | } else { 54 | std::printf("Error. 
Cannot swap two Data3D objects (wrong dimensions).\n"); 55 | } 56 | } 57 | 58 | void* Data3D::AllocateMemory(size_t width, size_t height, size_t depth) 59 | { 60 | void* ptr = nullptr; 61 | 62 | #ifdef ALLOCATE_PINNED_MEMORY 63 | if (CheckCudaError(cuMemAllocHost(&ptr, width * height * depth * sizeof(float)))) { 64 | Invalidate(); 65 | return nullptr; 66 | } 67 | #else 68 | try { 69 | ptr = new float[width * height * depth]; 70 | } 71 | catch (std::bad_alloc& ba) { 72 | Invalidate(); 73 | std::printf("Error. Cannot allocate memory on the host. (%s)\n", ba.what()); 74 | } 75 | #endif 76 | 77 | return ptr; 78 | } 79 | 80 | void Data3D::FreeMemory(void* pointer) 81 | { 82 | #ifdef ALLOCATE_PINNED_MEMORY 83 | if (pointer) 84 | CheckCudaError(cuMemFreeHost(pointer)); 85 | #else 86 | if (pointer) { 87 | delete[] pointer; 88 | pointer = nullptr; 89 | } 90 | #endif 91 | } 92 | 93 | void Data3D::ZeroData() 94 | { 95 | if (data_) { 96 | std::memset(data_, 0, width_ * height_ * depth_ * sizeof(float)); 97 | } 98 | } 99 | 100 | bool Data3D::ReadRAWFromFileU8(const char* filename, size_t width, size_t height, size_t depth) 101 | { 102 | bool loaded = false; 103 | std::FILE *file = std::fopen(filename, "rb"); 104 | if (file) { 105 | this->Invalidate(); 106 | 107 | width_ = width; 108 | height_ = height; 109 | depth_ = depth; 110 | data_ = static_cast(AllocateMemory(width_, height_, depth_)); 111 | 112 | if (data_) { 113 | unsigned char *dataU8 = new unsigned char[width_]; 114 | 115 | for (size_t z = 0; z < depth_; ++z) { 116 | for (size_t y = 0; y < height_; ++y) { 117 | size_t readed = std::fread(dataU8, sizeof(unsigned char), width_, file); 118 | 119 | if (readed == width_) { 120 | for (size_t x = 0; x < width_; ++x) { 121 | data_[Index(x, y, z)] = static_cast(dataU8[x]); 122 | } 123 | } else { 124 | goto loop_break; 125 | } 126 | } 127 | } 128 | 129 | if (std::fread(dataU8, sizeof(unsigned char), width_, file) == 0) { 130 | loaded = true; 131 | } else { 132 | 133 | 
loop_break: 134 | std::printf("Error reading RAW data from file '%s': wrong dimensions.", filename); 135 | this->Invalidate(); 136 | } 137 | 138 | delete[] dataU8; 139 | std::fclose(file); 140 | } 141 | } else { 142 | std::printf("Cannot open file '%s'.\n", filename); 143 | } 144 | return loaded; 145 | } 146 | 147 | bool Data3D::ReadRAWFromFileF32(const char* filename, size_t width, size_t height, size_t depth) 148 | { 149 | bool loaded = false; 150 | std::FILE *file = std::fopen(filename, "rb"); 151 | if (file) { 152 | this->Invalidate(); 153 | 154 | width_ = width; 155 | height_ = height; 156 | depth_ = depth; 157 | data_ = static_cast(AllocateMemory(width_, height_, depth_)); 158 | 159 | if (data_) { 160 | for (size_t z = 0; z < depth_; ++z) { 161 | for (size_t y = 0; y < height_; ++y) { 162 | size_t readed = std::fread(&data_[Index(0, y, z)], sizeof(float), width_, file); 163 | if (readed != width_) { 164 | goto loop_break; 165 | } 166 | } 167 | } 168 | 169 | if (std::fread(data_, sizeof(unsigned char), width_, file) == 0) { 170 | loaded = true; 171 | } else { 172 | loop_break: 173 | std::printf("Error reading RAW data from file '%s': wrong dimensions.", filename); 174 | this->Invalidate(); 175 | } 176 | 177 | std::fclose(file); 178 | } 179 | } else { 180 | std::printf("Cannot open file '%s'.\n", filename); 181 | } 182 | return loaded; 183 | } 184 | 185 | bool Data3D::WriteRAWToFileU8(const char* filename) 186 | { 187 | std::FILE *file = std::fopen(filename, "wb"); 188 | if (file) { 189 | unsigned char *dataU8 = new unsigned char[width_]; 190 | 191 | for (size_t z = 0; z < depth_; ++z) { 192 | for (size_t y = 0; y < height_; ++y) { 193 | for (size_t x = 0; x < width_; ++x) { 194 | float dataF32 = std::min(255.f, std::max(0.f, data_[Index(x, y, z)])); 195 | dataU8[x] = static_cast(dataF32); 196 | } 197 | size_t written = std::fwrite(dataU8, sizeof(unsigned char), width_, file); 198 | if (written != width_) { 199 | std::printf("Error writing RAW data to file 
'%s'.", filename); 200 | delete[] dataU8; 201 | std::fclose(file); 202 | return false; 203 | } 204 | } 205 | } 206 | delete[] dataU8; 207 | std::fclose(file); 208 | return true; 209 | } else { 210 | std::printf("Cannot open file '%s'.\n", filename); 211 | return false; 212 | } 213 | } 214 | 215 | bool Data3D::WriteRAWToFileF32(const char* filename) 216 | { 217 | std::FILE *file = std::fopen(filename, "wb"); 218 | if (file) { 219 | unsigned char *dataU8 = new unsigned char[width_]; 220 | 221 | for (size_t z = 0; z < depth_; ++z) { 222 | for (size_t y = 0; y < height_; ++y) { 223 | size_t written = std::fwrite(&data_[Index(0, y, z)], sizeof(float), width_, file); 224 | if (written != width_) { 225 | std::printf("Error writing RAW data to file '%s'.", filename); 226 | std::fclose(file); 227 | return false; 228 | } 229 | } 230 | } 231 | std::fclose(file); 232 | return true; 233 | } else { 234 | std::printf("Cannot open file '%s'.\n", filename); 235 | return false; 236 | } 237 | } 238 | 239 | bool Data3D::WriteFlowToFileVTK(const char* filename, const Data3D& flow_u, const Data3D& flow_v, const Data3D& flow_w) 240 | { 241 | std::FILE *file = std::fopen(filename, "wb"); 242 | if (file) { 243 | std::fprintf(file, "# vtk DataFile Version 2.0\n"); 244 | std::fprintf(file, "3D Vector field computed by GpuFlow3D\n"); 245 | std::fprintf(file, "BINARY\n"); 246 | std::fprintf(file, "DATASET STRUCTURED_POINTS\n"); 247 | std::fprintf(file, "DIMENSIONS %d %d %d\n", flow_u.width_, flow_u.height_, flow_u.depth_); 248 | std::fprintf(file, "ORIGIN 0 0 0\n"); 249 | std::fprintf(file, "SPACING 1 1 1\n"); 250 | std::fprintf(file, "POINT_DATA %d\n", flow_u.width_ * flow_u.height_ * flow_u.depth_); 251 | std::fprintf(file, "VECTORS vectors float\n"); 252 | 253 | for (size_t z = 0; z < flow_u.depth_; ++z) { 254 | for (size_t y = 0; y < flow_u.height_; ++y) { 255 | for (size_t x = 0; x < flow_u.width_; ++x) { 256 | std::fwrite(&flow_u.data_[(z * flow_u.height_ + y) * flow_u.width_ + x], 
sizeof(float), 1, file); 257 | std::fwrite(&flow_v.data_[(z * flow_v.height_ + y) * flow_v.width_ + x], sizeof(float), 1, file); 258 | std::fwrite(&flow_w.data_[(z * flow_w.height_ + y) * flow_w.width_ + x], sizeof(float), 1, file); 259 | } 260 | } 261 | } 262 | 263 | std::fclose(file); 264 | return true; 265 | } else { 266 | std::printf("Cannot open file '%s'.\n", filename); 267 | return false; 268 | } 269 | } 270 | 271 | void Data3D::Invalidate() 272 | { 273 | width_ = 0; 274 | height_ = 0; 275 | depth_ = 0; 276 | FreeMemory(data_); 277 | } 278 | 279 | Data3D::~Data3D() 280 | { 281 | FreeMemory(data_); 282 | } -------------------------------------------------------------------------------- /src/data_types/data3d.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_DATA_TYPES_DATA3D_H_ 23 | #define GPUFLOW3D_DATA_TYPES_DATA3D_H_ 24 | 25 | #include 26 | 27 | class Data3D 28 | { 29 | private: 30 | float *data_ = nullptr; 31 | size_t width_; 32 | size_t height_; 33 | size_t depth_; 34 | 35 | inline size_t Index(size_t x, size_t y, size_t z) { 36 | return (z * height_ + y) * width_ + x; 37 | } 38 | 39 | void Invalidate(); 40 | 41 | void* AllocateMemory(size_t width, size_t height, size_t depth); 42 | void FreeMemory(void* pointer); 43 | 44 | public: 45 | Data3D(); 46 | Data3D(size_t width, size_t height, size_t depth); 47 | 48 | inline size_t Width() { return width_; }; 49 | inline size_t Height() { return height_; }; 50 | inline size_t Depth() { return depth_; }; 51 | inline float* DataPtr() { return data_; }; 52 | inline float& Data(size_t x, size_t y, size_t z) { return data_[Index(x, y, z)]; }; 53 | 54 | void Swap(Data3D& data3d); 55 | 56 | void ZeroData(); 57 | 58 | bool ReadRAWFromFileU8(const char* filename, size_t width, size_t height, size_t depth); 59 | bool ReadRAWFromFileF32(const char* filename, size_t width, size_t height, size_t depth); 60 | 61 | bool WriteRAWToFileU8(const char* filename); 62 | bool WriteRAWToFileF32(const char* filename); 63 | 64 | static bool WriteFlowToFileVTK(const char* filename, const Data3D& flow_u, const Data3D& flow_v, const Data3D& flow_w); 65 | 66 | ~Data3D(); 67 | }; 68 | 69 | #endif // !GPUFLOW3D_DATA_TYPES_DATA3D_H_ -------------------------------------------------------------------------------- /src/data_types/data_structs.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and 
Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_DATA_TYPES_DATA_STRUCTS_H_ 23 | #define GPUFLOW3D_DATA_TYPES_DATA_STRUCTS_H_ 24 | 25 | enum class Methods { OpticalFlow, Correlation }; 26 | 27 | enum class DataConstancy { Grey, Gradient, LogDerivatives }; 28 | 29 | 30 | 31 | struct DataSize3 { 32 | size_t width; 33 | size_t height; 34 | size_t pitch; 35 | }; 36 | 37 | 38 | struct Stat3 { 39 | float min; 40 | float max; 41 | float avg; 42 | }; 43 | 44 | #endif // !GPUFLOW3D_DATA_TYPES_DATA_STRUCTS_H_ 45 | -------------------------------------------------------------------------------- /src/data_types/operation_parameters.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #include "src/data_types/operation_parameters.h" 23 | 24 | OperationParameters::OperationParameters() 25 | { 26 | } 27 | 28 | bool OperationParameters::PushValuePtr(std::string key, void* value_ptr) 29 | { 30 | if (!map_.count(key)) { 31 | map_.insert({ key, value_ptr }); 32 | return true; 33 | } 34 | return false; 35 | } 36 | 37 | void* OperationParameters::GetValuePtr(std::string key) const 38 | { 39 | if (map_.count(key)) { 40 | return map_.at(key); 41 | } 42 | return nullptr; 43 | } 44 | 45 | void OperationParameters::Clear() 46 | { 47 | map_.clear(); 48 | } -------------------------------------------------------------------------------- /src/data_types/operation_parameters.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_DATA_TYPES_OPERATION_PARAMETERS_H_ 23 | #define GPUFLOW3D_DATA_TYPES_OPERATION_PARAMETERS_H_ 24 | 25 | #include 26 | #include 27 | 28 | class OperationParameters { 29 | private: 30 | std::unordered_map map_; 31 | 32 | public: 33 | OperationParameters(); 34 | 35 | bool PushValuePtr(std::string key, void* value_ptr); 36 | void* GetValuePtr(std::string key) const; 37 | void Clear(); 38 | }; 39 | 40 | #endif // !GPUFLOW3D_DATA_TYPES_OPERATION_PARAMETERS_H_ 41 | -------------------------------------------------------------------------------- /src/kernels/add_2d.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #include 23 | #include 24 | 25 | #include "src/data_types/data_structs.h" 26 | 27 | //#define IND(X, Y, Z) (((Z) * container_size.height + (Y)) * (container_size.pitch / sizeof(float)) + (X)) 28 | #define IND(X, Y) ((Y) * (container_size.pitch / sizeof(float)) + (X)) 29 | 30 | 31 | __constant__ DataSize3 container_size; 32 | 33 | extern "C" __global__ void add_2d( 34 | float* operand_0, 35 | const float* operand_1, 36 | size_t width, 37 | size_t height) 38 | { 39 | dim3 global_id(blockDim.x * blockIdx.x + threadIdx.x, 40 | blockDim.y * blockIdx.y + threadIdx.y); 41 | 42 | if (global_id.x < width && global_id.y < height) { 43 | operand_0[IND(global_id.x, global_id.y)] += 44 | operand_1[IND(global_id.x, global_id.y)]; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/kernels/convolution_2d.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | /* 23 | * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. 24 | * 25 | * Please refer to the NVIDIA end user license agreement (EULA) associated 26 | * with this source code for terms and conditions that govern your use of 27 | * this software. 
Any use, reproduction, disclosure, or distribution of 28 | * this software and related documentation outside the terms of the EULA 29 | * is strictly prohibited. 30 | * 31 | */ 32 | 33 | #include 34 | //#include 35 | #include 36 | #include 37 | #include 38 | 39 | #define __CUDACC__ 40 | 41 | #include 42 | #include 43 | 44 | #include "src/data_types/data_structs.h" 45 | 46 | using namespace std; 47 | 48 | 49 | #define MAX_KERNEL_LENGTH 51 50 | 51 | __constant__ DataSize3 container_size; 52 | 53 | 54 | 55 | //////////////////////////////////////////////////////////////////////////////// 56 | // Convolution kernel storage 57 | //////////////////////////////////////////////////////////////////////////////// 58 | __constant__ float c_Kernel[MAX_KERNEL_LENGTH]; 59 | 60 | //extern "C" void setConvolutionKernel(float *h_Kernel, unsigned int kernel_length) 61 | //{ 62 | // cudaMemcpyToSymbol(c_Kernel, h_Kernel, kernel_length * sizeof(float)); 63 | //} 64 | 65 | 66 | //////////////////////////////////////////////////////////////////////////////// 67 | // Row convolution filter 68 | //////////////////////////////////////////////////////////////////////////////// 69 | #define ROWS_BLOCKDIM_X 16 70 | #define ROWS_BLOCKDIM_Y 4 71 | #define ROWS_RESULT_STEPS 4 72 | #define ROWS_HALO_STEPS 1 73 | 74 | extern "C" __global__ void convolutionRowsKernel( 75 | float *d_Dst, 76 | const float *d_Src, 77 | int imageW, 78 | int imageH, 79 | int pitch, 80 | int kernel_radius 81 | ) 82 | { 83 | 84 | /* dim3 global_id(blockDim.x * blockIdx.x * ROWS_RESULT_STEPS + threadIdx.x, 85 | blockDim.y * blockIdx.y + threadIdx.y); 86 | 87 | if (global_id.x > imageW) 88 | return;*/ 89 | 90 | // Handle to thread block group 91 | __shared__ float s_Data[ROWS_BLOCKDIM_Y][(ROWS_RESULT_STEPS + 2 * ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X]; 92 | 93 | //Offset to the left halo edge 94 | const int baseX = (blockIdx.x * ROWS_RESULT_STEPS - ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X + threadIdx.x; 95 | const int baseY = 
blockIdx.y * ROWS_BLOCKDIM_Y + threadIdx.y; 96 | 97 | d_Src += baseY * pitch + baseX; 98 | d_Dst += baseY * pitch + baseX; 99 | 100 | //Load main data 101 | #pragma unroll 102 | 103 | for (int i = ROWS_HALO_STEPS; i < ROWS_HALO_STEPS + ROWS_RESULT_STEPS; i++) 104 | { 105 | // Original NVIDIA version 106 | //s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X] = d_Src[i * ROWS_BLOCKDIM_X]; 107 | 108 | // Modified version to allow for arbitrary image width 109 | size_t global_x = blockIdx.x * ROWS_RESULT_STEPS * ROWS_BLOCKDIM_X + ROWS_BLOCKDIM_X*(i-ROWS_HALO_STEPS) + threadIdx.x; 110 | s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X] = (global_x < imageW) ? d_Src[i * ROWS_BLOCKDIM_X] : 0; 111 | } 112 | 113 | //Load left halo 114 | #pragma unroll 115 | 116 | for (int i = 0; i < ROWS_HALO_STEPS; i++) 117 | { 118 | s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X] = (baseX >= -i * ROWS_BLOCKDIM_X) ? d_Src[i * ROWS_BLOCKDIM_X] : 0; 119 | } 120 | 121 | //Load right halo 122 | #pragma unroll 123 | 124 | for (int i = ROWS_HALO_STEPS + ROWS_RESULT_STEPS; i < ROWS_HALO_STEPS + ROWS_RESULT_STEPS + ROWS_HALO_STEPS; i++) 125 | { 126 | s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X] = (imageW - baseX > i * ROWS_BLOCKDIM_X) ? d_Src[i * ROWS_BLOCKDIM_X] : 0; 127 | 128 | 129 | // Testing 130 | /* size_t global_x = blockIdx.x * ROWS_RESULT_STEPS * ROWS_BLOCKDIM_X + ROWS_BLOCKDIM_X*(i-5) + threadIdx.x; 131 | 132 | s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X] = (global_x < imageW) ? 
d_Src[i * ROWS_BLOCKDIM_X] : 0; 133 | 134 | if (global_x > imageW) 135 | std::printf("Bx:%u tx:%u iter:%d global.x: %lu data:%f \n", blockIdx.x, threadIdx.x, i, static_cast(global_x), s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X]); 136 | */ 137 | } 138 | 139 | //Compute and store results 140 | 141 | 142 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // 143 | // -------------------------------------------------------- // 144 | __syncthreads(); 145 | // -------------------------------------------------------- // 146 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // 147 | 148 | #pragma unroll 149 | 150 | for (int i = ROWS_HALO_STEPS; i < ROWS_HALO_STEPS + ROWS_RESULT_STEPS; i++) 151 | { 152 | size_t global_x = blockIdx.x * ROWS_RESULT_STEPS * ROWS_BLOCKDIM_X + ROWS_BLOCKDIM_X*(i-ROWS_HALO_STEPS) + threadIdx.x; 153 | 154 | if (global_x >= imageW) 155 | return; 156 | 157 | float sum = 0; 158 | 159 | #pragma unroll 160 | 161 | for (int j = -kernel_radius; j <= kernel_radius; j++) 162 | { 163 | sum += c_Kernel[kernel_radius - j] * s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X + j]; 164 | } 165 | 166 | d_Dst[i * ROWS_BLOCKDIM_X] = sum; 167 | } 168 | } 169 | 170 | 171 | 172 | 173 | //////////////////////////////////////////////////////////////////////////////// 174 | // Column convolution filter 175 | //////////////////////////////////////////////////////////////////////////////// 176 | #define COLUMNS_BLOCKDIM_X 4 177 | #define COLUMNS_BLOCKDIM_Y 16 178 | #define COLUMNS_RESULT_STEPS 4 179 | #define COLUMNS_HALO_STEPS 1 180 | 181 | extern "C" __global__ void convolutionColumnsKernel( 182 | float *d_Dst, 183 | const float *d_Src, 184 | int imageW, 185 | int imageH, 186 | int pitch, 187 | int kernel_radius 188 | ) 189 | { 190 | __shared__ float s_Data[COLUMNS_BLOCKDIM_X][(COLUMNS_RESULT_STEPS + 2 * COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y + 0]; 191 | 192 | //Offset to the upper halo edge 193 | const int baseX = blockIdx.x * 
COLUMNS_BLOCKDIM_X + threadIdx.x; 194 | const int baseY = (blockIdx.y * COLUMNS_RESULT_STEPS - COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y + threadIdx.y; 195 | 196 | d_Src += baseY * pitch + baseX; 197 | d_Dst += baseY * pitch + baseX; 198 | 199 | //Main data 200 | #pragma unroll 201 | 202 | for (int i = COLUMNS_HALO_STEPS; i < COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS; i++) 203 | { 204 | // Original NVIDIA version 205 | //s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y] = d_Src[i * COLUMNS_BLOCKDIM_Y * pitch]; 206 | 207 | // Modified version to allow for arbitrary image height 208 | size_t global_y = blockIdx.y * COLUMNS_RESULT_STEPS * COLUMNS_BLOCKDIM_Y + COLUMNS_BLOCKDIM_Y*(i-COLUMNS_HALO_STEPS) + threadIdx.y; 209 | s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y]= (global_y < imageH) ? d_Src[i * COLUMNS_BLOCKDIM_Y * pitch] : 0; 210 | 211 | } 212 | 213 | //Upper halo 214 | #pragma unroll 215 | 216 | for (int i = 0; i < COLUMNS_HALO_STEPS; i++) 217 | { 218 | s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y] = (baseY >= -i * COLUMNS_BLOCKDIM_Y) ? d_Src[i * COLUMNS_BLOCKDIM_Y * pitch] : 0; 219 | } 220 | 221 | //Lower halo 222 | #pragma unroll 223 | 224 | for (int i = COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS; i < COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS + COLUMNS_HALO_STEPS; i++) 225 | { 226 | s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y]= (imageH - baseY > i * COLUMNS_BLOCKDIM_Y) ? 
d_Src[i * COLUMNS_BLOCKDIM_Y * pitch] : 0; 227 | 228 | 229 | } 230 | 231 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // 232 | // -------------------------------------------------------- // 233 | __syncthreads(); 234 | // -------------------------------------------------------- // 235 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // 236 | 237 | #pragma unroll 238 | 239 | for (int i = COLUMNS_HALO_STEPS; i < COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS; i++) 240 | { 241 | 242 | size_t global_y = blockIdx.y * COLUMNS_RESULT_STEPS * COLUMNS_BLOCKDIM_Y + COLUMNS_BLOCKDIM_Y*(i-COLUMNS_HALO_STEPS) + threadIdx.y; 243 | 244 | //std::printf("By:%u ty:%u iter:%d global.y: %lu\n", blockIdx.y, threadIdx.y, i, static_cast(global_y)); 245 | 246 | if (global_y >= imageH) 247 | return; 248 | 249 | float sum = 0; 250 | #pragma unroll 251 | 252 | for (int j = -kernel_radius; j <= kernel_radius; j++) 253 | { 254 | sum += c_Kernel[kernel_radius - j] * s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y + j]; 255 | } 256 | 257 | d_Dst[i * COLUMNS_BLOCKDIM_Y * pitch] = sum; 258 | 259 | //std::printf("out: %d \n", i * COLUMNS_BLOCKDIM_Y * pitch); 260 | } 261 | } 262 | 263 | 264 | -------------------------------------------------------------------------------- /src/kernels/median_2d.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. 
TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #include 23 | 24 | #define __CUDACC__ 25 | 26 | #include 27 | #include 28 | 29 | #include "src/data_types/data_structs.h" 30 | 31 | 32 | #define IND(X, Y) ((Y) * (container_size.pitch / sizeof(float)) + (X)) 33 | #define SIND(X, Y) ((((Y) + radius_2)) * shared_block_size.x + ((X) + radius_2)) 34 | 35 | __constant__ DataSize3 container_size; 36 | 37 | 38 | extern __shared__ float shared[]; 39 | 40 | __device__ void bubbleSort(float* buffer, size_t length) 41 | { 42 | for (int i = 0; i < length - 1; i++) { 43 | for (int k = 0; k < length - i - 1; k++) { 44 | if (buffer[k] > buffer[k + 1]) { 45 | float a = buffer[k]; 46 | buffer[k] = buffer[k + 1]; 47 | buffer[k + 1] = a; 48 | } 49 | } 50 | } 51 | } 52 | 53 | __device__ void insertionSort(float* window, size_t size) 54 | { 55 | int i, j; 56 | float temp; 57 | for (i = 0; i < size; i++) { 58 | temp = window[i]; 59 | for (j = i-1; j >= 0 && temp < window[j]; j--) { 60 | window[j+1] = window[j]; 61 | } 62 | window[j+1] = temp; 63 | } 64 | } 65 | 66 | // \test Experimental. Shows worse performance. TODO: Clarify 67 | __device__ void partialSelection(float* window, size_t size) 68 | { 69 | for (unsigned int j=0; j<(size*size+1)/2; ++j) 70 | { 71 | // Find position of minimum element 72 | int min_index=j; 73 | for (unsigned int l=j+1; l= 0 ? offset : -offset; 117 | shared[SIND(-radius_2 + threadIdx.x, threadIdx.y)] = input[IND(global_x_l, global_y)]; 118 | } 119 | 120 | /* Right slice */ 121 | if (threadIdx.x > blockDim.x - 1 - radius_2) { 122 | int index = blockDim.x - threadIdx.x; 123 | int offset = blockDim.x *(blockIdx.x + 1) + radius_2 - index; 124 | size_t global_x_r = offset < width ? 
offset : 2 * width - offset - 2; 125 | shared[SIND(radius_2 + threadIdx.x, threadIdx.y)] = input[IND(global_x_r, global_y)]; 126 | } 127 | 128 | /* Upper slice */ 129 | if (threadIdx.y < radius_2) { 130 | int offset = blockDim.y * blockIdx.y - radius_2 + threadIdx.y; 131 | size_t global_y_u = offset >= 0 ? offset : -offset; 132 | shared[SIND(threadIdx.x, -radius_2 + threadIdx.y)] = input[IND(global_x, global_y_u)]; 133 | } 134 | 135 | /* Bottom slice */ 136 | if (threadIdx.y > blockDim.y - 1 - radius_2) { 137 | int index = blockDim.y - threadIdx.y; 138 | int offset = blockDim.y *(blockIdx.y + 1) + radius_2 - index; 139 | size_t global_y_b = offset < height ? offset : 2 * height - offset - 2; 140 | shared[SIND(threadIdx.x, radius_2 + threadIdx.y)] = input[IND(global_x, global_y_b)]; 141 | } 142 | 143 | 144 | /* 12 edges */ 145 | { 146 | //int global_x_e; 147 | //int global_y_e; 148 | //int global_z_e; 149 | 150 | ///* 4 along X */ 151 | //if (threadIdx.y < radius_2 && threadIdx.z < radius_2) { 152 | // /* Front upper */ 153 | // global_y_e = blockDim.y * blockIdx.y - radius_2 + threadIdx.y; 154 | // global_y_e = global_y_e > 0 ? global_y_e : -global_y_e; 155 | // shared[SIND(threadIdx.x, threadIdx.y - radius_2)] = 156 | // input[IND(global_x, global_y_e)]; 157 | 158 | // /* Rear upper */ 159 | // global_z_e = blockDim.z *(blockIdx.z + 1) + threadIdx.z; 160 | // global_z_e = global_z_e < depth ? global_z_e : 2 * depth - global_z_e - 2; 161 | // shared[SIND(threadIdx.x, threadIdx.y - radius_2, blockDim.z + threadIdx.z)] = 162 | // input[IND(global_x, global_y_e, global_z_e)]; 163 | 164 | // /* Rear bottom */ 165 | // global_y_e = blockDim.y *(blockIdx.y + 1) + threadIdx.y; 166 | // global_y_e = global_y_e < height ? 
global_y_e : 2 * height - global_y_e - 2; 167 | // shared[SIND(threadIdx.x, blockDim.y + threadIdx.y, blockDim.z + threadIdx.z)] = 168 | // input[IND(global_x, global_y_e, global_z_e)]; 169 | 170 | // /* Front bottom */ 171 | // global_z_e = blockDim.z * blockIdx.z - radius_2 + threadIdx.z; 172 | // global_z_e = global_z_e > 0 ? global_z_e : -global_z_e; 173 | // shared[SIND(threadIdx.x, blockDim.y + threadIdx.y, threadIdx.z - radius_2)] = 174 | // input[IND(global_x, global_y_e, global_z_e)]; 175 | //} 176 | 177 | ///* 4 along Y */ 178 | //if (threadIdx.x < radius_2 && threadIdx.z < radius_2) { 179 | // /* Front left */ 180 | // global_x_e = blockDim.x * blockIdx.x - radius_2 + threadIdx.x; 181 | // global_x_e = global_x_e > 0 ? global_x_e : -global_x_e; 182 | // global_z_e = blockDim.z * blockIdx.z - radius_2 + threadIdx.z; 183 | // global_z_e = global_z_e > 0 ? global_z_e : -global_z_e; 184 | // shared[SIND(threadIdx.x - radius_2, threadIdx.y, threadIdx.z - radius_2)] = 185 | // input[IND(global_x_e, global_y, global_z_e)]; 186 | 187 | // /* Rear left */ 188 | // global_z_e = blockDim.z *(blockIdx.z + 1) + threadIdx.z; 189 | // global_z_e = global_z_e < depth ? global_z_e : 2 * depth - global_z_e - 2; 190 | // shared[SIND(threadIdx.x - radius_2, threadIdx.y, blockDim.z + threadIdx.z)] = 191 | // input[IND(global_x_e, global_y, global_z_e)]; 192 | 193 | // /* Rear right */ 194 | // global_x_e = blockDim.x *(blockIdx.x + 1) + threadIdx.x; 195 | // global_x_e = global_x_e < width ? global_x_e : 2 * width - global_x_e - 2; 196 | // shared[SIND(blockDim.x + threadIdx.x, threadIdx.y, blockDim.z + threadIdx.z)] = 197 | // input[IND(global_x_e, global_y, global_z_e)]; 198 | 199 | // /* Front right */ 200 | // global_z_e = blockDim.z * blockIdx.z - radius_2 + threadIdx.z; 201 | // global_z_e = global_z_e > 0 ? 
global_z_e : -global_z_e; 202 | // shared[SIND(blockDim.x + threadIdx.x, threadIdx.y, threadIdx.z - radius_2)] = 203 | // input[IND(global_x_e, global_y, global_z_e)]; 204 | //} 205 | 206 | ///* 4 along Z */ 207 | //if (threadIdx.x < radius_2 && threadIdx.y < radius_2) { 208 | // /* Upper left */ 209 | // global_x_e = blockDim.x * blockIdx.x - radius_2 + threadIdx.x; 210 | // global_x_e = global_x_e > 0 ? global_x_e : -global_x_e; 211 | // global_y_e = blockDim.y * blockIdx.y - radius_2 + threadIdx.y; 212 | // global_y_e = global_y_e > 0 ? global_y_e : -global_y_e; 213 | // shared[SIND(threadIdx.x - radius_2, threadIdx.y - radius_2, threadIdx.z)] = 214 | // input[IND(global_x_e, global_y_e, global_z)]; 215 | 216 | // /* Upper riight */ 217 | // global_x_e = blockDim.x *(blockIdx.x + 1) + threadIdx.x; 218 | // global_x_e = global_x_e < width ? global_x_e : 2 * width - global_x_e - 2; 219 | // shared[SIND(blockDim.x + threadIdx.x, threadIdx.y - radius_2, threadIdx.z)] = 220 | // input[IND(global_x_e, global_y_e, global_z)]; 221 | 222 | // /* Bottom right */ 223 | // global_y_e = blockDim.y *(blockIdx.y + 1) + threadIdx.y; 224 | // global_y_e = global_y_e < height ? global_y_e : 2 * height - global_y_e - 2; 225 | // shared[SIND(blockDim.x + threadIdx.x, blockDim.y + threadIdx.y, threadIdx.z)] = 226 | // input[IND(global_x_e, global_y_e, global_z)]; 227 | 228 | // /* Bottom left */ 229 | // global_x_e = blockDim.x * blockIdx.x - radius_2 + threadIdx.x; 230 | // global_x_e = global_x_e > 0 ? global_x_e : -global_x_e; 231 | // shared[SIND(threadIdx.x - radius_2, blockDim.y + threadIdx.y, threadIdx.z)] = 232 | // input[IND(global_x_e, global_y_e, global_z)]; 233 | //} 234 | } 235 | 236 | /* 4 corners */ 237 | { 238 | int global_x_c; 239 | int global_y_c; 240 | 241 | if (threadIdx.x < radius_2 && threadIdx.y < radius_2) { 242 | 243 | global_x_c = blockDim.x * blockIdx.x - radius_2 + threadIdx.x; 244 | global_x_c = global_x_c > 0 ? 
global_x_c : -global_x_c; 245 | 246 | global_y_c = blockDim.y * blockIdx.y - radius_2 + threadIdx.y; 247 | global_y_c = global_y_c > 0 ? global_y_c : -global_y_c; 248 | 249 | /* Front upper left */ 250 | shared[SIND(threadIdx.x - radius_2,threadIdx.y - radius_2)] = 251 | input[IND(global_x_c, global_y_c)]; 252 | 253 | /* Front upper right */ 254 | global_x_c = blockDim.x *(blockIdx.x + 1) + threadIdx.x; 255 | global_x_c = global_x_c < width ? global_x_c : 2 * width - global_x_c - 2; 256 | shared[SIND(blockDim.x + threadIdx.x, threadIdx.y - radius_2)] = 257 | input[IND(global_x_c, global_y_c)]; 258 | 259 | /* Front bottom right */ 260 | global_y_c = blockDim.y *(blockIdx.y + 1) + threadIdx.y; 261 | global_y_c = global_y_c < height ? global_y_c : 2 * height - global_y_c - 2; 262 | shared[SIND(blockDim.x + threadIdx.x, blockDim.y + threadIdx.y)] = 263 | input[IND(global_x_c, global_y_c)]; 264 | 265 | /* Front bottom left */ 266 | global_x_c = blockDim.x * blockIdx.x - radius_2 + threadIdx.x; 267 | global_x_c = global_x_c > 0 ? global_x_c : -global_x_c; 268 | shared[SIND(threadIdx.x - radius_2, blockDim.y + threadIdx.y)] = 269 | input[IND(global_x_c, global_y_c)]; 270 | 271 | } 272 | } 273 | 274 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // 275 | // -------------------------------------------------------- // 276 | __syncthreads(); 277 | // -------------------------------------------------------- // 278 | // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // 279 | 280 | if (global_id.x < width && global_id.y < height) { 281 | float buffer[49]; /* Max supported radius is 7, we have to store 7*7 values. 
*/ 282 | for (size_t iy = 0; iy < radius; ++iy) { 283 | for (size_t ix = 0; ix < radius; ++ix) { 284 | size_t lx = threadIdx.x - ix + radius_2; 285 | size_t ly = threadIdx.y - iy + radius_2; 286 | buffer[iy * radius + ix] = shared[SIND(lx, ly)]; 287 | } 288 | } 289 | 290 | 291 | size_t length = radius * radius; 292 | 293 | //bubbleSort(buffer, length); 294 | insertionSort(buffer, length); 295 | //partialSelection(buffer, radius); 296 | 297 | output[IND(global_id.x, global_id.y)] = buffer[length / 2]; 298 | } 299 | } -------------------------------------------------------------------------------- /src/kernels/registration_2d.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
*
*/

// NOTE(review): the header names on the two bare #include lines below were
// lost before this text reached review; restore them from the upstream file.
#include

#define __CUDACC__
#include

#include "src/data_types/data_structs.h"

//#define IND(X, Y, Z) (((Z) * container_size.height + (Y)) * (container_size.pitch / sizeof(float)) + (X))
#define IND(X, Y) ((Y) * (container_size.pitch / sizeof(float)) + (X))

__constant__ DataSize3 container_size;

/**
 * Warps frame_1 towards frame_0 along the flow field (flow_u, flow_v).
 *
 * One thread handles one pixel (col, row). The flow components are divided by
 * the grid spacings hx / hy to obtain a sampling position in pixel units, and
 * frame_1 is sampled there with bilinear interpolation. If the position is
 * NaN or lies outside [0, width - 1] x [0, height - 1], the pixel of frame_0
 * is copied to the output instead.
 *
 * All buffers are addressed through IND(), i.e. with the row stride taken
 * from container_size.pitch.
 */
extern "C" __global__ void registration_2d(
    const float* frame_0,
    const float* frame_1,
    const float* flow_u,
    const float* flow_v,
    size_t width,
    size_t height,
    float hx,
    float hy,
    float* output)
{
    const unsigned int col = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y;

    /* Threads that fall outside the image have nothing to do. */
    if (col >= width || row >= height) {
        return;
    }

    const size_t here = IND(col, row);

    /* Sampling position in frame_1, in pixel units. */
    const float x_f = col + (flow_u[here] * (1.f / hx));
    const float y_f = row + (flow_v[here] * (1.f / hy));

    const bool outside =
        (x_f < 0.) || (x_f > width - 1) ||
        (y_f < 0.) || (y_f > height - 1) ||
        isnan(x_f) || isnan(y_f);

    if (outside) {
        /* No valid sample available: keep the reference frame value. */
        output[here] = frame_0[here];
        return;
    }

    /* Upper-left neighbour and the fractional offsets from it. */
    const int x_0 = (int) floorf(x_f);
    const int y_0 = (int) floorf(y_f);
    const float delta_x = x_f - (float) x_0;
    const float delta_y = y_f - (float) y_0;

    /* Lower-right neighbour, clamped to the last column / row. */
    const int x_1 = min(int(width - 1), x_0 + 1);
    const int y_1 = min(int(height - 1), y_0 + 1);

    output[here] =
        (1.f - delta_x) * (1.f - delta_y) * frame_1[IND(x_0, y_0)] +
        (      delta_x) * (1.f - delta_y) * frame_1[IND(x_1, y_0)] +
        (1.f - delta_x) * (      delta_y) * frame_1[IND(x_0, y_1)] +
        (      delta_x) * (      delta_y) * frame_1[IND(x_1, y_1)];
}

--------------------------------------------------------------------------------
/src/kernels/resample_2d.cu:
--------------------------------------------------------------------------------
/**
 * @file 2D Optical flow using NVIDIA CUDA
 * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 *
 * @date 2015-2018
 * @version 0.5.0
 *
 *
 * @section LICENSE
 *
 * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 *
 * The current implementation contains the following licenses:
 *
 * 1. TinyXml package:
 * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). .
 * See src/utils/tinyxml.h for details.
*
*/

// NOTE(review): the header names on the two bare #include lines below were
// lost before this text reached review; restore them from the upstream file.
#include

#define __CUDACC__
#include

#include "src/data_types/data_structs.h"

//#define IND(X, Y, Z) (((Z) * container_size.height + (Y)) * (container_size.pitch / sizeof(float)) + (X))
#define IND(X, Y) ((Y) * (container_size.pitch / sizeof(float)) + (X))

__constant__ DataSize3 container_size;

// NOTE(review): the template arguments of every static_cast in the two kernels
// below were missing in the reviewed text. They are restored from the declared
// type of the receiving variable (float for ratios and fractions, int for cell
// indices) -- confirm against the upstream file.

/**
 * Resamples each row from in_width to out_width samples.
 *
 * One thread produces one output pixel. Output pixel x covers the input
 * interval [x * delta, (x + 1) * delta) with delta = in_width / out_width.
 * Every input cell touching that interval contributes its value weighted by
 * the covered fraction of the cell; the sum is scaled by out_width / in_width.
 *
 * @param input      source image, addressed through IND()
 * @param output     destination image, addressed through IND()
 * @param out_width  width of the resampled image
 * @param out_height number of rows to process
 * @param in_width   width of the source image
 */
extern "C" __global__ void resample_x(
    const float* input,
    float* output,
    size_t out_width,
    size_t out_height,
    size_t in_width)
{
    dim3 globalID(blockDim.x * blockIdx.x + threadIdx.x,
                  blockDim.y * blockIdx.y + threadIdx.y);

    if (globalID.x < out_width && globalID.y < out_height) {
        float delta = in_width / static_cast<float>(out_width);
        float normalization = out_width / static_cast<float>(in_width);

        /* Interval of the input row covered by this output pixel. */
        float left_f = globalID.x * delta;
        float right_f = (globalID.x + 1) * delta;

        /* Input cells touching the interval; the right end is clamped to the row. */
        int left_i = static_cast<int>(floor(left_f));
        int right_i = min(static_cast<int>(in_width), static_cast<int>(ceil(right_f)));

        float value = 0.f;

        for (int j = 0; j < (right_i - left_i); j++) {
            float frac = 1.f;

            /* left boundary */
            if (j == 0) {
                frac = static_cast<float>(left_i + 1) - left_f;
            }
            /* right boundary */
            if (j == (right_i - left_i) - 1) {
                frac = right_f - static_cast<float>(left_i + j);
            }
            /* if the left and right boundaries are in the same cell */
            if ((right_i - left_i) == 1) {
                frac = delta;
            }
            value += input[IND(left_i + j, globalID.y)] * frac;
        }
        output[IND(globalID.x, globalID.y)] = value * normalization;
    }
}

/**
 * Resamples each column from in_height to out_height samples.
 *
 * Same scheme as resample_x, applied along y: output pixel y covers the input
 * interval [y * delta, (y + 1) * delta) with delta = in_height / out_height.
 *
 * @param input      source image, addressed through IND()
 * @param output     destination image, addressed through IND()
 * @param out_width  number of columns to process
 * @param out_height height of the resampled image
 * @param in_height  height of the source image
 */
extern "C" __global__ void resample_y(
    const float* input,
    float* output,
    size_t out_width,
    size_t out_height,
    size_t in_height)
{
    dim3 globalID(blockDim.x * blockIdx.x + threadIdx.x,
                  blockDim.y * blockIdx.y + threadIdx.y);

    if (globalID.x < out_width && globalID.y < out_height) {
        float delta = in_height / static_cast<float>(out_height);
        float normalization = out_height / static_cast<float>(in_height);

        /* Interval of the input column covered by this output pixel. */
        float left_f = globalID.y * delta;
        float right_f = (globalID.y + 1) * delta;

        /* Input cells touching the interval; the right end is clamped to the column. */
        int left_i = static_cast<int>(floor(left_f));
        int right_i = min(static_cast<int>(in_height), static_cast<int>(ceil(right_f)));

        float value = 0.f;

        for (int j = 0; j < (right_i - left_i); j++) {
            float frac = 1.f;

            /* left boundary */
            if (j == 0) {
                frac = static_cast<float>(left_i + 1) - left_f;
            }
            /* right boundary */
            if (j == (right_i - left_i) - 1) {
                frac = right_f - static_cast<float>(left_i + j);
            }
            /* if the left and right boundaries are in the same cell */
            if ((right_i - left_i) == 1) {
                frac = delta;
            }
            value += input[IND(globalID.x, left_i + j)] * frac;
        }
        output[IND(globalID.x, globalID.y)] = value * normalization;
    }
}

//extern "C" __global__ void resample_z(
//    const float* input,
//    float* output,
//    size_t out_width,
//    size_t out_height,
//    size_t out_depth,
//    size_t in_depth)
//{
//    dim3 globalID(blockDim.x * blockIdx.x + threadIdx.x,
//        blockDim.y * blockIdx.y + threadIdx.y,
//        blockDim.z * blockIdx.z + threadIdx.z);
//
//    if (globalID.x < out_width && globalID.y < out_height && globalID.z < out_depth) {
//        float delta = in_depth / static_cast(out_depth);
//        float normalization = out_depth / static_cast(in_depth);
//
//        float left_f = globalID.z * delta;
//        float right_f = (globalID.z + 1) * delta;
//
//        int left_i = static_cast(floor(left_f));
//        int right_i = min(in_depth, static_cast(ceil(right_f)));
//
//        float value = 0.f;
//
//        for (int j = 0; j < (right_i - left_i); j++) {
//            float frac = 1.f;
//
//            /* left boundary */
//            if (j == 0) {
//                frac =
static_cast(left_i + 1) - left_f; 150 | // } 151 | // /* right boundary */ 152 | // if (j == (right_i - left_i) - 1) { 153 | // frac = right_f - static_cast(left_i + j); 154 | // } 155 | // /* if the left and right boundaries are in the same cell */ 156 | // if ((right_i - left_i) == 1) { 157 | // frac = delta; 158 | // } 159 | // value += input[IND(globalID.x, globalID.y, left_i + j)] * frac; 160 | // } 161 | // output[IND(globalID.x, globalID.y, globalID.z)] = value * normalization; 162 | // } 163 | //} 164 | 165 | //extern "C" __global__ void resample_x_debug( 166 | // const float* input, 167 | // float* output, 168 | // size_t width, 169 | // size_t height, 170 | // size_t depth, 171 | // size_t resample_width) 172 | //{ 173 | // dim3 globalID(blockDim.x * blockIdx.x + threadIdx.x, 174 | // blockDim.y * blockIdx.y + threadIdx.y, 175 | // blockDim.z * blockIdx.z + threadIdx.z); 176 | // 177 | // if (globalID.y < height && globalID.z < depth) { 178 | // for (size_t x = 0; x < width; x += 4) { 179 | // *((float4*)(&(output[IND(x, globalID.y, globalID.z)]))) = 180 | // *((const float4*)(&(input[IND(x, globalID.y, globalID.z)]))); 181 | // } 182 | // for (size_t x = 0; x < width; x++) { 183 | // output[IND(x, globalID.y, globalID.z)] = input[IND(x, globalID.y, globalID.z)]; 184 | // } 185 | // } 186 | //} -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 
| * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | #include "src/data_types/data2d.h" 30 | #include "src/data_types/data_structs.h" 31 | #include "src/data_types/operation_parameters.h" 32 | #include "src/utils/cuda_utils.h" 33 | #include "src/utils/io_utils.h" 34 | #include "src/utils/settings.h" 35 | 36 | #include "src/optical_flow/optical_flow_2d.h" 37 | 38 | using namespace OpticFlow; 39 | 40 | 41 | const bool key_press = true; 42 | const bool use_visualization = false; 43 | const bool silent_mode = true; 44 | 45 | 46 | int main(int argc, char** argv) 47 | { 48 | 49 | 50 | /* Initialize CUDA */ 51 | CUcontext cu_context; 52 | if (!InitCudaContextWithFirstAvailableDevice(&cu_context)) { 53 | return 1; 54 | } 55 | 56 | std::printf("//----------------------------------------------------------------------//\n"); 57 | std::printf("// 2D Optical flow using NVIDIA CUDA. Version 0.5.0 //\n"); 58 | std::printf("// //\n"); 59 | std::printf("// Karlsruhe Institute of Technology. 
2015 - 2018 //\n"); 60 | std::printf("//----------------------------------------------------------------------//\n"); 61 | 62 | 63 | 64 | /* Dataset variables */ 65 | size_t width = 584; //584; 66 | size_t height = 388; ////388; 67 | 68 | 69 | /* Optical flow variables */ 70 | size_t warp_levels_count = 50; 71 | float warp_scale_factor = 0.9f; 72 | size_t outer_iterations_count = 40; 73 | size_t inner_iterations_count = 5; 74 | float equation_alpha = 35.0f; // 3.5f; 75 | float equation_smoothness = 0.001f; 76 | float equation_data = 0.001f; 77 | size_t median_radius = 5; 78 | float gaussian_sigma = 1.5f; 79 | 80 | DataConstancy data_constancy = DataConstancy::Grey; 81 | 82 | string file_name1 = "rub1.raw"; 83 | string file_name2 = "rub2.raw"; 84 | string input_path = "./data/"; 85 | string output_path = "./data/output/"; 86 | string counter = ""; 87 | 88 | 89 | /* Optical flow computation class */ 90 | OpticalFlow2D optical_flow; 91 | 92 | 93 | Data2D frame_0; 94 | Data2D frame_1; 95 | 96 | /* Read settings */ 97 | string settingsPath = "settings.xml"; 98 | 99 | /* 100 | Usage: 101 | 102 | 1. cuda-flow2d. settings.xml in the current directory will be used for settings 103 | 2. cuda-flow2d 104 | 3. cuda-flow2d 105 | */ 106 | 107 | if (argc == 6 || argc == 7 || argc == 9) { 108 | 109 | file_name1 = argv[1]; 110 | file_name2 = argv[2]; 111 | 112 | output_path = string(argv[6]); 113 | 114 | width = atoi(argv[3]); 115 | height = atoi(argv[4]); 116 | 117 | if (argc == 7) 118 | counter = argv[5]; 119 | 120 | if (argc == 9){ 121 | equation_alpha = atof(argv[7]); 122 | gaussian_sigma = atof(argv[8]); 123 | counter = "alpha"+ string(argv[7])+"_sigma"+ string(argv[8])+"_"; 124 | } 125 | } 126 | else if (argc < 3) { 127 | string settingsFile = (argc==1)? settingsPath : string(argv[1]); 128 | 129 | // Create Settings class 130 | cout<<"Reading settings: "<. Otherwise settings.xml in the current directory is used"<. 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #include "src/optical_flow/optical_flow_2d.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | //#include "../viewflow3d/src/visualization.h" 31 | #include "src/utils/common_utils.h" 32 | #include "src/utils/cuda_utils.h" 33 | #include "src/data_types/data_structs.h" 34 | 35 | using namespace std; 36 | 37 | OpticalFlow2D::OpticalFlow2D() 38 | : OpticalFlowBase2D("Optical Flow 2D Single GPU") 39 | { 40 | cuda_operations_.push_front(&cuop_add_); 41 | cuda_operations_.push_front(&cuop_convolution_); 42 | cuda_operations_.push_front(&cuop_median_); 43 | cuda_operations_.push_front(&cuop_register_); 44 | cuda_operations_.push_front(&cuop_resample_); 45 | cuda_operations_.push_front(&cuop_solve_); 46 | } 47 | 48 | bool OpticalFlow2D::Initialize(const DataSize3& data_size, DataConstancy data_constancy) 49 | { 50 | data_constancy_ = data_constancy; 51 | dev_container_size_ = data_size; 52 | dev_container_size_.pitch = 0; 53 | 54 | initialized_ = InitCudaMemory() && InitCudaOperations(); 55 | return initialized_; 56 | } 57 | 58 | bool OpticalFlow2D::InitCudaOperations() 59 | { 60 | if (dev_container_size_.pitch == 0) { 61 | std::printf("Initialization failed. 
Device pitch is 0.\n"); 62 | return false; 63 | } 64 | 65 | std::printf("Initialization of cuda operations...\n"); 66 | 67 | OperationParameters op; 68 | op.PushValuePtr("container_size", &dev_container_size_); 69 | op.PushValuePtr("data_constancy", &data_constancy_); 70 | 71 | for (CudaOperationBase* cuop : cuda_operations_) { 72 | std::printf("%-18s: ", cuop->GetName()); 73 | bool result = cuop->Initialize(&op); 74 | if (result) { 75 | std::printf("OK\n"); 76 | } else { 77 | Destroy(); 78 | return false; 79 | } 80 | } 81 | return true; 82 | } 83 | 84 | bool OpticalFlow2D::InitCudaMemory() 85 | { 86 | std::printf("Allocating memory on the device...\n"); 87 | /* Check available memory on the cuda device */ 88 | size_t free_memory; 89 | size_t total_memory; 90 | 91 | CheckCudaError(cuMemGetInfo(&free_memory, &total_memory)); 92 | 93 | std::printf("Available\t:\t%.0fMB / %.0fMB\n", 94 | free_memory / static_cast(1024 * 1024), 95 | total_memory / static_cast(1024 * 1024)); 96 | 97 | /* Estimate needed memory (approx.)*/ 98 | int alignment = 512; 99 | CUdevice cu_device; 100 | CheckCudaError(cuCtxGetDevice(&cu_device)); 101 | CheckCudaError(cuDeviceGetAttribute(&alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, cu_device)); 102 | alignment /= sizeof(float); 103 | 104 | size_t pitch = ((dev_container_size_.width % alignment == 0) ? 
105 | (dev_container_size_.width) : 106 | (dev_container_size_.width + alignment - (dev_container_size_.width % alignment))) * 107 | sizeof(float); 108 | 109 | size_t needed_memory = pitch * dev_container_size_.height * dev_containers_count_; 110 | std::printf("Needed (approx.):\t%.0fMB\n", needed_memory / static_cast(1024 * 1024)); 111 | 112 | size_t allocated_memory = 0; 113 | if (needed_memory < free_memory) { 114 | for (size_t i = 0; i < dev_containers_count_; ++i) { 115 | CUdeviceptr dev_container; 116 | bool error = CheckCudaError(cuMemAllocPitch(&dev_container, 117 | &pitch, 118 | dev_container_size_.width * sizeof(float), 119 | dev_container_size_.height * 1, 120 | sizeof(float))); 121 | 122 | /* Pitch should be the same for all containers */ 123 | if (error || (i != 0 && dev_container_size_.pitch != pitch)) { 124 | std::printf("Error during device memory allocation."); 125 | Destroy(); 126 | return false; 127 | } else { 128 | size_t container_size; 129 | CheckCudaError(cuMemGetAddressRange(NULL, &container_size, dev_container)); 130 | allocated_memory += container_size; 131 | 132 | cuda_memory_ptrs_.push(dev_container); 133 | dev_container_size_.pitch = pitch; 134 | } 135 | } 136 | std::printf("Allocated\t:\t%.0fMB\n", allocated_memory / static_cast(1024 * 1024)); 137 | return true; 138 | } 139 | return false; 140 | } 141 | 142 | void OpticalFlow2D::ComputeFlow(Data2D& frame_0, Data2D& frame_1, Data2D& flow_u, Data2D& flow_v, OperationParameters& params) 143 | { 144 | if (!IsInitialized()) { 145 | return; 146 | } 147 | 148 | /* Optical flow algorithm's parameters */ 149 | size_t warp_levels_count; 150 | float warp_scale_factor; 151 | size_t outer_iterations_count; 152 | size_t inner_iterations_count; 153 | float equation_alpha; 154 | float equation_smoothness; 155 | float equation_data; 156 | size_t median_radius; 157 | float gaussian_sigma; 158 | 159 | /* Lambda function for correct working of GET_PARAM_OR_RETURN */ 160 | GET_PARAM_OR_RETURN(params, 
size_t, warp_levels_count, "warp_levels_count"); 161 | GET_PARAM_OR_RETURN(params, float, warp_scale_factor, "warp_scale_factor"); 162 | GET_PARAM_OR_RETURN(params, size_t, outer_iterations_count, "outer_iterations_count"); 163 | GET_PARAM_OR_RETURN(params, size_t, inner_iterations_count, "inner_iterations_count"); 164 | GET_PARAM_OR_RETURN(params, float, equation_alpha, "equation_alpha"); 165 | GET_PARAM_OR_RETURN(params, float, equation_smoothness, "equation_smoothness"); 166 | GET_PARAM_OR_RETURN(params, float, equation_data, "equation_data"); 167 | GET_PARAM_OR_RETURN(params, size_t, median_radius, "median_radius"); 168 | GET_PARAM_OR_RETURN(params, float, gaussian_sigma, "gaussian_sigma"); 169 | 170 | std::printf("\nStarting optical flow computation...\n"); 171 | 172 | /* Create CUDA event for the time measure */ 173 | CUevent cu_event_start; 174 | CUevent cu_event_stop; 175 | 176 | CheckCudaError(cuEventCreate(&cu_event_start, CU_EVENT_DEFAULT)); 177 | CheckCudaError(cuEventCreate(&cu_event_stop, CU_EVENT_DEFAULT)); 178 | 179 | CheckCudaError(cuEventRecord(cu_event_start, NULL)); 180 | 181 | /* Auxiliary variables */ 182 | float hx; // spacing in x-direction (current resol.) 183 | float hy; // spacing in y-direction (current resol.) 
184 | DataSize3 original_data_size = { frame_0.Width(), frame_0.Height(), 0 }; 185 | DataSize3 current_data_size = { 0 }; 186 | DataSize3 prev_data_size = { 0 }; 187 | 188 | size_t max_warp_level = GetMaxWarpLevel(original_data_size.width, original_data_size.height, warp_scale_factor); 189 | int current_warp_level = std::min(warp_levels_count, max_warp_level) - 1; 190 | 191 | OperationParameters op; 192 | 193 | /* Copy input data to the device */ 194 | CUdeviceptr dev_frame_0 = cuda_memory_ptrs_.top(); 195 | cuda_memory_ptrs_.pop(); 196 | CUdeviceptr dev_frame_1 = cuda_memory_ptrs_.top(); 197 | cuda_memory_ptrs_.pop(); 198 | CUdeviceptr dev_frame_0_res = cuda_memory_ptrs_.top(); 199 | cuda_memory_ptrs_.pop(); 200 | CUdeviceptr dev_frame_1_res_br = cuda_memory_ptrs_.top(); 201 | cuda_memory_ptrs_.pop(); 202 | 203 | CUdeviceptr dev_flow_u = cuda_memory_ptrs_.top(); 204 | cuda_memory_ptrs_.pop(); 205 | CUdeviceptr dev_flow_v = cuda_memory_ptrs_.top(); 206 | cuda_memory_ptrs_.pop(); 207 | 208 | CUdeviceptr dev_flow_du = cuda_memory_ptrs_.top(); 209 | cuda_memory_ptrs_.pop(); 210 | CUdeviceptr dev_flow_dv = cuda_memory_ptrs_.top(); 211 | cuda_memory_ptrs_.pop(); 212 | 213 | 214 | CopyData2DtoDevice(frame_0, dev_frame_0, dev_container_size_.height, dev_container_size_.pitch); 215 | CopyData2DtoDevice(frame_1, dev_frame_1, dev_container_size_.height, dev_container_size_.pitch); 216 | 217 | /* Gaussian blurring */ 218 | if (gaussian_sigma > 0.0) { 219 | 220 | CUdeviceptr dev_temp = cuda_memory_ptrs_.top(); 221 | cuda_memory_ptrs_.pop(); 222 | 223 | op.Clear(); 224 | op.PushValuePtr("dev_input", &dev_frame_0); 225 | op.PushValuePtr("dev_output", &dev_flow_u); 226 | op.PushValuePtr("dev_temp", &dev_temp); 227 | op.PushValuePtr("data_size", &original_data_size); 228 | op.PushValuePtr("gaussian_sigma", &gaussian_sigma); 229 | cuop_convolution_.Execute(op); 230 | 231 | //CheckCudaError(cuStreamSynchronize(NULL)); 232 | 
//std::this_thread::sleep_for(std::chrono::milliseconds(500)); 233 | 234 | 235 | op.Clear(); 236 | op.PushValuePtr("dev_input", &dev_frame_1); 237 | op.PushValuePtr("dev_output", &dev_flow_v); 238 | op.PushValuePtr("dev_temp", &dev_temp); 239 | op.PushValuePtr("data_size", &original_data_size); 240 | op.PushValuePtr("gaussian_sigma", &gaussian_sigma); 241 | cuop_convolution_.Execute(op); 242 | 243 | std::swap(dev_frame_0, dev_flow_u); 244 | std::swap(dev_frame_1, dev_flow_v); 245 | 246 | cuda_memory_ptrs_.push(dev_temp); 247 | 248 | // Test: Save smoothed image 249 | /* CopyData2DFromDevice(dev_frame_0, frame_0, dev_container_size_.height, dev_container_size_.pitch); 250 | CopyData2DFromDevice(dev_frame_1, frame_1, dev_container_size_.height, dev_container_size_.pitch); 251 | 252 | std::string filename = 253 | "-" + std::to_string(dev_container_size_.width) + 254 | "-" + std::to_string(dev_container_size_.height) + ".raw"; 255 | 256 | frame_0.WriteRAWToFileF32(std::string("./data/output/test1_smooth" + filename).c_str()); 257 | 258 | frame_1.WriteRAWToFileF32(std::string("./data/output/test2_smooth" + filename).c_str());*/ 259 | 260 | } 261 | 262 | 263 | 264 | /* ---------------------------------------------------- */ 265 | /* Main loop */ 266 | /* ---------------------------------------------------- */ 267 | while (current_warp_level >= 0) { 268 | float scale = std::pow(warp_scale_factor, static_cast(current_warp_level)); 269 | current_data_size.width = static_cast(std::ceil(original_data_size.width * scale)); 270 | current_data_size.height = static_cast(std::ceil(original_data_size.height * scale)); 271 | hx = original_data_size.width / static_cast(current_data_size.width); 272 | hy = original_data_size.height / static_cast(current_data_size.height); 273 | 274 | if (!this->silent) 275 | std::printf("Solve level %2d (%4d x%4d) \n", current_warp_level, current_data_size.width, current_data_size.height); 276 | 277 | 278 | /* Data resampling */ 279 | { 280 | if 
(current_warp_level == 0) { 281 | std::swap(dev_frame_0, dev_frame_0_res); 282 | std::swap(dev_frame_1, dev_frame_1_res_br); 283 | } else { 284 | CUdeviceptr dev_temp = cuda_memory_ptrs_.top(); 285 | cuda_memory_ptrs_.pop(); 286 | 287 | op.Clear(); 288 | op.PushValuePtr("dev_input", &dev_frame_0); 289 | op.PushValuePtr("dev_output", &dev_frame_0_res); 290 | op.PushValuePtr("dev_temp", &dev_temp); 291 | op.PushValuePtr("data_size", &original_data_size); 292 | op.PushValuePtr("resample_size", ¤t_data_size); 293 | cuop_resample_.Execute(op); 294 | 295 | op.Clear(); 296 | op.PushValuePtr("dev_input", &dev_frame_1); 297 | op.PushValuePtr("dev_output", &dev_frame_1_res_br); 298 | op.PushValuePtr("dev_temp", &dev_temp); 299 | op.PushValuePtr("data_size", &original_data_size); 300 | op.PushValuePtr("resample_size", ¤t_data_size); 301 | cuop_resample_.Execute(op); 302 | 303 | cuda_memory_ptrs_.push(dev_temp); 304 | } 305 | } 306 | 307 | /* Flow field resampling */ 308 | { 309 | if (prev_data_size.width == 0) { 310 | CheckCudaError(cuMemsetD2D8(dev_flow_u, dev_container_size_.pitch, 0, dev_container_size_.width * sizeof(float), 311 | dev_container_size_.height * 1)); 312 | CheckCudaError(cuMemsetD2D8(dev_flow_v, dev_container_size_.pitch, 0, dev_container_size_.width * sizeof(float), 313 | dev_container_size_.height * 1)); 314 | 315 | } else { 316 | CUdeviceptr dev_temp = cuda_memory_ptrs_.top(); 317 | cuda_memory_ptrs_.pop(); 318 | 319 | op.Clear(); 320 | op.PushValuePtr("dev_input", &dev_flow_u); 321 | op.PushValuePtr("dev_output", &dev_flow_du); 322 | op.PushValuePtr("dev_temp", &dev_temp); 323 | op.PushValuePtr("data_size", &prev_data_size); 324 | op.PushValuePtr("resample_size", ¤t_data_size); 325 | cuop_resample_.Execute(op); 326 | 327 | op.Clear(); 328 | op.PushValuePtr("dev_input", &dev_flow_v); 329 | op.PushValuePtr("dev_output", &dev_flow_dv); 330 | op.PushValuePtr("dev_temp", &dev_temp); 331 | op.PushValuePtr("data_size", &prev_data_size); 332 | 
op.PushValuePtr("resample_size", ¤t_data_size); 333 | cuop_resample_.Execute(op); 334 | 335 | 336 | std::swap(dev_flow_u, dev_flow_du); 337 | std::swap(dev_flow_v, dev_flow_dv); 338 | 339 | cuda_memory_ptrs_.push(dev_temp); 340 | } 341 | } 342 | 343 | /* Backward registration */ 344 | { 345 | CUdeviceptr dev_temp = cuda_memory_ptrs_.top(); 346 | cuda_memory_ptrs_.pop(); 347 | 348 | op.Clear(); 349 | op.PushValuePtr("dev_frame_0", &dev_frame_0_res); 350 | op.PushValuePtr("dev_frame_1", &dev_frame_1_res_br); 351 | op.PushValuePtr("dev_flow_u", &dev_flow_u); 352 | op.PushValuePtr("dev_flow_v", &dev_flow_v); 353 | op.PushValuePtr("dev_output", &dev_temp); 354 | op.PushValuePtr("data_size", ¤t_data_size); 355 | op.PushValuePtr("hx", &hx); 356 | op.PushValuePtr("hy", &hy); 357 | 358 | cuop_register_.Execute(op); 359 | 360 | std::swap(dev_frame_1_res_br, dev_temp); 361 | 362 | cuda_memory_ptrs_.push(dev_temp); 363 | } 364 | 365 | /* Difference problem solver */ 366 | { 367 | CUdeviceptr dev_phi = cuda_memory_ptrs_.top(); 368 | cuda_memory_ptrs_.pop(); 369 | CUdeviceptr dev_ksi = cuda_memory_ptrs_.top(); 370 | cuda_memory_ptrs_.pop(); 371 | CUdeviceptr dev_temp_du = cuda_memory_ptrs_.top(); 372 | cuda_memory_ptrs_.pop(); 373 | CUdeviceptr dev_temp_dv = cuda_memory_ptrs_.top(); 374 | cuda_memory_ptrs_.pop(); 375 | 376 | op.Clear(); 377 | op.PushValuePtr("dev_frame_0", &dev_frame_0_res); 378 | op.PushValuePtr("dev_frame_1", &dev_frame_1_res_br); 379 | op.PushValuePtr("dev_flow_u", &dev_flow_u); 380 | op.PushValuePtr("dev_flow_v", &dev_flow_v); 381 | op.PushValuePtr("dev_flow_du", &dev_flow_du); 382 | op.PushValuePtr("dev_flow_dv", &dev_flow_dv); 383 | op.PushValuePtr("dev_phi", &dev_phi); 384 | op.PushValuePtr("dev_ksi", &dev_ksi); 385 | op.PushValuePtr("dev_temp_du", &dev_temp_du); 386 | op.PushValuePtr("dev_temp_dv", &dev_temp_dv); 387 | 388 | op.PushValuePtr("data_constancy", &data_constancy_); 389 | op.PushValuePtr("outer_iterations_count", &outer_iterations_count); 390 
| op.PushValuePtr("inner_iterations_count", &inner_iterations_count); 391 | op.PushValuePtr("equation_alpha", &equation_alpha); 392 | op.PushValuePtr("equation_smoothness", &equation_smoothness); 393 | op.PushValuePtr("equation_data", &equation_data); 394 | op.PushValuePtr("data_size", ¤t_data_size); 395 | op.PushValuePtr("hx", &hx); 396 | op.PushValuePtr("hy", &hy); 397 | 398 | 399 | cuop_solve_.silent = silent; 400 | cuop_solve_.Execute(op); 401 | 402 | cuda_memory_ptrs_.push(dev_phi); 403 | cuda_memory_ptrs_.push(dev_ksi); 404 | cuda_memory_ptrs_.push(dev_temp_du); 405 | cuda_memory_ptrs_.push(dev_temp_dv); 406 | } 407 | 408 | /* Add the solved flow increment to the global flow */ 409 | { 410 | op.Clear(); 411 | op.PushValuePtr("operand_0", &dev_flow_u); 412 | op.PushValuePtr("operand_1", &dev_flow_du); 413 | op.PushValuePtr("data_size", ¤t_data_size); 414 | cuop_add_.Execute(op); 415 | 416 | op.Clear(); 417 | op.PushValuePtr("operand_0", &dev_flow_v); 418 | op.PushValuePtr("operand_1", &dev_flow_dv); 419 | op.PushValuePtr("data_size", ¤t_data_size); 420 | cuop_add_.Execute(op); 421 | 422 | } 423 | 424 | prev_data_size = current_data_size; 425 | --current_warp_level; 426 | 427 | /* Flow field median filtering */ 428 | { 429 | CUdeviceptr dev_temp = cuda_memory_ptrs_.top(); 430 | cuda_memory_ptrs_.pop(); 431 | 432 | op.Clear(); 433 | op.PushValuePtr("dev_input", &dev_flow_u); 434 | op.PushValuePtr("dev_output", &dev_temp); 435 | op.PushValuePtr("data_size", ¤t_data_size); 436 | op.PushValuePtr("radius", &median_radius); 437 | cuop_median_.Execute(op); 438 | std::swap(dev_flow_u, dev_temp); 439 | 440 | op.Clear(); 441 | op.PushValuePtr("dev_input", &dev_flow_v); 442 | op.PushValuePtr("dev_output", &dev_temp); 443 | op.PushValuePtr("data_size", ¤t_data_size); 444 | op.PushValuePtr("radius", &median_radius); 445 | cuop_median_.Execute(op); 446 | std::swap(dev_flow_v, dev_temp); 447 | 448 | cuda_memory_ptrs_.push(dev_temp); 449 | } 450 | 451 | 452 | 453 | /* Get data 
for visualization */ 454 | //{ 455 | // Visualization& visualization = Visualization::GetInstance(); 456 | 457 | // if (visualization.IsInitialized()) { 458 | // CUdeviceptr dev_temp_u = cuda_memory_ptrs_.top(); 459 | // cuda_memory_ptrs_.pop(); 460 | // CUdeviceptr dev_temp_v = cuda_memory_ptrs_.top(); 461 | // cuda_memory_ptrs_.pop(); 462 | // CUdeviceptr dev_temp_w = cuda_memory_ptrs_.top(); 463 | // cuda_memory_ptrs_.pop(); 464 | // CUdeviceptr dev_temp = cuda_memory_ptrs_.top(); 465 | // cuda_memory_ptrs_.pop(); 466 | 467 | // /* Resample the flow field to original size */ 468 | // op.Clear(); 469 | // op.PushValuePtr("dev_input", &dev_flow_u); 470 | // op.PushValuePtr("dev_output", &dev_temp_u); 471 | // op.PushValuePtr("dev_temp", &dev_temp); 472 | // op.PushValuePtr("data_size", ¤t_data_size); 473 | // op.PushValuePtr("resample_size", &original_data_size); 474 | // cuop_resample_.Execute(op); 475 | 476 | // op.Clear(); 477 | // op.PushValuePtr("dev_input", &dev_flow_v); 478 | // op.PushValuePtr("dev_output", &dev_temp_v); 479 | // op.PushValuePtr("dev_temp", &dev_temp); 480 | // op.PushValuePtr("data_size", ¤t_data_size); 481 | // op.PushValuePtr("resample_size", &original_data_size); 482 | // cuop_resample_.Execute(op); 483 | 484 | 485 | // /* Read output data back from the device */ 486 | // CopyData2DFromDevice(dev_temp_u, flow_u, dev_container_size_.height, dev_container_size_.pitch); 487 | // CopyData2DFromDevice(dev_temp_v, flow_v, dev_container_size_.height, dev_container_size_.pitch); 488 | 489 | 490 | // /* Update 3D texture */ 491 | // visualization.Load3DFlowTextureFromData3DUVW(flow_u, flow_v, NULL); 492 | 493 | // cuda_memory_ptrs_.push(dev_temp); 494 | // cuda_memory_ptrs_.push(dev_temp_u); 495 | // cuda_memory_ptrs_.push(dev_temp_v); 496 | // cuda_memory_ptrs_.push(dev_temp_w); 497 | 498 | // visualization.DoNextStep(); 499 | // } 500 | //} 501 | 502 | } 503 | /* ---------------------------------------------------- */ 504 | /* Main loop END 
*/ 505 | /* ---------------------------------------------------- */ 506 | 507 | /* DEBUG Apply computed flow to the input data */ 508 | if (false) { 509 | /* CopyData2DtoDevice(frame_0, dev_frame_0, dev_container_size_.height, dev_container_size_.pitch); 510 | CopyData2DtoDevice(frame_1, dev_frame_1, dev_container_size_.height, dev_container_size_.pitch); 511 | 512 | CheckCudaError(cuStreamSynchronize(NULL)); 513 | 514 | CUdeviceptr dev_temp = cuda_memory_ptrs_.top(); 515 | cuda_memory_ptrs_.pop(); 516 | 517 | hx = 1.f; 518 | hy = 1.f; 519 | 520 | op.Clear(); 521 | op.PushValuePtr("dev_frame_0", &dev_frame_0); 522 | op.PushValuePtr("dev_frame_1", &dev_frame_1); 523 | op.PushValuePtr("dev_flow_u", &dev_flow_u); 524 | op.PushValuePtr("dev_flow_v", &dev_flow_v); 525 | op.PushValuePtr("dev_output", &dev_temp); 526 | op.PushValuePtr("data_size", &original_data_size); 527 | op.PushValuePtr("hx", &hx); 528 | op.PushValuePtr("hy", &hy); 529 | 530 | 531 | cuop_register_.Execute(op); 532 | CheckCudaError(cuStreamSynchronize(NULL)); 533 | CopyData2DFromDevice(dev_temp, flow_u, dev_container_size_.height, dev_container_size_.pitch); 534 | CheckCudaError(cuStreamSynchronize(NULL)); 535 | 536 | flow_u.WriteRAWToFileU8("./data/output/registrated-180-180-151.raw"); 537 | flow_u.WriteRAWToFileF32("./data/output/registrated-180-180-151-f.raw"); 538 | 539 | 540 | cuda_memory_ptrs_.push(dev_temp);*/ 541 | } 542 | 543 | /* Read output data back from the device */ 544 | CopyData2DFromDevice(dev_flow_u, flow_u, dev_container_size_.height, dev_container_size_.pitch); 545 | CopyData2DFromDevice(dev_flow_v, flow_v, dev_container_size_.height, dev_container_size_.pitch); 546 | 547 | /* Estimate GPU computation time */ 548 | CheckCudaError(cuEventRecord(cu_event_stop, NULL)); 549 | CheckCudaError(cuEventSynchronize(cu_event_stop)); 550 | 551 | float elapsed_time; 552 | CheckCudaError(cuEventElapsedTime(&elapsed_time, cu_event_start, cu_event_stop)); 553 | 554 | std::printf("Total GPU 
computation time: % 4.4fs\n", elapsed_time / 1000.); 555 | 556 | CheckCudaError(cuEventDestroy(cu_event_start)); 557 | CheckCudaError(cuEventDestroy(cu_event_stop)); 558 | 559 | /* Return all memory pointers to stack */ 560 | cuda_memory_ptrs_.push(dev_frame_0); 561 | cuda_memory_ptrs_.push(dev_frame_1); 562 | cuda_memory_ptrs_.push(dev_frame_0_res); 563 | cuda_memory_ptrs_.push(dev_frame_1_res_br); 564 | cuda_memory_ptrs_.push(dev_flow_u); 565 | cuda_memory_ptrs_.push(dev_flow_v); 566 | cuda_memory_ptrs_.push(dev_flow_du); 567 | cuda_memory_ptrs_.push(dev_flow_dv); 568 | 569 | } 570 | 571 | void OpticalFlow2D::Destroy() 572 | { 573 | for (CudaOperationBase* cuop : cuda_operations_) { 574 | cuop->Destroy(); 575 | } 576 | 577 | size_t freed_containers_count = 0; 578 | while (!cuda_memory_ptrs_.empty()) { 579 | CUdeviceptr dev_container = cuda_memory_ptrs_.top(); 580 | CheckCudaError(cuMemFree(dev_container)); 581 | 582 | ++freed_containers_count; 583 | cuda_memory_ptrs_.pop(); 584 | } 585 | if (freed_containers_count && freed_containers_count != dev_containers_count_) { 586 | std::printf("Warning. 
Not all device memory allocations were freed.\n"); 587 | } 588 | 589 | initialized_ = false; 590 | } 591 | 592 | OpticalFlow2D::~OpticalFlow2D() 593 | { 594 | Destroy(); 595 | } -------------------------------------------------------------------------------- /src/optical_flow/optical_flow_2d.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_2D_H_ 23 | #define GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_2D_H_ 24 | 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | #include "src/cuda_operations/cuda_operation_base.h" 31 | #include "src/cuda_operations/2d/cuda_operation_add_2d.h" 32 | #include "src/cuda_operations/2d/cuda_operation_median_2d.h" 33 | #include "src/cuda_operations/2d/cuda_operation_convolution_2d.h" 34 | #include "src/cuda_operations/2d/cuda_operation_registration_2d.h" 35 | #include "src/cuda_operations/2d/cuda_operation_resample_2d.h" 36 | #include "src/cuda_operations/2d/cuda_operation_solve_2d.h" 37 | 38 | #include "src/data_types/operation_parameters.h" 39 | #include "src/data_types/data2d.h" 40 | 41 | #include "src/optical_flow/optical_flow_base_2d.h" 42 | 43 | class OpticalFlow2D : public OpticalFlowBase2D{ 44 | private: 45 | const size_t dev_containers_count_ = 12; 46 | DataSize3 dev_container_size_; 47 | 48 | std::forward_list cuda_operations_; 49 | std::stack cuda_memory_ptrs_; 50 | 51 | CudaOperationAdd2D cuop_add_; 52 | CudaOperationConvolution2D cuop_convolution_; 53 | CudaOperationMedian2D cuop_median_; 54 | CudaOperationRegistration2D cuop_register_; 55 | CudaOperationResample2D cuop_resample_; 56 | CudaOperationSolve2D cuop_solve_; 57 | 58 | bool InitCudaOperations(); 59 | bool InitCudaMemory(); 60 | 61 | public: 62 | OpticalFlow2D(); 63 | 64 | bool Initialize(const DataSize3& data_size, DataConstancy data_constancy = DataConstancy::Grey) override; 65 | void ComputeFlow(Data2D& frame_0, Data2D& frame_1, Data2D& flow_u, Data2D& flow_v, OperationParameters& params) override; 66 | void Destroy() override; 67 | 68 | bool silent = false; 69 | 70 | ~OpticalFlow2D() override; 71 | }; 72 | 73 | 74 | #endif // !GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_2D_H_ 75 | -------------------------------------------------------------------------------- /src/optical_flow/optical_flow_base_2d.cpp: 
-------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #include "src/optical_flow/optical_flow_base_2d.h" 23 | 24 | #include 25 | 26 | OpticalFlowBase2D::OpticalFlowBase2D(const char* name) 27 | : name_(name) 28 | { 29 | } 30 | 31 | const char* OpticalFlowBase2D::GetName() const 32 | { 33 | return name_; 34 | } 35 | 36 | size_t OpticalFlowBase2D::GetMaxWarpLevel(size_t width, size_t height, float scale_factor) const 37 | { 38 | /* Compute maximum number of warping levels for given image size and warping reduction factor */ 39 | size_t r_width = 1; 40 | size_t r_height = 1; 41 | size_t level_counter = 1; 42 | 43 | while (scale_factor < 1.f) { 44 | float scale = std::pow(scale_factor, static_cast(level_counter)); 45 | r_width = static_cast(std::ceil(width * scale)); 46 | r_height = static_cast(std::ceil(height * scale)); 47 | 48 | if (r_width < 4 || r_height < 4 ) { 49 | break; 50 | } 51 | ++level_counter; 52 | } 53 | 54 | if (r_width == 1 || r_height == 1) { 55 | --level_counter; 56 | } 57 | 58 | return level_counter; 59 | } 60 | 61 | bool OpticalFlowBase2D::IsInitialized() const 62 | { 63 | if (!initialized_) { 64 | std::printf("Error: '%s' was not initialized.\n", name_); 65 | } 66 | return initialized_; 67 | } 68 | 69 | void 
OpticalFlowBase2D::ComputeFlow(Data2D& frame_0, Data2D& frame_1, Data2D& flow_u, Data2D& flow_v, OperationParameters& params) 70 | { 71 | std::printf("Warning: '%s' ComputeFlow() was not defined.\n", name_); 72 | 73 | } 74 | 75 | void OpticalFlowBase2D::Destroy() 76 | { 77 | initialized_ = false; 78 | } 79 | 80 | OpticalFlowBase2D::~OpticalFlowBase2D() 81 | { 82 | if (initialized_) { 83 | Destroy(); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/optical_flow/optical_flow_base_2d.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_BASE_2D_H_ 23 | #define GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_BASE_2D_H_ 24 | 25 | #include "src/data_types/data2d.h" 26 | #include "src/data_types/data_structs.h" 27 | #include "src/data_types/operation_parameters.h" 28 | 29 | class OpticalFlowBase2D { 30 | private: 31 | const char* name_ = nullptr; 32 | 33 | protected: 34 | bool initialized_ = false; 35 | 36 | DataConstancy data_constancy_; 37 | 38 | OpticalFlowBase2D(const char* name); 39 | 40 | size_t GetMaxWarpLevel(size_t width, size_t height, float scale_factor) const; 41 | 42 | bool IsInitialized() const; 43 | 44 | public: 45 | const char* GetName() const; 46 | 47 | virtual bool Initialize(const DataSize3& data_size, DataConstancy data_constancy = DataConstancy::Grey) = 0; 48 | virtual void ComputeFlow(Data2D& frame_0, Data2D& frame_1, Data2D& flow_u, Data2D& flow_v, OperationParameters& params); 49 | virtual void Destroy(); 50 | 51 | virtual ~OpticalFlowBase2D(); 52 | }; 53 | 54 | 55 | 56 | #endif // !GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_BASE_2D_H_ 57 | -------------------------------------------------------------------------------- /src/utils/common_utils.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #include "common_utils.h" 23 | 24 | #include 25 | 26 | #ifdef _WIN32 27 | #include 28 | #endif 29 | #ifdef __gnu_linux__ 30 | #include 31 | #endif 32 | 33 | namespace Utils { 34 | 35 | void GetExecutablePath(char* path, size_t size) 36 | { 37 | #ifdef _WIN32 38 | GetModuleFileName(NULL, path, size); 39 | char* last_char = std::strrchr(path, '\\'); 40 | if (last_char) { 41 | *last_char = '\0'; 42 | } 43 | #endif 44 | #ifdef __gnu_linux__ 45 | ssize_t link_size = readlink("/proc/self/exe", path, size); 46 | path[link_size] = '\0'; 47 | char* last_char = std::strrchr(path, '/'); 48 | if (last_char) { 49 | *last_char = '\0'; 50 | } 51 | #endif 52 | } 53 | 54 | void PrintProgressBar(float complete) 55 | { 56 | const size_t width = 40; 57 | char buffer[80]; 58 | char* buffer_ptr = buffer; 59 | 60 | *(buffer_ptr++) = '['; 61 | for (size_t i = 0; i < width; ++i) { 62 | *(buffer_ptr++) = (i / static_cast(width) < complete) ? '=' : ' '; 63 | } 64 | *(buffer_ptr++) = ']'; 65 | *(buffer_ptr++) = '\0'; 66 | 67 | printf("\r%s", buffer); 68 | } 69 | 70 | } // namespace Utils -------------------------------------------------------------------------------- /src/utils/common_utils.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 
19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_UTILS_COMMON_UTILS_H_ 23 | #define GPUFLOW3D_UTILS_COMMON_UTILS_H_ 24 | 25 | #include 26 | 27 | namespace Utils { 28 | 29 | void GetExecutablePath(char* path, size_t size); 30 | void PrintProgressBar(float complete); 31 | 32 | } // namespace Utils 33 | 34 | #define GET_PARAM_OR_RETURN(P, T, V, N) \ 35 | do { \ 36 | void* v_ptr = (P).GetValuePtr((N)); \ 37 | if (v_ptr) { \ 38 | (V) = *(static_cast(v_ptr)); \ 39 | } else { \ 40 | std::printf("Operation: '%s'. Missing parameter '%s'.\n", GetName(), (N)); \ 41 | return; \ 42 | } \ 43 | } while (0) 44 | 45 | #define GET_PARAM_OR_RETURN_VALUE(P, T, V, N, R) \ 46 | do { \ 47 | void* v_ptr = (P).GetValuePtr((N)); \ 48 | if (v_ptr) { \ 49 | (V) = *(static_cast(v_ptr)); \ 50 | } else { \ 51 | std::printf("Operation: '%s'. Missing parameter '%s'.\n", GetName(), (N)); \ 52 | return (R); \ 53 | } \ 54 | } while (0) 55 | 56 | #define GET_PARAM_PTR_OR_RETURN(P, T, PTR, N) \ 57 | do { \ 58 | void* v_ptr = (P).GetValuePtr((N)); \ 59 | if (v_ptr) { \ 60 | (PTR) = (static_cast(v_ptr)); \ 61 | } else { \ 62 | std::printf("Operation: '%s'. Missing parameter '%s'.\n", GetName(), (N)); \ 63 | return; \ 64 | } \ 65 | } while (0) 66 | 67 | #endif // !GPUFLOW3D_UTILS_COMMON_UTILS_H_ 68 | -------------------------------------------------------------------------------- /src/utils/cuda_utils.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. 
TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #include "cuda_utils.h" 23 | 24 | #include 25 | 26 | bool InitCudaContextWithFirstAvailableDevice(CUcontext* cu_context) 27 | { 28 | if (CheckCudaError(cuInit(0))) { 29 | return false; 30 | } 31 | 32 | int cu_device_count; 33 | if (CheckCudaError(cuDeviceGetCount(&cu_device_count))) { 34 | return false; 35 | } 36 | CUdevice cu_device; 37 | 38 | if (cu_device_count == 0) { 39 | printf("There are no cuda capable devices."); 40 | return false; 41 | } 42 | 43 | if (CheckCudaError(cuDeviceGet(&cu_device, 0))) { 44 | return false; 45 | } 46 | 47 | char cu_device_name[64]; 48 | if (CheckCudaError(cuDeviceGetName(cu_device_name, 64, cu_device))) { 49 | return false; 50 | } 51 | 52 | int launch_timeout; 53 | CheckCudaError(cuDeviceGetAttribute(&launch_timeout, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, cu_device)); 54 | 55 | printf("CUDA Device: %s. Launch timeout: %s\n", cu_device_name, (launch_timeout ? 
"Yes" : "No")); 56 | 57 | if (CheckCudaError(cuCtxCreate(cu_context, 0, cu_device))) { 58 | return false; 59 | } 60 | 61 | return true; 62 | } 63 | 64 | 65 | 66 | void CopyData2DtoDevice(Data2D& data2d, CUdeviceptr device_ptr, size_t device_height, size_t device_pitch) 67 | { 68 | CUDA_MEMCPY2D cu_copy2d; 69 | std::memset(&cu_copy2d, 0, sizeof(CUDA_MEMCPY2D)); 70 | 71 | cu_copy2d.srcMemoryType = CU_MEMORYTYPE_HOST; 72 | cu_copy2d.srcHost = data2d.DataPtr(); 73 | cu_copy2d.srcPitch = data2d.Width() * sizeof(float); 74 | 75 | cu_copy2d.dstMemoryType = CU_MEMORYTYPE_DEVICE; 76 | cu_copy2d.dstDevice = device_ptr; 77 | cu_copy2d.dstPitch = device_pitch; 78 | 79 | cu_copy2d.WidthInBytes = data2d.Width() * sizeof(float); 80 | cu_copy2d.Height = data2d.Height(); 81 | 82 | CheckCudaError(cuMemcpy2D(&cu_copy2d)); 83 | } 84 | 85 | 86 | void CopyData2DFromDevice(CUdeviceptr device_ptr, Data2D& data2d, size_t device_height, size_t device_pitch) 87 | { 88 | CUDA_MEMCPY2D cu_copy2d; 89 | std::memset(&cu_copy2d, 0, sizeof(CUDA_MEMCPY2D)); 90 | 91 | cu_copy2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; 92 | cu_copy2d.srcDevice = device_ptr; 93 | cu_copy2d.srcPitch = device_pitch; 94 | 95 | 96 | cu_copy2d.dstMemoryType = CU_MEMORYTYPE_HOST; 97 | cu_copy2d.dstHost = data2d.DataPtr(); 98 | cu_copy2d.dstPitch = data2d.Width() * sizeof(float); 99 | 100 | 101 | cu_copy2d.WidthInBytes = data2d.Width() * sizeof(float); 102 | cu_copy2d.Height = data2d.Height(); 103 | 104 | CheckCudaError(cuMemcpy2D(&cu_copy2d)); 105 | } 106 | -------------------------------------------------------------------------------- /src/utils/cuda_utils.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the 
author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_UTILS_CUDA_UTILS_H_ 23 | #define GPUFLOW3D_UTILS_CUDA_UTILS_H_ 24 | 25 | #include 26 | 27 | #include 28 | 29 | #include "src/data_types/data2d.h" 30 | 31 | #define CheckCudaError(error) __CheckCudaError (error, __FILE__, __LINE__) 32 | 33 | inline bool __CheckCudaError(CUresult error, const char* file, const int line) 34 | { 35 | if (error != CUDA_SUCCESS) 36 | { 37 | const char *error_name = nullptr; 38 | const char *error_string = nullptr; 39 | 40 | #define NEW_CUDA_DRIVER 41 | #ifdef NEW_CUDA_DRIVER 42 | cuGetErrorName(error, &error_name); 43 | cuGetErrorString(error, &error_string); 44 | #endif 45 | 46 | std::fprintf(stderr, "Driver API error = %04d %s\n%s\n from file <%s>, line %i.\n", 47 | error, error_name, error_string, file, line); 48 | return true; 49 | } 50 | return false; 51 | } 52 | 53 | bool InitCudaContextWithFirstAvailableDevice(CUcontext* cu_context); 54 | 55 | void CopyData2DtoDevice(Data2D& data2d, CUdeviceptr device_ptr, size_t device_height, size_t device_pitch); 56 | void CopyData2DFromDevice(CUdeviceptr device_ptr, Data2D& data2d, size_t device_height, size_t device_pitch); 57 | 58 | 59 | 60 | #endif // !GPUFLOW3D_UTILS_CUDA_UTILS_H_ 61 | -------------------------------------------------------------------------------- /src/utils/io_utils.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 
0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #include "io_utils.h" 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #include "src/data_types/data2d.h" 29 | 30 | using namespace IOUtils; 31 | 32 | 33 | 34 | 35 | void IOUtils::WriteFlowToImageRGB(Data2D& u, Data2D& v, float flowMaxScale, string fileName) 36 | { 37 | 38 | float maxLength = flowMaxScale; 39 | 40 | // Compute scaling factor 41 | float factor = 1.0 / maxLength; 42 | 43 | RGBColor rgb; 44 | 45 | fstream ofile(fileName.c_str(), ios::out | ios::binary); 46 | 47 | if (!ofile.is_open()) 48 | { 49 | std::cerr << "Error: cannot save file " << std::endl; 50 | exit(255); 51 | } 52 | 53 | int nx = u.Width(); 54 | int ny = u.Height(); 55 | 56 | GRAY res; 57 | 58 | ofile<<"P6 \n"; 59 | ofile<r = 0; 119 | this->g = 0; 120 | this->b = 0; 121 | } 122 | 123 | RGBColor::RGBColor(int r, int g, int b) 124 | { 125 | this->r = r; 126 | this->g = g; 127 | this->b = b; 128 | } 129 | 130 | 131 | /*****************************************************************************/ 132 | /* */ 133 | /* Copyright 08/2006 by Dr. Andres Bruhn */ 134 | /* Faculty of Mathematics and Computer Science, Saarland University, */ 135 | /* Saarbruecken, Germany. 
*/ 136 | /* */ 137 | /* Modified by Alexey Ershov */ 138 | /* */ 139 | /*****************************************************************************/ 140 | RGBColor IOUtils::ConvertToRGB(float x, float y) 141 | { 142 | /********************************************************/ 143 | float Pi; /* pi */ 144 | float amp; /* amplitude (magnitude) */ 145 | float phi; /* phase (angle) */ 146 | float alpha, beta; /* weights for linear interpolation */ 147 | /********************************************************/ 148 | 149 | RGBColor rgb; 150 | 151 | /* if (isUnknownFlow(x, y)) { 152 | x = 0.0; 153 | y = 0.0; 154 | }*/ 155 | 156 | /* set pi */ 157 | Pi = 2.0 * acos(0.0); 158 | 159 | /* determine amplitude and phase (cut amp at 1) */ 160 | amp = sqrt(x * x + y * y); 161 | if (amp > 1) amp = 1; 162 | if (x == 0.0) 163 | if (y >= 0.0) phi = 0.5 * Pi; 164 | else phi = 1.5 * Pi; 165 | else if (x > 0.0) 166 | if (y >= 0.0) phi = atan(y/x); 167 | else phi = 2.0 * Pi + atan(y/x); 168 | else phi = Pi + atan(y/x); 169 | 170 | phi = phi / 2.0; 171 | 172 | // interpolation between red (0) and blue (0.25 * Pi) 173 | if ((phi >= 0.0) && (phi < 0.125 * Pi)) { 174 | beta = phi / (0.125 * Pi); 175 | alpha = 1.0 - beta; 176 | rgb.r = (int)floor(amp * (alpha * 255.0 + beta * 255.0)); 177 | rgb.g = (int)floor(amp * (alpha * 0.0 + beta * 0.0)); 178 | rgb.b = (int)floor(amp * (alpha * 0.0 + beta * 255.0)); 179 | } 180 | if ((phi >= 0.125 * Pi) && (phi < 0.25 * Pi)) { 181 | beta = (phi-0.125 * Pi) / (0.125 * Pi); 182 | alpha = 1.0 - beta; 183 | rgb.r = (int)floor(amp * (alpha * 255.0 + beta * 64.0)); 184 | rgb.g = (int)floor(amp * (alpha * 0.0 + beta * 64.0)); 185 | rgb.b = (int)floor(amp * (alpha * 255.0 + beta * 255.0)); 186 | } 187 | // interpolation between blue (0.25 * Pi) and green (0.5 * Pi) 188 | if ((phi >= 0.25 * Pi) && (phi < 0.375 * Pi)) { 189 | beta = (phi - 0.25 * Pi) / (0.125 * Pi); 190 | alpha = 1.0 - beta; 191 | rgb.r = (int)floor(amp * (alpha * 64.0 + beta * 0.0)); 192 | 
rgb.g = (int)floor(amp * (alpha * 64.0 + beta * 255.0)); 193 | rgb.b = (int)floor(amp * (alpha * 255.0 + beta * 255.0)); 194 | } 195 | if ((phi >= 0.375 * Pi) && (phi < 0.5 * Pi)) { 196 | beta = (phi - 0.375 * Pi) / (0.125 * Pi); 197 | alpha = 1.0 - beta; 198 | rgb.r = (int)floor(amp * (alpha * 0.0 + beta * 0.0)); 199 | rgb.g = (int)floor(amp * (alpha * 255.0 + beta * 255.0)); 200 | rgb.b = (int)floor(amp * (alpha * 255.0 + beta * 0.0)); 201 | } 202 | // interpolation between green (0.5 * Pi) and yellow (0.75 * Pi) 203 | if ((phi >= 0.5 * Pi) && (phi < 0.75 * Pi)) { 204 | beta = (phi - 0.5 * Pi) / (0.25 * Pi); 205 | alpha = 1.0 - beta; 206 | rgb.r = (int)floor(amp * (alpha * 0.0 + beta * 255.0)); 207 | rgb.g = (int)floor(amp * (alpha * 255.0 + beta * 255.0)); 208 | rgb.b = (int)floor(amp * (alpha * 0.0 + beta * 0.0)); 209 | } 210 | // interpolation between yellow (0.75 * Pi) and red (Pi) 211 | if ((phi >= 0.75 * Pi) && (phi <= Pi)) { 212 | beta = (phi - 0.75 * Pi) / (0.25 * Pi); 213 | alpha = 1.0 - beta; 214 | rgb.r = (int)floor(amp * (alpha * 255.0 + beta * 255.0)); 215 | rgb.g = (int)floor(amp * (alpha * 255.0 + beta * 0.0)); 216 | rgb.b = (int)floor(amp * (alpha * 0.0 + beta * 0.0)); 217 | } 218 | 219 | /* check RGBColor range */ 220 | rgb.r = ConvertToByte(rgb.r); 221 | rgb.g = ConvertToByte(rgb.g); 222 | rgb.b = ConvertToByte(rgb.b); 223 | 224 | return rgb; 225 | } 226 | 227 | -------------------------------------------------------------------------------- /src/utils/io_utils.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, 
Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #ifndef GPUFLOW3D_UTILS_IO_UTILS_H_ 23 | #define GPUFLOW3D_UTILS_IO_UTILS_H_ 24 | 25 | #include "src/data_types/data2d.h" 26 | 27 | #include 28 | #include 29 | 30 | 31 | using namespace std; 32 | 33 | namespace IOUtils { 34 | 35 | typedef unsigned char GRAY; 36 | 37 | struct RGBColor 38 | { 39 | int r; 40 | int g; 41 | int b; 42 | 43 | RGBColor(); 44 | 45 | RGBColor(int r, int g, int b) ; 46 | }; 47 | 48 | 49 | //------------------------------------------------------------------ 50 | // 2D Writing Routines 51 | //------------------------------------------------------------------ 52 | void WriteFlowToImageRGB(Data2D& u, Data2D& v, float flowMaxScale, string fileName); 53 | void WriteMagnitudeToFileF32(Data2D& u, Data2D& v, string fileName); 54 | 55 | // Data convertion routines 56 | RGBColor ConvertToRGB(float x, float y); 57 | 58 | inline int ConvertToByte(int num) 59 | { 60 | return (num >=255) * 255 + ((num < 255) && (num > 0))* num; 61 | } 62 | 63 | inline GRAY ConvertToGray(float number) 64 | { 65 | GRAY result; 66 | 67 | if (number < 0.0) 68 | result = (GRAY)(0.0); 69 | else if (number > 255.0) 70 | result = (GRAY)(255.0); 71 | else 72 | result = (GRAY)(number); 73 | 74 | return result; 75 | 76 | } 77 | 78 | } 79 | 80 | #endif // !GPUFLOW3D_UTILS_IO_UTILS_H_ 81 | 82 | -------------------------------------------------------------------------------- /src/utils/settings.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using NVIDIA CUDA 3 | * @author Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology 4 | * 5 | * @date 2015-2018 6 | * @version 0.5.0 7 | * 8 | * 
9 | * @section LICENSE 10 | * 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation, 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany; 13 | * 14 | * The current implemetation contains the following licenses: 15 | * 16 | * 1. TinyXml package: 17 | * Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). . 18 | * See src/utils/tinyxml.h for details. 19 | * 20 | */ 21 | 22 | #include "settings.h" 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | using namespace OpticFlow; 29 | 30 | 31 | 32 | 33 | void Settings::LoadSettingsManually() 34 | { 35 | 36 | this->sigma = 1.2; 37 | 38 | // Solver settings 39 | this->iterInner = 5; 40 | this->iterOuter = 20; 41 | 42 | this->alpha = 40; 43 | 44 | this->levels = 1; 45 | this->warpScale = 0.5; 46 | 47 | 48 | this->e_smooth = 0.1; 49 | this->e_data = 0.001; 50 | 51 | } 52 | 53 | int Settings::LoadSettings(string fileName) 54 | { 55 | 56 | string m_name; 57 | 58 | TiXmlDocument doc(fileName.c_str()); 59 | 60 | if (!doc.LoadFile()) { 61 | cout<<"Cannot read settings file: "<Value(); 85 | 86 | // save this for later 87 | hRoot=TiXmlHandle(pElem); 88 | 89 | int value; 90 | float dbValue; 91 | 92 | 93 | pElem=hRoot.FirstChild( "Input" ).FirstChild("Path").Element(); 94 | this->inputPath = pElem->Attribute("inputPath"); 95 | pElem=hRoot.FirstChild( "Output").FirstChild("Path").Element(); 96 | this->outputPath = pElem->Attribute("outputPath"); 97 | 98 | this->fileName1 = hRoot.FirstChild( "Input" ).FirstChild("Mode").FirstChild("Files").Element()->Attribute("file1"); 99 | this->fileName2 = hRoot.FirstChild( "Input" ).FirstChild("Mode").FirstChild("Files").Element()->Attribute("file2"); 100 | 101 | hRoot.FirstChild( "Parameters" ).FirstChild("Method").Element()->QueryIntAttribute("key", &value); 102 | this->press_key = value; 103 | 104 | hRoot.FirstChild( "Input" ).FirstChild("Mode").Element()->QueryIntAttribute("Nx", &value); 
105 | this->width = value; 106 | 107 | hRoot.FirstChild( "Input" ).FirstChild("Mode").Element()->QueryIntAttribute("Ny", &value); 108 | this->height = value; 109 | 110 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Model").Element()->QueryFloatAttribute("sigma", &dbValue); 111 | this->sigma = dbValue; 112 | 113 | 114 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Iterations").Element()->QueryIntAttribute("inner", &value); 115 | this->iterInner = value; 116 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Iterations").Element()->QueryIntAttribute("outer", &value); 117 | this->iterOuter = value; 118 | 119 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Warping").Element()->QueryIntAttribute("levels", &value); 120 | this->levels= value; 121 | 122 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Warping").Element()->QueryFloatAttribute("scaling", &dbValue); 123 | this->warpScale= dbValue; 124 | 125 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Warping").Element()->QueryIntAttribute("medianRadius", &value); 126 | this->medianRadius = value; 127 | 128 | 129 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Model").Element()->QueryFloatAttribute("alpha", &dbValue); 130 | this->alpha= dbValue; 131 | 132 | 133 | 134 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Model").Element()->QueryFloatAttribute("e_smooth", &dbValue); 135 | this->e_smooth= dbValue; 136 | hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Model").Element()->QueryFloatAttribute("e_data", &dbValue); 137 | this->e_data= dbValue; 138 | 139 | 140 | 141 | return 0; 142 | 143 | 144 | } 145 | 146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /src/utils/settings.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 2D Optical flow using 
namespace OpticFlow {

    /**
     * Settings class, which contains all the settings for Optical flow framework.
     *
     * Every scalar member carries a default member initializer, so a freshly
     * constructed object never holds indeterminate values. This matters
     * because a loader need not assign every field: LoadSettingsManually(),
     * for instance, leaves width, height, precision, medianRadius, flowScale
     * and press_key untouched.
     */
    class Settings
    {
    public:

        // Input settings
        std::string inputPath;
        std::string outputPath;
        std::string fileName1;
        std::string fileName2;

        // General
        int   width = 0;
        int   height = 0;
        float sigma = 0.0f;
        float precision = 0.0f;
        int   medianRadius = 0;

        // Solver settings
        int   iterInner = 0;
        int   iterOuter = 0;
        float alpha = 0.0f;

        float e_smooth = 0.0f;
        float e_data = 0.0f;

        int   levels = 0;
        float warpScale = 0.0f;

        float flowScale = 0.0f;

        bool  press_key = false;


    private:
        std::string content;


    public:
        /**
         * Loads the settings from an XML file.
         *
         * @param fileName  Path of the settings file.
         * @return Status code; see the definition for the values used.
         */
        int LoadSettings(std::string fileName);

        /// Assigns a built-in parameter set without reading any file.
        void LoadSettingsManually();
    };

}
https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/src/utils/tinystr.cpp -------------------------------------------------------------------------------- /src/utils/tinystr.h: -------------------------------------------------------------------------------- 1 | /* 2 | www.sourceforge.net/projects/tinyxml 3 | Original file by Yves Berquin. 4 | 5 | This software is provided 'as-is', without any express or implied 6 | warranty. In no event will the authors be held liable for any 7 | damages arising from the use of this software. 8 | 9 | Permission is granted to anyone to use this software for any 10 | purpose, including commercial applications, and to alter it and 11 | redistribute it freely, subject to the following restrictions: 12 | 13 | 1. The origin of this software must not be misrepresented; you must 14 | not claim that you wrote the original software. If you use this 15 | software in a product, an acknowledgment in the product documentation 16 | would be appreciated but is not required. 17 | 18 | 2. Altered source versions must be plainly marked as such, and 19 | must not be misrepresented as being the original software. 20 | 21 | 3. This notice may not be removed or altered from any source 22 | distribution. 23 | */ 24 | 25 | /* 26 | * THIS FILE WAS ALTERED BY Tyge Lovset, 7. April 2005. 27 | * 28 | * - completely rewritten. compact, clean, and fast implementation. 29 | * - sizeof(TiXmlString) = pointer size (4 bytes on 32-bit systems) 30 | * - fixed reserve() to work as per specification. 31 | * - fixed buggy compares operator==(), operator<(), and operator>() 32 | * - fixed operator+=() to take a const ref argument, following spec. 33 | * - added "copy" constructor with length, and most compare operators. 34 | * - added swap(), clear(), size(), capacity(), operator+(). 
35 | */ 36 | 37 | #ifndef TIXML_USE_STL 38 | 39 | #ifndef TIXML_STRING_INCLUDED 40 | #define TIXML_STRING_INCLUDED 41 | 42 | #include 43 | #include 44 | 45 | /* The support for explicit isn't that universal, and it isn't really 46 | required - it is used to check that the TiXmlString class isn't incorrectly 47 | used. Be nice to old compilers and macro it here: 48 | */ 49 | #if defined(_MSC_VER) && (_MSC_VER >= 1200 ) 50 | // Microsoft visual studio, version 6 and higher. 51 | #define TIXML_EXPLICIT explicit 52 | #elif defined(__GNUC__) && (__GNUC__ >= 3 ) 53 | // GCC version 3 and higher.s 54 | #define TIXML_EXPLICIT explicit 55 | #else 56 | #define TIXML_EXPLICIT 57 | #endif 58 | 59 | 60 | /* 61 | TiXmlString is an emulation of a subset of the std::string template. 62 | Its purpose is to allow compiling TinyXML on compilers with no or poor STL support. 63 | Only the member functions relevant to the TinyXML project have been implemented. 64 | The buffer allocation is made by a simplistic power of 2 like mechanism : if we increase 65 | a string and there's no more room, we allocate a buffer twice as big as we need. 
66 | */ 67 | class TiXmlString 68 | { 69 | public : 70 | // The size type used 71 | typedef size_t size_type; 72 | 73 | // Error value for find primitive 74 | static const size_type npos; // = -1; 75 | 76 | 77 | // TiXmlString empty constructor 78 | TiXmlString () : rep_(&nullrep_) 79 | { 80 | } 81 | 82 | // TiXmlString copy constructor 83 | TiXmlString ( const TiXmlString & copy) : rep_(0) 84 | { 85 | init(copy.length()); 86 | memcpy(start(), copy.data(), length()); 87 | } 88 | 89 | // TiXmlString constructor, based on a string 90 | TIXML_EXPLICIT TiXmlString ( const char * copy) : rep_(0) 91 | { 92 | init( static_cast( strlen(copy) )); 93 | memcpy(start(), copy, length()); 94 | } 95 | 96 | // TiXmlString constructor, based on a string 97 | TIXML_EXPLICIT TiXmlString ( const char * str, size_type len) : rep_(0) 98 | { 99 | init(len); 100 | memcpy(start(), str, len); 101 | } 102 | 103 | // TiXmlString destructor 104 | ~TiXmlString () 105 | { 106 | quit(); 107 | } 108 | 109 | // = operator 110 | TiXmlString& operator = (const char * copy) 111 | { 112 | return assign( copy, (size_type)strlen(copy)); 113 | } 114 | 115 | // = operator 116 | TiXmlString& operator = (const TiXmlString & copy) 117 | { 118 | return assign(copy.start(), copy.length()); 119 | } 120 | 121 | 122 | // += operator. Maps to append 123 | TiXmlString& operator += (const char * suffix) 124 | { 125 | return append(suffix, static_cast( strlen(suffix) )); 126 | } 127 | 128 | // += operator. Maps to append 129 | TiXmlString& operator += (char single) 130 | { 131 | return append(&single, 1); 132 | } 133 | 134 | // += operator. Maps to append 135 | TiXmlString& operator += (const TiXmlString & suffix) 136 | { 137 | return append(suffix.data(), suffix.length()); 138 | } 139 | 140 | 141 | // Convert a TiXmlString into a null-terminated char * 142 | const char * c_str () const { return rep_->str; } 143 | 144 | // Convert a TiXmlString into a char * (need not be null terminated). 
145 | const char * data () const { return rep_->str; } 146 | 147 | // Return the length of a TiXmlString 148 | size_type length () const { return rep_->size; } 149 | 150 | // Alias for length() 151 | size_type size () const { return rep_->size; } 152 | 153 | // Checks if a TiXmlString is empty 154 | bool empty () const { return rep_->size == 0; } 155 | 156 | // Return capacity of string 157 | size_type capacity () const { return rep_->capacity; } 158 | 159 | 160 | // single char extraction 161 | const char& at (size_type index) const 162 | { 163 | assert( index < length() ); 164 | return rep_->str[ index ]; 165 | } 166 | 167 | // [] operator 168 | char& operator [] (size_type index) const 169 | { 170 | assert( index < length() ); 171 | return rep_->str[ index ]; 172 | } 173 | 174 | // find a char in a string. Return TiXmlString::npos if not found 175 | size_type find (char lookup) const 176 | { 177 | return find(lookup, 0); 178 | } 179 | 180 | // find a char in a string from an offset. Return TiXmlString::npos if not found 181 | size_type find (char tofind, size_type offset) const 182 | { 183 | if (offset >= length()) return npos; 184 | 185 | for (const char* p = c_str() + offset; *p != '\0'; ++p) 186 | { 187 | if (*p == tofind) return static_cast< size_type >( p - c_str() ); 188 | } 189 | return npos; 190 | } 191 | 192 | void clear () 193 | { 194 | //Lee: 195 | //The original was just too strange, though correct: 196 | // TiXmlString().swap(*this); 197 | //Instead use the quit & re-init: 198 | quit(); 199 | init(0,0); 200 | } 201 | 202 | /* Function to reserve a big amount of data when we know we'll need it. Be aware that this 203 | function DOES NOT clear the content of the TiXmlString if any exists. 
204 | */ 205 | void reserve (size_type cap); 206 | 207 | TiXmlString& assign (const char* str, size_type len); 208 | 209 | TiXmlString& append (const char* str, size_type len); 210 | 211 | void swap (TiXmlString& other) 212 | { 213 | Rep* r = rep_; 214 | rep_ = other.rep_; 215 | other.rep_ = r; 216 | } 217 | 218 | private: 219 | 220 | void init(size_type sz) { init(sz, sz); } 221 | void set_size(size_type sz) { rep_->str[ rep_->size = sz ] = '\0'; } 222 | char* start() const { return rep_->str; } 223 | char* finish() const { return rep_->str + rep_->size; } 224 | 225 | struct Rep 226 | { 227 | size_type size, capacity; 228 | char str[1]; 229 | }; 230 | 231 | void init(size_type sz, size_type cap) 232 | { 233 | if (cap) 234 | { 235 | // Lee: the original form: 236 | // rep_ = static_cast(operator new(sizeof(Rep) + cap)); 237 | // doesn't work in some cases of new being overloaded. Switching 238 | // to the normal allocation, although use an 'int' for systems 239 | // that are overly picky about structure alignment. 240 | const size_type bytesNeeded = sizeof(Rep) + cap; 241 | const size_type intsNeeded = ( bytesNeeded + sizeof(int) - 1 ) / sizeof( int ); 242 | rep_ = reinterpret_cast( new int[ intsNeeded ] ); 243 | 244 | rep_->str[ rep_->size = sz ] = '\0'; 245 | rep_->capacity = cap; 246 | } 247 | else 248 | { 249 | rep_ = &nullrep_; 250 | } 251 | } 252 | 253 | void quit() 254 | { 255 | if (rep_ != &nullrep_) 256 | { 257 | // The rep_ is really an array of ints. (see the allocator, above). 258 | // Cast it back before delete, so the compiler won't incorrectly call destructors. 
259 | delete [] ( reinterpret_cast( rep_ ) ); 260 | } 261 | } 262 | 263 | Rep * rep_; 264 | static Rep nullrep_; 265 | 266 | } ; 267 | 268 | 269 | inline bool operator == (const TiXmlString & a, const TiXmlString & b) 270 | { 271 | return ( a.length() == b.length() ) // optimization on some platforms 272 | && ( strcmp(a.c_str(), b.c_str()) == 0 ); // actual compare 273 | } 274 | inline bool operator < (const TiXmlString & a, const TiXmlString & b) 275 | { 276 | return strcmp(a.c_str(), b.c_str()) < 0; 277 | } 278 | 279 | inline bool operator != (const TiXmlString & a, const TiXmlString & b) { return !(a == b); } 280 | inline bool operator > (const TiXmlString & a, const TiXmlString & b) { return b < a; } 281 | inline bool operator <= (const TiXmlString & a, const TiXmlString & b) { return !(b < a); } 282 | inline bool operator >= (const TiXmlString & a, const TiXmlString & b) { return !(a < b); } 283 | 284 | inline bool operator == (const TiXmlString & a, const char* b) { return strcmp(a.c_str(), b) == 0; } 285 | inline bool operator == (const char* a, const TiXmlString & b) { return b == a; } 286 | inline bool operator != (const TiXmlString & a, const char* b) { return !(a == b); } 287 | inline bool operator != (const char* a, const TiXmlString & b) { return !(b == a); } 288 | 289 | TiXmlString operator + (const TiXmlString & a, const TiXmlString & b); 290 | TiXmlString operator + (const TiXmlString & a, const char* b); 291 | TiXmlString operator + (const char* a, const TiXmlString & b); 292 | 293 | 294 | /* 295 | TiXmlOutStream is an emulation of std::ostream. It is based on TiXmlString. 296 | Only the operators that we need for TinyXML have been developped. 297 | */ 298 | class TiXmlOutStream : public TiXmlString 299 | { 300 | public : 301 | 302 | // TiXmlOutStream << operator. 303 | TiXmlOutStream & operator << (const TiXmlString & in) 304 | { 305 | *this += in; 306 | return *this; 307 | } 308 | 309 | // TiXmlOutStream << operator. 
310 | TiXmlOutStream & operator << (const char * in) 311 | { 312 | *this += in; 313 | return *this; 314 | } 315 | 316 | } ; 317 | 318 | #endif // TIXML_STRING_INCLUDED 319 | #endif // TIXML_USE_STL 320 | -------------------------------------------------------------------------------- /src/utils/tinyxmlerror.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | www.sourceforge.net/projects/tinyxml 3 | Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com) 4 | 5 | This software is provided 'as-is', without any express or implied 6 | warranty. In no event will the authors be held liable for any 7 | damages arising from the use of this software. 8 | 9 | Permission is granted to anyone to use this software for any 10 | purpose, including commercial applications, and to alter it and 11 | redistribute it freely, subject to the following restrictions: 12 | 13 | 1. The origin of this software must not be misrepresented; you must 14 | not claim that you wrote the original software. If you use this 15 | software in a product, an acknowledgment in the product documentation 16 | would be appreciated but is not required. 17 | 18 | 2. Altered source versions must be plainly marked as such, and 19 | must not be misrepresented as being the original software. 20 | 21 | 3. This notice may not be removed or altered from any source 22 | distribution. 23 | */ 24 | 25 | //#include "stdafx.h" 26 | #include "tinyxml.h" 27 | 28 | // The goal of the seperate error file is to make the first 29 | // step towards localization. tinyxml (currently) only supports 30 | // english error messages, but the could now be translated. 31 | // 32 | // It also cleans up the code a bit. 
// Table of English error messages, one string per entry; the array is
// declared with TIXML_ERROR_STRING_COUNT elements.
// NOTE(review): presumably indexed by the error-code enum declared in
// tinyxml.h, so the order here would have to stay in step with that enum.
// Confirm against the header before adding or reordering entries.
const char* TiXmlBase::errorString[ TIXML_ERROR_STRING_COUNT ] =
{
	"No error",
	"Error",
	"Failed to open file",
	"Memory allocation failed.",
	"Error parsing Element.",
	"Failed to read Element name",
	"Error reading Element value.",
	"Error reading Attributes.",
	"Error: empty tag.",
	"Error reading end tag.",
	"Error parsing Unknown.",
	"Error parsing Comment.",
	"Error parsing Declaration.",
	"Error document empty.",
	"Error null (0) or unexpected EOF found in input stream.",
	"Error parsing CDATA.",
	"Error when TiXmlDocument added to document, because TiXmlDocument can only be at the root.",
};