├── Makefile
├── README.md
├── data
    ├── rub1.raw
    └── rub2.raw
├── examples
    ├── insect.png
    └── optical_flow_example.png
├── settings.xml
└── src
    ├── cuda_operations
        ├── 2d
        │   ├── cuda_operation_add_2d.cpp
        │   ├── cuda_operation_add_2d.h
        │   ├── cuda_operation_convolution_2d.cpp
        │   ├── cuda_operation_convolution_2d.h
        │   ├── cuda_operation_median_2d.cpp
        │   ├── cuda_operation_median_2d.h
        │   ├── cuda_operation_registration_2d.cpp
        │   ├── cuda_operation_registration_2d.h
        │   ├── cuda_operation_resample_2d.cpp
        │   ├── cuda_operation_resample_2d.h
        │   ├── cuda_operation_solve_2d.cpp
        │   └── cuda_operation_solve_2d.h
        ├── cuda_operation_base.cpp
        └── cuda_operation_base.h
    ├── data_types
        ├── data2d.cpp
        ├── data2d.h
        ├── data3d.cpp
        ├── data3d.h
        ├── data_structs.h
        ├── operation_parameters.cpp
        └── operation_parameters.h
    ├── kernels
        ├── add_2d.cu
        ├── convolution_2d.cu
        ├── median_2d.cu
        ├── registration_2d.cu
        ├── resample_2d.cu
        └── solve_2d.cu
    ├── main.cpp
    ├── optical_flow
        ├── optical_flow_2d.cpp
        ├── optical_flow_2d.h
        ├── optical_flow_base_2d.cpp
        └── optical_flow_base_2d.h
    └── utils
        ├── common_utils.cpp
        ├── common_utils.h
        ├── cuda_utils.cpp
        ├── cuda_utils.h
        ├── io_utils.cpp
        ├── io_utils.h
        ├── settings.cpp
        ├── settings.h
        ├── tinystr.cpp
        ├── tinystr.h
        ├── tinyxml.cpp
        ├── tinyxml.h
        ├── tinyxmlerror.cpp
        └── tinyxmlparser.cpp


/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds the cuda-flow2d host executable with $(CC) and translates the CUDA kernels
 2 | # under src/kernels to PTX with nvcc.
 3 | BASEDIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
 4 | INC-DIR := "-I$(BASEDIR)"
 5 | 
 6 | CC := g++
 7 | CFLAGS := -c -std=c++11 -O3 -funroll-all-loops -Wno-deprecated
 8 | CUDA-FLAGS := -ptx -std=c++11
 9 | 
10 | # Selects the toolchain block below; override on the command line, e.g. `make MACHINE=concert`.
11 | MACHINE := rtx
12 | 
13 | ifeq ($(MACHINE),rtx)
14 |   # anka-rtx
15 |   TARGET := cuda-flow2d-rtx
16 |   CUDA-TOP     = /home/ws/fe0968/local/cuda-10.0
17 |   CUDA         = $(CUDA-TOP)/bin/nvcc
18 |   CUDA-INC-DIR = -I$(CUDA-TOP)/include
19 |   CUDA-LIB-DIR = -L$(CUDA-TOP)/lib64 -lcudart -lcuda
20 | else
21 |   # ankaimage-concert
22 |   TARGET := cuda-flow2d-concert
23 |   CUDA         = /usr/bin/nvcc
24 |   CUDA-INC-DIR = -I/usr/include
25 |   CUDA-LIB-DIR = -L/usr/lib/x86_64-linux-gnu -lcudart -lcuda
26 | endif
27 | 
28 | # Host sources: every .cpp directly under src/ and one or two directory levels below it.
29 | SRCS := $(wildcard $(BASEDIR)/src/*.cpp \
30 |             $(BASEDIR)/src/*/*/*.cpp \
31 |             $(BASEDIR)/src/*/*.cpp)
32 | # One object file next to each source file.
33 | OBJS := $(patsubst %.cpp,%.o,$(SRCS))
34 | 
35 | # Kernels are discovered instead of listed by hand, so a .cu file that does not exist is never
36 | # requested. The host code loads them at run time from "<executable path>/kernels/<name>.ptx".
37 | CUDASOURCES := $(wildcard $(BASEDIR)/src/kernels/*.cu)
38 | PTX-DIR     := $(BASEDIR)/kernels
39 | CUDAOBJECTS := $(patsubst $(BASEDIR)/src/kernels/%.cu,$(PTX-DIR)/%.ptx,$(CUDASOURCES))
40 | 
41 | all: $(TARGET)
42 | 
43 | # `cuda` is an order-only prerequisite: building the executable still refreshes the PTX files,
44 | # but the phony target no longer forces a relink on every invocation.
45 | $(TARGET): $(OBJS) | cuda
46 | 	$(CC) $(OBJS) $(CUDA-LIB-DIR) -o $@
47 | 
48 | # Compile only ($(CFLAGS) contains -c); linker flags belong to the link step above.
49 | %.o: %.cpp
50 | 	$(CC) $(CFLAGS) $(INC-DIR) $(CUDA-INC-DIR) $< -o $@
51 | 
52 | # Convenience name for "all PTX files are up to date".
53 | cuda: $(CUDAOBJECTS)
54 | 
55 | # One PTX file per kernel source; rebuilt only when the .cu file is newer.
56 | $(PTX-DIR)/%.ptx: $(BASEDIR)/src/kernels/%.cu | $(PTX-DIR)
57 | 	$(CUDA) $(CUDA-FLAGS) $(INC-DIR) $(CUDA-INC-DIR) $< -o $@
58 | 
59 | # The output directory is not part of the repository, so create it on demand.
60 | $(PTX-DIR):
61 | 	mkdir -p $@
62 | 
63 | # Removes the executable, the object files and the generated PTX files.
64 | clean:
65 | 	$(RM) $(TARGET) $(OBJS) $(CUDAOBJECTS)
66 | 
67 | .PHONY: all cuda clean
68 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GPU-based 2D Optical flow using NVIDIA CUDA
 2 | 
 3 | Optical flow using *variational methods* which determine the unknown displacement field as a minimal solution
 4 | of the energy functional. 
 5 | 
 6 | In general, such energy-based formulations are composed of two
 7 | parts: a *data term* which assumes constancy of specific image features, and a *smoothness term* which regularizes the spatial variation of the flow field.
 8 | 
 9 | ## Features
10 | 
11 | * **Computational model guarantees**:
12 |    * a unique solution (global optimal solution)
13 |    * stability of the algorithm (no critical dependence on parameters)
14 | * **High quality**:
15 |    * dense flow results (one displacement for each pixel)
16 |    * allows large displacements
17 |    * different types of motion (translation, rotation, local elastic transformation)
18 |    * sub-pixel accuracy
19 | * **Robustness**:
20 |    * under noise
21 |    * under varying illumination (brightness changes) 
22 |    * with respect to artifacts
23 | 
24 | ## Examples
25 | 
26 | ![alt text](https://github.com/axruff/cuda-flow2d/raw/master/examples/optical_flow_example.png "Examples")
27 | 
28 | **Figure**: **Left**: Head of the feeding cockroach *Periplaneta americana* imaged by fast X-ray radiography and computed flow field, which captures the movements of the insect during chewing process. **Right**: Flow dynamics of liquid droplets in a fuel spray. Color coding: color represents direction and its brightness represents flow magnitude.
29 | 
30 | ## Model
31 | 
32 | * Brightness constancy data term [1]
33 | * Gradient constancy data term [2]
34 | * Data term based on higher-order derivatives [3]
35 | * Robust modeling of data term [4]
36 | * Flow-driven smoothness [3]
37 | * Coarse-to-fine flow estimation [2]
38 | * Intermediate flow median filtering [5]
39 |  
40 |  
41 |  ## References
42 |  
43 | * [1] B. Horn and B. Schunck. *Determining optical flow*. Artificial Intelligence, 17:185-203, 1981.
44 | * [2] T. Brox, A. Bruhn, N. Papenberg, and J. Weickert. *High accuracy optic flow estimation based on a theory for warping*. In T. Pajdla and J. Matas, editors, Computer Vision, ECCV 2004, volume 3024 of Lecture Notes in Computer Science, pages 25-36. Springer, Berlin, 2004.
45 | * [3] N. Papenberg, A. Bruhn, T. Brox, S. Didas, and J. Weickert. *Highly accurate optic flow computation with theoretically justified warping*. Int. J. Comput. Vision, 67(2):141-158, April 2006.
46 | * [4] M. J. Black and P. Anandan. *The robust estimation of multiple motions: Parametric and piecewise-smooth flow fields*. Computer Vision and Image Understanding, 63(1):75-104, 1996.
47 | * [5] D. Sun, S. Roth, and M. J. Black. *A quantitative analysis of current practices in optical flow estimation and the principles behind them*. International Journal of Computer Vision, 106(2):115-137, 2014.
48 | 


--------------------------------------------------------------------------------
/data/rub1.raw:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/data/rub1.raw


--------------------------------------------------------------------------------
/data/rub2.raw:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/data/rub2.raw


--------------------------------------------------------------------------------
/examples/insect.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/examples/insect.png


--------------------------------------------------------------------------------
/examples/optical_flow_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/examples/optical_flow_example.png


--------------------------------------------------------------------------------
/settings.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <!-- Settings for the Optical flow computation program -->
 3 | <OpticalFlow>
 4 |   
 5 |   <Input>
 6 |     <Path inputPath="c:\Users\fe0968\Documents\gpuflow3d\gpuflow2d\data\"/>
 7 |     
 8 |     <Mode Nx="128" Ny="128" imageType="8-bit">
 9 |     	<Files file1 ="real_frame-128-128.raw" file2 ="frame2.raw"/>
10 |     </Mode>
11 |   </Input>
12 | 
13 |   <Parameters>
14 |     <Method mode ="2d" run="flow" key="0" />
15 | 
16 |     <Solver>
17 |       <Iterations inner="5" outer="20"/>
18 |       <Warping levels="20" scaling="0.9" medianRadius="5"/>
19 |       <Model sigma="0.45" alpha ="3.5" e_smooth="0.001" e_data="0.001"/>
20 |     </Solver>
21 |     
22 |   </Parameters>
23 | 
24 |   <Output>
25 |     <Path outputPath="c:\Users\fe0968\Documents\gpuflow3d\gpuflow2d\data\output\"/>
26 |   </Output>
27 | </OpticalFlow>
28 | 


--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_add_2d.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include "cuda_operation_add_2d.h"
 23 | 
 24 | #include <cstring>
 25 | 
 26 | #include "src/data_types/data_structs.h"
 27 | 
 28 | #include <vector_types.h>
 29 | 
 30 | #include "src/utils/common_utils.h"
 31 | #include "src/utils/cuda_utils.h"
 32 | /* Constructs the add operation, handing the name "CUDA Add 2D" to the CudaOperationBase constructor. */
 33 | CudaOperationAdd2D::CudaOperationAdd2D()
 34 |   : CudaOperationBase("CUDA Add 2D")
 35 | {
 36 | }
 37 | /* Loads kernels/add_2d.ptx, resolves the "add_2d" kernel and uploads "container_size" to the module; returns initialized_. */
 38 | bool CudaOperationAdd2D::Initialize(const OperationParameters* params)
 39 | {
 40 |   initialized_ = false;
 41 |   /* initialized_ stays false on every exit path below except the one where all driver calls succeed. */
 42 |   if (!params) {
 43 |     std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName());
 44 |     return initialized_;
 45 |   }
 46 |   /* The macro presumably returns initialized_ (false) when "container_size" is absent - confirm in operation_parameters.h. */
 47 |   DataSize3 container_size;
 48 |   GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_);
 49 |   /* NOTE(review): strcat() is unbounded; assumes GetExecutablePath() leaves room in the 256-byte buffer for the suffix - confirm. */
 50 |   char exec_path[256];
 51 |   Utils::GetExecutablePath(exec_path, 256);
 52 |   std::strcat(exec_path, "/kernels/add_2d.ptx");
 53 |   /* The negated CheckCudaError() calls guard the success path, i.e. it evidently returns false when the driver call succeeded. */
 54 |   if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) {
 55 |     if (!CheckCudaError(cuModuleGetFunction(&cuf_add_, cu_module_, "add_2d"))) {
 56 |       size_t const_size;
 57 | 
 58 |       /* Get the pointer to the constant memory and copy data */
 59 |       if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) {
 60 |         if (const_size == sizeof(container_size)) {
 61 |           if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) {
 62 |             initialized_ = true;
 63 |           }
 64 |         }
 65 |       }
 66 |       /* NOTE(review): if the symbol lookup, the size check or the copy fails, the module stays loaded and nothing is printed here. */
 67 |     } else {
 68 |       CheckCudaError(cuModuleUnload(cu_module_));
 69 |       cu_module_ = nullptr;
 70 |     }
 71 |   }
 72 |   return initialized_;
 73 | }
 74 | /* Launches the "add_2d" kernel on the device buffers "operand_0" and "operand_1" for an image of "data_size". */
 75 | void CudaOperationAdd2D::Execute(OperationParameters& params)
 76 | {
 77 |   if (!IsInitialized()) {
 78 |     return;
 79 |   }
 80 |   /* Defaults in case a lookup below fails; the macros presumably return from Execute() when a key is missing - confirm. */
 81 |   CUdeviceptr operand_0 = 0;
 82 |   CUdeviceptr operand_1 = 0;
 83 |   DataSize3 data_size;
 84 | 
 85 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, operand_0, "operand_0");
 86 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, operand_1, "operand_1");
 87 |   GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size");
 88 |   /* Blocks of 16x8 threads; the grid is ceil(width / 16) x ceil(height / 8) blocks. */
 89 |   dim3 block_dim = { 16, 8};
 90 |   dim3 grid_dim = { static_cast<unsigned int>((data_size.width  + block_dim.x - 1) / block_dim.x),
 91 |                     static_cast<unsigned int>((data_size.height + block_dim.y - 1) / block_dim.y) };
 92 |   /* Pointers to the argument values, in the order of the kernel's parameter list (the kernel itself is not visible in this file). */
 93 |   void* args[4] = { 
 94 |     &operand_0, 
 95 |     &operand_1,
 96 |     &data_size.width, 
 97 |     &data_size.height};
 98 |   /* Launch with 0 bytes of dynamic shared memory, a NULL stream and no extra options; the result is only passed to CheckCudaError(). */
 99 |   CheckCudaError(cuLaunchKernel(cuf_add_,
100 |                                 grid_dim.x, grid_dim.y, grid_dim.z,
101 |                                 block_dim.x, block_dim.y, block_dim.z,
102 |                                 0,
103 |                                 NULL,
104 |                                 args,
105 |                                 NULL));
106 | }


--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_add_2d.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_ADD_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_ADD_H_
24 | 
25 | #include <cuda.h>
26 | 
27 | #include "src/cuda_operations/cuda_operation_base.h"
28 | #include "src/data_types/operation_parameters.h"
29 | /* CUDA operation wrapping the "add_2d" kernel from kernels/add_2d.ptx. */
30 | class CudaOperationAdd2D : public CudaOperationBase {
31 | private:
32 |   CUfunction cuf_add_;  /* Kernel handle, resolved by Initialize() via cuModuleGetFunction(). */
33 | 
34 | public:
35 |     CudaOperationAdd2D();
36 |   /* Initialize() reads "container_size" from params; Execute() reads "operand_0", "operand_1" and "data_size". */
37 |   bool Initialize(const OperationParameters* params = nullptr) override;
38 |   void Execute(OperationParameters& params) override;
39 | };
40 | 
41 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_ADD_H_
42 | 


--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_convolution_2d.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include "cuda_operation_convolution_2d.h"
 23 | 
 24 | #include <cstring>
 25 | #include <iostream>
 26 | #include <cmath>
 27 | 
 28 | #include <cuda.h>
 29 | #include <cuda_runtime.h>
 30 | #include <vector_types.h>
 31 | 
 32 | #include "src/data_types/data_structs.h"
 33 | #include "src/utils/common_utils.h"
 34 | #include "src/utils/cuda_utils.h"
 35 | 
 36 | using namespace std;
 37 | /* Constructs the convolution operation, handing the name "CUDA Convolution 2D" to the CudaOperationBase constructor. */
 38 | CudaOperationConvolution2D::CudaOperationConvolution2D()
 39 |   : CudaOperationBase("CUDA Convolution 2D")
 40 | {
 41 | }
 42 | /* Loads kernels/convolution_2d.ptx, resolves the row and column kernels and uploads "container_size"; returns initialized_. */
 43 | bool CudaOperationConvolution2D::Initialize(const OperationParameters* params)
 44 | {
 45 |   initialized_ = false;
 46 |   /* initialized_ stays false on every exit path below except the one where all driver calls succeed. */
 47 |   if (!params) {
 48 |     std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName());
 49 |     return initialized_;
 50 |   }
 51 |   /* The macro presumably returns initialized_ (false) when "container_size" is absent - confirm in operation_parameters.h. */
 52 |   DataSize3 container_size;
 53 |   GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_);
 54 |   /* Kept on the host as well: Execute() derives the row pitch (in floats) from it. */
 55 |   dev_container_size_ = container_size;
 56 |   /* NOTE(review): strcat() is unbounded; assumes GetExecutablePath() leaves room in the 256-byte buffer for the suffix - confirm. */
 57 |   char exec_path[256];
 58 |   Utils::GetExecutablePath(exec_path, 256);
 59 |   std::strcat(exec_path, "/kernels/convolution_2d.ptx");
 60 |   /* The negated CheckCudaError() calls guard the success path, i.e. it evidently returns false when the driver call succeeded. */
 61 |   if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) {
 62 |     if (!CheckCudaError(cuModuleGetFunction(&cuf_convolution_rows_, cu_module_, "convolutionRowsKernel")) &&
 63 |         !CheckCudaError(cuModuleGetFunction(&cuf_convolution_cols_, cu_module_, "convolutionColumnsKernel"))) {
 64 |       size_t const_size;
 65 | 
 66 |       /* Get the pointer to the constant memory and copy data */
 67 |       if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) {
 68 |         if (const_size == sizeof(container_size)) {
 69 |           if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) {
 70 |             initialized_ = true;
 71 |           }
 72 |         }
 73 |       }
 74 |       /* NOTE(review): if the symbol lookup, the size check or the copy fails, the module stays loaded and nothing is printed here. */
 75 |     } else {
 76 |       CheckCudaError(cuModuleUnload(cu_module_));
 77 |       cu_module_ = nullptr;
 78 |     }
 79 |   }
 80 |   return initialized_;
 81 | }
 82 | /* Builds the normalized 1D Gaussian mask used by both convolution passes (sets kernel_, kernel_radius_, kernel_length_). */
 83 | void CudaOperationConvolution2D::ComputeGaussianKernel(float sigma, size_t precision, float pixel_size)
 84 | {
 85 |     /* sigma:      standard deviation of the Gaussian.
 86 |        precision:  the mask radius is precision * sigma / pixel_size taps (truncated).
 87 |        pixel_size: sample spacing; Execute() passes 1.0. */
 88 |     kernel_radius_ = (size_t)(precision * sigma / pixel_size);
 89 |     kernel_length_ = 2*kernel_radius_ + 1;
 90 | 
 91 |     /* Execute() calls this function on every run, so free the previous mask first;
 92 |        otherwise every call would leak the previous array. delete[] on nullptr is a no-op. */
 93 |     delete[] kernel_;
 94 |     kernel_ = new float[kernel_length_];
 95 | 
 96 |     int r = static_cast<int>(kernel_radius_);
 97 | 
 98 |     for (int i = -r; i <= r; i++) {
 99 |         float val = 1.0 / (sigma * std::sqrt(2.0 * 3.1415926)) * std::exp(-(i * i * pixel_size * pixel_size) / (2.0 * sigma * sigma));
100 |         kernel_[i+r] = val;
101 |     }
102 | 
103 |     // Normalize convolution mask vector so that its taps sum to 1
104 |     float sum = 0.0;
105 |     for (unsigned int i = 0; i < kernel_length_; i++) {
106 |         sum = sum + kernel_[i];
107 |     }
108 | 
109 |     for (unsigned int i = 0; i < kernel_length_; i++) {
110 |         kernel_[i] = kernel_[i] / sum;
111 |     }
112 | }
113 | 
114 | void CudaOperationConvolution2D::PrintConvolutionKernel()
115 | {
116 |     if (kernel_ == nullptr)
117 |         std::printf("Error: Convolution kernel is not initialized.\n");
118 |     else {
119 |         std::printf("Convolution kernel (radius = %d)\n", kernel_radius_);
120 | 
121 |         for (unsigned int i = 0; i < kernel_length_; i++) {
122 |             std::printf("%.4f ", kernel_[i]);
123 |         }
124 |         std::printf("\n\n");
125 | 
126 |     }
127 | 
128 | }
129 | 
130 | /* Filters dev_input into dev_output with a separable Gaussian mask: rows first (into dev_temp), then columns.
131 |    Reads "dev_input", "dev_output", "dev_temp", "data_size" and "gaussian_sigma" from params. */
132 | void CudaOperationConvolution2D::Execute(OperationParameters& params)
133 | {  
134 |   if (!IsInitialized()) {
135 |     return;
136 |   }
137 | 
138 |   CUdeviceptr dev_input = 0;
139 |   CUdeviceptr dev_output = 0;
140 |   CUdeviceptr dev_temp = 0;
141 |   DataSize3 data_size;
142 |   float gaussian_sigma;
143 | 
144 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_input,  "dev_input");
145 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_output, "dev_output");
146 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_temp, "dev_temp");
147 |   GET_PARAM_OR_RETURN(params, DataSize3, data_size,    "data_size");
148 |   GET_PARAM_OR_RETURN(params, float, gaussian_sigma, "gaussian_sigma");
149 | 
150 |   if (dev_input == dev_output) {
151 |     std::printf("Operation '%s': Error. Input buffer cannot serve as output buffer.", GetName());
152 |     return;
153 |   }
154 | 
155 |   // TODO: Make computation of kernel once for all images
156 |   ComputeGaussianKernel(gaussian_sigma, 3, 1.0);
157 |   /* Copy the mask into the module's "c_Kernel" symbol. CheckCudaError() is used as in
158 |      Initialize(): a true result means the driver call failed. Abort instead of launching
159 |      with a stale mask, and never copy more bytes than the symbol holds. */
160 |   size_t const_size = 0;
161 |   const size_t kernel_bytes = kernel_length_ * sizeof(float);
162 |   if (CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "c_Kernel"))) {
163 |     return;
164 |   }
165 |   if (kernel_bytes > const_size) {
166 |     std::printf("Operation '%s': Error. Convolution kernel needs %zu bytes, but c_Kernel holds only %zu.\n", GetName(), kernel_bytes, const_size);
167 |     return;
168 |   }
169 |   if (CheckCudaError(cuMemcpyHtoD(dev_constants_, kernel_, kernel_bytes))) {
170 |     return;
171 |   }
172 | 
173 |   int pitch = static_cast<int>(dev_container_size_.pitch / sizeof(float));  /* row pitch in floats */
174 |   ResampleRows(dev_input, dev_temp, data_size, pitch);      /* 1. Process in horizontal direction (rows) */
175 |   ResampleColumns(dev_temp, dev_output, data_size, pitch);  /* 2. Process in vertical direction (columns) */
176 | }
177 | /* Launches "convolutionRowsKernel" (horizontal pass) from `input` to `output`; Execute() passes `pitch` in floats. */
178 | void CudaOperationConvolution2D::ResampleRows(CUdeviceptr input, CUdeviceptr output, DataSize3& data_size, size_t pitch) const
179 | {
180 |     int kernel_radius = static_cast<int>(kernel_radius_);
181 |     /* Tiling constants; presumably they have to match the values compiled into convolution_2d.cu - confirm. */
182 |     unsigned int rows_results_steps = 4;
183 |     unsigned int rows_halo_steps = 1;
184 |     /* Blocks of 16x4 threads. The grid is sized so that one block covers 16 * rows_results_steps columns and 4 rows, rounded up. */
185 |     dim3 row_block_dim ={ 16, 4 };
186 |     dim3 row_grid_dim ={ static_cast<unsigned int>((data_size.width  + (row_block_dim.x * rows_results_steps) - 1) / (row_block_dim.x * rows_results_steps)),
187 |         static_cast<unsigned int>((data_size.height + row_block_dim.y - 1) / row_block_dim.y) };
188 | 
189 |     /* Calculate the needed shared memory size */
190 |     int shared_memory_size;
191 |     CUdevice cu_device;
192 |     CheckCudaError(cuDeviceGet(&cu_device, 0));
193 |     CheckCudaError(cuDeviceGetAttribute(&shared_memory_size, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, cu_device));
194 |     /* Bytes for (results + 2 * halo) steps of block width, times the block height, one float per element. */
195 |     int needed_shared_memory_size =
196 |         ((rows_results_steps + 2*rows_halo_steps)*row_block_dim.x) * (row_block_dim.y) * sizeof(float);
197 | 
198 |     //std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size);
199 |     /* If the tile exceeds the per-block limit queried from device 0, report it and skip the launch; the caller is not notified. */
200 |     if (needed_shared_memory_size > shared_memory_size) {
201 |         std::printf("<%s>: Error shared memory allocation. Reduce the thread block size.\n", GetName());
202 |         std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size);
203 |         return;
204 |     }
205 | 
206 |     /*std::printf("Starting Convolution kernel: Grid: %d x %d, Blocks %d x %d \n", row_grid_dim.x,
207 |         row_grid_dim.y,
208 |         row_block_dim.x,
209 |         row_block_dim.y);*/
210 | 
211 |     /* Pointers to the argument values, output first. NOTE(review): pitch is a size_t here - confirm the kernel does not declare it as int. */
212 |     void* args[6] ={
213 |         &output,
214 |         &input,
215 |         &data_size.width,
216 |         &data_size.height,
217 |         &pitch,
218 |         &kernel_radius };
219 |     /* needed_shared_memory_size is passed as the dynamic shared memory size; stream and extra options are NULL. */
220 |     CheckCudaError(cuLaunchKernel(cuf_convolution_rows_,
221 |         row_grid_dim.x, row_grid_dim.y, row_grid_dim.z,
222 |         row_block_dim.x, row_block_dim.y, row_block_dim.z,
223 |         needed_shared_memory_size,
224 |         NULL,
225 |         args,
226 |         NULL));
227 | }
228 | /* Launches "convolutionColumnsKernel" (vertical pass) from `input` to `output`; Execute() passes `pitch` in floats. */
229 | void CudaOperationConvolution2D::ResampleColumns(CUdeviceptr input, CUdeviceptr output, DataSize3& data_size, size_t pitch) const
230 | {
231 |     int kernel_radius = static_cast<int>(kernel_radius_);
232 |     /* Tiling constants; presumably they have to match the values compiled into convolution_2d.cu - confirm. */
233 |     unsigned int cols_results_steps = 4;
234 |     unsigned int cols_halo_steps = 1;
235 |     /* Blocks of 4x16 threads. The grid is sized so that one block covers 4 columns and 16 * cols_results_steps rows, rounded up. */
236 |     dim3 col_block_dim ={ 4, 16 };
237 |     dim3 col_grid_dim ={ static_cast<unsigned int>((data_size.width  + col_block_dim.x - 1) / col_block_dim.x),
238 |         static_cast<unsigned int>((data_size.height + (col_block_dim.y * cols_results_steps) - 1) / (col_block_dim.y * cols_results_steps)) };
239 |     /* Query the per-block shared memory limit of device 0. */
240 |     int shared_memory_size;
241 |     CUdevice cu_device;
242 |     CheckCudaError(cuDeviceGet(&cu_device, 0));
243 |     CheckCudaError(cuDeviceGetAttribute(&shared_memory_size, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, cu_device));
244 |     /* Bytes for the block width times (results + 2 * halo) steps of block height, one float per element. */
245 |     int needed_shared_memory_size =
246 |         (col_block_dim.x * ((cols_results_steps + 2*cols_halo_steps)*col_block_dim.y + 0)) * sizeof(float);
247 | 
248 |     //std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size);
249 |     /* If the tile exceeds the queried limit, report it and skip the launch; the caller is not notified. */
250 |     if (needed_shared_memory_size > shared_memory_size) {
251 |         std::printf("<%s>: Error shared memory allocation. Reduce the thread block size.\n", GetName());
252 |         std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size);
253 |         return;
254 |     }
255 | 
256 |    /* std::printf("Starting Convolution kernel: Grid: %d x %d, Blocks %d x %d \n", col_grid_dim.x,
257 |         col_grid_dim.y,
258 |         col_block_dim.x,
259 |         col_block_dim.y);
260 | */
261 | 
262 |     /* Pointers to the argument values, output first. NOTE(review): pitch is a size_t here - confirm the kernel does not declare it as int. */
263 |     void* args[6] ={
264 |         &output,
265 |         &input,
266 |         &data_size.width,
267 |         &data_size.height,
268 |         &pitch,
269 |         &kernel_radius };
270 |     /* needed_shared_memory_size is passed as the dynamic shared memory size; stream and extra options are NULL. */
271 |     CheckCudaError(cuLaunchKernel(cuf_convolution_cols_,
272 |         col_grid_dim.x, col_grid_dim.y, col_grid_dim.z,
273 |         col_block_dim.x, col_block_dim.y, col_block_dim.z,
274 |         needed_shared_memory_size,
275 |         NULL,
276 |         args,
277 |         NULL));
278 | }
279 | 
280 | 
281 | 
282 | 
283 | CudaOperationConvolution2D::~CudaOperationConvolution2D()
284 | {
285 |     /* Releases the Gaussian mask. kernel_ is allocated with new[] in ComputeGaussianKernel(),
286 |        so it must be freed with delete[]; deleting a null pointer is a no-op. */
287 |     delete[] kernel_;
288 | }


--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_convolution_2d.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_CONVOLUTION_2D_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_CONVOLUTION_2D_H_
24 | 
25 | #include <cuda.h>
26 | 
27 | #include "src/cuda_operations/cuda_operation_base.h"
28 | #include "src/data_types/data_structs.h"
29 | /* CUDA operation that runs a row pass and a column pass of kernels/convolution_2d.ptx with a Gaussian mask. */
30 | class CudaOperationConvolution2D : public CudaOperationBase {
31 | private:
32 |   CUfunction cuf_convolution_rows_;
33 |   CUfunction cuf_convolution_cols_;
34 |   /* Host copy of the "container_size" parameter given to Initialize(); Execute() derives the row pitch from it. */
35 |   DataSize3 dev_container_size_;
36 |   /* Host-side mask: kernel_ holds kernel_length_ = 2 * kernel_radius_ + 1 floats once ComputeGaussianKernel() has run. */
37 |   float* kernel_ = nullptr;
38 |   size_t kernel_length_;
39 |   size_t kernel_radius_;
40 |   /* Recomputes kernel_, kernel_length_ and kernel_radius_ for the given sigma. */
41 |   void ComputeGaussianKernel(float sigma, size_t precision, float pixel_size);
42 |   /* Launch helpers for the horizontal and the vertical pass; despite the names they run the convolution kernels. */
43 |   void ResampleRows(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, size_t pitch) const;
44 |   void ResampleColumns(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, size_t pitch) const;
45 | 
46 | public:
47 |     CudaOperationConvolution2D();
48 |   /* Initialize() reads "container_size"; Execute() reads "dev_input", "dev_output", "dev_temp", "data_size", "gaussian_sigma". */
49 |   bool Initialize(const OperationParameters* params = nullptr) override;
50 |   void Execute(OperationParameters& params) override;
51 |   void PrintConvolutionKernel();
52 |   /* Frees kernel_. */
53 |   ~CudaOperationConvolution2D();
54 | };
55 | 
56 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_CONVOLUTION_2D_H_
57 | 


--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_median_2d.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include "cuda_operation_median_2d.h"
 23 | 
 24 | #include <cuda.h>
 25 | 
 26 | #include <vector_types.h>
 27 | #include <cstring>
 28 | 
 29 | #include "src/data_types/data_structs.h"
 30 | #include "src/utils/common_utils.h"
 31 | #include "src/utils/cuda_utils.h"
 32 | /* Constructs the median operation, handing the name "CUDA Median 2D" to the CudaOperationBase constructor. */
 33 | CudaOperationMedian2D::CudaOperationMedian2D()
 34 |   : CudaOperationBase("CUDA Median 2D")
 35 | {
 36 | }
 37 | /* Loads kernels/median_2d.ptx, resolves the "median_2d" kernel and uploads "container_size" to the module; returns initialized_. */
 38 | bool CudaOperationMedian2D::Initialize(const OperationParameters* params)
 39 | {
 40 |   initialized_ = false;
 41 |   /* initialized_ stays false on every exit path below except the one where all driver calls succeed. */
 42 |   if (!params) {
 43 |     std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName());
 44 |     return initialized_;
 45 |   }
 46 |   /* The macro presumably returns initialized_ (false) when "container_size" is absent - confirm in operation_parameters.h. */
 47 |   DataSize3 container_size;
 48 |   GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_);
 49 |   /* Kept on the host as well: Execute() uses pitch * height of it as the byte count of its device-to-device copy. */
 50 |   dev_container_size_ = container_size;
 51 |   /* NOTE(review): strcat() is unbounded; assumes GetExecutablePath() leaves room in the 256-byte buffer for the suffix - confirm. */
 52 |   char exec_path[256];
 53 |   Utils::GetExecutablePath(exec_path, 256);
 54 |   std::strcat(exec_path, "/kernels/median_2d.ptx");
 55 |   /* The negated CheckCudaError() calls guard the success path, i.e. it evidently returns false when the driver call succeeded. */
 56 |   if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) {
 57 |     if (!CheckCudaError(cuModuleGetFunction(&cuf_median_, cu_module_, "median_2d"))) {
 58 |       size_t const_size;
 59 | 
 60 |       /* Get the pointer to the constant memory and copy data */
 61 |       if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) {
 62 |         if (const_size == sizeof(container_size)) {
 63 |           if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) {
 64 |             initialized_ = true;
 65 |           }
 66 |         }
 67 |       }
 68 |       /* NOTE(review): if the symbol lookup, the size check or the copy fails, the module stays loaded and nothing is printed here. */
 69 |     } else {
 70 |       CheckCudaError(cuModuleUnload(cu_module_));
 71 |       cu_module_ = nullptr;
 72 |     }
 73 |   }
 74 |   return initialized_;
 75 | }
 76 | 
 77 | void CudaOperationMedian2D::Execute(OperationParameters& params)
 78 | {
 79 |   if (!IsInitialized()) {
 80 |     return;
 81 |   }
 82 | 
 83 |   CUdeviceptr dev_input;
 84 |   CUdeviceptr dev_output;
 85 | 
 86 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_input,  "dev_input");
 87 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_output, "dev_output");
 88 | 
 89 |   DataSize3 data_size;
 90 |   size_t radius;
 91 |   GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size");
 92 |   GET_PARAM_OR_RETURN(params, size_t,    radius,    "radius");
 93 | 
 94 |   if (dev_input == dev_output) {
 95 |     std::printf("Operation '%s': Error. Input buffer cannot serve as output buffer.", GetName());
 96 |     return;
 97 |   }
 98 | 
 99 |   /* If the radius equals 1 skip the filtering and copy data from the input to the output */
100 |   if (radius == 1) {
101 |     CheckCudaError(cuMemcpyDtoD(dev_output, dev_input,
102 |       dev_container_size_.pitch * dev_container_size_.height));
103 |     return;
104 |   }
105 | 
106 |   if (radius % 2 == 0) {
107 |     std::printf("Warning. Median raduis is even (%d), decresaing by 1...\n", radius);
108 |     radius -= 1;
109 |   }
110 | 
111 |   if (radius >= 3 && radius <= 7) {
112 |     /* Be careful with the thread block size. Thread block size in each dimension
113 |        should be greater than radius / 2, because radius / 2 threads are used to 
114 |        load halo around the thread block. */
115 |     dim3 block_dim = { 8, 8};
116 | 
117 |     dim3 grid_dim ={ static_cast<unsigned int>((data_size.width  + block_dim.x - 1) / block_dim.x),
118 |                      static_cast<unsigned int>((data_size.height + block_dim.y - 1) / block_dim.y)};
119 | 
120 | 
121 |     /* Calculate the needed shared memory size */
122 |     int shared_memory_size;
123 |     CUdevice cu_device;
124 |     CheckCudaError(cuDeviceGet(&cu_device, 0));
125 |     CheckCudaError(cuDeviceGetAttribute(&shared_memory_size, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, cu_device));
126 | 
127 |     int radius_2 = radius / 2;
128 |     int needed_shared_memory_size =
129 |       (block_dim.x + 2 * radius_2) * (block_dim.y + 2 * radius_2) * sizeof(float);
130 | 
131 |     if (needed_shared_memory_size > shared_memory_size) {
132 |       std::printf("<%s>: Error shared memory allocation. Reduce the thread block size.\n", GetName());
133 |       std::printf("Shared memory: %d Needed: %d \n", shared_memory_size, needed_shared_memory_size);
134 |       return;
135 |     }
136 | 
137 |     
138 |     void* args[5] = { 
139 |       &dev_input,
140 |       &data_size.width,
141 |       &data_size.height,
142 |       &radius,
143 |       &dev_output};
144 | 
145 |     CheckCudaError(cuLaunchKernel(cuf_median_,
146 |                                   grid_dim.x, grid_dim.y, grid_dim.z,
147 |                                   block_dim.x, block_dim.y, block_dim.z,
148 |                                   needed_shared_memory_size,
149 |                                   NULL,
150 |                                   args,
151 |                                   NULL));
152 |   } else {
153 |     std::printf("Error. Wrong median raduis (%d). Supported values: 3, 5, 7\n", radius);
154 |   }
155 | }


--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_median_2d.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_MEDIAN_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_MEDIAN_H_
24 | 
25 | #include <cuda.h>
26 | 
27 | #include "src/cuda_operations/cuda_operation_base.h"
28 | #include "src/data_types/data_structs.h"
29 | 
30 | class CudaOperationMedian2D : public CudaOperationBase {
31 | private:
32 |   CUfunction cuf_median_;
33 | 
34 |   DataSize3 dev_container_size_;
35 | 
36 | public:
37 |     CudaOperationMedian2D();
38 | 
39 |   bool Initialize(const OperationParameters* params = nullptr) override;
40 |   void Execute(OperationParameters& params) override;
41 | };
42 | 
43 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_MEDIAN_H_
44 | 


--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_registration_2d.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include "cuda_operation_registration_2d.h"
 23 | 
 24 | #include <cuda.h>
 25 | #include <vector_types.h>
 26 | #include <cstring>
 27 | 
 28 | #include "src/data_types/data_structs.h"
 29 | #include "src/utils/common_utils.h"
 30 | #include "src/utils/cuda_utils.h"
 31 | 
 32 | CudaOperationRegistration2D::CudaOperationRegistration2D()
 33 |   : CudaOperationBase("CUDA Registration 2D")
 34 | {
 35 | }
 36 | 
 37 | bool CudaOperationRegistration2D::Initialize(const OperationParameters* params)
 38 | {
 39 |   initialized_ = false;
 40 |   
 41 |   if (!params) {
 42 |     std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName());
 43 |     return initialized_;
 44 |   }
 45 | 
 46 |   DataSize3 container_size;
 47 |   GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_);
 48 |  
 49 |   char exec_path[256];
 50 |   Utils::GetExecutablePath(exec_path, 256);
 51 |   std::strcat(exec_path, "/kernels/registration_2d.ptx");
 52 | 
 53 |   if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) {
 54 |     if (!CheckCudaError(cuModuleGetFunction(&cuf_registration_, cu_module_, "registration_2d"))) {
 55 |       size_t const_size;
 56 | 
 57 |       /* Get the pointer to the constant memory and copy data */
 58 |       if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) {
 59 |         if (const_size == sizeof(container_size)) {
 60 |           if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) {
 61 |             initialized_ = true;
 62 |           }
 63 |         }
 64 |       }
 65 | 
 66 |     } else {
 67 |       CheckCudaError(cuModuleUnload(cu_module_));
 68 |       cu_module_ = nullptr;
 69 |     }
 70 |   }
 71 |   return initialized_;
 72 | }
 73 | 
 74 | void CudaOperationRegistration2D::Execute(OperationParameters& params)
 75 | {
 76 |   if (!IsInitialized()) {
 77 |     return;
 78 |   }
 79 | 
 80 |   CUdeviceptr dev_frame_0;
 81 |   CUdeviceptr dev_frame_1;
 82 |   CUdeviceptr dev_flow_u;
 83 |   CUdeviceptr dev_flow_v;
 84 |   CUdeviceptr dev_output;
 85 | 
 86 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_frame_0, "dev_frame_0");
 87 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_frame_1, "dev_frame_1");
 88 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_flow_u,  "dev_flow_u");
 89 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_flow_v,  "dev_flow_v");
 90 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_output,  "dev_output");
 91 | 
 92 |   float hx;
 93 |   float hy;
 94 |   DataSize3 data_size;
 95 | 
 96 |   GET_PARAM_OR_RETURN(params, float,     hx,        "hx");
 97 |   GET_PARAM_OR_RETURN(params, float,     hy,        "hy");
 98 |   GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size");
 99 | 
100 |   if (dev_frame_1 == dev_output) {
101 |     std::printf("Operation '%s': Error. Input buffer cannot serve as output buffer.", GetName());
102 |     return;
103 |   }
104 | 
105 |   dim3 block_dim = { 16, 8, 1 };
106 |   dim3 grid_dim = { static_cast<unsigned int>((data_size.width + block_dim.x - 1) / block_dim.x),
107 |                     static_cast<unsigned int>((data_size.height + block_dim.y - 1) / block_dim.y),
108 |                     1 };
109 | 
110 |   void* args[12] = { 
111 |       &dev_frame_0,
112 |       &dev_frame_1,
113 |       &dev_flow_u,
114 |       &dev_flow_v,
115 |       &data_size.width,
116 |       &data_size.height,
117 |       &hx,
118 |       &hy,
119 |       &dev_output};
120 | 
121 |   CheckCudaError(cuLaunchKernel(cuf_registration_,
122 |                                 grid_dim.x, grid_dim.y, grid_dim.z,
123 |                                 block_dim.x, block_dim.y, block_dim.z,
124 |                                 0,
125 |                                 NULL,
126 |                                 args,
127 |                                 NULL));
128 | }


--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_registration_2d.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_REGISTRATION_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_REGISTRATION_H_
24 | 
25 | #include <cuda.h>
26 | 
27 | #include "src/cuda_operations/cuda_operation_base.h"
28 | 
29 | class CudaOperationRegistration2D : public CudaOperationBase {
30 | private:
31 |   CUfunction cuf_registration_;
32 | 
33 | public:
34 |     CudaOperationRegistration2D();
35 | 
36 |   bool Initialize(const OperationParameters* params = nullptr) override;
37 |   void Execute(OperationParameters& params) override;
38 | };
39 | 
40 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_REGISTRATION_H_
41 | 


--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_resample_2d.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include "cuda_operation_resample_2d.h"
 23 | 
 24 | #include <cstring>
 25 | 
 26 | #include <vector_types.h>
 27 | 
 28 | #include "src/data_types/data_structs.h"
 29 | #include "src/utils/common_utils.h"
 30 | #include "src/utils/cuda_utils.h"
 31 | 
 32 | CudaOperationResample2D::CudaOperationResample2D()
 33 |   : CudaOperationBase("CUDA Resample 2D")
 34 | {
 35 | }
 36 | 
 37 | bool CudaOperationResample2D::Initialize(const OperationParameters* params)
 38 | {
 39 |   initialized_ = false;
 40 | 
 41 |   if (!params) {
 42 |     std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName());
 43 |     return initialized_;
 44 |   }
 45 | 
 46 |   DataSize3 container_size;
 47 |   GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_);
 48 | 
 49 | 
 50 |   char exec_path[256];
 51 |   Utils::GetExecutablePath(exec_path, 256);
 52 |   std::strcat(exec_path, "/kernels/resample_2d.ptx");
 53 | 
 54 |   if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) {
 55 |     if (!CheckCudaError(cuModuleGetFunction(&cuf_resample_x_, cu_module_, "resample_x")) &&
 56 |         !CheckCudaError(cuModuleGetFunction(&cuf_resample_y_, cu_module_, "resample_y"))) {
 57 |       size_t const_size;
 58 | 
 59 |       /* Get the pointer to the constant memory and copy data */
 60 |       if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) {
 61 |         if (const_size == sizeof(container_size)) {
 62 |           if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) {
 63 |             initialized_ = true;
 64 |           }
 65 |         }
 66 |       }
 67 | 
 68 |     } else {
 69 |       CheckCudaError(cuModuleUnload(cu_module_));
 70 |       cu_module_ = nullptr;
 71 |     }
 72 |   }
 73 |   return initialized_;
 74 | }
 75 | 
 76 | void CudaOperationResample2D::Execute(OperationParameters& params)
 77 | {
 78 |   if (!IsInitialized()) {
 79 |     return;
 80 |   }
 81 |   CUdeviceptr dev_input = 0;
 82 |   CUdeviceptr dev_output = 0;
 83 |   CUdeviceptr dev_temp = 0;
 84 | 
 85 |   DataSize3 data_size;
 86 |   DataSize3 resample_size;
 87 | 
 88 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_input, "dev_input");
 89 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_output, "dev_output");
 90 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_temp, "dev_temp");
 91 |   GET_PARAM_OR_RETURN(params, DataSize3, data_size, "data_size");
 92 |   GET_PARAM_OR_RETURN(params, DataSize3, resample_size, "resample_size");
 93 | 
 94 |   if (dev_input == dev_output) {
 95 |     std::printf("Operation '%s': Error. Input buffer cannot serve as output buffer.", GetName());
 96 |     return;
 97 |   }
 98 | 
 99 |   DataSize3 output_size = data_size;
100 |   output_size.width = resample_size.width;
101 |   ResampleX(dev_input, dev_temp, data_size, output_size);
102 | 
103 |   data_size.width = output_size.width;
104 |   output_size.height = resample_size.height;
105 |   ResampleY(dev_temp, dev_output, data_size, output_size);
106 | 
107 | }
108 | 
109 | void CudaOperationResample2D::ResampleX(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, DataSize3& output_size) const
110 | {
111 |   dim3 block_dim = { 16, 8};
112 |   dim3 grid_dim =  { static_cast<unsigned int>((output_size.width + block_dim.x - 1) / block_dim.x),
113 |                      static_cast<unsigned int>((output_size.height + block_dim.y - 1) / block_dim.y)
114 |   };
115 | 
116 |   void* args[5] = { &input, 
117 |                     &output,
118 |                     &output_size.width,
119 |                     &output_size.height,
120 |                     &input_size.width };
121 | 
122 | 
123 |   CheckCudaError(cuLaunchKernel(cuf_resample_x_,
124 |                                 grid_dim.x, grid_dim.y, grid_dim.z,
125 |                                 block_dim.x, block_dim.y, block_dim.z,
126 |                                 0,
127 |                                 NULL,
128 |                                 args,
129 |                                 NULL));
130 | }
131 | 
132 | void CudaOperationResample2D::ResampleY(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, DataSize3& output_size) const
133 | {
134 |   dim3 block_dim = { 16, 8};
135 |   dim3 grid_dim =  { static_cast<unsigned int>((output_size.width + block_dim.x - 1) / block_dim.x),
136 |                      static_cast<unsigned int>((output_size.height + block_dim.y - 1) / block_dim.y)
137 |                      };
138 | 
139 |   void* args[5] = { &input,
140 |                     &output,
141 |                     &output_size.width,
142 |                     &output_size.height,
143 |                     &input_size.height };
144 | 
145 |   CheckCudaError(cuLaunchKernel(cuf_resample_y_,
146 |                                 grid_dim.x, grid_dim.y, grid_dim.z,
147 |                                 block_dim.x, block_dim.y, block_dim.z,
148 |                                 0,
149 |                                 NULL,
150 |                                 args,
151 |                                 NULL));
152 | }
153 | 
154 | 


--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_resample_2d.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_RESAMPLE_2D_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_RESAMPLE_2D_H_
24 | 
25 | #include <cuda.h>
26 | 
27 | #include "src/cuda_operations/cuda_operation_base.h"
28 | #include "src/data_types/data_structs.h"
29 | 
30 | class CudaOperationResample2D : public CudaOperationBase {
31 | private: 
32 |   CUfunction cuf_resample_x_;
33 |   CUfunction cuf_resample_y_;
34 | 
35 |   void ResampleX(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, DataSize3& output_size) const;
36 |   void ResampleY(CUdeviceptr input, CUdeviceptr output, DataSize3& input_size, DataSize3& output_size) const;
37 | 
38 | 
39 | public:
40 |   CudaOperationResample2D();
41 | 
42 |   bool Initialize(const OperationParameters* params = nullptr) override;
43 |   void Execute(OperationParameters& params) override;
44 | };
45 | 
46 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_RESAMPLE_2D_H_
47 | 


--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_solve_2d.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include "cuda_operation_solve_2d.h"
 23 | 
 24 | #include <cstdio>
 25 | #include <cstring>
 26 | #include <iostream>
 27 | #include <vector_types.h>
 28 | 
 29 | #include "src/data_types/data_structs.h"
 30 | #include "src/utils/common_utils.h"
 31 | #include "src/utils/cuda_utils.h"
 32 | 
 33 | 
 34 | CudaOperationSolve2D::CudaOperationSolve2D()
 35 |   : CudaOperationBase("CUDA Solve 2D")
 36 | {
 37 | }
 38 | 
 39 | bool CudaOperationSolve2D::Initialize(const OperationParameters* params)
 40 | {
 41 |   initialized_ = false;
 42 |   
 43 |   if (!params) {
 44 |     std::printf("Operation: '%s'. Initialization parameters are missing.\n", GetName());
 45 |     return initialized_;
 46 |   }
 47 | 
 48 |   DataSize3 container_size;
 49 |   DataConstancy data_constancy = DataConstancy::LogDerivatives;
 50 | 
 51 |   GET_PARAM_OR_RETURN_VALUE(*params, DataSize3, container_size, "container_size", initialized_);
 52 |   GET_PARAM_OR_RETURN_VALUE(*params, DataConstancy, data_constancy, "data_constancy", initialized_);
 53 |  
 54 |   dev_container_size_ = container_size;
 55 | 
 56 |   char exec_path[256];
 57 |   Utils::GetExecutablePath(exec_path, 256);
 58 |   std::strcat(exec_path, "/kernels/solve_2d.ptx");
 59 | 
 60 |   if (!CheckCudaError(cuModuleLoad(&cu_module_, exec_path))) {
 61 | 
 62 |       std::string solve_function_name;
 63 | 
 64 | 
 65 |       switch (data_constancy)
 66 |       {
 67 |       case DataConstancy::Grey:
 68 |             solve_function_name = "solve_2d";
 69 |             break;
 70 | 
 71 |       case DataConstancy::Gradient:
 72 |           solve_function_name = "solve_2d_grad";
 73 |           break;
 74 | 
 75 |       case DataConstancy::LogDerivatives:
 76 |             solve_function_name = "solve_2d_log";
 77 |             break;
 78 |         
 79 |       default:
 80 |           solve_function_name = "solve_2d";
 81 |           break;
 82 |       }
 83 | 
 84 |     if (!CheckCudaError(cuModuleGetFunction(&cuf_compute_phi_ksi_, cu_module_, "compute_phi_ksi")) &&
 85 |         !CheckCudaError(cuModuleGetFunction(&cuf_solve_, cu_module_, solve_function_name.c_str()))
 86 |         ) {
 87 |       size_t const_size;
 88 | 
 89 |       /* Get the pointer to the constant memory and copy data */
 90 |       if (!CheckCudaError(cuModuleGetGlobal(&dev_constants_, &const_size, cu_module_, "container_size"))) {
 91 |         if (const_size == sizeof(container_size)) {
 92 |           if (!CheckCudaError(cuMemcpyHtoD(dev_constants_, &container_size, sizeof(container_size)))) {
 93 |              initialized_ = true;
 94 |           }
 95 |         }
 96 |       }
 97 | 
 98 |     } else {
 99 |       CheckCudaError(cuModuleUnload(cu_module_));
100 |       cu_module_ = nullptr;
101 |     }
102 |   }
103 |   return initialized_;
104 | }
105 | 
106 | void CudaOperationSolve2D::Execute(OperationParameters& params)
107 | {
108 |   if (!IsInitialized()) {
109 |     return;
110 |   }
111 | 
112 |   CUdeviceptr dev_frame_0;
113 |   CUdeviceptr dev_frame_1;
114 |   CUdeviceptr dev_flow_u;
115 |   CUdeviceptr dev_flow_v;
116 |   CUdeviceptr dev_phi;
117 |   CUdeviceptr dev_ksi;
118 | 
119 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_frame_0, "dev_frame_0");
120 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_frame_1, "dev_frame_1");
121 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_flow_u,  "dev_flow_u");
122 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_flow_v,  "dev_flow_v");
123 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_phi,     "dev_phi");
124 |   GET_PARAM_OR_RETURN(params, CUdeviceptr, dev_ksi,     "dev_ksi");
125 | 
126 |   /* Use references to guaranty correct pointer values
127 |      outside this operation after using std::swap() */
128 |   CUdeviceptr* dev_flow_du_ptr;
129 |   CUdeviceptr* dev_flow_dv_ptr;
130 |   CUdeviceptr* dev_temp_du_ptr;
131 |   CUdeviceptr* dev_temp_dv_ptr;
132 | 
133 |   GET_PARAM_PTR_OR_RETURN(params, CUdeviceptr, dev_flow_du_ptr, "dev_flow_du");
134 |   GET_PARAM_PTR_OR_RETURN(params, CUdeviceptr, dev_flow_dv_ptr, "dev_flow_dv");
135 |   GET_PARAM_PTR_OR_RETURN(params, CUdeviceptr, dev_temp_du_ptr, "dev_temp_du");
136 |   GET_PARAM_PTR_OR_RETURN(params, CUdeviceptr, dev_temp_dv_ptr, "dev_temp_dv");
137 | 
138 | 
139 |   CUdeviceptr& dev_flow_du = *dev_flow_du_ptr;
140 |   CUdeviceptr& dev_flow_dv = *dev_flow_dv_ptr;
141 |   CUdeviceptr& dev_temp_du = *dev_temp_du_ptr;
142 |   CUdeviceptr& dev_temp_dv = *dev_temp_dv_ptr;
143 | 
144 |   size_t outer_iterations_count;
145 |   size_t inner_iterations_count;
146 |   float equation_alpha;
147 |   float equation_smoothness;
148 |   float equation_data;
149 |   float hx;
150 |   float hy;
151 |   DataSize3 data_size;
152 | 
153 |   GET_PARAM_OR_RETURN(params, size_t,    outer_iterations_count, "outer_iterations_count");
154 |   GET_PARAM_OR_RETURN(params, size_t,    inner_iterations_count, "inner_iterations_count");
155 |   GET_PARAM_OR_RETURN(params, float,     equation_alpha,         "equation_alpha");
156 |   GET_PARAM_OR_RETURN(params, float,     equation_smoothness,    "equation_smoothness");
157 |   GET_PARAM_OR_RETURN(params, float,     equation_data,          "equation_data");
158 |   GET_PARAM_OR_RETURN(params, float,     hx,                     "hx");
159 |   GET_PARAM_OR_RETURN(params, float,     hy,                     "hy");
160 |   GET_PARAM_OR_RETURN(params, DataSize3, data_size,              "data_size");
161 | 
162 | 
163 |   /* Run solver kernels */
164 |   dim3 block_dim = { 16, 8};
165 | 
166 |   int shared_memory_size;
167 |   CUdevice cu_device;
168 |   CheckCudaError(cuDeviceGet(&cu_device, 0));
169 |   CheckCudaError(cuDeviceGetAttribute(&shared_memory_size, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, cu_device));
170 | 
171 |   /* We need 6 extended blocks in the shared memory */
172 |   int number_shared_blocks = 6;
173 |   int needed_shared_memory_size =
174 |     (block_dim.x + 2) * (block_dim.y + 2) * sizeof(float) * number_shared_blocks;
175 | 
176 |   int number_shared_blocks_solve = 8;
177 | 
178 |   DataConstancy data_constancy;
179 |   GET_PARAM_OR_RETURN(params, DataConstancy, data_constancy, "data_constancy");
180 | 
181 |   switch (data_constancy)
182 |   {
183 |   case DataConstancy::Grey:
184 |       number_shared_blocks_solve = 8;
185 |       break;
186 | 
187 |   case DataConstancy::Gradient:
188 |       number_shared_blocks_solve = 11;
189 |       break;
190 | 
191 |   case DataConstancy::LogDerivatives:
192 |       number_shared_blocks_solve = 11;
193 |       break;
194 | 
195 |   default:
196 |       number_shared_blocks_solve = 8;
197 |       break;
198 |   }
199 | 
200 | 
201 | 
202 |   int needed_shared_memory_size_solve =
203 |     (block_dim.x + 2) * (block_dim.y + 2) * sizeof(float) * number_shared_blocks_solve;
204 | 
205 |   if (needed_shared_memory_size > shared_memory_size || needed_shared_memory_size_solve > shared_memory_size) {
206 |     std::printf("<%s>: Error shared memory allocation. Reduce thread block size.\n", GetName());
207 |     std::printf("Shared memory: %d Needed: %d | %d\n", shared_memory_size, needed_shared_memory_size, needed_shared_memory_size_solve);
208 |     return;
209 |   }
210 | 
211 |   /* Mesure execution time */
212 |   size_t total_iterations_count = outer_iterations_count * inner_iterations_count;
213 | 
214 |   CUevent cu_event_start;
215 |   CUevent cu_event_stop;
216 | 
217 |   CheckCudaError(cuEventCreate(&cu_event_start, CU_EVENT_DEFAULT));
218 |   CheckCudaError(cuEventCreate(&cu_event_stop, CU_EVENT_DEFAULT));
219 | 
220 |   CheckCudaError(cuEventRecord(cu_event_start, NULL));
221 | 
222 |   /* Display computation status */
223 |   if (!this->silent) {
224 |       Utils::PrintProgressBar(0.f);
225 |       std::printf(" % 3.0f%%", 0.f);
226 |   }
227 | 
228 |   /* Initialize flow increment buffers with 0 */
229 |   CheckCudaError(cuMemsetD2D8(dev_flow_du, dev_container_size_.pitch, 0, data_size.width * sizeof(float),
230 |                                            dev_container_size_.height));
231 |   CheckCudaError(cuMemsetD2D8(dev_flow_dv, dev_container_size_.pitch, 0, data_size.width * sizeof(float),
232 |                                            dev_container_size_.height));
233 | 
234 | 
235 |   dim3 grid_dim = { static_cast<unsigned int>((data_size.width + block_dim.x - 1) / block_dim.x),
236 |                     static_cast<unsigned int>((data_size.height + block_dim.y - 1) / block_dim.y)};
237 | 
238 |   for (size_t i = 0; i < outer_iterations_count; ++i) {
239 |     void* args[14] = { 
240 |       &dev_frame_0,
241 |       &dev_frame_1,
242 |       &dev_flow_u,
243 |       &dev_flow_v,
244 |       &dev_flow_du,
245 |       &dev_flow_dv,
246 |       &data_size.width,
247 |       &data_size.height,
248 |       &hx,
249 |       &hy,
250 |       &equation_smoothness,
251 |       &equation_data,
252 |       &dev_phi,
253 |       &dev_ksi };
254 | 
255 |     CheckCudaError(cuLaunchKernel(cuf_compute_phi_ksi_,
256 |                                   grid_dim.x, grid_dim.y, grid_dim.z,
257 |                                   block_dim.x, block_dim.y, block_dim.z,
258 |                                   needed_shared_memory_size,
259 |                                   NULL,
260 |                                   args,
261 |                                   NULL));
262 | 
263 |     for (size_t j = 0; j < inner_iterations_count; ++j) {
264 |       void* args[15] = {
265 |         &dev_frame_0,
266 |         &dev_frame_1,
267 |         &dev_flow_u,
268 |         &dev_flow_v,
269 |         &dev_flow_du,
270 |         &dev_flow_dv,
271 |         &dev_phi,
272 |         &dev_ksi,
273 |         &data_size.width,
274 |         &data_size.height,
275 |         &hx,
276 |         &hy,
277 |         &equation_alpha,
278 |         &dev_temp_du,
279 |         &dev_temp_dv};
280 | 
281 |       CheckCudaError(cuLaunchKernel(cuf_solve_,
282 |                                     grid_dim.x, grid_dim.y, grid_dim.z,
283 |                                     block_dim.x, block_dim.y, block_dim.z,
284 |                                     needed_shared_memory_size_solve,
285 |                                     NULL,
286 |                                     args,
287 |                                     NULL));
288 |       std::swap(dev_flow_du, dev_temp_du);
289 |       std::swap(dev_flow_dv, dev_temp_dv);
290 |       
291 |       CheckCudaError(cuStreamSynchronize(NULL));
292 | 
293 |       /* Display computation status */
294 |       if (!this->silent) {
295 |           float complete = (i * inner_iterations_count + j) / static_cast<float>(total_iterations_count);
296 |           Utils::PrintProgressBar(complete);
297 |           std::printf(" % 3.0f%%", complete * 100);
298 |       }
299 |     }
300 |   }
301 |   /* Estimate GPU computation time */
302 |   CheckCudaError(cuEventRecord(cu_event_stop, NULL));
303 |   CheckCudaError(cuEventSynchronize(cu_event_stop));
304 | 
305 |   float elapsed_time;
306 |   CheckCudaError(cuEventElapsedTime(&elapsed_time, cu_event_start, cu_event_stop));
307 |   
308 |   if (!this->silent) {
309 |       Utils::PrintProgressBar(1.f);
310 |       std::printf(" %8.4fs\n", elapsed_time / 1000.);
311 |   }
312 | 
313 |   CheckCudaError(cuEventDestroy(cu_event_start));
314 |   CheckCudaError(cuEventDestroy(cu_event_stop));
315 | }


--------------------------------------------------------------------------------
/src/cuda_operations/2d/cuda_operation_solve_2d.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_SOLVE_2D_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_SOLVE_2D_H_
24 | 
25 | #include <cuda.h>
26 | 
27 | #include "src/cuda_operations/cuda_operation_base.h"
28 | #include "src/data_types/data_structs.h"
29 | 
30 | class CudaOperationSolve2D : public CudaOperationBase {
31 | private:
32 |   CUfunction cuf_compute_phi_ksi_;
33 |   CUfunction cuf_solve_;
34 | 
35 |   DataSize3 dev_container_size_;
36 | 
37 | public:
38 |   CudaOperationSolve2D();
39 | 
40 |   bool Initialize(const OperationParameters* params = nullptr) override;
41 |   void Execute(OperationParameters& params) override;
42 | 
43 |   bool silent = false;
44 | };
45 | 
46 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_2D_CUDA_OPERATION_SOLVE_2D_H_
47 | 


--------------------------------------------------------------------------------
/src/cuda_operations/cuda_operation_base.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #include "cuda_operation_base.h"
23 | 
24 | #include <cstdio>
25 | #include <cstring>
26 | 
27 | #include "src/utils/cuda_utils.h"
28 | 
29 | CudaOperationBase::CudaOperationBase(const char* name)
30 |   : name_(name) 
31 | {
32 | }
33 | 
34 | const char* CudaOperationBase::GetName() const
35 | {
36 |   return name_;
37 | };
38 | 
39 | bool CudaOperationBase::IsInitialized() const
40 | {
41 |   if (!initialized_) {
42 |     std::printf("Error: Operation '%s' was not initialized.\n", name_);
43 |   }
44 |   return initialized_;
45 | }
46 | 
47 | void CudaOperationBase::Execute(OperationParameters& params)
48 | {
49 |   std::printf("Warning: '%s' Execute() was not defined.\n", name_);
50 | }
51 | 
52 | void CudaOperationBase::Destroy()
53 | {
54 |   if (cu_module_) {
55 |     CheckCudaError(cuModuleUnload(cu_module_));
56 |     cu_module_ = nullptr;
57 |   }
58 |   initialized_ = false;
59 | }
60 | 
61 | CudaOperationBase::~CudaOperationBase()
62 | {
63 |   if (initialized_) {
64 |     Destroy();
65 |   }
66 | }
67 | 
68 | 


--------------------------------------------------------------------------------
/src/cuda_operations/cuda_operation_base.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_CUDA_OPERATIONS_CUDA_OPERATION_BASE_H_
23 | #define GPUFLOW3D_CUDA_OPERATIONS_CUDA_OPERATION_BASE_H_
24 | 
25 | #include <cuda.h>
26 | 
27 | #include "src/data_types/operation_parameters.h"
28 | 
29 | class CudaOperationBase {
30 | private:
31 |   const char* name_ = nullptr;
32 | 
33 | protected:
34 |   CUmodule cu_module_ = nullptr;
35 | 
36 |   CUdeviceptr dev_constants_ = 0;
37 | 
38 |   bool initialized_ = false;
39 | 
40 |   CudaOperationBase(const char* name);
41 | 
42 |   bool IsInitialized() const;
43 | 
44 | public:
45 |   const char* GetName() const;
46 |   
47 |   virtual bool Initialize(const OperationParameters* params = nullptr) = 0;
48 |   virtual void Execute(OperationParameters& params);
49 |   virtual void Destroy();
50 | 
51 |   virtual ~CudaOperationBase();
52 |   
53 | };
54 | 
55 | #endif // !GPUFLOW3D_CUDA_OPERATIONS_CUDA_OPERATION_BASE_H_
56 | 


--------------------------------------------------------------------------------
/src/data_types/data2d.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implemetation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include "data2d.h"
 23 | 
 24 | #include <algorithm>
 25 | #include <cstdio>
 26 | #include <climits>
 27 | #include <cstring>
 28 | 
 29 | #include <cuda.h>
 30 | 
 31 | #include "src/utils/cuda_utils.h"
 32 | 
 33 | 
 34 | //#define ALLOCATE_PINNED_MEMORY
 35 | 
 36 | Data2D::Data2D()
 37 |   : width_(0), height_(0)
 38 | {}
 39 | 
 40 | Data2D::Data2D(size_t width, size_t height)
 41 |   : width_(width), height_(height)
 42 | {
 43 |   data_ = static_cast<float*>(AllocateMemory(width_, height_));
 44 | }
 45 | 
 46 | void Data2D::Swap(Data2D& data2d)
 47 | {
 48 |   if (this->width_ == data2d.width_ &&
 49 |       this->height_ == data2d.height_) {
 50 |     std::swap(this->data_, data2d.data_);
 51 |   } else {
 52 |     std::printf("Error. Cannot swap two Data2D objects (wrong dimensions).\n");
 53 |   }
 54 | }
 55 | 
 56 | void* Data2D::AllocateMemory(size_t width, size_t height)
 57 | {
 58 |   void* ptr = nullptr;
 59 | 
 60 | #ifdef ALLOCATE_PINNED_MEMORY
 61 |   if (CheckCudaError(cuMemAllocHost(&ptr, width * height * sizeof(float)))) {
 62 |     Invalidate();
 63 |     return nullptr;
 64 |   }
 65 | #else
 66 |   try {
 67 |     ptr = new float[width * height];
 68 |   }
 69 |   catch (std::bad_alloc& ba) {
 70 |     Invalidate();
 71 |     std::printf("Error. Cannot allocate memory on the host. (%s)\n", ba.what());
 72 |   }
 73 | #endif
 74 | 
 75 |   return ptr;
 76 | }
 77 | 
 78 | void Data2D::FreeMemory(void* pointer)
 79 | {
 80 | #ifdef ALLOCATE_PINNED_MEMORY
 81 |   if (pointer)
 82 |     CheckCudaError(cuMemFreeHost(pointer));
 83 | #else
 84 |   if (pointer) {
 85 |     delete[] pointer;
 86 |     pointer = nullptr;
 87 |   }
 88 | #endif
 89 | }
 90 | 
 91 | void Data2D::ZeroData()
 92 | {
 93 |   if (data_) {
 94 |     std::memset(data_, 0, width_ * height_ * sizeof(float));
 95 |   }
 96 | }
 97 | 
 98 | bool Data2D::ReadRAWFromFileU8(const char* filename, size_t width, size_t height)
 99 | {
100 |   bool loaded = false;
101 |   std::FILE *file = std::fopen(filename, "rb");
102 |   if (file) {
103 |     this->Invalidate();
104 | 
105 |     width_ = width;
106 |     height_ = height;
107 |     data_ = static_cast<float*>(AllocateMemory(width_, height_));
108 | 
109 |     if (data_) {
110 |       unsigned char *dataU8 = new unsigned char[width_];
111 | 
112 |         for (size_t y = 0; y < height_; ++y) {
113 |             size_t readed = std::fread(dataU8, sizeof(unsigned char), width_, file);
114 | 
115 |             if (readed == width_) {
116 |             for (size_t x = 0; x < width_; ++x) {
117 |                 data_[Index(x, y)] = static_cast<float>(dataU8[x]);
118 |             }
119 |             } else {
120 |             goto loop_break;
121 |             }
122 |         }
123 |       
124 | 
125 |       if (std::fread(dataU8, sizeof(unsigned char), width_, file) == 0) {
126 |         loaded = true;
127 |       } else {
128 | 
129 |       loop_break:
130 |         std::printf("Error reading RAW data from file '%s': wrong dimensions.", filename);
131 |         this->Invalidate();
132 |       }
133 | 
134 |       delete[] dataU8;
135 |       std::fclose(file);
136 |     }
137 |   } else {
138 |     std::printf("Cannot open file '%s'.\n", filename);
139 |   }
140 |   return loaded;
141 | }
142 | 
143 | bool Data2D::ReadRAWFromFileF32(const char* filename, size_t width, size_t height)
144 | {
145 |   bool loaded = false;
146 |   std::FILE *file = std::fopen(filename, "rb");
147 |   if (file) {
148 |     this->Invalidate();
149 | 
150 |     width_ = width;
151 |     height_ = height;
152 |     data_ = static_cast<float*>(AllocateMemory(width_, height_));
153 | 
154 |     if (data_) {
155 | 
156 |         for (size_t y = 0; y < height_; ++y) {
157 |           size_t readed = std::fread(&data_[Index(0, y)], sizeof(float), width_, file);
158 |           if (readed != width_) {
159 |             goto loop_break;
160 |           }
161 |         }
162 |       
163 | 
164 |       if (std::fread(data_, sizeof(unsigned char), width_, file) == 0) {
165 |         loaded = true;
166 |       } else {
167 |       loop_break:
168 |         std::printf("Error reading RAW data from file '%s': wrong dimensions.", filename);
169 |         this->Invalidate();
170 |       }
171 | 
172 |       std::fclose(file);
173 |     }
174 |   } else {
175 |     std::printf("Cannot open file '%s'.\n", filename);
176 |   }
177 |   return loaded;
178 | }
179 | 
180 | bool Data2D::WriteRAWToFileU8(const char* filename)
181 | {
182 |   std::FILE *file = std::fopen(filename, "wb");
183 |   if (file) {
184 |    unsigned char *dataU8 = new unsigned char[width_];
185 | 
186 | 
187 |       for (size_t y = 0; y < height_; ++y) {
188 |         for (size_t x = 0; x < width_; ++x) {
189 |           float dataF32 = std::min(255.f, std::max(0.f, data_[Index(x, y)]));
190 |           dataU8[x] = static_cast<unsigned char>(dataF32);
191 |         }
192 |         size_t written = std::fwrite(dataU8, sizeof(unsigned char), width_, file);
193 |         if (written != width_) {
194 |           std::printf("Error writing RAW data to file '%s'.", filename);
195 |           delete[] dataU8;
196 |           std::fclose(file);
197 |           return false;
198 |         }
199 |       }
200 |     
201 |     delete[] dataU8;
202 |     std::fclose(file);
203 |     return true;
204 |   } else {
205 |     std::printf("Cannot open file '%s'.\n", filename);
206 |     return false;
207 |   }
208 | }
209 | 
210 | bool Data2D::WriteRAWToFileF32(const char* filename)
211 | {
212 |   std::FILE *file = std::fopen(filename, "wb");
213 |   if (file) {
214 |    unsigned char *dataU8 = new unsigned char[width_];
215 | 
216 |       for (size_t y = 0; y < height_; ++y) {
217 |         size_t written = std::fwrite(&data_[Index(0, y)], sizeof(float), width_, file);
218 |         if (written != width_) {
219 |           std::printf("Error writing RAW data to file '%s'.", filename);
220 |           std::fclose(file);
221 |           return false;
222 |         }
223 |       }
224 |     
225 |     std::fclose(file);
226 |     return true;
227 |   } else {
228 |     std::printf("Cannot open file '%s'.\n", filename);
229 |     return false;
230 |   }
231 | }
232 | 
233 | //bool Data3D::WriteFlowToFileVTK(const char* filename, const Data3D& flow_u, const Data3D& flow_v, const Data3D& flow_w)
234 | //{
235 | //  std::FILE *file = std::fopen(filename, "wb");
236 | //  if (file) {
237 | //    std::fprintf(file, "# vtk DataFile Version 2.0\n");
238 | //    std::fprintf(file, "3D Vector field computed by GpuFlow3D\n");
239 | //    std::fprintf(file, "BINARY\n");
240 | //    std::fprintf(file, "DATASET STRUCTURED_POINTS\n");
241 | //    std::fprintf(file, "DIMENSIONS %d %d %d\n", flow_u.width_, flow_u.height_, flow_u.depth_);
242 | //    std::fprintf(file, "ORIGIN 0 0 0\n");
243 | //    std::fprintf(file, "SPACING 1 1 1\n");
244 | //    std::fprintf(file, "POINT_DATA %d\n", flow_u.width_ * flow_u.height_ * flow_u.depth_);
245 | //    std::fprintf(file, "VECTORS vectors float\n");
246 | //
247 | //    for (size_t z = 0; z < flow_u.depth_; ++z) {
248 | //      for (size_t y = 0; y < flow_u.height_; ++y) {
249 | //        for (size_t x = 0; x < flow_u.width_; ++x) {
250 | //          std::fwrite(&flow_u.data_[(z * flow_u.height_ + y) * flow_u.width_ + x], sizeof(float), 1, file);
251 | //          std::fwrite(&flow_v.data_[(z * flow_v.height_ + y) * flow_v.width_ + x], sizeof(float), 1, file);
252 | //          std::fwrite(&flow_w.data_[(z * flow_w.height_ + y) * flow_w.width_ + x], sizeof(float), 1, file);
253 | //        }
254 | //      }
255 | //    }
256 | //
257 | //    std::fclose(file);
258 | //    return true;
259 | //  } else {
260 | //    std::printf("Cannot open file '%s'.\n", filename);
261 | //    return false;
262 | //  }
263 | //}
264 | 
265 | void Data2D::Invalidate()
266 | {
267 |   width_ = 0;
268 |   height_ = 0;
269 |   FreeMemory(data_);
270 | }
271 | 
272 | Data2D::~Data2D()
273 | {
274 |   FreeMemory(data_);
275 | }


--------------------------------------------------------------------------------
/src/data_types/data2d.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_DATA_TYPES_DATA2D_H_
23 | #define GPUFLOW3D_DATA_TYPES_DATA2D_H_
24 | 
25 | #include <cstdlib>
26 | 
/**
 * Host-side 2D array of floats stored row by row (element (x, y) lives at
 * index y * width + x), with helpers for reading and writing RAW files.
 *
 * NOTE(review): no copy operations are declared, so the compiler-generated
 * ones copy the raw data_ pointer while the destructor frees it -- avoid
 * copying objects of this class.
 */
class Data2D
{
public:
  Data2D();
  Data2D(size_t width, size_t height);

  size_t Width() { return width_; }
  size_t Height() { return height_; }
  /* Raw access to the buffer; null when the object is empty. */
  float* DataPtr() { return data_; }
  /* Unchecked element access. */
  float& Data(size_t x, size_t y) { return data_[Index(x, y)]; }

  /* Exchanges the buffers of two equally sized objects. */
  void Swap(Data2D& data2d);

  void ZeroData();

  bool ReadRAWFromFileU8(const char* filename, size_t width, size_t height);
  bool ReadRAWFromFileF32(const char* filename, size_t width, size_t height);

  bool WriteRAWToFileU8(const char* filename);
  bool WriteRAWToFileF32(const char* filename);

  //static bool WriteFlowToFileVTK(const char* filename, const Data3D& flow_u, const Data3D& flow_v, const Data3D& flow_w);

  ~Data2D();

private:
  float   *data_ = nullptr;
  size_t  width_;
  size_t  height_;

  /* Linear index of element (x, y). */
  size_t Index(size_t x, size_t y) {
    return y * width_ + x;
  }

  void Invalidate();

  void* AllocateMemory(size_t width, size_t height);
  void FreeMemory(void* pointer);
};
66 | 
67 | 
68 | 
69 | #endif // !GPUFLOW3D_DATA_TYPES_DATA2D_H_


--------------------------------------------------------------------------------
/src/data_types/data3d.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implemetation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include "src/data_types/data3d.h"
 23 | 
 24 | #include <algorithm>
 25 | #include <cstdio>
 26 | #include <climits>
 27 | #include <cstring>
 28 | 
 29 | #include <cuda.h>
 30 | 
 31 | #include "src/utils/cuda_utils.h"
 32 | 
 33 | 
 34 | 
 35 | //#define ALLOCATE_PINNED_MEMORY
 36 | 
 37 | Data3D::Data3D()
 38 |   : width_(0), height_(0), depth_(0)
 39 | {}
 40 | 
 41 | Data3D::Data3D(size_t width, size_t height, size_t depth)
 42 |   : width_(width), height_(height), depth_(depth)
 43 | {
 44 |   data_ = static_cast<float*>(AllocateMemory(width_, height_, depth_));
 45 | }
 46 | 
 47 | void Data3D::Swap(Data3D& data3d)
 48 | {
 49 |   if (this->width_ == data3d.width_ &&
 50 |       this->height_ == data3d.height_ &&
 51 |       this->depth_ == data3d.depth_) {
 52 |     std::swap(this->data_, data3d.data_);
 53 |   } else {
 54 |     std::printf("Error. Cannot swap two Data3D objects (wrong dimensions).\n");
 55 |   }
 56 | }
 57 | 
 58 | void* Data3D::AllocateMemory(size_t width, size_t height, size_t depth)
 59 | {
 60 |   void* ptr = nullptr;
 61 | 
 62 | #ifdef ALLOCATE_PINNED_MEMORY
 63 |   if (CheckCudaError(cuMemAllocHost(&ptr, width * height * depth * sizeof(float)))) {
 64 |     Invalidate();
 65 |     return nullptr;
 66 |   }
 67 | #else
 68 |   try {
 69 |     ptr = new float[width * height * depth];
 70 |   }
 71 |   catch (std::bad_alloc& ba) {
 72 |     Invalidate();
 73 |     std::printf("Error. Cannot allocate memory on the host. (%s)\n", ba.what());
 74 |   }
 75 | #endif
 76 | 
 77 |   return ptr;
 78 | }
 79 | 
 80 | void Data3D::FreeMemory(void* pointer)
 81 | {
 82 | #ifdef ALLOCATE_PINNED_MEMORY
 83 |   if (pointer)
 84 |     CheckCudaError(cuMemFreeHost(pointer));
 85 | #else
 86 |   if (pointer) {
 87 |     delete[] pointer;
 88 |     pointer = nullptr;
 89 |   }
 90 | #endif
 91 | }
 92 | 
 93 | void Data3D::ZeroData()
 94 | {
 95 |   if (data_) {
 96 |     std::memset(data_, 0, width_ * height_ * depth_ * sizeof(float));
 97 |   }
 98 | }
 99 | 
100 | bool Data3D::ReadRAWFromFileU8(const char* filename, size_t width, size_t height, size_t depth)
101 | {
102 |   bool loaded = false;
103 |   std::FILE *file = std::fopen(filename, "rb");
104 |   if (file) {
105 |     this->Invalidate();
106 | 
107 |     width_ = width;
108 |     height_ = height;
109 |     depth_ = depth;
110 |     data_ = static_cast<float*>(AllocateMemory(width_, height_, depth_));
111 | 
112 |     if (data_) {
113 |       unsigned char *dataU8 = new unsigned char[width_];
114 | 
115 |       for (size_t z = 0; z < depth_; ++z) {
116 |         for (size_t y = 0; y < height_; ++y) {
117 |           size_t readed = std::fread(dataU8, sizeof(unsigned char), width_, file);
118 | 
119 |           if (readed == width_) {
120 |             for (size_t x = 0; x < width_; ++x) {
121 |               data_[Index(x, y, z)] = static_cast<float>(dataU8[x]);
122 |             }
123 |           } else {
124 |             goto loop_break;
125 |           }
126 |         }
127 |       }
128 | 
129 |       if (std::fread(dataU8, sizeof(unsigned char), width_, file) == 0) {
130 |         loaded = true;
131 |       } else {
132 | 
133 |       loop_break:
134 |         std::printf("Error reading RAW data from file '%s': wrong dimensions.", filename);
135 |         this->Invalidate();
136 |       }
137 | 
138 |       delete[] dataU8;
139 |       std::fclose(file);
140 |     }
141 |   } else {
142 |     std::printf("Cannot open file '%s'.\n", filename);
143 |   }
144 |   return loaded;
145 | }
146 | 
147 | bool Data3D::ReadRAWFromFileF32(const char* filename, size_t width, size_t height, size_t depth)
148 | {
149 |   bool loaded = false;
150 |   std::FILE *file = std::fopen(filename, "rb");
151 |   if (file) {
152 |     this->Invalidate();
153 | 
154 |     width_ = width;
155 |     height_ = height;
156 |     depth_ = depth;
157 |     data_ = static_cast<float*>(AllocateMemory(width_, height_, depth_));
158 | 
159 |     if (data_) {
160 |       for (size_t z = 0; z < depth_; ++z) {
161 |         for (size_t y = 0; y < height_; ++y) {
162 |           size_t readed = std::fread(&data_[Index(0, y, z)], sizeof(float), width_, file);
163 |           if (readed != width_) {
164 |             goto loop_break;
165 |           }
166 |         }
167 |       }
168 | 
169 |       if (std::fread(data_, sizeof(unsigned char), width_, file) == 0) {
170 |         loaded = true;
171 |       } else {
172 |       loop_break:
173 |         std::printf("Error reading RAW data from file '%s': wrong dimensions.", filename);
174 |         this->Invalidate();
175 |       }
176 | 
177 |       std::fclose(file);
178 |     }
179 |   } else {
180 |     std::printf("Cannot open file '%s'.\n", filename);
181 |   }
182 |   return loaded;
183 | }
184 | 
185 | bool Data3D::WriteRAWToFileU8(const char* filename)
186 | {
187 |   std::FILE *file = std::fopen(filename, "wb");
188 |   if (file) {
189 |    unsigned char *dataU8 = new unsigned char[width_];
190 | 
191 |     for (size_t z = 0; z < depth_; ++z) {
192 |       for (size_t y = 0; y < height_; ++y) {
193 |         for (size_t x = 0; x < width_; ++x) {
194 |           float dataF32 = std::min(255.f, std::max(0.f, data_[Index(x, y, z)]));
195 |           dataU8[x] = static_cast<unsigned char>(dataF32);
196 |         }
197 |         size_t written = std::fwrite(dataU8, sizeof(unsigned char), width_, file);
198 |         if (written != width_) {
199 |           std::printf("Error writing RAW data to file '%s'.", filename);
200 |           delete[] dataU8;
201 |           std::fclose(file);
202 |           return false;
203 |         }
204 |       }
205 |     }
206 |     delete[] dataU8;
207 |     std::fclose(file);
208 |     return true;
209 |   } else {
210 |     std::printf("Cannot open file '%s'.\n", filename);
211 |     return false;
212 |   }
213 | }
214 | 
215 | bool Data3D::WriteRAWToFileF32(const char* filename)
216 | {
217 |   std::FILE *file = std::fopen(filename, "wb");
218 |   if (file) {
219 |    unsigned char *dataU8 = new unsigned char[width_];
220 | 
221 |     for (size_t z = 0; z < depth_; ++z) {
222 |       for (size_t y = 0; y < height_; ++y) {
223 |         size_t written = std::fwrite(&data_[Index(0, y, z)], sizeof(float), width_, file);
224 |         if (written != width_) {
225 |           std::printf("Error writing RAW data to file '%s'.", filename);
226 |           std::fclose(file);
227 |           return false;
228 |         }
229 |       }
230 |     }
231 |     std::fclose(file);
232 |     return true;
233 |   } else {
234 |     std::printf("Cannot open file '%s'.\n", filename);
235 |     return false;
236 |   }
237 | }
238 | 
239 | bool Data3D::WriteFlowToFileVTK(const char* filename, const Data3D& flow_u, const Data3D& flow_v, const Data3D& flow_w)
240 | {
241 |   std::FILE *file = std::fopen(filename, "wb");
242 |   if (file) {
243 |     std::fprintf(file, "# vtk DataFile Version 2.0\n");
244 |     std::fprintf(file, "3D Vector field computed by GpuFlow3D\n");
245 |     std::fprintf(file, "BINARY\n");
246 |     std::fprintf(file, "DATASET STRUCTURED_POINTS\n");
247 |     std::fprintf(file, "DIMENSIONS %d %d %d\n", flow_u.width_, flow_u.height_, flow_u.depth_);
248 |     std::fprintf(file, "ORIGIN 0 0 0\n");
249 |     std::fprintf(file, "SPACING 1 1 1\n");
250 |     std::fprintf(file, "POINT_DATA %d\n", flow_u.width_ * flow_u.height_ * flow_u.depth_);
251 |     std::fprintf(file, "VECTORS vectors float\n");
252 | 
253 |     for (size_t z = 0; z < flow_u.depth_; ++z) {
254 |       for (size_t y = 0; y < flow_u.height_; ++y) {
255 |         for (size_t x = 0; x < flow_u.width_; ++x) {
256 |           std::fwrite(&flow_u.data_[(z * flow_u.height_ + y) * flow_u.width_ + x], sizeof(float), 1, file);
257 |           std::fwrite(&flow_v.data_[(z * flow_v.height_ + y) * flow_v.width_ + x], sizeof(float), 1, file);
258 |           std::fwrite(&flow_w.data_[(z * flow_w.height_ + y) * flow_w.width_ + x], sizeof(float), 1, file);
259 |         }
260 |       }
261 |     }
262 | 
263 |     std::fclose(file);
264 |     return true;
265 |   } else {
266 |     std::printf("Cannot open file '%s'.\n", filename);
267 |     return false;
268 |   }
269 | }
270 | 
271 | void Data3D::Invalidate()
272 | {
273 |   width_ = 0;
274 |   height_ = 0;
275 |   depth_ = 0;
276 |   FreeMemory(data_);
277 | }
278 | 
279 | Data3D::~Data3D()
280 | {
281 |   FreeMemory(data_);
282 | }


--------------------------------------------------------------------------------
/src/data_types/data3d.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_DATA_TYPES_DATA3D_H_
23 | #define GPUFLOW3D_DATA_TYPES_DATA3D_H_
24 | 
25 | #include <cstdlib>
26 | 
/**
 * Host-side 3D array of floats stored slice by slice, row by row (element
 * (x, y, z) lives at index (z * height + y) * width + x), with helpers for
 * reading and writing RAW files and for exporting a flow field to VTK.
 *
 * NOTE(review): no copy operations are declared, so the compiler-generated
 * ones copy the raw data_ pointer while the destructor frees it -- avoid
 * copying objects of this class.
 */
class Data3D
{
public:
  Data3D();
  Data3D(size_t width, size_t height, size_t depth);

  size_t Width() { return width_; }
  size_t Height() { return height_; }
  size_t Depth() { return depth_; }
  /* Raw access to the buffer; null when the object is empty. */
  float* DataPtr() { return data_; }
  /* Unchecked element access. */
  float& Data(size_t x, size_t y, size_t z) { return data_[Index(x, y, z)]; }

  /* Exchanges the buffers of two equally sized objects. */
  void Swap(Data3D& data3d);

  void ZeroData();

  bool ReadRAWFromFileU8(const char* filename, size_t width, size_t height, size_t depth);
  bool ReadRAWFromFileF32(const char* filename, size_t width, size_t height, size_t depth);

  bool WriteRAWToFileU8(const char* filename);
  bool WriteRAWToFileF32(const char* filename);

  /* Writes the three flow components as one VTK vector field. */
  static bool WriteFlowToFileVTK(const char* filename, const Data3D& flow_u, const Data3D& flow_v, const Data3D& flow_w);

  ~Data3D();

private:
  float   *data_ = nullptr;
  size_t  width_;
  size_t  height_;
  size_t  depth_;

  /* Linear index of element (x, y, z). */
  size_t Index(size_t x, size_t y, size_t z) {
    return (z * height_ + y) * width_ + x;
  }

  void Invalidate();

  void* AllocateMemory(size_t width, size_t height, size_t depth);
  void FreeMemory(void* pointer);
};
68 | 
69 | #endif // !GPUFLOW3D_DATA_TYPES_DATA3D_H_


--------------------------------------------------------------------------------
/src/data_types/data_structs.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_DATA_TYPES_DATA_STRUCTS_H_
23 | #define GPUFLOW3D_DATA_TYPES_DATA_STRUCTS_H_
24 | 
/* Available computation methods.
   NOTE(review): not referenced in the visible sources; presumably selected
   through the settings -- confirm. */
enum class Methods {
  OpticalFlow,
  Correlation
};
26 | 
/* Kinds of data constancy.
   NOTE(review): presumably the constancy assumption used by the optical flow
   data term -- confirm against the solver code. */
enum class DataConstancy {
  Grey,
  Gradient,
  LogDerivatives
};
28 | 
29 | 
30 | 
/* Plain aggregate describing the extent of a 2D buffer.
   NOTE(review): units are not visible here; width/height are presumably in
   elements and pitch presumably is the row pitch of the allocation -- confirm. */
struct DataSize3 {
  size_t width;
  size_t height;
  size_t pitch;
};
36 | 
37 | 
/* Plain aggregate of three floats.
   NOTE(review): presumably the minimum, maximum and average of a data set --
   confirm against the code that fills it. */
struct Stat3 {
  float min;  /* smallest value */
  float max;  /* largest value */
  float avg;  /* average value */
};
43 | 
44 | #endif // !GPUFLOW3D_DATA_TYPES_DATA_STRUCTS_H_
45 | 


--------------------------------------------------------------------------------
/src/data_types/operation_parameters.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implemetation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #include "src/data_types/operation_parameters.h"
23 | 
24 | OperationParameters::OperationParameters()
25 | {
26 | }
27 | 
28 | bool OperationParameters::PushValuePtr(std::string key, void* value_ptr)
29 | {
30 |   if (!map_.count(key)) {
31 |     map_.insert({ key, value_ptr });
32 |     return true;
33 |   }
34 |   return false;
35 | }
36 | 
37 | void* OperationParameters::GetValuePtr(std::string key) const
38 | {
39 |   if (map_.count(key)) {
40 |     return map_.at(key);
41 |   }
42 |   return nullptr;
43 | }
44 | 
45 | void OperationParameters::Clear()
46 | {
47 |   map_.clear();
48 | }


--------------------------------------------------------------------------------
/src/data_types/operation_parameters.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_DATA_TYPES_OPERATION_PARAMETERS_H_
23 | #define GPUFLOW3D_DATA_TYPES_OPERATION_PARAMETERS_H_
24 | 
25 | #include <string>
26 | #include <unordered_map>
27 | 
28 | /**
29 |  * String-keyed collection of untyped (void*) pointers.
30 |  * NOTE(review): presumably used to hand arguments to the CUDA operations - confirm against the callers.
31 |  */
32 | class OperationParameters {
33 | private:
34 |   /* key -> pointer exactly as it was passed to PushValuePtr() */
35 |   std::unordered_map<std::string, void*> map_;
36 | 
37 | public:
38 |   OperationParameters();
39 | 
40 |   /* Stores value_ptr under key; returns false (and stores nothing) if key is already present. */
41 |   bool PushValuePtr(std::string key, void* value_ptr);
42 |   /* Returns the pointer stored under key, or nullptr if there is none. */
43 |   void* GetValuePtr(std::string key) const;
44 |   /* Removes all entries from the map. */
45 |   void Clear();
46 | };
39 | 
40 | #endif // !GPUFLOW3D_DATA_TYPES_OPERATION_PARAMETERS_H_
41 | 


--------------------------------------------------------------------------------
/src/kernels/add_2d.cu:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #include <device_launch_parameters.h>
23 | #include <vector_types.h>
24 | 
25 | #include "src/data_types/data_structs.h"
26 | 
27 | //#define IND(X, Y, Z) (((Z) * container_size.height + (Y)) * (container_size.pitch / sizeof(float)) + (X)) 
28 | #define IND(X, Y) ((Y) * (container_size.pitch / sizeof(float)) + (X)) 
29 | 
30 | 
31 | __constant__ DataSize3 container_size;
32 | 
33 | /**
34 |  * In-place element-wise addition: operand_0[x, y] += operand_1[x, y] for every
35 |  * x < width and y < height. One thread handles one element; rows are addressed
36 |  * through the IND macro, i.e. with the pitch stored in container_size.
37 |  */
38 | extern "C" __global__ void add_2d(
39 |         float* operand_0,
40 |   const float* operand_1,
41 |         size_t width,
42 |         size_t height)
43 | {
44 |   const unsigned int col = blockDim.x * blockIdx.x + threadIdx.x;
45 |   const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y;
46 | 
47 |   /* Threads that fall outside the width x height area have nothing to do. */
48 |   if (col >= width || row >= height) {
49 |     return;
50 |   }
51 | 
52 |   const size_t offset = IND(col, row);
53 |   operand_0[offset] += operand_1[offset];
54 | }
47 | 


--------------------------------------------------------------------------------
/src/kernels/convolution_2d.cu:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | /*
 23 |  * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
 24 |  *
 25 |  * Please refer to the NVIDIA end user license agreement (EULA) associated
 26 |  * with this source code for terms and conditions that govern your use of
 27 |  * this software. Any use, reproduction, disclosure, or distribution of
 28 |  * this software and related documentation outside the terms of the EULA
 29 |  * is strictly prohibited.
 30 |  *
 31 |  */
 32 | 
 33 | #include <assert.h>
 34 | //#include <helper_cuda.h>
 35 | #include <device_launch_parameters.h>
 36 | #include <iostream>
 37 | #include <cstdio>
 38 | 
 39 | #define __CUDACC__
 40 | 
 41 | #include <device_functions.h>
 42 | #include <math_functions.h>
 43 | 
 44 | #include "src/data_types/data_structs.h"
 45 | 
 46 | using namespace std;
 47 | 
 48 | 
 49 | #define MAX_KERNEL_LENGTH 51
 50 | 
 51 | __constant__ DataSize3 container_size;
 52 | 
 53 | 
 54 | 
 55 | ////////////////////////////////////////////////////////////////////////////////
 56 | // Convolution kernel storage
 57 | ////////////////////////////////////////////////////////////////////////////////
 58 | __constant__ float c_Kernel[MAX_KERNEL_LENGTH];
 59 | 
 60 | //extern "C" void setConvolutionKernel(float *h_Kernel, unsigned int kernel_length)
 61 | //{
 62 | //    cudaMemcpyToSymbol(c_Kernel, h_Kernel, kernel_length * sizeof(float));
 63 | //}
 64 | 
 65 | 
 66 | ////////////////////////////////////////////////////////////////////////////////
 67 | // Row convolution filter
 68 | ////////////////////////////////////////////////////////////////////////////////
 69 | #define   ROWS_BLOCKDIM_X 16
 70 | #define   ROWS_BLOCKDIM_Y 4
 71 | #define ROWS_RESULT_STEPS 4
 72 | #define   ROWS_HALO_STEPS 1
 73 | 
 74 | /**
 75 |  * Horizontal pass of a separable convolution using the taps stored in c_Kernel.
 76 |  *
 77 |  * Every block stages a tile of ROWS_BLOCKDIM_Y rows in shared memory: ROWS_RESULT_STEPS
 78 |  * output segments with ROWS_HALO_STEPS halo segments on either side, each segment
 79 |  * ROWS_BLOCKDIM_X columns wide. Samples that fall outside the image are taken as zero.
 80 |  *
 81 |  * @param d_Dst          image the filtered rows are written to
 82 |  * @param d_Src          image the rows are read from
 83 |  * @param imageW         image width in pixels
 84 |  * @param imageH         image height in pixels
 85 |  * @param pitch          row stride of d_Src and d_Dst, counted in float elements
 86 |  * @param kernel_radius  taps c_Kernel[0 .. 2 * kernel_radius] are applied; the halo is only
 87 |  *                       ROWS_HALO_STEPS * ROWS_BLOCKDIM_X columns wide, so a larger radius
 88 |  *                       would index outside the shared tile
 89 |  *
 90 |  * NOTE(review): the indexing assumes blockDim == (ROWS_BLOCKDIM_X, ROWS_BLOCKDIM_Y) - confirm
 91 |  * against the host launch code.
 92 |  */
 93 | extern "C" __global__ void convolutionRowsKernel(
 94 |     float *d_Dst,
 95 |     const float *d_Src,
 96 |     int imageW,
 97 |     int imageH,
 98 |     int pitch,
 99 |     int kernel_radius
100 | )
101 | {
102 |     __shared__ float s_Data[ROWS_BLOCKDIM_Y][(ROWS_RESULT_STEPS + 2 * ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X];
103 | 
104 |     //Offset to the left halo edge
105 |     const int baseX = (blockIdx.x * ROWS_RESULT_STEPS - ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X + threadIdx.x;
106 |     const int baseY = blockIdx.y * ROWS_BLOCKDIM_Y + threadIdx.y;
107 | 
108 |     // Rows below the image (possible when imageH is not a multiple of ROWS_BLOCKDIM_Y)
109 |     // must neither be read nor written.
110 |     const bool row_in_image = baseY < imageH;
111 | 
112 |     d_Src += baseY * pitch + baseX;
113 |     d_Dst += baseY * pitch + baseX;
114 | 
115 |     //Load left halo, main data and right halo in one sweep.
116 |     //Segment i of this thread sits at image column baseX + i * ROWS_BLOCKDIM_X.
117 | #pragma unroll
118 | 
119 |     for (int i = 0; i < ROWS_RESULT_STEPS + 2 * ROWS_HALO_STEPS; i++)
120 |     {
121 |         const int x = baseX + i * ROWS_BLOCKDIM_X;
122 |         const bool inside = row_in_image && x >= 0 && x < imageW;
123 | 
124 |         s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X] = inside ? d_Src[i * ROWS_BLOCKDIM_X] : 0;
125 |     }
126 | 
127 |     // Every thread of the block reaches the barrier, including those that produce no output.
128 |     __syncthreads();
129 | 
130 |     if (!row_in_image)
131 |         return;
132 | 
133 |     //Compute and store results
134 | #pragma unroll
135 | 
136 |     for (int i = ROWS_HALO_STEPS; i < ROWS_HALO_STEPS + ROWS_RESULT_STEPS; i++)
137 |     {
138 |         // Columns only grow with i: once one is past the right edge, the remaining ones are too.
139 |         if (baseX + i * ROWS_BLOCKDIM_X >= imageW)
140 |             return;
141 | 
142 |         float sum = 0;
143 | 
144 | #pragma unroll
145 | 
146 |         for (int j = -kernel_radius; j <= kernel_radius; j++)
147 |         {
148 |             sum += c_Kernel[kernel_radius - j] * s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X + j];
149 |         }
150 | 
151 |         d_Dst[i * ROWS_BLOCKDIM_X] = sum;
152 |     }
153 | }
169 | 
170 | 
171 | 
172 | 
173 | ////////////////////////////////////////////////////////////////////////////////
174 | // Column convolution filter
175 | ////////////////////////////////////////////////////////////////////////////////
176 | #define   COLUMNS_BLOCKDIM_X 4
177 | #define   COLUMNS_BLOCKDIM_Y 16
178 | #define COLUMNS_RESULT_STEPS 4
179 | #define   COLUMNS_HALO_STEPS 1
180 | 
181 | /**
182 |  * Vertical pass of a separable convolution using the taps stored in c_Kernel.
183 |  *
184 |  * Every block stages a tile of COLUMNS_BLOCKDIM_X columns in shared memory:
185 |  * COLUMNS_RESULT_STEPS output segments with COLUMNS_HALO_STEPS halo segments above and
186 |  * below, each segment COLUMNS_BLOCKDIM_Y rows tall. Samples that fall outside the image
187 |  * are taken as zero.
188 |  *
189 |  * @param d_Dst          image the filtered columns are written to
190 |  * @param d_Src          image the columns are read from
191 |  * @param imageW         image width in pixels
192 |  * @param imageH         image height in pixels
193 |  * @param pitch          row stride of d_Src and d_Dst, counted in float elements
194 |  * @param kernel_radius  taps c_Kernel[0 .. 2 * kernel_radius] are applied; the halo is only
195 |  *                       COLUMNS_HALO_STEPS * COLUMNS_BLOCKDIM_Y rows tall, so a larger radius
196 |  *                       would index outside the shared tile
197 |  *
198 |  * NOTE(review): the indexing assumes blockDim == (COLUMNS_BLOCKDIM_X, COLUMNS_BLOCKDIM_Y) -
199 |  * confirm against the host launch code.
200 |  */
201 | extern "C" __global__ void convolutionColumnsKernel(
202 |     float *d_Dst,
203 |     const float *d_Src,
204 |     int imageW,
205 |     int imageH,
206 |     int pitch,
207 |     int kernel_radius
208 | )
209 | {
210 |     __shared__ float s_Data[COLUMNS_BLOCKDIM_X][(COLUMNS_RESULT_STEPS + 2 * COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y];
211 | 
212 |     //Offset to the upper halo edge
213 |     const int baseX = blockIdx.x * COLUMNS_BLOCKDIM_X + threadIdx.x;
214 |     const int baseY = (blockIdx.y * COLUMNS_RESULT_STEPS - COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y + threadIdx.y;
215 | 
216 |     // Columns right of the image (possible when imageW is not a multiple of
217 |     // COLUMNS_BLOCKDIM_X) must neither be read nor written.
218 |     const bool column_in_image = baseX < imageW;
219 | 
220 |     d_Src += baseY * pitch + baseX;
221 |     d_Dst += baseY * pitch + baseX;
222 | 
223 |     //Load upper halo, main data and lower halo in one sweep.
224 |     //Segment i of this thread sits at image row baseY + i * COLUMNS_BLOCKDIM_Y.
225 | #pragma unroll
226 | 
227 |     for (int i = 0; i < COLUMNS_RESULT_STEPS + 2 * COLUMNS_HALO_STEPS; i++)
228 |     {
229 |         const int y = baseY + i * COLUMNS_BLOCKDIM_Y;
230 |         const bool inside = column_in_image && y >= 0 && y < imageH;
231 | 
232 |         s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y] = inside ? d_Src[i * COLUMNS_BLOCKDIM_Y * pitch] : 0;
233 |     }
234 | 
235 |     // Every thread of the block reaches the barrier, including those that produce no output.
236 |     __syncthreads();
237 | 
238 |     if (!column_in_image)
239 |         return;
240 | 
241 |     //Compute and store results
242 | #pragma unroll
243 | 
244 |     for (int i = COLUMNS_HALO_STEPS; i < COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS; i++)
245 |     {
246 |         // Rows only grow with i: once one is past the bottom edge, the remaining ones are too.
247 |         if (baseY + i * COLUMNS_BLOCKDIM_Y >= imageH)
248 |             return;
249 | 
250 |         float sum = 0;
251 | 
252 | #pragma unroll
253 | 
254 |         for (int j = -kernel_radius; j <= kernel_radius; j++)
255 |         {
256 |             sum += c_Kernel[kernel_radius - j] * s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y + j];
257 |         }
258 | 
259 |         d_Dst[i * COLUMNS_BLOCKDIM_Y * pitch] = sum;
260 |     }
261 | }
262 | 
263 | 
264 | 


--------------------------------------------------------------------------------
/src/kernels/median_2d.cu:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include <device_launch_parameters.h>
 23 | 
 24 | #define __CUDACC__
 25 | 
 26 | #include <device_functions.h>
 27 | #include <math_functions.h>
 28 | 
 29 | #include "src/data_types/data_structs.h"
 30 | 
 31 | 
 32 | #define IND(X, Y) ((Y) * (container_size.pitch / sizeof(float)) + (X)) 
 33 | #define SIND(X, Y) ((((Y) + radius_2)) * shared_block_size.x + ((X) + radius_2))
 34 | 
 35 | __constant__ DataSize3 container_size;
 36 | 
 37 | 
 38 | extern __shared__ float shared[];
 39 | 
 40 | /**
 41 |  * Sorts buffer[0 .. length) into ascending order in place (bubble sort).
 42 |  * Buffers with fewer than two elements are left untouched.
 43 |  */
 44 | __device__ void bubbleSort(float* buffer, size_t length)
 45 | {
 46 |   /* The bounds are written as "index + 1 < limit" so that length == 0 cannot
 47 |    * wrap around in unsigned arithmetic (length - 1 would). */
 48 |   for (size_t pass = 0; pass + 1 < length; ++pass) {
 49 |     for (size_t k = 0; k + 1 < length - pass; ++k) {
 50 |       if (buffer[k] > buffer[k + 1]) {
 51 |         const float swapped = buffer[k];
 52 |         buffer[k] = buffer[k + 1];
 53 |         buffer[k + 1] = swapped;
 54 |       }
 55 |     }
 56 |   }
 57 | }
 52 | 
 53 | /**
 54 |  * Sorts window[0 .. size) into ascending order in place (insertion sort).
 55 |  */
 56 | __device__ void insertionSort(float* window, size_t size)
 57 | {
 58 |     for (int current = 0; current < size; ++current) {
 59 |         const float key = window[current];
 60 | 
 61 |         /* Shift the larger elements of the sorted prefix one place to the right. */
 62 |         int slot = current;
 63 |         while (slot > 0 && key < window[slot - 1]) {
 64 |             window[slot] = window[slot - 1];
 65 |             --slot;
 66 |         }
 67 | 
 68 |         window[slot] = key;
 69 |     }
 70 | }
 65 | 
 66 | // \test Experimental. Shows worse performance. TODO: Clarify
 67 | /**
 68 |  * Partial selection sort of a size x size window stored in window[0 .. size * size):
 69 |  * afterwards the first (size * size + 1) / 2 entries hold the smallest values in
 70 |  * ascending order. Note that size is the side length of the window, not the
 71 |  * number of elements.
 72 |  */
 73 | __device__ void partialSelection(float* window, size_t size)
 74 | {
 75 |     const size_t count = size * size;
 76 | 
 77 |     for (size_t j = 0; j < (count + 1) / 2; ++j)
 78 |     {
 79 |         // Find position of minimum element
 80 |         size_t min_index = j;
 81 |         for (size_t l = j + 1; l < count; ++l)
 82 |         {
 83 |             if (window[l] < window[min_index])
 84 |                 min_index = l;
 85 |         }
 86 | 
 87 |         // Put found minimum element in its place. The temporary has to be a float:
 88 |         // an integer temporary would truncate the sample being swapped.
 89 |         const float temp = window[j];
 90 |         window[j] = window[min_index];
 91 |         window[min_index] = temp;
 92 |     }
 93 | }
 83 | 
 84 | 
 85 | 
 86 | /* See a note about the thread block size in cuda_operation_median.cpp file.*/
 87 | /**
 88 |  * 2D median filter over a radius x radius window.
 89 |  *
 90 |  * Despite its name, radius is the side length of the window; the halo loaded around each
 91 |  * block is radius / 2 pixels wide. Every block first copies its pixels plus that halo into
 92 |  * the dynamically sized shared array (at least (blockDim.x + 2 * (radius / 2)) *
 93 |  * (blockDim.y + 2 * (radius / 2)) floats are indexed), mirroring coordinates that fall
 94 |  * outside the image, and then every thread sorts its window and writes the middle element.
 95 |  *
 96 |  * @param input   source image, addressed through IND (pitch taken from container_size)
 97 |  * @param width   image width in pixels
 98 |  * @param height  image height in pixels
 99 |  * @param radius  window side length; values of 0 or above 7 are rejected (nothing is written)
100 |  * @param output  destination image, addressed like input
101 |  */
102 | extern "C" __global__ void median_2d(
103 |   const float* input,
104 |         size_t width,
105 |         size_t height,
106 |         size_t radius,
107 |         float* output)
108 | {
109 |   /* The per-thread window buffer below holds 7 * 7 values, so a larger window would overflow
110 |    * it, and an empty window has no median. radius is the same for all threads, so the whole
111 |    * grid leaves together and no thread is left waiting at the barrier further down. */
112 |   if (radius == 0 || radius > 7) {
113 |     return;
114 |   }
115 | 
116 |   int radius_2 = radius / 2;
117 | 
118 |   dim3 shared_block_size(
119 |     blockDim.x + 2 * radius_2,
120 |     blockDim.y + 2 * radius_2
121 |    );
122 | 
123 |   dim3 global_id(
124 |     blockDim.x * blockIdx.x + threadIdx.x,
125 |     blockDim.y * blockIdx.y + threadIdx.y
126 |    );
127 | 
128 |   /* Load data to the shared memory.
129 |    * Coordinates past the right / bottom border are mirrored as 2 * size - coordinate - 2,
130 |    * negative ones as -coordinate. */
131 |   size_t global_x = global_id.x < width ? global_id.x : 2 * width - global_id.x - 2;
132 |   size_t global_y = global_id.y < height ? global_id.y : 2 * height - global_id.y - 2;
133 | 
134 |   /* Main area */
135 |   shared[SIND(threadIdx.x, threadIdx.y)] = input[IND(global_x, global_y)];
136 | 
137 |   /* Left slice */
138 |   if (threadIdx.x < radius_2) {
139 |     int offset = blockDim.x * blockIdx.x - radius_2 + threadIdx.x;
140 |     size_t global_x_l = offset >= 0 ? offset : -offset;
141 |     shared[SIND(-radius_2 + threadIdx.x, threadIdx.y)] = input[IND(global_x_l, global_y)];
142 |   }
143 | 
144 |   /* Right slice */
145 |   if (threadIdx.x > blockDim.x - 1 - radius_2) {
146 |     int index = blockDim.x - threadIdx.x;
147 |     int offset = blockDim.x *(blockIdx.x + 1) + radius_2 - index;
148 |     size_t global_x_r = offset < width ? offset : 2 * width - offset - 2;
149 |     shared[SIND(radius_2 + threadIdx.x, threadIdx.y)] = input[IND(global_x_r, global_y)];
150 |   }
151 | 
152 |   /* Upper slice */
153 |   if (threadIdx.y < radius_2) {
154 |     int offset = blockDim.y * blockIdx.y - radius_2 + threadIdx.y;
155 |     size_t global_y_u = offset >= 0 ? offset : -offset;
156 |     shared[SIND(threadIdx.x, -radius_2 + threadIdx.y)] = input[IND(global_x, global_y_u)];
157 |   }
158 | 
159 |   /* Bottom slice */
160 |   if (threadIdx.y > blockDim.y - 1 - radius_2) {
161 |     int index = blockDim.y - threadIdx.y;
162 |     int offset = blockDim.y *(blockIdx.y + 1) + radius_2 - index;
163 |     size_t global_y_b = offset < height ? offset : 2 * height - offset - 2;
164 |     shared[SIND(threadIdx.x, radius_2 + threadIdx.y)] = input[IND(global_x, global_y_b)];
165 |   }
166 | 
167 |   /* 4 corners: the threads of the top-left radius_2 x radius_2 patch fill all of them. */
168 |   {
169 |     int global_x_c;
170 |     int global_y_c;
171 | 
172 |     if (threadIdx.x < radius_2 && threadIdx.y < radius_2) {
173 | 
174 |       global_x_c = blockDim.x * blockIdx.x - radius_2 + threadIdx.x;
175 |       global_x_c = global_x_c > 0 ? global_x_c : -global_x_c;
176 | 
177 |       global_y_c = blockDim.y * blockIdx.y - radius_2 + threadIdx.y;
178 |       global_y_c = global_y_c > 0 ? global_y_c : -global_y_c;
179 | 
180 |       /* Upper left */
181 |       shared[SIND(threadIdx.x - radius_2,threadIdx.y - radius_2)] =
182 |         input[IND(global_x_c, global_y_c)];
183 | 
184 |       /* Upper right */
185 |       global_x_c = blockDim.x *(blockIdx.x + 1) + threadIdx.x;
186 |       global_x_c = global_x_c < width ? global_x_c : 2 * width - global_x_c - 2;
187 |       shared[SIND(blockDim.x + threadIdx.x, threadIdx.y - radius_2)] =
188 |         input[IND(global_x_c, global_y_c)];
189 | 
190 |       /* Bottom right */
191 |       global_y_c = blockDim.y *(blockIdx.y + 1) + threadIdx.y;
192 |       global_y_c = global_y_c < height ? global_y_c : 2 * height - global_y_c - 2;
193 |       shared[SIND(blockDim.x + threadIdx.x, blockDim.y + threadIdx.y)] =
194 |         input[IND(global_x_c, global_y_c)];
195 | 
196 |       /* Bottom left */
197 |       global_x_c = blockDim.x * blockIdx.x - radius_2 + threadIdx.x;
198 |       global_x_c = global_x_c > 0 ? global_x_c : -global_x_c;
199 |       shared[SIND(threadIdx.x - radius_2, blockDim.y + threadIdx.y)] =
200 |         input[IND(global_x_c, global_y_c)];
201 | 
202 |     }
203 |   }
204 | 
205 |   /* The tile must be complete before any thread starts reading its window. */
206 |   __syncthreads();
207 | 
208 |   if (global_id.x < width && global_id.y < height) {
209 |     float buffer[49]; /* Max supported radius is 7, we have to store 7*7 values. */
210 | 
211 |     /* lx / ly may wrap around as unsigned values for the leftmost / topmost threads;
212 |      * adding radius_2 inside SIND brings them back into range. */
213 |     for (size_t iy = 0; iy < radius; ++iy) {
214 |       for (size_t ix = 0; ix < radius; ++ix) {
215 |         size_t lx = threadIdx.x - ix + radius_2;
216 |         size_t ly = threadIdx.y - iy + radius_2;
217 |         buffer[iy * radius + ix] = shared[SIND(lx, ly)];
218 |       }
219 |     }
220 | 
221 |     size_t length = radius * radius;
222 | 
223 |     insertionSort(buffer, length);
224 | 
225 |     output[IND(global_id.x, global_id.y)] = buffer[length / 2];
226 |   }
227 | }


--------------------------------------------------------------------------------
/src/kernels/registration_2d.cu:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #include <device_launch_parameters.h>
23 | 
24 | #define __CUDACC__
25 | #include <math_functions.h>
26 | 
27 | #include "src/data_types/data_structs.h"
28 | 
29 | //#define IND(X, Y, Z) (((Z) * container_size.height + (Y)) * (container_size.pitch / sizeof(float)) + (X)) 
30 | #define IND(X, Y) ((Y) * (container_size.pitch / sizeof(float)) + (X)) 
31 | 
32 | __constant__ DataSize3 container_size;
33 | 
34 | /**
35 |  * Warps frame_1 with the flow field (flow_u, flow_v) and stores the result in output.
36 |  *
37 |  * For each pixel the sampling position is the pixel coordinate plus the flow divided by
38 |  * the grid spacing (hx, hy). frame_1 is sampled there with bilinear interpolation; when
39 |  * the position is NaN or lies outside [0, width - 1] x [0, height - 1], the pixel is
40 |  * copied from frame_0 instead.
41 |  */
42 | extern "C" __global__ void registration_2d(
43 |   const float* frame_0,
44 |   const float* frame_1,
45 |   const float* flow_u,
46 |   const float* flow_v,
47 |         size_t width,
48 |         size_t height,
49 |         float  hx,
50 |         float  hy,
51 |         float* output)
52 | {
53 |   const unsigned int col = blockDim.x * blockIdx.x + threadIdx.x;
54 |   const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y;
55 | 
56 |   if (col >= width || row >= height) {
57 |     return;
58 |   }
59 | 
60 |   const size_t here = IND(col, row);
61 | 
62 |   /* Sampling position in frame_1, in pixel units. */
63 |   float x_f = col + (flow_u[here] * (1.f / hx));
64 |   float y_f = row + (flow_v[here] * (1.f / hy));
65 | 
66 |   const bool unusable =
67 |     (x_f < 0.) || (x_f > width - 1) || (y_f < 0.) || (y_f > height - 1) || isnan(x_f) || isnan(y_f);
68 | 
69 |   if (unusable) {
70 |     output[here] = frame_0[here];
71 |     return;
72 |   }
73 | 
74 |   /* Top-left neighbour and the fractional offsets from it. */
75 |   int x = (int) floorf(x_f);
76 |   int y = (int) floorf(y_f);
77 |   float delta_x = x_f - (float) x;
78 |   float delta_y = y_f - (float) y;
79 | 
80 |   /* Right / bottom neighbours, clamped to the last column / row. */
81 |   int x_1 = min(int(width -1), x + 1);
82 |   int y_1 = min(int(height - 1), y + 1);
83 | 
84 |   const float top_left     = frame_1[IND(x  , y )];
85 |   const float top_right    = frame_1[IND(x_1, y )];
86 |   const float bottom_left  = frame_1[IND(x  , y_1)];
87 |   const float bottom_right = frame_1[IND(x_1, y_1)];
88 | 
89 |   output[here] =
90 |     (1.f - delta_x) * (1.f - delta_y) * top_left +
91 |     (      delta_x) * (1.f - delta_y) * top_right +
92 |     (1.f - delta_x) * (      delta_y) * bottom_left +
93 |     (      delta_x) * (      delta_y) * bottom_right;
94 | }


--------------------------------------------------------------------------------
/src/kernels/resample_2d.cu:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include <device_launch_parameters.h>
 23 | 
 24 | #define __CUDACC__
 25 | #include <math_functions.h>
 26 | 
 27 | #include "src/data_types/data_structs.h"
 28 | 
 29 | //#define IND(X, Y, Z) (((Z) * container_size.height + (Y)) * (container_size.pitch / sizeof(float)) + (X)) 
 30 | #define IND(X, Y) ((Y) * (container_size.pitch / sizeof(float)) + (X)) 
 31 | 
 32 | __constant__ DataSize3 container_size;
 33 | 
 34 | /**
 35 |  * Resamples input along x from in_width to out_width columns by area weighting.
 36 |  *
 37 |  * Output column x covers the input interval [x * delta, (x + 1) * delta) with
 38 |  * delta = in_width / out_width. Every input cell touched by that interval contributes
 39 |  * with the fraction of the cell that is covered, and the sum is scaled by
 40 |  * out_width / in_width. One thread computes one output pixel.
 41 |  */
 42 | extern "C" __global__ void resample_x(
 43 |   const float* input,
 44 |         float* output,
 45 |         size_t out_width,
 46 |         size_t out_height,
 47 |         size_t in_width)
 48 | {
 49 |   dim3 globalID(blockDim.x * blockIdx.x + threadIdx.x,
 50 |                 blockDim.y * blockIdx.y + threadIdx.y);
 51 | 
 52 |   if (globalID.x >= out_width || globalID.y >= out_height) {
 53 |     return;
 54 |   }
 55 | 
 56 |   const float delta = in_width / static_cast<float>(out_width);
 57 |   const float normalization = out_width / static_cast<float>(in_width);
 58 | 
 59 |   const float left_f = globalID.x * delta;
 60 |   const float right_f = (globalID.x + 1) * delta;
 61 | 
 62 |   const int left_i = static_cast<int>(floor(left_f));
 63 |   const int right_i = min(static_cast<int>(in_width), static_cast<int>(ceil(right_f)));
 64 | 
 65 |   /* Number of input cells overlapped by this output pixel. */
 66 |   const int cells = right_i - left_i;
 67 | 
 68 |   float value = 0.f;
 69 | 
 70 |   for (int j = 0; j < cells; j++) {
 71 |     float frac;
 72 | 
 73 |     if (cells == 1) {
 74 |       /* both boundaries lie in the same cell */
 75 |       frac = delta;
 76 |     } else if (j == cells - 1) {
 77 |       /* right boundary */
 78 |       frac = right_f - static_cast<float>(left_i + j);
 79 |     } else if (j == 0) {
 80 |       /* left boundary */
 81 |       frac = static_cast<float>(left_i + 1) - left_f;
 82 |     } else {
 83 |       /* cell covered completely */
 84 |       frac = 1.f;
 85 |     }
 86 | 
 87 |     value += input[IND(left_i + j, globalID.y)] * frac;
 88 |   }
 89 | 
 90 |   output[IND(globalID.x, globalID.y)] = value * normalization;
 91 | }
 76 | 
 77 | extern "C" __global__ void resample_y(
 78 |   const float* input,
 79 |         float* output,
 80 |         size_t out_width,
 81 |         size_t out_height,
 82 |         size_t in_height)
 83 | {
 84 |   dim3 globalID(blockDim.x * blockIdx.x + threadIdx.x,
 85 |                 blockDim.y * blockIdx.y + threadIdx.y);
 86 | 
 87 |   if (globalID.x < out_width &&  globalID.y < out_height) {
 88 |     float delta = in_height / static_cast<float>(out_height);
 89 |     float normalization = out_height / static_cast<float>(in_height);
 90 | 
 91 |     float left_f = globalID.y * delta;
 92 |     float right_f = (globalID.y + 1) * delta;
 93 | 
 94 |     int left_i = static_cast<int>(floor(left_f));
 95 |     int right_i = min(static_cast<int>(in_height), static_cast<int>(ceil(right_f)));
 96 | 
 97 |     float value = 0.f;
 98 |     
 99 |     for (int j = 0; j < (right_i - left_i); j++) {
100 |       float frac = 1.f;
101 | 
102 |       /* left boundary */
103 |       if (j == 0) {
104 |         frac = static_cast<float>(left_i + 1) - left_f;
105 |       }
106 |       /* right boundary */
107 |       if (j == (right_i - left_i) - 1) {
108 |         frac = right_f - static_cast<float>(left_i + j);
109 |       }
110 |       /* if the left and right boundaries are in the same cell */
111 |       if ((right_i - left_i) == 1) {
112 |         frac = delta;
113 |       }
114 |       value += input[IND(globalID.x, left_i + j)] * frac;
115 |     }
116 |     output[IND(globalID.x, globalID.y)] = value * normalization;
117 |   }
118 | }
119 | 
120 | //extern "C" __global__ void resample_z(
121 | //  const float* input,
122 | //        float* output,
123 | //        size_t out_width,
124 | //        size_t out_height,
125 | //        size_t out_depth,
126 | //        size_t in_depth)
127 | //{
128 | //  dim3 globalID(blockDim.x * blockIdx.x + threadIdx.x,
129 | //                blockDim.y * blockIdx.y + threadIdx.y,
130 | //                blockDim.z * blockIdx.z + threadIdx.z);
131 | //
132 | //  if (globalID.x < out_width &&  globalID.y < out_height && globalID.z < out_depth) {
133 | //    float delta = in_depth / static_cast<float>(out_depth);
134 | //    float normalization = out_depth / static_cast<float>(in_depth);
135 | //
136 | //    float left_f = globalID.z * delta;
137 | //    float right_f = (globalID.z + 1) * delta;
138 | //
139 | //    int left_i = static_cast<int>(floor(left_f));
140 | //    int right_i = min(in_depth, static_cast<size_t>(ceil(right_f)));
141 | //
142 | //    float value = 0.f;
143 | //    
144 | //    for (int j = 0; j < (right_i - left_i); j++) {
145 | //      float frac = 1.f;
146 | //
147 | //      /* left boundary */
148 | //      if (j == 0) {
149 | //        frac = static_cast<float>(left_i + 1) - left_f;
150 | //      }
151 | //      /* right boundary */
152 | //      if (j == (right_i - left_i) - 1) {
153 | //        frac = right_f - static_cast<float>(left_i + j);
154 | //      }
155 | //      /* if the left and right boundaries are in the same cell */
156 | //      if ((right_i - left_i) == 1) {
157 | //        frac = delta;
158 | //      }
159 | //      value += input[IND(globalID.x, globalID.y, left_i + j)] * frac;
160 | //    }
161 | //    output[IND(globalID.x, globalID.y, globalID.z)] = value * normalization;
162 | //  }
163 | //}
164 | 
165 | //extern "C" __global__ void resample_x_debug(
166 | //  const float* input,
167 | //  float* output,
168 | //  size_t width,
169 | //  size_t height,
170 | //  size_t depth,
171 | //  size_t resample_width)
172 | //{
173 | //  dim3 globalID(blockDim.x * blockIdx.x + threadIdx.x,
174 | //    blockDim.y * blockIdx.y + threadIdx.y,
175 | //    blockDim.z * blockIdx.z + threadIdx.z);
176 | //
177 | //  if (globalID.y < height && globalID.z < depth) {
178 | //    for (size_t x = 0; x < width; x += 4) {
179 | //      *((float4*)(&(output[IND(x, globalID.y, globalID.z)]))) =
180 | //        *((const float4*)(&(input[IND(x, globalID.y, globalID.z)])));
181 | //    }
182 | //    for (size_t x = 0; x < width; x++) {
183 | //      output[IND(x, globalID.y, globalID.z)] = input[IND(x, globalID.y, globalID.z)];
184 | //    }
185 | //  }
186 | //}


--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *          
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include <cstdio>
 23 | #include <cstring>
 24 | #include <ctime>
 25 | #include <string>
 26 | 
 27 | #include <cuda.h>
 28 | 
 29 | #include "src/data_types/data2d.h"
 30 | #include "src/data_types/data_structs.h"
 31 | #include "src/data_types/operation_parameters.h"
 32 | #include "src/utils/cuda_utils.h"
 33 | #include "src/utils/io_utils.h"
 34 | #include "src/utils/settings.h"
 35 | 
 36 | #include "src/optical_flow/optical_flow_2d.h"
 37 | 
 38 | using namespace OpticFlow;
 39 | 
 40 | 
/* When true, main() waits for the enter key before releasing the CUDA context and exiting. */
const bool key_press = true;
/* Not referenced by main() in this file. */
const bool use_visualization = false;
/* Copied into OpticalFlow2D::silent before the flow is computed. */
const bool silent_mode = true;
 44 | 
 45 | 
 46 | int main(int argc, char** argv)
 47 | {
 48 | 
 49 | 
 50 |     /* Initialize CUDA */
 51 |     CUcontext cu_context;
 52 |     if (!InitCudaContextWithFirstAvailableDevice(&cu_context)) {
 53 |         return 1;
 54 |     }
 55 | 
 56 |     std::printf("//----------------------------------------------------------------------//\n");
 57 |     std::printf("//            2D Optical flow using NVIDIA CUDA. Version 0.5.0	        //\n");
 58 |     std::printf("//                                                                      //\n");
 59 |     std::printf("//            Karlsruhe Institute of Technology. 2015 - 2018            //\n");
 60 |     std::printf("//----------------------------------------------------------------------//\n");
 61 | 
 62 | 
 63 | 
 64 |   /* Dataset variables */
 65 |     size_t width = 584; //584;
 66 |     size_t height = 388; ////388;
 67 | 
 68 | 
 69 |     /* Optical flow variables */
 70 |     size_t  warp_levels_count       = 50;
 71 |     float   warp_scale_factor       = 0.9f;
 72 |     size_t  outer_iterations_count  = 40;
 73 |     size_t  inner_iterations_count  = 5;
 74 |     float   equation_alpha          = 35.0f; // 3.5f;
 75 |     float   equation_smoothness     = 0.001f;
 76 |     float   equation_data           = 0.001f;
 77 |     size_t  median_radius           = 5;
 78 |     float   gaussian_sigma          = 1.5f;
 79 | 
 80 |     DataConstancy data_constancy    = DataConstancy::Grey;
 81 | 
 82 |     string  file_name1              = "rub1.raw";
 83 |     string  file_name2              = "rub2.raw";
 84 |     string  input_path              =  "./data/";
 85 |     string  output_path             =  "./data/output/";
 86 |     string  counter                 =  "";
 87 | 
 88 | 
 89 |     /* Optical flow computation class */
 90 |     OpticalFlow2D optical_flow;
 91 | 
 92 | 
 93 |     Data2D frame_0;
 94 |     Data2D frame_1;
 95 | 
 96 |     /* Read settings */
 97 |     string settingsPath = "settings.xml";
 98 | 
 99 |     /*
100 |     Usage:
101 | 
102 |     1. cuda-flow2d. settings.xml in the current directory will be used for settings
103 |     2. cuda-flow2d <settings file>
104 |     3. cuda-flow2d <file1> <file2> <image_width> <image_height>
105 |     */
106 | 
107 |     if (argc == 6 || argc == 7 || argc == 9)  {
108 | 
109 |         file_name1 = argv[1];
110 |         file_name2 = argv[2];
111 | 
112 |         output_path = string(argv[6]);
113 | 
114 |         width = atoi(argv[3]);
115 |         height = atoi(argv[4]);
116 | 
117 |         if (argc == 7)
118 |             counter = argv[5];
119 | 
120 |         if (argc == 9){
121 |             equation_alpha = atof(argv[7]);
122 |             gaussian_sigma = atof(argv[8]);
123 |             counter = "alpha"+ string(argv[7])+"_sigma"+ string(argv[8])+"_";
124 |         }
125 |     }
126 |     else if (argc < 3) {
127 |         string settingsFile = (argc==1)? settingsPath : string(argv[1]); 
128 | 
129 |         // Create Settings class
130 |         cout<<"Reading settings: "<<settingsFile<<endl;;
131 |         Settings settings = Settings();
132 | 
133 |         int settingsError = 0;
134 | 
135 |         try {
136 |             settingsError = settings.LoadSettings(settingsFile);
137 |         }
138 |         catch (...)
139 |         {
140 |             cout<<"Unexpected error reading settings file"<<endl;
141 |             settingsError = 1;
142 |         }
143 |         if (settingsError) {
144 |             cout<<"TERMINATING. Error reading settings: "<<settingsFile<<endl;
145 |             return 3;
146 |         }
147 |         else
148 |             cout<<"OK"<<endl<<endl;
149 | 
150 |         width = settings.width;
151 |         height = settings.height;
152 |         input_path = settings.inputPath;
153 |         output_path = settings.outputPath;
154 |         file_name1 = settings.fileName1;
155 |         file_name2 = settings.fileName2;
156 |         warp_levels_count = settings.levels;
157 |         warp_scale_factor = settings.warpScale;
158 |         outer_iterations_count = settings.iterOuter;
159 |         inner_iterations_count = settings.iterInner;
160 |         equation_alpha = settings.alpha;
161 |         equation_data = settings.e_data;
162 |         equation_smoothness = settings.e_smooth;
163 |         median_radius = settings.medianRadius;
164 |         gaussian_sigma = settings.sigma;   
165 | 
166 |     } else {
167 |     cout<<"Usage: "<< argv[0] <<" <settings file>. Otherwise settings.xml in the current directory is used"<<endl;
168 |     return 0; 
169 |     }
170 | 
171 | 
172 |     DataSize3 data_size ={ width, height, 1 };
173 | 
174 |     /* Load input data */
175 |     if (!frame_0.ReadRAWFromFileF32((file_name1).c_str(), data_size.width, data_size.height) ||
176 |         !frame_1.ReadRAWFromFileF32((file_name2).c_str(), data_size.width, data_size.height)) {
177 |         return 2;
178 |     } 
179 | 
180 |     //if (!frame_0.ReadRAWFromFileU8("./data/01_sim_spray-128-128.raw", data_size.width, data_size.height) ||
181 |     //    !frame_1.ReadRAWFromFileU8("./data/02_sim_spray-128-128.raw", data_size.width, data_size.height)) {
182 |     //    return 2;
183 |     //}
184 | 
185 |     if (optical_flow.Initialize(data_size, data_constancy)) {
186 | 
187 |         Data2D flow_u(data_size.width, data_size.height);
188 |         Data2D flow_v(data_size.width, data_size.height);
189 | 
190 |         optical_flow.silent = silent_mode;
191 | 
192 |         OperationParameters params;
193 |         params.PushValuePtr("warp_levels_count",      &warp_levels_count);
194 |         params.PushValuePtr("warp_scale_factor",      &warp_scale_factor);
195 |         params.PushValuePtr("outer_iterations_count", &outer_iterations_count);
196 |         params.PushValuePtr("inner_iterations_count", &inner_iterations_count);
197 |         params.PushValuePtr("equation_alpha",         &equation_alpha);
198 |         params.PushValuePtr("equation_smoothness",    &equation_smoothness);
199 |         params.PushValuePtr("equation_data",          &equation_data);
200 |         params.PushValuePtr("median_radius",          &median_radius);
201 |         params.PushValuePtr("gaussian_sigma",         &gaussian_sigma);
202 | 
203 |         optical_flow.ComputeFlow(frame_0, frame_1, flow_u, flow_v, params);
204 | 
205 |         std::string filename =
206 |             "-" + std::to_string(width) +
207 |             "-" + std::to_string(height) + ".raw";
208 | 
209 |         flow_u.WriteRAWToFileF32(std::string(output_path + counter + "flow-u" + filename).c_str());
210 |         flow_v.WriteRAWToFileF32(std::string(output_path + counter + "flow-v" + filename).c_str());
211 | 
212 |         IOUtils::WriteFlowToImageRGB(flow_u, flow_v, 10, output_path + counter + "res.pgm");
213 |         IOUtils::WriteMagnitudeToFileF32(flow_u, flow_v, std::string(output_path + counter + "amp" + filename).c_str());
214 | 
215 |         optical_flow.Destroy();
216 |     }
217 | 
218 |     if (key_press) {
219 |         std::printf("Press enter to continue...");
220 |         std::getchar();
221 |     }
222 | 
223 | 
224 | 
225 |     /* Release resources */
226 |     cuCtxDestroy(cu_context);
227 | 
228 |     return 0;
229 | }
230 | 


--------------------------------------------------------------------------------
/src/optical_flow/optical_flow_2d.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include "src/optical_flow/optical_flow_2d.h"
 23 | 
 24 | #include <algorithm>
 25 | #include <cstdio>
 26 | #include <string>
 27 | #include <iostream>
 28 | #include <cmath>
 29 | 
 30 | //#include "../viewflow3d/src/visualization.h"
 31 | #include "src/utils/common_utils.h"
 32 | #include "src/utils/cuda_utils.h"
 33 | #include "src/data_types/data_structs.h"
 34 | 
 35 | using namespace std;
 36 | 
 37 | OpticalFlow2D::OpticalFlow2D()
 38 |   : OpticalFlowBase2D("Optical Flow 2D Single GPU")
 39 | {
 40 |   cuda_operations_.push_front(&cuop_add_);
 41 |   cuda_operations_.push_front(&cuop_convolution_);
 42 |   cuda_operations_.push_front(&cuop_median_);
 43 |   cuda_operations_.push_front(&cuop_register_);
 44 |   cuda_operations_.push_front(&cuop_resample_);
 45 |   cuda_operations_.push_front(&cuop_solve_);
 46 | }
 47 | 
 48 | bool OpticalFlow2D::Initialize(const DataSize3& data_size, DataConstancy data_constancy)
 49 | {
 50 |   data_constancy_ = data_constancy;
 51 |   dev_container_size_ = data_size;
 52 |   dev_container_size_.pitch = 0;
 53 | 
 54 |   initialized_ = InitCudaMemory() && InitCudaOperations();
 55 |   return initialized_;
 56 | }
 57 | 
 58 | bool OpticalFlow2D::InitCudaOperations()
 59 | {
 60 |   if (dev_container_size_.pitch == 0) {
 61 |     std::printf("Initialization failed. Device pitch is 0.\n");
 62 |     return false;
 63 |   }
 64 | 
 65 |   std::printf("Initialization of cuda operations...\n");
 66 | 
 67 |   OperationParameters op;
 68 |   op.PushValuePtr("container_size", &dev_container_size_);
 69 |   op.PushValuePtr("data_constancy", &data_constancy_);
 70 | 
 71 |   for (CudaOperationBase* cuop : cuda_operations_) {
 72 |     std::printf("%-18s: ", cuop->GetName());
 73 |     bool result = cuop->Initialize(&op);
 74 |     if (result) {
 75 |       std::printf("OK\n");
 76 |     } else {
 77 |       Destroy();
 78 |       return false;
 79 |     }
 80 |   }
 81 |   return true;
 82 | }
 83 | 
 84 | bool OpticalFlow2D::InitCudaMemory()
 85 | {
 86 |   std::printf("Allocating memory on the device...\n");
 87 |   /* Check available memory on the cuda device */
 88 |   size_t free_memory;
 89 |   size_t total_memory;
 90 | 
 91 |   CheckCudaError(cuMemGetInfo(&free_memory, &total_memory));
 92 | 
 93 |   std::printf("Available\t:\t%.0fMB / %.0fMB\n", 
 94 |               free_memory / static_cast<float>(1024 * 1024),
 95 |               total_memory / static_cast<float>(1024 * 1024));
 96 | 
 97 |   /* Estimate needed memory (approx.)*/
 98 |   int alignment = 512;
 99 |   CUdevice cu_device;
100 |   CheckCudaError(cuCtxGetDevice(&cu_device));
101 |   CheckCudaError(cuDeviceGetAttribute(&alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, cu_device));
102 |   alignment /= sizeof(float);
103 | 
104 |   size_t pitch = ((dev_container_size_.width % alignment == 0) ?
105 |                   (dev_container_size_.width) :
106 |                   (dev_container_size_.width + alignment - (dev_container_size_.width % alignment))) *
107 |                  sizeof(float);
108 | 
109 |   size_t needed_memory = pitch * dev_container_size_.height * dev_containers_count_;
110 |   std::printf("Needed (approx.):\t%.0fMB\n", needed_memory / static_cast<float>(1024 * 1024));
111 | 
112 |   size_t allocated_memory = 0;
113 |   if (needed_memory < free_memory) {
114 |     for (size_t i = 0; i < dev_containers_count_; ++i) {
115 |       CUdeviceptr dev_container;
116 |       bool error = CheckCudaError(cuMemAllocPitch(&dev_container,
117 |                                                   &pitch,
118 |                                                   dev_container_size_.width * sizeof(float),
119 |                                                   dev_container_size_.height * 1,
120 |                                                   sizeof(float)));
121 | 
122 |       /* Pitch should be the same for all containers */
123 |       if (error || (i != 0 && dev_container_size_.pitch != pitch)) {
124 |         std::printf("Error during device memory allocation.");
125 |         Destroy();
126 |         return false;
127 |       } else {
128 |         size_t container_size;
129 |         CheckCudaError(cuMemGetAddressRange(NULL, &container_size, dev_container));
130 |         allocated_memory += container_size;
131 | 
132 |         cuda_memory_ptrs_.push(dev_container);
133 |         dev_container_size_.pitch = pitch;
134 |       }
135 |     }
136 |     std::printf("Allocated\t:\t%.0fMB\n", allocated_memory / static_cast<float>(1024 * 1024));
137 |     return true;
138 |   } 
139 |   return false;
140 | }
141 | 
142 | void OpticalFlow2D::ComputeFlow(Data2D& frame_0, Data2D& frame_1, Data2D& flow_u, Data2D& flow_v, OperationParameters& params)
143 | {
144 |   if (!IsInitialized()) {
145 |     return;
146 |   }
147 | 
148 |   /* Optical flow algorithm's parameters */
149 |   size_t  warp_levels_count;
150 |   float   warp_scale_factor;
151 |   size_t  outer_iterations_count;
152 |   size_t  inner_iterations_count;
153 |   float   equation_alpha;
154 |   float   equation_smoothness;
155 |   float   equation_data;
156 |   size_t  median_radius;
157 |   float   gaussian_sigma;
158 | 
159 |   /* Lambda function for correct working of GET_PARAM_OR_RETURN */
160 |   GET_PARAM_OR_RETURN(params, size_t, warp_levels_count,      "warp_levels_count");
161 |   GET_PARAM_OR_RETURN(params, float,  warp_scale_factor,      "warp_scale_factor");
162 |   GET_PARAM_OR_RETURN(params, size_t, outer_iterations_count, "outer_iterations_count");
163 |   GET_PARAM_OR_RETURN(params, size_t, inner_iterations_count, "inner_iterations_count");
164 |   GET_PARAM_OR_RETURN(params, float,  equation_alpha,         "equation_alpha");
165 |   GET_PARAM_OR_RETURN(params, float,  equation_smoothness,    "equation_smoothness");
166 |   GET_PARAM_OR_RETURN(params, float,  equation_data,          "equation_data");
167 |   GET_PARAM_OR_RETURN(params, size_t, median_radius,          "median_radius");
168 |   GET_PARAM_OR_RETURN(params, float,  gaussian_sigma,         "gaussian_sigma");
169 | 
170 |   std::printf("\nStarting optical flow computation...\n");
171 | 
172 |   /* Create CUDA event for the time measure */
173 |   CUevent cu_event_start;
174 |   CUevent cu_event_stop;
175 | 
176 |   CheckCudaError(cuEventCreate(&cu_event_start, CU_EVENT_DEFAULT));
177 |   CheckCudaError(cuEventCreate(&cu_event_stop, CU_EVENT_DEFAULT));
178 | 
179 |   CheckCudaError(cuEventRecord(cu_event_start, NULL));
180 | 
181 |   /* Auxiliary variables */
182 |   float hx; // spacing in x-direction (current resol.) 
183 |   float hy; // spacing in y-direction (current resol.) 
184 |   DataSize3 original_data_size = { frame_0.Width(), frame_0.Height(), 0 };
185 |   DataSize3 current_data_size = { 0 };
186 |   DataSize3 prev_data_size = { 0 };
187 |   
188 |   size_t max_warp_level = GetMaxWarpLevel(original_data_size.width, original_data_size.height, warp_scale_factor);
189 |   int current_warp_level = std::min(warp_levels_count, max_warp_level) - 1;
190 | 
191 |   OperationParameters op;
192 | 
193 |   /* Copy input data to the device */
194 |   CUdeviceptr dev_frame_0 = cuda_memory_ptrs_.top();
195 |   cuda_memory_ptrs_.pop();
196 |   CUdeviceptr dev_frame_1 = cuda_memory_ptrs_.top();
197 |   cuda_memory_ptrs_.pop();
198 |   CUdeviceptr dev_frame_0_res = cuda_memory_ptrs_.top();
199 |   cuda_memory_ptrs_.pop();
200 |   CUdeviceptr dev_frame_1_res_br = cuda_memory_ptrs_.top();
201 |   cuda_memory_ptrs_.pop();
202 | 
203 |   CUdeviceptr dev_flow_u = cuda_memory_ptrs_.top();
204 |   cuda_memory_ptrs_.pop();
205 |   CUdeviceptr dev_flow_v = cuda_memory_ptrs_.top();
206 |   cuda_memory_ptrs_.pop();
207 | 
208 |   CUdeviceptr dev_flow_du = cuda_memory_ptrs_.top();
209 |   cuda_memory_ptrs_.pop();
210 |   CUdeviceptr dev_flow_dv = cuda_memory_ptrs_.top();
211 |   cuda_memory_ptrs_.pop();
212 | 
213 | 
214 |   CopyData2DtoDevice(frame_0, dev_frame_0, dev_container_size_.height, dev_container_size_.pitch);
215 |   CopyData2DtoDevice(frame_1, dev_frame_1, dev_container_size_.height, dev_container_size_.pitch);
216 | 
217 |   /* Gaussian blurring */
218 |   if (gaussian_sigma > 0.0) {
219 | 
220 |       CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
221 |       cuda_memory_ptrs_.pop();
222 | 
223 |       op.Clear();
224 |       op.PushValuePtr("dev_input", &dev_frame_0);
225 |       op.PushValuePtr("dev_output", &dev_flow_u);
226 |       op.PushValuePtr("dev_temp", &dev_temp);
227 |       op.PushValuePtr("data_size", &original_data_size);
228 |       op.PushValuePtr("gaussian_sigma", &gaussian_sigma);
229 |       cuop_convolution_.Execute(op);
230 | 
231 |       //CheckCudaError(cuStreamSynchronize(NULL));
232 |       //std::this_thread::sleep_for(std::chrono::milliseconds(500));
233 | 
234 | 
235 |       op.Clear();
236 |       op.PushValuePtr("dev_input", &dev_frame_1);
237 |       op.PushValuePtr("dev_output", &dev_flow_v);
238 |       op.PushValuePtr("dev_temp", &dev_temp);
239 |       op.PushValuePtr("data_size", &original_data_size);
240 |       op.PushValuePtr("gaussian_sigma", &gaussian_sigma);
241 |       cuop_convolution_.Execute(op);
242 | 
243 |       std::swap(dev_frame_0, dev_flow_u);
244 |       std::swap(dev_frame_1, dev_flow_v);
245 | 
246 |       cuda_memory_ptrs_.push(dev_temp);
247 | 
248 |       // Test: Save smoothed image
249 |   /*    CopyData2DFromDevice(dev_frame_0, frame_0, dev_container_size_.height, dev_container_size_.pitch);
250 |       CopyData2DFromDevice(dev_frame_1, frame_1, dev_container_size_.height, dev_container_size_.pitch);
251 | 
252 |       std::string filename =
253 |       "-" + std::to_string(dev_container_size_.width) +
254 |       "-" + std::to_string(dev_container_size_.height) + ".raw";
255 | 
256 |       frame_0.WriteRAWToFileF32(std::string("./data/output/test1_smooth" + filename).c_str());
257 | 
258 |       frame_1.WriteRAWToFileF32(std::string("./data/output/test2_smooth" + filename).c_str());*/
259 | 
260 |   }
261 | 
262 | 
263 | 
264 |   /* ---------------------------------------------------- */
265 |   /* Main loop */
266 |   /* ---------------------------------------------------- */
267 |   while (current_warp_level >= 0) {
268 |     float scale = std::pow(warp_scale_factor, static_cast<float>(current_warp_level));
269 |     current_data_size.width = static_cast<size_t>(std::ceil(original_data_size.width * scale));
270 |     current_data_size.height = static_cast<size_t>(std::ceil(original_data_size.height * scale));
271 |     hx = original_data_size.width / static_cast<float>(current_data_size.width);
272 |     hy = original_data_size.height / static_cast<float>(current_data_size.height);
273 | 
274 |     if (!this->silent)
275 |         std::printf("Solve level %2d (%4d x%4d) \n", current_warp_level, current_data_size.width, current_data_size.height);
276 | 
277 | 
278 |     /* Data resampling */
279 |     {
280 |       if (current_warp_level == 0) {
281 |         std::swap(dev_frame_0, dev_frame_0_res);
282 |         std::swap(dev_frame_1, dev_frame_1_res_br);
283 |       } else {
284 |         CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
285 |         cuda_memory_ptrs_.pop();
286 | 
287 |         op.Clear();
288 |         op.PushValuePtr("dev_input",     &dev_frame_0);
289 |         op.PushValuePtr("dev_output",    &dev_frame_0_res);
290 |         op.PushValuePtr("dev_temp",      &dev_temp);
291 |         op.PushValuePtr("data_size",     &original_data_size);
292 |         op.PushValuePtr("resample_size", &current_data_size);
293 |         cuop_resample_.Execute(op);
294 | 
295 |         op.Clear();
296 |         op.PushValuePtr("dev_input",     &dev_frame_1);
297 |         op.PushValuePtr("dev_output",    &dev_frame_1_res_br);
298 |         op.PushValuePtr("dev_temp",      &dev_temp);
299 |         op.PushValuePtr("data_size",     &original_data_size);
300 |         op.PushValuePtr("resample_size", &current_data_size);
301 |         cuop_resample_.Execute(op);
302 |   
303 |         cuda_memory_ptrs_.push(dev_temp);
304 |       }
305 |     }
306 | 
307 |     /* Flow field resampling */
308 |     {
309 |       if (prev_data_size.width == 0) {
310 |         CheckCudaError(cuMemsetD2D8(dev_flow_u, dev_container_size_.pitch, 0, dev_container_size_.width * sizeof(float), 
311 |                                                 dev_container_size_.height * 1));
312 |         CheckCudaError(cuMemsetD2D8(dev_flow_v, dev_container_size_.pitch, 0, dev_container_size_.width  * sizeof(float), 
313 |                                                 dev_container_size_.height * 1));
314 |      
315 |       } else {
316 |         CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
317 |         cuda_memory_ptrs_.pop();
318 |   
319 |         op.Clear();
320 |         op.PushValuePtr("dev_input",     &dev_flow_u);
321 |         op.PushValuePtr("dev_output",    &dev_flow_du);
322 |         op.PushValuePtr("dev_temp",      &dev_temp);
323 |         op.PushValuePtr("data_size",     &prev_data_size);
324 |         op.PushValuePtr("resample_size", &current_data_size);
325 |         cuop_resample_.Execute(op);
326 |   
327 |         op.Clear();
328 |         op.PushValuePtr("dev_input",     &dev_flow_v);
329 |         op.PushValuePtr("dev_output",    &dev_flow_dv);
330 |         op.PushValuePtr("dev_temp",      &dev_temp);
331 |         op.PushValuePtr("data_size",     &prev_data_size);
332 |         op.PushValuePtr("resample_size", &current_data_size);
333 |         cuop_resample_.Execute(op);
334 |  
335 | 
336 |         std::swap(dev_flow_u, dev_flow_du);
337 |         std::swap(dev_flow_v, dev_flow_dv);
338 |  
339 |         cuda_memory_ptrs_.push(dev_temp);
340 |       }
341 |     }
342 | 
343 |     /* Backward registration */
344 |     {
345 |       CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
346 |       cuda_memory_ptrs_.pop();
347 | 
348 |       op.Clear();
349 |       op.PushValuePtr("dev_frame_0", &dev_frame_0_res);
350 |       op.PushValuePtr("dev_frame_1", &dev_frame_1_res_br);
351 |       op.PushValuePtr("dev_flow_u",  &dev_flow_u);
352 |       op.PushValuePtr("dev_flow_v",  &dev_flow_v);
353 |       op.PushValuePtr("dev_output",  &dev_temp);
354 |       op.PushValuePtr("data_size",   &current_data_size);
355 |       op.PushValuePtr("hx",          &hx);
356 |       op.PushValuePtr("hy",          &hy);
357 | 
358 |       cuop_register_.Execute(op);
359 | 
360 |       std::swap(dev_frame_1_res_br, dev_temp);
361 | 
362 |       cuda_memory_ptrs_.push(dev_temp);
363 |     }
364 | 
365 |     /* Difference problem solver */
366 |     {
367 |       CUdeviceptr dev_phi = cuda_memory_ptrs_.top();
368 |       cuda_memory_ptrs_.pop();
369 |       CUdeviceptr dev_ksi = cuda_memory_ptrs_.top();
370 |       cuda_memory_ptrs_.pop();
371 |       CUdeviceptr dev_temp_du = cuda_memory_ptrs_.top();
372 |       cuda_memory_ptrs_.pop();
373 |       CUdeviceptr dev_temp_dv = cuda_memory_ptrs_.top();
374 |       cuda_memory_ptrs_.pop();
375 | 
376 |       op.Clear();
377 |       op.PushValuePtr("dev_frame_0", &dev_frame_0_res);
378 |       op.PushValuePtr("dev_frame_1", &dev_frame_1_res_br);
379 |       op.PushValuePtr("dev_flow_u",  &dev_flow_u);
380 |       op.PushValuePtr("dev_flow_v",  &dev_flow_v);
381 |       op.PushValuePtr("dev_flow_du", &dev_flow_du);
382 |       op.PushValuePtr("dev_flow_dv", &dev_flow_dv);
383 |       op.PushValuePtr("dev_phi",     &dev_phi);
384 |       op.PushValuePtr("dev_ksi",     &dev_ksi);
385 |       op.PushValuePtr("dev_temp_du", &dev_temp_du);
386 |       op.PushValuePtr("dev_temp_dv", &dev_temp_dv);
387 | 
388 |       op.PushValuePtr("data_constancy",         &data_constancy_);
389 |       op.PushValuePtr("outer_iterations_count", &outer_iterations_count);
390 |       op.PushValuePtr("inner_iterations_count", &inner_iterations_count);
391 |       op.PushValuePtr("equation_alpha",         &equation_alpha);
392 |       op.PushValuePtr("equation_smoothness",    &equation_smoothness);
393 |       op.PushValuePtr("equation_data",          &equation_data);
394 |       op.PushValuePtr("data_size",              &current_data_size);
395 |       op.PushValuePtr("hx",                     &hx);
396 |       op.PushValuePtr("hy",                     &hy);
397 | 
398 | 
399 |       cuop_solve_.silent = silent;
400 |       cuop_solve_.Execute(op);
401 | 
402 |       cuda_memory_ptrs_.push(dev_phi);
403 |       cuda_memory_ptrs_.push(dev_ksi);
404 |       cuda_memory_ptrs_.push(dev_temp_du);
405 |       cuda_memory_ptrs_.push(dev_temp_dv);
406 |     }
407 | 
408 |     /* Add the solved flow increment to the global flow */
409 |     {
410 |       op.Clear();
411 |       op.PushValuePtr("operand_0", &dev_flow_u);
412 |       op.PushValuePtr("operand_1", &dev_flow_du);
413 |       op.PushValuePtr("data_size", &current_data_size);
414 |       cuop_add_.Execute(op);
415 | 
416 |       op.Clear();
417 |       op.PushValuePtr("operand_0", &dev_flow_v);
418 |       op.PushValuePtr("operand_1", &dev_flow_dv);
419 |       op.PushValuePtr("data_size", &current_data_size);
420 |       cuop_add_.Execute(op);
421 | 
422 |     }
423 | 
424 |     prev_data_size = current_data_size;
425 |     --current_warp_level;
426 | 
427 |     /* Flow field median filtering */
428 |     {
429 |       CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
430 |       cuda_memory_ptrs_.pop();
431 | 
432 |       op.Clear();
433 |       op.PushValuePtr("dev_input",  &dev_flow_u);
434 |       op.PushValuePtr("dev_output", &dev_temp);
435 |       op.PushValuePtr("data_size",  &current_data_size);
436 |       op.PushValuePtr("radius",     &median_radius);
437 |       cuop_median_.Execute(op);
438 |       std::swap(dev_flow_u, dev_temp);
439 | 
440 |       op.Clear();
441 |       op.PushValuePtr("dev_input",  &dev_flow_v);
442 |       op.PushValuePtr("dev_output", &dev_temp);
443 |       op.PushValuePtr("data_size",  &current_data_size);
444 |       op.PushValuePtr("radius",     &median_radius);
445 |       cuop_median_.Execute(op);
446 |       std::swap(dev_flow_v, dev_temp);
447 | 
448 |       cuda_memory_ptrs_.push(dev_temp);
449 |     }
450 | 
451 | 
452 | 
453 |     /* Get data for visualization */
454 |     //{
455 |     //  Visualization& visualization = Visualization::GetInstance();
456 | 
457 |     //  if (visualization.IsInitialized()) {
458 |     //    CUdeviceptr dev_temp_u = cuda_memory_ptrs_.top();
459 |     //    cuda_memory_ptrs_.pop();
460 |     //    CUdeviceptr dev_temp_v = cuda_memory_ptrs_.top();
461 |     //    cuda_memory_ptrs_.pop();
462 |     //    CUdeviceptr dev_temp_w = cuda_memory_ptrs_.top();
463 |     //    cuda_memory_ptrs_.pop();
464 |     //    CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
465 |     //    cuda_memory_ptrs_.pop();
466 | 
467 |     //    /* Resample the flow field to original size */
468 |     //    op.Clear();
469 |     //    op.PushValuePtr("dev_input",     &dev_flow_u);
470 |     //    op.PushValuePtr("dev_output",    &dev_temp_u);
471 |     //    op.PushValuePtr("dev_temp",      &dev_temp);
472 |     //    op.PushValuePtr("data_size",     &current_data_size);
473 |     //    op.PushValuePtr("resample_size", &original_data_size);
474 |     //    cuop_resample_.Execute(op);
475 | 
476 |     //    op.Clear();
477 |     //    op.PushValuePtr("dev_input",     &dev_flow_v);
478 |     //    op.PushValuePtr("dev_output",    &dev_temp_v);
479 |     //    op.PushValuePtr("dev_temp",      &dev_temp);
480 |     //    op.PushValuePtr("data_size",     &current_data_size);
481 |     //    op.PushValuePtr("resample_size", &original_data_size);
482 |     //    cuop_resample_.Execute(op);
483 | 
484 | 
485 |     //    /* Read output data back from the device */
486 |     //    CopyData2DFromDevice(dev_temp_u, flow_u, dev_container_size_.height, dev_container_size_.pitch);
487 |     //    CopyData2DFromDevice(dev_temp_v, flow_v, dev_container_size_.height, dev_container_size_.pitch);
488 |   
489 | 
490 |     //    /* Update 3D texture */
491 |     //    visualization.Load3DFlowTextureFromData3DUVW(flow_u, flow_v, NULL);
492 | 
493 |     //    cuda_memory_ptrs_.push(dev_temp);
494 |     //    cuda_memory_ptrs_.push(dev_temp_u);
495 |     //    cuda_memory_ptrs_.push(dev_temp_v);
496 |     //    cuda_memory_ptrs_.push(dev_temp_w);
497 | 
498 |     //    visualization.DoNextStep();
499 |     //  }
500 |     //}
501 | 
502 |   }
503 |   /* ---------------------------------------------------- */
504 |   /* Main loop END */
505 |   /* ---------------------------------------------------- */
506 | 
507 |   /* DEBUG Apply computed flow to the input data */
508 |   if (false) {
509 |  /*   CopyData2DtoDevice(frame_0, dev_frame_0, dev_container_size_.height, dev_container_size_.pitch);
510 |     CopyData2DtoDevice(frame_1, dev_frame_1, dev_container_size_.height, dev_container_size_.pitch);
511 | 
512 |     CheckCudaError(cuStreamSynchronize(NULL));
513 | 
514 |     CUdeviceptr dev_temp = cuda_memory_ptrs_.top();
515 |     cuda_memory_ptrs_.pop();
516 | 
517 |     hx = 1.f;
518 |     hy = 1.f;
519 | 
520 |     op.Clear();
521 |     op.PushValuePtr("dev_frame_0", &dev_frame_0);
522 |     op.PushValuePtr("dev_frame_1", &dev_frame_1);
523 |     op.PushValuePtr("dev_flow_u", &dev_flow_u);
524 |     op.PushValuePtr("dev_flow_v", &dev_flow_v);
525 |     op.PushValuePtr("dev_output", &dev_temp);
526 |     op.PushValuePtr("data_size", &original_data_size);
527 |     op.PushValuePtr("hx", &hx);
528 |     op.PushValuePtr("hy", &hy);
529 | 
530 | 
531 |     cuop_register_.Execute(op);
532 |     CheckCudaError(cuStreamSynchronize(NULL));
533 |     CopyData2DFromDevice(dev_temp, flow_u, dev_container_size_.height, dev_container_size_.pitch);
534 |     CheckCudaError(cuStreamSynchronize(NULL));
535 | 
536 |     flow_u.WriteRAWToFileU8("./data/output/registrated-180-180-151.raw");
537 |     flow_u.WriteRAWToFileF32("./data/output/registrated-180-180-151-f.raw");
538 | 
539 | 
540 |     cuda_memory_ptrs_.push(dev_temp);*/
541 |   }
542 | 
543 |   /* Read output data back from the device */
544 |   CopyData2DFromDevice(dev_flow_u, flow_u, dev_container_size_.height, dev_container_size_.pitch);
545 |   CopyData2DFromDevice(dev_flow_v, flow_v, dev_container_size_.height, dev_container_size_.pitch);
546 | 
547 |   /* Estimate GPU computation time */
548 |   CheckCudaError(cuEventRecord(cu_event_stop, NULL));
549 |   CheckCudaError(cuEventSynchronize(cu_event_stop));
550 | 
551 |   float elapsed_time;
552 |   CheckCudaError(cuEventElapsedTime(&elapsed_time, cu_event_start, cu_event_stop));
553 |   
554 |   std::printf("Total GPU computation time: % 4.4fs\n", elapsed_time / 1000.);
555 | 
556 |   CheckCudaError(cuEventDestroy(cu_event_start));
557 |   CheckCudaError(cuEventDestroy(cu_event_stop));
558 | 
559 |   /* Return all memory pointers to stack */
560 |   cuda_memory_ptrs_.push(dev_frame_0);
561 |   cuda_memory_ptrs_.push(dev_frame_1);
562 |   cuda_memory_ptrs_.push(dev_frame_0_res);
563 |   cuda_memory_ptrs_.push(dev_frame_1_res_br);
564 |   cuda_memory_ptrs_.push(dev_flow_u);
565 |   cuda_memory_ptrs_.push(dev_flow_v);
566 |   cuda_memory_ptrs_.push(dev_flow_du);
567 |   cuda_memory_ptrs_.push(dev_flow_dv);
568 | 
569 | }
570 | 
571 | void OpticalFlow2D::Destroy()
572 | {
573 |   for (CudaOperationBase* cuop : cuda_operations_) {
574 |     cuop->Destroy();
575 |   }
576 | 
577 |   size_t freed_containers_count = 0;
578 |   while (!cuda_memory_ptrs_.empty()) {
579 |     CUdeviceptr dev_container = cuda_memory_ptrs_.top();
580 |     CheckCudaError(cuMemFree(dev_container));
581 |     
582 |     ++freed_containers_count;
583 |     cuda_memory_ptrs_.pop();
584 |   }
585 |   if (freed_containers_count && freed_containers_count != dev_containers_count_) {
586 |     std::printf("Warning. Not all device memory allocations were freed.\n");
587 |   }
588 | 
589 |   initialized_ = false;
590 | }
591 | 
592 | OpticalFlow2D::~OpticalFlow2D()
593 | {
594 |   Destroy();
595 | }


--------------------------------------------------------------------------------
/src/optical_flow/optical_flow_2d.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_2D_H_
23 | #define GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_2D_H_
24 | 
25 | #include <forward_list>
26 | #include <stack>
27 | 
28 | #include <cuda.h>
29 | 
30 | #include "src/cuda_operations/cuda_operation_base.h"
31 | #include "src/cuda_operations/2d/cuda_operation_add_2d.h"
32 | #include "src/cuda_operations/2d/cuda_operation_median_2d.h"
33 | #include "src/cuda_operations/2d/cuda_operation_convolution_2d.h"
34 | #include "src/cuda_operations/2d/cuda_operation_registration_2d.h"
35 | #include "src/cuda_operations/2d/cuda_operation_resample_2d.h"
36 | #include "src/cuda_operations/2d/cuda_operation_solve_2d.h"
37 | 
38 | #include "src/data_types/operation_parameters.h"
39 | #include "src/data_types/data2d.h"
40 | 
41 | #include "src/optical_flow/optical_flow_base_2d.h"
42 | 
43 | class OpticalFlow2D : public OpticalFlowBase2D{
44 | private:
45 |   const size_t dev_containers_count_ = 12;
46 |   DataSize3 dev_container_size_;
47 | 
48 |   std::forward_list<CudaOperationBase*> cuda_operations_;
49 |   std::stack<CUdeviceptr> cuda_memory_ptrs_;
50 | 
51 |   CudaOperationAdd2D cuop_add_;
52 |   CudaOperationConvolution2D cuop_convolution_;
53 |   CudaOperationMedian2D cuop_median_;
54 |   CudaOperationRegistration2D cuop_register_;
55 |   CudaOperationResample2D cuop_resample_;
56 |   CudaOperationSolve2D cuop_solve_;
57 | 
58 |   bool InitCudaOperations();
59 |   bool InitCudaMemory();
60 | 
61 | public:
62 |     OpticalFlow2D();
63 | 
64 |     bool Initialize(const DataSize3& data_size, DataConstancy data_constancy = DataConstancy::Grey) override;
65 |   void ComputeFlow(Data2D& frame_0, Data2D& frame_1, Data2D& flow_u, Data2D& flow_v, OperationParameters& params) override;
66 |   void Destroy() override;
67 | 
68 |   bool silent = false;
69 | 
70 |   ~OpticalFlow2D() override;
71 | };
72 | 
73 | 
74 | #endif // !GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_2D_H_
75 | 


--------------------------------------------------------------------------------
/src/optical_flow/optical_flow_base_2d.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #include "src/optical_flow/optical_flow_base_2d.h"
23 | 
24 | #include <cmath>
25 | 
26 | OpticalFlowBase2D::OpticalFlowBase2D(const char* name)
27 |   : name_(name)
28 | {
29 | }
30 | 
31 | const char* OpticalFlowBase2D::GetName() const
32 | {
33 |   return name_;
34 | }
35 | 
36 | size_t OpticalFlowBase2D::GetMaxWarpLevel(size_t width, size_t height, float scale_factor) const
37 | {
38 |     /* Compute maximum number of warping levels for given image size and warping reduction factor */
39 |     size_t r_width = 1;
40 |     size_t r_height = 1;
41 |     size_t level_counter = 1;
42 | 
43 |     while (scale_factor < 1.f) {
44 |         float scale = std::pow(scale_factor, static_cast<float>(level_counter));
45 |         r_width = static_cast<size_t>(std::ceil(width * scale));
46 |         r_height = static_cast<size_t>(std::ceil(height * scale));
47 | 
48 |         if (r_width < 4 || r_height < 4 ) {
49 |             break;
50 |         }
51 |         ++level_counter;
52 |     }
53 | 
54 |     if (r_width == 1 || r_height == 1) {
55 |         --level_counter;
56 |     }
57 | 
58 |     return level_counter;
59 | }
60 | 
61 | bool OpticalFlowBase2D::IsInitialized() const
62 | {
63 |   if (!initialized_) {
64 |     std::printf("Error: '%s' was not initialized.\n", name_);
65 |   }
66 |   return initialized_;
67 | }
68 | 
69 | void OpticalFlowBase2D::ComputeFlow(Data2D& frame_0, Data2D& frame_1, Data2D& flow_u, Data2D& flow_v, OperationParameters& params)
70 | {
71 |   std::printf("Warning: '%s' ComputeFlow() was not defined.\n", name_);
72 | 
73 | }
74 | 
75 | void OpticalFlowBase2D::Destroy()
76 | {
77 |   initialized_ = false;
78 | }
79 | 
80 | OpticalFlowBase2D::~OpticalFlowBase2D()
81 | {
82 |   if (initialized_) {
83 |     Destroy();
84 |   }
85 | }
86 | 


--------------------------------------------------------------------------------
/src/optical_flow/optical_flow_base_2d.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_BASE_2D_H_
23 | #define GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_BASE_2D_H_
24 | 
25 | #include "src/data_types/data2d.h"
26 | #include "src/data_types/data_structs.h"
27 | #include "src/data_types/operation_parameters.h"
28 | 
29 | class OpticalFlowBase2D {
30 | private:
31 |   const char* name_ = nullptr;
32 | 
33 | protected:
34 |   bool initialized_ = false;
35 | 
36 |   DataConstancy data_constancy_;
37 | 
38 |   OpticalFlowBase2D(const char* name);
39 | 
40 |   size_t GetMaxWarpLevel(size_t width, size_t height, float scale_factor) const;
41 | 
42 |   bool IsInitialized() const;
43 | 
44 | public:
45 |   const char* GetName() const;
46 | 
47 |   virtual bool Initialize(const DataSize3& data_size, DataConstancy data_constancy = DataConstancy::Grey) = 0;
48 |   virtual void ComputeFlow(Data2D& frame_0, Data2D& frame_1, Data2D& flow_u, Data2D& flow_v, OperationParameters& params);
49 |   virtual void Destroy();
50 | 
51 |   virtual ~OpticalFlowBase2D();
52 | };
53 | 
54 | 
55 | 
56 | #endif // !GPUFLOW3D_OPTICAL_FLOW_OPTICAL_FLOW_BASE_2D_H_
57 | 


--------------------------------------------------------------------------------
/src/utils/common_utils.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #include "common_utils.h"
23 | 
24 | #include <cstring>
25 | 
26 | #ifdef _WIN32
27 | #include <Windows.h>
28 | #endif
29 | #ifdef __gnu_linux__
30 | #include <unistd.h>
31 | #endif
32 | 
namespace Utils {

  /**
   * Writes the directory that contains the running executable into 'path'
   * as a null-terminated string, without a trailing separator.
   *
   * The buffer is always left null-terminated and is never written beyond
   * 'size' bytes. On failure, or on a platform that is neither Windows nor
   * GNU/Linux, 'path' is set to the empty string. A path that does not fit
   * is truncated. A null 'path' or a 'size' of 0 is ignored.
   *
   * @param path  destination buffer
   * @param size  capacity of 'path' in bytes, including the terminator
   */
  void GetExecutablePath(char* path, size_t size)
  {
    if (!path || size == 0) {
      return;
    }
    path[0] = '\0';

#ifdef _WIN32
    const DWORD length = GetModuleFileName(NULL, path, static_cast<DWORD>(size));
    if (length == 0) {
      /* The call failed; do not parse whatever is in the buffer */
      path[0] = '\0';
      return;
    }
    /* Guarantee termination even if the result was truncated */
    path[size - 1] = '\0';
    char* last_char = std::strrchr(path, '\\');
    if (last_char) {
      *last_char = '\0';
    }
#endif
#ifdef __gnu_linux__
    /* readlink() does not append a terminator and returns -1 on error,
       so reserve one byte for the terminator and check the result before indexing */
    const ssize_t link_size = readlink("/proc/self/exe", path, size - 1);
    if (link_size < 0) {
      path[0] = '\0';
      return;
    }
    path[link_size] = '\0';
    char* last_char = std::strrchr(path, '/');
    if (last_char) {
      *last_char = '\0';
    }
#endif
  }

  /**
   * Redraws a 40-character progress bar on the current console line.
   *
   * Cell i (0..39) is drawn as '=' when i / 40 is below 'complete' and as a
   * space otherwise; the bar is preceded by a carriage return.
   *
   * @param complete  completed fraction; 0 draws an empty bar, values above
   *                  39/40 draw a full one
   */
  void PrintProgressBar(float complete)
  {
    const size_t width = 40;
    char buffer[80];
    char* buffer_ptr = buffer;

    *(buffer_ptr++) = '[';
    for (size_t i = 0; i < width; ++i) {
      *(buffer_ptr++) = (i / static_cast<float>(width) < complete) ? '=' : ' ';
    }
    *(buffer_ptr++) = ']';
    *(buffer_ptr++) = '\0';

    std::printf("\r%s", buffer);
    /* No newline is written, so flush explicitly to make the update visible */
    std::fflush(stdout);
  }

} // namespace Utils


--------------------------------------------------------------------------------
/src/utils/common_utils.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_UTILS_COMMON_UTILS_H_
23 | #define GPUFLOW3D_UTILS_COMMON_UTILS_H_
24 | 
25 | #include <cstdio>
26 | 
namespace Utils {

  /* Writes the directory containing the running executable into 'path'
     (a buffer of 'size' bytes), without a trailing separator */
  void GetExecutablePath(char* path, size_t size);

  /* Redraws a 40-character console progress bar; 'complete' is the finished fraction */
  void PrintProgressBar(float complete);

} // namespace Utils
33 | 
/*
 * Reads parameter N from container P as a value of type T and assigns it to V.
 * If P.GetValuePtr(N) yields a null pointer, a "Missing parameter" message is
 * printed and the enclosing function returns (it must return void).
 * GetName() must be callable at the point of use.
 */
#define GET_PARAM_OR_RETURN(P, T, V, N)                                          \
  do {                                                                           \
    void* v_ptr = (P).GetValuePtr((N));                                          \
    if (!v_ptr) {                                                                \
      std::printf("Operation: '%s'. Missing parameter '%s'.\n", GetName(), (N)); \
      return;                                                                    \
    }                                                                            \
    (V) = *(static_cast<T*>(v_ptr));                                             \
  } while (0)
44 | 
/*
 * Same as GET_PARAM_OR_RETURN, for functions that return a value: when
 * parameter N is missing from P, the message is printed and the enclosing
 * function returns R. GetName() must be callable at the point of use.
 */
#define GET_PARAM_OR_RETURN_VALUE(P, T, V, N, R)                                 \
  do {                                                                           \
    void* v_ptr = (P).GetValuePtr((N));                                          \
    if (!v_ptr) {                                                                \
      std::printf("Operation: '%s'. Missing parameter '%s'.\n", GetName(), (N)); \
      return (R);                                                                \
    }                                                                            \
    (V) = *(static_cast<T*>(v_ptr));                                             \
  } while (0)
55 | 
/*
 * Looks up parameter N in container P and stores its address, cast to T*,
 * in PTR (the value itself is not copied). When the parameter is missing,
 * a message is printed and the enclosing function returns (it must return
 * void). GetName() must be callable at the point of use.
 */
#define GET_PARAM_PTR_OR_RETURN(P, T, PTR, N)                                    \
  do {                                                                           \
    void* v_ptr = (P).GetValuePtr((N));                                          \
    if (!v_ptr) {                                                                \
      std::printf("Operation: '%s'. Missing parameter '%s'.\n", GetName(), (N)); \
      return;                                                                    \
    }                                                                            \
    (PTR) = (static_cast<T*>(v_ptr));                                            \
  } while (0)
66 | 
67 | #endif // !GPUFLOW3D_UTILS_COMMON_UTILS_H_
68 | 


--------------------------------------------------------------------------------
/src/utils/cuda_utils.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include "cuda_utils.h"
 23 | 
 24 | #include <cstring>
 25 | 
 26 | bool InitCudaContextWithFirstAvailableDevice(CUcontext* cu_context)
 27 | {
 28 |   if (CheckCudaError(cuInit(0))) {
 29 |     return false;
 30 |   }
 31 | 
 32 |   int cu_device_count;
 33 |   if (CheckCudaError(cuDeviceGetCount(&cu_device_count))) {
 34 |     return false;
 35 |   }
 36 |   CUdevice cu_device;
 37 | 
 38 |   if (cu_device_count == 0) {
 39 |     printf("There are no cuda capable devices.");
 40 |     return false;
 41 |   }
 42 | 
 43 |   if (CheckCudaError(cuDeviceGet(&cu_device, 0))) {
 44 |     return false;
 45 |   }
 46 | 
 47 |   char cu_device_name[64];
 48 |   if (CheckCudaError(cuDeviceGetName(cu_device_name, 64, cu_device))) {
 49 |     return false;
 50 |   }
 51 | 
 52 |   int launch_timeout;
 53 |   CheckCudaError(cuDeviceGetAttribute(&launch_timeout, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, cu_device));
 54 | 
 55 |   printf("CUDA Device: %s. Launch timeout: %s\n", cu_device_name, (launch_timeout ? "Yes" : "No"));
 56 | 
 57 |   if (CheckCudaError(cuCtxCreate(cu_context, 0, cu_device))) {
 58 |     return false;
 59 |   }
 60 | 
 61 |   return true;
 62 | }
 63 | 
 64 | 
 65 | 
 66 | void CopyData2DtoDevice(Data2D& data2d, CUdeviceptr device_ptr, size_t device_height, size_t device_pitch)
 67 | {
 68 |     CUDA_MEMCPY2D cu_copy2d;
 69 |     std::memset(&cu_copy2d, 0, sizeof(CUDA_MEMCPY2D));
 70 | 
 71 |     cu_copy2d.srcMemoryType = CU_MEMORYTYPE_HOST;
 72 |     cu_copy2d.srcHost = data2d.DataPtr();
 73 |     cu_copy2d.srcPitch = data2d.Width() * sizeof(float);
 74 |          
 75 |     cu_copy2d.dstMemoryType = CU_MEMORYTYPE_DEVICE;
 76 |     cu_copy2d.dstDevice = device_ptr;
 77 |     cu_copy2d.dstPitch = device_pitch;
 78 |           
 79 |     cu_copy2d.WidthInBytes = data2d.Width() * sizeof(float);
 80 |     cu_copy2d.Height = data2d.Height();
 81 | 
 82 |     CheckCudaError(cuMemcpy2D(&cu_copy2d));
 83 | }
 84 | 
 85 | 
 86 | void CopyData2DFromDevice(CUdeviceptr device_ptr, Data2D& data2d, size_t device_height, size_t device_pitch)
 87 | {
 88 |     CUDA_MEMCPY2D cu_copy2d;
 89 |     std::memset(&cu_copy2d, 0, sizeof(CUDA_MEMCPY2D));
 90 | 
 91 |     cu_copy2d.srcMemoryType = CU_MEMORYTYPE_DEVICE;
 92 |     cu_copy2d.srcDevice = device_ptr;
 93 |     cu_copy2d.srcPitch = device_pitch;
 94 | 
 95 |          
 96 |     cu_copy2d.dstMemoryType = CU_MEMORYTYPE_HOST;
 97 |     cu_copy2d.dstHost = data2d.DataPtr();
 98 |     cu_copy2d.dstPitch = data2d.Width() * sizeof(float);
 99 | 
100 |        
101 |     cu_copy2d.WidthInBytes = data2d.Width() * sizeof(float);
102 |     cu_copy2d.Height = data2d.Height();
103 | 
104 |     CheckCudaError(cuMemcpy2D(&cu_copy2d));
105 | }
106 | 


--------------------------------------------------------------------------------
/src/utils/cuda_utils.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
14 | * The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_UTILS_CUDA_UTILS_H_
23 | #define GPUFLOW3D_UTILS_CUDA_UTILS_H_
24 | 
25 | #include <cstdio>
26 | 
27 | #include <cuda.h>
28 | 
29 | #include "src/data_types/data2d.h"
30 | 
31 | #define CheckCudaError(error) __CheckCudaError (error, __FILE__, __LINE__)
32 | 
33 | inline bool __CheckCudaError(CUresult error, const char* file, const int line)
34 | {
35 |   if (error != CUDA_SUCCESS)
36 |   {
37 |     const char *error_name = nullptr;
38 |     const char *error_string = nullptr;
39 | 
40 | #define NEW_CUDA_DRIVER
41 | #ifdef NEW_CUDA_DRIVER
42 |     cuGetErrorName(error, &error_name);
43 |     cuGetErrorString(error, &error_string);
44 | #endif
45 | 
46 |     std::fprintf(stderr, "Driver API error = %04d %s\n%s\n from file <%s>, line %i.\n",
47 |       error, error_name, error_string, file, line);
48 |     return true;
49 |   }
50 |   return false;
51 | }
52 | 
53 | bool InitCudaContextWithFirstAvailableDevice(CUcontext* cu_context);
54 | 
55 | void CopyData2DtoDevice(Data2D& data2d, CUdeviceptr device_ptr, size_t device_height, size_t device_pitch);
56 | void CopyData2DFromDevice(CUdeviceptr device_ptr, Data2D& data2d, size_t device_height, size_t device_pitch);
57 | 
58 | 
59 | 
60 | #endif // !GPUFLOW3D_UTILS_CUDA_UTILS_H_
61 | 


--------------------------------------------------------------------------------
/src/utils/io_utils.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
 14 | * The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include "io_utils.h"
 23 | 
 24 | #include <math.h>
 25 | #include <fstream>
 26 | #include <iostream>
 27 | 
 28 | #include "src/data_types/data2d.h"
 29 | 
 30 | using namespace IOUtils;
 31 | 
 32 | 
 33 | 
 34 | 
 35 | void IOUtils::WriteFlowToImageRGB(Data2D& u, Data2D& v, float flowMaxScale, string fileName)
 36 | {
 37 | 
 38 |     float maxLength = flowMaxScale;
 39 | 
 40 |     // Compute scaling factor
 41 |     float factor = 1.0 / maxLength;
 42 | 
 43 |     RGBColor rgb;
 44 | 
 45 |     fstream ofile(fileName.c_str(), ios::out | ios::binary);
 46 | 
 47 |     if (!ofile.is_open())
 48 |     {
 49 |         std::cerr << "Error: cannot save file " << std::endl;
 50 |         exit(255);
 51 |     }
 52 | 
 53 |     int nx = u.Width(); 
 54 |     int ny = u.Height();
 55 | 
 56 |     GRAY res;
 57 | 
 58 |     ofile<<"P6 \n";
 59 |     ofile<<nx <<" "<<ny<<" \n255\n";
 60 | 
 61 |     /* write image data */
 62 |     for (int i = 0; i < ny; i++) {
 63 |         for (int j = 0; j < nx; j++) {
 64 | 
 65 |             rgb =  ConvertToRGB(u.Data(j, i) * factor, v.Data(j, i) * factor);
 66 | 
 67 |             res = ConvertToGray(rgb.r);
 68 |             ofile.write((char *)&res, sizeof(GRAY));
 69 | 
 70 |             res = ConvertToGray(rgb.g);
 71 |             ofile.write((char *)&res, sizeof(GRAY));
 72 | 
 73 |             res = ConvertToGray(rgb.b);
 74 |             ofile.write((char *)&res, sizeof(GRAY));
 75 |         }
 76 |     }
 77 |     ofile.close();
 78 | 
 79 | }
 80 | 
 81 | void IOUtils::WriteMagnitudeToFileF32(Data2D& u, Data2D& v, string fileName)
 82 | {
 83 |     //char str[300];
 84 | 
 85 |     fstream ofile(fileName.c_str(), ios::out | ios::binary);
 86 | 
 87 |     if (!ofile.is_open())
 88 |     {
 89 |         std::cerr << "Error: cannot save file " << std::endl;
 90 |         exit(255);
 91 |     }
 92 | 
 93 |     int nx = u.Width();
 94 |     int ny = u.Height();
 95 | 
 96 | 
 97 |     float res;
 98 | 
 99 |     float * buf = new float[nx];
100 | 
101 |     for (int i = 0; i < ny; i++) {
102 |         for (int j = 0; j < nx; j++) {
103 | 
104 |             res = sqrt(u.Data(j, i)*u.Data(j, i) + v.Data(j, i)*v.Data(j, i));
105 | 
106 |             buf[j] = res;
107 |         }
108 |         ofile.write((char *)buf, nx * sizeof(float));
109 |     }
110 | 
111 |     delete[] buf;
112 | 
113 |     ofile.close();
114 | }
115 | 
116 | RGBColor::RGBColor()
117 | {
118 |     this->r = 0;
119 |     this->g = 0;
120 |     this->b = 0;
121 | }
122 | 
123 | RGBColor::RGBColor(int r, int g, int b)
124 | {
125 |     this->r = r;
126 |     this->g = g;
127 |     this->b = b;
128 | }
129 | 
130 | 
131 | /*****************************************************************************/
132 | /*                                                                           */
133 | /*                   Copyright 08/2006 by Dr. Andres Bruhn                   */
134 | /*     Faculty of Mathematics and Computer Science, Saarland University,     */
135 | /*                           Saarbruecken, Germany.                          */
136 | /*																			 */
137 | /*						   Modified by Alexey Ershov		                 */
138 | /*																			 */
139 | /*****************************************************************************/
140 | RGBColor IOUtils::ConvertToRGB(float x, float y)
141 | {
142 |     /********************************************************/
143 |     float Pi;          /* pi                                                   */
144 |     float amp;         /* amplitude (magnitude)                                */
145 |     float phi;         /* phase (angle)                                        */
146 |     float alpha, beta; /* weights for linear interpolation                     */
147 |     /********************************************************/
148 | 
149 |     RGBColor rgb;
150 | 
151 |  /*   if (isUnknownFlow(x, y)) {
152 |         x = 0.0;
153 |         y = 0.0;
154 |     }*/
155 | 
156 |     /* set pi */
157 |     Pi = 2.0 * acos(0.0);
158 | 
159 |     /* determine amplitude and phase (cut amp at 1) */
160 |     amp = sqrt(x * x + y * y);
161 |     if (amp > 1) amp = 1;
162 |     if (x == 0.0)
163 |     if (y >= 0.0) phi = 0.5 * Pi;
164 |     else phi = 1.5 * Pi;
165 |     else if (x > 0.0)
166 |     if (y >= 0.0) phi = atan(y/x);
167 |     else phi = 2.0 * Pi + atan(y/x);
168 |     else phi = Pi + atan(y/x);
169 | 
170 |     phi = phi / 2.0;
171 | 
172 |     // interpolation between red (0) and blue (0.25 * Pi)
173 |     if ((phi >= 0.0) && (phi < 0.125 * Pi)) {
174 |         beta  = phi / (0.125 * Pi);
175 |         alpha = 1.0 - beta;
176 |         rgb.r = (int)floor(amp * (alpha * 255.0 + beta * 255.0));
177 |         rgb.g = (int)floor(amp * (alpha *   0.0 + beta *   0.0));
178 |         rgb.b = (int)floor(amp * (alpha *   0.0 + beta * 255.0));
179 |     }
180 |     if ((phi >= 0.125 * Pi) && (phi < 0.25 * Pi)) {
181 |         beta  = (phi-0.125 * Pi) / (0.125 * Pi);
182 |         alpha = 1.0 - beta;
183 |         rgb.r = (int)floor(amp * (alpha * 255.0 + beta *  64.0));
184 |         rgb.g = (int)floor(amp * (alpha *   0.0 + beta *  64.0));
185 |         rgb.b = (int)floor(amp * (alpha * 255.0 + beta * 255.0));
186 |     }
187 |     // interpolation between blue (0.25 * Pi) and green (0.5 * Pi)
188 |     if ((phi >= 0.25 * Pi) && (phi < 0.375 * Pi)) {
189 |         beta  = (phi - 0.25 * Pi) / (0.125 * Pi);
190 |         alpha = 1.0 - beta;
191 |         rgb.r = (int)floor(amp * (alpha *  64.0 + beta *   0.0));
192 |         rgb.g = (int)floor(amp * (alpha *  64.0 + beta * 255.0));
193 |         rgb.b = (int)floor(amp * (alpha * 255.0 + beta * 255.0));
194 |     }
195 |     if ((phi >= 0.375 * Pi) && (phi < 0.5 * Pi)) {
196 |         beta  = (phi - 0.375 * Pi) / (0.125 * Pi);
197 |         alpha = 1.0 - beta;
198 |         rgb.r = (int)floor(amp * (alpha *   0.0 + beta *   0.0));
199 |         rgb.g = (int)floor(amp * (alpha * 255.0 + beta * 255.0));
200 |         rgb.b = (int)floor(amp * (alpha * 255.0 + beta *   0.0));
201 |     }
202 |     // interpolation between green (0.5 * Pi) and yellow (0.75 * Pi)
203 |     if ((phi >= 0.5 * Pi) && (phi < 0.75 * Pi)) {
204 |         beta  = (phi - 0.5 * Pi) / (0.25 * Pi);
205 |         alpha = 1.0 - beta;
206 |         rgb.r = (int)floor(amp * (alpha * 0.0   + beta * 255.0));
207 |         rgb.g = (int)floor(amp * (alpha * 255.0 + beta * 255.0));
208 |         rgb.b = (int)floor(amp * (alpha * 0.0   + beta * 0.0));
209 |     }
210 |     // interpolation between yellow (0.75 * Pi) and red (Pi)
211 |     if ((phi >= 0.75 * Pi) && (phi <= Pi)) {
212 |         beta  = (phi - 0.75 * Pi) / (0.25 * Pi);
213 |         alpha = 1.0 - beta;
214 |         rgb.r = (int)floor(amp * (alpha * 255.0 + beta * 255.0));
215 |         rgb.g = (int)floor(amp * (alpha * 255.0 + beta *   0.0));
216 |         rgb.b = (int)floor(amp * (alpha * 0.0   + beta *   0.0));
217 |     }
218 | 
219 |     /* check RGBColor range */
220 |     rgb.r = ConvertToByte(rgb.r);
221 |     rgb.g = ConvertToByte(rgb.g);
222 |     rgb.b = ConvertToByte(rgb.b);
223 | 
224 |     return rgb;
225 | }
226 | 
227 | 


--------------------------------------------------------------------------------
/src/utils/io_utils.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
* The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_UTILS_IO_UTILS_H_
23 | #define GPUFLOW3D_UTILS_IO_UTILS_H_
24 | 
25 | #include "src/data_types/data2d.h"
26 | 
27 | #include <fstream>
28 | #include <iostream>
29 | 
30 | 
31 | using namespace std;
32 | 
33 | namespace IOUtils {
34 | 
35 |     typedef unsigned char GRAY;
36 | 
37 |     struct RGBColor
38 |     {
39 |         int r;
40 |         int g;
41 |         int b;
42 | 
43 |         RGBColor();
44 | 
45 |         RGBColor(int r, int g, int b) ;
46 |     };
47 | 
48 | 
49 |     //------------------------------------------------------------------
50 |     // 2D Writing Routines
51 |     //------------------------------------------------------------------
52 |     void WriteFlowToImageRGB(Data2D& u, Data2D& v, float flowMaxScale, string fileName);
53 |     void WriteMagnitudeToFileF32(Data2D& u, Data2D& v, string fileName);
54 | 
    // Data conversion routines
56 |     RGBColor ConvertToRGB(float x, float y);
57 | 
58 |     inline int ConvertToByte(int num)
59 |     {
60 |         return (num >=255) * 255 + ((num < 255) && (num > 0))* num;
61 |     }
62 | 
63 |     inline GRAY ConvertToGray(float number)
64 |     {
65 |         GRAY result;
66 | 
67 |         if (number < 0.0)
68 |             result = (GRAY)(0.0);
69 |         else if (number > 255.0)
70 |             result = (GRAY)(255.0);
71 |         else
72 |             result = (GRAY)(number);
73 | 
74 |         return result;
75 | 
76 |     }
77 | 
78 | }
79 | 
80 | #endif // !GPUFLOW3D_UTILS_IO_UTILS_H_
81 | 
82 | 


--------------------------------------------------------------------------------
/src/utils/settings.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | * @file    2D Optical flow using NVIDIA CUDA
  3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
  4 | *
  5 | * @date    2015-2018
  6 | * @version 0.5.0
  7 | *
  8 | *
  9 | * @section LICENSE
 10 | *
 11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
 12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
 13 | *
* The current implementation contains the following licenses:
 15 | *
 16 | * 1. TinyXml package:
 17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
 18 | *      See src/utils/tinyxml.h for details.
 19 | *
 20 | */
 21 | 
 22 | #include "settings.h"
 23 | 
 24 | #include <iostream>
 25 | #include <fstream>
 26 | #include <sstream>
 27 | 
 28 | using namespace OpticFlow;
 29 | 
 30 | 
 31 | 
 32 | 
 33 | void Settings::LoadSettingsManually()
 34 | {
 35 | 
 36 | 	this->sigma = 1.2;
 37 | 
 38 | 	// Solver settings
 39 | 	this->iterInner = 5;
 40 | 	this->iterOuter = 20;
 41 | 
 42 | 	this->alpha = 40;
 43 | 
 44 | 	this->levels = 1;
 45 | 	this->warpScale = 0.5;
 46 | 
 47 | 
 48 | 	this->e_smooth = 0.1;
 49 | 	this->e_data = 0.001;
 50 | 
 51 | }
 52 | 
 53 | int Settings::LoadSettings(string fileName)
 54 | {
 55 | 
 56 | 	string m_name;
 57 | 
 58 | 	TiXmlDocument doc(fileName.c_str());
 59 | 	
 60 | 	if (!doc.LoadFile()) {
 61 | 		cout<<"Cannot read settings file: "<<fileName<<endl;
 62 | 		return -1;
 63 | 	}
 64 | 
 65 | 	//Read file the whole settings file content
 66 | 
 67 | 	std::ifstream f(fileName.c_str());
 68 | 	std::stringstream buffer;
 69 | 	buffer << f.rdbuf();
 70 | 	content = buffer.str();
 71 | 
 72 | 
 73 | 	TiXmlHandle hDoc(&doc);
 74 | 	TiXmlElement* pElem;
 75 | 	TiXmlHandle hRoot(0);
 76 | 
 77 | 	
 78 | 	pElem=hDoc.FirstChildElement().Element();
 79 | 
 80 | 	if (!pElem) {
 81 | 		cout<<"Problem with parsing settings file: "<<fileName<<endl;
 82 | 		return -1;
 83 | 	}
 84 | 	m_name=pElem->Value();
 85 | 
 86 | 	// save this for later
 87 | 	hRoot=TiXmlHandle(pElem);
 88 | 
 89 |     int value;
 90 |     float dbValue;
 91 | 	
 92 | 
 93 | 	pElem=hRoot.FirstChild( "Input" ).FirstChild("Path").Element();
 94 | 	this->inputPath = pElem->Attribute("inputPath");
 95 | 	pElem=hRoot.FirstChild( "Output").FirstChild("Path").Element();
 96 | 	this->outputPath = pElem->Attribute("outputPath");
 97 | 
 98 | 	this->fileName1 = hRoot.FirstChild( "Input" ).FirstChild("Mode").FirstChild("Files").Element()->Attribute("file1");
 99 | 	this->fileName2 = hRoot.FirstChild( "Input" ).FirstChild("Mode").FirstChild("Files").Element()->Attribute("file2");
100 | 	
101 | 	hRoot.FirstChild( "Parameters" ).FirstChild("Method").Element()->QueryIntAttribute("key", &value);
102 | 	this->press_key = value;
103 | 
104 | 	hRoot.FirstChild( "Input" ).FirstChild("Mode").Element()->QueryIntAttribute("Nx", &value);
105 | 	this->width = value;
106 | 	
107 | 	hRoot.FirstChild( "Input" ).FirstChild("Mode").Element()->QueryIntAttribute("Ny", &value);
108 | 	this->height = value;
109 | 	
110 | 	hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Model").Element()->QueryFloatAttribute("sigma", &dbValue);
111 | 	this->sigma = dbValue;
112 | 
113 | 
114 | 	hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Iterations").Element()->QueryIntAttribute("inner", &value);
115 | 	this->iterInner = value;
116 | 	hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Iterations").Element()->QueryIntAttribute("outer", &value);
117 | 	this->iterOuter = value;
118 | 
119 | 	hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Warping").Element()->QueryIntAttribute("levels", &value);
120 | 	this->levels= value;
121 | 
122 | 	hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Warping").Element()->QueryFloatAttribute("scaling", &dbValue);
123 | 	this->warpScale= dbValue;
124 | 
125 | 	hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Warping").Element()->QueryIntAttribute("medianRadius", &value);
126 | 	this->medianRadius = value;
127 | 
128 | 
129 | 	hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Model").Element()->QueryFloatAttribute("alpha", &dbValue);
130 | 	this->alpha= dbValue;
131 | 
132 | 
133 | 
134 | 	hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Model").Element()->QueryFloatAttribute("e_smooth", &dbValue);
135 | 	this->e_smooth= dbValue;
136 | 	hRoot.FirstChild( "Parameters" ).FirstChild("Solver").FirstChild("Model").Element()->QueryFloatAttribute("e_data", &dbValue);
137 | 	this->e_data= dbValue;
138 | 
139 | 
140 | 
141 | 	return 0;
142 | 
143 | 
144 | }
145 | 
146 | 
147 | 
148 | 
149 | 


--------------------------------------------------------------------------------
/src/utils/settings.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | * @file    2D Optical flow using NVIDIA CUDA
 3 | * @author  Institute for Photon Science and Synchrotron Radiation, Karlsruhe Institute of Technology
 4 | *
 5 | * @date    2015-2018
 6 | * @version 0.5.0
 7 | *
 8 | *
 9 | * @section LICENSE
10 | *
11 | * This program is copyrighted by the author and Institute for Photon Science and Synchrotron Radiation,
12 | * Karlsruhe Institute of Technology, Karlsruhe, Germany;
13 | *
* The current implementation contains the following licenses:
15 | *
16 | * 1. TinyXml package:
17 | *      Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com). <www.sourceforge.net/projects/tinyxml>.
18 | *      See src/utils/tinyxml.h for details.
19 | *
20 | */
21 | 
22 | #ifndef GPUFLOW3D_OPTICAL_FLOW_SETTINGS_H_
23 | #define GPUFLOW3D_OPTICAL_FLOW_SETTINGS_H_
24 | 
25 | #include <stdlib.h>
26 | #include <string>
27 | #include <map>
28 | #include <vector>
29 | 
30 | #include "tinyxml.h"
31 | 
32 | using namespace std;
33 | 
34 | namespace OpticFlow {
35 | 
36 | 	/**
37 | 	* Settings class, which contains all the settings for Optical flow framework
38 | 	*/
39 | 	class Settings
40 | 	{
41 | 	public:
42 | 
43 | 		// Input settings
44 | 		string inputPath;
45 | 		string outputPath;
46 | 		string fileName1;
47 | 		string fileName2;
48 | 
49 | 		// General
50 | 		int width;
51 | 		int height;
52 | 		float sigma;
53 | 		float precision;
54 | 		int medianRadius;
55 | 
56 | 		// Solver settings
57 | 		int iterInner;
58 | 		int iterOuter;
59 | 		float alpha;
60 | 
61 | 		float e_smooth;
62 | 		float e_data;
63 | 
64 | 		int levels;
65 | 		float warpScale;
66 | 
67 | 		float flowScale;
68 | 
69 |         bool press_key;
70 | 
71 | 
72 |     private:
73 |         string content;
74 | 
75 | 
76 | 	public:
77 | 		int LoadSettings(string fileName);
78 | 		void LoadSettingsManually();
79 | 	};
80 | 
81 | 	
82 | 
83 | 
84 | 
85 | }
86 | 
87 | #endif // !GPUFLOW3D_OPTICAL_FLOW_SETTINGS_H_
88 | 


--------------------------------------------------------------------------------
/src/utils/tinystr.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/axruff/cuda-flow2d/827c3470351972aca7584a83aa9b85778f522be4/src/utils/tinystr.cpp


--------------------------------------------------------------------------------
/src/utils/tinystr.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | www.sourceforge.net/projects/tinyxml
  3 | Original file by Yves Berquin.
  4 | 
  5 | This software is provided 'as-is', without any express or implied
  6 | warranty. In no event will the authors be held liable for any
  7 | damages arising from the use of this software.
  8 | 
  9 | Permission is granted to anyone to use this software for any
 10 | purpose, including commercial applications, and to alter it and
 11 | redistribute it freely, subject to the following restrictions:
 12 | 
 13 | 1. The origin of this software must not be misrepresented; you must
 14 | not claim that you wrote the original software. If you use this
 15 | software in a product, an acknowledgment in the product documentation
 16 | would be appreciated but is not required.
 17 | 
 18 | 2. Altered source versions must be plainly marked as such, and
 19 | must not be misrepresented as being the original software.
 20 | 
 21 | 3. This notice may not be removed or altered from any source
 22 | distribution.
 23 | */
 24 | 
 25 | /*
 26 |  * THIS FILE WAS ALTERED BY Tyge Lovset, 7. April 2005.
 27 |  *
 28 |  * - completely rewritten. compact, clean, and fast implementation.
 29 |  * - sizeof(TiXmlString) = pointer size (4 bytes on 32-bit systems)
 30 |  * - fixed reserve() to work as per specification.
 31 |  * - fixed buggy compares operator==(), operator<(), and operator>()
 32 |  * - fixed operator+=() to take a const ref argument, following spec.
 33 |  * - added "copy" constructor with length, and most compare operators.
 34 |  * - added swap(), clear(), size(), capacity(), operator+().
 35 |  */
 36 | 
 37 | #ifndef TIXML_USE_STL
 38 | 
 39 | #ifndef TIXML_STRING_INCLUDED
 40 | #define TIXML_STRING_INCLUDED
 41 | 
 42 | #include <assert.h>
 43 | #include <string.h>
 44 | 
 45 | /*	The support for explicit isn't that universal, and it isn't really
 46 | 	required - it is used to check that the TiXmlString class isn't incorrectly
 47 | 	used. Be nice to old compilers and macro it here:
 48 | */
 49 | #if defined(_MSC_VER) && (_MSC_VER >= 1200 )
 50 | 	// Microsoft visual studio, version 6 and higher.
 51 | 	#define TIXML_EXPLICIT explicit
 52 | #elif defined(__GNUC__) && (__GNUC__ >= 3 )
	// GCC version 3 and higher.
 54 | 	#define TIXML_EXPLICIT explicit
 55 | #else
 56 | 	#define TIXML_EXPLICIT
 57 | #endif
 58 | 
 59 | 
 60 | /*
 61 |    TiXmlString is an emulation of a subset of the std::string template.
 62 |    Its purpose is to allow compiling TinyXML on compilers with no or poor STL support.
 63 |    Only the member functions relevant to the TinyXML project have been implemented.
 64 |    The buffer allocation is made by a simplistic power of 2 like mechanism : if we increase
 65 |    a string and there's no more room, we allocate a buffer twice as big as we need.
 66 | */
class TiXmlString
{
  public :
	// The size type used
  	typedef size_t size_type;

	// Error value for find primitive
	static const size_type npos; // = -1;


	// TiXmlString empty constructor.
	// Points at the shared static nullrep_, so no memory is allocated.
	TiXmlString () : rep_(&nullrep_)
	{
	}

	// TiXmlString copy constructor.
	// Sizes a buffer for copy.length() characters and copies them over.
	TiXmlString ( const TiXmlString & copy) : rep_(0)
	{
		init(copy.length());
		memcpy(start(), copy.data(), length());
	}

	// TiXmlString constructor, based on a null-terminated C string.
	// `copy` is passed straight to strlen(), so it must not be a null pointer.
	TIXML_EXPLICIT TiXmlString ( const char * copy) : rep_(0)
	{
		init( static_cast<size_type>( strlen(copy) ));
		memcpy(start(), copy, length());
	}

	// TiXmlString constructor, based on the first `len` characters of `str`.
	TIXML_EXPLICIT TiXmlString ( const char * str, size_type len) : rep_(0)
	{
		init(len);
		memcpy(start(), str, len);
	}

	// TiXmlString destructor. Releases the buffer unless it is the shared nullrep_.
	~TiXmlString ()
	{
		quit();
	}

	// = operator. Assigns from a null-terminated C string.
	TiXmlString& operator = (const char * copy)
	{
		return assign( copy, (size_type)strlen(copy));
	}

	// = operator. Assigns from another TiXmlString.
	TiXmlString& operator = (const TiXmlString & copy)
	{
		return assign(copy.start(), copy.length());
	}


	// += operator. Maps to append
	TiXmlString& operator += (const char * suffix)
	{
		return append(suffix, static_cast<size_type>( strlen(suffix) ));
	}

	// += operator. Maps to append (appends a single character)
	TiXmlString& operator += (char single)
	{
		return append(&single, 1);
	}

	// += operator. Maps to append
	TiXmlString& operator += (const TiXmlString & suffix)
	{
		return append(suffix.data(), suffix.length());
	}


	// Convert a TiXmlString into a null-terminated char *
	const char * c_str () const { return rep_->str; }

	// Convert a TiXmlString into a char * (need not be null terminated).
	// Returns the same pointer as c_str().
	const char * data () const { return rep_->str; }

	// Return the length of a TiXmlString
	size_type length () const { return rep_->size; }

	// Alias for length()
	size_type size () const { return rep_->size; }

	// Checks if a TiXmlString is empty
	bool empty () const { return rep_->size == 0; }

	// Return capacity of string
	size_type capacity () const { return rep_->capacity; }


	// single char extraction. The index is checked only through assert().
	const char& at (size_type index) const
	{
		assert( index < length() );
		return rep_->str[ index ];
	}

	// [] operator. The index is checked only through assert().
	// Note: this is a const member that returns a non-const reference.
	char& operator [] (size_type index) const
	{
		assert( index < length() );
		return rep_->str[ index ];
	}

	// find a char in a string. Return TiXmlString::npos if not found
	size_type find (char lookup) const
	{
		return find(lookup, 0);
	}

	// find a char in a string from an offset. Return TiXmlString::npos if not found
	size_type find (char tofind, size_type offset) const
	{
		if (offset >= length()) return npos;

		// The scan stops at the first '\0' rather than at length().
		for (const char* p = c_str() + offset; *p != '\0'; ++p)
		{
		   if (*p == tofind) return static_cast< size_type >( p - c_str() );
		}
		return npos;
	}

	// Releases the current buffer and resets the string to the empty state.
	void clear ()
	{
		//Lee:
		//The original was just too strange, though correct:
		//	TiXmlString().swap(*this);
		//Instead use the quit & re-init:
		quit();
		init(0,0);
	}

	/*	Function to reserve a big amount of data when we know we'll need it. Be aware that this
		function DOES NOT clear the content of the TiXmlString if any exists.
	*/
	void reserve (size_type cap);

	// Replaces the content with the first `len` characters of `str` (defined out of line).
	TiXmlString& assign (const char* str, size_type len);

	// Appends the first `len` characters of `str` (defined out of line).
	TiXmlString& append (const char* str, size_type len);

	// Exchanges the contents of two strings by swapping their rep_ pointers.
	void swap (TiXmlString& other)
	{
		Rep* r = rep_;
		rep_ = other.rep_;
		other.rep_ = r;
	}

  private:

	// Sets up a representation whose size and capacity are both `sz`.
	void init(size_type sz) { init(sz, sz); }
	// Sets the stored length to `sz` and writes the terminating '\0' at that position.
	void set_size(size_type sz) { rep_->str[ rep_->size = sz ] = '\0'; }
	// Pointer to the first character of the buffer.
	char* start() const { return rep_->str; }
	// Pointer one past the last character (i.e. to the terminating '\0').
	char* finish() const { return rep_->str + rep_->size; }

	// Header of the string buffer. init() allocates sizeof(Rep) + cap bytes,
	// so the characters extend past the declared one-element `str` array.
	struct Rep
	{
		size_type size, capacity;
		char str[1];
	};

	// Sets up rep_ for a string of length `sz` with room for `cap` characters.
	// A zero capacity selects the shared nullrep_ instead of allocating.
	void init(size_type sz, size_type cap)
	{
		if (cap)
		{
			// Lee: the original form:
			//	rep_ = static_cast<Rep*>(operator new(sizeof(Rep) + cap));
			// doesn't work in some cases of new being overloaded. Switching
			// to the normal allocation, although use an 'int' for systems
			// that are overly picky about structure alignment.
			const size_type bytesNeeded = sizeof(Rep) + cap;
			// Round the byte count up to a whole number of ints.
			const size_type intsNeeded = ( bytesNeeded + sizeof(int) - 1 ) / sizeof( int ); 
			rep_ = reinterpret_cast<Rep*>( new int[ intsNeeded ] );

			rep_->str[ rep_->size = sz ] = '\0';
			rep_->capacity = cap;
		}
		else
		{
			rep_ = &nullrep_;
		}
	}

	// Frees the buffer unless rep_ is the shared nullrep_. rep_ itself is
	// left unchanged, so callers must re-init before using the string again.
	void quit()
	{
		if (rep_ != &nullrep_)
		{
			// The rep_ is really an array of ints. (see the allocator, above).
			// Cast it back before delete, so the compiler won't incorrectly call destructors.
			delete [] ( reinterpret_cast<int*>( rep_ ) );
		}
	}

	// Current representation; never freed while it points at nullrep_.
	Rep * rep_;
	// Shared representation used by strings that own no buffer
	// (default-constructed, or set up with zero capacity).
	static Rep nullrep_;

} ;
267 | 
268 | 
269 | inline bool operator == (const TiXmlString & a, const TiXmlString & b)
270 | {
271 | 	return    ( a.length() == b.length() )				// optimization on some platforms
272 | 	       && ( strcmp(a.c_str(), b.c_str()) == 0 );	// actual compare
273 | }
274 | inline bool operator < (const TiXmlString & a, const TiXmlString & b)
275 | {
276 | 	return strcmp(a.c_str(), b.c_str()) < 0;
277 | }
278 | 
279 | inline bool operator != (const TiXmlString & a, const TiXmlString & b) { return !(a == b); }
280 | inline bool operator >  (const TiXmlString & a, const TiXmlString & b) { return b < a; }
281 | inline bool operator <= (const TiXmlString & a, const TiXmlString & b) { return !(b < a); }
282 | inline bool operator >= (const TiXmlString & a, const TiXmlString & b) { return !(a < b); }
283 | 
284 | inline bool operator == (const TiXmlString & a, const char* b) { return strcmp(a.c_str(), b) == 0; }
285 | inline bool operator == (const char* a, const TiXmlString & b) { return b == a; }
286 | inline bool operator != (const TiXmlString & a, const char* b) { return !(a == b); }
287 | inline bool operator != (const char* a, const TiXmlString & b) { return !(b == a); }
288 | 
289 | TiXmlString operator + (const TiXmlString & a, const TiXmlString & b);
290 | TiXmlString operator + (const TiXmlString & a, const char* b);
291 | TiXmlString operator + (const char* a, const TiXmlString & b);
292 | 
293 | 
294 | /*
295 |    TiXmlOutStream is an emulation of std::ostream. It is based on TiXmlString.
   Only the operators that we need for TinyXML have been developed.
297 | */
298 | class TiXmlOutStream : public TiXmlString
299 | {
300 | public :
301 | 
302 | 	// TiXmlOutStream << operator.
303 | 	TiXmlOutStream & operator << (const TiXmlString & in)
304 | 	{
305 | 		*this += in;
306 | 		return *this;
307 | 	}
308 | 
309 | 	// TiXmlOutStream << operator.
310 | 	TiXmlOutStream & operator << (const char * in)
311 | 	{
312 | 		*this += in;
313 | 		return *this;
314 | 	}
315 | 
316 | } ;
317 | 
318 | #endif	// TIXML_STRING_INCLUDED
319 | #endif	// TIXML_USE_STL
320 | 


--------------------------------------------------------------------------------
/src/utils/tinyxmlerror.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | www.sourceforge.net/projects/tinyxml
 3 | Original code (2.0 and earlier )copyright (c) 2000-2006 Lee Thomason (www.grinninglizard.com)
 4 | 
 5 | This software is provided 'as-is', without any express or implied 
 6 | warranty. In no event will the authors be held liable for any 
 7 | damages arising from the use of this software.
 8 | 
 9 | Permission is granted to anyone to use this software for any 
10 | purpose, including commercial applications, and to alter it and 
11 | redistribute it freely, subject to the following restrictions:
12 | 
13 | 1. The origin of this software must not be misrepresented; you must
14 | not claim that you wrote the original software. If you use this
15 | software in a product, an acknowledgment in the product documentation
16 | would be appreciated but is not required.
17 | 
18 | 2. Altered source versions must be plainly marked as such, and
19 | must not be misrepresented as being the original software.
20 | 
21 | 3. This notice may not be removed or altered from any source
22 | distribution.
23 | */
24 | 
25 | //#include "stdafx.h"
26 | #include "tinyxml.h"
27 | 
// The goal of the separate error file is to make the first
// step towards localization. tinyxml (currently) only supports
// English error messages, but they could now be translated.
31 | //
32 | // It also cleans up the code a bit.
33 | //
34 | 
// Table of human-readable error messages, TIXML_ERROR_STRING_COUNT entries long.
// NOTE(review): presumably indexed by the TiXmlBase error-code enum, in which
// case the order here must stay in step with that enum - confirm against tinyxml.h.
const char* TiXmlBase::errorString[ TIXML_ERROR_STRING_COUNT ] =
{
	"No error",
	"Error",
	"Failed to open file",
	"Memory allocation failed.",
	"Error parsing Element.",
	"Failed to read Element name",
	"Error reading Element value.",
	"Error reading Attributes.",
	"Error: empty tag.",
	"Error reading end tag.",
	"Error parsing Unknown.",
	"Error parsing Comment.",
	"Error parsing Declaration.",
	"Error document empty.",
	"Error null (0) or unexpected EOF found in input stream.",
	"Error parsing CDATA.",
	"Error when TiXmlDocument added to document, because TiXmlDocument can only be at the root.",
};
55 | 


--------------------------------------------------------------------------------