├── CMakeLists.txt
├── CMakePreBuild.sh
├── README.md
├── data
    ├── deploy.prototxt
    ├── fontmapA.png
    ├── labels.txt
    └── merge.caffemodel
├── face-recognition
    ├── CMakeLists.txt
    └── face-recognition.cpp
├── kernel.cu
├── pluginImplement.cpp
├── pluginImplement.h
├── script
    ├── merge_model.py
    └── rename_model.py
├── tensorNet.cpp
├── tensorNet.h
└── util
    ├── camera
        ├── gst-camera
        │   ├── CMakeLists.txt
        │   └── gst-camera.cpp
        ├── gstCamera.cpp
        ├── gstCamera.h
        ├── gstUtility.cpp
        ├── gstUtility.h
        ├── v4l2-console
        │   ├── CMakeLists.txt
        │   └── v4l2-console.cpp
        ├── v4l2-display
        │   ├── CMakeLists.txt
        │   └── v4l2-display.cpp
        ├── v4l2Camera.cpp
        └── v4l2Camera.h
    ├── commandLine.cpp
    ├── commandLine.h
    ├── cuda
        ├── cudaFont.cu
        ├── cudaFont.h
        ├── cudaMappedMemory.h
        ├── cudaNormalize.cu
        ├── cudaNormalize.h
        ├── cudaOverlay.cu
        ├── cudaOverlay.h
        ├── cudaRGB.cu
        ├── cudaRGB.h
        ├── cudaResize.cu
        ├── cudaResize.h
        ├── cudaUtility.h
        ├── cudaYUV-NV12.cu
        ├── cudaYUV-YUYV.cu
        ├── cudaYUV-YV12.cu
        └── cudaYUV.h
    ├── display
        ├── glDisplay.cpp
        ├── glDisplay.h
        ├── glTexture.cpp
        ├── glTexture.h
        └── glUtility.h
    ├── loadImage.cpp
    └── loadImage.h


/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | cmake_minimum_required(VERSION 2.8)
 3 | project(face-recognition)
 4 | 
 5 | # Host code is compiled as C++11 (the sources use e.g. std::unique_ptr and nullptr).
 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")	# -std=gnu++11
 7 | 
 8 | # BUILD_DEPS gates the one-time dependency installer below: it defaults to YES
 9 | # on a fresh cache and is forced back to NO once the installer has run.
10 | # Re-configure with -DBUILD_DEPS=YES to run the installer again.
11 | set(BUILD_DEPS "YES" CACHE BOOL "If YES, will install dependencies into sandbox.  Automatically reset to NO after dependencies are installed.")
12 | 
13 | # if this is the first time running cmake, perform pre-build dependency install script (or if the user manually triggers re-building the dependencies)
14 | if(BUILD_DEPS)
15 | 	message("Launching pre-build dependency installer script...")
16 | 
17 | 	# Locate the script through the source tree so that the build directory may
18 | 	# live anywhere, not only in a direct child of the source directory.
19 | 	execute_process(COMMAND sh "${PROJECT_SOURCE_DIR}/CMakePreBuild.sh"
20 | 				WORKING_DIRECTORY "${PROJECT_BINARY_DIR}"
21 | 				RESULT_VARIABLE PREBUILD_SCRIPT_RESULT)
22 | 
23 | 	# RESULT_VARIABLE holds the exit code, or an error string if sh could not be started.
24 | 	if(NOT "${PREBUILD_SCRIPT_RESULT}" STREQUAL "0")
25 | 		message(WARNING "CMakePreBuild.sh failed (${PREBUILD_SCRIPT_RESULT}); some dependencies may be missing")
26 | 	endif()
27 | 
28 | 	# FORCE is intentional here: it is what resets the switch after the first run.
29 | 	set(BUILD_DEPS "NO" CACHE BOOL "If YES, will install dependencies into sandbox.  Automatically reset to NO after dependencies are installed." FORCE)
30 | 	message("Finished installing dependencies")
31 | endif()
32 | 
33 | 
34 | # Qt is used to load images (installed by ubuntu-desktop)
35 | find_package(Qt4 REQUIRED)
36 | include(${QT_USE_FILE})
37 | add_definitions(${QT_DEFINITIONS})
38 | 
39 | 
40 | # setup CUDA -- mandatory, because cuda_add_library() below is provided by FindCUDA
41 | find_package(CUDA REQUIRED)
42 | 
43 | # device code: optimise and generate code for compute capability 5.3 and 6.2
44 | list(APPEND CUDA_NVCC_FLAGS
45 | 	-O3
46 | 	-gencode arch=compute_53,code=sm_53
47 | 	-gencode arch=compute_62,code=sm_62
48 | )
49 | 
50 | 
51 | # setup project output paths
52 | set(PROJECT_OUTPUT_DIR  ${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_PROCESSOR})
53 | set(PROJECT_INCLUDE_DIR ${PROJECT_OUTPUT_DIR}/include)
54 | 
55 | file(MAKE_DIRECTORY ${PROJECT_INCLUDE_DIR})
56 | file(MAKE_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)
57 | 
58 | message("-- system arch:  ${CMAKE_SYSTEM_PROCESSOR}")
59 | message("-- output path:  ${PROJECT_OUTPUT_DIR}")
60 | 
61 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)
62 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)
63 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)
64 | 
65 | 
66 | # build C/C++ interface
67 | # glib/gstreamer keep their config headers under the multiarch library directory;
68 | # fall back to the aarch64 location when CMake did not detect an architecture triplet.
69 | if(CMAKE_LIBRARY_ARCHITECTURE)
70 | 	set(MULTIARCH_LIB_DIR /usr/lib/${CMAKE_LIBRARY_ARCHITECTURE})
71 | else()
72 | 	set(MULTIARCH_LIB_DIR /usr/lib/aarch64-linux-gnu)
73 | endif()
74 | 
75 | include_directories(${PROJECT_INCLUDE_DIR} ${GIE_PATH}/include)
76 | include_directories(/usr/include/gstreamer-1.0 ${MULTIARCH_LIB_DIR}/gstreamer-1.0/include /usr/include/glib-2.0 /usr/include/libxml2 ${MULTIARCH_LIB_DIR}/glib-2.0/include/)
77 | 
78 | file(GLOB inferenceSources *.cpp *.cu util/*.cpp util/camera/*.cpp util/cuda/*.cu util/display/*.cpp)
79 | file(GLOB inferenceIncludes *.h util/*.h util/camera/*.h util/cuda/*.h util/display/*.h)
80 | 
81 | # NOTE: the plain (keyword-less) link signature is kept on purpose; cuda_add_library()
82 | # links the CUDA libraries with it and the two signatures cannot be mixed on one target.
83 | cuda_add_library(jetson-inference SHARED ${inferenceSources})
84 | target_link_libraries(jetson-inference nvcaffe_parser nvinfer Qt4::QtGui GL GLEW gstreamer-1.0 gstapp-1.0)		# gstreamer-0.10 gstbase-0.10 gstapp-0.10 
85 | 
86 | # transfer all headers to the include directory
87 | foreach(include ${inferenceIncludes})
88 |         message("-- Copying ${include}")
89 |         configure_file(${include} ${PROJECT_INCLUDE_DIR} COPYONLY)
90 | endforeach()
91 | 
92 | add_subdirectory(face-recognition)
93 | 
94 | # install the public headers (install(FILES) accepts the whole list at once)
95 | install(FILES ${inferenceIncludes} DESTINATION include/jetson-inference)
96 | # install the shared library
97 | install(TARGETS jetson-inference DESTINATION lib/jetson-inference EXPORT jetson-inferenceConfig)
98 | # install the cmake project, for importing
99 | install(EXPORT jetson-inferenceConfig DESTINATION share/jetson-inference/cmake)


--------------------------------------------------------------------------------
/CMakePreBuild.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # this script is automatically run from CMakeLists.txt
 3 | 
 4 | BUILD_ROOT=$PWD
 5 | TORCH_PREFIX=$PWD/torch
 6 | 
 7 | echo "[Pre-build]  dependency installer script running..."
 8 | echo "[Pre-build]  build root directory:       $BUILD_ROOT"
 9 | 
10 | 
11 | # break on errors
12 | #set -e
13 | 
14 | 
15 | # install packages
16 | sudo apt-get update
17 | sudo apt-get install -y libqt4-dev qt4-dev-tools libglew-dev glew-utils libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev libglib2.0-dev
18 | sudo apt-get update
19 | 
20 | sudo rm /usr/lib/aarch64-linux-gnu/libGL.so
21 | sudo ln -s /usr/lib/aarch64-linux-gnu/tegra/libGL.so /usr/lib/aarch64-linux-gnu/libGL.so
22 | 
23 | # maximize performance
24 | sudo nvpmodel -m 0
25 | sudo ~/jetson_clock.sh
26 | echo "[Pre-build]  Finished CMakePreBuild script"
27 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Face-Recognition
 2 | ===========================
 3 | This sample demonstrates the TensorRT 2.1 Plugin API.
 4 | 
 5 | Most of the functions are borrowed from jetson-inference; please check that project first if you need more DL samples:
 6 | 
 7 | <https://github.com/dusty-nv/jetson-inference>
 8 | ***
 9 | </br>
10 | 
11 | 
12 | ## Environment
13 | Jetson TX2
14 | </br>
15 | JetPack-3.1
16 | </br>
17 | </br>
18 | </br>
19 | 
20 | 
21 | ## Build
22 | ```C
23 | $ sudo apt-get install git cmake
24 | $ git clone https://github.com/AastaNV/Face-Recognition
25 | $ cd Face-Recognition
26 | $ mkdir build
27 | $ cd build
28 | $ cmake ..
29 | $ make
30 | ```
31 | 
32 | </br>
33 | 
34 | 
35 | ## Run
36 | ```C
37 | $ cd aarch64/bin
38 | $ ./face-recognition
39 | ```
40 | 
41 | </br>
42 | 
43 | 
44 | ## Plugin Layer
45 | **BboxMergeLayer**
46 | </br>
47 | This plugin layer demonstrates how to implement a CPU-based plugin layer
48 | </br>
49 | 1. Mark the required tensors as outputs
50 | 2. Allocate unified memory, so that CPU pointer == GPU pointer
51 | </br>
52 | 
53 | 
54 | **DataRoiLayer**
55 | </br>
56 | This plugin layer demonstrates how to implement a GPU plugin layer
57 | </br>
58 | 1. Get the input/output data pointers in the enqueue function
59 | 2. Launch the GPU kernel on the same CUDA stream
60 | </br>
61 | 
62 | 
63 | **RecognitionLayer**
64 | </br>
65 | This plugin layer demonstrates more complicated handling of a plugin layer
66 | </br>
67 | 1. This class can handle two different layers: selectBbox and summaryLabel
68 | 2. It defines some shared variables to make communication between the layers easier
69 | </br>
70 | </br>
71 | 
72 | 
73 | ## Support
74 | Please raise your problem in our forum to get immediate support.
75 | </br>
76 | https://devtalk.nvidia.com/default/board/189/jetson-tx2/
77 | </br>
78 | </br>
79 | </br>
80 | 


--------------------------------------------------------------------------------
/data/fontmapA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AastaNV/Face-Recognition/c93a12570ac0d925ead2ccaa152c539d89c3cb5d/data/fontmapA.png


--------------------------------------------------------------------------------
/data/merge.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AastaNV/Face-Recognition/c93a12570ac0d925ead2ccaa152c539d89c3cb5d/data/merge.caffemodel


--------------------------------------------------------------------------------
/face-recognition/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | 
2 | # Demo executable: built from every .cpp file in this directory and linked
3 | # against TensorRT and the jetson-inference library of the parent project.
4 | file(GLOB faceRecognitionSources *.cpp)
5 | 
6 | # Plain (keyword-less) link signature on purpose: cuda_add_executable() links
7 | # the CUDA libraries with it, and the two signatures cannot be mixed on a target.
8 | cuda_add_executable(face-recognition ${faceRecognitionSources})
9 | target_link_libraries(face-recognition nvcaffe_parser nvinfer jetson-inference)


--------------------------------------------------------------------------------
/face-recognition/face-recognition.cpp:
--------------------------------------------------------------------------------
  1 | #include <algorithm>
  2 | #include <csignal>
  3 | #include "gstCamera.h"
  4 | #include "glDisplay.h"
  5 | #include "glTexture.h"
  6 | 
  7 | #include "cudaNormalize.h"
  8 | #include "cudaOverlay.h"
  9 | #include "cudaFont.h"
 10 | #include "tensorNet.h"
 10 | 
 11 | 
 12 | using namespace nvinfer1;
 13 | using namespace nvcaffeparser1;
 14 | 
 15 | static const int BATCH_SIZE = 1;
 16 | static const int TIMING_ITERATIONS = 100;
 17 | 
 18 | const char* model  = "/home/nvidia/Face-Recognition/data/deploy.prototxt";
 19 | const char* weight = "/home/nvidia/Face-Recognition/data/merge.caffemodel";
 20 | const char* label  = "/home/nvidia/Face-Recognition/data/labels.txt";
 21 | 
 22 | const char* INPUT_BLOB_NAME = "data";
 23 | const char* OUTPUT_BLOB_COV = "coverage_fd";
 24 | const char* OUTPUT_BLOB_BOX = "bboxes_fd";
 25 | const char* OUTPUT_BLOB_NUM = "count_fd";
 26 | const char* OUTPUT_BLOB_SEL = "bbox_fr";
 27 | const char* OUTPUT_BLOB_IDX = "bbox_id";
 28 | const char* OUTPUT_BLOB_RES = "softmax_fr";
 29 | const char* OUTPUT_BLOB_LAB = "label";
 30 | 
 31 | #define DEFAULT_CAMERA -1        // -1 for onboard camera, or change to index of /dev/video V4L2 camera (>=0)    
 32 | 
 33 | cudaError_t cudaPreImageNetMean( float4* input, size_t inputWidth, size_t inputHeight, float* output, size_t outputWidth, size_t outputHeight, const float3& mean_value );
 34 | 
 35 | 
 36 | 
 37 | // Set to non-zero by sig_handler() when SIGINT arrives; polled by the main loop.
 38 | // volatile sig_atomic_t is the type the C and C++ standards guarantee can be
 39 | // written safely from a signal handler (a plain bool is not).
 40 | volatile sig_atomic_t signal_recieved = 0;
 41 | 
 42 | // SIGINT handler: requests termination of the main loop.
 43 | //   signo   number of the delivered signal; anything but SIGINT is ignored
 44 | void sig_handler(int signo)
 45 | {
 46 |     if( signo == SIGINT )
 47 |     {
 48 |         // NOTE(review): printf is not async-signal-safe; kept to preserve the console output.
 49 |         printf("received SIGINT\n");
 50 |         signal_recieved = 1;
 51 |     }
 52 | }
 47 | 
 48 | // Allocates one CUDA managed (unified memory) float buffer holding
 49 | // BATCH_SIZE * c * h * w elements.
 50 | //   dims   tensor dimensions of a single batch item
 51 | //   info   human-readable buffer name, used for logging only
 52 | // Returns the buffer, or NULL when the allocation fails (after logging the
 53 | // CUDA error; builds with assertions enabled still abort as before).
 54 | float* allocateMemory(DimsCHW dims, char* info)
 55 | {
 56 |     float* ptr = NULL;
 57 |     const size_t size = size_t(BATCH_SIZE) * dims.c() * dims.h() * dims.w();
 58 |     std::cout << "Allocate memory: " << info << std::endl;
 59 | 
 60 |     // The call must not live inside assert(): with NDEBUG the whole expression
 61 |     // would be compiled out and an uninitialised pointer would be returned.
 62 |     const cudaError_t status = cudaMallocManaged(&ptr, size*sizeof(float));
 63 |     if( status != cudaSuccess )
 64 |     {
 65 |         printf("cudaMallocManaged failed for %s: %s\n", info, cudaGetErrorString(status));
 66 |         assert(status == cudaSuccess);
 67 |         return NULL;
 68 |     }
 69 |     return ptr;
 70 | }
 57 | 
 58 | // load label info
 59 | // Reads one class label per line from a text file.
 60 | //   filename   path of the label file (must not be NULL)
 61 | // Returns the labels in file order; the vector is empty when the file cannot
 62 | // be opened.  A line of the form "n<8 chars> <label>" (9-character synset
 63 | // prefix followed by a space) contributes only <label>; any other line is
 64 | // taken verbatim.  Line terminators (LF or CRLF) are removed.  Blank lines
 65 | // yield empty labels, so the line number still equals the class index.
 66 | std::vector<std::string> loadLabelInfo(const char* filename)
 67 | {
 68 |     assert(filename);
 69 |     std::vector<std::string> labelInfo;
 70 | 
 71 |     FILE* f = fopen(filename, "r");
 72 |     if( !f )
 73 |     {
 74 |         printf("failed to open %s\n", filename);
 75 |         assert(0);
 76 |         // with NDEBUG the assert is a no-op: never hand a NULL stream to fgets
 77 |         return labelInfo;
 78 |     }
 79 | 
 80 |     const size_t syn = 9;  // length of synset prefix (in characters)
 81 |     char str[512];
 82 |     while( fgets(str, sizeof(str), f) != NULL )
 83 |     {
 84 |         // strip the terminator first, so that a last line without '\n'
 85 |         // does not lose its final character
 86 |         size_t len = strlen(str);
 87 |         while( len > 0 && (str[len-1] == '\n' || str[len-1] == '\r') )
 88 |             str[--len] = 0;
 89 | 
 90 |         if( len > syn && str[0] == 'n' && str[syn] == ' ' )
 91 |             labelInfo.push_back(str + syn + 1);   // skip "nXXXXXXXX "
 92 |         else
 93 |             labelInfo.push_back(str);              // no 9-character synset prefix (i.e. from DIGITS snapshot)
 94 |     }
 95 |     fclose(f);
 96 |     return labelInfo;
 97 | }
 94 | 
 95 | // Scales the detected boxes from network to image coordinates and draws their outlines.
 96 | //   input / output     image buffers passed to cudaRectOutlineOverlay as float4*
 97 | //   width / height     image size in pixels
 98 | //   scale_x / scale_y  factors applied to the x / y box coordinates
 99 | //   conf               unused here
100 | //   bbox               4 floats per box (x1, y1, x2, y2); SCALED IN PLACE, so the
101 | //                      caller sees image coordinates afterwards
102 | //   numBoundingBoxes   number of valid boxes in bbox
103 | // Returns false when the overlay kernel reports an error, true otherwise
104 | // (including when there is nothing to draw).
105 | bool DrawBoxes(float* input, float* output, uint32_t width, uint32_t height, const float scale_x, const float scale_y, float* conf, float* bbox, const int numBoundingBoxes)
106 | {
107 |     // Only handle single class here
108 |     const float4 color = make_float4( 0.0f, 255.0f, 175.0f, 100.0f);
109 | 
110 |     printf("%i bounding boxes detected\n", numBoundingBoxes);
111 |     for( int n=0; n < numBoundingBoxes; n++ )
112 |     {
113 |         float* bb = bbox + (n * 4);
114 |         bb[0] *= scale_x;
115 |         bb[1] *= scale_y;
116 |         bb[2] *= scale_x;
117 |         bb[3] *= scale_y;
118 |         printf("bounding box %i   (%f, %f)  (%f, %f)  w=%f  h=%f\n", n,  bb[0], bb[1], bb[2], bb[3], bb[2] - bb[0], bb[3] - bb[1]);
119 |     }
120 | 
121 |     if( numBoundingBoxes <= 0 )
122 |         return true;
123 | 
124 |     bool drawn = true;
125 |     if( CUDA_FAILED(cudaRectOutlineOverlay((float4*)input, (float4*)output, width, height, (float4*)bbox, numBoundingBoxes, color)))
126 |     {
127 |         printf("failed to draw boxes\n");
128 |         drawn = false;
129 |     }
130 | 
131 |     // cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its replacement
132 |     CUDA(cudaDeviceSynchronize());
133 |     return drawn;
134 | }
118 | 
119 | // Renders the label of every detected box into the image.
120 | //   font              text renderer; when NULL nothing is drawn
121 | //   input / output    image buffers handed to cudaFont::RenderOverlay as float4*
122 | //   width / height    image size in pixels
123 | //   lab               per-box class index stored as float; values <= -1 mean "no label"
124 | //   bbox              4 floats per box; the text is anchored at (bb[0], bb[3])
125 | //   labelInfo         class index -> label text
126 | //   numBoundingBoxes  number of valid entries in lab / bbox
127 | // A class index outside labelInfo is shown as "NAN" instead of indexing out of range.
128 | void ShowClassification(cudaFont* font, void* input, void* output, uint32_t width, uint32_t height,
129 |                         float* lab, float* bbox, std::vector<std::string> &labelInfo, const int numBoundingBoxes)
130 | {
131 |     char str[512];
132 | 
133 |     if( font == NULL )
134 |         return;
135 | 
136 |     for( int i=0; i<numBoundingBoxes; i++)
137 |     {
138 |         const int  cls   = int(lab[i]);
139 |         const bool known = (lab[i] > -1) && (cls >= 0) && (size_t(cls) < labelInfo.size());
140 | 
141 |         // bounded copy: a label longer than the buffer is truncated, not overrun
142 |         snprintf(str, sizeof(str), "%s", known ? labelInfo[cls].c_str() : "NAN");
143 |         std::cout << "bbox=" << i << " class=" << lab[i] << " label=" << str << std::endl;
144 | 
145 |         float* bb = bbox + (i * 4);
146 |         font->RenderOverlay((float4*)input, (float4*)output, width, height, (const char*)str, bb[0], bb[3], make_float4(255.0f, 255.0f, 255.0f, 255.0f));
147 |         CUDA(cudaDeviceSynchronize());   // replaces the deprecated cudaThreadSynchronize()
148 |     }
149 | }
137 | 
138 | 
139 | 
140 | // Program entry point: creates the camera, builds the TensorRT engine from the
141 | // Caffe model, then runs capture -> inference -> overlay -> display until SIGINT
142 | // is received or a frame cannot be processed.  Always returns 0.
143 | int main(int argc, char** argv)
144 | {
145 |     std::cout << "Building and running a GPU inference engine for " << model << ", N=" << BATCH_SIZE << "..." << std::endl;
146 | 
147 | 
148 |     /* camera */
149 |     if( signal(SIGINT, sig_handler) == SIG_ERR )
150 |         printf("\ncan't catch SIGINT\n");
151 | 
152 |     gstCamera* camera = gstCamera::Create(DEFAULT_CAMERA);
153 | 
154 |     if( !camera )
155 |     {
156 |         printf("failed to initialize video device\n");
157 |         return 0;
158 |     }
159 | 
160 |     printf("successfully initialized video device\n");
161 |     printf("    width:  %u\n", camera->GetWidth());
162 |     printf("   height:  %u\n", camera->GetHeight());
163 |     printf("    depth:  %u (bpp)\n\n", camera->GetPixelDepth());
164 | 
165 | 
166 |     /* create networks */
167 |     TensorNet tensorNet;
168 |     std::vector<std::string> labelInfo = loadLabelInfo(label);
169 |     tensorNet.caffeToTRTModel(model, weight, std::vector < std::string > {OUTPUT_BLOB_COV, OUTPUT_BLOB_BOX, OUTPUT_BLOB_NUM, OUTPUT_BLOB_SEL, OUTPUT_BLOB_IDX, OUTPUT_BLOB_RES, OUTPUT_BLOB_LAB}, BATCH_SIZE);
170 |     tensorNet.createInference();
171 | 
172 | 
173 |     /* openGL window */
174 |     cudaFont* font = cudaFont::Create();
175 |     glDisplay* display = glDisplay::Create();
176 |     glTexture* texture = NULL;
177 | 
178 |     if( !display ) {
179 |         printf("failed to create openGL display\n");
180 |     }
181 |     else
182 |     {
183 |         texture = glTexture::Create(camera->GetWidth(), camera->GetHeight(), GL_RGBA32F_ARB/*GL_RGBA8*/);
184 |         if( !texture ) printf("failed to create openGL texture\n");
185 |     }
186 | 
187 | 
188 |     /* open camera */
189 |     if( !camera->Open() )
190 |     {
191 |         printf("failed to open camera for streaming\n");
192 |         return 0;
193 |     }
194 | 
195 | 
196 |     /* prepare tensor */
197 |     DimsCHW dimsData = tensorNet.getTensorDims(INPUT_BLOB_NAME);
198 |     DimsCHW dimsConf = tensorNet.getTensorDims(OUTPUT_BLOB_COV);
199 |     DimsCHW dimsBbox = tensorNet.getTensorDims(OUTPUT_BLOB_BOX);
200 |     DimsCHW dimsNum  = tensorNet.getTensorDims(OUTPUT_BLOB_NUM);
201 |     DimsCHW dimsSel  = tensorNet.getTensorDims(OUTPUT_BLOB_SEL);
202 |     DimsCHW dimsIdx  = tensorNet.getTensorDims(OUTPUT_BLOB_IDX);
203 |     DimsCHW dimsRes  = tensorNet.getTensorDims(OUTPUT_BLOB_RES);
204 |     DimsCHW dimsLab  = tensorNet.getTensorDims(OUTPUT_BLOB_LAB);
205 | 
206 |     // one managed buffer per network binding, in the order used for inference below
207 |     float* data = allocateMemory(dimsData, (char*)"input blob");
208 |     float* conf = allocateMemory(dimsConf, (char*)"coverage");     // for cpu plugin layer
209 |     float* bbox = allocateMemory(dimsBbox, (char*)"box");          // for cpu plugin layer
210 |     float* num = allocateMemory(dimsNum, (char*)"count");
211 |     float* sel = allocateMemory(dimsSel, (char*)"selected bbox");  // for cpu plugin layer
212 |     float* idx = allocateMemory(dimsIdx, (char*)"selected index"); // for cpu plugin layer
213 |     float* res = allocateMemory(dimsRes, (char*)"softmax");        // for cpu plugin layer
214 |     float* lab = allocateMemory(dimsLab, (char*)"label");
215 | 
216 | 
217 |     /* main loop */
218 |     while( !signal_recieved )
219 |     {
220 |         void* imgCPU  = NULL;
221 |         void* imgCUDA = NULL;
222 |         void* imgRGBA = NULL;
223 | 
224 |         // After a failed capture or conversion the frame pointers may still be
225 |         // NULL, so stop streaming and fall through to the cleanup code below
226 |         // instead of handing them to the CUDA kernels.
227 |         if( !camera->Capture(&imgCPU, &imgCUDA, 1000) )
228 |         {
229 |             printf("failed to capture frame\n");
230 |             break;
231 |         }
232 | 
233 |         if( !camera->ConvertRGBA(imgCUDA, &imgRGBA) )
234 |         {
235 |             printf("failed to convert from NV12 to RGBA\n");
236 |             break;
237 |         }
238 | 
239 |         if( CUDA_FAILED(cudaPreImageNetMean((float4*)imgRGBA, camera->GetWidth(), camera->GetHeight(), data, dimsData.w(), dimsData.h(), make_float3(127.0f, 127.0f, 127.0f))) )
240 |         {
241 |             printf("cudaPreImageNetMean failed\n");
242 |             break;   // leave through the cleanup path rather than returning directly
243 |         }
244 | 
245 | 
246 |         void* buffers[] = {data, conf, bbox, num, sel, idx, res, lab};
247 |         tensorNet.imageInference(buffers, 8, BATCH_SIZE);
248 | 
249 |         const float scale_x = float(camera->GetWidth())  / float(dimsData.w());
250 |         const float scale_y = float(camera->GetHeight()) / float(dimsData.h());
251 | 
252 |         int numBoundingBoxes = int(num[0]);
253 |         DrawBoxes((float*)imgRGBA, (float*)imgRGBA, camera->GetWidth(), camera->GetHeight(), scale_x, scale_y, conf, bbox, numBoundingBoxes);
254 |         ShowClassification(font, imgRGBA, imgRGBA, camera->GetWidth(), camera->GetHeight(), lab, bbox, labelInfo, numBoundingBoxes);
255 | 
256 |         if( display != NULL )
257 |         {
258 |             char str[256];
259 |             snprintf(str, sizeof(str), "TensorRT build %x | %4.1f FPS", NV_GIE_VERSION, display->GetFPS());
260 |             display->SetTitle(str);
261 | 
262 |             display->UserEvents();
263 |             display->BeginRender();
264 | 
265 |             if( texture != NULL )
266 |             {
267 |                 // rescale the pixel range from [0,255] to [0,1] before upload
268 |                 CUDA(cudaNormalizeRGBA((float4*)imgRGBA, make_float2(0.0f, 255.0f),
269 |                                        (float4*)imgRGBA, make_float2(0.0f, 1.0f),
270 |                                        camera->GetWidth(), camera->GetHeight()));
271 | 
272 |                 void* tex_map = texture->MapCUDA();
273 |                 if( tex_map != NULL )
274 |                 {
275 |                     CUDA(cudaMemcpy(tex_map, imgRGBA, texture->GetSize(), cudaMemcpyDeviceToDevice));
276 |                     texture->Unmap();
277 |                 }
278 |                 texture->Render(100,100);
279 |             }
280 |             display->EndRender();
281 |         }
282 |     }
283 | 
284 | 
285 |     /* destroy */
286 |     tensorNet.destroy();
287 |     tensorNet.printTimes(TIMING_ITERATIONS);
288 | 
289 |     // release the managed buffers created by allocateMemory() (cudaFree accepts NULL)
290 |     float* const blobs[] = {data, conf, bbox, num, sel, idx, res, lab};
291 |     for( size_t i=0; i < sizeof(blobs)/sizeof(blobs[0]); i++ )
292 |         CUDA(cudaFree(blobs[i]));
293 | 
294 |     if( camera != NULL )
295 |     {
296 |         delete camera;
297 |         camera = NULL;
298 |     }
299 | 
300 |     if( display != NULL )
301 |     {
302 |         delete display;
303 |         display = NULL;
304 |     }
305 | 
306 |     std::cout << "Done." << std::endl;
307 |     return 0;
308 | }
291 | 


--------------------------------------------------------------------------------
/kernel.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * http://github.com/dusty-nv/jetson-inference
  3 |  */
  4 |  
  5 | #include "cudaUtility.h"
  6 | #include <iostream>
  7 | 
  8 | 
  9 | // gpuPreImageNet
 10 | // One thread per output pixel: picks the source pixel at the truncated scaled
 11 | // coordinate and stores its .z, .y and .x components into three consecutive
 12 | // planes of `output` (BGR order if the source is RGBA -- the local was named bgr).
 13 | //   scale     source/destination size ratio per axis
 14 | //   input     packed float4 source image, iWidth pixels per row
 15 | //   output    planar destination, 3 planes of oWidth * oHeight floats
 16 | __global__ void gpuPreImageNet( float2 scale, float4* input, int iWidth, float* output, int oWidth, int oHeight )
 17 | {
 18 | 	const int col = blockIdx.x * blockDim.x + threadIdx.x;
 19 | 	const int row = blockIdx.y * blockDim.y + threadIdx.y;
 20 | 
 21 | 	// threads of the rounded-up grid that fall outside the output do nothing
 22 | 	if( col < oWidth && row < oHeight )
 23 | 	{
 24 | 		const int planeSize = oWidth * oHeight;
 25 | 		const int dstIndex  = row * oWidth + col;
 26 | 
 27 | 		// source coordinate (float -> int conversion truncates)
 28 | 		const int srcCol = ((float)col * scale.x);
 29 | 		const int srcRow = ((float)row * scale.y);
 30 | 
 31 | 		const float4 px = input[ srcRow * iWidth + srcCol ];
 32 | 
 33 | 		output[planeSize * 0 + dstIndex] = px.z;
 34 | 		output[planeSize * 1 + dstIndex] = px.y;
 35 | 		output[planeSize * 2 + dstIndex] = px.x;
 36 | 	}
 37 | }
 29 | 
 30 | 
 31 | // cudaPreImageNet
 32 | // Host wrapper that launches gpuPreImageNet over the whole output image.
 33 | // Returns cudaErrorInvalidDevicePointer for a NULL buffer, cudaErrorInvalidValue
 34 | // for a zero dimension, otherwise the status reported by cudaGetLastError().
 35 | cudaError_t cudaPreImageNet( float4* input, size_t inputWidth, size_t inputHeight,
 36 | 				         float* output, size_t outputWidth, size_t outputHeight )
 37 | {
 38 | 	if( input == NULL || output == NULL )
 39 | 		return cudaErrorInvalidDevicePointer;
 40 | 
 41 | 	const bool hasEmptySide = (inputWidth == 0) || (outputWidth == 0)
 42 | 						|| (inputHeight == 0) || (outputHeight == 0);
 43 | 	if( hasEmptySide )
 44 | 		return cudaErrorInvalidValue;
 45 | 
 46 | 	// source pixels per destination pixel, for each axis
 47 | 	const float ratioX = float(inputWidth) / float(outputWidth);
 48 | 	const float ratioY = float(inputHeight) / float(outputHeight);
 49 | 	const float2 scale = make_float2(ratioX, ratioY);
 50 | 
 51 | 	// 8x8 threads per block, enough blocks to cover the output
 52 | 	const dim3 threads(8, 8);
 53 | 	const dim3 blocks(iDivUp(outputWidth,threads.x), iDivUp(outputHeight,threads.y));
 54 | 
 55 | 	gpuPreImageNet<<<blocks, threads>>>(scale, input, inputWidth, output, outputWidth, outputHeight);
 56 | 
 57 | 	return CUDA(cudaGetLastError());
 58 | }
 52 | 
 53 | 
 54 | 
 55 | 
 56 | // gpuPreImageNetMean
 57 | // Same resampling as gpuPreImageNet, but subtracts a per-plane mean:
 58 | // plane 0 = px.z - mean.x, plane 1 = px.y - mean.y, plane 2 = px.x - mean.z.
 59 | //   scale        source/destination size ratio per axis
 60 | //   input        packed float4 source image, iWidth pixels per row
 61 | //   output       planar destination, 3 planes of oWidth * oHeight floats
 62 | //   mean_value   value subtracted from output plane 0 / 1 / 2 respectively
 63 | __global__ void gpuPreImageNetMean( float2 scale, float4* input, int iWidth, float* output, int oWidth, int oHeight, float3 mean_value )
 64 | {
 65 | 	const int col = blockIdx.x * blockDim.x + threadIdx.x;
 66 | 	const int row = blockIdx.y * blockDim.y + threadIdx.y;
 67 | 
 68 | 	// threads of the rounded-up grid that fall outside the output do nothing
 69 | 	if( col < oWidth && row < oHeight )
 70 | 	{
 71 | 		const int planeSize = oWidth * oHeight;
 72 | 		const int dstIndex  = row * oWidth + col;
 73 | 
 74 | 		// source coordinate (float -> int conversion truncates)
 75 | 		const int srcCol = ((float)col * scale.x);
 76 | 		const int srcRow = ((float)row * scale.y);
 77 | 
 78 | 		const float4 px = input[ srcRow * iWidth + srcCol ];
 79 | 
 80 | 		output[planeSize * 0 + dstIndex] = px.z - mean_value.x;
 81 | 		output[planeSize * 1 + dstIndex] = px.y - mean_value.y;
 82 | 		output[planeSize * 2 + dstIndex] = px.x - mean_value.z;
 83 | 	}
 84 | }
 76 | 
 77 | 
 78 | // cudaPreImageNetMean
 79 | // Host wrapper that launches gpuPreImageNetMean over the whole output image.
 80 | // Returns cudaErrorInvalidDevicePointer for a NULL buffer, cudaErrorInvalidValue
 81 | // for a zero dimension, otherwise the status reported by cudaGetLastError().
 82 | cudaError_t cudaPreImageNetMean( float4* input, size_t inputWidth, size_t inputHeight,
 83 | 				             float* output, size_t outputWidth, size_t outputHeight, const float3& mean_value )
 84 | {
 85 | 	if( input == NULL || output == NULL )
 86 | 		return cudaErrorInvalidDevicePointer;
 87 | 
 88 | 	const bool hasEmptySide = (inputWidth == 0) || (outputWidth == 0)
 89 | 						|| (inputHeight == 0) || (outputHeight == 0);
 90 | 	if( hasEmptySide )
 91 | 		return cudaErrorInvalidValue;
 92 | 
 93 | 	// source pixels per destination pixel, for each axis
 94 | 	const float ratioX = float(inputWidth) / float(outputWidth);
 95 | 	const float ratioY = float(inputHeight) / float(outputHeight);
 96 | 	const float2 scale = make_float2(ratioX, ratioY);
 97 | 
 98 | 	// 8x8 threads per block, enough blocks to cover the output
 99 | 	const dim3 threads(8, 8);
100 | 	const dim3 blocks(iDivUp(outputWidth,threads.x), iDivUp(outputHeight,threads.y));
101 | 
102 | 	gpuPreImageNetMean<<<blocks, threads>>>(scale, input, inputWidth, output, outputWidth, outputHeight, mean_value);
103 | 
104 | 	return CUDA(cudaGetLastError());
105 | }
 99 | 
100 | 
101 | // Resamples a rectangular region of a 3-plane source image into a 3-plane
102 | // destination image using bilinear interpolation; one thread per output pixel.
103 | //   input / output      planar float images
104 | //   mean                not read by this kernel (the mean subtraction is commented out)
105 | //   input_w             row stride of the source planes, in pixels
106 | //   output_w/output_h   destination size in pixels
107 | //   in_plane_* / out_plane_*   element offsets of the three source / destination planes
108 | //   bbox_x/y/w/h        region of the source that is stretched over the destination
109 | __global__ void kernel_extract_roi(float* input, float* output, char* mean,
110 |     const int input_w, const int output_w, const int output_h,
111 |     const int in_plane_r, const int in_plane_g, const int in_plane_b,
112 |     const int out_plane_r, const int out_plane_g, const int out_plane_b,
113 |     const int bbox_x, const int bbox_y, const int bbox_w, const int bbox_h)
114 | {
115 |     uint x = blockIdx.x * blockDim.x + threadIdx.x;
116 |     uint y = blockIdx.y * blockDim.y + threadIdx.y;
117 | 
118 |     // threads outside the destination image have nothing to do
119 |     if( !(x < output_w && y < output_h) )
120 |         return;
121 | 
122 |     // continuous source coordinate that corresponds to this output pixel
123 |     float src_x = float(x) * bbox_w / output_w + bbox_x;
124 |     float src_y = float(y) * bbox_h / output_h + bbox_y;
125 | 
126 |     // the four surrounding source pixels
127 |     const int x0 = int(floor(src_x));
128 |     const int x1 = int( ceil(src_x));
129 |     const int y0 = int(floor(src_y));
130 |     const int y1 = int( ceil(src_y));
131 | 
132 |     // fractional position inside that cell and the resulting bilinear weights
133 |     float u = src_x-floor(src_x);
134 |     float v = src_y-floor(src_y);
135 |     float s[4] = { (1-u)*(1-v), u*(1-v), (1-u)*v, u*v };
136 | 
137 |     // offsets of the four neighbours inside one source plane (same order as s)
138 |     int map[4] = { y0*input_w + x0, y0*input_w + x1,
139 |                    y1*input_w + x0, y1*input_w + x1 };
140 | 
141 |     const int src_plane[3] = { in_plane_r,  in_plane_g,  in_plane_b  };
142 |     const int dst_plane[3] = { out_plane_r, out_plane_g, out_plane_b };
143 | 
144 |     int idx = y * output_w + x;
145 |     for( int c = 0; c < 3; c++ )
146 |     {
147 |         // weighted sum in the same left-to-right order for every plane, rounded to an integer value
148 |         output[idx+dst_plane[c]] = round( s[0]*input[map[0]+src_plane[c]]
149 |                                         + s[1]*input[map[1]+src_plane[c]]
150 |                                         + s[2]*input[map[2]+src_plane[c]]
151 |                                         + s[3]*input[map[3]+src_plane[c]] );
152 |     }
153 | }
143 | 
144 | // Launches kernel_extract_roi on `stream` to resample one region of a planar
145 | // source image into a planar destination image.
146 | //   input / output   planar float images (3 planes each)
147 | //   mean             forwarded to the kernel, which does not read it
148 | //   srcSize/dstSize  arrays where [1] is the height and [2] the width ([0] is not read)
149 | //   roi              {x1, y1, x2, y2} in source pixels; clamped to the source image
150 | //   stream           CUDA stream the kernel is enqueued on
151 | // The launch is asynchronous; a launch error is reported through the CUDA() macro.
152 | void convertROI(float* input, float* output, char* mean, const int* srcSize, const int* dstSize, const int* roi, cudaStream_t stream)
153 | {
154 |     const int src_h = srcSize[1], src_w = srcSize[2];
155 |     const int dst_h = dstSize[1], dst_w = dstSize[2];
156 | 
157 |     // nothing to write into an empty destination (and a zero-sized grid is an invalid launch)
158 |     if( dst_w <= 0 || dst_h <= 0 )
159 |         return;
160 | 
161 |     // element offsets of the three colour planes
162 |     int in_plane_r = 0;
163 |     int in_plane_g = src_h * src_w;
164 |     int in_plane_b = src_h * src_w * 2;
165 | 
166 |     int out_plane_r = 0;
167 |     int out_plane_g = dst_h * dst_w;
168 |     int out_plane_b = dst_h * dst_w * 2;
169 | 
170 |     // clamp the region so that every sampled neighbour stays inside the source
171 |     int bbox_x = min(max(roi[0], 0), src_w-1);
172 |     int bbox_y = min(max(roi[1], 0), src_h-1);
173 |     int bbox_w = min(max(roi[2]-roi[0], 0), src_w-bbox_x-1 );
174 |     int bbox_h = min(max(roi[3]-roi[1], 0), src_h-bbox_y-1 );
175 | 
176 |     // same grid computation as the other launchers in this file
177 |     dim3 dimBlock(32,32);
178 |     dim3 dimGrid(iDivUp(dst_w, dimBlock.x), iDivUp(dst_h, dimBlock.y));
179 | 
180 |     std::cout << "ROI: " << bbox_x << " " << bbox_y << " " << bbox_w << " " << bbox_h << std::endl;
181 | 
182 |     kernel_extract_roi <<< dimGrid, dimBlock, 0, stream >>> (input, output, mean,
183 |                        src_w, dst_w, dst_h,
184 |                        in_plane_r,   in_plane_g,  in_plane_b,
185 |                        out_plane_r, out_plane_g, out_plane_b,
186 |                        bbox_x, bbox_y, bbox_w, bbox_h);
187 | 
188 |     // a bad launch configuration would otherwise go unnoticed
189 |     CUDA(cudaGetLastError());
190 | }
170 | 
171 | 


--------------------------------------------------------------------------------
/pluginImplement.cpp:
--------------------------------------------------------------------------------
  1 | #include <pluginImplement.h>
  2 | 
  3 | std::vector<bboxProfile*> RecognitionLayer::bboxTable;
  4 | std::vector<tagProfile*> RecognitionLayer::tagTable;
  5 | 
  6 | // Tells whether two boxes cover (almost) the same area.
  7 | //   r1, r2   boxes stored as (x, y, z, w) = (x1, y1, x2, y2)
  8 | // Returns true when the intersection is larger than half of the rectangle
  9 | // enclosing both boxes, or when that enclosing rectangle has zero area.
 10 | bool bboxOverlap(const float4& r1, const float4& r2)
 11 | {
 12 |     // area of the rectangle enclosing both boxes (historically named "union")
 13 |     const float unionSize = (std::max(r1.z, r2.z)-std::min(r1.x, r2.x)) * (std::max(r1.w, r2.w)-std::min(r1.y, r2.y));
 14 |     if( unionSize == 0 ) return true;
 15 | 
 16 |     // extents of the intersection; negative when the boxes are apart on that axis
 17 |     const float interW = std::min(r1.z, r2.z)-std::max(r1.x, r2.x);
 18 |     const float interH = std::min(r1.w, r2.w)-std::max(r1.y, r2.y);
 19 | 
 20 |     // Boxes apart on BOTH axes used to give negative * negative = positive
 21 |     // "intersection", so far-away boxes could be reported as overlapping.
 22 |     if( interW <= 0 || interH <= 0 ) return false;
 23 | 
 24 |     return ((interW*interH)/unionSize) > 0.5;
 25 | }
 13 | 
 14 | /******************************/
 15 | // PluginFactory
 16 | /******************************/
 17 | // Creates the plugin object for a custom layer (overload taking layer weights).
 18 | //   layerName           must be one of the names accepted by isPlugin()
 19 | //   weights/nbWeights   not used by any of the plugins created here
 20 | // Each plugin may be created only once (asserted); the factory keeps ownership
 21 | // and returns a raw pointer to the new object.
 22 | nvinfer1::IPlugin* PluginFactory::createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights)
 23 | {
 24 |     assert(isPlugin(layerName));
 25 | 
 26 |     if( strcmp(layerName, "bboxMerge") == 0 )
 27 |     {
 28 |         assert(mBboxMergeLayer.get() == nullptr);
 29 |         mBboxMergeLayer.reset(new BboxMergeLayer());
 30 |         return mBboxMergeLayer.get();
 31 |     }
 32 | 
 33 |     if( strcmp(layerName, "dataRoi") == 0 )
 34 |     {
 35 |         assert(mDataRoiLayer.get() == nullptr);
 36 |         mDataRoiLayer.reset(new DataRoiLayer());
 37 |         return mDataRoiLayer.get();
 38 |     }
 39 | 
 40 |     if( strcmp(layerName, "selectBbox") == 0 )
 41 |     {
 42 |         assert(mSelectLayer.get() == nullptr);
 43 |         mSelectLayer.reset(new RecognitionLayer(FunctionType::SELECT));
 44 |         return mSelectLayer.get();
 45 |     }
 46 | 
 47 |     if( strcmp(layerName, "summaryLabel") == 0 )
 48 |     {
 49 |         assert(mSummaryLayer.get() == nullptr);
 50 |         mSummaryLayer.reset(new RecognitionLayer(FunctionType::SUMMARY));
 51 |         return mSummaryLayer.get();
 52 |     }
 53 | 
 54 |     // unknown layer name
 55 |     assert(0);
 56 |     return nullptr;
 57 | }
 50 | 
 51 | // Re-creates the plugin object for a custom layer from its serialized form.
 52 | //   layerName                 must be one of the names accepted by isPlugin()
 53 | //   serialData/serialLength   serialized plugin state, forwarded to the plugin constructor
 54 | // Each plugin may be created only once (asserted); the factory keeps ownership
 55 | // and returns a raw pointer to the new object.
 56 | IPlugin* PluginFactory::createPlugin(const char* layerName, const void* serialData, size_t serialLength)
 57 | {
 58 |     assert(isPlugin(layerName));
 59 | 
 60 |     if( strcmp(layerName, "bboxMerge") == 0 )
 61 |     {
 62 |         assert(mBboxMergeLayer.get() == nullptr);
 63 |         mBboxMergeLayer.reset(new BboxMergeLayer(serialData, serialLength));
 64 |         return mBboxMergeLayer.get();
 65 |     }
 66 | 
 67 |     if( strcmp(layerName, "dataRoi") == 0 )
 68 |     {
 69 |         assert(mDataRoiLayer.get() == nullptr);
 70 |         mDataRoiLayer.reset(new DataRoiLayer(serialData, serialLength));
 71 |         return mDataRoiLayer.get();
 72 |     }
 73 | 
 74 |     if( strcmp(layerName, "selectBbox") == 0 )
 75 |     {
 76 |         assert(mSelectLayer.get() == nullptr);
 77 |         mSelectLayer.reset(new RecognitionLayer(FunctionType::SELECT, serialData, serialLength));
 78 |         return mSelectLayer.get();
 79 |     }
 80 | 
 81 |     if( strcmp(layerName, "summaryLabel") == 0 )
 82 |     {
 83 |         assert(mSummaryLayer.get() == nullptr);
 84 |         mSummaryLayer.reset(new RecognitionLayer(FunctionType::SUMMARY, serialData, serialLength));
 85 |         return mSummaryLayer.get();
 86 |     }
 87 | 
 88 |     // unknown layer name
 89 |     assert(0);
 90 |     return nullptr;
 91 | }
 84 | // Returns true when `name` is one of the custom layers this factory can build.
 85 | bool PluginFactory::isPlugin(const char* name)
 86 | {
 87 |     static const char* const kPluginNames[] = { "bboxMerge", "dataRoi", "selectBbox", "summaryLabel" };
 88 |     for (const char* candidate : kPluginNames)
 89 |         if (strcmp(name, candidate) == 0) return true;
 90 |     return false;
 91 | }
 92 | // Detaches all four plugin layers from the factory so createPlugin() can be called again.
 93 | void PluginFactory::destroyPlugin()
 94 | {
 95 |     // unique_ptr::release() hands back the raw pointer without deleting it and
 96 |     // leaves the smart pointer null, so no explicit reset to nullptr is needed.
 97 |     // NOTE(review): the layer objects are therefore never freed here; confirm
 98 |     // whether that is intentional (e.g. an engine may still reference them).
 99 |     mBboxMergeLayer.release();
100 |     mDataRoiLayer.release();
101 |     mSelectLayer.release();
102 |     mSummaryLayer.release();
103 | }
104 | 
105 | 
106 | 
107 | /******************************/
108 | // BboxMerge Plugin Layer
109 | /******************************/
110 | BboxMergeLayer::BboxMergeLayer(const void* buffer, size_t size)
111 | {
112 |     // Serialized form (see serialize()): nine ints, C,H,W for data, conf and bbox in that order.
113 |     assert(size==(9*sizeof(int)));
114 |     const int* dims = static_cast<const int*>(buffer);
115 |     dimsData = DimsCHW{dims[0], dims[1], dims[2]};
116 |     dimsConf = DimsCHW{dims[3], dims[4], dims[5]};
117 |     dimsBbox = DimsCHW{dims[6], dims[7], dims[8]};
118 | }
119 | 
120 | Dims BboxMergeLayer::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
121 | {
122 |     assert(nbInputDims==3);     // the three inputs that configure() stores as data, conf and bbox
123 |     return DimsCHW(1, 1, 1);    // one value: the box count that enqueue() writes to outputs[0]
124 | }
125 | // Caches the grid geometry that enqueue() uses; always returns 0.
126 | int BboxMergeLayer::initialize()
127 | {
128 |     cls = dimsConf.c();                 // channel count of the conf input
129 |     oh  = dimsBbox.h();
130 |     ow  = dimsBbox.w();
131 |     owh = oh * ow;                      // cells per channel plane
132 |     // Cell size in data-input units: an integer quotient stored in a float member.
133 |     cell_height = dimsData.h() / oh;
134 |     cell_width  = dimsData.w() / ow;
135 |     return 0;
136 | }
137 | // Thresholds the coverage map, merges overlapping boxes per class and outputs the box count.
138 | int BboxMergeLayer::enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream)
139 | {
140 |     CHECK(cudaDeviceSynchronize());   // non-deprecated replacement for cudaThreadSynchronize()
141 |     std::vector< std::vector<float4> > rects;
142 |     rects.resize(cls);
143 |     // Both buffers are dereferenced by host code below, hence the synchronize above.
144 |     float* conf = (float*)inputs[1];
145 |     float* bbox = (float*)inputs[2];
146 |     // Pass 1: for every class, merge the boxes of all cells whose coverage exceeds 0.5.
147 |     for( uint32_t z=0; z < cls; z++ )
148 |     {
149 |         rects[z].reserve(owh);
150 |         for( uint32_t y=0; y < oh; y++ )
151 |         {
152 |             for( uint32_t x=0; x < ow; x++)
153 |             {
154 |                 const float coverage = conf[z * owh + y * ow + x];
155 |                 if( coverage > 0.5 )
156 |                 {
157 |                     const float mx = x * cell_width;
158 |                     const float my = y * cell_height;
159 |                     // The four bbox planes hold corner offsets relative to the cell origin.
160 |                     const float x1 = (bbox[0 * owh + y * ow + x] + mx);
161 |                     const float y1 = (bbox[1 * owh + y * ow + x] + my);
162 |                     const float x2 = (bbox[2 * owh + y * ow + x] + mx);
163 |                     const float y2 = (bbox[3 * owh + y * ow + x] + my);
164 |                     mergeRect( rects[z], make_float4(x1, y1, x2, y2) );
165 |                 }
166 |             }
167 |         }
168 |     }
169 |     // Pass 2: write the merged boxes back into the bbox buffer, four floats per box.
170 |     int n = 0;
171 |     const int numMax = dimsBbox.c() * dimsBbox.h() * dimsBbox.w() / 4;   // capacity in boxes, not floats
172 |     for( uint32_t z = 0; z < cls; z++ )
173 |     {
174 |         const uint32_t numBox = rects[z].size();
175 | 
176 |         for( uint32_t b = 0; b < numBox && n < numMax; b++ )
177 |         {
178 |             const float4 r = rects[z][b];
179 | 
180 |             bbox[n * 4 + 0] = r.x;
181 |             bbox[n * 4 + 1] = r.y;
182 |             bbox[n * 4 + 2] = r.z;
183 |             bbox[n * 4 + 3] = r.w;
184 |             n++;
185 |         }
186 |     }
187 |     // The single output value is the number of boxes written above.
188 |     float* count = (float*)outputs[0];
189 |     count[0] = float(n);
190 |     return 0;
191 | }
192 | // Size in bytes of the buffer that serialize() fills: nine ints.
193 | size_t BboxMergeLayer::getSerializationSize()
194 | {
195 |     return 9*sizeof(int);
196 | }
197 | // Writes C,H,W of data, conf and bbox as nine ints, the order the deserializing constructor reads.
198 | void BboxMergeLayer::serialize(void* buffer)
199 | {
200 |     int* out = static_cast<int*>(buffer);
201 |     out[0] = dimsData.c(); out[1] = dimsData.h(); out[2] = dimsData.w();
202 |     out[3] = dimsConf.c(); out[4] = dimsConf.h(); out[5] = dimsConf.w();
203 |     out[6] = dimsBbox.c(); out[7] = dimsBbox.h(); out[8] = dimsBbox.w();
204 | }
205 | // Records the first three extents of inputs 0, 1 and 2 as the data, conf and bbox dimensions.
206 | void BboxMergeLayer::configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int)
207 | {
208 |     dimsData = DimsCHW{inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]};
209 |     dimsConf = DimsCHW{inputs[1].d[0], inputs[1].d[1], inputs[1].d[2]};
210 |     dimsBbox = DimsCHW{inputs[2].d[0], inputs[2].d[1], inputs[2].d[2]};
211 | }
212 | // Grows the first stored box that overlaps `rect` so it covers both, or appends `rect` if none does.
213 | void BboxMergeLayer::mergeRect(std::vector<float4>& rects, const float4& rect)
214 | {
215 |     // (x, y) is the first corner and (z, w) the second, as packed by enqueue().
216 |     for( float4& box : rects )
217 |     {
218 |         if( !bboxOverlap(box, rect) )
219 |             continue;
220 | 
221 |         // Expand the stored box to the union of the two. Only the first
222 |         // overlapping box is touched; later ones are not examined.
223 |         if( rect.x < box.x )    box.x = rect.x;
224 |         if( rect.y < box.y )    box.y = rect.y;
225 |         if( rect.z > box.z )    box.z = rect.z;
226 |         if( rect.w > box.w )    box.w = rect.w;
227 |         return;
228 |     }
229 | 
230 |     // Nothing stored so far overlaps: keep `rect` as a new box.
231 |     rects.push_back(rect);
232 | }
233 | 
234 | 
235 | 
236 | /******************************/
237 | // DataRoi Plugin Layer
238 | /******************************/
239 | void convertROI(float* input, float* output, char* mean, const int* srcSize, const int* dstSize, const int* roi, cudaStream_t stream);
240 | // Rebuilds the layer from the six ints written by serialize().
241 | DataRoiLayer::DataRoiLayer(const void* buffer, size_t size)
242 | {
243 |     // Layout: C,H,W of the data input followed by C,H,W of the ROI output.
244 |     assert(size==(6*sizeof(int)));
245 |     const int* dims = static_cast<const int*>(buffer);
246 |     dimsData  = DimsCHW{dims[0], dims[1], dims[2]};
247 |     dimsRoi   = DimsCHW{dims[3], dims[4], dims[5]};
248 | }
249 | 
250 | Dims DataRoiLayer::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
251 | {
252 |     assert(nbInputDims==2);         // enqueue() reads inputs[0] (data) and inputs[1] (box)
253 |     return DimsCHW(3, 224, 224);    // fixed extent; configure() stores the same value in dimsRoi
254 | }
255 | // No per-layer state to prepare; always reports success (0).
256 | int DataRoiLayer::initialize()
257 | {
258 |     return 0;
259 | }
260 | // Rounds the four box values from inputs[1] to ints and passes everything on to convertROI().
261 | int DataRoiLayer::enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream)
262 | {
263 |     const float* bbox = (const float*)inputs[1];
264 |     // Source and destination C,H,W handed to convertROI().
265 |     int srcSize[] = {dimsData.c(), dimsData.h(), dimsData.w()};
266 |     int dstSize[] = {dimsRoi.c(), dimsRoi.h(), dimsRoi.w()};
267 |     int roi[4];
268 |     for( int k=0; k < 4; k++ ) roi[k] = int(bbox[k]+0.5);   // add 0.5 then truncate (rounding)
269 |     convertROI((float*)inputs[0], (float*)outputs[0], nullptr, srcSize, dstSize, roi, stream);
270 |     return 0;
271 | }
272 | // Size in bytes of the buffer that serialize() fills: six ints.
273 | size_t DataRoiLayer::getSerializationSize()
274 | {
275 |     return 6*sizeof(int);
276 | }
277 | // Writes C,H,W of dimsData then of dimsRoi, the order the deserializing constructor reads.
278 | void DataRoiLayer::serialize(void* buffer)
279 | {
280 |     int* d = reinterpret_cast<int*>(buffer);
281 |     d[0] = dimsData.c(); d[1] = dimsData.h(); d[2] = dimsData.w();
282 |     d[3] = dimsRoi.c();  d[4] = dimsRoi.h();  d[5] = dimsRoi.w();
283 | }
284 | // Stores the extents of input 0; the ROI extent is not taken from `outputs` but fixed here.
285 | void DataRoiLayer::configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int)
286 | {
287 |     dimsData  = DimsCHW{inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]};
288 |     dimsRoi   = DimsCHW{3, 224, 224};    // same constant that getOutputDimensions() returns
289 | }
290 | 
291 | 
292 | 
293 | /******************************/
294 | // Recognition Plugin Layer
295 | /******************************/
296 | RecognitionLayer::RecognitionLayer(FunctionType t, const void* buffer, size_t size)
297 | {
298 |     // Serialized form (see serialize()): a single int holding classNum.
299 |     assert(size==(sizeof(int)));
300 | 
301 |     type = t;
302 |     classNum = static_cast<const int*>(buffer)[0];
303 | }
304 | // Number of outputs for the configured role: 2 for SELECT, 1 for SUMMARY.
305 | int RecognitionLayer::getNbOutputs() const
306 | {
307 |     // A single return covers every path; an if/else-if pair could fall off the end.
308 |     return type==FunctionType::SELECT ? 2 : 1;
309 | }
310 | // SELECT: output 0 is 4x1x1, output 1 is 1x1x1.  SUMMARY: 1 x H x W of input 0.
311 | Dims RecognitionLayer::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
312 | {
313 |     if( type==FunctionType::SELECT )
314 |     {
315 |         assert(nbInputDims==2);
316 |         return index==0 ? DimsCHW(4, 1, 1):DimsCHW(1, 1, 1);
317 |     }
318 |     // FunctionType has only two values, so the remaining case is SUMMARY; handling
319 |     // it unconditionally guarantees that every path returns a value.
320 |     assert(type==FunctionType::SUMMARY);
321 |     assert(nbInputDims==4);
322 |     classNum = inputs[3].d[0];    // side effect: class count comes from the first extent of input 3
323 |     return DimsCHW(1, inputs[0].d[1], inputs[0].d[2]);
324 | }
325 | // Waits for the device, then runs select() or summary() according to the layer's role.
326 | int RecognitionLayer::enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream)
327 | {
328 |     CHECK(cudaDeviceSynchronize());   // non-deprecated replacement for cudaThreadSynchronize()
329 |     // SUMMARY is the only other FunctionType, so the second return covers all remaining paths.
330 |     if( type==FunctionType::SELECT ) return select(inputs, outputs);
331 |     return summary(inputs, outputs);
332 | }
333 | // Size in bytes of the buffer that serialize() fills: one int.
334 | size_t RecognitionLayer::getSerializationSize()
335 | {
336 |     return sizeof(int);
337 | }
338 | // Writes classNum as the single int that the deserializing constructor reads back.
339 | void RecognitionLayer::serialize(void* buffer)
340 | {
341 |     int* d = reinterpret_cast<int*>(buffer);
342 |     d[0] = classNum;    // classNum is a size_t member, narrowed to int here
343 | }
344 | // Chooses the box to pass on this call: a newly seen box if there is one, else a random table entry.
345 | int RecognitionLayer::select(const void*const *inputs, void** outputs)
346 | {
347 |     float* bbox = (float*)inputs[0];
348 |     float* count = (float*)inputs[1];
349 |     float* select = (float*)outputs[0];
350 |     float* index = (float*)outputs[1];
351 |     index[0] = -1;   // default "nothing selected", so summary() never reads a stale index
352 |     int queryIdx = -1;
353 |     int bboxNum = static_cast<int>(count[0]);
354 |     // Signed counters: a negative count skips the loop instead of wrapping to a huge size_t bound.
355 |     for( int i=0,id=0; i < bboxNum; i++,id+=4 ) {
356 |         float4 p = make_float4(bbox[id+0], bbox[id+1], bbox[id+2], bbox[id+3]);
357 |         if( bboxExist(p, i)<0 ) {
358 |             if( queryIdx < 0 ) {   // only the first unmatched box per call is added to the table
359 |                 bboxTable.push_back(new bboxProfile(p, i));
360 |                 queryIdx = bboxTable.size()-1;
361 |             }
362 |         }
363 |     }
364 |     // No new box in this call: fall back to a random table entry.
365 |     if( queryIdx < 0 && bboxTable.size() > 0 ) queryIdx = rand() % bboxTable.size();
366 |     if( queryIdx > -1 ) {
367 |         int queryNum = bboxTable[queryIdx]->bboxNum;
368 |         if( queryNum > -1 ) {   // bboxNum stays -1 for entries not matched since summary() reset it
369 |             select[0] = bboxTable[queryIdx]->pos.x;
370 |             select[1] = bboxTable[queryIdx]->pos.y;
371 |             select[2] = bboxTable[queryIdx]->pos.z;
372 |             select[3] = bboxTable[queryIdx]->pos.w;
373 |             index[0] = queryIdx;
374 |             std::cout << "pass "<< queryIdx << " to trt" << std::endl;
375 |             std::cout << select[0] << " " << select[1] << " " << select[2] << " " << select[3] << " " << std::endl;
376 |         }
377 |     }
378 |     // index[0] keeps its -1 default whenever no valid entry was selected above.
379 |     return 0;
380 | }
381 | // Stores the best-scoring class for the queried box, then emits one label per detected box.
382 | int RecognitionLayer::summary(const void*const *inputs, void** outputs)
383 | {
384 |     float* count = (float*)inputs[1];
385 |     float* index = (float*)inputs[2];
386 |     float* res = (float*)inputs[3];
387 |     float* label = (float*)outputs[0];
388 | 
389 |     int bboxNum = static_cast<int>(count[0]);
390 |     int queryIdx = static_cast<int>(index[0]);
391 |     if( queryIdx > -1 && queryIdx < static_cast<int>(bboxTable.size()) ) {   // ignore out-of-range indices
392 |         int classIndex = -1;
393 |         float classMax = -1.0f;
394 |         // Arg-max over the classNum scores in res.
395 |         for( size_t n=0; n < classNum; n++ )
396 |         {
397 |             const float value = res[n];
398 |             if( value > classMax )
399 |             {
400 |                 classIndex = n;
401 |                 classMax   = value;
402 |             }
403 |         }
404 |         bboxTable[queryIdx]->labelID = classIndex;
405 |         std::cout << "ID=" <<queryIdx << ", label=" << classIndex << std::endl;
406 | /*
407 |        if( tagExist(classIndex, queryIdx) < 0 ) {
408 |            tagTable.push_back(new tagProfile(queryIdx,classIndex));
409 |            bboxTable[queryIdx]->labelID = tagTable.size()-1;
410 |         }
411 | */
412 | 
413 |     }
414 |     // Each detection gets the label of the table entry matched to it, or -1 when there is none.
415 |     for( int i=0; i<bboxNum; i++ )
416 |     {
417 |         label[i] = -1;
418 |         for( int j=0; j<bboxTable.size(); j++)
419 |         {
420 |             if( bboxTable[j]->bboxNum==i )
421 |                 label[i] = bboxTable[j]->labelID;
422 |         }
423 | /*
424 |         for( int j=0; j<bboxTable.size(); j++)
425 |         {
426 |             if( bboxTable[j]->bboxNum==i )
427 |             {
428 |                 if( bboxTable[j]->labelID>-1 && tagTable[bboxTable[j]->labelID]->bboxID==i ) label[i] = tagTable[bboxTable[j]->labelID]->label;
429 |                 break;
430 |             }
431 |         }
432 | */
433 |     }
434 |     // Remove and free the entries that were not matched in this round, then reset the survivors.
435 |     for( int i=bboxTable.size()-1; i>=0; i-- )
436 |         if( bboxTable[i]->bboxNum==-1 ) { delete bboxTable[i]; bboxTable.erase(bboxTable.begin()+i); }
437 |     for( int i=0; i<bboxTable.size(); i++) bboxTable[i]->bboxNum = -1;
438 | 
439 |     return 0;
440 | }
441 | // Finds the first table entry overlapping `p`; on a hit it takes over `p` and `idx`.
442 | int RecognitionLayer::bboxExist(const float4& p, const int idx)
443 | {
444 |     for( bboxProfile* entry : bboxTable )
445 |     {
446 |         if( !bboxOverlap(entry->pos,p) ) continue;
447 | 
448 |         entry->pos = p;
449 |         entry->bboxNum = idx;
450 |         return 0;       // 0: an overlapping entry was found and updated
451 |     }
452 | 
453 |     return -1;          // -1: nothing in the table overlaps `p`
454 | }
455 | // Rebinds the tag carrying `label` to box `idx`; returns 0 if such a tag exists, else -1.
456 | int RecognitionLayer::tagExist(int label, int idx)
457 | {
458 |     for( size_t i = 0; i < tagTable.size(); i++ )
459 |     {
460 |         tagProfile* tag = tagTable[i];
461 |         if( label != tag->label ) continue;
462 | 
463 |         // Clear the label of the box that held this tag before, then bind the tag to `idx`.
464 |         if( tag->bboxID>-1 )  bboxTable[tag->bboxID]->labelID = -1;
465 |         tag->bboxID = idx;
466 |         bboxTable[idx]->labelID = i;
467 |         return 0;
468 |     }
469 |     return -1;
470 | }
471 | 


--------------------------------------------------------------------------------
/pluginImplement.h:
--------------------------------------------------------------------------------
  1 | #ifndef __PLUGIN_LAYER_H__
  2 | #define __PLUGIN_LAYER_H__
  3 | 
  4 | #include <cassert>
  5 | #include <iostream>
  6 | #include <cudnn.h>
  7 | #include <cstring>
  8 | 
  9 | #include "NvCaffeParser.h"
 10 | #include "NvInferPlugin.h"
 11 | 
 12 | #define CHECK(status)                                                                                           \
 13 |     {                                                                                                                           \
 14 |         if (status != 0)                                                                                                \
 15 |         {                                                                                                                               \
 16 |             std::cout << "Cuda failure: " << cudaGetErrorString(status) \
 17 |                       << " at line " << __LINE__                                                        \
 18 |                       << std::endl;                                                                     \
 19 |             abort();                                                                                                    \
 20 |         }                                                                                                                               \
 21 |     }
 22 | 
 23 | using namespace nvinfer1;
 24 | using namespace nvcaffeparser1;
 25 | using namespace plugin;
 26 | // Role of a RecognitionLayer instance; RecognitionLayer::enqueue() dispatches on it.
 27 | enum FunctionType
 28 | {
 29 |     SELECT=0,   // enqueue() calls select()
 30 |     SUMMARY     // enqueue() calls summary()
 31 | };
 32 | // One tracked box in RecognitionLayer::bboxTable.
 33 | class bboxProfile {
 34 | public:
 35 |     bboxProfile(float4& p, int idx): pos(p), bboxNum(idx) {}
 36 | 
 37 |     float4 pos;         // box coordinates as four floats
 38 |     int bboxNum = -1;   // index of the matching detection in the current round, -1 if unmatched
 39 |     int labelID = -1;   // class index assigned by summary(), -1 until one is assigned
 40 | };
 41 | // Association between a label and a bboxTable index, used by RecognitionLayer::tagExist().
 42 | class tagProfile {
 43 | public:
 44 |     tagProfile(int b, int l): bboxID(b), label(l) {}
 45 |     int bboxID;     // index into bboxTable; tagExist() treats values below 0 as "no box"
 46 |     int label;
 47 | };
 48 | // Plugin layer that thresholds a coverage map and merges overlapping boxes (see its enqueue()).
 49 | class BboxMergeLayer : public IPlugin
 50 | {
 51 | public:
 52 |     BboxMergeLayer() {};
 53 |     BboxMergeLayer(const void* buffer, size_t size);   // rebuilds the layer from serialize() output
 54 | 
 55 |     inline int getNbOutputs() const override { return 1; };
 56 |     Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
 57 | 
 58 |     int initialize() override;
 59 |     inline void terminate() override { ; }
 60 | 
 61 |     inline size_t getWorkspaceSize(int) const override { return 0; }
 62 |     int enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream) override;
 63 | 
 64 |     size_t getSerializationSize() override;
 65 |     void serialize(void* buffer) override;
 66 | 
 67 |     void configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override;
 68 | 
 69 | protected:
 70 |     void mergeRect(std::vector<float4>& rects, const float4& rect);
 71 |     // Extents of the three inputs, filled by configure() or the deserializing constructor.
 72 |     DimsCHW dimsData;
 73 |     DimsCHW dimsConf;
 74 |     DimsCHW dimsBbox;
 75 |     // Values derived from the extents above by initialize(); unset until it runs.
 76 |     int ow;
 77 |     int oh;
 78 |     int owh;
 79 |     int cls;
 80 |     float cell_width;
 81 |     float cell_height;
 82 | };
 83 | // Plugin layer with two roles chosen by FunctionType: select() for SELECT, summary() for SUMMARY.
 84 | class RecognitionLayer : public IPlugin
 85 | {
 86 | public:
 87 |     RecognitionLayer(FunctionType t) { type = t; };
 88 |     RecognitionLayer(FunctionType t, const void* buffer, size_t size);   // rebuilds from serialize() output
 89 | 
 90 |     int getNbOutputs() const override;
 91 |     Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
 92 | 
 93 |     inline int initialize() override { return 0; }
 94 |     inline void terminate() override { ; }
 95 | 
 96 |     inline size_t getWorkspaceSize(int) const override { return 0; }
 97 |     int enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream) override;
 98 | 
 99 |     size_t getSerializationSize() override;
100 |     void serialize(void* buffer) override;
101 | 
102 |     inline void configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override { ; };
103 | 
104 | protected:
105 |     int select(const void*const *inputs, void** outputs);
106 |     int summary(const void*const *inputs, void** outputs);
107 |     int bboxExist(const float4& pos, const int idx);
108 |     int tagExist(int label, int idx);
109 |     // Zero by default: the one-argument constructor does not assign it, yet serialize() reads it.
110 |     size_t classNum = 0;
111 |     FunctionType type;
112 |     static std::vector<bboxProfile*> bboxTable;   // static, so shared by every RecognitionLayer instance
113 |     static std::vector<tagProfile*> tagTable;
114 | };
115 | // Plugin layer that forwards the data input and a box to convertROI() (see its enqueue()).
116 | class DataRoiLayer : public IPlugin
117 | {
118 | public:
119 |     DataRoiLayer() {};
120 |     DataRoiLayer(const void* buffer, size_t size);   // rebuilds the layer from serialize() output
121 | 
122 |     inline int getNbOutputs() const override { return 1; };
123 |     Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
124 | 
125 |     int initialize() override;
126 |     inline void terminate() override { ; }
127 | 
128 |     inline size_t getWorkspaceSize(int) const override { return 0; }
129 |     int enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream) override;
130 | 
131 |     size_t getSerializationSize() override;
132 |     void serialize(void* buffer) override;
133 | 
134 |     void configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override;
135 | 
136 | protected:
137 |     DimsCHW dimsData;   // extents of input 0, set by configure() or the deserializing constructor
138 |     DimsCHW dimsRoi;    // output extents; configure() fixes them to 3x224x224
139 | };
140 | // Creates the four custom layers by name, for both the Caffe parser and engine deserialization.
141 | class PluginFactory : public nvinfer1::IPluginFactory, public nvcaffeparser1::IPluginFactory
142 | {
143 | public:
144 |     virtual nvinfer1::IPlugin* createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights) override;
145 |     IPlugin* createPlugin(const char* layerName, const void* serialData, size_t serialLength) override;
146 | 
147 |     bool isPlugin(const char* name) override;
148 |     void destroyPlugin();
149 |     // One slot per layer name; createPlugin() asserts that a slot is empty before filling it.
150 |     std::unique_ptr<BboxMergeLayer> mBboxMergeLayer{ nullptr };
151 |     std::unique_ptr<DataRoiLayer> mDataRoiLayer{ nullptr };
152 |     std::unique_ptr<RecognitionLayer> mSelectLayer{ nullptr };
153 |     std::unique_ptr<RecognitionLayer> mSummaryLayer{ nullptr };
154 | };
155 | 
156 | #endif
157 | 


--------------------------------------------------------------------------------
/script/merge_model.py:
--------------------------------------------------------------------------------
 1 | import sys 
 2 | import caffe
 3 | 
 4 | deploy_fd = 'detection.prototxt'
 5 | deploy_fr = 'classification.prototxt'
 6 | deploy_merge = 'deploy.prototxt'
 7 | 
 8 | model_fd = 'detection.caffemodel'
 9 | model_fr = 'classification.caffemodel'
10 | model_merge = 'snapshot_iter_1.caffemodel'
11 | 
12 | net_fd = caffe.Net(deploy_fd,model_fd, caffe.TEST)
13 | net_fr = caffe.Net(deploy_fr,model_fr, caffe.TEST)
14 | net_merge = caffe.Net(deploy_merge,model_merge, caffe.TEST)
15 | 
16 | fp1 = open(deploy_fd, 'r')
17 | fp2 = open(deploy_fr, 'r')
18 | line1 = fp1.readlines()
19 | line2 = fp2.readlines()
20 | 
21 | for l in line1:
22 |     tmp = l.replace(' ','')
23 |     field = tmp.split(':')
24 |     if( field[0]=='name'):
25 |         source = field[1].split('"')[1]
26 |         target = source+'_fd'
27 |         try:
28 |             for i in range(len(net_fd.params[source])):
29 |                 net_merge.params[target][i].data[...] = net_fd.params[source][i].data[...]
30 |             print 'update weight: ' + target
31 |         except KeyError:
32 |             print 'ignore weight: ' + target
33 | 
34 | for l in line2:
35 |     tmp = l.replace(' ','')
36 |     field = tmp.split(':')
37 |     if( field[0]=='name'):
38 |         source = field[1].split('"')[1]
39 |         target = source+'_fr'
40 |         try:
41 |             for i in range(len(net_fr.params[source])):
42 |                 net_merge.params[target][i].data[...] = net_fr.params[source][i].data[...]
43 |             print 'update weight: ' + target
44 |         except KeyError:
45 |             print 'ignore weight: ' + target
46 | 
47 | fp1.close()
48 | fp2.close()
49 | 
50 | net_merge.save('merge.caffemodel')
51 | 


--------------------------------------------------------------------------------
/script/rename_model.py:
--------------------------------------------------------------------------------
 1 | deploy_fd = '/home/vyu/Face/JEP/script/detection.prototxt'
 2 | deploy_fr = '/home/vyu/Face/JEP/script/classification.prototxt'
 3 | deploy_merge = '/home/vyu/Face/JEP/script/deploy.prototxt'
 4 | 
 5 | fp1 = open(deploy_fd, 'r')
 6 | fp2 = open(deploy_fr, 'r')
 7 | fp3 = open(deploy_merge, 'w')
 8 | 
 9 | line1 = fp1.readlines()
10 | line2 = fp2.readlines()
11 | 
12 | for l in line1:
13 |     tmp = l.replace(' ','')
14 |     field = tmp.split(':')
15 |     if( field[0]=='name' or field[0]=='top' or field[0]=='bottom'):
16 |         source = field[1].split('"')[1]
17 |         l = l.replace(source,(source+'_fd'))
18 |         print 'proto replace: ' + source
19 |     fp3.write(l)
20 | 
21 | for l in line2:
22 |     tmp = l.replace(' ','')
23 |     field = tmp.split(':')
24 |     if( field[0]=='name' or field[0]=='top' or field[0]=='bottom'):
25 |         source = field[1].split('"')[1]
26 |         l = l.replace(source,(source+'_fr'))
27 |         print 'proto replace: ' + source
28 |     fp3.write(l)
29 | 
30 | fp1.close()
31 | fp2.close()
32 | fp3.close()
33 | 


--------------------------------------------------------------------------------
/tensorNet.cpp:
--------------------------------------------------------------------------------
  1 | #include <algorithm>
  2 | #include "tensorNet.h"
  3 | 
  4 | 
  5 | // Parses a Caffe deploy/model pair through the plugin factory and keeps the serialized engine.
  6 | void TensorNet::caffeToTRTModel(const std::string& deployFile,
  7 |                                 const std::string& modelFile,
  8 |                                 const std::vector<std::string>& outputs,
  9 |                                 unsigned int maxBatchSize)
 10 | {
 11 |     IBuilder* builder = createInferBuilder(gLogger);
 12 |     INetworkDefinition* network = builder->createNetwork();
 13 | 
 14 |     ICaffeParser* parser = createCaffeParser();
 15 |     parser->setPluginFactory(&pluginFactory);
 16 |     // Parse as half precision only when the builder reports fast FP16 support.
 17 |     bool useFp16 = builder->platformHasFastFp16();
 18 |     DataType modelDataType = useFp16 ? DataType::kHALF : DataType::kFLOAT;
 19 | 
 20 |     const IBlobNameToTensor *blobNameToTensor = parser->parse(deployFile.c_str(),
 21 |                                                               modelFile.c_str(),
 22 |                                                               *network,
 23 |                                                               modelDataType);
 24 |     assert(blobNameToTensor != nullptr);
 25 |     for (auto& s : outputs) {
 26 |         ITensor* tensor = blobNameToTensor->find(s.c_str());
 27 |         assert(tensor != nullptr && "requested output blob not found in the network");
 28 |         network->markOutput(*tensor);
 29 |     }
 30 |     builder->setMaxBatchSize(maxBatchSize);
 31 |     builder->setMaxWorkspaceSize(16 << 20);
 32 |     if(useFp16) builder->setHalf2Mode(true);
 33 | 
 34 |     ICudaEngine* engine = builder->buildCudaEngine(*network);
 35 |     assert(engine);
 36 |     network->destroy();
 37 |     parser->destroy();
 38 |     // Keep only the serialized form; createInference() deserializes it later.
 39 |     gieModelStream = engine->serialize();
 40 |     engine->destroy();
 41 |     builder->destroy();
 42 |     pluginFactory.destroyPlugin();
 43 |     shutdownProtobufLibrary();
 44 | }
 45 | // Deserializes the stored engine through the plugin factory and prints its bindings.
 46 | void TensorNet::createInference()
 47 | {
 48 |     infer = createInferRuntime(gLogger);
 49 |     engine = infer->deserializeCudaEngine(gieModelStream->data(), gieModelStream->size(), &pluginFactory);
 50 | 
 51 |     printf("Bindings after deserializing:\n");
 52 |     for (int bi = 0; bi < engine->getNbBindings(); bi++) {
 53 |         const bool isInput = engine->bindingIsInput(bi);
 54 |         printf(isInput ? "Binding %d (%s): Input.\n" : "Binding %d (%s): Output.\n", bi, engine->getBindingName(bi));
 55 |     }
 56 | }
 57 | // Executes the engine once over `buffers`, using an execution context created for this call.
 58 | void TensorNet::imageInference(void** buffers, int nbBuffer, int batchSize)
 59 | {
 60 |     assert(engine->getNbBindings()==nbBuffer);   // caller must supply one buffer per binding
 61 | 
 62 |     IExecutionContext* context = engine->createExecutionContext();
 63 |     context->setProfiler(&gProfiler);            // layer times are reported to gProfiler
 64 |     context->execute(batchSize, buffers);
 65 |     context->destroy();
 66 | }
 67 | // Allocates a device buffer per binding, runs the engine `iteration` times, then frees the buffers.
 68 | void TensorNet::timeInference(int iteration, int batchSize)
 69 | {
 70 |     int inputIdx = 0;
 71 |     size_t inputSize = 0;
 72 |     // std::vector rather than a runtime-sized array, which is not standard C++.
 73 |     std::vector<void*> buffers(engine->getNbBindings(), nullptr);
 74 | 
 75 |     for (int b = 0; b < engine->getNbBindings(); b++) {
 76 |         DimsCHW dims = static_cast<DimsCHW&&>(engine->getBindingDimensions(b));
 77 |         size_t size = batchSize * dims.c() * dims.h() * dims.w() * sizeof(float);
 78 |         CHECK(cudaMalloc(&buffers[b], size));
 79 | 
 80 |         if(engine->bindingIsInput(b) == true)
 81 |         {
 82 |             inputIdx = b;
 83 |             inputSize = size;
 84 |         }
 85 |     }
 86 | 
 87 |     IExecutionContext* context = engine->createExecutionContext();
 88 |     context->setProfiler(&gProfiler);
 89 |     // Only the last input binding found above is zero-filled.
 90 |     CHECK(cudaMemset(buffers[inputIdx], 0, inputSize));
 91 | 
 92 |     for (int i = 0; i < iteration;i++) context->execute(batchSize, buffers.data());
 93 | 
 94 |     context->destroy();
 95 |     for (int b = 0; b < engine->getNbBindings(); b++) CHECK(cudaFree(buffers[b]));
 96 | }
 97 | // Returns the dimensions of the binding called `name`, or {0,0,0} when no binding matches.
 98 | DimsCHW TensorNet::getTensorDims(const char* name)
 99 | {
100 |     for (int b = 0; b < engine->getNbBindings(); b++) {
101 |         if( strcmp(name, engine->getBindingName(b)) != 0 ) continue;
102 |         return static_cast<DimsCHW&&>(engine->getBindingDimensions(b));
103 |     }
104 |     return DimsCHW{0,0,0};
105 | }
106 | // Prints the per-layer times collected in gProfiler, averaged over `iteration` runs.
107 | void TensorNet::printTimes(int iteration)
108 | {
109 |     gProfiler.printLayerTimes(iteration);
110 | }
111 | // Tears down the inference objects created by createInference().
112 | void TensorNet::destroy()
113 | {
114 |     engine->destroy();
115 |     infer->destroy();
116 |     pluginFactory.destroyPlugin();   // last: the engine was deserialized with these plugin objects
117 | }
118 | 


--------------------------------------------------------------------------------
/tensorNet.h:
--------------------------------------------------------------------------------
 1 | #include "pluginImplement.h"
 2 | 
 3 | using namespace nvinfer1;
 4 | using namespace nvcaffeparser1;
 5 | 
 6 | 
 7 | 
 8 | /******************************/
 9 | // TensorRT utility
10 | /******************************/
11 | class Logger : public ILogger
12 | {
13 |     void log(Severity severity, const char* msg) override
14 |     {
15 |         if (severity!=Severity::kINFO) std::cout << msg << std::endl;   // kINFO messages are dropped, all others printed
16 |     }
17 | };
18 | 
19 | struct Profiler : public IProfiler
20 | {
21 |     typedef std::pair<std::string, float> Record;   // (layer name, accumulated milliseconds)
22 |     std::vector<Record> mProfile;
23 |     // Adds `ms` to the record for layerName, creating the record the first time the name is seen.
24 |     virtual void reportLayerTime(const char* layerName, float ms)
25 |     {
26 |         auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; });
27 | 
28 |         if (record == mProfile.end()) mProfile.push_back(std::make_pair(layerName, ms));
29 |         else record->second += ms;
30 |     }
31 |     // Prints each layer's accumulated time divided by TIMING_ITERATIONS, then the same for the total.
32 |     void printLayerTimes(const int TIMING_ITERATIONS)
33 |     {
34 |         float totalTime = 0;
35 |         for (size_t i = 0; i < mProfile.size(); i++)
36 |         {
37 |             printf("%-40.40s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / TIMING_ITERATIONS);
38 |             totalTime += mProfile[i].second;
39 |         }
40 |         printf("Time over all layers: %4.3f\n", totalTime / TIMING_ITERATIONS);
41 |     }
42 | };
43 | 
44 | 
45 | 
46 | /******************************/
47 | // TensorRT Main
48 | /******************************/
49 | class TensorNet
50 | {
51 | public:
52 |     void caffeToTRTModel(const std::string& deployFile,
53 |                          const std::string& modelFile,
54 |                          const std::vector<std::string>& outputs,
55 |                          unsigned int maxBatchSize);
56 |     void createInference();
57 |     // Both run the engine; timeInference() allocates its own buffers and repeats `iteration` times.
58 |     void imageInference(void** buffers, int nbBuffer, int batchSize);
59 |     void timeInference(int iteration, int batchSize);
60 | 
61 |     DimsCHW getTensorDims(const char* name);
62 | 
63 |     void printTimes(int iteration);
64 |     void destroy();
65 | 
66 | private:
67 |     PluginFactory pluginFactory;
68 |     IHostMemory *gieModelStream{nullptr};
69 |     // Null until createInference() assigns them, matching gieModelStream's initialization.
70 |     IRuntime* infer{nullptr};
71 |     ICudaEngine* engine{nullptr};
72 | 
73 |     Logger gLogger;
74 |     Profiler gProfiler;
75 | };
76 | 


--------------------------------------------------------------------------------
/util/camera/gst-camera/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Builds the gst-camera executable from every .cpp file in this directory.
2 | file(GLOB gstCameraSources *.cpp)
3 | file(GLOB gstCameraIncludes *.h )    # collected but not referenced below
4 | 
5 | add_executable(gst-camera ${gstCameraSources})
6 | target_link_libraries(gst-camera jetson-inference)
7 | 


--------------------------------------------------------------------------------
/util/camera/gst-camera/gst-camera.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * inference-101
  3 |  */
  4 | 
  5 | #include "gstCamera.h"
  6 | 
  7 | #include "glDisplay.h"
  8 | #include "glTexture.h"
  9 | 
 10 | #include <stdio.h>
 11 | #include <signal.h>
 12 | #include <unistd.h>
 13 | 
 14 | #include "cudaNormalize.h"
 15 | 
 16 | 
 17 | bool signal_recieved = false;
 18 | // SIGINT handler: announces the signal and raises the flag that main()'s loop polls.
 19 | void sig_handler(int signo)
 20 | {
 21 | 	if( signo != SIGINT )
 22 | 		return;
 23 | 
 24 | 	printf("received SIGINT\n");
 25 | 	signal_recieved = true;
 26 | }
 27 | 
 28 | 
 29 | /*
 30 |  * gst-camera test program -- captures frames from the default gstCamera device, converts
 31 |  * them to float4 RGBA with CUDA and renders them in an openGL window until SIGINT arrives.
 32 |  * Always returns 0, including when initialization fails.
 33 |  */
 34 | int main( int argc, char** argv )
 35 | {
 36 | 	printf("gst-camera\n  args (%i):  ", argc);
 37 | 
 38 | 	for( int i=0; i < argc; i++ )
 39 | 		printf("%i [%s]  ", i, argv[i]);
 40 | 		
 41 | 	printf("\n");
 42 | 	
 43 | 	if( signal(SIGINT, sig_handler) == SIG_ERR )
 44 | 		printf("\ncan't catch SIGINT\n");
 45 | 
 46 | 	/*
 47 | 	 * create the camera device
 48 | 	 */
 49 | 	gstCamera* camera = gstCamera::Create();
 50 | 	
 51 | 	if( !camera )
 52 | 	{
 53 | 		printf("\ngst-camera:  failed to initialize video device\n");
 54 | 		return 0;
 55 | 	}
 56 | 	
 57 | 	printf("\ngst-camera:  successfully initialized video device\n");
 58 | 	printf("    width:  %u\n", camera->GetWidth());
 59 | 	printf("   height:  %u\n", camera->GetHeight());
 60 | 	printf("    depth:  %u (bpp)\n", camera->GetPixelDepth());
 61 | 
 62 | 	/*
 63 | 	 * create openGL window
 64 | 	 */
 65 | 	glDisplay* display = glDisplay::Create();
 66 | 	
 67 | 	if( !display )
 68 | 		printf("\ngst-camera:  failed to create openGL display\n");
 69 | 
 70 | 	// placeholder texel data (0,1,1,1) handed to the texture at creation; released on exit
 71 | 	const size_t texSz = camera->GetWidth() * camera->GetHeight() * sizeof(float4);
 72 | 	float4* texIn = (float4*)malloc(texSz);
 73 | 
 74 | 	if( texIn != NULL )
 75 | 		for( uint32_t y=0; y < camera->GetHeight(); y++ )
 76 | 			for( uint32_t x=0; x < camera->GetWidth(); x++ )
 77 | 				texIn[y*camera->GetWidth()+x] = make_float4(0.0f, 1.0f, 1.0f, 1.0f);
 78 | 
 79 | 	glTexture* texture = glTexture::Create(camera->GetWidth(), camera->GetHeight(), GL_RGBA32F_ARB/*GL_RGBA8*/, texIn);
 80 | 
 81 | 	if( !texture )
 82 | 		printf("gst-camera:  failed to create openGL texture\n");
 83 | 
 84 | 	/*
 85 | 	 * start streaming
 86 | 	 */
 87 | 	if( !camera->Open() )
 88 | 	{
 89 | 		printf("\ngst-camera:  failed to open camera for streaming\n");
 90 | 		free(texIn);		// release what was created above instead of leaking it on this early exit
 91 | 		delete display;		// deleting NULL is a no-op
 92 | 		delete camera;
 93 | 		return 0;
 94 | 	}
 95 | 	
 96 | 	printf("\ngst-camera:  camera open for streaming\n");
 97 | 	
 98 | 	while( !signal_recieved )
 99 | 	{
100 | 		void* imgCPU  = NULL;
101 | 		void* imgCUDA = NULL;
102 | 		void* imgRGBA = NULL;
103 | 		
104 | 		// get the latest frame; if any stage fails, the remaining stages are skipped for
105 | 		// this iteration so that NULL pointers never reach the CUDA calls below
106 | 		bool frameReady = camera->Capture(&imgCPU, &imgCUDA, 1000);
107 | 
108 | 		if( !frameReady )
109 | 			printf("\ngst-camera:  failed to capture frame\n");
110 | 		else
111 | 			printf("gst-camera:  recieved new frame  CPU=0x%p  GPU=0x%p\n", imgCPU, imgCUDA);
112 | 		
113 | 		// convert from YUV to RGBA
114 | 		if( frameReady && !camera->ConvertRGBA(imgCUDA, &imgRGBA) )
115 | 		{
116 | 			printf("gst-camera:  failed to convert from NV12 to RGBA\n");
117 | 			frameReady = false;
118 | 		}
119 | 
120 | 		// rescale image pixel intensities
121 | 		if( frameReady )
122 | 		{
123 | 			CUDA(cudaNormalizeRGBA((float4*)imgRGBA, make_float2(0.0f, 255.0f), 
124 | 							   (float4*)imgRGBA, make_float2(0.0f, 1.0f), 
125 | 							   camera->GetWidth(), camera->GetHeight()));
126 | 		}
127 | 
128 | 		// update display (still serviced when no frame arrived, as before)
129 | 		if( display != NULL )
130 | 		{
131 | 			display->UserEvents();
132 | 			display->BeginRender();
133 | 
134 | 			if( texture != NULL )
135 | 			{
136 | 				// only touch the texture contents when a converted frame is available
137 | 				void* tex_map = frameReady ? texture->MapCUDA() : NULL;
138 | 
139 | 				if( tex_map != NULL )
140 | 				{
141 | 					cudaMemcpy(tex_map, imgRGBA, texture->GetSize(), cudaMemcpyDeviceToDevice);
142 | 					CUDA(cudaDeviceSynchronize());
143 | 
144 | 					texture->Unmap();
145 | 				}
146 | 
147 | 				texture->Render(100,100);		
148 | 			}
149 | 
150 | 			display->EndRender();
151 | 		}
152 | 	}
153 | 	
154 | 	printf("\ngst-camera:  un-initializing video device\n");
155 | 	
156 | 	/*
157 | 	 * shutdown the camera device
158 | 	 */
159 | 	camera->Close();	// stop the pipeline first: its callbacks hold a pointer to the camera
160 | 	delete camera;
161 | 	delete display;		// deleting NULL is a no-op
162 | 	free(texIn);
163 | 	
164 | 	printf("gst-camera:  video device has been un-initialized.\n");
165 | 	printf("gst-camera:  this concludes the test of the video device.\n");
166 | 	return 0;
167 | }
168 | 


--------------------------------------------------------------------------------
/util/camera/gstCamera.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * inference-101
  3 |  */
  4 | 
  5 | #include "gstCamera.h"
  6 | #include "gstUtility.h"
  7 | 
  8 | #include <gst/gst.h>
  9 | #include <gst/app/gstappsink.h>
 10 | 
 11 | #include <sstream> 
 12 | #include <unistd.h>
 13 | #include <string.h>
 14 | 
 15 | #include <QMutex>
 16 | #include <QWaitCondition>
 17 | 
 18 | #include "cudaMappedMemory.h"
 19 | #include "cudaYUV.h"
 20 | #include "cudaRGB.h"
 21 | 
 22 | 
 23 | 
 24 | // constructor -- every member starts out in its "nothing allocated / nothing received" state
 25 | gstCamera::gstCamera()
 26 | {
 27 | 	// synchronization primitives shared between Capture() and checkBuffer()
 28 | 	mRingMutex = new QMutex();
 29 | 	mWaitMutex = new QMutex();
 30 | 	mWaitEvent = new QWaitCondition();
 31 | 
 32 | 	// gstreamer handles, assigned later by init()
 33 | 	mPipeline = NULL;
 34 | 	mBus      = NULL;
 35 | 	mAppSink  = NULL;
 36 | 
 37 | 	// source selection (-1 is the onboard camera) and image geometry
 38 | 	mV4L2Device = -1;
 39 | 	mWidth = mHeight = mDepth = mSize = 0;
 40 | 
 41 | 	// ringbuffer bookkeeping
 42 | 	mLatestRetrieved  = false;
 43 | 	mLatestRingbuffer = 0;
 44 | 	mLatestRGBA       = 0;
 45 | 	for( uint32_t slot = NUM_RINGBUFFERS; slot-- > 0; )
 46 | 	{
 47 | 		mRingbufferCPU[slot] = NULL;
 48 | 		mRingbufferGPU[slot] = NULL;
 49 | 		mRGBA[slot]          = NULL;
 50 | 	}
 51 | }
 52 | // destructor -- stops the pipeline, then releases what init() and the constructor acquired
 53 | gstCamera::~gstCamera()
 54 | {
 55 | 	if( mPipeline != NULL )	gst_element_set_state(mPipeline, GST_STATE_NULL);	// no more streaming callbacks
 56 | 	if( mAppSink != NULL )	gst_object_unref(mAppSink);		// reference from gst_bin_get_by_name()
 57 | 	if( mBus != NULL )		gst_object_unref(mBus);			// reference from gst_pipeline_get_bus()
 58 | 	if( mPipeline != NULL )	gst_object_unref(mPipeline);
 59 | 	delete mWaitEvent;  delete mWaitMutex;  delete mRingMutex;	// NOTE(review): the CUDA ringbuffers are still not freed
 60 | }
 61 | // ConvertRGBA -- converts a captured frame into float4 RGBA, written to the next slot of the RGBA ring
 62 | bool gstCamera::ConvertRGBA( void* input, void** output, bool zeroCopy )
 63 | {
 64 | 	if( !input || !output )
 65 | 		return false;
 66 | 	// the RGBA ring is allocated on the first call; zeroCopy selects mapped memory over cudaMalloc()
 67 | 	if( !mRGBA[0] )
 68 | 	{
 69 | 		const size_t size = mWidth * mHeight * sizeof(float4);
 70 | 
 71 | 		for( uint32_t n=0; n < NUM_RINGBUFFERS; n++ )
 72 | 		{
 73 | 			if( zeroCopy )
 74 | 			{
 75 | 				void* cpuPtr = NULL;
 76 | 				void* gpuPtr = NULL;
 77 | 
 78 | 				if( !cudaAllocMapped(&cpuPtr, &gpuPtr, size) )
 79 | 				{
 80 | 					printf(LOG_CUDA "gstCamera -- failed to allocate zeroCopy memory for %ux%u RGBA texture\n", mWidth, mHeight);	// was "%ux%xu"
 81 | 					return false;
 82 | 				}
 83 | 
 84 | 				if( cpuPtr != gpuPtr )
 85 | 				{
 86 | 					printf(LOG_CUDA "gstCamera -- zeroCopy memory has different pointers, please use a UVA-compatible GPU\n");
 87 | 					return false;
 88 | 				}
 89 | 
 90 | 				mRGBA[n] = gpuPtr;
 91 | 			}
 92 | 			else
 93 | 			{
 94 | 				if( CUDA_FAILED(cudaMalloc(&mRGBA[n], size)) )
 95 | 				{
 96 | 					printf(LOG_CUDA "gstCamera -- failed to allocate memory for %ux%u RGBA texture\n", mWidth, mHeight);
 97 | 					return false;
 98 | 				}
 99 | 			}
100 | 		}
101 | 		
102 | 		printf(LOG_CUDA "gstreamer camera -- allocated %u RGBA ringbuffers\n", NUM_RINGBUFFERS);
103 | 	}
104 | 	// convert into the current slot; the input layout depends on the capture source
105 | 	if( onboardCamera() )
106 | 	{
107 | 		// onboard camera is NV12
108 | 		if( CUDA_FAILED(cudaNV12ToRGBAf((uint8_t*)input, (float4*)mRGBA[mLatestRGBA], mWidth, mHeight)) )
109 | 			return false;
110 | 	}
111 | 	else
112 | 	{
113 | 		// USB webcam is RGB
114 | 		if( CUDA_FAILED(cudaRGBToRGBAf((uchar3*)input, (float4*)mRGBA[mLatestRGBA], mWidth, mHeight)) )
115 | 			return false;
116 | 	}
117 | 	// hand the filled slot to the caller and advance the ring for the next call
118 | 	*output     = mRGBA[mLatestRGBA];
119 | 	mLatestRGBA = (mLatestRGBA + 1) % NUM_RINGBUFFERS;
120 | 	return true;
121 | }
122 | 
123 | 
124 | // onEOS -- appsink end-of-stream callback; it only logs the event
125 | void gstCamera::onEOS(_GstAppSink* sink, void* user_data)
126 | {
127 | 	fputs(LOG_GSTREAMER "gstreamer decoder onEOS\n", stdout);
128 | }
129 | 
130 | 
131 | // onPreroll -- appsink preroll callback; logs the event and reports GST_FLOW_OK
132 | GstFlowReturn gstCamera::onPreroll(_GstAppSink* sink, void* user_data)
133 | {
134 | 	fputs(LOG_GSTREAMER "gstreamer decoder onPreroll\n", stdout);
135 | 	return GST_FLOW_OK;
136 | }
137 | 
138 | 
139 | // onBuffer -- appsink new-sample callback; user_data is the gstCamera registered in init()
140 | GstFlowReturn gstCamera::onBuffer(_GstAppSink* sink, void* user_data)
141 | {
142 | 	gstCamera* const camera = static_cast<gstCamera*>(user_data);
143 | 
144 | 	if( camera != NULL )
145 | 	{
146 | 		// pull the new sample into the ringbuffer, then drain pending bus messages
147 | 		camera->checkBuffer();
148 | 		camera->checkMsgBus();
149 | 	}
150 | 
151 | 	return GST_FLOW_OK;
152 | }
153 | 	
154 | 
155 | // Capture -- waits for checkBuffer() to signal a frame (timeout is forwarded unchanged to QWaitCondition::wait)
156 | bool gstCamera::Capture( void** cpu, void** cuda, unsigned long timeout )
157 | {
158 | 	mWaitMutex->lock();
159 |     const bool wait_result = mWaitEvent->wait(mWaitMutex, timeout);
160 |     mWaitMutex->unlock();
161 | 	// no wake-up within the timeout -- NOTE(review): a wakeAll() issued before wait() began is presumably missed; confirm
162 | 	if( !wait_result )
163 | 		return false;
164 | 	// snapshot the newest ringbuffer index and mark it as consumed, under the ring lock
165 | 	mRingMutex->lock();
166 | 	const uint32_t latest = mLatestRingbuffer;
167 | 	const bool retrieved = mLatestRetrieved;
168 | 	mLatestRetrieved = true;
169 | 	mRingMutex->unlock();
170 | 	
171 | 	// skip if it was already retrieved
172 | 	if( retrieved )
173 | 		return false;
174 | 	// hand out the CPU and GPU pointers of that ringbuffer (either output argument may be NULL)
175 | 	if( cpu != NULL )
176 | 		*cpu = mRingbufferCPU[latest];
177 | 	
178 | 	if( cuda != NULL )
179 | 		*cuda = mRingbufferGPU[latest];
180 | 	
181 | 	return true;
182 | }
183 | 
184 | 
185 | // release_return -- used by checkBuffer() once the buffer is mapped: unmaps it, drops the sample, returns
186 | #define release_return { gst_buffer_unmap(gstBuffer, &map); gst_sample_unref(gstSample); return; }
187 | 
188 | // checkBuffer -- pulls the next sample from the appsink, copies it into the next ringbuffer
189 | // and wakes any thread blocked in Capture().  Runs on the thread that invokes onBuffer().
190 | void gstCamera::checkBuffer()
191 | {
192 | 	if( !mAppSink )
193 | 		return;
194 | 
195 | 	// block waiting for the buffer
196 | 	GstSample* gstSample = gst_app_sink_pull_sample(mAppSink);
197 | 	
198 | 	if( !gstSample )
199 | 	{
200 | 		printf(LOG_GSTREAMER "gstreamer camera -- gst_app_sink_pull_sample() returned NULL...\n");
201 | 		return;
202 | 	}
203 | 	
204 | 	GstBuffer* gstBuffer = gst_sample_get_buffer(gstSample);
205 | 	
206 | 	if( !gstBuffer )
207 | 	{
208 | 		printf(LOG_GSTREAMER "gstreamer camera -- gst_sample_get_buffer() returned NULL...\n");
209 | 		gst_sample_unref(gstSample);	// the pulled sample is ours to release even without a buffer
210 | 		return;
211 | 	}
212 | 	
213 | 	// retrieve
214 | 	GstMapInfo map; 
215 | 
216 | 	if(	!gst_buffer_map(gstBuffer, &map, GST_MAP_READ) ) 
217 | 	{
218 | 		printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer_map() failed...\n");
219 | 		gst_sample_unref(gstSample);	// nothing was mapped, so only the sample is released
220 | 		return;
221 | 	}
222 | 	
223 | 	void* gstData = map.data; //GST_BUFFER_DATA(gstBuffer);
224 | 	const uint32_t gstSize = map.size; //GST_BUFFER_SIZE(gstBuffer);
225 | 	
226 | 	if( !gstData )
227 | 	{
228 | 		printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer had NULL data pointer...\n");
229 | 		release_return;
230 | 	}
231 | 	
232 | 	// retrieve caps
233 | 	GstCaps* gstCaps = gst_sample_get_caps(gstSample);
234 | 	
235 | 	if( !gstCaps )
236 | 	{
237 | 		printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer had NULL caps...\n");
238 | 		release_return;
239 | 	}
240 | 	
241 | 	GstStructure* gstCapsStruct = gst_caps_get_structure(gstCaps, 0);
242 | 	
243 | 	if( !gstCapsStruct )
244 | 	{
245 | 		printf(LOG_GSTREAMER "gstreamer camera -- gst_caps had NULL structure...\n");
246 | 		release_return;
247 | 	}
248 | 	
249 | 	// get width & height of the buffer
250 | 	int width  = 0;
251 | 	int height = 0;
252 | 	
253 | 	if( !gst_structure_get_int(gstCapsStruct, "width", &width) ||
254 | 		!gst_structure_get_int(gstCapsStruct, "height", &height) )
255 | 	{
256 | 		printf(LOG_GSTREAMER "gstreamer camera -- gst_caps missing width/height...\n");
257 | 		release_return;
258 | 	}
259 | 	
260 | 	if( width < 1 || height < 1 )
261 | 		release_return;
262 | 	
263 | 	mWidth  = width;
264 | 	mHeight = height;
265 | 	mDepth  = (gstSize * 8) / (width * height);
266 | 	mSize   = gstSize;
267 | 	
268 | 	// make sure ringbuffer is allocated
269 | 	if( !mRingbufferCPU[0] )
270 | 	{
271 | 		for( uint32_t n=0; n < NUM_RINGBUFFERS; n++ )
272 | 		{
273 | 			if( !cudaAllocMapped(&mRingbufferCPU[n], &mRingbufferGPU[n], gstSize) )
274 | 				printf(LOG_CUDA "gstreamer camera -- failed to allocate ringbuffer %u  (size=%u)\n", n, gstSize);
275 | 		}
276 | 		printf(LOG_CUDA "gstreamer camera -- allocated %u ringbuffers, %u bytes each\n", NUM_RINGBUFFERS, gstSize);
277 | 	}
278 | 	
279 | 	// copy to next ringbuffer
280 | 	const uint32_t nextRingbuffer = (mLatestRingbuffer + 1) % NUM_RINGBUFFERS;		
281 | 	
282 | 	if( !mRingbufferCPU[nextRingbuffer] )
283 | 	{
284 | 		printf(LOG_CUDA "gstreamer camera -- ringbuffer %u is not allocated, dropping frame\n", nextRingbuffer);
285 | 		release_return;
286 | 	}
287 | 	// NOTE(review): assumes no frame is larger than the first one, which sized the ringbuffers -- confirm
288 | 	memcpy(mRingbufferCPU[nextRingbuffer], gstData, gstSize);
289 | 	gst_buffer_unmap(gstBuffer, &map); 
290 | 	gst_sample_unref(gstSample);
291 | 	// update and signal sleeping threads
292 | 	mRingMutex->lock();
293 | 	mLatestRingbuffer = nextRingbuffer;
294 | 	mLatestRetrieved  = false;
295 | 	mRingMutex->unlock();
296 | 	mWaitEvent->wakeAll();
297 | }
298 | 
299 | 
300 | 
301 | // buildLaunchStr -- composes the gst_parse_launch() description for the selected source into mLaunchStr
302 | bool gstCamera::buildLaunchStr()
303 | {
304 | 	// gst-launch-1.0 nvcamerasrc fpsRange="30.0 30.0" ! 'video/x-raw(memory:NVMM), width=(int)1920, height=(int)1080, format=(string)I420, framerate=(fraction)30/1' ! \
305 | 	// nvvidconv flip-method=2 ! 'video/x-raw(memory:NVMM), format=(string)I420' ! fakesink silent=false -v
306 | 	std::ostringstream launch;
307 | 
308 | 	if( !onboardCamera() )
309 | 	{
310 | 		// V4L2 device through v4l2src, RGB frames
311 | 		launch << "v4l2src device=/dev/video" << mV4L2Device << " ! "
312 | 		       << "video/x-raw, width=(int)" << mWidth << ", height=(int)" << mHeight << ", "
313 | 		       << "format=RGB ! videoconvert ! video/x-raw, format=RGB ! videoconvert !"
314 | 		       << "appsink name=mysink";
315 | 	}
316 | 	else
317 | 	{
318 | 		// onboard camera through nvcamerasrc, NV12 frames
319 | 		launch << "nvcamerasrc fpsRange=\"30.0 30.0\" ! video/x-raw(memory:NVMM), width=(int)" << mWidth
320 | 		       << ", height=(int)" << mHeight << ", format=(string)NV12 ! nvvidconv flip-method=2 ! "
321 | 		       << "video/x-raw ! appsink name=mysink";
322 | 	}
323 | 
324 | 	mLaunchStr = launch.str();
325 | 
326 | 	printf(LOG_GSTREAMER "gstreamer decoder pipeline string:\n");
327 | 	printf("%s\n", mLaunchStr.c_str());
328 | 	return true;
329 | }
330 | 
331 | 
332 | // Create -- initializes gstreamer, then builds a camera for the requested resolution and source.
333 | // Returns NULL on failure; a camera whose init() failed is deleted rather than leaked.
334 | gstCamera* gstCamera::Create( uint32_t width, uint32_t height, int v4l2_device )
335 | {
336 | 	if( !gstreamerInit() )
337 | 	{
338 | 		printf(LOG_GSTREAMER "failed to initialize gstreamer API\n");
339 | 		return NULL;
340 | 	}
341 | 	
342 | 	gstCamera* cam = new gstCamera();	// plain new reports failure by throwing, so no NULL check
343 | 	
344 | 	cam->mV4L2Device = v4l2_device;
345 | 	cam->mWidth      = width;
346 | 	cam->mHeight     = height;
347 | 	cam->mDepth      = cam->onboardCamera() ? 12 : 24;	// NV12 or RGB
348 | 	cam->mSize       = (width * height * cam->mDepth) / 8;
349 | 
350 | 	if( !cam->init() )
351 | 	{
352 | 		printf(LOG_GSTREAMER "failed to init gstCamera\n");
353 | 		delete cam;
354 | 		return NULL;
355 | 	}
356 | 	
357 | 	return cam;
358 | }
359 | 
360 | 
361 | // Create -- overload that uses DefaultWidth x DefaultHeight;
362 | // v4l2_device < 0 selects the onboard camera, >= 0 selects /dev/video<N>
363 | gstCamera* gstCamera::Create( int v4l2_device )
364 | {
365 | 	return Create( DefaultWidth, DefaultHeight, v4l2_device );
366 | }
367 | 
368 | 
369 | // init -- parses the launch string into a pipeline, fetches its bus and appsink, and registers the callbacks
370 | bool gstCamera::init()
371 | {
372 | 	GError* err = NULL;
373 | 
374 | 	// build pipeline string
375 | 	if( !buildLaunchStr() )
376 | 	{
377 | 		printf(LOG_GSTREAMER "gstreamer decoder failed to build pipeline string\n");
378 | 		return false;
379 | 	}
380 | 
381 | 	// launch pipeline
382 | 	mPipeline = gst_parse_launch(mLaunchStr.c_str(), &err);
383 | 
384 | 	if( err != NULL )
385 | 	{
386 | 		printf(LOG_GSTREAMER "gstreamer decoder failed to create pipeline\n");
387 | 		printf(LOG_GSTREAMER "   (%s)\n", err->message);
388 | 		g_error_free(err);
389 | 		// gst_parse_launch() can hand back an element together with an error; release it here
390 | 		if( mPipeline != NULL ) { gst_object_unref(mPipeline); mPipeline = NULL; }
391 | 		return false;
392 | 	}
393 | 
394 | 	GstPipeline* pipeline = GST_PIPELINE(mPipeline);
395 | 
396 | 	if( !pipeline )
397 | 	{
398 | 		printf(LOG_GSTREAMER "gstreamer failed to cast GstElement into GstPipeline\n");
399 | 		return false;
400 | 	}	
401 | 
402 | 	// retrieve pipeline bus
403 | 	mBus = gst_pipeline_get_bus(pipeline);
404 | 
405 | 	if( !mBus )
406 | 	{
407 | 		printf(LOG_GSTREAMER "gstreamer failed to retrieve GstBus from pipeline\n");
408 | 		return false;
409 | 	}
410 | 
411 | 	// no bus watch is installed: the bus is polled from checkMsgBus() instead of a gmainloop
412 | 
413 | 	// get the appsink (named "mysink" by buildLaunchStr)
414 | 	GstElement* appsinkElement = gst_bin_get_by_name(GST_BIN(pipeline), "mysink");
415 | 	GstAppSink* appsink = GST_APP_SINK(appsinkElement);
416 | 
417 | 	if( !appsinkElement || !appsink)
418 | 	{
419 | 		printf(LOG_GSTREAMER "gstreamer failed to retrieve AppSink element from pipeline\n");
420 | 		return false;
421 | 	}
422 | 	
423 | 	mAppSink = appsink;
424 | 	
425 | 	// setup callbacks; `this` is passed to them as user_data
426 | 	GstAppSinkCallbacks cb;
427 | 	memset(&cb, 0, sizeof(GstAppSinkCallbacks));
428 | 	
429 | 	cb.eos         = onEOS;
430 | 	cb.new_preroll = onPreroll;
431 | 	cb.new_sample  = onBuffer;
432 | 	
433 | 	gst_app_sink_set_callbacks(mAppSink, &cb, (void*)this, NULL);
434 | 	return true;
435 | }
436 | 
437 | 
438 | // Open -- requests GST_STATE_PLAYING; returns false unless gst_element_set_state() reports SUCCESS or ASYNC
439 | bool gstCamera::Open()
440 | {
441 | 	// transition pipeline to STATE_PLAYING
442 | 	printf(LOG_GSTREAMER "gstreamer transitioning pipeline to GST_STATE_PLAYING\n");
443 | 	
444 | 	const GstStateChangeReturn result = gst_element_set_state(mPipeline, GST_STATE_PLAYING);
445 | 	// ASYNC is accepted as-is: the code that waited for ASYNC_DONE is compiled out below
446 | 	if( result == GST_STATE_CHANGE_ASYNC )
447 | 	{
448 | #if 0
449 | 		GstMessage* asyncMsg = gst_bus_timed_pop_filtered(mBus, 5 * GST_SECOND, 
450 |     	 					      (GstMessageType)(GST_MESSAGE_ASYNC_DONE|GST_MESSAGE_ERROR)); 
451 | 
452 | 		if( asyncMsg != NULL )
453 | 		{
454 | 			gst_message_print(mBus, asyncMsg, this);
455 | 			gst_message_unref(asyncMsg);
456 | 		}
457 | 		else
458 | 			printf(LOG_GSTREAMER "gstreamer NULL message after transitioning pipeline to PLAYING...\n");
459 | #endif
460 | 	}
461 | 	else if( result != GST_STATE_CHANGE_SUCCESS )
462 | 	{
463 | 		printf(LOG_GSTREAMER "gstreamer failed to set pipeline state to PLAYING (error %u)\n", result);
464 | 		return false;
465 | 	}
466 | 	// print pending bus messages, sleep for 100*1000 microseconds, then print whatever arrived meanwhile
467 | 	checkMsgBus();
468 | 	usleep(100*1000);
469 | 	checkMsgBus();
470 | 
471 | 	return true;
472 | }
473 | 	
474 | 
475 | // Close -- stops streaming by setting the pipeline to GST_STATE_NULL
476 | void gstCamera::Close()
477 | {
478 | 	// stop pipeline
479 | 	printf(LOG_GSTREAMER "gstreamer transitioning pipeline to GST_STATE_NULL\n");
480 | 
481 | 	const GstStateChangeReturn result = gst_element_set_state(mPipeline, GST_STATE_NULL);
482 | 
483 | 	if( result != GST_STATE_CHANGE_SUCCESS )	// the message used to name PLAYING, the state Open() requests
484 | 		printf(LOG_GSTREAMER "gstreamer failed to set pipeline state to NULL (error %u)\n", result);
485 | 
486 | 	usleep(250*1000);	// 250 ms pause after the state change
487 | }
488 | 
489 | 
490 | // checkMsgBus -- drains the pipeline bus: every queued message is printed with
491 | // gst_message_print() and then released.  Returns as soon as the bus is empty.
492 | void gstCamera::checkMsgBus()
493 | {
494 | 	GstMessage* pending = NULL;
495 | 
496 | 	// gst_bus_pop() does not block; a NULL result means nothing is queued,
497 | 	// which is what ends the loop
498 | 	while( (pending = gst_bus_pop(mBus)) != NULL )
499 | 	{
500 | 		gst_message_print(mBus, pending, this);
501 | 		gst_message_unref(pending);
502 | 	}
503 | }
504 | 


--------------------------------------------------------------------------------
/util/camera/gstCamera.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * inference-101
 3 |  */
 4 | 
 5 | #ifndef __GSTREAMER_CAMERA_H__
 6 | #define __GSTREAMER_CAMERA_H__
 7 | 
 8 | #include <gst/gst.h>
 9 | #include <string>
10 | 
11 | 
12 | struct _GstAppSink;
13 | class QWaitCondition;
14 | class QMutex;
15 | 
16 | 
17 | /**
18 |  * gstreamer camera: onboard CSI camera using nvcamerasrc, or a V4L2 device using v4l2src when v4l2_device >= 0
19 |  * @ingroup util
20 |  */
21 | class gstCamera
22 | {
23 | public:
24 | 	// Create camera -- both overloads return NULL on failure
25 | 	static gstCamera* Create( int v4l2_device=-1 );	// use onboard camera by default (>=0 for V4L2)
26 | 	static gstCamera* Create( uint32_t width, uint32_t height, int v4l2_device=-1 );
27 | 	
28 | 	// Destroy
29 | 	~gstCamera();
30 | 
31 | 	// Start/stop streaming (sets the pipeline to GST_STATE_PLAYING / GST_STATE_NULL)
32 | 	bool Open();
33 | 	void Close();
34 | 	
35 | 	// Capture the latest frame (NV12 from the onboard camera, RGB from V4L2); false on timeout or if already retrieved
36 | 	bool Capture( void** cpu, void** cuda, unsigned long timeout=ULONG_MAX );
37 | 	
38 | 	// Takes in the captured CUDA image (NV12 or RGB), converts to float4 RGBA (with pixel intensity 0-255)
39 | 	// Set zeroCopy to true if you need to access ConvertRGBA from CPU, otherwise it will be CUDA only.
40 | 	bool ConvertRGBA( void* input, void** output, bool zeroCopy=false );
41 | 	
42 | 	// Image dimensions (set by Create(), then overwritten by checkBuffer() for every received frame)
43 | 	inline uint32_t GetWidth() const	  { return mWidth; }
44 | 	inline uint32_t GetHeight() const	  { return mHeight; }
45 | 	inline uint32_t GetPixelDepth() const { return mDepth; }
46 | 	inline uint32_t GetSize() const		  { return mSize; }
47 | 	
48 | 	// Default resolution, unless otherwise specified during Create()
49 | 	static const uint32_t DefaultWidth  = 1280;
50 | 	static const uint32_t DefaultHeight = 720;
51 | 	
52 | private:
53 | 	static void onEOS(_GstAppSink* sink, void* user_data);	// appsink callbacks, registered in init()
54 | 	static GstFlowReturn onPreroll(_GstAppSink* sink, void* user_data);
55 | 	static GstFlowReturn onBuffer(_GstAppSink* sink, void* user_data);
56 | 
57 | 	gstCamera();	// private: instances are made through Create()
58 | 	
59 | 	bool init();
60 | 	bool buildLaunchStr();
61 | 	void checkMsgBus();
62 | 	void checkBuffer();
63 | 	
64 | 	_GstBus*     mBus;	// from gst_pipeline_get_bus()
65 | 	_GstAppSink* mAppSink;	// the "mysink" element of the pipeline
66 | 	_GstElement* mPipeline;	// from gst_parse_launch()
67 | 
68 | 	std::string  mLaunchStr;	// pipeline description built by buildLaunchStr()
69 | 	
70 | 	uint32_t mWidth;
71 | 	uint32_t mHeight;
72 | 	uint32_t mDepth;	// bits per pixel
73 | 	uint32_t mSize;	// frame size in bytes
74 | 	
75 | 	static const uint32_t NUM_RINGBUFFERS = 16;	// number of slots in each ring below
76 | 	
77 | 	void* mRingbufferCPU[NUM_RINGBUFFERS];	// captured frames, allocated with cudaAllocMapped() in checkBuffer()
78 | 	void* mRingbufferGPU[NUM_RINGBUFFERS];
79 | 	
80 | 	QWaitCondition* mWaitEvent;	// woken by checkBuffer() after each new frame
81 | 	
82 | 	QMutex* mWaitMutex;	// passed to mWaitEvent->wait() in Capture()
83 | 	QMutex* mRingMutex;	// held while mLatestRingbuffer / mLatestRetrieved are read or written
84 | 	
85 | 	uint32_t mLatestRGBA;	// next mRGBA slot ConvertRGBA() writes to
86 | 	uint32_t mLatestRingbuffer;	// slot holding the most recent captured frame
87 | 	bool     mLatestRetrieved;	// true once Capture() has handed that frame out
88 | 	
89 | 	void* mRGBA[NUM_RINGBUFFERS];	// ConvertRGBA() outputs, allocated on its first call
90 | 	int   mV4L2Device;	// -1 for onboard, >=0 for V4L2 device
91 | 	
92 | 	inline bool onboardCamera() const		{ return (mV4L2Device < 0); }
93 | };
94 | 
95 | #endif
96 | 


--------------------------------------------------------------------------------
/util/camera/gstUtility.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * inference-101
  3 |  */
  4 | 
  5 | #include "gstUtility.h"
  6 | 
  7 | #include <gst/gst.h>
  8 | #include <stdint.h>
  9 | #include <stdio.h>
 10 | 
 11 | 
 12 | // gst_debug_level_str -- fixed-width label for a GstDebugLevel (used by rilog_debug_function).
 13 | // GST_LEVEL_TRACE is an enumerator, not a macro, so it is listed without an #ifdef guard.
 14 | inline const char* gst_debug_level_str( GstDebugLevel level )
 15 | {
 16 | 	switch (level)
 17 | 	{
 18 | 		case GST_LEVEL_NONE:	return "GST_LEVEL_NONE   ";
 19 | 		case GST_LEVEL_ERROR:	return "GST_LEVEL_ERROR  ";
 20 | 		case GST_LEVEL_WARNING:	return "GST_LEVEL_WARNING";
 21 | 		case GST_LEVEL_INFO:	return "GST_LEVEL_INFO   ";
 22 | 		case GST_LEVEL_DEBUG:	return "GST_LEVEL_DEBUG  ";
 23 | 		case GST_LEVEL_LOG:		return "GST_LEVEL_LOG    ";
 24 | 		case GST_LEVEL_FIXME:	return "GST_LEVEL_FIXME  ";
 25 | 		case GST_LEVEL_TRACE:	return "GST_LEVEL_TRACE  ";
 26 | 		case GST_LEVEL_MEMDUMP:	return "GST_LEVEL_MEMDUMP";
 27 | 		default:				return "<unknown>        ";
 28 | 	}
 29 | }
 30 | 
 31 | #define SEP "              "
 32 | 
 33 | // rilog_debug_function -- gstreamer log callback: prints messages of level WARNING or more
 34 | // severe, and silently drops everything else
 35 | void rilog_debug_function(GstDebugCategory* category, GstDebugLevel level,
 36 |                           const gchar* file, const char* function,
 37 |                           gint line, GObject* object, GstDebugMessage* message,
 38 |                           gpointer data)
 39 | {
 40 | 	// levels are ordered by increasing verbosity, so anything above WARNING is skipped
 41 | 	if( level > GST_LEVEL_WARNING /*GST_LEVEL_INFO*/ )
 42 | 		return;
 43 | 
 44 | 	// Only the type name of the emitting object is printed.  G_OBJECT_CLASS_NAME() is not
 45 | 	// applied to `object`: that macro expects a class structure rather than an instance,
 46 | 	// and its result was never part of the output.
 47 | 	const char* typeName = " ";
 48 | 
 49 | 	if( object != NULL )
 50 | 	{
 51 | 		typeName = G_OBJECT_TYPE_NAME(object);
 52 | 	}
 53 | 
 54 | 	// line 1: level, type, category -- line 2: file:line and function -- line 3: message text
 55 | 	printf(LOG_GSTREAMER "%s %s %s\n" SEP "%s:%i  %s\n" SEP "%s\n", 
 56 | 		  	gst_debug_level_str(level), typeName,
 57 | 		  	gst_debug_category_get_name(category), file, line, function, 
 58 | 		  	gst_debug_message_get(message));
 59 | }
 60 | 
 61 | 
 62 | // gstreamerInit -- initializes the gstreamer library, prints its version and replaces the
 63 | // default debug log function with rilog_debug_function.  Returns false if initialization fails.
 64 | bool gstreamerInit()
 65 | {
 66 | 	int argc = 0;
 67 | 
 68 | 	// an empty argument list is passed: argc is 0 and argv is NULL
 69 | 	const bool initialized = gst_init_check(&argc, NULL, NULL);
 70 | 
 71 | 	if( !initialized )
 72 | 	{
 73 | 		printf(LOG_GSTREAMER "failed to initialize gstreamer library with gst_init()\n");
 74 | 		return false;
 75 | 	}
 76 | 
 77 | 	// query and report the library version
 78 | 	guint verMajor = 0, verMinor = 0, verMicro = 0, verNano = 0;
 79 | 	gst_version( &verMajor, &verMinor, &verMicro, &verNano );
 80 | 
 81 | 	printf(LOG_GSTREAMER "initialized gstreamer, version %u.%u.%u.%u\n", verMajor, verMinor, verMicro, verNano);
 82 | 
 83 | 	// debugging: swap the default log function for ours
 84 | 	gst_debug_remove_log_function(gst_debug_log_default);
 85 | 	gst_debug_add_log_function(rilog_debug_function, NULL, NULL);
 86 | 
 87 | 	gst_debug_set_active(true);
 88 | 	gst_debug_set_colored(false);
 89 | 
 90 | 	return true;
 91 | }
 92 | //---------------------------------------------------------------------------------------------
 93 | // gst_print_one_tag -- prints every value stored under `tag` in `list`, one line per value.  NOTE(review): its only call site in this file is commented out.
 94 | static void gst_print_one_tag(const GstTagList * list, const gchar * tag, gpointer user_data)
 95 | {
 96 |   int i, num;
 97 |   // a tag can hold several values; num is how many are stored under this one
 98 |   num = gst_tag_list_get_tag_size (list, tag);
 99 |   for (i = 0; i < num; ++i) {
100 |     const GValue *val;
101 | 
102 |     /* Note: when looking for specific tags, use the gst_tag_list_get_xyz() API,
103 |      * we only use the GValue approach here because it is more generic */
104 |     val = gst_tag_list_get_value_index (list, tag, i);
105 |     if (G_VALUE_HOLDS_STRING (val)) {
106 |       printf("\t%20s : %s\n", tag, g_value_get_string (val));
107 |     } else if (G_VALUE_HOLDS_UINT (val)) {
108 |       printf("\t%20s : %u\n", tag, g_value_get_uint (val));
109 |     } else if (G_VALUE_HOLDS_DOUBLE (val)) {
110 |       printf("\t%20s : %g\n", tag, g_value_get_double (val));
111 |     } else if (G_VALUE_HOLDS_BOOLEAN (val)) {
112 |       printf("\t%20s : %s\n", tag,
113 |           (g_value_get_boolean (val)) ? "true" : "false");
114 |     } else if (GST_VALUE_HOLDS_BUFFER (val)) {
115 |       //GstBuffer *buf = gst_value_get_buffer (val);
116 |       //guint buffer_size = GST_BUFFER_SIZE(buf);
117 | 
118 |       printf("\t%20s : buffer of size %u\n", tag, /*buffer_size*/0);	// the size lookup is disabled, so 0 is always printed
119 |     } /*else if (GST_VALUE_HOLDS_DATE_TIME (val)) {
120 |       GstDateTime *dt = (GstDateTime*)g_value_get_boxed (val);
121 |       gchar *dt_str = gst_date_time_to_iso8601_string (dt);
122 | 
123 |       printf("\t%20s : %s\n", tag, dt_str);
124 |       g_free (dt_str);
125 |     }*/ else {
126 |       printf("\t%20s : tag of type '%s'\n", tag, G_VALUE_TYPE_NAME (val));	// fallback: only the value's type name is shown
127 |     }
128 |   }
129 | }
130 | // gst_stream_status_string -- short upper-case label for a GstStreamStatusType,
131 | // "UNKNOWN" for any value that is not listed here
132 | static const char* gst_stream_status_string( GstStreamStatusType status )
133 | {
134 | 	// one comparison per known status type
135 | 	if( status == GST_STREAM_STATUS_TYPE_CREATE )	return "CREATE";
136 | 	if( status == GST_STREAM_STATUS_TYPE_ENTER )	return "ENTER";
137 | 	if( status == GST_STREAM_STATUS_TYPE_LEAVE )	return "LEAVE";
138 | 	if( status == GST_STREAM_STATUS_TYPE_DESTROY )	return "DESTROY";
139 | 	if( status == GST_STREAM_STATUS_TYPE_START )	return "START";
140 | 	if( status == GST_STREAM_STATUS_TYPE_PAUSE )	return "PAUSE";
141 | 	if( status == GST_STREAM_STATUS_TYPE_STOP )	return "STOP";
142 | 
143 | 	return "UNKNOWN";
144 | }
145 | 
146 | // gst_message_print -- prints a description of a bus message, chosen by message type.
147 | // Always returns TRUE.
148 | gboolean gst_message_print(GstBus* bus, GstMessage* message, gpointer user_data)
149 | {
150 | 	// bus and user_data are not used by this function
151 | 	switch (GST_MESSAGE_TYPE (message)) 
152 | 	{
153 | 		case GST_MESSAGE_ERROR: 
154 | 		{
155 | 			GError *err = NULL;
156 | 			gchar *dbg_info = NULL;
157 |  
158 | 			gst_message_parse_error (message, &err, &dbg_info);
159 | 			printf(LOG_GSTREAMER "gstreamer %s ERROR %s\n", GST_OBJECT_NAME (message->src), err->message);
160 | 			printf(LOG_GSTREAMER "gstreamer Debugging info: %s\n", (dbg_info) ? dbg_info : "none");
161 | 
162 | 			g_error_free(err);
163 | 			g_free(dbg_info);
164 | 			break;
165 | 		}
166 | 		case GST_MESSAGE_EOS:
167 | 		{
168 | 			// TODO trigger plugin Close() upon error
169 | 			printf(LOG_GSTREAMER "gstreamer %s recieved EOS signal...\n", GST_OBJECT_NAME(message->src));
170 | 			break;
171 | 		}
172 | 		case GST_MESSAGE_STATE_CHANGED:
173 | 		{
174 | 			GstState old_state, new_state;
175 |     
176 | 			gst_message_parse_state_changed(message, &old_state, &new_state, NULL);
177 | 			
178 | 			printf(LOG_GSTREAMER "gstreamer changed state from %s to %s ==> %s\n",
179 | 							gst_element_state_get_name(old_state),
180 | 							gst_element_state_get_name(new_state),
181 | 							GST_OBJECT_NAME(message->src));
182 | 			break;
183 | 		}
184 | 		case GST_MESSAGE_STREAM_STATUS:
185 | 		{
186 | 			GstStreamStatusType streamStatus;
187 | 			gst_message_parse_stream_status(message, &streamStatus, NULL);
188 | 			
189 | 			printf(LOG_GSTREAMER "gstreamer stream status %s ==> %s\n",
190 | 							gst_stream_status_string(streamStatus), 
191 | 							GST_OBJECT_NAME(message->src));
192 | 			break;
193 | 		}
194 | 		case GST_MESSAGE_TAG: 
195 | 		{
196 | 			GstTagList *tags = NULL;
197 | 
198 | 			gst_message_parse_tag(message, &tags);
199 | 
200 | 			if( tags != NULL )
201 | 			{
202 | 				// gst_tag_list_to_string() is a function, so an #ifdef on its name is never true;
203 | 				// it is called directly and the string it allocates is released with g_free()
204 | 				gchar* txt = gst_tag_list_to_string(tags);
205 | 
206 | 				printf(LOG_GSTREAMER "gstreamer %s %s\n", GST_OBJECT_NAME(message->src), (txt) ? txt : "none");
207 | 
208 | 				g_free(txt);
209 | 				gst_tag_list_free(tags);
210 | 			}
211 | 			break;
212 | 		}
213 | 		default:
214 | 		{
215 | 			printf(LOG_GSTREAMER "gstreamer msg %s ==> %s\n", gst_message_type_get_name(GST_MESSAGE_TYPE(message)), GST_OBJECT_NAME(message->src));
216 | 			break;
217 | 		}
218 | 	}
219 | 
220 | 	return TRUE;
221 | }
222 | 
223 | 


--------------------------------------------------------------------------------
/util/camera/gstUtility.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * inference-101
 3 |  */
 4 | 
 5 | #ifndef __GSTREAMER_UTILITY_H__
 6 | #define __GSTREAMER_UTILITY_H__
 7 | 
 8 | 
 9 | #include <gst/gst.h>
10 | 
11 | 
12 | /**
13 |  * LOG_GSTREAMER printf prefix -- string literal placed in front of the gstreamer-related log formats
14 |  * @ingroup util
15 |  */
16 | #define LOG_GSTREAMER "[gstreamer] "
17 | 
18 | 
19 | /**
20 |  * gstreamerInit -- initializes the gstreamer library, prints its version and installs this project's debug log function; returns false if initialization fails
21 |  * @ingroup util
22 |  */
23 | bool gstreamerInit();
24 | 
25 | 
26 | /**
27 |  * gst_message_print -- prints a description of a bus message (error, EOS, state change, stream status, tag, or other); always returns TRUE
28 |  * @ingroup util
29 |  */
30 | gboolean gst_message_print(_GstBus* bus, _GstMessage* message, void* user_data);
31 | 
32 | 
33 | 
34 | #endif
35 | 
36 | 


--------------------------------------------------------------------------------
/util/camera/v4l2-console/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # v4l2-console: executable built from every .cpp file found in this directory.
2 | file(GLOB v4l2ConsoleSources *.cpp)
3 | file(GLOB v4l2ConsoleIncludes *.h )	# collected, but not referenced by any command in this file
4 | # Build the executable and link it against jetson-inference (defined outside this file).
5 | add_executable(v4l2-console ${v4l2ConsoleSources})
6 | target_link_libraries(v4l2-console jetson-inference)
7 | 


--------------------------------------------------------------------------------
/util/camera/v4l2-console/v4l2-console.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * inference-101
  3 |  */
  4 | 
  5 | #include "v4l2Camera.h"
  6 | 
  7 | #include <stdio.h>
  8 | #include <signal.h>
  9 | #include <unistd.h>
 10 | #include <QImage>
 11 | 
 12 | // Shutdown flag: written by sig_handler(), polled by the main loop.
 13 | volatile sig_atomic_t signal_recieved = 0;
 14 | 
 15 | // SIGINT handler -- restricted to async-signal-safe calls (write(), not printf()).
 16 | void sig_handler(int signo)
 17 | {
 18 | 	if( signo == SIGINT )
 19 | 	{
 20 | 		static const char msg[] = "received SIGINT\n";
 21 | 		if( write(STDOUT_FILENO, msg, sizeof(msg) - 1) < 0 ) { /* nothing safe to do here */ }
 22 | 		signal_recieved = 1;
 23 | 	}
 24 | }
 25 | 
 26 | int main( int argc, char** argv )
 27 | {
 28 | 	printf("v4l2-console\n  args (%i):  ", argc);
 29 | 	
 30 | 	/*
 31 | 	 * verify parameters
 32 | 	 */
 33 | 	for( int i=0; i < argc; i++ )
 34 | 		printf("%i [%s]  ", i, argv[i]);
 35 | 		
 36 | 	printf("\n");
 37 | 	
 38 | 	if( argc < 2 )
 39 | 	{
 40 | 		printf("v4l2-console:  0 arguments were supplied.\n");
 41 | 		printf("usage:  v4l2-console <filename>\n");
 42 | 		printf("      ./v4l2-console /dev/video0\n");
 43 | 		
 44 | 		return 0;
 45 | 	}
 46 | 	
 47 | 	const char* dev_path = argv[1];
 48 | 	printf("v4l2-console:   attempting to initialize video device '%s'\n\n", dev_path);
 49 | 	
 50 | 	if( signal(SIGINT, sig_handler) == SIG_ERR )
 51 | 		printf("\ncan't catch SIGINT\n");
 52 | 
 53 | 	/*
 54 | 	 * create the camera device
 55 | 	 */
 56 | 	v4l2Camera* camera = v4l2Camera::Create(dev_path);
 57 | 	
 58 | 	if( !camera )
 59 | 	{
 60 | 		printf("\nv4l2-console:  failed to initialize video device '%s'\n", dev_path);
 61 | 		return 0;
 62 | 	}
 63 | 	
 64 | 	printf("\nv4l2-console:  successfully initialized video device '%s'\n", dev_path);
 65 | 	printf("    width:  %u\n", camera->GetWidth());
 66 | 	printf("   height:  %u\n", camera->GetHeight());
 67 | 	printf("    depth:  %u (bpp)\n", camera->GetPixelDepth());
 68 | 	
 69 | 	
 70 | 	/*
 71 | 	 * start streaming
 72 | 	 */
 73 | 	if( !camera->Open() )
 74 | 	{
 75 | 		printf("\nv4l2-console:  failed to open camera '%s' for streaming\n", dev_path);
 76 | 		return 0;
 77 | 	}
 78 | 	
 79 | 	printf("\nv4l2-console:  camera '%s' open for streaming\n", dev_path);
 80 | 	
 81 | 	
 82 | 	while( !signal_recieved )
 83 | 	{
 84 | 		uint8_t* img = (uint8_t*)camera->Capture(500);
 85 | 		
 86 | 		if( !img )
 87 | 		{
 88 | 			//printf("got NULL image from camera capture\n");
 89 | 			continue;
 90 | 		}
 91 | 		else
 92 | 		{
 93 | 			printf("recieved new video frame\n");
 94 | 			
 95 | 			static int num_frames = 0;
 96 | 			
 97 | 			const int width  = camera->GetWidth();
 98 | 			const int height = camera->GetHeight();
 99 | 			
100 | 			QImage qImg(width, height, QImage::Format_RGB32);
101 | 			
102 | 			for( int y=0; y < height; y++ )
103 | 			{
104 | 				for( int x=0; x < width; x++ )
105 | 				{
106 | 					const int value = img[y * width + x];
107 | 					if( value != 0 )
108 | 						printf("%i %i  %i\n", x, y, value);
109 | 					qImg.setPixel(x, y, qRgb(value, value, value));
110 | 				}
111 | 			}
112 | 			
113 | 			char output_filename[64];
114 | 			sprintf(output_filename, "camera-%u.jpg", num_frames);
115 | 			
116 | 			qImg.save(QString(output_filename));
117 | 			num_frames++;
118 | 		}
119 | 			
120 | 	}
121 | 	
122 | 	printf("\nv4l2-console:  un-initializing video device '%s'\n", dev_path);
123 | 	
124 | 	
125 | 	/*
126 | 	 * shutdown the camera device
127 | 	 */
128 | 	if( camera != NULL )
129 | 	{
130 | 		delete camera;
131 | 		camera = NULL;
132 | 	}
133 | 	
134 | 	printf("v4l2-console:  video device '%s' has been un-initialized.\n", dev_path);
135 | 	printf("v4l2-console:  this concludes the test of video device '%s'\n", dev_path);
136 | 	return 0;
137 | }


--------------------------------------------------------------------------------
/util/camera/v4l2-display/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | 
# v4l2-display: test program built from the single source file in this
# directory.  The source is listed explicitly rather than collected with
# file(GLOB): a glob does not notice added or removed files until CMake is
# re-run by hand, and an explicit list makes such changes visible in review.
add_executable(v4l2-display
	v4l2-display.cpp
)

# The plain (keyword-less) signature is kept on purpose: the top-level
# CMakeLists.txt only requires CMake 2.8, which predates PRIVATE/PUBLIC.
target_link_libraries(v4l2-display jetson-inference)
7 | 


--------------------------------------------------------------------------------
/util/camera/v4l2-display/v4l2-display.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * inference-101
 3 |  */
 4 | 
 5 | #include "v4l2Camera.h"
 6 | #include "glDisplay.h"
 7 | #include "cudaMappedMemory.h"
 8 | 
 9 | #include <stdio.h>
10 | 
11 | 
12 | int main( int argc, char** argv )
13 | {
14 | 	printf("v4l2-display\n  args (%i):  ", argc);
15 | 	
16 | 	/*
17 | 	 * verify parameters
18 | 	 */
19 | 	for( int i=0; i < argc; i++ )
20 | 		printf("%i [%s]  ", i, argv[i]);
21 | 		
22 | 	printf("\n");
23 | 	
24 | 	if( argc < 2 )
25 | 	{
26 | 		printf("v4l2-display:  0 arguments were supplied.\n");
27 | 		printf("usage:  v4l2-display <filename>\n");
28 | 		printf("      ./v4l2-display /dev/video0\n");
29 | 		
30 | 		return 0;
31 | 	}
32 | 	
33 | 	const char* dev_path = argv[1];
34 | 	printf("v4l2-display:   attempting to initialize video device '%s'\n\n", dev_path);
35 | 	
36 | 	
37 | 	/*
38 | 	 * create the camera device
39 | 	 */
40 | 	v4l2Camera* camera = v4l2Camera::Create(dev_path);
41 | 	
42 | 	if( !camera )
43 | 	{
44 | 		printf("\nv4l2-display:  failed to initialize video device '%s'\n", dev_path);
45 | 		return 0;
46 | 	}
47 | 	
48 | 	printf("\nv4l2-display:  successfully initialized video device '%s'\n", dev_path);
49 | 	printf("    width:  %u\n", camera->GetWidth());
50 | 	printf("   height:  %u\n", camera->GetHeight());
51 | 	printf("    depth:  %u (bpp)\n", camera->GetPixelDepth());
52 | 	
53 | 	printf("\nv4l2-display:  un-initializing video device '%s'\n", dev_path);
54 | 	
55 | 	
56 | 	/*
57 | 	 * create openGL window
58 | 	 */
59 | 	glDisplay* display = glDisplay::Create();
60 | 	
61 | 	if( !display )
62 | 	{
63 | 		printf("\nv4l2-display:  failed to create openGL display\n");
64 | 		return 0;
65 | 	}
66 | 	
67 | 	glTexture* tex = glTexture::Create(camera->GetWidth(), camera->GetHeight(), GL_LUMINANCE8);
68 | 	
69 | 	if( !tex )
70 | 	{
71 | 		printf("v4l2-display:  failed to create %ux%u openGL texture\n", camera->GetWidth(), camera->GetHeight());
72 | 		return 0;
73 | 	}
74 | 	
75 | 	printf("v4l2-display:  initialized %u x %u openGL texture (%u bytes)\n", tex->GetWidth(), tex->GetHeight(), tex->GetSize());
76 | 	
77 | 	
78 | 	
79 | 
80 | 	/*
81 | 	 * shutdown
82 | 	 */
83 | 	if( display != NULL )
84 | 	{
85 | 		delete display;
86 | 		display = NULL;
87 | 	}
88 | 	
89 | 	if( camera != NULL )
90 | 	{
91 | 		delete camera;
92 | 		camera = NULL;
93 | 	}
94 | 	
95 | 	printf("v4l2-display:  video device '%s' has been un-initialized.\n", dev_path);
96 | 	printf("v4l2-display:  this concludes the test of video device '%s'\n", dev_path);
97 | 	return 0;
98 | }


--------------------------------------------------------------------------------
/util/camera/v4l2Camera.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * inference-101
  3 |  */
  4 | 
  5 | #include "v4l2Camera.h"
  6 | 
  7 | #include <fcntl.h> 
  8 | #include <unistd.h>
  9 | #include <errno.h>
 10 | #include <string.h>
 11 | #include <stdio.h>
 12 | #include <stdlib.h>
 13 | 
 14 | #include <sys/stat.h>
 15 | #include <sys/types.h>
 16 | #include <sys/time.h>
 17 | #include <sys/mman.h>
 18 | #include <sys/ioctl.h>
 19 | 
 20 | 
 21 | 
// Number of capture buffers asked for in the VIDIOC_REQBUFS requests issued by
// initMMap() and initUserPtr().  initMMap() re-reads the count after the call
// and proceeds with whatever the driver granted, as long as it is at least 2.
#define REQUESTED_RINGBUFFERS 	4
 23 | 
 24 | 
 25 | 
 26 | // ioctl
 27 | static int xioctl(int fd, int request, void* arg)
 28 | {
 29 |     int status;
 30 |     do { status = ioctl (fd, request, arg); } while (-1==status && EINTR==errno);
 31 |     return status;
 32 | }
 33 | 
 34 | 
 35 | 
 36 | // constructor
 37 | v4l2Camera::v4l2Camera( const char* device_path ) : mDevicePath(device_path)
 38 | {	
 39 | 	mFD = -1;
 40 | 
 41 | 	mBuffersMMap     = NULL;
 42 | 	mBufferCountMMap = 0;
 43 | 	mRequestWidth    = 0;
 44 | 	mRequestHeight   = 0;
 45 | 	mRequestFormat   = 1;
 46 | 	//mRequestFormat   = -1;	// index into V4L2 format table
 47 | 	
 48 | 	mWidth      = 0;
 49 | 	mHeight     = 0;
 50 | 	mPitch      = 0;
 51 | 	mPixelDepth = 0;
 52 | }
 53 | 
 54 | 
 55 | // destructor	
 56 | v4l2Camera::~v4l2Camera()
 57 | {
 58 | 	// close file
 59 | 	if( mFD >= 0 )
 60 | 	{
 61 | 		close(mFD);
 62 | 		mFD = -1;
 63 | 	}
 64 | }
 65 | 
 66 | 
 67 | // ProcessEmit
 68 | void* v4l2Camera::Capture( size_t timeout )
 69 | {
 70 | 	fd_set fds;
 71 | 	FD_ZERO(&fds);
 72 | 	FD_SET(mFD, &fds);
 73 | 
 74 | 	struct timeval tv;
 75 |  
 76 | 	tv.tv_sec  = 0;
 77 | 	tv.tv_usec = 0;
 78 | 
 79 | 	const bool threaded = true; //false;
 80 | 
 81 | 	if( timeout > 0 )
 82 | 	{
 83 | 		tv.tv_sec  = timeout / 1000;
 84 | 		tv.tv_usec = (timeout - (tv.tv_sec * 1000)) * 1000;
 85 | 	}
 86 | 	
 87 | 	//
 88 | 	const int result = select(mFD + 1, &fds, NULL, NULL, &tv);
 89 | 
 90 | 
 91 | 	if( result == -1 ) 
 92 | 	{
 93 | 		//if (EINTR == errno)
 94 | 		printf("v4l2 -- select() failed (errno=%i) (%s)\n", errno, strerror(errno));
 95 | 		return NULL;
 96 | 	}
 97 | 	else if( result == 0 )
 98 | 	{
 99 | 		if( timeout > 0 )
100 | 			printf("v4l2 -- select() timed out...\n");
101 | 		return NULL;	// timeout, not necessarily an error (TRY_AGAIN)
102 | 	}
103 | 
104 | 	// dequeue input buffer from V4L2
105 | 	struct v4l2_buffer buf;
106 | 	memset(&buf, 0, sizeof(v4l2_buffer));
107 | 
108 | 	buf.type   = V4L2_BUF_TYPE_VIDEO_CAPTURE;
109 | 	buf.memory = V4L2_MEMORY_MMAP;	//V4L2_MEMORY_USERPTR;
110 | 
111 | 	if( xioctl(mFD, VIDIOC_DQBUF, &buf) < 0 )
112 | 	{
113 | 		printf("v4l2 -- ioctl(VIDIOC_DQBUF) failed (errno=%i) (%s)\n", errno, strerror(errno));
114 | 		return NULL;
115 | 	}
116 | 	
117 | 	if( buf.index >= mBufferCountMMap )
118 | 	{
119 | 		printf("v4l2 -- invalid mmap buffer index (%u)\n", buf.index);
120 | 		return NULL;
121 | 	}
122 | 	
123 | 	// emit ringbuffer entry
124 | 	//printf("v4l2 -- recieved %ux%u video frame (index=%u)\n", mWidth, mHeight, (uint32_t)buf.index);
125 | 
126 | 	void* image_ptr = mBuffersMMap[buf.index].ptr;
127 | 
128 | 	// re-queue buffer to V4L2
129 | 	if( xioctl(mFD, VIDIOC_QBUF, &buf) < 0 )
130 | 		printf("v4l2 -- ioctl(VIDIOC_QBUF) failed (errno=%i) (%s)\n", errno, strerror(errno));
131 | 
132 | 	return image_ptr;
133 | }
134 | 
135 | 
136 | 
137 | // initMMap
138 | bool v4l2Camera::initMMap()
139 | {
140 | 	struct v4l2_requestbuffers req;
141 | 	memset(&req, 0, sizeof(v4l2_requestbuffers));
142 | 
143 | 	req.count  = REQUESTED_RINGBUFFERS;
144 | 	req.type   = V4L2_BUF_TYPE_VIDEO_CAPTURE;
145 | 	req.memory = V4L2_MEMORY_MMAP;
146 | 
147 | 	if( xioctl(mFD, VIDIOC_REQBUFS, &req) < 0 )
148 | 	{
149 | 		printf("v4l2 -- does not support mmap (errno=%i) (%s)\n", errno, strerror(errno));
150 | 		return false;
151 | 	}
152 | 
153 | 	if( req.count < 2 )
154 | 	{
155 | 		printf("v4l2 -- insufficient mmap memory\n");
156 | 		return false;
157 | 	}
158 | 
159 | 	mBuffersMMap = (v4l2_mmap*)malloc( req.count * sizeof(v4l2_mmap) );
160 | 	
161 | 	if( !mBuffersMMap )
162 | 		return false;
163 | 
164 | 	memset(mBuffersMMap, 0, req.count * sizeof(v4l2_mmap));
165 | 
166 | 	for( size_t n=0; n < req.count; n++ )
167 | 	{
168 | 		mBuffersMMap[n].buf.type   = V4L2_BUF_TYPE_VIDEO_CAPTURE;
169 | 		mBuffersMMap[n].buf.memory = V4L2_MEMORY_MMAP;
170 | 		mBuffersMMap[n].buf.index  = n;
171 | 		
172 | 		if( xioctl(mFD, VIDIOC_QUERYBUF, &mBuffersMMap[n].buf) < 0 )
173 | 		{
174 | 			printf( "v4l2 -- failed retrieve mmap buffer info (errno=%i) (%s)\n", errno, strerror(errno));
175 | 			return false;
176 | 		}
177 | 
178 | 		mBuffersMMap[n].ptr = mmap(NULL, mBuffersMMap[n].buf.length,
179 | 							  PROT_READ|PROT_WRITE, MAP_SHARED,
180 | 							  mFD, mBuffersMMap[n].buf.m.offset);
181 | 
182 | 		if( mBuffersMMap[n].ptr == MAP_FAILED )
183 | 		{
184 | 			printf( "v4l2 -- failed to mmap buffer (errno=%i) (%s)\n", errno, strerror(errno));
185 | 			return false;
186 | 		}
187 | 
188 | 		if( xioctl(mFD, VIDIOC_QBUF, &mBuffersMMap[n].buf) < 0 )
189 | 		{
190 | 			printf( "v4l2 -- failed to queue mmap buffer (errno=%i) (%s)\n", errno, strerror(errno));
191 | 			return false;
192 | 		}
193 | 	}
194 | 
195 | 	mBufferCountMMap = req.count;	
196 | 	printf("v4l2 -- mapped %zu capture buffers with mmap\n", mBufferCountMMap); 	
197 | 	return true;
198 | }
199 | 
200 | 
// v4l2_format_str - map a V4L2 pixel-format code to a printable description.
// Only the raw Bayer formats listed below are recognised; every other code
// yields "UNKNOWN".
inline const char* v4l2_format_str( uint32_t fmt )
{
	switch( fmt )
	{
		case V4L2_PIX_FMT_SBGGR8:	return "SBGGR8 (V4L2_PIX_FMT_SBGGR8)";
		case V4L2_PIX_FMT_SGBRG8:	return "SGBRG8 (V4L2_PIX_FMT_SGBRG8)";
		case V4L2_PIX_FMT_SGRBG8:	return "SGRBG8 (V4L2_PIX_FMT_SGRBG8)";
		case V4L2_PIX_FMT_SRGGB8:	return "SRGGB8 (V4L2_PIX_FMT_SRGGB8)";
		case V4L2_PIX_FMT_SBGGR16:	return "BYR2 (V4L2_PIX_FMT_SBGGR16)";
		case V4L2_PIX_FMT_SRGGB10:	return "RG10 (V4L2_PIX_FMT_SRGGB10)";
		default:					break;
	}

	return "UNKNOWN";
}
212 | 
213 | 
214 | inline void v4l2_print_format( const v4l2_format& fmt, const char* text )
215 | {
216 | 	printf("v4l2 -- %s\n", text);
217 | 	printf("v4l2 --   width  %u\n", fmt.fmt.pix.width);
218 | 	printf("v4l2 --   height %u\n", fmt.fmt.pix.height);
219 | 	printf("v4l2 --   pitch  %u\n", fmt.fmt.pix.bytesperline);
220 | 	printf("v4l2 --   size   %u\n", fmt.fmt.pix.sizeimage);
221 | 	printf("v4l2 --   format 0x%X  %s\n", fmt.fmt.pix.pixelformat, v4l2_format_str(fmt.fmt.pix.pixelformat));
222 | 	printf("v4l2 --   color  0x%X\n", fmt.fmt.pix.colorspace);
223 | 	printf("v4l2 --   field  0x%X\n", fmt.fmt.pix.field);
224 | }
225 | 
226 | 
227 | inline void v4l2_print_formatdesc( const v4l2_fmtdesc& desc )
228 | {
229 | 	printf("v4l2 -- format #%u\n", desc.index);
230 | 	printf("v4l2 --   desc   %s\n", desc.description);
231 | 	printf("v4l2 --   flags  %s\n", (desc.flags == 0 ? "V4L2_FMT_FLAG_UNCOMPRESSED" : "V4L2_FMT_FLAG_COMPRESSED"));
232 | 	printf("v4l2 --   fourcc 0x%X  %s\n", desc.pixelformat, v4l2_format_str(desc.pixelformat));
233 | 	
234 | }
235 | 	
236 | 
237 | bool v4l2Camera::initFormats()
238 | {
239 | 	struct v4l2_fmtdesc desc;
240 | 	memset(&desc, 0, sizeof(v4l2_fmtdesc));
241 | 
242 | 	desc.index = 0;
243 | 	desc.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
244 | 
245 | 	while( ioctl(mFD, VIDIOC_ENUM_FMT, &desc) == 0 )
246 | 	{
247 | 		mFormats.push_back(desc);
248 | 		v4l2_print_formatdesc( desc );
249 | 		desc.index++;
250 | 	}
251 | 
252 | 	return true;
253 | }
254 | 
255 | 
256 | // initStream
257 | bool v4l2Camera::initStream()
258 | {
259 | 	struct v4l2_format fmt;	
260 | 	memset(&fmt, 0, sizeof(v4l2_format));
261 | 	fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
262 | 
263 | 	// retrieve existing video format
264 | 	if( xioctl(mFD, VIDIOC_G_FMT, &fmt) < 0 )
265 | 	{
266 | 		const int err = errno;
267 | 		printf( "v4l2 -- failed to get video format of device (errno=%i) (%s)\n", errno, strerror(errno));
268 | 		return false;
269 | 	}
270 | 
271 | 	v4l2_print_format(fmt, "preexisting format");
272 | 
273 | #if 1
274 | 	// setup new format
275 | 	struct v4l2_format new_fmt;	
276 | 	memset(&new_fmt, 0, sizeof(v4l2_format));
277 | 
278 | 	new_fmt.type                = V4L2_BUF_TYPE_VIDEO_CAPTURE;
279 | 	new_fmt.fmt.pix.width       = fmt.fmt.pix.width;
280 | 	new_fmt.fmt.pix.height      = fmt.fmt.pix.height;
281 | 	new_fmt.fmt.pix.pixelformat = fmt.fmt.pix.pixelformat;
282 | 	new_fmt.fmt.pix.field       = fmt.fmt.pix.field;
283 | 	new_fmt.fmt.pix.colorspace  = fmt.fmt.pix.colorspace;
284 | 
285 | 	if( mRequestWidth > 0 && mRequestHeight > 0 )
286 | 	{
287 | 		new_fmt.fmt.pix.width  = mRequestWidth;
288 | 		new_fmt.fmt.pix.height = mRequestHeight;
289 | 	}
290 | 
291 | 	if( mRequestFormat >= 0 && mRequestFormat < mFormats.size() )
292 | 		new_fmt.fmt.pix.pixelformat = mFormats[mRequestFormat].pixelformat;
293 | 
294 | 	v4l2_print_format(new_fmt, "setting new format...");
295 | 
296 | 	if( xioctl(mFD, VIDIOC_S_FMT, &new_fmt) < 0 )
297 | 	{
298 | 		const int err = errno;
299 | 		printf( "v4l2 -- failed to set video format of device (errno=%i) (%s)\n", errno, strerror(errno));
300 | 		return false;
301 | 	}
302 | 
303 | 	
304 | 	// re-retrieve the current format, with detailed info like line pitch/ect.
305 | 	memset(&fmt, 0, sizeof(v4l2_format));
306 | 	fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
307 | 
308 | 	if( xioctl(mFD, VIDIOC_G_FMT, &fmt) < 0 )
309 | 	{
310 | 		const int err = errno;
311 | 		printf( "v4l2 -- failed to get video format of device (errno=%i) (%s)\n", errno, strerror(errno));
312 | 		return false;
313 | 	}
314 | 
315 | 	v4l2_print_format(fmt, "confirmed new format");
316 | #endif
317 | 
318 | 	mWidth      = fmt.fmt.pix.width;
319 | 	mHeight     = fmt.fmt.pix.height;
320 | 	mPitch      = fmt.fmt.pix.bytesperline;
321 | 	mPixelDepth = (mPitch * 8) / mWidth;
322 | 
323 | 	// initMMap
324 | 	if( !initMMap() )		// initUserPtr()
325 | 		return false;
326 | 
327 | 	return true;
328 | }
329 | 
330 | 
331 | // Create
332 | v4l2Camera* v4l2Camera::Create( const char* device_path )
333 | {
334 | 	v4l2Camera* cam = new v4l2Camera(device_path);
335 | 
336 | 	if( !cam->init() )
337 | 	{
338 | 		printf("v4l2 -- failed to create instance %s\n", device_path);
339 | 		delete cam;
340 | 		return NULL;
341 | 	}
342 | 	
343 | 	return cam;
344 | }
345 | 
346 | 
347 | // Init
348 | bool v4l2Camera::init()
349 | {
350 | 	// locate the /dev/event* path for this device
351 | 	mFD = open(mDevicePath.c_str(), O_RDWR | O_NONBLOCK, 0 );
352 | 
353 | 	if( mFD < 0 )
354 | 	{
355 | 		printf( "v4l2 -- failed to open %s\n", mDevicePath.c_str());
356 | 		return false;
357 | 	}
358 | 
359 | 	// initialize
360 | 	if( !initCaps() )
361 | 		return false;
362 | 
363 | 	if( !initFormats() )
364 | 		return false;
365 | 
366 | 	if( !initStream() )
367 | 		return false;
368 | 
369 | 	return true;
370 | }
371 | 
372 | 
373 | // Open
374 | bool v4l2Camera::Open()
375 | {
376 | 	printf( "v4l2Camera::Open(%s)\n", mDevicePath.c_str());
377 | 
378 | 	// begin streaming
379 | 	enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
380 | 
381 | 	printf( "v4l2 -- starting streaming %s with ioctl(VIDIOC_STREAMON)...\n", mDevicePath.c_str());
382 | 
383 | 	if( xioctl(mFD, VIDIOC_STREAMON, &type) < 0 )
384 | 	{
385 | 		printf( "v4l2 -- failed to start streaming (errno=%i) (%s)\n", errno, strerror(errno));
386 | 		return false;
387 | 	}
388 | 
389 | 	return true;
390 | }
391 | 
392 | 
393 | // Close
394 | bool v4l2Camera::Close()
395 | {
396 | 	// stop streaming
397 | 	enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
398 | 
399 | 	printf( "v4l2 -- stopping streaming %s with ioctl(VIDIOC_STREAMOFF)...\n", mDevicePath.c_str());
400 | 
401 | 	if( xioctl(mFD, VIDIOC_STREAMOFF, &type) < 0 )
402 | 	{
403 | 		printf( "v4l2 -- failed to stop streaming (errno=%i) (%s)\n", errno, strerror(errno));
404 | 		//return false;
405 | 	}
406 | 
407 | 	return true;
408 | }
409 | 
410 | 
411 | 
412 | // initCaps
413 | bool v4l2Camera::initCaps()
414 | {
415 | 	struct v4l2_capability caps;
416 | 
417 | 	if( xioctl(mFD, VIDIOC_QUERYCAP, &caps) < 0 )
418 | 	{
419 | 		printf( "v4l2 -- failed to query caps (xioctl VIDIOC_QUERYCAP) for %s\n", mDevicePath.c_str());
420 | 		return false;
421 | 	}
422 | 
423 | 	#define PRINT_CAP(x) printf( "v4l2 -- %-18s %s\n", #x, (caps.capabilities & x) ? "yes" : "no")
424 | 
425 | 	PRINT_CAP(V4L2_CAP_VIDEO_CAPTURE);
426 | 	PRINT_CAP(V4L2_CAP_READWRITE);
427 | 	PRINT_CAP(V4L2_CAP_ASYNCIO);
428 | 	PRINT_CAP(V4L2_CAP_STREAMING);
429 | 	
430 | 	if( !(caps.capabilities & V4L2_CAP_VIDEO_CAPTURE) )
431 | 	{
432 | 		printf( "v4l2 -- %s is not a video capture device\n", mDevicePath.c_str());
433 | 		return false;
434 | 	}
435 | 
436 | 	return true;
437 | }
438 | 
439 | 
440 | // initUserPtr
441 | bool v4l2Camera::initUserPtr()
442 | {
443 | 	// request buffers
444 | 	struct v4l2_requestbuffers req;
445 | 	memset(&req, 0, sizeof(v4l2_requestbuffers));
446 | 
447 | 	req.count  = REQUESTED_RINGBUFFERS;
448 | 	req.type   = V4L2_BUF_TYPE_VIDEO_CAPTURE;
449 | 	req.memory = V4L2_MEMORY_USERPTR;
450 | 
451 | 	if ( xioctl(mFD, VIDIOC_REQBUFS, &req) < 0 ) 
452 | 	{
453 | 		const int err = errno;
454 | 		printf( "v4l2 -- failed to request buffers (errno=%i) (%s)\n", errno, strerror(errno));
455 | 		return false;
456 | 	}
457 | 
458 | 	// queue ringbuffer
459 | #if 0
460 | 	for( size_t n=0; n < mRingbuffer.size(); n++ )
461 | 	{
462 | 		struct v4l2_buffer buf;
463 | 		memset(&buf, 0, sizeof(v4l2_buffer));
464 | 		
465 | 		buf.type   = V4L2_BUF_TYPE_VIDEO_CAPTURE;
466 | 		buf.memory = V4L2_MEMORY_USERPTR;
467 | 		buf.index  = n;
468 | 		buf.length = mRingbuffer[n]->GetSize();
469 | 
470 | 		buf.m.userptr = (unsigned long)mRingbuffer[n]->GetCPU();
471 | 
472 | 		if( xioctl(mFD, VIDIOC_QBUF, &buf) < 0 )
473 | 		{
474 | 			printf( "v4l2 -- failed to queue buffer %zu (errno=%i) (%s)\n", n, errno, strerror(errno));
475 | 			return false;
476 | 		}
477 | 	}
478 | #endif
479 | 
480 | 	return true;
481 | }


--------------------------------------------------------------------------------
/util/camera/v4l2Camera.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * inference-101
  3 |  */
  4 | 
  5 | #ifndef __V4L2_CAPTURE_H
  6 | #define __V4L2_CAPTURE_H
  7 | 
  8 | 
  9 | #include <linux/videodev2.h>
 10 | 
 11 | #include <stdint.h>
 12 | #include <string>
 13 | #include <vector>
 14 | 
 15 | 
 16 | 
// One memory-mapped capture buffer, as set up by v4l2Camera::initMMap().
struct v4l2_mmap
{
	// V4L2 descriptor of the buffer (type, memory mode, index, plus the
	// length and offset filled in by VIDIOC_QUERYBUF)
	struct v4l2_buffer buf;
	// address at which the buffer was mapped with mmap()
	void*  ptr;
};
 22 | 
 23 | 
 24 | /**
 25 |  * Video4Linux2 camera capture streaming.
 26 |  * @ingroup util
 27 |  */
 28 | class v4l2Camera
 29 | {
 30 | public:	
 31 | 	/**
 32 | 	 * Create V4L2 interface
 33 | 	 * @param path Filename of the video device (e.g. /dev/video0)
 34 | 	 */
 35 | 	static v4l2Camera* Create( const char* device_path );
 36 | 
 37 | 	/**
 38 | 	 * Destructor
 39 | 	 */	
 40 | 	~v4l2Camera();
 41 | 
 42 | 	/**
 43 |  	 * Start streaming
 44 | 	 */
 45 | 	bool Open();
 46 | 
 47 | 	/**
 48 | 	 * Stop streaming
 49 | 	 */
 50 | 	bool Close();
 51 | 
 52 | 	/**
 53 | 	 * Return the next image.
 54 | 	 */
 55 | 	void* Capture( size_t timeout=0 );
 56 | 
 57 | 	/**
 58 | 	 * Get width, in pixels, of camera image.
 59 | 	 */
 60 | 	inline uint32_t GetWidth() const					{ return mWidth; }
 61 | 	
 62 | 	/**
 63 | 	 * Retrieve height, in pixels, of camera image.
 64 | 	 */
 65 | 	inline uint32_t GetHeight() const					{ return mHeight; }
 66 | 
 67 | 	/**
 68 |  	 * Return the size in bytes of one line of the image.
 69 | 	 */
 70 | 	inline uint32_t GetPitch() const					{ return mPitch; }
 71 | 
 72 | 	/**
 73 | 	 * Return the bit depth per pixel.
 74 | 	 */
 75 | 	inline uint32_t GetPixelDepth() const				{ return mPixelDepth; }
 76 | 
 77 | private:
 78 | 
 79 | 	v4l2Camera( const char* device_path );
 80 | 
 81 | 	bool init();
 82 | 	bool initCaps();
 83 | 	bool initFormats();
 84 | 	bool initStream();
 85 | 
 86 | 	bool initUserPtr();
 87 | 	bool initMMap();
 88 | 
 89 | 	int 	mFD;
 90 | 	int	    mRequestFormat;
 91 | 	uint32_t mRequestWidth;
 92 | 	uint32_t mRequestHeight;
 93 | 	uint32_t mWidth;
 94 | 	uint32_t mHeight;
 95 | 	uint32_t mPitch;
 96 | 	uint32_t mPixelDepth;
 97 | 
 98 | 	v4l2_mmap* mBuffersMMap;
 99 | 	size_t mBufferCountMMap;
100 | 
101 | 	std::vector<v4l2_fmtdesc> mFormats;
102 | 	std::string mDevicePath;
103 | };
104 | 
105 | 
106 | #endif
107 | 
108 | 
109 | 


--------------------------------------------------------------------------------
/util/commandLine.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * http://github.com/dusty-nv/jetson-inference
  3 |  */
  4 | 
  5 | #include "commandLine.h"
  6 | 
  7 | #include <stdlib.h>		// atoi
  8 | #include <string.h>
  9 | #include <strings.h>
 10 | 
 11 | 
 12 | 
 13 | // strRemoveDelimiter
 14 | static inline int strRemoveDelimiter(char delimiter, const char *string)
 15 | {
 16 |     int string_start = 0;
 17 | 
 18 |     while (string[string_start] == delimiter)
 19 |     {
 20 |         string_start++;
 21 |     }
 22 | 
 23 |     if (string_start >= (int)strlen(string)-1)
 24 |     {
 25 |         return 0;
 26 |     }
 27 | 
 28 |     return string_start;
 29 | }
 30 | 
 31 | 
 32 | // constructor
 33 | commandLine::commandLine( const int pArgc, char** pArgv )
 34 | {
 35 | 	argc = pArgc;
 36 | 	argv = pArgv;
 37 | }
 38 | 
 39 | 
 40 | // GetInt
 41 | int commandLine::GetInt( const char* string_ref )
 42 | {
 43 | 	if( argc < 1 )
 44 | 		return 0;
 45 | 
 46 | 	bool bFound = false;
 47 |     	int value = -1;
 48 | 
 49 | 	for( int i=1; i < argc; i++ )
 50 | 	{
 51 | 		int string_start = strRemoveDelimiter('-', argv[i]);
 52 | 		const char *string_argv = &argv[i][string_start];
 53 | 		int length = (int)strlen(string_ref);
 54 | 
 55 | 		if (!strncasecmp(string_argv, string_ref, length))
 56 | 		{
 57 | 			if (length+1 <= (int)strlen(string_argv))
 58 | 			{
 59 | 				int auto_inc = (string_argv[length] == '=') ? 1 : 0;
 60 | 				value = atoi(&string_argv[length + auto_inc]);
 61 | 			}
 62 | 			else
 63 | 			{
 64 | 				value = 0;
 65 | 			}
 66 | 
 67 | 			bFound = true;
 68 | 			continue;
 69 | 		}
 70 | 	}
 71 |  
 72 | 
 73 | 	if (bFound)
 74 | 		return value;
 75 |  
 76 | 	return 0;
 77 | }
 78 | 
 79 | 
 80 | // GetFloat
 81 | float commandLine::GetFloat( const char* string_ref )
 82 | {
 83 | 	if( argc < 1 )
 84 | 		return 0;
 85 | 
 86 | 	bool bFound = false;
 87 | 	float value = -1;
 88 | 
 89 | 	for (int i=1; i < argc; i++)
 90 | 	{
 91 | 		int string_start = strRemoveDelimiter('-', argv[i]);
 92 | 		const char *string_argv = &argv[i][string_start];
 93 | 		int length = (int)strlen(string_ref);
 94 | 
 95 | 		if (!strncasecmp(string_argv, string_ref, length))
 96 | 		{
 97 | 			if (length+1 <= (int)strlen(string_argv))
 98 | 			{
 99 | 				int auto_inc = (string_argv[length] == '=') ? 1 : 0;
100 | 				value = (float)atof(&string_argv[length + auto_inc]);
101 | 			}
102 | 			else
103 | 			{
104 | 				value = 0.f;
105 | 			}
106 | 
107 | 			bFound = true;
108 | 			continue;
109 | 		}
110 | 	}
111 | 
112 | 	if( bFound )
113 | 		return value;
114 | 
115 | 	return 0;
116 | }
117 | 
118 | 
119 | // GetString
120 | const char* commandLine::GetString( const char* string_ref )
121 | {
122 | 	if( argc < 1 )
123 | 		return 0;
124 | 
125 | 	for (int i=1; i < argc; i++)
126 | 	{
127 | 		int string_start  = strRemoveDelimiter('-', argv[i]);
128 | 		char *string_argv = (char *)&argv[i][string_start];
129 | 		int length = (int)strlen(string_ref);
130 | 
131 | 		if (!strncasecmp(string_argv, string_ref, length))
132 | 			return (string_argv + length + 1);
133 | 			//*string_retval = &string_argv[length+1];
134 | 	}
135 | 
136 | 	return NULL;
137 | }
138 | 
139 | 
140 | // GetFlag
141 | bool commandLine::GetFlag( const char* string_ref )
142 | {
143 | 	if( argc < 1 )
144 | 		return false;
145 | 
146 | 	for (int i=1; i < argc; i++)
147 | 	{
148 | 		int string_start = strRemoveDelimiter('-', argv[i]);
149 | 		const char *string_argv = &argv[i][string_start];
150 | 
151 | 		const char *equal_pos = strchr(string_argv, '=');
152 | 		int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
153 | 
154 | 		int length = (int)strlen(string_ref);
155 | 
156 | 		if( length == argv_length && !strncasecmp(string_argv, string_ref, length) )
157 | 			return true;
158 | 	}
159 |     
160 | 	return false;
161 | }
162 | 
163 | 
164 | 


--------------------------------------------------------------------------------
/util/commandLine.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * http://github.com/dusty-nv/jetson-inference
 3 |  */
 4 |  
 5 | #ifndef __COMMAND_LINE_H_
 6 | #define __COMMAND_LINE_H_
 7 | 
 8 | 
/**
 * commandLine parser class
 *
 * Thin wrapper around the argc/argv pair handed to main().  Arguments are
 * looked up by name; leading '-' characters on an argument are ignored and
 * names are compared case-insensitively.
 * @ingroup util
 */
class commandLine
{
public:
	/**
	 * constructor
	 *
	 * Stores argc and the argv pointer; the argument strings are not copied.
	 */
	commandLine( const int argc, char** argv );


	/**
	 * Checks to see whether the specified flag was included on the 
	 * command line.   For example, if argv contained "--foo", then 
	 * GetFlag("foo") would return true.
	 *
	 * @returns true, if the flag with argName was found
	 *          false, if the flag with argName was not found
	 */
	bool GetFlag( const char* argName );

	
	/**
	 * Get float argument.  For example if argv contained "--foo=3.14159", 
	 * then GetFloat("foo") would return 3.14159f
	 *
	 * @returns 0, if the argument could not be found.
	 *          Otherwise, returns the value of the argument.
	 */
	float GetFloat( const char* argName );


	/**
	 * Get integer argument.  For example if argv contained "--foo=100", 
	 * then GetInt("foo") would return 100.
	 *
	 * @returns 0, if the argument could not be found.
	 *          Otherwise, returns the value of the argument. 
	 */
	int GetInt( const char* argName );


	/**
	 * Get string argument.  For example if argv contained "--foo=bar",
	 * then GetString("foo") would return "bar".
	 *
	 * @returns NULL, if the argument could not be found.
	 *          Otherwise, returns a pointer to the argument value string
	 *          from the argv array.
	 */
	const char* GetString( const char* argName );


protected:

	// number of entries in argv, as passed to the constructor
	int argc;
	// argument vector, as passed to the constructor (not owned)
	char** argv;
};
69 | 
70 | 
71 | 
72 | #endif
73 | 
74 | 


--------------------------------------------------------------------------------
/util/cuda/cudaFont.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * http://github.com/dusty-nv/jetson-inference
  3 |  */
  4 | 
  5 | #include "cudaFont.h"
  6 | #include "cudaMappedMemory.h"
  7 | 
  8 | #include "loadImage.h"
  9 | 
 10 | 
 11 | // constructor
 12 | cudaFont::cudaFont()
 13 | {
 14 | 	mCommandCPU = NULL;
 15 | 	mCommandGPU = NULL;
 16 | 	mCmdEntries = 0;
 17 | 
 18 | 	mFontMapCPU = NULL;
 19 | 	mFontMapGPU = NULL;
 20 | 	
 21 | 	mFontMapWidth  = 0;
 22 | 	mFontMapHeight = 0;
 23 | 	
 24 | //mFontCellSize = make_int2(24,32);
 25 | 	mFontCellSize = make_int2(24,32);
 26 | }
 27 | 
 28 | 
 29 | 
 30 | // destructor
 31 | cudaFont::~cudaFont()
 32 | {
 33 | 	if( mFontMapCPU != NULL )
 34 | 	{
 35 | 		CUDA(cudaFreeHost(mFontMapCPU));
 36 | 		
 37 | 		mFontMapCPU = NULL; 
 38 | 		mFontMapGPU = NULL;
 39 | 	}
 40 | }
 41 | 
 42 | 
 43 | // Create
 44 | cudaFont* cudaFont::Create( const char* bitmap_path )
 45 | {
 46 | 	cudaFont* c = new cudaFont();
 47 | 	
 48 | 	if( !c )
 49 | 		return NULL;
 50 | 		
 51 | 	if( !c->init(bitmap_path) )
 52 | 		return NULL;
 53 | 		
 54 | 	return c;
 55 | }
 56 | 
 57 | 
 58 | // init
 59 | bool cudaFont::init( const char* bitmap_path )
 60 | {
 61 | 	if( !loadImageRGBA(bitmap_path, &mFontMapCPU, &mFontMapGPU, &mFontMapWidth, &mFontMapHeight) )
 62 | 		return false;
 63 | 	
 64 | 	if( !cudaAllocMapped((void**)&mCommandCPU, (void**)&mCommandGPU, sizeof(short4) * MaxCommands) )
 65 | 		return false;
 66 | 		
 67 | 	return true;
 68 | }
 69 | 
 70 | 
 71 | inline __host__ __device__ float4 operator*(float4 a, float4 b)
 72 | {
 73 |     return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
 74 | }
 75 | 
 76 | template<typename T>
 77 | __global__ void gpuOverlayText( T* font, int fontWidth, short4* text,
 78 | 						        T* output, int width, int height, float4 color ) 
 79 | {
 80 | 	const short4 t = text[blockIdx.x];
 81 | 
 82 | 	//printf("%i %hi %hi %hi %hi\n", blockIdx.x, t.x, t.y, t.z, t.w);
 83 | 
 84 | 	const int x = t.x + threadIdx.x;
 85 | 	const int y = t.y + threadIdx.y;
 86 | 
 87 | 	if( x < 0 || y < 0 || x >= width || y >= height )
 88 | 		return;
 89 | 
 90 | 	const int u = t.z + threadIdx.x;
 91 | 	const int v = t.w + threadIdx.y;
 92 | 
 93 | 	//printf("%i %i %i %i %i\n", blockIdx.x, x, y, u, v);
 94 | 	
 95 | 	const T px_font = font[v * fontWidth + u] * color;
 96 | 	      T px_out  = output[y * width + x];	// fixme:  add proper input support
 97 | 
 98 | 	const float alpha = px_font.w / 255.0f;
 99 | 	const float ialph = 1.0f - alpha;
100 | 	
101 | 	px_out.x = alpha * px_font.x + ialph * px_out.x;
102 | 	px_out.y = alpha * px_font.y + ialph * px_out.y;
103 | 	px_out.z = alpha * px_font.z + ialph * px_out.z; 
104 | 
105 | 	output[y * width + x] = px_out;	 
106 | }
107 | 
108 | 
/**
 * Launch gpuOverlayText for a list of glyph commands.
 *
 * One block is launched per command, with fontCellSize.x by fontCellSize.y
 * threads per block.  fontColor is divided by 255 per component before it is
 * handed to the kernel.
 *
 * @return cudaErrorInvalidValue for NULL pointers or zero length/width/height,
 *         otherwise the result of cudaGetLastError() after the launch
 */
template<typename T>
cudaError_t cudaOverlayText( T* font, const int2& fontCellSize, size_t fontMapWidth,
					    const float4& fontColor, short4* text, size_t length,
					    T* output, size_t width, size_t height)	
{
	const bool havePointers = font && text && output;
	const bool haveExtents  = (length != 0) && (width != 0) && (height != 0);

	if( !(havePointers && haveExtents) )
		return cudaErrorInvalidValue;

	// rescale the color from 0-255 to 0-1
	float4 color_scale;

	color_scale.x = fontColor.x / 255.0f;
	color_scale.y = fontColor.y / 255.0f;
	color_scale.z = fontColor.z / 255.0f;
	color_scale.w = fontColor.w / 255.0f;

	// one block per glyph, one thread per glyph pixel
	const dim3 threadsPerGlyph(fontCellSize.x, fontCellSize.y);
	const dim3 glyphBlocks(length);

	gpuOverlayText<<<glyphBlocks, threadsPerGlyph>>>(font, fontMapWidth, text, output, width, height, color_scale);

	return cudaGetLastError();
}
128 | 
129 | 
/**
 * Draw one or more strings onto an image.
 *
 * Every character with a code in 32..126 becomes one short4 command holding
 * the destination position (x,y) and the glyph origin inside the font map
 * (z,w); any other character is skipped without advancing the cursor.  The
 * queued commands are rendered by a single cudaOverlayText() launch.
 *
 * At most MaxCommands glyphs fit in the command buffer; text beyond that is
 * dropped (with a warning) rather than written past the end of the buffer.
 *
 * @param input   must be non-NULL but is not read here; glyphs are blended
 *                onto whatever `output` already contains
 * @param output  image the glyphs are blended into
 * @param width   width of `output` in pixels
 * @param height  height of `output` in pixels
 * @param text    list of (string, top-left position) pairs
 * @param color   text color, components on a 0-255 scale
 * @return false on invalid arguments, an unusable font cell size, or a failed
 *         kernel launch; true otherwise (including when no character was printable)
 */
bool cudaFont::RenderOverlay( float4* input, float4* output, uint32_t width, uint32_t height, const std::vector< std::pair< std::string, int2 > >& text, const float4& color )
{
	if( !input || !output || width == 0 || height == 0 || text.size() == 0 )
		return false;

	// the cell size is used as a divisor below and as the kernel block size
	if( mFontCellSize.x <= 0 || mFontCellSize.y <= 0 )
		return false;
	
	const uint32_t cellsPerRow = mFontMapWidth / mFontCellSize.x;
	const uint32_t numText     = text.size();

	if( cellsPerRow == 0 )
		return false;

	bool bufferFull = false;
	
	for( uint32_t t=0; t < numText && !bufferFull; t++ )
	{
		const uint32_t numChars = text[t].first.size();
		
		int2 pos = text[t].second;
		
		for( uint32_t n=0; n < numChars; n++ )
		{
			char c = text[t].first[n];
			
			if( c < 32 || c > 126 )
				continue;

			// never write past the MaxCommands entries allocated in init()
			if( mCmdEntries >= (int)MaxCommands )
			{
				printf(LOG_CUDA "cudaFont -- overlay text exceeds %u glyphs, truncating\n", (uint32_t)MaxCommands);
				bufferFull = true;
				break;
			}
			
			c -= 32;	// the font map starts at the space character
			
			const uint32_t font_y = c / cellsPerRow;
			const uint32_t font_x = c - (font_y * cellsPerRow);
			
			mCommandCPU[mCmdEntries++] = make_short4( pos.x, pos.y,
													  font_x * (mFontCellSize.x + 1),
													  font_y * (mFontCellSize.y + 1) );
		
			pos.x += mFontCellSize.x;
		}
	}

	// nothing printable is not an error, but there is nothing to launch either
	bool rendered = true;

	if( mCmdEntries > 0 )
	{
		rendered = !CUDA_FAILED(cudaOverlayText<float4>( mFontMapGPU, mFontCellSize, mFontMapWidth, color,
				        mCommandGPU, mCmdEntries, 
				       output, width, height));
	}
					   
	mCmdEntries = 0;	// the queue is rebuilt from scratch on every call
	return rendered;
}
172 | 
173 | 
/**
 * Draw a single string onto an image.
 *
 * Wraps `str` and its (x,y) position into a one-element list and forwards to
 * the list-based RenderOverlay() overload.
 *
 * @return false if `str` is NULL, otherwise the result of the list-based overload
 */
bool cudaFont::RenderOverlay( float4* input, float4* output, uint32_t width, uint32_t height, 
							  const char* str, int x, int y, const float4& color )
{
	if( !str )
		return false;	// this function returns bool, not a pointer
		
	std::vector< std::pair< std::string, int2 > > list;
	
	list.push_back( std::pair< std::string, int2 >( str, make_int2(x,y) ));
	
	return RenderOverlay(input, output, width, height, list, color);
}
186 | 						
187 | 	
188 | 


--------------------------------------------------------------------------------
/util/cuda/cudaFont.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * http://github.com/dusty-nv/jetson-inference
 3 |  */
 4 | 
 5 | #ifndef __CUDA_FONT_H__
 6 | #define __CUDA_FONT_H__
 7 | 
 8 | #include "cudaUtility.h"
 9 | 
10 | #include <string>
11 | #include <vector>
12 | 
13 | 
14 | /**
15 |  * Font overlay rendering using CUDA
16 |  * @ingroup util
17 |  */
18 | class cudaFont
19 | {
20 | public:
21 | 	/**
22 | 	 * Create new CUDA font overlay object using textured fonts
23 | 	 */
24 | 	static cudaFont* Create( const char* font_bitmap="/home/nvidia/Face-Recognition/data/fontmapA.png" );
25 | 	
26 | 	/**
27 | 	 * Destructor
28 | 	 */
29 | 	~cudaFont();
30 | 	
31 | 	/**
32 | 	 * Draw font overlay onto image
33 | 	 */
34 | 	bool RenderOverlay( float4* input, float4* output, uint32_t width, uint32_t height, 
35 | 						const char* str, int x, int y, const float4& color=make_float4(0, 0, 0, 255));
36 | 						
37 | 	/**
38 | 	 * Draw font overlay onto image
39 | 	 */
40 | 	bool RenderOverlay( float4* input, float4* output, uint32_t width, uint32_t height, 
41 | 						const std::vector< std::pair< std::string, int2 > >& text,
42 | 						const float4& color=make_float4(0.0f, 0.0f, 0.0f, 255.0f));
43 | 	
44 | protected:
45 | 	cudaFont();
46 | 	bool init( const char* bitmap_path );
47 | 
48 | 	float4* mFontMapCPU;
49 | 	float4* mFontMapGPU;
50 | 	
51 | 	int mFontMapWidth;
52 | 	int mFontMapHeight;
53 | 	int2 mFontCellSize;
54 | 	
55 | 	short4* mCommandCPU;
56 | 	short4* mCommandGPU;
57 | 	int     mCmdEntries;
58 | 	
59 | 	static const uint32_t MaxCommands = 1024;
60 | };
61 | 
62 | #endif
63 | 


--------------------------------------------------------------------------------
/util/cuda/cudaMappedMemory.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * inference-101
 3 |  */
 4 | 
 5 | #ifndef __CUDA_MAPPED_MEMORY_H_
 6 | #define __CUDA_MAPPED_MEMORY_H_
 7 | 
 8 | 
 9 | #include "cudaUtility.h"
10 | 
11 | 
12 | /**
13 |  * Allocate ZeroCopy mapped memory, shared between CUDA and CPU.
14 |  * @ingroup util
15 |  */
16 | inline bool cudaAllocMapped( void** cpuPtr, void** gpuPtr, size_t size )
17 | {
18 | 	if( !cpuPtr || !gpuPtr || size == 0 )
19 | 		return false;
20 | 
21 | 	//CUDA(cudaSetDeviceFlags(cudaDeviceMapHost));
22 | 
23 | 	if( CUDA_FAILED(cudaHostAlloc(cpuPtr, size, cudaHostAllocMapped)) )
24 | 		return false;
25 | 
26 | 	if( CUDA_FAILED(cudaHostGetDevicePointer(gpuPtr, *cpuPtr, 0)) )
27 | 		return false;
28 | 
29 | 	memset(*cpuPtr, 0, size);
30 | 	printf("[cuda]  cudaAllocMapped %zu bytes, CPU %p GPU %p\n", size, *cpuPtr, *gpuPtr);
31 | 	return true;
32 | }
33 | 
34 | 
35 | #endif
36 | 


--------------------------------------------------------------------------------
/util/cuda/cudaNormalize.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * inference-101
 3 |  */
 4 | 
 5 | #include "cudaNormalize.h"
 6 | 
 7 | 
 8 | 
 9 | // gpuNormalize
10 | template <typename T>
11 | __global__ void gpuNormalize( T* input, T* output, int width, int height, float scaling_factor )
12 | {
13 | 	const int x = blockIdx.x * blockDim.x + threadIdx.x;
14 | 	const int y = blockIdx.y * blockDim.y + threadIdx.y;
15 | 
16 | 	if( x >= width || y >= height )
17 | 		return;
18 | 
19 | 	const T px = input[ y * width + x ];
20 | 
21 | 	output[y*width+x] = make_float4(px.x * scaling_factor,
22 | 							  px.y * scaling_factor,
23 | 							  px.z * scaling_factor,
24 | 							  px.w * scaling_factor);
25 | }
26 | 
27 | 
/**
 * Rescale the pixel values of a float4 image.
 *
 * Every component is multiplied by output_range.y / input_range.y; the .x
 * members of both ranges are not used by this implementation.
 *
 * @return cudaErrorInvalidDevicePointer for NULL images, cudaErrorInvalidValue
 *         for a zero width or height, otherwise the checked result of
 *         cudaGetLastError() after the launch
 */
cudaError_t cudaNormalizeRGBA( float4* input, const float2& input_range,
						 float4* output, const float2& output_range,
						 size_t  width,  size_t height )
{
	if( input == NULL || output == NULL )
		return cudaErrorInvalidDevicePointer;

	if( !(width != 0 && height != 0) )
		return cudaErrorInvalidValue;

	// ratio between the upper bounds of the two ranges
	const float multiplier = output_range.y / input_range.y;

	// 8x8 threads per block, enough blocks to cover the image
	const dim3 threads(8, 8);
	const dim3 blocks(iDivUp(width,threads.x), iDivUp(height,threads.y));

	gpuNormalize<float4><<<blocks, threads>>>(input, output, width, height, multiplier);

	return CUDA(cudaGetLastError());
}
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/util/cuda/cudaNormalize.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * inference-101
 3 |  */
 4 | 
 5 | #ifndef __CUDA_NORMALIZE_H__
 6 | #define __CUDA_NORMALIZE_H__
 7 | 
 8 | 
 9 | #include "cudaUtility.h"
10 | 
11 | 
12 | /**
13 |  * Rebase the pixel intensities of an image between two scales.
14 |  * For example, convert an image with values 0.0-255 to 0.0-1.0.
15 |  * @ingroup util
16 |  */
17 | cudaError_t cudaNormalizeRGBA( float4* input,  const float2& input_range,
18 | 						 float4* output, const float2& output_range,
19 | 						 size_t  width,  size_t height );
20 | 
21 | #endif
22 | 
23 | 


--------------------------------------------------------------------------------
/util/cuda/cudaOverlay.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * http://github.com/dusty-nv/jetson-inference
 3 |  */
 4 | 
 5 | #include "cudaOverlay.h"
 6 | 
 7 | 
 8 | static inline __device__ __host__ bool eq_less( float a, float b, float epsilon )
 9 | {
10 | 	return (a > (b - epsilon) && a < (b + epsilon)) ? true : false;
11 | }
12 | 
/**
 * Kernel that blends `color` over every pixel lying inside a rectangle.
 *
 * Each thread handles one pixel.  A rectangle is stored as (x,y) = one corner
 * and (z,w) = the opposite corner, both inclusive.  color.w (0-255 scale) is
 * the blend weight; a pixel covered by several rectangles is blended once per
 * rectangle.  Only the x/y/z components of the pixel are modified.
 * Note that the whole rectangle area is blended, not just its border.
 */
template<typename T>
__global__ void gpuRectOutlines( T* input, T* output, int width, int height,
						        float4* rects, int numRects, float4 color ) 
{
	const int px = blockIdx.x * blockDim.x + threadIdx.x;
	const int py = blockIdx.y * blockDim.y + threadIdx.y;

	if( px >= width || py >= height )
		return;

	const int idx = py * width + px;

	T pixel = input[idx];

	const float fx = px;
	const float fy = py;

	const float alpha = color.w / 255.0f;
	const float ialph = 1.0f - alpha;

	for( int i=0; i < numRects; i++ )
	{
		const float4 box = rects[i];

		const bool insideRows = (fy >= box.y) && (fy <= box.w);
		const bool insideCols = (fx >= box.x) && (fx <= box.z);

		if( !(insideRows && insideCols) )
			continue;

		pixel.x = alpha * color.x + ialph * pixel.x;
		pixel.y = alpha * color.y + ialph * pixel.y;
		pixel.z = alpha * color.z + ialph * pixel.z;
	}

	output[idx] = pixel;
}
54 | 
55 | 
/**
 * Launch gpuRectOutlines over a width x height float4 image.
 *
 * @return cudaErrorInvalidValue if a pointer is NULL or width, height or
 *         numBoxes is zero, otherwise the result of cudaGetLastError()
 */
cudaError_t cudaRectOutlineOverlay( float4* input, float4* output, uint32_t width, uint32_t height, float4* boundingBoxes, int numBoxes, const float4& color )
{
	const bool validImage = input && output && width != 0 && height != 0;
	const bool validBoxes = boundingBoxes && numBoxes != 0;

	if( !validImage || !validBoxes )
		return cudaErrorInvalidValue;

	// 8x8 threads per block, enough blocks to cover the image
	const dim3 threads(8, 8);
	const dim3 blocks(iDivUp(width,threads.x), iDivUp(height,threads.y));

	gpuRectOutlines<float4><<<blocks, threads>>>(input, output, width, height, boundingBoxes, numBoxes, color);

	return cudaGetLastError();
}
69 | 


--------------------------------------------------------------------------------
/util/cuda/cudaOverlay.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * http://github.com/dusty-nv/jetson-inference
 3 |  */
 4 | 
 5 | #ifndef __CUDA_OVERLAY_H__
 6 | #define __CUDA_OVERLAY_H__
 7 | 
 8 | #include "cudaUtility.h"
 9 | 
10 | 
11 | /**
12 |  * cudaRectOutlineOverlay
13 |  * @ingroup util
14 |  */
15 | cudaError_t cudaRectOutlineOverlay( float4* input, float4* output, uint32_t width, uint32_t height, float4* boundingBoxes, int numBoxes, const float4& color );
16 | 
17 | 
18 | /**
19 |  * cudaRectFillOverlay
20 |  * @ingroup util
21 |  */
22 | //cudaError_t cudaRectFillOverlay( float4* input, float4* output, uint32_t width, uint32_t height, float4* boundingBoxes, int numBoxes, const float4& color );
23 | 
24 | 
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/util/cuda/cudaRGB.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * http://github.com/dusty-nv
 3 |  */
 4 | 
 5 | #include "cudaRGB.h"
 6 | 
 7 | //-------------------------------------------------------------------------------------------------------------------------
 8 | 
 9 | __global__ void RGBToRGBAf(uchar3* srcImage,
10 |                            float4* dstImage,
11 |                            uint32_t width,       uint32_t height)
12 | {
13 |     int x, y, pixel;
14 | 
15 |     x = (blockIdx.x * blockDim.x) + threadIdx.x;
16 |     y = (blockIdx.y * blockDim.y) + threadIdx.y;
17 | 	
18 |     pixel = y * width + x;
19 | 
20 |     if (x >= width)
21 |         return; 
22 | 
23 |     if (y >= height)
24 |         return;
25 | 
26 | //	printf("cuda thread %i %i  %i %i pixel %i \n", x, y, width, height, pixel);
27 | 		
28 | 	const float  s  = 1.0f;
29 | 	const uchar3 px = srcImage[pixel];
30 | 	
31 | 	dstImage[pixel] = make_float4(px.x * s, px.y * s, px.z * s, 255.0f * s);
32 | }
33 | 
/**
 * Convert an 8-bit RGB image to a floating-point RGBA image.
 *
 * Launches RGBToRGBAf with 8x8 thread blocks covering width x height pixels.
 *
 * @return cudaErrorInvalidDevicePointer for NULL images, cudaErrorInvalidValue
 *         for a zero width or height, otherwise the checked result of
 *         cudaGetLastError() after the launch
 */
cudaError_t cudaRGBToRGBAf( uchar3* srcDev, float4* destDev, size_t width, size_t height )
{
	if( !srcDev || !destDev )
		return cudaErrorInvalidDevicePointer;

	// a zero-sized image would otherwise reach the launch as an empty grid
	if( width == 0 || height == 0 )
		return cudaErrorInvalidValue;

	const dim3 blockDim(8,8,1);
	const dim3 gridDim(iDivUp(width,blockDim.x), iDivUp(height,blockDim.y), 1);

	RGBToRGBAf<<<gridDim, blockDim>>>( srcDev, destDev, width, height );
	
	return CUDA(cudaGetLastError());
}
46 | 
47 | 


--------------------------------------------------------------------------------
/util/cuda/cudaRGB.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * http://github.com/dusty-nv/jetson-inference
 3 |  */
 4 | 
 5 | #ifndef __CUDA_RGB_CONVERT_H
 6 | #define __CUDA_RGB_CONVERT_H
 7 | 
 8 | 
 9 | #include "cudaUtility.h"
10 | #include <stdint.h>
11 | 
12 | 
13 | /**
14 |  * Convert 8-bit fixed-point RGB image to 32-bit floating-point RGBA image
15 |  * @ingroup util
16 |  */
17 | cudaError_t cudaRGBToRGBAf( uchar3* input, float4* output, size_t width, size_t height );
18 | 
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/util/cuda/cudaResize.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * inference-101
 3 |  */
 4 | 
 5 | #include "cudaResize.h"
 6 | 
 7 | 
 8 | 
 9 | // gpuResample
10 | template <typename T>
11 | __global__ void gpuResize( float2 scale, T* input, int iWidth, T* output, int oWidth, int oHeight )
12 | {
13 | 	const int x = blockIdx.x * blockDim.x + threadIdx.x;
14 | 	const int y = blockIdx.y * blockDim.y + threadIdx.y;
15 | 
16 | 	if( x >= oWidth || y >= oHeight )
17 | 		return;
18 | 
19 | 	const int dx = ((float)x * scale.x);
20 | 	const int dy = ((float)y * scale.y);
21 | 
22 | 	const T px = input[ dy * iWidth + dx ];
23 | 
24 | 	output[y*oWidth+x] = px;
25 | }
26 | 
27 | 
/**
 * Resize a single-channel float image on the GPU.
 *
 * The per-axis scale passed to gpuResize is input size / output size.
 *
 * @return cudaErrorInvalidDevicePointer for NULL images, cudaErrorInvalidValue
 *         if any dimension is zero, otherwise the checked result of
 *         cudaGetLastError() after the launch
 */
cudaError_t cudaResize( float* input, size_t inputWidth, size_t inputHeight,
				        float* output, size_t outputWidth, size_t outputHeight )
{
	if( input == NULL || output == NULL )
		return cudaErrorInvalidDevicePointer;

	const bool emptyInput  = (inputWidth == 0)  || (inputHeight == 0);
	const bool emptyOutput = (outputWidth == 0) || (outputHeight == 0);

	if( emptyInput || emptyOutput )
		return cudaErrorInvalidValue;

	// how many input pixels one output pixel spans, per axis
	const float scaleX = float(inputWidth) / float(outputWidth);
	const float scaleY = float(inputHeight) / float(outputHeight);
	const float2 scale = make_float2( scaleX, scaleY );

	// 8x8 threads per block, enough blocks to cover the output image
	const dim3 threads(8, 8);
	const dim3 blocks(iDivUp(outputWidth,threads.x), iDivUp(outputHeight,threads.y));

	gpuResize<float><<<blocks, threads>>>(scale, input, inputWidth, output, outputWidth, outputHeight);

	return CUDA(cudaGetLastError());
}
49 | 
50 | 
/**
 * Resize a float4 (RGBA) image on the GPU.
 *
 * The per-axis scale passed to gpuResize is input size / output size.
 *
 * @return cudaErrorInvalidDevicePointer for NULL images, cudaErrorInvalidValue
 *         if any dimension is zero, otherwise the checked result of
 *         cudaGetLastError() after the launch
 */
cudaError_t cudaResizeRGBA( float4* input,  size_t inputWidth, size_t inputHeight,
				            float4* output, size_t outputWidth, size_t outputHeight )
{
	if( input == NULL || output == NULL )
		return cudaErrorInvalidDevicePointer;

	const bool emptyInput  = (inputWidth == 0)  || (inputHeight == 0);
	const bool emptyOutput = (outputWidth == 0) || (outputHeight == 0);

	if( emptyInput || emptyOutput )
		return cudaErrorInvalidValue;

	// how many input pixels one output pixel spans, per axis
	const float scaleX = float(inputWidth) / float(outputWidth);
	const float scaleY = float(inputHeight) / float(outputHeight);
	const float2 scale = make_float2( scaleX, scaleY );

	// 8x8 threads per block, enough blocks to cover the output image
	const dim3 threads(8, 8);
	const dim3 blocks(iDivUp(outputWidth,threads.x), iDivUp(outputHeight,threads.y));

	gpuResize<float4><<<blocks, threads>>>(scale, input, inputWidth, output, outputWidth, outputHeight);

	return CUDA(cudaGetLastError());
}
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/util/cuda/cudaResize.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * inference-101
 3 |  */
 4 | 
 5 | #ifndef __CUDA_RESIZE_H__
 6 | #define __CUDA_RESIZE_H__
 7 | 
 8 | 
 9 | #include "cudaUtility.h"
10 | 
11 | 
12 | /**
13 |  * Function for increasing or decreasing the size of an image on the GPU.
14 |  * @ingroup util
15 |  */
16 | cudaError_t cudaResize( float* input,  size_t inputWidth,  size_t inputHeight,
17 | 				    float* output, size_t outputWidth, size_t outputHeight );
18 | 
19 | 
20 | /**
21 |  * Function for increasing or decreasing the size of an image on the GPU.
22 |  * @ingroup util
23 |  */
24 | cudaError_t cudaResizeRGBA( float4* input,  size_t inputWidth,  size_t inputHeight,
25 | 				        float4* output, size_t outputWidth, size_t outputHeight );
26 | 
27 | 
28 | 						
29 | 
30 | #endif
31 | 
32 | 


--------------------------------------------------------------------------------
/util/cuda/cudaUtility.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * http://github.com/dusty-nv/jetson-inference
 3 |  */
 4 | 
 5 | #ifndef __CUDA_UTILITY_H_
 6 | #define __CUDA_UTILITY_H_
 7 | 
 8 | 
 9 | #include <cuda_runtime.h>
10 | #include <cuda.h>
11 | #include <stdio.h>
12 | #include <string.h>
13 | 
14 | 
/**
 * Execute a CUDA call and print out any errors
 *
 * Passes the result of `x`, the stringified expression and the call site
 * (__FILE__ / __LINE__) to cudaCheckError().
 * @return the original cudaError_t result
 * @ingroup util
 */
#define CUDA(x)				cudaCheckError((x), #x, __FILE__, __LINE__)

/**
 * Evaluates to true on success
 *
 * `x` is routed through CUDA(), so a failure is still printed.
 * @ingroup util
 */
#define CUDA_SUCCESS(x)			(CUDA(x) == cudaSuccess)

/**
 * Evaluates to true on failure
 *
 * `x` is routed through CUDA(), so a failure is still printed.
 * @ingroup util
 */
#define CUDA_FAILED(x)			(CUDA(x) != cudaSuccess)

/**
 * Return from the boolean function if CUDA call fails
 *
 * NOTE: this expands to a bare `if` statement that already ends in `;`.
 * Use it only as a standalone statement: placed as the unbraced body of an
 * `if` that has an `else`, the expansion will not parse as intended.
 * @ingroup util
 */
#define CUDA_VERIFY(x)			if(CUDA_FAILED(x))	return false;

/**
 * LOG_CUDA string.
 *
 * Prefix prepended to the messages printed by cudaCheckError().
 * @ingroup util
 */
#define LOG_CUDA "[cuda]   "
45 | 
46 | /*
47 |  * define this if you want all cuda calls to be printed
48 |  */
49 | //#define CUDA_TRACE
50 | 
51 | 
52 | 
53 | /**
54 |  * cudaCheckError
55 |  * @ingroup util
56 |  */
57 | inline cudaError_t cudaCheckError(cudaError_t retval, const char* txt, const char* file, int line )
58 | {
59 | #if !defined(CUDA_TRACE)
60 | 	if( retval == cudaSuccess)
61 | 		return cudaSuccess;
62 | #endif
63 | 
64 | 	//int activeDevice = -1;
65 | 	//cudaGetDevice(&activeDevice);
66 | 
67 | 	//Log("[cuda]   device %i  -  %s\n", activeDevice, txt);
68 | 	
69 | 	printf(LOG_CUDA "%s\n", txt);
70 | 
71 | 
72 | 	if( retval != cudaSuccess )
73 | 	{
74 | 		printf(LOG_CUDA "   %s (error %u) (hex 0x%02X)\n", cudaGetErrorString(retval), retval, retval);
75 | 		printf(LOG_CUDA "   %s:%i\n", file, line);	
76 | 	}
77 | 
78 | 	return retval;
79 | }
80 | 
81 | 
82 | /**
83 |  * iDivUp
84 |  * @ingroup util
85 |  */
86 | inline __device__ __host__ int iDivUp( int a, int b )  		{ return (a % b != 0) ? (a / b + 1) : (a / b); }
87 | 
88 | 
89 | 
90 | #endif
91 | 


--------------------------------------------------------------------------------
/util/cuda/cudaYUV-NV12.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * inference-101
  3 |  */
  4 | 
  5 | #include "cudaYUV.h"
  6 | 
  7 | 
  8 | #define COLOR_COMPONENT_MASK            0x3FF
  9 | #define COLOR_COMPONENT_BIT_SIZE        10
 10 | 
 11 | #define FIXED_DECIMAL_POINT             24
 12 | #define FIXED_POINT_MULTIPLIER          1.0f
 13 | #define FIXED_COLOR_COMPONENT_MASK      0xffffffff
 14 | 
 15 | #define MUL(x,y)    (x*y)
 16 | 
 17 | 
 18 | 
 19 | __constant__ uint32_t constAlpha;
 20 | __constant__ float  constHueColorSpaceMat[9];
 21 | 
 22 | 
 23 | 
 24 | __device__ void YUV2RGB(uint32_t *yuvi, float *red, float *green, float *blue)
 25 | {
 26 |    
 27 | 
 28 |     // Prepare for hue adjustment
 29 |     /*
 30 | 	 float luma, chromaCb, chromaCr;
 31 | 
 32 | 	luma     = (float)yuvi[0];
 33 |     chromaCb = (float)((int)yuvi[1] - 512.0f);
 34 |     chromaCr = (float)((int)yuvi[2] - 512.0f);
 35 | 
 36 |     // Convert YUV To RGB with hue adjustment
 37 |     *red  = MUL(luma,     constHueColorSpaceMat[0]) +
 38 |             MUL(chromaCb, constHueColorSpaceMat[1]) +
 39 |             MUL(chromaCr, constHueColorSpaceMat[2]);
 40 |     *green= MUL(luma,     constHueColorSpaceMat[3]) +
 41 |             MUL(chromaCb, constHueColorSpaceMat[4]) +
 42 |             MUL(chromaCr, constHueColorSpaceMat[5]);
 43 |     *blue = MUL(luma,     constHueColorSpaceMat[6]) +
 44 |             MUL(chromaCb, constHueColorSpaceMat[7]) +
 45 |             MUL(chromaCr, constHueColorSpaceMat[8]);*/
 46 | 
 47 | 	const float luma = float(yuvi[0]);
 48 | 	const float u    = float(yuvi[1]) - 512.0f;
 49 | 	const float v    = float(yuvi[2]) - 512.0f;
 50 | 
 51 |    /*R = Y + 1.140V
 52 |    G = Y - 0.395U - 0.581V
 53 |    B = Y + 2.032U*/
 54 | 
 55 | 	/**green = luma + 1.140f * v;
 56 | 	*blue  = luma - 0.395f * u - 0.581f * v;
 57 | 	*red   = luma + 2.032f * u;*/
 58 | 
 59 | 	*red    = luma + 1.140f * v;
 60 | 	*green  = luma - 0.395f * u - 0.581f * v;
 61 | 	*blue   = luma + 2.032f * u;
 62 | }
 63 | 
 64 | 
 65 | __device__ uint32_t RGBAPACK_8bit(float red, float green, float blue, uint32_t alpha)
 66 | {
 67 |     uint32_t ARGBpixel = 0;
 68 | 
 69 |     // Clamp final 10 bit results
 70 |     red   = min(max(red,   0.0f), 255.0f);
 71 |     green = min(max(green, 0.0f), 255.0f);
 72 |     blue  = min(max(blue,  0.0f), 255.0f);
 73 | 
 74 |     // Convert to 8 bit unsigned integers per color component
 75 |     ARGBpixel = ((((uint32_t)red)   << 24) |
 76 |                  (((uint32_t)green) << 16) |
 77 | 		       (((uint32_t)blue)  <<  8) | (uint32_t)alpha);
 78 | 
 79 |     return  ARGBpixel;
 80 | }
 81 | 
 82 | 
 83 | __device__ uint32_t RGBAPACK_10bit(float red, float green, float blue, uint32_t alpha)
 84 | {
 85 |     uint32_t ARGBpixel = 0;
 86 | 
 87 |     // Clamp final 10 bit results
 88 |     red   = min(max(red,   0.0f), 1023.f);
 89 |     green = min(max(green, 0.0f), 1023.f);
 90 |     blue  = min(max(blue,  0.0f), 1023.f);
 91 | 
 92 |     // Convert to 8 bit unsigned integers per color component
 93 |     ARGBpixel = ((((uint32_t)red   >> 2) << 24) |
 94 |                  (((uint32_t)green >> 2) << 16) |
 95 |                  (((uint32_t)blue  >> 2) <<  8) | (uint32_t)alpha);
 96 | 
 97 |     return  ARGBpixel;
 98 | }
 99 | 
100 | 
/**
 * Kernel that copies the luma bytes of the source into a grey 32-bit image.
 *
 * Each thread handles two horizontally adjacent pixels (x and x + 1) of row y:
 * it reads their bytes from the source, and writes each one as a pixel with
 * red = green = blue = luma, packed by RGBAPACK_8bit() together with
 * constAlpha.  nSourcePitch is used as the source row stride in bytes and
 * nDestPitch / 4 as the destination row stride in 32-bit pixels.
 */
/*extern "C"*/
__global__ void Passthru(uint32_t *srcImage,   size_t nSourcePitch,
                         uint32_t *dstImage,   size_t nDestPitch,
                         uint32_t width,       uint32_t height)
{
    const uint32_t srcPitch = nSourcePitch;
    const uint32_t dstPitch = nDestPitch >> 2;
    const uint8_t* srcBytes = (const uint8_t *)srcImage;

    // two pixels per thread along x, one row per thread along y
    const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
    const int y = blockIdx.y *  blockDim.y       +  threadIdx.y;

    if (x >= width || y >= height)
        return;

    const uint32_t srcIdx = y * srcPitch + x;
    const uint32_t dstIdx = y * dstPitch + x;

    // read both luma samples before writing anything
    const float luma0 = (srcBytes[srcIdx    ] & 0x00FF);
    const float luma1 = (srcBytes[srcIdx + 1] & 0x00FF);

    dstImage[dstIdx    ] = RGBAPACK_8bit(luma0, luma0, luma0, constAlpha);
    dstImage[dstIdx + 1] = RGBAPACK_8bit(luma1, luma1, luma1, constAlpha);
}
140 | 
141 | 
/**
 * CUDA kernel converting an NV12 image to packed 32-bit pixels.
 *
 * Each thread converts two horizontally adjacent pixels (x and x + 1) of
 * row y.  The source is read as bytes: the luma rows come first, and the
 * chroma rows start at byte offset pitch * height, with the two chroma
 * bytes of a pixel pair stored at columns x and x + 1.  All samples are
 * widened from 8 to 10 bits, packed into one word per pixel, converted with
 * YUV2RGB() and written with RGBAPACK_10bit() using constAlpha.
 *
 * nSourcePitch is the source row stride in bytes; nDestPitch >> 2 is used
 * as the destination row stride in 32-bit pixels.
 */
/*extern "C"*/
__global__ void NV12ToARGB(uint32_t *srcImage,     size_t nSourcePitch,
                           uint32_t *dstImage,     size_t nDestPitch,
                           uint32_t width,         uint32_t height)
{
    int x, y;
    uint32_t yuv101010Pel[2];
    // initial value (width rounded up to a multiple of 64) is replaced by nSourcePitch below
    uint32_t processingPitch = ((width) + 63) & ~63;
    uint32_t dstImagePitch   = nDestPitch >> 2;
    uint8_t *srcImageU8     = (uint8_t *)srcImage;

    processingPitch = nSourcePitch;

    // x is multiplied by 2 because each thread processes 2 pixels
    x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
    y = blockIdx.y *  blockDim.y       +  threadIdx.y;

    // threads outside the image exit (the clamping alternative is commented out)
    if (x >= width)
        return; //x = width - 1;

    if (y >= height)
        return; // y = height - 1;

    // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way.
    // if we move to texture we could read 4 luminance values
    // (<< 2 widens the 8-bit luma to 10 bits; it occupies bits 0-9 of the packed word)
    yuv101010Pel[0] = (srcImageU8[y * processingPitch + x    ]) << 2;
    yuv101010Pel[1] = (srcImageU8[y * processingPitch + x + 1]) << 2;

    // the chroma rows follow the `height` luma rows; one chroma row serves two image rows
    uint32_t chromaOffset    = processingPitch * height;
    int y_chroma = y >> 1;

    if (y & 1)  // odd scanline ?
    {
        uint32_t chromaCb;
        uint32_t chromaCr;

        chromaCb = srcImageU8[chromaOffset + y_chroma * processingPitch + x    ];
        chromaCr = srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1];

        // average with the next chroma row (rounding up), except on the last chroma row
        if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
        {
            chromaCb = (chromaCb + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x    ] + 1) >> 1;
            chromaCr = (chromaCr + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x + 1] + 1) >> 1;
        }

        // the extra "+ 2" in each shift widens the 8-bit chroma to 10 bits:
        // Cb lands in bits 10-19 and Cr in bits 20-29; both pixels share the same chroma
        yuv101010Pel[0] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE       + 2));
        yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));

        yuv101010Pel[1] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE       + 2));
        yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
    }
    else
    {
        // even scanline: use the chroma row as-is, same packing as above
        yuv101010Pel[0] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x    ] << (COLOR_COMPONENT_BIT_SIZE       + 2));
        yuv101010Pel[0] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));

        yuv101010Pel[1] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x    ] << (COLOR_COMPONENT_BIT_SIZE       + 2));
        yuv101010Pel[1] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
    }

    // this steps performs the color conversion
    // yuvi[0..2] = Y, Cb, Cr of the first pixel; yuvi[3..5] = the same for the second pixel
    uint32_t yuvi[6];
    float red[2], green[2], blue[2];

    yuvi[0] = (yuv101010Pel[0] &   COLOR_COMPONENT_MASK);
    yuvi[1] = ((yuv101010Pel[0] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
    yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);

    yuvi[3] = (yuv101010Pel[1] &   COLOR_COMPONENT_MASK);
    yuvi[4] = ((yuv101010Pel[1] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
    yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);

    // YUV to RGB Transformation conversion
    YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]);
    YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]);

    // Clamp the results and pack them into the two destination pixels
    dstImage[y * dstImagePitch + x     ] = RGBAPACK_10bit(red[0], green[0], blue[0], constAlpha);
    dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_10bit(red[1], green[1], blue[1], constAlpha);
}
223 | 
224 | 
225 | bool nv12ColorspaceSetup = false;
226 | 
227 | 
/**
 * Convert a pitched NV12 image to 32-bit pixels on the GPU.
 *
 * Calls cudaNV12SetupColorspace() while nv12ColorspaceSetup is false, then
 * launches NV12ToARGB with 32x16 thread blocks.  Each thread covers two
 * pixels horizontally, so one block spans 64 pixels in x and 16 in y.
 *
 * @return cudaErrorInvalidDevicePointer for NULL images, cudaErrorInvalidValue
 *         if a pitch or dimension is zero, otherwise the checked result of
 *         cudaGetLastError() after the launch
 */
cudaError_t cudaNV12ToRGBA( uint8_t* srcDev, size_t srcPitch, uchar4* destDev, size_t destPitch, size_t width, size_t height )
{
	if( srcDev == NULL || destDev == NULL )
		return cudaErrorInvalidDevicePointer;

	const bool badPitch = (srcPitch == 0) || (destPitch == 0);
	const bool badSize  = (width == 0) || (height == 0);

	if( badPitch || badSize )
		return cudaErrorInvalidValue;

	if( !nv12ColorspaceSetup )
		cudaNV12SetupColorspace();

	const dim3 threads(32,16,1);

	// pixels covered by one block along each axis
	const size_t spanX = 2 * threads.x;
	const size_t spanY = threads.y;

	const dim3 blocks((width + (spanX - 1)) / spanX, (height + (spanY - 1)) / spanY, 1);

	NV12ToARGB<<<blocks, threads>>>( (uint32_t*)srcDev, srcPitch, (uint32_t*)destDev, destPitch, width, height );

	return CUDA(cudaGetLastError());
}
247 | 
/**
 * Convert a tightly packed NV12 image to 32-bit pixels on the GPU.
 * Forwards to the pitched overload with row strides derived from `width`.
 */
cudaError_t cudaNV12ToRGBA( uint8_t* srcDev, uchar4* destDev, size_t width, size_t height )
{
	// no row padding: one byte per source sample, one uchar4 per destination pixel
	const size_t srcPitch  = width * sizeof(uint8_t);
	const size_t destPitch = width * sizeof(uchar4);

	return cudaNV12ToRGBA(srcDev, srcPitch, destDev, destPitch, width, height);
}
252 | 
253 | 
254 | //-------------------------------------------------------------------------------------------------------------------------
255 | 
/**
 * CUDA kernel converting an 8-bit NV12 image into float4 RGBA pixels.
 *
 * Each thread converts two horizontally adjacent pixels, which share one
 * Cb/Cr pair.  The chroma samples are read from the rows that start at byte
 * offset `nSourcePitch * height`, with Cb at column x and Cr at column x+1.
 * On odd scanlines the chroma is averaged with the following chroma row
 * (except for the last chroma row).
 *
 * @param srcImage      NV12 source buffer (accessed byte-wise).
 * @param nSourcePitch  distance in bytes between consecutive source rows.
 * @param dstImage      destination float4 buffer; rows are addressed as
 *                      `y * width`, i.e. assumed to be tightly packed.
 * @param nDestPitch    NOTE: currently not consulted by the kernel.
 * @param width         image width in pixels.
 * @param height        image height in pixels.
 *
 * The output colour channels are the results of YUV2RGB() scaled by 255/1024,
 * alpha is set to 1.0f.  No clamping is applied here.
 */
__global__ void NV12ToRGBAf(uint32_t* srcImage,  size_t nSourcePitch,
                           float4* dstImage,     size_t nDestPitch,
                           uint32_t width,       uint32_t height)
{
    // we multiply by 2 because we process 2 pixels per thread
    const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
    const int y = blockIdx.y *  blockDim.y       +  threadIdx.y;

    if (x >= width || y >= height)
        return;

    // for odd widths the last thread of a row owns only one valid pixel;
    // the second one must be neither read nor written
    const bool hasSecondPixel = (x + 1) < width;

    const uint8_t* srcImageU8      = (const uint8_t*)srcImage;
    const uint32_t processingPitch = nSourcePitch;

    // Read 2 luma components at a time, since CbCr are decimated horizontally.
    const uint32_t lumaIndex = y * processingPitch + x;

    uint32_t yuv101010Pel[2];
    yuv101010Pel[0] = (srcImageU8[lumaIndex]) << 2;
    yuv101010Pel[1] = (srcImageU8[hasSecondPixel ? lumaIndex + 1 : lumaIndex]) << 2;

    // locate the Cb/Cr pair shared by both pixels
    const uint32_t chromaOffset = processingPitch * height;
    const int      y_chroma     = y >> 1;
    const uint32_t chromaIndex  = chromaOffset + y_chroma * processingPitch + x;

    uint32_t chromaCb = srcImageU8[chromaIndex    ];
    uint32_t chromaCr = srcImageU8[chromaIndex + 1];

    // odd scanline: interpolate chroma vertically with the next chroma row
    if ((y & 1) && y_chroma < ((height >> 1) - 1))
    {
        chromaCb = (chromaCb + srcImageU8[chromaIndex + processingPitch    ] + 1) >> 1;
        chromaCr = (chromaCr + srcImageU8[chromaIndex + processingPitch + 1] + 1) >> 1;
    }

    // pack luma and chroma into one word per pixel
    const uint32_t chromaBits = (chromaCb << (COLOR_COMPONENT_BIT_SIZE       + 2)) |
                                (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));

    yuv101010Pel[0] |= chromaBits;
    yuv101010Pel[1] |= chromaBits;

    // unpack into the three components expected by YUV2RGB()
    uint32_t yuvi[6];
    float red[2], green[2], blue[2];

    for (int p = 0; p < 2; p++)
    {
        yuvi[3 * p + 0] = (yuv101010Pel[p] &   COLOR_COMPONENT_MASK);
        yuvi[3 * p + 1] = ((yuv101010Pel[p] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
        yuvi[3 * p + 2] = ((yuv101010Pel[p] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
    }

    // YUV to RGB Transformation conversion
    YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]);
    YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]);

    // rescale the results by 255/1024
    const float s = 1.0f / 1024.0f * 255.0f;

    dstImage[y * width + x] = make_float4(red[0] * s, green[0] * s, blue[0] * s, 1.0f);

    if (hasSecondPixel)
        dstImage[y * width + x + 1] = make_float4(red[1] * s, green[1] * s, blue[1] * s, 1.0f);
}
345 | 
346 | 
347 | 
// cudaNV12ToRGBAf
/**
 * Launch the NV12 -> float4 RGBA conversion kernel.
 *
 * @param srcDev     NV12 source buffer.
 * @param srcPitch   source row pitch in bytes.
 * @param destDev    float4 destination buffer.
 * @param destPitch  destination row pitch in bytes (forwarded to the kernel).
 * @param width      image width in pixels.
 * @param height     image height in pixels.
 *
 * @returns cudaErrorInvalidDevicePointer for NULL buffers, cudaErrorInvalidValue
 *          for zero pitches/dimensions, the error of the colorspace setup if
 *          that fails, otherwise the result of the kernel launch.
 */
cudaError_t cudaNV12ToRGBAf( uint8_t* srcDev, size_t srcPitch, float4* destDev, size_t destPitch, size_t width, size_t height )
{
	if( !srcDev || !destDev )
		return cudaErrorInvalidDevicePointer;

	if( srcPitch == 0 || destPitch == 0 || width == 0 || height == 0 )
		return cudaErrorInvalidValue;

	// lazily upload the conversion constants; do not launch without them
	if( !nv12ColorspaceSetup )
	{
		const cudaError_t setupResult = cudaNV12SetupColorspace();

		if( setupResult != cudaSuccess )
			return setupResult;
	}

	// every thread converts two horizontally adjacent pixels, so one block
	// covers 2*blockDim.x columns
	const dim3 blockDim(8,8,1);
	const dim3 gridDim(iDivUp(width, 2 * blockDim.x), iDivUp(height, blockDim.y), 1);

	NV12ToRGBAf<<<gridDim, blockDim>>>( (uint32_t*)srcDev, srcPitch, destDev, destPitch, width, height );
	
	return CUDA(cudaGetLastError());
}
368 | 
/**
 * Convenience overload of cudaNV12ToRGBAf() for tightly packed buffers.
 * The source pitch is `width` bytes and the destination pitch is `width`
 * float4 elements; the call is forwarded to the pitched overload.
 */
cudaError_t cudaNV12ToRGBAf( uint8_t* srcDev, float4* destDev, size_t width, size_t height )
{
	const size_t packedSrcPitch  = width * sizeof(uint8_t);
	const size_t packedDestPitch = width * sizeof(float4);

	return cudaNV12ToRGBAf(srcDev, packedSrcPitch, destDev, packedDestPitch, width, height);
}
373 | 
374 | 
// cudaNV12SetupColorspace
/**
 * Build the 3x3 hue-rotated colour conversion matrix and the alpha constant
 * and upload both to the device symbols constHueColorSpaceMat / constAlpha.
 *
 * @param hue  hue rotation angle passed to sin()/cos().
 * @returns cudaSuccess, or cudaErrorInvalidSymbol if either upload fails.
 *          On success nv12ColorspaceSetup is set to true.
 */
cudaError_t cudaNV12SetupColorspace( float hue )
{
	// compile-time choice between the CCIR 601 and CCIR 709 coefficient sets;
	// 709 is the one currently in use
	const bool itu601 = false;

	const float column0   = itu601 ? 1.1644f : 1.0f;
	const float row0Gain  = itu601 ? 1.5960f : 1.57480f;
	const float row1GainA = itu601 ? 0.3918f : 0.18732f;
	const float row1GainB = itu601 ? 0.8130f : 0.46812f;
	const float row2Gain  = itu601 ? 2.0172f : 1.85560f;

	const float hueSin = sin(hue);
	const float hueCos = cos(hue);

	// row-major 3x3 matrix
	float hueCSC[9] =
	{
		column0, hueSin * row0Gain,                                 hueCos * row0Gain,
		column0, (hueCos * -row1GainA) - (hueSin * row1GainB),      (hueSin * row1GainA) - (hueCos * row1GainB),
		column0, hueCos * row2Gain,                                 hueSin * -row2Gain
	};

	if( CUDA_FAILED(cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, sizeof(float) * 9)) )
		return cudaErrorInvalidSymbol;

	// fully opaque alpha in the top byte
	uint32_t cudaAlpha = ((uint32_t)0xff<< 24);

	if( CUDA_FAILED(cudaMemcpyToSymbol(constAlpha, &cudaAlpha, sizeof(uint32_t))) )
		return cudaErrorInvalidSymbol;

	nv12ColorspaceSetup = true;
	return cudaSuccess;
}
424 | 
425 | 


--------------------------------------------------------------------------------
/util/cuda/cudaYUV-YUYV.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * http://github.com/dusty-nv/jetson-inference
  3 |  */
  4 | 
  5 | #include "cudaYUV.h"
  6 | 
  7 | 
  8 | inline __device__ __host__ float clamp(float f, float a, float b)
  9 | {
 10 |     return fmaxf(a, fminf(f, b));
 11 | }
 12 | 
 13 | 
 14 | /* From RGB to YUV
 15 | 
 16 |    Y = 0.299R + 0.587G + 0.114B
 17 |    U = 0.492 (B-Y)
 18 |    V = 0.877 (R-Y)
 19 | 
 20 |    It can also be represented as:
 21 | 
 22 |    Y =  0.299R + 0.587G + 0.114B
 23 |    U = -0.147R - 0.289G + 0.436B
 24 |    V =  0.615R - 0.515G - 0.100B
 25 | 
 26 |    From YUV to RGB
 27 | 
 28 |    R = Y + 1.140V
 29 |    G = Y - 0.395U - 0.581V
 30 |    B = Y + 2.032U
 31 |  */
 32 | 
 33 | struct __align__(8) uchar8
 34 | {
 35 |    uint8_t a0, a1, a2, a3, a4, a5, a6, a7;
 36 | };
 37 | static __host__ __device__ __forceinline__ uchar8 make_uchar8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7)
 38 | {
 39 |    uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
 40 |    return val;
 41 | }
 42 | 
 43 | 
 44 | //-----------------------------------------------------------------------------------
 45 | // YUYV/UYVY to RGBA
 46 | //-----------------------------------------------------------------------------------
 47 | template <bool formatUYVY>
 48 | __global__ void yuyvToRgba( uchar4* src, int srcAlignedWidth, uchar8* dst, int dstAlignedWidth, int width, int height )
 49 | {
 50 | 	const int x = blockIdx.x * blockDim.x + threadIdx.x;
 51 | 	const int y = blockIdx.y * blockDim.y + threadIdx.y;
 52 | 
 53 | 	if( x >= srcAlignedWidth || y >= height )
 54 | 		return;
 55 | 
 56 | 	const uchar4 macroPx = src[y * srcAlignedWidth + x];
 57 | 
 58 | 	// Y0 is the brightness of pixel 0, Y1 the brightness of pixel 1.
 59 | 	// U0 and V0 is the color of both pixels.
 60 | 	// UYVY [ U0 | Y0 | V0 | Y1 ] 
 61 | 	// YUYV [ Y0 | U0 | Y1 | V0 ]
 62 | 	const float y0 = formatUYVY ? macroPx.y : macroPx.x;
 63 | 	const float y1 = formatUYVY ? macroPx.w : macroPx.z; 
 64 | 	const float u = (formatUYVY ? macroPx.x : macroPx.y) - 128.0f;
 65 | 	const float v = (formatUYVY ? macroPx.z : macroPx.w) - 128.0f;
 66 | 
 67 | 	const float4 px0 = make_float4( y0 + 1.4065f * v,
 68 | 							  y0 - 0.3455f * u - 0.7169f * v,
 69 | 							  y0 + 1.7790f * u, 255.0f );
 70 | 
 71 | 	const float4 px1 = make_float4( y1 + 1.4065f * v,
 72 | 							  y1 - 0.3455f * u - 0.7169f * v,
 73 | 							  y1 + 1.7790f * u, 255.0f );
 74 | 
 75 | 	dst[y * dstAlignedWidth + x] = make_uchar8( clamp(px0.x, 0.0f, 255.0f), 
 76 | 									    clamp(px0.y, 0.0f, 255.0f),
 77 | 									    clamp(px0.z, 0.0f, 255.0f),
 78 | 									    clamp(px0.w, 0.0f, 255.0f),
 79 | 									    clamp(px1.x, 0.0f, 255.0f),
 80 | 									    clamp(px1.y, 0.0f, 255.0f),
 81 | 									    clamp(px1.z, 0.0f, 255.0f),
 82 | 									    clamp(px1.w, 0.0f, 255.0f) );
 83 | } 
 84 | 
 85 | template<bool formatUYVY>
 86 | cudaError_t launchYUYV( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height)
 87 | {
 88 | 	if( !input || !inputPitch || !output || !outputPitch || !width || !height )
 89 | 		return cudaErrorInvalidValue;
 90 | 
 91 | 	const dim3 block(8,8);
 92 | 	const dim3 grid(iDivUp(width/2, block.x), iDivUp(height, block.y));
 93 | 
 94 | 	const int srcAlignedWidth = inputPitch / sizeof(uchar4);	// normally would be uchar2, but we're doubling up pixels
 95 | 	const int dstAlignedWidth = outputPitch / sizeof(uchar8);	// normally would be uchar4 ^^^
 96 | 
 97 | 	//printf("yuyvToRgba %zu %zu %i %i %i %i %i\n", width, height, (int)formatUYVY, srcAlignedWidth, dstAlignedWidth, grid.x, grid.y);
 98 | 
 99 | 	yuyvToRgba<formatUYVY><<<grid, block>>>((uchar4*)input, srcAlignedWidth, (uchar8*)output, dstAlignedWidth, width, height);
100 | 
101 | 	return CUDA(cudaGetLastError());
102 | }
103 | 
104 | 
/**
 * UYVY -> RGBA for tightly packed buffers: pitches are derived from `width`
 * (uchar2 per source pixel, uchar4 per destination pixel) and the call is
 * forwarded to the pitched overload.
 */
cudaError_t cudaUYVYToRGBA( uchar2* input, uchar4* output, size_t width, size_t height )
{
	const size_t inputPitch  = width * sizeof(uchar2);
	const size_t outputPitch = width * sizeof(uchar4);

	return cudaUYVYToRGBA(input, inputPitch, output, outputPitch, width, height);
}
109 | 
/**
 * UYVY -> RGBA with explicit row pitches in bytes; runs the shared launcher
 * with formatUYVY = true.
 */
cudaError_t cudaUYVYToRGBA( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height )
{
	const cudaError_t status = launchYUYV<true>(input, inputPitch, output, outputPitch, width, height);
	return status;
}
114 | 
/**
 * YUYV -> RGBA for tightly packed buffers: pitches are derived from `width`
 * (uchar2 per source pixel, uchar4 per destination pixel) and the call is
 * forwarded to the pitched overload.
 */
cudaError_t cudaYUYVToRGBA( uchar2* input, uchar4* output, size_t width, size_t height )
{
	const size_t inputPitch  = width * sizeof(uchar2);
	const size_t outputPitch = width * sizeof(uchar4);

	return cudaYUYVToRGBA(input, inputPitch, output, outputPitch, width, height);
}
119 | 
/**
 * YUYV -> RGBA with explicit row pitches in bytes; runs the shared launcher
 * with formatUYVY = false.
 */
cudaError_t cudaYUYVToRGBA( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height )
{
	const cudaError_t status = launchYUYV<false>(input, inputPitch, output, outputPitch, width, height);
	return status;
}
124 | 
125 | 
126 | //-----------------------------------------------------------------------------------
127 | // YUYV/UYVY to grayscale
128 | //-----------------------------------------------------------------------------------
129 | 
/**
 * CUDA kernel extracting the luma of packed 4:2:2 (UYVY or YUYV) as floats.
 *
 * Each thread reads one 4-byte macropixel and writes the two luma values,
 * divided by 255, as one float2.
 *
 * @tparam formatUYVY      true for UYVY byte order, false for YUYV.
 * @param src              source image viewed as 4-byte macropixels.
 * @param srcAlignedWidth  source row pitch in macropixels.
 * @param dst              destination image viewed as float2 pixel pairs.
 * @param dstAlignedWidth  destination row pitch in float2 elements.
 * @param width            image width in pixels.
 * @param height           image height in pixels.
 */
template <bool formatUYVY>
__global__ void yuyvToGray( uchar4* src, int srcAlignedWidth, float2* dst, int dstAlignedWidth, int width, int height )
{
	const int x = blockIdx.x * blockDim.x + threadIdx.x;
	const int y = blockIdx.y * blockDim.y + threadIdx.y;

	// x counts macropixels: stay inside the image (width/2 pairs) as well as
	// inside the source row, so that a padded source pitch cannot make a
	// thread write beyond the end of the destination row
	if( x >= srcAlignedWidth || x >= (width / 2) || y >= height )
		return;

	const uchar4 macroPx = src[y * srcAlignedWidth + x];

	// UYVY [ U0 | Y0 | V0 | Y1 ]
	// YUYV [ Y0 | U0 | Y1 | V0 ]
	const float y0 = formatUYVY ? macroPx.y : macroPx.x;
	const float y1 = formatUYVY ? macroPx.w : macroPx.z; 

	dst[y * dstAlignedWidth + x] = make_float2(y0/255.0f, y1/255.0f);
} 
146 | 
/**
 * Validate the arguments and launch yuyvToGray<formatUYVY>.
 *
 * Pitches are given in bytes and converted to kernel units here: the source
 * is addressed as uchar4 macropixels and the destination as float2 pixel
 * pairs.  Returns cudaErrorInvalidValue if any pointer, pitch or dimension
 * is zero, otherwise the result of the launch.
 */
template<bool formatUYVY>
cudaError_t launchGrayYUYV( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height)
{
	const bool buffersMissing = !input || !output;
	const bool extentsMissing = !inputPitch || !outputPitch || !width || !height;

	if( buffersMissing || extentsMissing )
		return cudaErrorInvalidValue;

	// two pixels per element on both sides
	const int srcAlignedWidth = inputPitch / sizeof(uchar4);
	const int dstAlignedWidth = outputPitch / sizeof(float2);

	// one thread per pixel pair
	const dim3 threads(8,8);
	const dim3 blocks(iDivUp(width/2, threads.x), iDivUp(height, threads.y));

	yuyvToGray<formatUYVY><<<blocks, threads>>>((uchar4*)input, srcAlignedWidth, (float2*)output, dstAlignedWidth, width, height);

	return CUDA(cudaGetLastError());
}
163 | 
/**
 * UYVY -> float grayscale for tightly packed buffers.
 *
 * The output holds one float per pixel, so its packed row pitch is
 * `width * sizeof(float)` bytes (matching cudaYUYVToGray()); the source
 * pitch is `width * sizeof(uchar2)`.  Forwards to the pitched overload.
 */
cudaError_t cudaUYVYToGray( uchar2* input, float* output, size_t width, size_t height )
{
	return cudaUYVYToGray(input, width * sizeof(uchar2), output, width * sizeof(float), width, height);
}
168 | 
/**
 * UYVY -> float grayscale with explicit row pitches in bytes; runs the
 * shared launcher with formatUYVY = true.
 */
cudaError_t cudaUYVYToGray( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height )
{
	const cudaError_t status = launchGrayYUYV<true>(input, inputPitch, output, outputPitch, width, height);
	return status;
}
173 | 
/**
 * YUYV -> float grayscale for tightly packed buffers: pitches are derived
 * from `width` (uchar2 per source pixel, float per destination pixel) and
 * the call is forwarded to the pitched overload.
 */
cudaError_t cudaYUYVToGray( uchar2* input, float* output, size_t width, size_t height )
{
	const size_t inputPitch  = width * sizeof(uchar2);
	const size_t outputPitch = width * sizeof(float);

	return cudaYUYVToGray(input, inputPitch, output, outputPitch, width, height);
}
178 | 
/**
 * YUYV -> float grayscale with explicit row pitches in bytes; runs the
 * shared launcher with formatUYVY = false.
 */
cudaError_t cudaYUYVToGray( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height )
{
	const cudaError_t status = launchGrayYUYV<false>(input, inputPitch, output, outputPitch, width, height);
	return status;
}
183 | 
184 | 


--------------------------------------------------------------------------------
/util/cuda/cudaYUV-YV12.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * inference-101
  3 |  */
  4 | 
  5 | #include "cudaYUV.h"
  6 | 
  7 | 
  8 | 
  9 | 
 10 | 
 11 | inline __device__ void rgb_to_y(const uint8_t r, const uint8_t g, const uint8_t b, uint8_t& y)
 12 | {
 13 | 	y = static_cast<uint8_t>(((int)(30 * r) + (int)(59 * g) + (int)(11 * b)) / 100);
 14 | }
 15 | 
 16 | inline __device__ void rgb_to_yuv(const uint8_t r, const uint8_t g, const uint8_t b, uint8_t& y, uint8_t& u, uint8_t& v)
 17 | {
 18 | 	rgb_to_y(r, g, b, y);
 19 | 	u = static_cast<uint8_t>(((int)(-17 * r) - (int)(33 * g) + (int)(50 * b) + 12800) / 100);
 20 | 	v = static_cast<uint8_t>(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100);
 21 | }
 22 | 
 23 | template <typename T, bool formatYV12>
 24 | __global__ void RGB_to_YV12( T* src, int srcAlignedWidth, uint8_t* dst, int dstPitch, int width, int height )
 25 | {
 26 | 	const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
 27 | 	const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
 28 | 
 29 | 	const int x1 = x + 1;
 30 | 	const int y1 = y + 1;
 31 | 
 32 | 	if( x1 >= width || y1 >= height )
 33 | 		return;
 34 | 
 35 | 	const int planeSize = height * dstPitch;
 36 | 	
 37 | 	uint8_t* y_plane = dst;
 38 | 	uint8_t* u_plane;
 39 | 	uint8_t* v_plane;
 40 | 
 41 | 	if( formatYV12 )
 42 | 	{
 43 | 		u_plane = y_plane + planeSize;
 44 | 		v_plane = u_plane + (planeSize / 4);	// size of U & V planes is 25% of Y plane
 45 | 	}
 46 | 	else
 47 | 	{
 48 | 		v_plane = y_plane + planeSize;		// in I420, order of U & V planes is reversed
 49 | 		u_plane = v_plane + (planeSize / 4);
 50 | 	}
 51 | 
 52 | 	T px;
 53 | 	uint8_t y_val, u_val, v_val;
 54 | 
 55 | 	px = src[y * srcAlignedWidth + x];
 56 | 	rgb_to_y(px.x, px.y, px.z, y_val);
 57 | 	y_plane[y * dstPitch + x] = y_val;
 58 | 
 59 | 	px = src[y * srcAlignedWidth + x1];
 60 | 	rgb_to_y(px.x, px.y, px.z, y_val);
 61 | 	y_plane[y * dstPitch + x1] = y_val;
 62 | 
 63 | 	px = src[y1 * srcAlignedWidth + x];
 64 | 	rgb_to_y(px.x, px.y, px.z, y_val);
 65 | 	y_plane[y1 * dstPitch + x] = y_val;
 66 | 	
 67 | 	px = src[y1 * srcAlignedWidth + x1];
 68 | 	rgb_to_yuv(px.x, px.y, px.z, y_val, u_val, v_val);
 69 | 	y_plane[y1 * dstPitch + x1] = y_val;
 70 | 
 71 | 	const int uvPitch = dstPitch / 2;
 72 | 	const int uvIndex = (y / 2) * uvPitch + (x / 2);
 73 | 
 74 | 	u_plane[uvIndex] = u_val;
 75 | 	v_plane[uvIndex] = v_val;
 76 | } 
 77 | 
 78 | template<typename T, bool formatYV12>
 79 | cudaError_t launch420( T* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height)
 80 | {
 81 | 	if( !input || !inputPitch || !output || !outputPitch || !width || !height )
 82 | 		return cudaErrorInvalidValue;
 83 | 
 84 | 	const dim3 block(32, 8);
 85 | 	const dim3 grid(iDivUp(width, block.x * 2), iDivUp(height, block.y * 2));
 86 | 
 87 | 	const int inputAlignedWidth = inputPitch / sizeof(T);
 88 | 
 89 | 	RGB_to_YV12<T, formatYV12><<<grid, block>>>(input, inputAlignedWidth, output, outputPitch, width, height);
 90 | 
 91 | 	return CUDA(cudaGetLastError());
 92 | }
 93 | 
 94 | 
 95 | 
 96 | // cudaRGBAToYV12
 97 | cudaError_t cudaRGBAToYV12( uchar4* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height )
 98 | {
 99 | 	return launch420<uchar4,false>( input, inputPitch, output, outputPitch, width, height );
100 | }
101 | 
// cudaRGBAToYV12 (packed)
// Derives tightly packed pitches from `width` (uchar4 per input pixel, one
// byte per output luma sample) and forwards to the pitched overload.
cudaError_t cudaRGBAToYV12( uchar4* input, uint8_t* output, size_t width, size_t height )
{
	const size_t inputPitch  = width * sizeof(uchar4);
	const size_t outputPitch = width * sizeof(uint8_t);

	return cudaRGBAToYV12( input, inputPitch, output, outputPitch, width, height );
}
107 | 
// cudaRGBAToI420 (pitched)
// Runs the 4:2:0 launcher on uchar4 input with its boolean template flag
// set to true, which makes the kernel place the U plane right after Y.
cudaError_t cudaRGBAToI420( uchar4* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height )
{
	const cudaError_t status = launch420<uchar4,true>( input, inputPitch, output, outputPitch, width, height );
	return status;
}
113 | 
// cudaRGBAToI420 (packed)
// Derives tightly packed pitches from `width` (uchar4 per input pixel, one
// byte per output luma sample) and forwards to the pitched overload.
cudaError_t cudaRGBAToI420( uchar4* input, uint8_t* output, size_t width, size_t height )
{
	const size_t inputPitch  = width * sizeof(uchar4);
	const size_t outputPitch = width * sizeof(uint8_t);

	return cudaRGBAToI420( input, inputPitch, output, outputPitch, width, height );
}
119 | 
120 | 
121 | 
// Disabled (compiled out by `#if 0`) grayscale -> planar 4:2:0 kernel.
// It mirrors RGB_to_YV12 above: one thread per 2x2 block, four luma writes
// and one U/V pair taken from the bottom-right pixel, with the gray value
// used for all three colour channels.  It relies on GlobPtr / GlobPtrSz /
// globPtr(), none of which are defined in this file.
#if 0
__global__ void Gray_to_YV12(const GlobPtrSz<uint8_t> src, GlobPtr<uint8_t> dst)
{
	const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
	const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;

	if (x + 1 >= src.cols || y + 1 >= src.rows)
		return;

	// get pointers to the data
	const size_t planeSize = src.rows * dst.step;
   GlobPtr<uint8_t> y_plane = globPtr(dst.data, dst.step);
   GlobPtr<uint8_t> u_plane = globPtr(y_plane.data + planeSize, dst.step / 2);
   GlobPtr<uint8_t> v_plane = globPtr(u_plane.data + (planeSize / 4), dst.step / 2);

   uint8_t pix;
   uint8_t y_val, u_val, v_val;

   pix = src(y, x);
   rgb_to_y(pix, pix, pix, y_val);
   y_plane(y, x) = y_val;

   pix = src(y, x + 1);
   rgb_to_y(pix, pix, pix, y_val);
   y_plane(y, x + 1) = y_val;

   pix = src(y + 1, x);
   rgb_to_y(pix, pix, pix, y_val);
   y_plane(y + 1, x) = y_val;

   pix = src(y + 1, x + 1);
   rgb_to_yuv(pix, pix, pix, y_val, u_val, v_val);
   y_plane(y + 1, x + 1) = y_val;
   u_plane(y / 2, x / 2) = u_val;
   v_plane(y / 2, x / 2) = v_val;
}
#endif
159 | 
160 | 


--------------------------------------------------------------------------------
/util/cuda/cudaYUV.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * http://github.com/dusty-nv/jetson-inference
  3 |  */
  4 | 
  5 | #ifndef __CUDA_YUV_CONVERT_H
  6 | #define __CUDA_YUV_CONVERT_H
  7 | 
  8 | 
  9 | #include "cudaUtility.h"
 10 | #include <stdint.h>
 11 | 
 12 | 
 13 | //////////////////////////////////////////////////////////////////////////////////
 14 | /// @name RGBA to YUV 4:2:0 planar (I420 & YV12)
 15 | /// @ingroup util
 16 | //////////////////////////////////////////////////////////////////////////////////
 17 | 
 18 | ///@{
 19 | 
 20 | /**
 21 |  * Convert an RGBA uchar4 buffer into YUV I420 planar.
 22 |  */
 23 | cudaError_t cudaRGBAToI420( uchar4* input, uint8_t* output, size_t width, size_t height );
 24 | 
 25 | /**
 26 |  * Convert an RGBA uchar4 texture into YUV I420 planar.
 27 |  */
 28 | cudaError_t cudaRGBAToI420( uchar4* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height );
 29 | 
 30 | /**
 31 |  * Convert an RGBA uchar4 buffer into YUV YV12 planar.
 32 |  */
 33 | cudaError_t cudaRGBAToYV12( uchar4* input, uint8_t* output, size_t width, size_t height );
 34 | 
 35 | /**
 36 |  * Convert an RGBA uchar4 texture into YUV YV12 planar.
 37 |  */
 38 | cudaError_t cudaRGBAToYV12( uchar4* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height );
 39 | 
 40 | ///@}
 41 | 
 42 | 
 43 | //////////////////////////////////////////////////////////////////////////////////
 44 | /// @name YUV 4:2:2 packed (UYVY & YUYV) to RGBA
 45 | /// @ingroup util
 46 | //////////////////////////////////////////////////////////////////////////////////
 47 | 
 48 | ///@{
 49 | 
 50 | /**
 51 |  * Convert a UYVY 422 packed image into RGBA uchar4.
 52 |  */
 53 | cudaError_t cudaUYVYToRGBA( uchar2* input, uchar4* output, size_t width, size_t height );
 54 | 
 55 | /**
 56 |  * Convert a UYVY 422 packed image into RGBA uchar4.
 57 |  */
 58 | cudaError_t cudaUYVYToRGBA( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height );
 59 | 
 60 | /**
 61 |  * Convert a YUYV 422 packed image into RGBA uchar4.
 62 |  */
 63 | cudaError_t cudaYUYVToRGBA( uchar2* input, uchar4* output, size_t width, size_t height );
 64 | 
 65 | /**
 66 |  * Convert a YUYV 422 packed image into RGBA uchar4.
 67 |  */
 68 | cudaError_t cudaYUYVToRGBA( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height );
 69 | 
 70 | ///@}
 71 | 
 72 | 
 73 | //////////////////////////////////////////////////////////////////////////////////
 74 | /// @name YUV 4:2:2 packed (UYVY & YUYV) to grayscale
 75 | /// @ingroup util
 76 | //////////////////////////////////////////////////////////////////////////////////
 77 | 
 78 | ///@{
 79 | 
 80 | /**
 81 |  * Convert a UYVY 422 packed image into a float grayscale image (luma divided by 255).
 82 |  */
 83 | cudaError_t cudaUYVYToGray( uchar2* input, float* output, size_t width, size_t height );
 84 | 
 85 | /**
 86 |  * Convert a UYVY 422 packed image into a float grayscale image (luma divided by 255), with explicit row pitches in bytes.
 87 |  */
 88 | cudaError_t cudaUYVYToGray( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height );
 89 | 
 90 | /**
 91 |  * Convert a YUYV 422 packed image into a float grayscale image (luma divided by 255).
 92 |  */
 93 | cudaError_t cudaYUYVToGray( uchar2* input, float* output, size_t width, size_t height );
 94 | 
 95 | /**
 96 |  * Convert a YUYV 422 packed image into a float grayscale image (luma divided by 255), with explicit row pitches in bytes.
 97 |  */
 98 | cudaError_t cudaYUYVToGray( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height );
 99 | 
100 | ///@}
101 | 
102 | 
103 | //////////////////////////////////////////////////////////////////////////////////
104 | /// @name YUV NV12 to RGBA
105 | /// @ingroup util
106 | //////////////////////////////////////////////////////////////////////////////////
107 | 
108 | ///@{
109 | 
110 | /**
111 |  * Convert an NV12 texture (semi-planar 4:2:0) to ARGB uchar4 format.
112 |  * NV12 = 8-bit Y plane followed by an interleaved U/V plane with 2x2 subsampling.
113 |  */
114 | cudaError_t cudaNV12ToRGBA( uint8_t* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height );
115 | cudaError_t cudaNV12ToRGBA( uint8_t* input, uchar4* output, size_t width, size_t height );
116 | 
117 | cudaError_t cudaNV12ToRGBAf( uint8_t* input, size_t inputPitch, float4* output, size_t outputPitch, size_t width, size_t height );
118 | cudaError_t cudaNV12ToRGBAf( uint8_t* input, float4* output, size_t width, size_t height );
119 | 
120 | /**
121 |  * Setup NV12 color conversion constants.
122 |  * cudaNV12SetupColorspace() isn't necessary for the user to call, it will be
123 |  * called automatically by cudaNV12ToRGBA() with a hue of 0.0.
124 |  * However if you want to setup custom constants (ie with a hue different than 0),
125 |  * then you can call cudaNV12SetupColorspace() at any time, overriding the default.
126 |  */
127 | cudaError_t cudaNV12SetupColorspace( float hue = 0.0f ); 
128 | 
129 | ///@}
130 | 
131 | #endif
132 | 
133 | 


--------------------------------------------------------------------------------
/util/display/glDisplay.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * inference-101
  3 |  */
  4 |  
  5 | #include "glDisplay.h"
  6 | 
  7 | 
  8 |  
  9 | // Constructor
 10 | glDisplay::glDisplay()
 11 | {
 12 | 	mWindowX   = 0;
 13 | 	mScreenX   = NULL;
 14 | 	mVisualX   = NULL;
 15 | 	mContextGL = NULL;
 16 | 	mDisplayX  = NULL;
 17 | 	mWidth     = 0;
 18 | 	mHeight    = 0;
 19 | 	mAvgTime   = 1.0f;
 20 | 
 21 | 	clock_gettime(CLOCK_REALTIME, &mLastTime);
 22 | }
 23 | 
 24 | 
 25 | // Destructor
 26 | glDisplay::~glDisplay()
 27 | {
 28 | 	glXDestroyContext(mDisplayX, mContextGL);
 29 | }
 30 | 
 31 | 
 32 | // Create
 33 | glDisplay* glDisplay::Create()
 34 | {
 35 | 	glDisplay* vp = new glDisplay();
 36 | 	
 37 | 	if( !vp )
 38 | 		return NULL;
 39 | 		
 40 | 	if( !vp->initWindow() )
 41 | 	{
 42 | 		printf("[OpenGL]  failed to create X11 Window.\n");
 43 | 		delete vp;
 44 | 		return NULL;
 45 | 	}
 46 | 	
 47 | 	if( !vp->initGL() )
 48 | 	{
 49 | 		printf("[OpenGL]  failed to initialize OpenGL.\n");
 50 | 		delete vp;
 51 | 		return NULL;
 52 | 	}
 53 | 	
 54 | 	GLenum err = glewInit();
 55 | 	
 56 | 	if (GLEW_OK != err)
 57 | 	{
 58 | 		printf("[OpenGL]  GLEW Error: %s\n", glewGetErrorString(err));
 59 | 		delete vp;
 60 | 		return NULL;
 61 | 	}
 62 | 
 63 | 	printf("[OpenGL]  glDisplay display window initialized\n");
 64 | 	return vp;
 65 | }
 66 | 
 67 | 
 68 | // initWindow
 69 | bool glDisplay::initWindow()
 70 | {
 71 | 	if( !mDisplayX )
 72 | 		mDisplayX = XOpenDisplay(0);
 73 | 
 74 | 	if( !mDisplayX )
 75 | 	{
 76 | 		printf( "[OpenGL]  failed to open X11 server connection." );
 77 | 		return false;
 78 | 	}
 79 | 
 80 | 		
 81 | 	if( !mDisplayX )
 82 | 	{
 83 | 		printf( "InitWindow() - no X11 server connection." );
 84 | 		return false;
 85 | 	}
 86 | 
 87 | 	// retrieve screen info
 88 | 	const int screenIdx   = DefaultScreen(mDisplayX);
 89 | 	const int screenWidth = DisplayWidth(mDisplayX, screenIdx);
 90 | 	const int screenHeight = DisplayHeight(mDisplayX, screenIdx);
 91 | 	
 92 | 	printf("default X screen %i:   %i x %i\n", screenIdx, screenWidth, screenHeight);
 93 | 	
 94 | 	Screen* screen = XScreenOfDisplay(mDisplayX, screenIdx);
 95 | 
 96 | 	if( !screen )
 97 | 	{
 98 | 		printf("failed to retrieve default Screen instance\n");
 99 | 		return false;
100 | 	}
101 | 	
102 | 	Window winRoot = XRootWindowOfScreen(screen);
103 | 
104 | 	// get framebuffer format
105 | 	static int fbAttribs[] =
106 | 	{
107 | 			GLX_X_RENDERABLE, True,
108 | 			GLX_DRAWABLE_TYPE, GLX_WINDOW_BIT,
109 | 			GLX_RENDER_TYPE, GLX_RGBA_BIT,
110 | 			GLX_X_VISUAL_TYPE, GLX_TRUE_COLOR,
111 | 			GLX_RED_SIZE, 8,
112 | 			GLX_GREEN_SIZE, 8,
113 | 			GLX_BLUE_SIZE, 8,
114 | 			GLX_ALPHA_SIZE, 8,
115 | 			GLX_DEPTH_SIZE, 24,
116 | 			GLX_STENCIL_SIZE, 8,
117 | 			GLX_DOUBLEBUFFER, True,
118 | 			GLX_SAMPLE_BUFFERS, 0,
119 | 			GLX_SAMPLES, 0,
120 | 			None
121 | 	};
122 | 
123 | 	int fbCount = 0;
124 | 	GLXFBConfig* fbConfig = glXChooseFBConfig(mDisplayX, screenIdx, fbAttribs, &fbCount);
125 | 
126 | 	if( !fbConfig || fbCount == 0 )
127 | 		return false;
128 | 
129 | 	// get a 'visual'
130 | 	XVisualInfo* visual = glXGetVisualFromFBConfig(mDisplayX, fbConfig[0]);
131 | 
132 | 	if( !visual )
133 | 		return false;
134 | 
135 | 	// populate windows attributes
136 | 	XSetWindowAttributes winAttr;
137 | 	winAttr.colormap = XCreateColormap(mDisplayX, winRoot, visual->visual, AllocNone);
138 | 	winAttr.background_pixmap = None;
139 | 	winAttr.border_pixel = 0;
140 | 	winAttr.event_mask = StructureNotifyMask|KeyPressMask|KeyReleaseMask|PointerMotionMask|ButtonPressMask|ButtonReleaseMask;
141 | 
142 | 	
143 | 	// create window
144 | 	Window win = XCreateWindow(mDisplayX, winRoot, 0, 0, screenWidth, screenHeight, 0,
145 | 							   visual->depth, InputOutput, visual->visual, CWBorderPixel|CWColormap|CWEventMask, &winAttr);
146 | 
147 | 	if( !win )
148 | 		return false;
149 | 
150 | 	XStoreName(mDisplayX, win, "NVIDIA Jetson TX1 | L4T R24.1 aarch64 | Ubuntu 14.04 LTS");
151 | 	XMapWindow(mDisplayX, win);
152 | 
153 | 	// cleanup
154 | 	mWindowX = win;
155 | 	mScreenX = screen;
156 | 	mVisualX = visual;
157 | 	mWidth   = screenWidth;
158 | 	mHeight  = screenHeight;
159 | 	
160 | 	XFree(fbConfig);
161 | 	return true;
162 | }
163 | 
164 | 
165 | void glDisplay::SetTitle( const char* str )
166 | {
167 | 	XStoreName(mDisplayX, mWindowX, str);
168 | }
169 | 
170 | // initGL
171 | bool glDisplay::initGL()
172 | {
173 | 	mContextGL = glXCreateContext(mDisplayX, mVisualX, 0, True);
174 | 
175 | 	if( !mContextGL )
176 | 		return false;
177 | 
178 | 	GL(glXMakeCurrent(mDisplayX, mWindowX, mContextGL));
179 | 
180 | 	return true;
181 | }
182 | 
183 | 
// BeginRender
// Prepares the window for drawing a new frame: makes the GL context current,
// clears the colour/depth/stencil buffers to a dark gray, and sets up a
// full-window viewport with an orthographic projection.
void glDisplay::BeginRender()
{
	GL(glXMakeCurrent(mDisplayX, mWindowX, mContextGL));

	// clear to a near-black gray
	GL(glClearColor(0.05f, 0.05f, 0.05f, 1.0f));
	GL(glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT|GL_STENCIL_BUFFER_BIT));

	// projection in window pixels; bottom/top are passed as (mHeight, 0),
	// which puts y = 0 at the top edge of the window
	GL(glViewport(0, 0, mWidth, mHeight));
	GL(glMatrixMode(GL_PROJECTION));
	GL(glLoadIdentity());
	GL(glOrtho(0.0f, mWidth, mHeight, 0.0f, 0.0f, 1.0f));	
}
197 | 
198 | 
// timeDiff
// Returns end - start as a timespec.  When the nanosecond difference is
// negative, one second is borrowed so that tv_nsec ends up non-negative.
static timespec timeDiff( const timespec& start, const timespec& end)
{
	timespec delta;

	delta.tv_sec  = end.tv_sec  - start.tv_sec;
	delta.tv_nsec = end.tv_nsec - start.tv_nsec;

	if( delta.tv_nsec < 0 )
	{
		delta.tv_sec  -= 1;
		delta.tv_nsec += 1000000000;
	}

	return delta;
}
212 | 
213 | 
214 | // Refresh
215 | void glDisplay::EndRender()
216 | {
217 | 	glXSwapBuffers(mDisplayX, mWindowX);
218 | 
219 | 	// measure framerate
220 | 	timespec currTime;
221 | 	clock_gettime(CLOCK_REALTIME, &currTime);
222 | 
223 | 	const timespec diffTime = timeDiff(mLastTime, currTime);
224 | 	const float ns = 1000000000 * diffTime.tv_sec + diffTime.tv_nsec;
225 | 
226 | 	mAvgTime  = mAvgTime * 0.8f + ns * 0.2f;
227 | 	mLastTime = currTime;
228 | }
229 | 
230 | 
231 | #define MOUSE_MOVE		0
232 | #define MOUSE_BUTTON	1
233 | #define MOUSE_WHEEL		2
234 | #define MOUSE_DOUBLE	3
235 | #define KEY_STATE		4
236 | #define KEY_CHAR		5
237 | 
238 | // OnEvent -- receives the input events decoded by UserEvents(); `msg` is one of the MOUSE_* / KEY_* identifiers.
239 | // Every handler body below is currently commented out, so calling this function has no effect.
240 | void glDisplay::onEvent( uint msg, int a, int b )
241 | {
242 | 	switch(msg)
243 | 	{
244 | 		case MOUSE_MOVE:
245 | 		{
246 | 			//mMousePos.Set(a,b);
247 | 			break;
248 | 		}
249 | 		case MOUSE_BUTTON:
250 | 		{
251 | 			/*if( mMouseButton[a] != (bool)b )
252 | 			{
253 | 				mMouseButton[a] = b;
254 | 
255 | 				if( b )
256 | 					mMouseDownEvent = true;
257 | 
258 | 				// ignore right-mouse up events
259 | 				if( !(a == 1 && !b) )
260 | 					mMouseEvent = true;
261 | 			}*/
262 | 
263 | 			break;
264 | 		}
265 | 		case MOUSE_DOUBLE:
266 | 		{
267 | 			/*mMouseDblClick = b;
268 | 
269 | 			if( b )
270 | 			{
271 | 				mMouseEvent = true;
272 | 				mMouseDownEvent = true;
273 | 			}*/
274 | 
275 | 			break;
276 | 		}
277 | 		case MOUSE_WHEEL:
278 | 		{
279 | 			//mMouseWheel = a;
280 | 			break;
281 | 		}
282 | 		case KEY_STATE:
283 | 		{
284 | 			//mKeys[a] = b;
285 | 			break;
286 | 		}
287 | 		case KEY_CHAR:
288 | 		{
289 | 			//mKeyText = a;
290 | 			break;
291 | 		}
292 | 	}
293 | 
294 | 	//if( msg == MOUSE_MOVE || msg == MOUSE_BUTTON || msg == MOUSE_DOUBLE || msg == MOUSE_WHEEL )
295 | 	//	mMouseEventLast = time();
296 | }
297 | 
298 | 
299 | // UserEvents -- forward the X11 events that are already in Xlib's queue to onEvent()
300 | void glDisplay::UserEvents()
301 | {
302 | 	// the per-frame reset of the input state is currently disabled:
303 | 	//   mMouseEvent = false;  mMouseDownEvent = false;  mMouseDblClick = false;
304 | 	//   mMouseWheel = 0;      mKeyText = 0;
305 | 
306 | 	XEvent event;
307 | 
308 | 	// QueuedAlready:  only consume what Xlib has queued so far
309 | 	while( XEventsQueued(mDisplayX, QueuedAlready) > 0 )
310 | 	{
311 | 		XNextEvent(mDisplayX, &event);
312 | 		const int type = event.type;
313 | 
314 | 		if( type == KeyPress || type == KeyRelease )
315 | 		{
316 | 			// last argument:  1 on press, 0 on release
317 | 			onEvent(KEY_STATE, event.xkey.keycode, (type == KeyPress) ? 1 : 0);
318 | 		}
319 | 		else if( type == ButtonPress || type == ButtonRelease )
320 | 		{
321 | 			onEvent(MOUSE_BUTTON, event.xbutton.button, (type == ButtonPress) ? 1 : 0);
322 | 		}
323 | 		else if( type == MotionNotify )
324 | 		{
325 | 			// offset the root-relative pointer position by the origin of the root window
326 | 			XWindowAttributes rootAttr;
327 | 			XGetWindowAttributes(mDisplayX, event.xmotion.root, &rootAttr);
328 | 			onEvent(MOUSE_MOVE, event.xmotion.x_root + rootAttr.x, event.xmotion.y_root + rootAttr.y);
329 | 		}
330 | 	}
331 | }
332 | 
333 | 


--------------------------------------------------------------------------------
/util/display/glDisplay.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * inference-101
 3 |  */
 4 |  
 5 | #ifndef __GL_VIEWPORT_H__
 6 | #define __GL_VIEWPORT_H__
 7 | 
 8 | 
 9 | #include "glUtility.h"
10 | #include "glTexture.h"
11 | 
12 | #include <time.h>
13 | 
14 | 
15 | /**
16 |  * OpenGL display window / video viewer
17 |  */
18 | class glDisplay
19 | {
20 | public:
21 | 	/**
22 | 	 * Create a new maximized openGL display window.
23 | 	 */
24 | 	static glDisplay* Create();
25 | 
26 | 	/**
27 | 	 * Destroy window
28 | 	 */
29 | 	~glDisplay();
30 | 
31 | 	/**
32 |  	 * Clear window and begin rendering a frame.
33 | 	 */
34 | 	void BeginRender();
35 | 
36 | 	/**
37 | 	 * Finish rendering and refresh / flip the backbuffer.  Also updates the frame time used by GetFPS().
38 | 	 */
39 | 	void EndRender();
40 | 
41 | 	/**
42 | 	 * Process UI events.
43 | 	 */
44 | 	void UserEvents();
45 | 		
46 | 	/**
47 | 	 * UI event handler.
48 | 	 */
49 | 	void onEvent( uint msg, int a, int b );
50 | 
51 | 	/**
52 | 	 * Set the window title string.
53 | 	 */
54 | 	void SetTitle( const char* str );
55 | 
56 | 	/**
57 | 	 * Get the average framerate in frames per second (1e9 divided by the smoothed frame time in nanoseconds).
58 | 	 */
59 | 	inline float GetFPS()	{ return 1000000000.0f / mAvgTime; }
60 | 		
61 | protected:
62 | 	glDisplay();
63 | 		
64 | 	bool initWindow();
65 | 	bool initGL();
66 | 
67 | 	static const int screenIdx = 0;
68 | 		
69 | 	Display*     mDisplayX;
70 | 	Screen*      mScreenX;
71 | 	XVisualInfo* mVisualX;
72 | 	Window       mWindowX;
73 | 	GLXContext   mContextGL;
74 | 		
75 | 	uint32_t mWidth;
76 | 	uint32_t mHeight;
77 | 
78 | 	timespec mLastTime;	// time at which EndRender() last ran
79 | 	float    mAvgTime;	// smoothed time between EndRender() calls, in nanoseconds
80 | };
81 | 
82 | #endif
83 | 
84 | 


--------------------------------------------------------------------------------
/util/display/glTexture.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * inference-101
  3 |  */
  4 | 
  5 | #include "glUtility.h"
  6 | #include "glTexture.h"
  7 | 
  8 | #include "cudaMappedMemory.h"
  9 | 
 10 | 
 11 | //-----------------------------------------------------------------------------------
 12 | // glTextureLayout
 13 | // Returns the pixel layout (GL_LUMINANCE, GL_LUMINANCE_ALPHA, GL_RGB or GL_RGBA) that corresponds to a sized
 14 | // texture format, or 0 when the format is not one of those listed below.
 15 | inline uint32_t glTextureLayout( uint32_t format )
 16 | {
 17 | 	uint32_t layout = 0;
 18 | 
 19 | 	switch(format)
 20 | 	{
 21 | 		// one channel
 22 | 		case GL_LUMINANCE8:					case GL_LUMINANCE16:
 23 | 		case GL_LUMINANCE32UI_EXT:			case GL_LUMINANCE8I_EXT:
 24 | 		case GL_LUMINANCE16I_EXT:			case GL_LUMINANCE32I_EXT:
 25 | 		case GL_LUMINANCE16F_ARB:			case GL_LUMINANCE32F_ARB:
 26 | 			layout = GL_LUMINANCE;
 27 | 			break;
 28 | 
 29 | 		// two channels
 30 | 		case GL_LUMINANCE8_ALPHA8:			case GL_LUMINANCE16_ALPHA16:
 31 | 		case GL_LUMINANCE_ALPHA32UI_EXT:	case GL_LUMINANCE_ALPHA8I_EXT:
 32 | 		case GL_LUMINANCE_ALPHA16I_EXT:		case GL_LUMINANCE_ALPHA32I_EXT:
 33 | 		case GL_LUMINANCE_ALPHA16F_ARB:		case GL_LUMINANCE_ALPHA32F_ARB:
 34 | 			layout = GL_LUMINANCE_ALPHA;
 35 | 			break;
 36 | 
 37 | 		// three channels
 38 | 		case GL_RGB8:						case GL_RGB16:
 39 | 		case GL_RGB32UI:					case GL_RGB8I:
 40 | 		case GL_RGB16I:						case GL_RGB32I:
 41 | 		case GL_RGB16F_ARB:					case GL_RGB32F_ARB:
 42 | 			layout = GL_RGB;
 43 | 			break;
 44 | 
 45 | 		// four channels
 46 | 		case GL_RGBA8:						case GL_RGBA16:
 47 | 		case GL_RGBA32UI:					case GL_RGBA8I:
 48 | 		case GL_RGBA16I:					case GL_RGBA32I:
 49 | 		case GL_RGBA16F_ARB:				case GL_RGBA32F_ARB:
 50 | 			layout = GL_RGBA;
 51 | 			break;
 52 | 	}
 53 | 
 54 | 	return layout;
 55 | }
 56 | 
 57 | 
 58 | // glTextureLayoutChannels
 59 | // Returns the number of components per pixel of a sized texture format (0 if the format is not recognized).
 60 | inline uint32_t glTextureLayoutChannels( uint32_t format )
 61 | {
 62 | 	const uint32_t layout = glTextureLayout(format);
 63 | 
 64 | 	if( layout == GL_LUMINANCE )		return 1;
 65 | 	if( layout == GL_LUMINANCE_ALPHA )	return 2;
 66 | 	if( layout == GL_RGB )				return 3;
 67 | 	if( layout == GL_RGBA )				return 4;
 68 | 
 69 | 	// unrecognized format
 70 | 	return 0;
 71 | }
 72 | 
 73 | 
 74 | // glTextureType
 75 | // Returns the component data type (GL_UNSIGNED_BYTE, GL_FLOAT, ...) that this file passes to GL together with a
 76 | // sized texture format, or 0 when the format is not one of those listed below.
 77 | inline uint32_t glTextureType( uint32_t format )
 78 | {
 79 | 	switch(format)
 80 | 	{
 81 | 		// 8-bit unsigned
 82 | 		case GL_LUMINANCE8:				case GL_LUMINANCE8_ALPHA8:
 83 | 		case GL_RGB8:					case GL_RGBA8:
 84 | 			return GL_UNSIGNED_BYTE;
 85 | 
 86 | 		// 16-bit unsigned
 87 | 		case GL_LUMINANCE16:			case GL_LUMINANCE16_ALPHA16:
 88 | 		case GL_RGB16:					case GL_RGBA16:
 89 | 			return GL_UNSIGNED_SHORT;
 90 | 
 91 | 		// 32-bit unsigned
 92 | 		case GL_LUMINANCE32UI_EXT:		case GL_LUMINANCE_ALPHA32UI_EXT:
 93 | 		case GL_RGB32UI:				case GL_RGBA32UI:
 94 | 			return GL_UNSIGNED_INT;
 95 | 
 96 | 		// 8-bit signed
 97 | 		case GL_LUMINANCE8I_EXT:		case GL_LUMINANCE_ALPHA8I_EXT:
 98 | 		case GL_RGB8I:					case GL_RGBA8I:
 99 | 			return GL_BYTE;
100 | 
101 | 		// 16-bit signed
102 | 		case GL_LUMINANCE16I_EXT:		case GL_LUMINANCE_ALPHA16I_EXT:
103 | 		case GL_RGB16I:					case GL_RGBA16I:
104 | 			return GL_SHORT;
105 | 
106 | 		// 32-bit signed
107 | 		case GL_LUMINANCE32I_EXT:		case GL_LUMINANCE_ALPHA32I_EXT:
108 | 		case GL_RGB32I:					case GL_RGBA32I:
109 | 			return GL_INT;
110 | 
111 | 		// floating point:  the 16F formats are reported as GL_FLOAT as well, so glTextureTypeSize()
112 | 		// counts four bytes per component for them too
113 | 		case GL_LUMINANCE16F_ARB:		case GL_LUMINANCE_ALPHA16F_ARB:
114 | 		case GL_RGB16F_ARB:				case GL_RGBA16F_ARB:
115 | 		case GL_LUMINANCE32F_ARB:		case GL_LUMINANCE_ALPHA32F_ARB:
116 | 		case GL_RGB32F_ARB:				case GL_RGBA32F_ARB:
117 | 			return GL_FLOAT;
118 | 	}
119 | 
120 | 	// unrecognized format
121 | 	return 0;
122 | }
123 | 
124 | 
125 | // glTextureTypeSize
126 | // Returns the size in bytes of a single component of a sized texture format,
127 | // or 0 when the format is not recognized by glTextureType().
128 | inline uint glTextureTypeSize( uint32_t format )
129 | {
130 | 	const uint32_t type = glTextureType(format);
131 | 
132 | 	if( type == GL_UNSIGNED_BYTE || type == GL_BYTE )
133 | 		return 1;
134 | 
135 | 	if( type == GL_UNSIGNED_SHORT || type == GL_SHORT )
136 | 		return 2;
137 | 
138 | 	if( type == GL_UNSIGNED_INT || type == GL_INT || type == GL_FLOAT )
139 | 		return 4;
140 | 
141 | 	// unrecognized format
142 | 	return 0;
143 | }
144 | //-----------------------------------------------------------------------------------
145 | 
146 | // constructor -- starts with no GL objects, zero dimensions and no CUDA interop state
147 | glTexture::glTexture()
148 | 	: mID(0),
149 | 	  mDMA(0),
150 | 	  mWidth(0),
151 | 	  mHeight(0),
152 | 	  mFormat(0),
153 | 	  mSize(0),
154 | 	  mInteropCUDA(NULL),
155 | 	  mInteropHost(NULL),
156 | 	  mInteropDevice(NULL)
157 | {
158 | 	// every member is set by the initializer list above
159 | }
160 | 
161 | // destructor -- releases the CUDA interop registration, the DMA pixel buffer and the texture object
162 | glTexture::~glTexture()
163 | {
164 | 	if( mInteropCUDA != NULL )	CUDA(cudaGraphicsUnregisterResource(mInteropCUDA));	// before its buffer is deleted
165 | 	GL(glDeleteBuffers(1, &mDMA));		// the pixel buffer used to be leaked
166 | 	GL(glDeleteTextures(1, &mID));
167 | }
168 | 
169 | // Create -- allocate a texture of the given size and format, optionally filled from `data`; returns NULL on failure
170 | glTexture* glTexture::Create( uint32_t width, uint32_t height, uint32_t format, void* data )
171 | {
172 | 	glTexture* tex = new glTexture();
173 | 	
174 | 	if( !tex->init(width, height, format, data) )
175 | 	{
176 | 		printf("[OpenGL]  failed to create %ux%u texture\n", width, height);
177 | 		delete tex;		// the object used to be leaked on this path
178 | 		return NULL;
179 | 	}
180 | 	
181 | 	return tex;
182 | }
183 | 		
184 | // init -- creates the GL texture and the pixel-unpack buffer object (PBO) that uploads go through.
185 | // Returns false if the format is unsupported, the size is zero, or the texture could not be allocated.
186 | bool glTexture::init( uint32_t width, uint32_t height, uint32_t format, void* data )
187 | {
188 | 	const uint32_t size = width * height * glTextureLayoutChannels(format) * glTextureTypeSize(format);
189 | 	if( size == 0 )
190 | 		return false;
191 | 		
192 | 	// generate texture object
193 | 	uint32_t id = 0;
194 | 	GL(glEnable(GL_TEXTURE_2D));
195 | 	GL(glGenTextures(1, &id));
196 | 	GL(glBindTexture(GL_TEXTURE_2D, id));
197 | 	
198 | 	// set default texture parameters
199 | 	GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE));
200 | 	GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE));
201 | 	GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR));
202 | 	GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR));
203 | 
204 | 	printf("[OpenGL]   creating %ux%u texture\n", width, height);
205 | 	// allocate texture  (the error state is sampled before unbinding, so that it refers to glTexImage2D)
206 | 	glTexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, glTextureLayout(format), glTextureType(format), data);
207 | 	const bool allocFailed = glCheckError("glTexImage2D()", __FILE__, __LINE__);
208 | 	GL(glBindTexture(GL_TEXTURE_2D, 0));
209 | 
210 | 	if( allocFailed )
211 | 	{
212 | 		GL(glDeleteTextures(1, &id));	// release the texture name instead of leaking it
213 | 		return false;
214 | 	}
215 | 	
216 | 	// allocate DMA PBO  (nothing below can fail, so the members are written directly from here on)
217 | 	GL(glGenBuffers(1, &mDMA));
218 | 	GL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, mDMA));
219 | 	GL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, size, NULL, GL_DYNAMIC_DRAW_ARB));
220 | 	GL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
221 | 	
222 | 	mID     = id;
223 | 	mWidth  = width;
224 | 	mHeight = height;
225 | 	mFormat = format;
226 | 	mSize   = size;
227 | 	return true;
228 | }
229 | 
230 | // MapCUDA -- map the DMA pixel buffer for CUDA access and return its device pointer (NULL on failure).
231 | // The buffer is registered with CUDA on the first call.  Unmap() ends the mapping and copies the buffer into the texture.
232 | void* glTexture::MapCUDA()
233 | {
234 | 	void*  devPtr     = NULL;
235 | 	size_t mappedSize = 0;
236 | 
237 | 	// first use:  register the pixel buffer with CUDA
238 | 	if( mInteropCUDA == NULL )
239 | 	{
240 | 		if( CUDA_FAILED(cudaGraphicsGLRegisterBuffer(&mInteropCUDA, mDMA, cudaGraphicsRegisterFlagsWriteDiscard)) )
241 | 			return NULL;
242 | 
243 | 		printf( "[cuda]   registered %u byte openGL texture for interop access (%ux%u)\n", mSize, mWidth, mHeight);
244 | 	}
245 | 
246 | 	if( CUDA_FAILED(cudaGraphicsMapResources(1, &mInteropCUDA)) )
247 | 		return NULL;
248 | 
249 | 	if( CUDA_FAILED(cudaGraphicsResourceGetMappedPointer(&devPtr, &mappedSize, mInteropCUDA)) )
250 | 	{
251 | 		CUDA(cudaGraphicsUnmapResources(1, &mInteropCUDA));		// undo the mapping before giving up
252 | 		return NULL;
253 | 	}
254 | 
255 | 	if( mappedSize != mSize )	// only reported -- the pointer is returned regardless
256 | 		printf("[OpenGL]  glTexture::MapCUDA() -- size mismatch %zu bytes  (expected=%u)\n", mappedSize, mSize);
257 | 
258 | 	return devPtr;
259 | }
259 | 
260 | // Unmap -- end CUDA access to the DMA pixel buffer, then copy the buffer's contents into the texture
261 | void glTexture::Unmap()
262 | {
263 | 	if( mInteropCUDA != NULL )		// nothing to do unless MapCUDA() has registered the buffer
264 | 	{
265 | 		CUDA(cudaGraphicsUnmapResources(1, &mInteropCUDA));
266 | 
267 | 		// the pixel data is taken from the bound unpack buffer, hence the NULL data pointer
268 | 		GL(glEnable(GL_TEXTURE_2D));
269 | 		GL(glBindTexture(GL_TEXTURE_2D, mID));
270 | 		GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, mDMA));
271 | 		GL(glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mWidth, mHeight, glTextureLayout(mFormat), glTextureType(mFormat), NULL));
272 | 
273 | 		GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
274 | 		GL(glBindTexture(GL_TEXTURE_2D, 0));
275 | 		GL(glDisable(GL_TEXTURE_2D));
276 | 	}
277 | }
278 | 
279 | // UploadCPU -- copy `data` (mSize bytes of host memory) into the texture by way of the DMA pixel buffer.
280 | // Returns false if `data` is NULL or if the pixel buffer could not be mapped.
281 | bool glTexture::UploadCPU( void* data )
282 | {
283 | 	if( !data )
284 | 		return false;
285 | 
286 | 	// activate texture & pbo
287 | 	GL(glEnable(GL_TEXTURE_2D));
288 | 	GL(glActiveTextureARB(GL_TEXTURE0_ARB));
289 | 	GL(glBindTexture(GL_TEXTURE_2D, mID));
290 | 	GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0));
291 | 	GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, mDMA));
292 | 
293 | 	// hint to driver to double-buffer
294 | 	// glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, mImage->GetSize(), NULL, GL_STREAM_DRAW_ARB);	
295 | 
296 | 	// map PBO
297 | 	GLubyte* ptr = (GLubyte*)glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY_ARB);
298 | 	const bool mapped = (ptr != NULL);
299 | 
300 | 	if( mapped )
301 | 	{
302 | 		memcpy(ptr, data, mSize);
303 | 		GL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB)); 
304 | 
305 | 		// the pixel data is taken from the bound unpack buffer, hence the NULL data pointer
306 | 		GL(glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mWidth, mHeight, glTextureLayout(mFormat), glTextureType(mFormat), NULL));
307 | 	}
308 | 	else
309 | 	{
310 | 		GL_CHECK("glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY_ARB)");
311 | 	}
312 | 
313 | 	// restore the GL state on the failure path as well (the buffer and texture used to stay bound)
314 | 	GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
315 | 	GL(glBindTexture(GL_TEXTURE_2D, 0));
316 | 	GL(glDisable(GL_TEXTURE_2D));
317 | 
318 | 	/* alternative upload path through CUDA interop (disabled):
319 | 	if( !mInteropHost || !mInteropDevice )
320 | 	{
321 | 		if( !cudaAllocMapped(&mInteropHost, &mInteropDevice, mSize) )
322 | 			return false;
323 | 	}
324 | 	
325 | 	memcpy(mInteropHost, data, mSize);
326 | 	void* devGL = MapCUDA();
327 | 	
328 | 	if( !devGL )
329 | 		return false;
330 | 		
331 | 	CUDA(cudaMemcpy(devGL, mInteropDevice, mSize, cudaMemcpyDeviceToDevice));
332 | 	Unmap();*/
333 | 
334 | 	return mapped;
335 | }
336 | 
337 | 	
338 | // Render -- draw the texture as a quad whose first corner is (rect.x, rect.y) and whose opposite corner is
339 | // (rect.z, rect.w).  GL_TEXTURE_2D is left enabled afterwards, only the texture binding is cleared.
340 | void glTexture::Render( const float4& rect )
341 | {
342 | 	// texture coordinates and vertex positions of the four corners, in drawing order
343 | 	const float uv[4][2] = { {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f} };
344 | 	const float xy[4][2] = { {rect.x, rect.y}, {rect.z, rect.y}, {rect.z, rect.w}, {rect.x, rect.w} };
345 | 
346 | 	GL(glEnable(GL_TEXTURE_2D));
347 | 	GL(glBindTexture(GL_TEXTURE_2D, mID));
348 | 
349 | 	// immediate-mode quad
350 | 	glBegin(GL_QUADS);
351 | 
352 | 		glColor4f(1.0f,1.0f,1.0f,1.0f);
353 | 
354 | 		for( int n=0; n < 4; n++ )
355 | 		{
356 | 			glTexCoord2f(uv[n][0], uv[n][1]);
357 | 			glVertex2d(xy[n][0], xy[n][1]);
358 | 		}
359 | 
360 | 	glEnd();
361 | 
362 | 	GL(glBindTexture(GL_TEXTURE_2D, 0));
363 | }
364 | 
365 | 
366 | // Render -- convenience overloads that build the destination rectangle for Render(const float4&)
367 | void glTexture::Render( float x, float y )
368 | {
369 | 	// native size:  the quad extends by the texture's own width and height
370 | 	Render(make_float4(x, y, x + mWidth, y + mHeight));
371 | }
372 | 
373 | void glTexture::Render( float x, float y, float width, float height )
374 | {
375 | 	const float4 rect = make_float4(x, y, x + width, y + height);
376 | 	Render(rect);
377 | }
378 | 


--------------------------------------------------------------------------------
/util/display/glTexture.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * inference-101
 3 |  */
 4 |  
 5 | #ifndef __GL_TEXTURE_H__
 6 | #define __GL_TEXTURE_H__
 7 | 
 8 | 
 9 | #include "cudaUtility.h"
10 | #include "cuda_gl_interop.h"
11 | 
12 | 
13 | /**
14 |  * OpenGL texture with an attached pixel buffer object that is used for CPU uploads and CUDA interop
15 |  */
16 | class glTexture
17 | {
18 | public:
19 | 	static glTexture* Create( uint32_t width, uint32_t height, uint32_t format, void* data=NULL );
20 | 	~glTexture();
21 | 	// Draw the texture as a quad:  at its own size, at a given size, or between the corners (x,y) and (z,w) of rect.
22 | 	void Render( float x, float y );
23 | 	void Render( float x, float y, float width, float height );
24 | 	void Render( const float4& rect );
25 | 	// Accessors.  GetSize() is the size of the image in bytes.
26 | 	inline uint32_t GetID() const		{ return mID; }
27 | 	inline uint32_t GetWidth() const	{ return mWidth; }
28 | 	inline uint32_t GetHeight() const	{ return mHeight; }
29 | 	inline uint32_t GetFormat() const	{ return mFormat; }
30 | 	inline uint32_t GetSize() const	{ return mSize; }
31 | 	// MapCUDA() returns a CUDA device pointer to the pixel buffer (NULL on failure); Unmap() copies it into the texture.
32 | 	void* MapCUDA();
33 | 	void  Unmap();
34 | 	// Copies GetSize() bytes of host memory into the texture.
35 | 	bool UploadCPU( void* data );
36 | 	
37 | private:
38 | 	glTexture();
39 | 	bool init(uint32_t width, uint32_t height, uint32_t format, void* data);
40 | 	
41 | 	uint32_t mID;		// GL texture object
42 | 	uint32_t mDMA;		// GL pixel-unpack buffer object
43 | 	uint32_t mWidth;
44 | 	uint32_t mHeight;
45 | 	uint32_t mFormat;	// sized GL format that was passed to Create()
46 | 	uint32_t mSize;		// width * height * channels * bytes per component
47 | 	
48 | 	cudaGraphicsResource* mInteropCUDA;	// CUDA registration of mDMA, created by the first MapCUDA()
49 | 	void* mInteropHost;		// only referenced by a disabled code path in UploadCPU()
50 | 	void* mInteropDevice;	// only referenced by a disabled code path in UploadCPU()
51 | };
52 | 
53 | 
54 | #endif
55 | 


--------------------------------------------------------------------------------
/util/display/glUtility.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * http://github.com/dusty-nv/jetson-inference
  3 |  */
  4 | 
  5 | #ifndef __OPENGL_UTILITY_H
  6 | #define __OPENGL_UTILITY_H
  7 | 
  8 | 
  9 | #include <GL/glew.h>
 10 | #include <GL/glx.h>
 11 | 
 12 | #include <stdio.h>
 13 | 
 14 | 
 15 | /**
 16 |  * LOG_GL printf prefix.
 17 |  * @ingroup renderGL
 18 |  */
 19 | #define LOG_GL   			"[openGL] "
 20 | 
 21 | // GL(x) executes x and logs any pending GL error.  GL_VERIFY / GL_VERIFYN additionally return false / NULL from the
 22 | // enclosing function.  GL_CHECK(msg) only checks and logs.  Each macro expands to a braced block, not an expression.
 23 | #define GL(x)				{ x; glCheckError( #x, __FILE__, __LINE__ ); }
 24 | #define GL_VERIFY(x)		{ x; if(glCheckError( #x, __FILE__, __LINE__ )) return false; }
 25 | #define GL_VERIFYN(x)		{ x; if(glCheckError( #x, __FILE__, __LINE__ )) return NULL; }
 26 | #define GL_CHECK(msg)		{ glCheckError(msg, __FILE__, __LINE__); }
 27 | 
 28 | 
 29 | /**
 30 |  * Translate an openGL error code into a short description.
 31 |  * Codes that are not listed below are reported as "unknown error".
 32 |  * @ingroup renderGL
 33 |  */
 34 | inline const char* glErrorString( GLenum err )
 35 | {
 36 | 	switch(err)
 37 | 	{
 38 | 		  case GL_INVALID_ENUM:          return "invalid enum";
 39 | 		  case GL_INVALID_VALUE:         return "invalid value";
 40 | 		  case GL_INVALID_OPERATION:     return "invalid operation";
 41 | 		  case GL_STACK_OVERFLOW:        return "stack overflow";
 42 | 		  case GL_STACK_UNDERFLOW:       return "stack underflow";
 43 | 		  case GL_OUT_OF_MEMORY:         return "out of memory";
 44 | 		// extension codes, only when the GL headers define them
 45 | 		#ifdef GL_TABLE_TOO_LARGE_EXT
 46 | 		  case GL_TABLE_TOO_LARGE_EXT:   return "table too large";
 47 | 		#endif
 48 | 		#ifdef GL_TEXTURE_TOO_LARGE_EXT
 49 | 		  case GL_TEXTURE_TOO_LARGE_EXT: return "texture too large";
 50 | 		#endif
 51 | 	}
 52 | 
 53 | 	return "unknown error";
 54 | }
 55 | 
 56 | 
 57 | /**
 58 |  * Check for a pending openGL error and, if there is one, log it together with the source location.
 59 |  *
 60 |  * @param msg  text describing what was executed (the GL*() macros pass the stringified call)
 61 |  * @param file source file of the call site
 62 |  * @param line source line of the call site
 63 |  * @returns true if an error was pending (and has been logged), false otherwise
 64 |  * @ingroup renderGL
 65 |  */
 66 | inline bool glCheckError(const char* msg, const char* file, int line)
 67 | {
 68 | 	const GLenum err = glGetError();
 69 | 
 70 | 	if( err == GL_NO_ERROR )
 71 | 		return false;
 72 | 
 73 | 	// the code is printed with %u, as the argument is unsigned
 74 | 	printf(LOG_GL "Error %u - '%s'\n", (uint)err, glErrorString(err));
 75 | 	printf(LOG_GL "   %s::%i\n", file, line );
 76 | 	printf(LOG_GL "   %s\n", msg );
 77 | 
 78 | 	return true;
 79 | }
 80 | 
 81 | 
 82 | /**
 83 |  * Check for a pending openGL error and, if there is one, log it on a single line.
 84 |  *
 85 |  * @param msg text describing what was executed
 86 |  * @returns true if an error was pending (and has been logged), false otherwise
 87 |  * @ingroup renderGL
 88 |  */
 89 | inline bool glCheckError(const char* msg)
 90 | {
 91 | 	const GLenum err = glGetError();
 92 | 
 93 | 	if( err == GL_NO_ERROR )
 94 | 		return false;
 95 | 
 96 | 	printf(LOG_GL "%s    (error %u - %s)\n", msg, (uint)err, glErrorString(err));
 97 | 
 98 | 	return true;
 99 | }
100 | 
101 | 
102 | // memory query tokens -- presumably those of the GL_NVX_gpu_memory_info extension (TODO confirm against its spec)
103 | #define GL_GPU_MEM_INFO_TOTAL_AVAILABLE_MEM_NVX 0x9048
104 | #define GL_GPU_MEM_INFO_CURRENT_AVAILABLE_MEM_NVX 0x9049
105 | 
106 | 
107 | /**
108 |  * glPrintFreeMem
109 |  * Prints the currently available and the total GPU memory, as returned by the two queries above.
110 |  * @ingroup renderGL
111 |  */
112 | inline void glPrintFreeMem()
113 | {
114 | 	GLint mem_kb[2] = { 0, 0 };		// [0] total, [1] currently available
115 | 
116 | 	glGetIntegerv(GL_GPU_MEM_INFO_TOTAL_AVAILABLE_MEM_NVX,   &mem_kb[0]);
117 | 	glGetIntegerv(GL_GPU_MEM_INFO_CURRENT_AVAILABLE_MEM_NVX, &mem_kb[1]);
118 | 
119 | 	printf("[openGL]   GPU memory free    %i / %i kb\n", mem_kb[1], mem_kb[0]);
120 | }
121 | 
122 | 
123 | 
124 | #endif
125 | 
126 | 


--------------------------------------------------------------------------------
/util/loadImage.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * http://github.com/dusty-nv/jetson-inference
  3 |  */
  4 |  
  5 | #include "loadImage.h"
  6 | #include "cudaMappedMemory.h"
  7 | 
  8 | #include <QImage>
  9 | 
 10 | 
 11 | // saveImageRGBA -- write a float4 image to disk as 8-bit RGB (the alpha channel is not written).
 12 | // Channel values are scaled by 255/max_pixel and clamped to 0..255.  Returns false on invalid arguments or a failed save.
 13 | bool saveImageRGBA( const char* filename, float4* cpu, int width, int height, float max_pixel )
 14 | {
 15 | 	if( !filename || !cpu || width <= 0 || height <= 0 || !(max_pixel > 0.0f) )
 16 | 	{
 17 | 		printf("saveImageRGBA - invalid parameter\n");
 18 | 		return false;
 19 | 	}
 20 | 	
 21 | 	// qRgb() keeps only the low 8 bits of each channel, so unclamped values would wrap around  (NaN maps to 0)
 22 | 	auto toByte = [](float v) -> int { return !(v > 0.0f) ? 0 : (v > 255.0f ? 255 : int(v)); };
 23 | 
 24 | 	const float scale = 255.0f / max_pixel;
 25 | 	QImage img(width, height, QImage::Format_RGB32);
 26 | 
 27 | 	for( int y=0; y < height; y++ )
 28 | 	{
 29 | 		for( int x=0; x < width; x++ )
 30 | 		{
 31 | 			const float4 px = cpu[size_t(y) * width + x];
 32 | 			img.setPixel(x, y, qRgb(toByte(px.x * scale), toByte(px.y * scale), toByte(px.z * scale)));
 33 | 		}
 34 | 	}
 35 | 
 36 | 	// save file  (the format is chosen by QImage::save from the filename)
 37 | 	if( !img.save(filename/*, "PNG", 100*/) )
 38 | 	{
 39 | 		printf("failed to save %ix%i output image to %s\n", width, height, filename);
 40 | 		return false;
 41 | 	}
 42 | 	
 43 | 	return true;
 44 | }
 45 | 
 46 | 
 47 | // loadImageRGBA -- load an image file into mapped CPU/GPU memory as interleaved float4 RGBA pixels.
 48 | // If *width and *height are both non-zero the image is rescaled to that size; the final size is written back to them.
 49 | bool loadImageRGBA( const char* filename, float4** cpu, float4** gpu, int* width, int* height )
 50 | {
 51 | 	if( !filename || !cpu || !gpu || !width || !height )
 52 | 	{
 53 | 		printf("loadImageRGBA - invalid parameter\n");
 54 | 		return false;
 55 | 	}
 56 | 	
 57 | 	// load original image
 58 | 	QImage qImg;
 59 | 
 60 | 	if( !qImg.load(filename) )
 61 | 	{
 62 | 		printf("failed to load image %s\n", filename);
 63 | 		return false;
 64 | 	}
 65 | 
 66 | 	if( *width != 0 && *height != 0 )
 67 | 		qImg = qImg.scaled(*width, *height, Qt::IgnoreAspectRatio);
 68 | 	
 69 | 	// the byte size is computed in size_t, so that the product cannot wrap around in 32-bit arithmetic
 70 | 	const uint32_t imgWidth  = qImg.width();
 71 | 	const uint32_t imgHeight = qImg.height();
 72 | 	const size_t   imgSize   = size_t(imgWidth) * imgHeight * sizeof(float) * 4;
 73 | 
 74 | 	printf("loaded image  %s  (%u x %u)  %zu bytes\n", filename, imgWidth, imgHeight, imgSize);
 75 | 
 76 | 	// allocate buffer for the image
 77 | 	if( !cudaAllocMapped((void**)cpu, (void**)gpu, imgSize) )
 78 | 	{
 79 | 		printf(LOG_CUDA "failed to allocate %zu bytes for image %s\n", imgSize, filename);
 80 | 		return false;
 81 | 	}
 82 | 
 83 | 	float4* cpuPtr = *cpu;
 84 | 	
 85 | 	for( uint32_t y=0; y < imgHeight; y++ )
 86 | 	{
 87 | 		for( uint32_t x=0; x < imgWidth; x++ )
 88 | 		{
 89 | 			const QRgb rgb  = qImg.pixel(x,y);
 90 | 			const float4 px = make_float4(float(qRed(rgb)), 
 91 | 										  float(qGreen(rgb)), 
 92 | 										  float(qBlue(rgb)),
 93 | 										  float(qAlpha(rgb)));
 94 | 			
 95 | 			cpuPtr[size_t(y) * imgWidth + x] = px;
 96 | 		}
 97 | 	}
 98 | 	
 99 | 	*width  = imgWidth;
100 | 	*height = imgHeight;	
101 | 	return true;
102 | }
103 | 
104 | // loadImageRGB -- load an image file into mapped CPU/GPU memory as planar floats (all R, then all G, then all B),
105 | // with `mean` subtracted per channel.  If *width and *height are both non-zero the image is rescaled to that size.
106 | bool loadImageRGB( const char* filename, float3** cpu, float3** gpu, int* width, int* height, const float3& mean )
107 | {
108 | 	if( !filename || !cpu || !gpu || !width || !height )
109 | 	{
110 | 		printf("loadImageRGB - invalid parameter\n");
111 | 		return false;
112 | 	}
113 | 	
114 | 	// load original image
115 | 	QImage qImg;
116 | 
117 | 	if( !qImg.load(filename) )
118 | 	{
119 | 		printf("failed to load image %s\n", filename);
120 | 		return false;
121 | 	}
122 | 
123 | 	if( *width != 0 && *height != 0 )
124 | 		qImg = qImg.scaled(*width, *height, Qt::IgnoreAspectRatio);
125 | 	
126 | 	// pixel count and byte size are kept in size_t, so that the products cannot wrap around in 32-bit arithmetic
127 | 	const uint32_t imgWidth  = qImg.width();
128 | 	const uint32_t imgHeight = qImg.height();
129 | 	const size_t   imgPixels = size_t(imgWidth) * imgHeight;
130 | 	const size_t   imgSize   = imgPixels * sizeof(float) * 3;
131 | 
132 | 	printf("loaded image  %s  (%u x %u)  %zu bytes\n", filename, imgWidth, imgHeight, imgSize);
133 | 
134 | 	// allocate buffer for the image
135 | 	if( !cudaAllocMapped((void**)cpu, (void**)gpu, imgSize) )
136 | 	{
137 | 		printf(LOG_CUDA "failed to allocate %zu bytes for image %s\n", imgSize, filename);
138 | 		return false;
139 | 	}
140 | 
141 | 	float* cpuPtr = (float*)*cpu;
142 | 	
143 | 	for( uint32_t y=0; y < imgHeight; y++ )
144 | 	{
145 | 		for( uint32_t x=0; x < imgWidth; x++ )
146 | 		{
147 | 			const QRgb rgb  = qImg.pixel(x,y);
148 | 			const float mul = 1.0f; 	//1.0f / 255.0f;
149 | 			const float3 px = make_float3((float(qRed(rgb))   - mean.x) * mul, 
150 | 										  (float(qGreen(rgb)) - mean.y) * mul, 
151 | 										  (float(qBlue(rgb))  - mean.z) * mul );
152 | 			// note:  caffe/GIE is band-sequential (as opposed to the typical Band Interleaved by Pixel)
153 | 			cpuPtr[imgPixels * 0 + size_t(y) * imgWidth + x] = px.x; 
154 | 			cpuPtr[imgPixels * 1 + size_t(y) * imgWidth + x] = px.y; 
155 | 			cpuPtr[imgPixels * 2 + size_t(y) * imgWidth + x] = px.z; 
156 | 		}
157 | 	}
158 | 		
159 | 	*width  = imgWidth;
160 | 	*height = imgHeight;
161 | 	return true;
162 | }
163 | 
164 | // loadImageBGR -- like loadImageRGB(), but the planes are stored in B, G, R order and `mean` is given in that order too
165 | bool loadImageBGR( const char* filename, float3** cpu, float3** gpu, int* width, int* height, const float3& mean )
166 | {
167 | 	if( !filename || !cpu || !gpu || !width || !height )
168 | 	{
169 | 		printf("loadImageBGR - invalid parameter\n");
170 | 		return false;
171 | 	}
172 | 	
173 | 	// load original image
174 | 	QImage qImg;
175 | 
176 | 	if( !qImg.load(filename) )
177 | 	{
178 | 		printf("failed to load image %s\n", filename);
179 | 		return false;
180 | 	}
181 | 
182 | 	if( *width != 0 && *height != 0 )
183 | 		qImg = qImg.scaled(*width, *height, Qt::IgnoreAspectRatio);
184 | 	
185 | 	const uint32_t imgWidth  = qImg.width();
186 | 	const uint32_t imgHeight = qImg.height();
187 | 	const size_t   imgPixels = size_t(imgWidth) * imgHeight;		// size_t:  no 32-bit wrap-around
188 | 	const size_t   imgSize   = imgPixels * sizeof(float) * 3;
189 | 
190 | 	printf("loaded image  %s  (%u x %u)  %zu bytes\n", filename, imgWidth, imgHeight, imgSize);
191 | 
192 | 	// allocate buffer for the image
193 | 	if( !cudaAllocMapped((void**)cpu, (void**)gpu, imgSize) )
194 | 	{
195 | 		printf(LOG_CUDA "failed to allocate %zu bytes for image %s\n", imgSize, filename);
196 | 		return false;
197 | 	}
198 | 
199 | 	float* cpuPtr = (float*)*cpu;
200 | 	
201 | 	for( uint32_t y=0; y < imgHeight; y++ )
202 | 	{
203 | 		for( uint32_t x=0; x < imgWidth; x++ )
204 | 		{
205 | 			const QRgb rgb  = qImg.pixel(x,y);
206 | 			const float mul = 1.0f; 	//1.0f / 255.0f;
207 | 			const float3 px = make_float3((float(qBlue(rgb))  - mean.x) * mul, 
208 | 										  (float(qGreen(rgb)) - mean.y) * mul, 
209 | 										  (float(qRed(rgb))   - mean.z) * mul );
210 | 			// note:  caffe/GIE is band-sequential (as opposed to the typical Band Interleaved by Pixel)
211 | 			cpuPtr[imgPixels * 0 + size_t(y) * imgWidth + x] = px.x; 
212 | 			cpuPtr[imgPixels * 1 + size_t(y) * imgWidth + x] = px.y; 
213 | 			cpuPtr[imgPixels * 2 + size_t(y) * imgWidth + x] = px.z; 
214 | 		}
215 | 	}
216 | 
217 | 	*width  = imgWidth;		// the final size is reported to the caller, as loadImageRGB() / loadImageRGBA() do
218 | 	*height = imgHeight;
219 | 	return true;
220 | }


--------------------------------------------------------------------------------
/util/loadImage.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * http://github.com/dusty-nv/jetson-inference
 3 |  */
 4 |  
 5 | #ifndef __IMAGE_LOADER_H_
 6 | #define __IMAGE_LOADER_H_
 7 | 
 8 | 
 9 | #include "cudaUtility.h"
10 | 
11 | 
12 | /**
13 |  * Load a color image from disk into CUDA memory with alpha (interleaved float4 RGBA pixels).
14 |  * This function loads the image into shared CPU/GPU memory, using the functions from cudaMappedMemory.h
15 |  *
16 |  * @param filename Path to the image file on disk.
17 |  * @param cpu Pointer to CPU buffer allocated containing the image.
18 |  * @param gpu Pointer to CUDA device buffer residing on GPU containing image.
19 |  * @param width In: requested width, used only if width and height are both non-zero.  Out: width of the loaded image.
20 |  * @param height In: requested height, used only if width and height are both non-zero.  Out: height of the loaded image.
21 |  *
22 |  * @ingroup util
23 |  */
24 | bool loadImageRGBA( const char* filename, float4** cpu, float4** gpu, int* width, int* height );
25 | 
26 | 
27 | /**
28 |  * Save a float4 image to disk as RGB; channel values are scaled by 255/max_pixel and the alpha channel is not written.
29 |  * @ingroup util
30 |  */
31 | bool saveImageRGBA( const char* filename, float4* cpu, int width, int height, float max_pixel=255.0f );
32 | 
33 | 
34 | /**
35 |  * Load a color image from disk into CUDA memory as planar floats (all R values, then all G, then all B).
36 |  * This function loads the image into shared CPU/GPU memory, using the functions from cudaMappedMemory.h
37 |  *
38 |  * @param filename Path to the image file on disk.
39 |  * @param cpu Pointer to CPU buffer allocated containing the image.
40 |  * @param gpu Pointer to CUDA device buffer residing on GPU containing image.
41 |  * @param width In: requested width, used only if width and height are both non-zero.  Out: width of the loaded image.
42 |  * @param height In: requested height, used only if width and height are both non-zero.  Out: height of the loaded image.
43 |  * @param mean Value subtracted from each pixel, per channel (x from R, y from G, z from B).
44 |  * @ingroup util
45 |  */
46 | bool loadImageRGB( const char* filename, float3** cpu, float3** gpu, int* width, int* height, const float3& mean=make_float3(0,0,0) );
47 | 
48 | 
49 | /**
50 |  * Load a color image from disk into CUDA memory as planar floats in B, G, R order (all B values, then all G, then all R).
51 |  * This function loads the image into shared CPU/GPU memory, using the functions from cudaMappedMemory.h
52 |  *
53 |  * @param filename Path to the image file on disk.
54 |  * @param cpu Pointer to CPU buffer allocated containing the image.
55 |  * @param gpu Pointer to CUDA device buffer residing on GPU containing image.
56 |  * @param width Requested width in pixels; the image is rescaled only if width and height are both non-zero.
57 |  * @param height Requested height in pixels; the image is rescaled only if width and height are both non-zero.
58 |  * @param mean Value subtracted from each pixel, per channel (x from B, y from G, z from R).
59 |  * @ingroup util
60 |  */
61 | bool loadImageBGR( const char* filename, float3** cpu, float3** gpu, int* width, int* height, const float3& mean=make_float3(0,0,0) );
62 | 
63 | 
64 | 
65 | #endif
66 | 


--------------------------------------------------------------------------------