├── CMakeLists.txt ├── CMakePreBuild.sh ├── README.md ├── data ├── deploy.prototxt ├── fontmapA.png ├── labels.txt └── merge.caffemodel ├── face-recognition ├── CMakeLists.txt └── face-recognition.cpp ├── kernel.cu ├── pluginImplement.cpp ├── pluginImplement.h ├── script ├── merge_model.py └── rename_model.py ├── tensorNet.cpp ├── tensorNet.h └── util ├── camera ├── gst-camera │ ├── CMakeLists.txt │ └── gst-camera.cpp ├── gstCamera.cpp ├── gstCamera.h ├── gstUtility.cpp ├── gstUtility.h ├── v4l2-console │ ├── CMakeLists.txt │ └── v4l2-console.cpp ├── v4l2-display │ ├── CMakeLists.txt │ └── v4l2-display.cpp ├── v4l2Camera.cpp └── v4l2Camera.h ├── commandLine.cpp ├── commandLine.h ├── cuda ├── cudaFont.cu ├── cudaFont.h ├── cudaMappedMemory.h ├── cudaNormalize.cu ├── cudaNormalize.h ├── cudaOverlay.cu ├── cudaOverlay.h ├── cudaRGB.cu ├── cudaRGB.h ├── cudaResize.cu ├── cudaResize.h ├── cudaUtility.h ├── cudaYUV-NV12.cu ├── cudaYUV-YUYV.cu ├── cudaYUV-YV12.cu └── cudaYUV.h ├── display ├── glDisplay.cpp ├── glDisplay.h ├── glTexture.cpp ├── glTexture.h └── glUtility.h ├── loadImage.cpp └── loadImage.h /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 2.8) 3 | project(face-recognition) 4 | 5 | # setup tensorRT flags 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") # -std=gnu++11 7 | set(BUILD_DEPS "YES" CACHE BOOL "If YES, will install dependencies into sandbox. 
Automatically reset to NO after dependencies are installed.") 8 | 9 | 10 | # if this is the first time running cmake, perform pre-build dependency install script (or if the user manually triggers re-building the dependencies) 11 | if( BUILD_DEPS ) 12 | message("Launching pre-build dependency installer script...") 13 | # the script is located through the source dir so the build directory may live anywhere; it still runs inside the binary dir 14 | execute_process(COMMAND sh "${PROJECT_SOURCE_DIR}/CMakePreBuild.sh" 15 | WORKING_DIRECTORY ${PROJECT_BINARY_DIR} 16 | RESULT_VARIABLE PREBUILD_SCRIPT_RESULT) 17 | message("-- pre-build script exit code: ${PREBUILD_SCRIPT_RESULT}") 18 | set(BUILD_DEPS "NO" CACHE BOOL "If YES, will install dependencies into sandbox. Automatically reset to NO after dependencies are installed." FORCE) 19 | message("Finished installing dependencies") 20 | endif() 21 | 22 | 23 | # Qt is used to load images (installed by ubuntu-desktop) 24 | find_package(Qt4 REQUIRED) 25 | include(${QT_USE_FILE}) 26 | add_definitions(${QT_DEFINITIONS}) 27 | 28 | 29 | # setup CUDA (required: the library target is created with cuda_add_library) 30 | find_package(CUDA REQUIRED) 31 | # append -O3 and device code generation for compute capability 5.3 and 6.2 to the nvcc flags 32 | set( 33 | CUDA_NVCC_FLAGS 34 | ${CUDA_NVCC_FLAGS}; 35 | -O3 36 | -gencode arch=compute_53,code=sm_53 37 | -gencode arch=compute_62,code=sm_62 38 | ) 39 | 40 | 41 | # setup project output paths 42 | set(PROJECT_OUTPUT_DIR ${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_PROCESSOR}) 43 | set(PROJECT_INCLUDE_DIR ${PROJECT_OUTPUT_DIR}/include) 44 | 45 | file(MAKE_DIRECTORY ${PROJECT_INCLUDE_DIR}) 46 | file(MAKE_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin) 47 | 48 | message("-- system arch: ${CMAKE_SYSTEM_PROCESSOR}") 49 | message("-- output path: ${PROJECT_OUTPUT_DIR}") 50 | 51 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin) 52 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib) 53 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib) 54 | 55 | 56 | # build C/C++ interface 57 | include_directories(${PROJECT_INCLUDE_DIR} ${GIE_PATH}/include) 58 | include_directories(/usr/include/gstreamer-1.0 /usr/lib/aarch64-linux-gnu/gstreamer-1.0/include /usr/include/glib-2.0 /usr/include/libxml2 /usr/lib/aarch64-linux-gnu/glib-2.0/include/) 59 | 60 | file(GLOB
inferenceSources *.cpp *.cu util/*.cpp util/camera/*.cpp util/cuda/*.cu util/display/*.cpp) 61 | file(GLOB inferenceIncludes *.h util/*.h util/camera/*.h util/cuda/*.h util/display/*.h) 62 | 63 | cuda_add_library(jetson-inference SHARED ${inferenceSources}) 64 | target_link_libraries(jetson-inference nvcaffe_parser nvinfer Qt4::QtGui GL GLEW gstreamer-1.0 gstapp-1.0) # gstreamer-0.10 gstbase-0.10 gstapp-0.10 65 | 66 | # transfer all headers to the include directory 67 | foreach(include ${inferenceIncludes}) 68 | message("-- Copying ${include}") 69 | configure_file(${include} ${PROJECT_INCLUDE_DIR} COPYONLY) 70 | endforeach() 71 | 72 | add_subdirectory(face-recognition) 73 | 74 | # install 75 | foreach(include ${inferenceIncludes}) 76 | install(FILES "${include}" DESTINATION include/jetson-inference) 77 | endforeach() 78 | 79 | # install the shared library 80 | install(TARGETS jetson-inference DESTINATION lib/jetson-inference EXPORT jetson-inferenceConfig) 81 | 82 | # install the cmake project, for importing 83 | install(EXPORT jetson-inferenceConfig DESTINATION share/jetson-inference/cmake) 84 | 85 | -------------------------------------------------------------------------------- /CMakePreBuild.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # this script is automatically run from CMakeLists.txt 3 | 4 | BUILD_ROOT=$PWD 5 | TORCH_PREFIX=$PWD/torch 6 | 7 | echo "[Pre-build] dependency installer script running..." 
8 | echo "[Pre-build] build root directory: $BUILD_ROOT" 9 | 10 | 11 | # break on errors 12 | #set -e 13 | 14 | 15 | # install packages 16 | sudo apt-get update 17 | sudo apt-get install -y libqt4-dev qt4-dev-tools libglew-dev glew-utils libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev libglib2.0-dev 18 | sudo apt-get update 19 | 20 | sudo rm /usr/lib/aarch64-linux-gnu/libGL.so 21 | sudo ln -s /usr/lib/aarch64-linux-gnu/tegra/libGL.so /usr/lib/aarch64-linux-gnu/libGL.so 22 | 23 | # maximize performance 24 | sudo nvpmodel -m 0 25 | sudo ~/jetson_clock.sh 26 | echo "[Pre-build] Finished CMakePreBuild script" 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Face-Recognition 2 | =========================== 3 | This sample demonstrates the TensorRT 2.1 Plugin API. 4 | 5 | It reuses most of its utility functions from jetson-inference; please check that project first if you need more deep-learning samples: 6 | 7 | 8 | *** 9 |
10 | 11 | 12 | ## Environment 13 | Jetson TX2 14 |
15 | JetPack-3.1 16 |
17 |
18 |
19 | 20 | 21 | ## Build 22 | ```C 23 | $ sudo apt-get install git cmake 24 | $ git clone https://github.com/AastaNV/Face-Recognition 25 | $ cd Face-Recognition 26 | $ mkdir build 27 | $ cd build 28 | $ cmake .. 29 | $ make 30 | ``` 31 | 32 |
33 | 34 | 35 | ## Run 36 | ```C 37 | $ cd aarch64/bin 38 | $ ./face-recognition 39 | ``` 40 | 41 |
42 | 43 | 44 | ## Plugin Layer 45 | **BboxMergeLayer** 46 |
47 | This plugin layer demonstrates how to implement a CPU-based plugin layer. 48 |
49 | 1. Mark the required tensors as outputs 50 | 2. Allocate unified memory, so that the CPU pointer == the GPU pointer 51 |
52 | 53 | 54 | **DataRoiLayer** 55 |
56 | This plugin layer demonstrates how to implement a GPU plugin layer. 57 |
58 | 1. Get the input/output data pointers in the enqueue function 59 | 2. Launch the GPU kernel on the same CUDA stream 60 |
61 | 62 | 63 | **RecognitionLayer** 64 |
65 | This plugin layer demonstrates more complicated plugin-layer handling. 66 |
67 | 1. This class can handle two different layers: selectBbox and summaryLabel 68 | 2. Define some shared variables to make communication between layers easier 69 |
70 |
71 | 72 | 73 | ## Support 74 | Please raise your problem in our forum to get immediate support. 75 |
76 | https://devtalk.nvidia.com/default/board/189/jetson-tx2/ 77 |
78 |
79 |
80 | -------------------------------------------------------------------------------- /data/fontmapA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AastaNV/Face-Recognition/c93a12570ac0d925ead2ccaa152c539d89c3cb5d/data/fontmapA.png -------------------------------------------------------------------------------- /data/merge.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AastaNV/Face-Recognition/c93a12570ac0d925ead2ccaa152c539d89c3cb5d/data/merge.caffemodel -------------------------------------------------------------------------------- /face-recognition/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | file(GLOB faceRecognitionSources *.cpp) 3 | file(GLOB faceRecognitionIncludes *.h ) 4 | 5 | cuda_add_executable(face-recognition ${faceRecognitionSources}) 6 | target_link_libraries(face-recognition nvcaffe_parser nvinfer jetson-inference) 7 | -------------------------------------------------------------------------------- /face-recognition/face-recognition.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "gstCamera.h" 3 | #include "glDisplay.h" 4 | #include "glTexture.h" 5 | 6 | #include "cudaNormalize.h" 7 | #include "cudaOverlay.h" 8 | #include "cudaFont.h" 9 | #include "tensorNet.h" 10 | 11 | 12 | using namespace nvinfer1; 13 | using namespace nvcaffeparser1; 14 | 15 | static const int BATCH_SIZE = 1; 16 | static const int TIMING_ITERATIONS = 100; 17 | 18 | const char* model = "/home/nvidia/Face-Recognition/data/deploy.prototxt"; 19 | const char* weight = "/home/nvidia/Face-Recognition/data/merge.caffemodel"; 20 | const char* label = "/home/nvidia/Face-Recognition/data/labels.txt"; 21 | 22 | const char* INPUT_BLOB_NAME = "data"; 23 | const char* OUTPUT_BLOB_COV = "coverage_fd"; 24 | const char* 
OUTPUT_BLOB_BOX = "bboxes_fd"; 25 | const char* OUTPUT_BLOB_NUM = "count_fd"; 26 | const char* OUTPUT_BLOB_SEL = "bbox_fr"; 27 | const char* OUTPUT_BLOB_IDX = "bbox_id"; 28 | const char* OUTPUT_BLOB_RES = "softmax_fr"; 29 | const char* OUTPUT_BLOB_LAB = "label"; 30 | 31 | #define DEFAULT_CAMERA -1 // -1 for onboard camera, or change to index of /dev/video V4L2 camera (>=0) 32 | 33 | cudaError_t cudaPreImageNetMean( float4* input, size_t inputWidth, size_t inputHeight, float* output, size_t outputWidth, size_t outputHeight, const float3& mean_value ); 34 | 35 | 36 | 37 | bool signal_recieved = false; 38 | 39 | void sig_handler(int signo) 40 | { 41 | if( signo == SIGINT ) 42 | { 43 | printf("received SIGINT\n"); 44 | signal_recieved = true; 45 | } 46 | } 47 | // allocate a CUDA managed buffer of BATCH_SIZE*c*h*w floats for the blob described by info; returns NULL if the allocation fails 48 | float* allocateMemory(DimsCHW dims, char* info) 49 | { 50 | float* ptr = NULL; 51 | size_t size; 52 | std::cout << "Allocate memory: " << info << std::endl; 53 | size = BATCH_SIZE * dims.c() * dims.h() * dims.w(); 54 | const cudaError_t err = cudaMallocManaged(&ptr, size*sizeof(float)); // call kept outside assert() so the allocation still happens when NDEBUG is defined 55 | if( err != cudaSuccess ) { std::cerr << "cudaMallocManaged failed for " << info << ": " << cudaGetErrorString(err) << std::endl; assert(0); return NULL; } return ptr; 56 | } 57 | 58 | // load label info 59 | std::vector loadLabelInfo(const char* filename) 60 | { 61 | assert(filename); 62 | std::vector labelInfo; 63 | 64 | FILE* f = fopen(filename, "r"); 65 | if( !f ) 66 | { 67 | printf("failed to open %s\n", filename); 68 | assert(0); 69 | } 70 | 71 | char str[512]; 72 | while( fgets(str, 512, f) != NULL ) 73 | { 74 | const int syn = 9; // length of synset prefix (in characters) 75 | const int len = strlen(str); 76 | 77 | if( len > syn && str[0] == 'n' && str[syn] == ' ' ) 78 | { 79 | str[syn] = 0; 80 | str[len-1] = 0; 81 | 82 | const std::string b = (str + syn + 1); 83 | labelInfo.push_back(b); 84 | } 85 | else if( len > 0 ) // no 9-character synset prefix (i.e.
from DIGITS snapshot) 86 | { 87 | if( str[len-1] == '\n' ) str[len-1] = 0; 88 | labelInfo.push_back(str); 89 | } 90 | } 91 | fclose(f); 92 | return labelInfo; 93 | } 94 | // scale the first numBoundingBoxes boxes in bbox (4 floats each) in place by scale_x/scale_y, print them, and outline them on the image; returns false only when the overlay call fails 95 | bool DrawBoxes(float* input, float* output, uint32_t width, uint32_t height, const float scale_x, const float scale_y, float* conf, float* bbox, const int numBoundingBoxes) 96 | { 97 | // Only handle single class here 98 | const float4 color = make_float4( 0.0f, 255.0f, 175.0f, 100.0f); 99 | 100 | printf("%i bounding boxes detected\n", numBoundingBoxes); 101 | for( int n=0; n < numBoundingBoxes; n++ ) 102 | { 103 | float* bb = bbox + (n * 4); 104 | bb[0] *= scale_x; 105 | bb[1] *= scale_y; 106 | bb[2] *= scale_x; 107 | bb[3] *= scale_y; 108 | printf("bounding box %i (%f, %f) (%f, %f) w=%f h=%f\n", n, bb[0], bb[1], bb[2], bb[3], bb[2] - bb[0], bb[3] - bb[1]); 109 | } 110 | if( numBoundingBoxes <= 0 ) return true; // nothing to draw 111 | 112 | const bool failed = CUDA_FAILED(cudaRectOutlineOverlay((float4*)input, (float4*)output, width, height, (float4*)bbox, numBoundingBoxes, color)); 113 | if( failed ) 114 | printf("failed to draw boxes\n"); 115 | CUDA(cudaThreadSynchronize()); 116 | return !failed; // the function is declared bool, so every path must return a value 117 | } 118 | 119 | void ShowClassification(cudaFont* font, void* input, void* output, uint32_t width, uint32_t height, 120 | float* lab, float* bbox, std::vector &labelInfo, const int numBoundingBoxes) 121 | { 122 | char str[512]; 123 | 124 | if( font != NULL ) 125 | { 126 | for( int i=0; i-1)?labelInfo[int(lab[i])].c_str():"NAN"); 129 | std::cout << "bbox=" << i << " class=" << lab[i] << " label=" << str << std::endl; 130 | 131 | float* bb = bbox + (i * 4); 132 | font->RenderOverlay((float4*)input, (float4*)output, width, height, (const char*)str, bb[0], bb[3], make_float4(255.0f, 255.0f, 255.0f, 255.0f)); 133 | CUDA(cudaThreadSynchronize()); 134 | } 135 | } 136 | } 137 | 138 | 139 | 140 | int main(int argc, char** argv) 141 | { 142 | std::cout << "Building and running a GPU inference engine for " << model << ", N=" << BATCH_SIZE << "..."
<< std::endl; 143 | 144 | 145 | /* camera */ 146 | if( signal(SIGINT, sig_handler) == SIG_ERR ) 147 | printf("\ncan't catch SIGINT\n"); 148 | 149 | gstCamera* camera = gstCamera::Create(DEFAULT_CAMERA); 150 | 151 | if( !camera ) 152 | { 153 | printf("failed to initialize video device\n"); 154 | return 0; 155 | } 156 | 157 | printf("successfully initialized video device\n"); 158 | printf(" width: %u\n", camera->GetWidth()); 159 | printf(" height: %u\n", camera->GetHeight()); 160 | printf(" depth: %u (bpp)\n\n", camera->GetPixelDepth()); 161 | 162 | 163 | /* create networks */ 164 | TensorNet tensorNet; 165 | std::vector labelInfo = loadLabelInfo(label); 166 | tensorNet.caffeToTRTModel(model, weight, std::vector < std::string > {OUTPUT_BLOB_COV, OUTPUT_BLOB_BOX, OUTPUT_BLOB_NUM, OUTPUT_BLOB_SEL, OUTPUT_BLOB_IDX, OUTPUT_BLOB_RES, OUTPUT_BLOB_LAB}, BATCH_SIZE); 167 | tensorNet.createInference(); 168 | 169 | 170 | /* openGL window */ 171 | cudaFont* font = cudaFont::Create(); 172 | glDisplay* display = glDisplay::Create(); 173 | glTexture* texture = NULL; 174 | 175 | if( !display ) { 176 | printf("failed to create openGL display\n"); 177 | } 178 | else 179 | { 180 | texture = glTexture::Create(camera->GetWidth(), camera->GetHeight(), GL_RGBA32F_ARB/*GL_RGBA8*/); 181 | if( !texture ) printf("failed to create openGL texture\n"); 182 | } 183 | 184 | 185 | /* open camera */ 186 | if( !camera->Open() ) 187 | { 188 | printf("failed to open camera for streaming\n"); 189 | return 0; 190 | } 191 | 192 | 193 | /* prepare tensor */ 194 | DimsCHW dimsData = tensorNet.getTensorDims(INPUT_BLOB_NAME); 195 | DimsCHW dimsConf = tensorNet.getTensorDims(OUTPUT_BLOB_COV); 196 | DimsCHW dimsBbox = tensorNet.getTensorDims(OUTPUT_BLOB_BOX); 197 | DimsCHW dimsNum = tensorNet.getTensorDims(OUTPUT_BLOB_NUM); 198 | DimsCHW dimsSel = tensorNet.getTensorDims(OUTPUT_BLOB_SEL); 199 | DimsCHW dimsIdx = tensorNet.getTensorDims(OUTPUT_BLOB_IDX); 200 | DimsCHW dimsRes = 
tensorNet.getTensorDims(OUTPUT_BLOB_RES); 201 | DimsCHW dimsLab = tensorNet.getTensorDims(OUTPUT_BLOB_LAB); 202 | 203 | float* data = allocateMemory(dimsData, (char*)"input blob"); 204 | float* conf = allocateMemory(dimsConf, (char*)"coverage"); // for cpu plugin layer 205 | float* bbox = allocateMemory(dimsBbox, (char*)"box"); // for cpu plugin layer 206 | float* num = allocateMemory(dimsNum, (char*)"count"); 207 | float* sel = allocateMemory(dimsSel, (char*)"selected bbox"); // for cpu plugin layer 208 | float* idx = allocateMemory(dimsIdx, (char*)"selected index"); // for cpu plugin layer 209 | float* res = allocateMemory(dimsRes, (char*)"softmax"); // for cpu plugin layer 210 | float* lab = allocateMemory(dimsLab, (char*)"label"); 211 | 212 | 213 | /* main loop */ 214 | while( !signal_recieved ) 215 | { 216 | void* imgCPU = NULL; 217 | void* imgCUDA = NULL; 218 | void* imgRGBA = NULL; 219 | 220 | if( !camera->Capture(&imgCPU, &imgCUDA, 1000) ) printf("failed to capture frame\n"); 221 | if( !camera->ConvertRGBA(imgCUDA, &imgRGBA) ) printf("failed to convert from NV12 to RGBA\n"); 222 | 223 | if( CUDA_FAILED(cudaPreImageNetMean((float4*)imgRGBA, camera->GetWidth(), camera->GetHeight(), data, dimsData.w(), dimsData.h(), make_float3(127.0f, 127.0f, 127.0f))) ) 224 | { 225 | printf("cudaPreImageNetMean failed\n"); 226 | return 0; 227 | } 228 | 229 | 230 | void* buffers[] = {data, conf, bbox, num, sel, idx, res, lab}; 231 | tensorNet.imageInference(buffers, 8, BATCH_SIZE); 232 | 233 | const float scale_x = float(camera->GetWidth()) / float(dimsData.w()); 234 | const float scale_y = float(camera->GetHeight()) / float(dimsData.h()); 235 | 236 | int numBoundingBoxes = int(num[0]); 237 | DrawBoxes((float*)imgRGBA, (float*)imgRGBA, camera->GetWidth(), camera->GetHeight(), scale_x, scale_y, conf, bbox, numBoundingBoxes); 238 | ShowClassification(font, imgRGBA, imgRGBA, camera->GetWidth(), camera->GetHeight(), lab, bbox, labelInfo, numBoundingBoxes); 239 | 240 | if( display 
!= NULL ) 241 | { 242 | char str[256]; 243 | sprintf(str, "TensorRT build %x | %4.1f FPS", NV_GIE_VERSION, display->GetFPS()); 244 | display->SetTitle(str); 245 | } 246 | 247 | if( display != NULL ) 248 | { 249 | display->UserEvents(); 250 | display->BeginRender(); 251 | 252 | if( texture != NULL ) 253 | { 254 | CUDA(cudaNormalizeRGBA((float4*)imgRGBA, make_float2(0.0f, 255.0f), 255 | (float4*)imgRGBA, make_float2(0.0f, 1.0f), 256 | camera->GetWidth(), camera->GetHeight())); 257 | 258 | void* tex_map = texture->MapCUDA(); 259 | if( tex_map != NULL ) 260 | { 261 | cudaMemcpy(tex_map, imgRGBA, texture->GetSize(), cudaMemcpyDeviceToDevice); 262 | texture->Unmap(); 263 | } 264 | texture->Render(100,100); 265 | } 266 | display->EndRender(); 267 | } 268 | 269 | } 270 | 271 | 272 | /* destory */ 273 | tensorNet.destroy(); 274 | tensorNet.printTimes(TIMING_ITERATIONS); 275 | 276 | if( camera != NULL ) 277 | { 278 | delete camera; 279 | camera = NULL; 280 | } 281 | 282 | if( display != NULL ) 283 | { 284 | delete display; 285 | display = NULL; 286 | } 287 | 288 | std::cout << "Done." 
<< std::endl; 289 | return 0; 290 | } 291 | -------------------------------------------------------------------------------- /kernel.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #include "cudaUtility.h" 6 | #include 7 | 8 | 9 | // gpuPreImageNet 10 | __global__ void gpuPreImageNet( float2 scale, float4* input, int iWidth, float* output, int oWidth, int oHeight ) 11 | { 12 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 13 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 14 | const int n = oWidth * oHeight; 15 | 16 | if( x >= oWidth || y >= oHeight ) 17 | return; 18 | 19 | const int dx = ((float)x * scale.x); 20 | const int dy = ((float)y * scale.y); 21 | 22 | const float4 px = input[ dy * iWidth + dx ]; 23 | const float3 bgr = make_float3(px.z, px.y, px.x); 24 | 25 | output[n * 0 + y * oWidth + x] = bgr.x; 26 | output[n * 1 + y * oWidth + x] = bgr.y; 27 | output[n * 2 + y * oWidth + x] = bgr.z; 28 | } 29 | 30 | 31 | // cudaPreImageNet 32 | cudaError_t cudaPreImageNet( float4* input, size_t inputWidth, size_t inputHeight, 33 | float* output, size_t outputWidth, size_t outputHeight ) 34 | { 35 | if( !input || !output ) 36 | return cudaErrorInvalidDevicePointer; 37 | 38 | if( inputWidth == 0 || outputWidth == 0 || inputHeight == 0 || outputHeight == 0 ) 39 | return cudaErrorInvalidValue; 40 | 41 | const float2 scale = make_float2( float(inputWidth) / float(outputWidth), 42 | float(inputHeight) / float(outputHeight) ); 43 | 44 | // launch kernel 45 | const dim3 blockDim(8, 8); 46 | const dim3 gridDim(iDivUp(outputWidth,blockDim.x), iDivUp(outputHeight,blockDim.y)); 47 | 48 | gpuPreImageNet<<>>(scale, input, inputWidth, output, outputWidth, outputHeight); 49 | 50 | return CUDA(cudaGetLastError()); 51 | } 52 | 53 | 54 | 55 | 56 | // gpuPreImageNetMean 57 | __global__ void gpuPreImageNetMean( float2 scale, float4* input, int iWidth, float* output, 
int oWidth, int oHeight, float3 mean_value ) 58 | { 59 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 60 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 61 | const int n = oWidth * oHeight; 62 | 63 | if( x >= oWidth || y >= oHeight ) 64 | return; 65 | 66 | const int dx = ((float)x * scale.x); 67 | const int dy = ((float)y * scale.y); 68 | 69 | const float4 px = input[ dy * iWidth + dx ]; 70 | const float3 bgr = make_float3(px.z - mean_value.x, px.y - mean_value.y, px.x - mean_value.z); 71 | 72 | output[n * 0 + y * oWidth + x] = bgr.x; 73 | output[n * 1 + y * oWidth + x] = bgr.y; 74 | output[n * 2 + y * oWidth + x] = bgr.z; 75 | } 76 | 77 | 78 | // cudaPreImageNetMean 79 | cudaError_t cudaPreImageNetMean( float4* input, size_t inputWidth, size_t inputHeight, 80 | float* output, size_t outputWidth, size_t outputHeight, const float3& mean_value ) 81 | { 82 | if( !input || !output ) 83 | return cudaErrorInvalidDevicePointer; 84 | 85 | if( inputWidth == 0 || outputWidth == 0 || inputHeight == 0 || outputHeight == 0 ) 86 | return cudaErrorInvalidValue; 87 | 88 | const float2 scale = make_float2( float(inputWidth) / float(outputWidth), 89 | float(inputHeight) / float(outputHeight) ); 90 | 91 | // launch kernel 92 | const dim3 blockDim(8, 8); 93 | const dim3 gridDim(iDivUp(outputWidth,blockDim.x), iDivUp(outputHeight,blockDim.y)); 94 | 95 | gpuPreImageNetMean<<>>(scale, input, inputWidth, output, outputWidth, outputHeight, mean_value); 96 | 97 | return CUDA(cudaGetLastError()); 98 | } 99 | 100 | 101 | __global__ void kernel_extract_roi(float* input, float* output, char* mean, 102 | const int input_w, const int output_w, const int output_h, 103 | const int in_plane_r, const int in_plane_g, const int in_plane_b, 104 | const int out_plane_r, const int out_plane_g, const int out_plane_b, 105 | const int bbox_x, const int bbox_y, const int bbox_w, const int bbox_h) 106 | { 107 | uint x = blockIdx.x * blockDim.x + threadIdx.x; 108 | uint y = blockIdx.y * 
blockDim.y + threadIdx.y; 109 | 110 | if( x < output_w && y < output_h) 111 | { 112 | float r[2] = { float(x) * bbox_w / output_w + bbox_x, 113 | float(y) * bbox_h / output_h + bbox_y }; 114 | 115 | int pos[4][2] = { { int(floor(r[0])), int(floor(r[1])) }, 116 | { int( ceil(r[0])), int(floor(r[1])) }, 117 | { int(floor(r[0])), int(ceil(r[1])) }, 118 | { int( ceil(r[0])), int(ceil(r[1])) } }; 119 | 120 | float u = r[0]-floor(r[0]); 121 | float v = r[1]-floor(r[1]); 122 | 123 | float s[4] = { (1-u)*(1-v), u*(1-v), (1-u)*v, u*v }; 124 | 125 | int map[4] = { pos[0][1]*input_w + pos[0][0], pos[1][1]*input_w + pos[1][0], 126 | pos[2][1]*input_w + pos[2][0], pos[3][1]*input_w + pos[3][0]}; 127 | 128 | int idx = y * output_w + x; 129 | output[idx+out_plane_r] = round( s[0]*input[map[0]+in_plane_r] 130 | + s[1]*input[map[1]+in_plane_r] 131 | + s[2]*input[map[2]+in_plane_r] 132 | + s[3]*input[map[3]+in_plane_r] );// float(mean[idx+out_plane_r])); 133 | output[idx+out_plane_g] = round( s[0]*input[map[0]+in_plane_g] 134 | + s[1]*input[map[1]+in_plane_g] 135 | + s[2]*input[map[2]+in_plane_g] 136 | + s[3]*input[map[3]+in_plane_g] );//float(mean[idx+out_plane_g])); 137 | output[idx+out_plane_b] = round( s[0]*input[map[0]+in_plane_b] 138 | + s[1]*input[map[1]+in_plane_b] 139 | + s[2]*input[map[2]+in_plane_b] 140 | + s[3]*input[map[3]+in_plane_b] );//float(mean[idx+out_plane_b])); 141 | } 142 | } 143 | 144 | void convertROI(float* input, float* output, char* mean, const int* srcSize, const int* dstSize, const int* roi, cudaStream_t stream) 145 | { 146 | int in_plane_r = 0; 147 | int in_plane_g = srcSize[1] * srcSize[2]; 148 | int in_plane_b = srcSize[1] * srcSize[2] * 2; 149 | 150 | int out_plane_r = 0; 151 | int out_plane_g = dstSize[1] * dstSize[2]; 152 | int out_plane_b = dstSize[1] * dstSize[2] * 2; 153 | 154 | int bbox_x = min(max(roi[0], 0), srcSize[2]-1); 155 | int bbox_y = min(max(roi[1], 0), srcSize[1]-1); 156 | int bbox_w = min(max(roi[2]-roi[0], 0), srcSize[2]-bbox_x-1 
); 157 | int bbox_h = min(max(roi[3]-roi[1], 0), srcSize[1]-bbox_y-1 ); 158 | 159 | dim3 dimBlock(32,32); 160 | dim3 dimGrid(dstSize[2]/dimBlock.x+1, dstSize[1]/dimBlock.y+1); 161 | 162 | std::cout << "ROI: " << bbox_x << " " << bbox_y << " " << bbox_w << " " << bbox_h << std::endl; 163 | 164 | kernel_extract_roi <<< dimGrid, dimBlock, 0, stream >>> (input, output, mean, 165 | srcSize[2], dstSize[2], dstSize[1], 166 | in_plane_r, in_plane_g, in_plane_b, 167 | out_plane_r, out_plane_g, out_plane_b, 168 | bbox_x, bbox_y, bbox_w, bbox_h); 169 | } 170 | 171 | -------------------------------------------------------------------------------- /pluginImplement.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | std::vector RecognitionLayer::bboxTable; 4 | std::vector RecognitionLayer::tagTable; 5 | 6 | bool bboxOverlap(const float4& r1, const float4& r2) 7 | { 8 | float unionSize = (std::max(r1.z, r2.z)-std::min(r1.x, r2.x)) * (std::max(r1.w, r2.w)-std::min(r1.y, r2.y)); 9 | float interSize = (std::min(r1.z, r2.z)-std::max(r1.x, r2.x)) * (std::min(r1.w, r2.w)-std::max(r1.y, r2.y)); 10 | if( unionSize == 0 ) return true; 11 | else return (interSize/unionSize) > 0.5; 12 | } 13 | 14 | /******************************/ 15 | // PluginFactory 16 | /******************************/ 17 | nvinfer1::IPlugin* PluginFactory::createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights) 18 | { 19 | assert(isPlugin(layerName)); 20 | if (!strcmp(layerName, "bboxMerge")) 21 | { 22 | assert(mBboxMergeLayer.get() == nullptr); 23 | mBboxMergeLayer = std::unique_ptr(new BboxMergeLayer()); 24 | return mBboxMergeLayer.get(); 25 | } 26 | else if (!strcmp(layerName, "dataRoi")) 27 | { 28 | assert(mDataRoiLayer.get() == nullptr); 29 | mDataRoiLayer = std::unique_ptr(new DataRoiLayer()); 30 | return mDataRoiLayer.get(); 31 | } 32 | else if (!strcmp(layerName, "selectBbox")) 33 | { 34 | assert(mSelectLayer.get() == nullptr); 
35 | mSelectLayer = std::unique_ptr(new RecognitionLayer(FunctionType::SELECT)); 36 | return mSelectLayer.get(); 37 | } 38 | else if (!strcmp(layerName, "summaryLabel")) 39 | { 40 | assert(mSummaryLayer.get() == nullptr); 41 | mSummaryLayer = std::unique_ptr(new RecognitionLayer(FunctionType::SUMMARY)); 42 | return mSummaryLayer.get(); 43 | } 44 | else 45 | { 46 | assert(0); 47 | return nullptr; 48 | } 49 | } 50 | 51 | IPlugin* PluginFactory::createPlugin(const char* layerName, const void* serialData, size_t serialLength) 52 | { 53 | assert(isPlugin(layerName)); 54 | if (!strcmp(layerName, "bboxMerge")) 55 | { 56 | assert(mBboxMergeLayer.get() == nullptr); 57 | mBboxMergeLayer = std::unique_ptr(new BboxMergeLayer(serialData, serialLength)); 58 | return mBboxMergeLayer.get(); 59 | } 60 | else if (!strcmp(layerName, "dataRoi")) 61 | { 62 | assert(mDataRoiLayer.get() == nullptr); 63 | mDataRoiLayer = std::unique_ptr(new DataRoiLayer(serialData, serialLength)); 64 | return mDataRoiLayer.get(); 65 | } 66 | else if (!strcmp(layerName, "selectBbox")) 67 | { 68 | assert(mSelectLayer.get() == nullptr); 69 | mSelectLayer = std::unique_ptr(new RecognitionLayer(FunctionType::SELECT, serialData, serialLength)); 70 | return mSelectLayer.get(); 71 | } 72 | else if (!strcmp(layerName, "summaryLabel")) 73 | { 74 | assert(mSummaryLayer.get() == nullptr); 75 | mSummaryLayer = std::unique_ptr(new RecognitionLayer(FunctionType::SUMMARY, serialData, serialLength)); 76 | return mSummaryLayer.get(); 77 | } 78 | else 79 | { 80 | assert(0); 81 | return nullptr; 82 | } 83 | } 84 | 85 | bool PluginFactory::isPlugin(const char* name) 86 | { 87 | return (!strcmp(name, "bboxMerge") 88 | || !strcmp(name, "dataRoi") 89 | || !strcmp(name, "selectBbox") 90 | || !strcmp(name, "summaryLabel")); 91 | } 92 | 93 | void PluginFactory::destroyPlugin() 94 | { 95 | mBboxMergeLayer.release(); 96 | mBboxMergeLayer = nullptr; 97 | mDataRoiLayer.release(); 98 | mDataRoiLayer = nullptr; 99 | 
mSelectLayer.release(); 100 | mSelectLayer = nullptr; 101 | mSummaryLayer.release(); 102 | mSummaryLayer = nullptr; 103 | } 104 | 105 | 106 | 107 | /******************************/ 108 | // BboxMerge Plugin Layer 109 | /******************************/ 110 | BboxMergeLayer::BboxMergeLayer(const void* buffer, size_t size) 111 | { 112 | assert(size==(9*sizeof(int))); 113 | const int* d = reinterpret_cast(buffer); 114 | 115 | dimsData = DimsCHW{d[0], d[1], d[2]}; 116 | dimsConf = DimsCHW{d[3], d[4], d[5]}; 117 | dimsBbox = DimsCHW{d[6], d[7], d[8]}; 118 | } 119 | 120 | Dims BboxMergeLayer::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) 121 | { 122 | assert(nbInputDims==3); 123 | return DimsCHW(1, 1, 1); 124 | } 125 | 126 | int BboxMergeLayer::initialize() 127 | { 128 | ow = dimsBbox.w(); 129 | oh = dimsBbox.h(); 130 | owh = ow * oh; 131 | cls = dimsConf.c(); 132 | 133 | cell_width = dimsData.w() / ow; 134 | cell_height = dimsData.h() / oh; 135 | return 0; 136 | } 137 | 138 | int BboxMergeLayer::enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream) 139 | { 140 | CHECK(cudaThreadSynchronize()); 141 | std::vector< std::vector > rects; 142 | rects.resize(cls); 143 | 144 | float* conf = (float*)inputs[1]; 145 | float* bbox = (float*)inputs[2]; 146 | 147 | for( uint32_t z=0; z < cls; z++ ) 148 | { 149 | rects[z].reserve(owh); 150 | for( uint32_t y=0; y < oh; y++ ) 151 | { 152 | for( uint32_t x=0; x < ow; x++) 153 | { 154 | const float coverage = conf[z * owh + y * ow + x]; 155 | if( coverage > 0.5 ) 156 | { 157 | const float mx = x * cell_width; 158 | const float my = y * cell_height; 159 | 160 | const float x1 = (bbox[0 * owh + y * ow + x] + mx); 161 | const float y1 = (bbox[1 * owh + y * ow + x] + my); 162 | const float x2 = (bbox[2 * owh + y * ow + x] + mx); 163 | const float y2 = (bbox[3 * owh + y * ow + x] + my); 164 | mergeRect( rects[z], make_float4(x1, y1, x2, y2) ); 165 | } 166 | } 167 | } 168 | } 169 | 170 
| int n = 0; 171 | int numMax = dimsBbox.c() * dimsBbox.h() * dimsBbox.w(); 172 | for( uint32_t z = 0; z < cls; z++ ) 173 | { 174 | const uint32_t numBox = rects[z].size(); 175 | 176 | for( uint32_t b = 0; b < numBox && n < numMax; b++ ) 177 | { 178 | const float4 r = rects[z][b]; 179 | 180 | bbox[n * 4 + 0] = r.x; 181 | bbox[n * 4 + 1] = r.y; 182 | bbox[n * 4 + 2] = r.z; 183 | bbox[n * 4 + 3] = r.w; 184 | n++; 185 | } 186 | } 187 | 188 | float* count = (float*)outputs[0]; 189 | count[0] = float(n); 190 | return 0; 191 | } 192 | 193 | size_t BboxMergeLayer::getSerializationSize() 194 | { 195 | return 9*sizeof(int); 196 | } 197 | 198 | void BboxMergeLayer::serialize(void* buffer) 199 | { 200 | int* d = reinterpret_cast(buffer); 201 | d[0] = dimsData.c(); d[1] = dimsData.h(); d[2] = dimsData.w(); 202 | d[3] = dimsConf.c(); d[4] = dimsConf.h(); d[5] = dimsConf.w(); 203 | d[6] = dimsBbox.c(); d[7] = dimsBbox.h(); d[8] = dimsBbox.w(); 204 | } 205 | 206 | void BboxMergeLayer::configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) 207 | { 208 | dimsData = DimsCHW{inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]}; 209 | dimsConf = DimsCHW{inputs[1].d[0], inputs[1].d[1], inputs[1].d[2]}; 210 | dimsBbox = DimsCHW{inputs[2].d[0], inputs[2].d[1], inputs[2].d[2]}; 211 | } 212 | 213 | void BboxMergeLayer::mergeRect(std::vector& rects, const float4& rect) 214 | { 215 | const uint32_t num_rects = rects.size(); 216 | bool intersects = false; 217 | 218 | for( uint32_t r=0; r < num_rects; r++ ) 219 | { 220 | if( bboxOverlap(rects[r], rect) ) 221 | { 222 | intersects = true; 223 | if( rect.x < rects[r].x ) rects[r].x = rect.x; 224 | if( rect.y < rects[r].y ) rects[r].y = rect.y; 225 | if( rect.z > rects[r].z ) rects[r].z = rect.z; 226 | if( rect.w > rects[r].w ) rects[r].w = rect.w; 227 | 228 | break; 229 | } 230 | } 231 | if( !intersects ) rects.push_back(rect); 232 | } 233 | 234 | 235 | 236 | /******************************/ 237 | // DataRoi Plugin Layer 
238 | /******************************/ 239 | void convertROI(float* input, float* output, char* mean, const int* srcSize, const int* dstSize, const int* roi, cudaStream_t stream); 240 | 241 | DataRoiLayer::DataRoiLayer(const void* buffer, size_t size) 242 | { 243 | assert(size==(6*sizeof(int))); 244 | const int* d = reinterpret_cast(buffer); 245 | 246 | dimsData = DimsCHW{d[0], d[1], d[2]}; 247 | dimsRoi = DimsCHW{d[3], d[4], d[5]}; 248 | } 249 | 250 | Dims DataRoiLayer::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) 251 | { 252 | assert(nbInputDims==2); 253 | return DimsCHW(3, 224, 224); 254 | } 255 | 256 | int DataRoiLayer::initialize() 257 | { 258 | return 0; 259 | } 260 | 261 | int DataRoiLayer::enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream) 262 | { 263 | float* bbox = (float*)inputs[1]; 264 | 265 | int srcSize[] {dimsData.c(), dimsData.h(), dimsData.w()}; 266 | int dstSize[] {dimsRoi.c(), dimsRoi.h(), dimsRoi.w()}; 267 | int roi[] = { int(bbox[0]+0.5), int(bbox[1]+0.5), int(bbox[2]+0.5), int(bbox[3]+0.5)}; //rounding 268 | convertROI((float*)inputs[0], (float*)outputs[0], nullptr, srcSize, dstSize, roi, stream); 269 | 270 | return 0; 271 | } 272 | 273 | size_t DataRoiLayer::getSerializationSize() 274 | { 275 | return 6*sizeof(int); 276 | } 277 | 278 | void DataRoiLayer::serialize(void* buffer) 279 | { 280 | int* d = reinterpret_cast(buffer); 281 | d[0] = dimsData.c(); d[1] = dimsData.h(); d[2] = dimsData.w(); 282 | d[3] = dimsRoi.c(); d[4] = dimsRoi.h(); d[5] = dimsRoi.w(); 283 | } 284 | 285 | void DataRoiLayer::configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) 286 | { 287 | dimsData = DimsCHW{inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]}; 288 | dimsRoi = DimsCHW{3, 224, 224}; 289 | } 290 | 291 | 292 | 293 | /******************************/ 294 | // Recognition Plugin Layer 295 | /******************************/ 296 | 
RecognitionLayer::RecognitionLayer(FunctionType t, const void* buffer, size_t size) 297 | { 298 | assert(size==(sizeof(int))); 299 | const int* d = reinterpret_cast(buffer); 300 | 301 | classNum = d[0]; 302 | type = t; 303 | } 304 | 305 | int RecognitionLayer::getNbOutputs() const 306 | { 307 | if( type==FunctionType::SELECT ) return 2; 308 | else if( type==FunctionType::SUMMARY ) return 1; 309 | } 310 | 311 | Dims RecognitionLayer::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) 312 | { 313 | if( type==FunctionType::SELECT ) 314 | { 315 | assert(nbInputDims==2); 316 | return index==0 ? DimsCHW(4, 1, 1):DimsCHW(1, 1, 1); 317 | } 318 | else if( type==FunctionType::SUMMARY ) 319 | { 320 | assert(nbInputDims==4); 321 | classNum = inputs[3].d[0]; 322 | return DimsCHW(1, inputs[0].d[1], inputs[0].d[2]); 323 | } 324 | } 325 | 326 | int RecognitionLayer::enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream) 327 | { 328 | CHECK(cudaThreadSynchronize()); 329 | 330 | if( type==FunctionType::SELECT ) return select(inputs, outputs); 331 | else if( type==FunctionType::SUMMARY ) return summary(inputs, outputs); 332 | } 333 | 334 | size_t RecognitionLayer::getSerializationSize() 335 | { 336 | return sizeof(int); 337 | } 338 | 339 | void RecognitionLayer::serialize(void* buffer) 340 | { 341 | int* d = reinterpret_cast(buffer); 342 | d[0] = classNum; 343 | } 344 | 345 | int RecognitionLayer::select(const void*const *inputs, void** outputs) 346 | { 347 | float* bbox = (float*)inputs[0]; 348 | float* count = (float*)inputs[1]; 349 | float* select = (float*)outputs[0]; 350 | float* index = (float*)outputs[1]; 351 | 352 | int queryIdx = -1; 353 | int bboxNum = static_cast(count[0]); 354 | 355 | for( size_t i=0,id=0; i < bboxNum; i++,id+=4 ) { 356 | float4 p = make_float4(bbox[id+0], bbox[id+1], bbox[id+2], bbox[id+3]); 357 | if( bboxExist(p, i)<0 ) { 358 | if( queryIdx < 0 ) { 359 | bboxTable.push_back(new bboxProfile(p, i)); 
360 | queryIdx = bboxTable.size()-1; 361 | } 362 | } 363 | } 364 | 365 | if( queryIdx < 0 && bboxTable.size() > 0 ) queryIdx = rand() % bboxTable.size(); 366 | if( queryIdx > -1 ) { 367 | int queryNum = bboxTable[queryIdx]->bboxNum; 368 | if( queryNum > -1 ) { 369 | select[0] = bboxTable[queryIdx]->pos.x; 370 | select[1] = bboxTable[queryIdx]->pos.y; 371 | select[2] = bboxTable[queryIdx]->pos.z; 372 | select[3] = bboxTable[queryIdx]->pos.w; 373 | index[0] = queryIdx; 374 | std::cout << "pass "<< queryIdx << " to trt" << std::endl; 375 | std::cout << select[0] << " " << select[1] << " " << select[2] << " " << select[3] << " " << std::endl; 376 | } 377 | } 378 | else index[0] = -1; 379 | return 0; 380 | } 381 | 382 | int RecognitionLayer::summary(const void*const *inputs, void** outputs) 383 | { 384 | float* count = (float*)inputs[1]; 385 | float* index = (float*)inputs[2]; 386 | float* res = (float*)inputs[3]; 387 | float* label = (float*)outputs[0]; 388 | 389 | int bboxNum = static_cast(count[0]); 390 | int queryIdx = static_cast(index[0]); 391 | if( queryIdx > -1 ) { 392 | int classIndex = -1; 393 | float classMax = -1.0f; 394 | 395 | for( size_t n=0; n < classNum; n++ ) 396 | { 397 | const float value = res[n]; 398 | if( value > classMax ) 399 | { 400 | classIndex = n; 401 | classMax = value; 402 | } 403 | } 404 | bboxTable[queryIdx]->labelID = classIndex; 405 | std::cout << "ID=" <& rects, const float4& rect); 71 | 72 | DimsCHW dimsData; 73 | DimsCHW dimsConf; 74 | DimsCHW dimsBbox; 75 | 76 | int ow; 77 | int oh; 78 | int owh; 79 | int cls; 80 | float cell_width; 81 | float cell_height; 82 | }; 83 | 84 | class RecognitionLayer : public IPlugin 85 | { 86 | public: 87 | RecognitionLayer(FunctionType t) { type = t; }; 88 | RecognitionLayer(FunctionType t, const void* buffer, size_t size); 89 | 90 | int getNbOutputs() const override; 91 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; 92 | 93 | inline int initialize() override { 
return 0; } 94 | inline void terminate() override { ; } 95 | 96 | inline size_t getWorkspaceSize(int) const override { return 0; } 97 | int enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream) override; 98 | 99 | size_t getSerializationSize() override; 100 | void serialize(void* buffer) override; 101 | 102 | inline void configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override { ; }; 103 | 104 | protected: 105 | int select(const void*const *inputs, void** outputs); 106 | int summary(const void*const *inputs, void** outputs); 107 | int bboxExist(const float4& pos, const int idx); 108 | int tagExist(int label, int idx); 109 | 110 | size_t classNum; 111 | FunctionType type; 112 | static std::vector bboxTable; 113 | static std::vector tagTable; 114 | }; 115 | 116 | class DataRoiLayer : public IPlugin 117 | { 118 | public: 119 | DataRoiLayer() {}; 120 | DataRoiLayer(const void* buffer, size_t size); 121 | 122 | inline int getNbOutputs() const override { return 1; }; 123 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; 124 | 125 | int initialize() override; 126 | inline void terminate() override { ; } 127 | 128 | inline size_t getWorkspaceSize(int) const override { return 0; } 129 | int enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream) override; 130 | 131 | size_t getSerializationSize() override; 132 | void serialize(void* buffer) override; 133 | 134 | void configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override; 135 | 136 | protected: 137 | DimsCHW dimsData; 138 | DimsCHW dimsRoi; 139 | }; 140 | 141 | class PluginFactory : public nvinfer1::IPluginFactory, public nvcaffeparser1::IPluginFactory 142 | { 143 | public: 144 | virtual nvinfer1::IPlugin* createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights) override; 145 | IPlugin* createPlugin(const char* 
layerName, const void* serialData, size_t serialLength) override; 146 | 147 | bool isPlugin(const char* name) override; 148 | void destroyPlugin(); 149 | 150 | std::unique_ptr mBboxMergeLayer{ nullptr }; 151 | std::unique_ptr mDataRoiLayer{ nullptr }; 152 | std::unique_ptr mSelectLayer{ nullptr }; 153 | std::unique_ptr mSummaryLayer{ nullptr }; 154 | }; 155 | 156 | #endif 157 | -------------------------------------------------------------------------------- /script/merge_model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import caffe 3 | 4 | deploy_fd = 'detection.prototxt' 5 | deploy_fr = 'classification.prototxt' 6 | deploy_merge = 'deploy.prototxt' 7 | 8 | model_fd = 'detection.caffemodel' 9 | model_fr = 'classification.caffemodel' 10 | model_merge = 'snapshot_iter_1.caffemodel' 11 | 12 | net_fd = caffe.Net(deploy_fd,model_fd, caffe.TEST) 13 | net_fr = caffe.Net(deploy_fr,model_fr, caffe.TEST) 14 | net_merge = caffe.Net(deploy_merge,model_merge, caffe.TEST) 15 | 16 | fp1 = open(deploy_fd, 'r') 17 | fp2 = open(deploy_fr, 'r') 18 | line1 = fp1.readlines() 19 | line2 = fp2.readlines() 20 | 21 | for l in line1: 22 | tmp = l.replace(' ','') 23 | field = tmp.split(':') 24 | if( field[0]=='name'): 25 | source = field[1].split('"')[1] 26 | target = source+'_fd' 27 | try: 28 | for i in range(len(net_fd.params[source])): 29 | net_merge.params[target][i].data[...] = net_fd.params[source][i].data[...] 30 | print 'update weight: ' + target 31 | except KeyError: 32 | print 'ignore weight: ' + target 33 | 34 | for l in line2: 35 | tmp = l.replace(' ','') 36 | field = tmp.split(':') 37 | if( field[0]=='name'): 38 | source = field[1].split('"')[1] 39 | target = source+'_fr' 40 | try: 41 | for i in range(len(net_fr.params[source])): 42 | net_merge.params[target][i].data[...] = net_fr.params[source][i].data[...] 
43 | print 'update weight: ' + target 44 | except KeyError: 45 | print 'ignore weight: ' + target 46 | 47 | fp1.close() 48 | fp2.close() 49 | 50 | net_merge.save('merge.caffemodel') 51 | -------------------------------------------------------------------------------- /script/rename_model.py: -------------------------------------------------------------------------------- 1 | deploy_fd = '/home/vyu/Face/JEP/script/detection.prototxt' 2 | deploy_fr = '/home/vyu/Face/JEP/script/classification.prototxt' 3 | deploy_merge = '/home/vyu/Face/JEP/script/deploy.prototxt' 4 | 5 | fp1 = open(deploy_fd, 'r') 6 | fp2 = open(deploy_fr, 'r') 7 | fp3 = open(deploy_merge, 'w') 8 | 9 | line1 = fp1.readlines() 10 | line2 = fp2.readlines() 11 | 12 | for l in line1: 13 | tmp = l.replace(' ','') 14 | field = tmp.split(':') 15 | if( field[0]=='name' or field[0]=='top' or field[0]=='bottom'): 16 | source = field[1].split('"')[1] 17 | l = l.replace(source,(source+'_fd')) 18 | print 'proto replace: ' + source 19 | fp3.write(l) 20 | 21 | for l in line2: 22 | tmp = l.replace(' ','') 23 | field = tmp.split(':') 24 | if( field[0]=='name' or field[0]=='top' or field[0]=='bottom'): 25 | source = field[1].split('"')[1] 26 | l = l.replace(source,(source+'_fr')) 27 | print 'proto replace: ' + source 28 | fp3.write(l) 29 | 30 | fp1.close() 31 | fp2.close() 32 | fp3.close() 33 | -------------------------------------------------------------------------------- /tensorNet.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "tensorNet.h" 3 | 4 | 5 | 6 | void TensorNet::caffeToTRTModel(const std::string& deployFile, 7 | const std::string& modelFile, 8 | const std::vector& outputs, 9 | unsigned int maxBatchSize) 10 | { 11 | IBuilder* builder = createInferBuilder(gLogger); 12 | INetworkDefinition* network = builder->createNetwork(); 13 | 14 | ICaffeParser* parser = createCaffeParser(); 15 | parser->setPluginFactory(&pluginFactory); 16 | 17 | bool 
useFp16 = builder->platformHasFastFp16(); 18 | DataType modelDataType = useFp16 ? DataType::kHALF : DataType::kFLOAT; 19 | 20 | const IBlobNameToTensor *blobNameToTensor = parser->parse(deployFile.c_str(), 21 | modelFile.c_str(), 22 | *network, 23 | modelDataType); 24 | 25 | assert(blobNameToTensor != nullptr); 26 | for (auto& s : outputs) network->markOutput(*blobNameToTensor->find(s.c_str())); 27 | 28 | builder->setMaxBatchSize(maxBatchSize); 29 | builder->setMaxWorkspaceSize(16 << 20); 30 | 31 | if(useFp16) builder->setHalf2Mode(true); 32 | 33 | ICudaEngine* engine = builder->buildCudaEngine(*network); 34 | assert(engine); 35 | 36 | network->destroy(); 37 | parser->destroy(); 38 | 39 | gieModelStream = engine->serialize(); 40 | engine->destroy(); 41 | builder->destroy(); 42 | pluginFactory.destroyPlugin(); 43 | shutdownProtobufLibrary(); 44 | } 45 | 46 | void TensorNet::createInference() 47 | { 48 | infer = createInferRuntime(gLogger); 49 | engine = infer->deserializeCudaEngine(gieModelStream->data(), gieModelStream->size(), &pluginFactory); 50 | 51 | printf("Bindings after deserializing:\n"); 52 | for (int bi = 0; bi < engine->getNbBindings(); bi++) { 53 | if (engine->bindingIsInput(bi) == true) printf("Binding %d (%s): Input.\n", bi, engine->getBindingName(bi)); 54 | else printf("Binding %d (%s): Output.\n", bi, engine->getBindingName(bi)); 55 | } 56 | } 57 | 58 | void TensorNet::imageInference(void** buffers, int nbBuffer, int batchSize) 59 | { 60 | assert(engine->getNbBindings()==nbBuffer); 61 | 62 | IExecutionContext* context = engine->createExecutionContext(); 63 | context->setProfiler(&gProfiler); 64 | context->execute(batchSize, buffers); 65 | context->destroy(); 66 | } 67 | 68 | void TensorNet::timeInference(int iteration, int batchSize) 69 | { 70 | int inputIdx = 0; 71 | size_t inputSize = 0; 72 | 73 | void* buffers[engine->getNbBindings()]; 74 | 75 | for (int b = 0; b < engine->getNbBindings(); b++) { 76 | DimsCHW dims = 
static_cast(engine->getBindingDimensions(b)); 77 | size_t size = batchSize * dims.c() * dims.h() * dims.w() * sizeof(float); 78 | CHECK(cudaMalloc(&buffers[b], size)); 79 | 80 | if(engine->bindingIsInput(b) == true) 81 | { 82 | inputIdx = b; 83 | inputSize = size; 84 | } 85 | } 86 | 87 | IExecutionContext* context = engine->createExecutionContext(); 88 | context->setProfiler(&gProfiler); 89 | 90 | CHECK(cudaMemset(buffers[inputIdx], 0, inputSize)); 91 | 92 | for (int i = 0; i < iteration;i++) context->execute(batchSize, buffers); 93 | 94 | context->destroy(); 95 | for (int b = 0; b < engine->getNbBindings(); b++) CHECK(cudaFree(buffers[b])); 96 | } 97 | 98 | DimsCHW TensorNet::getTensorDims(const char* name) 99 | { 100 | for (int b = 0; b < engine->getNbBindings(); b++) { 101 | if( !strcmp(name, engine->getBindingName(b)) ) 102 | return static_cast(engine->getBindingDimensions(b)); 103 | } 104 | return DimsCHW{0,0,0}; 105 | } 106 | 107 | void TensorNet::printTimes(int iteration) 108 | { 109 | gProfiler.printLayerTimes(iteration); 110 | } 111 | 112 | void TensorNet::destroy() 113 | { 114 | pluginFactory.destroyPlugin(); 115 | engine->destroy(); 116 | infer->destroy(); 117 | } 118 | -------------------------------------------------------------------------------- /tensorNet.h: -------------------------------------------------------------------------------- 1 | #include "pluginImplement.h" 2 | 3 | using namespace nvinfer1; 4 | using namespace nvcaffeparser1; 5 | 6 | 7 | 8 | /******************************/ 9 | // TensorRT utility 10 | /******************************/ 11 | class Logger : public ILogger 12 | { 13 | void log(Severity severity, const char* msg) override 14 | { 15 | if (severity!=Severity::kINFO) std::cout << msg << std::endl; 16 | } 17 | }; 18 | 19 | struct Profiler : public IProfiler 20 | { 21 | typedef std::pair Record; 22 | std::vector mProfile; 23 | 24 | virtual void reportLayerTime(const char* layerName, float ms) 25 | { 26 | auto record = 
std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; }); 27 | 28 | if (record == mProfile.end()) mProfile.push_back(std::make_pair(layerName, ms)); 29 | else record->second += ms; 30 | } 31 | 32 | void printLayerTimes(const int TIMING_ITERATIONS) 33 | { 34 | float totalTime = 0; 35 | for (size_t i = 0; i < mProfile.size(); i++) 36 | { 37 | printf("%-40.40s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / TIMING_ITERATIONS); 38 | totalTime += mProfile[i].second; 39 | } 40 | printf("Time over all layers: %4.3f\n", totalTime / TIMING_ITERATIONS); 41 | } 42 | }; 43 | 44 | 45 | 46 | /******************************/ 47 | // TensorRT Main 48 | /******************************/ 49 | class TensorNet 50 | { 51 | public: 52 | void caffeToTRTModel(const std::string& deployFile, 53 | const std::string& modelFile, 54 | const std::vector& outputs, 55 | unsigned int maxBatchSize); 56 | void createInference(); 57 | 58 | void imageInference(void** buffers, int nbBuffer, int batchSize); 59 | void timeInference(int iteration, int batchSize); 60 | 61 | DimsCHW getTensorDims(const char* name); 62 | 63 | void printTimes(int iteration); 64 | void destroy(); 65 | 66 | private: 67 | PluginFactory pluginFactory; 68 | IHostMemory *gieModelStream{nullptr}; 69 | 70 | IRuntime* infer; 71 | ICudaEngine* engine; 72 | 73 | Logger gLogger; 74 | Profiler gProfiler; 75 | }; 76 | -------------------------------------------------------------------------------- /util/camera/gst-camera/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | file(GLOB gstCameraSources *.cpp) 3 | file(GLOB gstCameraIncludes *.h ) 4 | 5 | add_executable(gst-camera ${gstCameraSources}) 6 | target_link_libraries(gst-camera jetson-inference) 7 | -------------------------------------------------------------------------------- /util/camera/gst-camera/gst-camera.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #include "gstCamera.h" 6 | 7 | #include "glDisplay.h" 8 | #include "glTexture.h" 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include "cudaNormalize.h" 15 | 16 | 17 | bool signal_recieved = false; 18 | 19 | void sig_handler(int signo) 20 | { 21 | if( signo == SIGINT ) 22 | { 23 | printf("received SIGINT\n"); 24 | signal_recieved = true; 25 | } 26 | } 27 | 28 | 29 | int main( int argc, char** argv ) 30 | { 31 | printf("gst-camera\n args (%i): ", argc); 32 | 33 | for( int i=0; i < argc; i++ ) 34 | printf("%i [%s] ", i, argv[i]); 35 | 36 | printf("\n"); 37 | 38 | 39 | if( signal(SIGINT, sig_handler) == SIG_ERR ) 40 | printf("\ncan't catch SIGINT\n"); 41 | 42 | /* 43 | * create the camera device 44 | */ 45 | gstCamera* camera = gstCamera::Create(); 46 | 47 | if( !camera ) 48 | { 49 | printf("\ngst-camera: failed to initialize video device\n"); 50 | return 0; 51 | } 52 | 53 | printf("\ngst-camera: successfully initialized video device\n"); 54 | printf(" width: %u\n", camera->GetWidth()); 55 | printf(" height: %u\n", camera->GetHeight()); 56 | printf(" depth: %u (bpp)\n", camera->GetPixelDepth()); 57 | 58 | 59 | 60 | /* 61 | * create openGL window 62 | */ 63 | glDisplay* display = glDisplay::Create(); 64 | 65 | if( !display ) 66 | printf("\ngst-camera: failed to create openGL display\n"); 67 | 68 | const size_t texSz = camera->GetWidth() * camera->GetHeight() * sizeof(float4); 69 | float4* texIn = (float4*)malloc(texSz); 70 | 71 | /*if( texIn != NULL ) 72 | memset(texIn, 0, texSz);*/ 73 | 74 | if( texIn != NULL ) 75 | for( uint32_t y=0; y < camera->GetHeight(); y++ ) 76 | for( uint32_t x=0; x < camera->GetWidth(); x++ ) 77 | texIn[y*camera->GetWidth()+x] = make_float4(0.0f, 1.0f, 1.0f, 1.0f); 78 | 79 | glTexture* texture = glTexture::Create(camera->GetWidth(), camera->GetHeight(), GL_RGBA32F_ARB/*GL_RGBA8*/, texIn); 80 | 81 | if( !texture 
) 82 | printf("gst-camera: failed to create openGL texture\n"); 83 | 84 | 85 | 86 | /* 87 | * start streaming 88 | */ 89 | if( !camera->Open() ) 90 | { 91 | printf("\ngst-camera: failed to open camera for streaming\n"); 92 | return 0; 93 | } 94 | 95 | printf("\ngst-camera: camera open for streaming\n"); 96 | 97 | 98 | while( !signal_recieved ) 99 | { 100 | void* imgCPU = NULL; 101 | void* imgCUDA = NULL; 102 | 103 | // get the latest frame 104 | if( !camera->Capture(&imgCPU, &imgCUDA, 1000) ) 105 | printf("\ngst-camera: failed to capture frame\n"); 106 | else 107 | printf("gst-camera: recieved new frame CPU=0x%p GPU=0x%p\n", imgCPU, imgCUDA); 108 | 109 | // convert from YUV to RGBA 110 | void* imgRGBA = NULL; 111 | 112 | if( !camera->ConvertRGBA(imgCUDA, &imgRGBA) ) 113 | printf("gst-camera: failed to convert from NV12 to RGBA\n"); 114 | 115 | // rescale image pixel intensities 116 | CUDA(cudaNormalizeRGBA((float4*)imgRGBA, make_float2(0.0f, 255.0f), 117 | (float4*)imgRGBA, make_float2(0.0f, 1.0f), 118 | camera->GetWidth(), camera->GetHeight())); 119 | 120 | // update display 121 | if( display != NULL ) 122 | { 123 | display->UserEvents(); 124 | display->BeginRender(); 125 | 126 | if( texture != NULL ) 127 | { 128 | void* tex_map = texture->MapCUDA(); 129 | 130 | if( tex_map != NULL ) 131 | { 132 | cudaMemcpy(tex_map, imgRGBA, texture->GetSize(), cudaMemcpyDeviceToDevice); 133 | CUDA(cudaDeviceSynchronize()); 134 | 135 | texture->Unmap(); 136 | } 137 | //texture->UploadCPU(texIn); 138 | 139 | texture->Render(100,100); 140 | } 141 | 142 | display->EndRender(); 143 | } 144 | } 145 | 146 | printf("\ngst-camera: un-initializing video device\n"); 147 | 148 | 149 | /* 150 | * shutdown the camera device 151 | */ 152 | if( camera != NULL ) 153 | { 154 | delete camera; 155 | camera = NULL; 156 | } 157 | 158 | if( display != NULL ) 159 | { 160 | delete display; 161 | display = NULL; 162 | } 163 | 164 | printf("gst-camera: video device has been un-initialized.\n"); 165 | 
printf("gst-camera: this concludes the test of the video device.\n"); 166 | return 0; 167 | } 168 | -------------------------------------------------------------------------------- /util/camera/gstCamera.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #include "gstCamera.h" 6 | #include "gstUtility.h" 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #include "cudaMappedMemory.h" 19 | #include "cudaYUV.h" 20 | #include "cudaRGB.h" 21 | 22 | 23 | 24 | // constructor 25 | gstCamera::gstCamera() 26 | { 27 | mAppSink = NULL; 28 | mBus = NULL; 29 | mPipeline = NULL; 30 | mV4L2Device = -1; 31 | 32 | mWidth = 0; 33 | mHeight = 0; 34 | mDepth = 0; 35 | mSize = 0; 36 | 37 | mWaitEvent = new QWaitCondition(); 38 | mWaitMutex = new QMutex(); 39 | mRingMutex = new QMutex(); 40 | 41 | mLatestRGBA = 0; 42 | mLatestRingbuffer = 0; 43 | mLatestRetrieved = false; 44 | 45 | for( uint32_t n=0; n < NUM_RINGBUFFERS; n++ ) 46 | { 47 | mRingbufferCPU[n] = NULL; 48 | mRingbufferGPU[n] = NULL; 49 | mRGBA[n] = NULL; 50 | } 51 | } 52 | 53 | 54 | // destructor 55 | gstCamera::~gstCamera() 56 | { 57 | 58 | } 59 | 60 | 61 | // ConvertRGBA 62 | bool gstCamera::ConvertRGBA( void* input, void** output, bool zeroCopy ) 63 | { 64 | if( !input || !output ) 65 | return false; 66 | 67 | if( !mRGBA[0] ) 68 | { 69 | const size_t size = mWidth * mHeight * sizeof(float4); 70 | 71 | for( uint32_t n=0; n < NUM_RINGBUFFERS; n++ ) 72 | { 73 | if( zeroCopy ) 74 | { 75 | void* cpuPtr = NULL; 76 | void* gpuPtr = NULL; 77 | 78 | if( !cudaAllocMapped(&cpuPtr, &gpuPtr, size) ) 79 | { 80 | printf(LOG_CUDA "gstCamera -- failed to allocate zeroCopy memory for %ux%xu RGBA texture\n", mWidth, mHeight); 81 | return false; 82 | } 83 | 84 | if( cpuPtr != gpuPtr ) 85 | { 86 | printf(LOG_CUDA "gstCamera -- zeroCopy memory has different pointers, please use a UVA-compatible 
GPU\n"); 87 | return false; 88 | } 89 | 90 | mRGBA[n] = gpuPtr; 91 | } 92 | else 93 | { 94 | if( CUDA_FAILED(cudaMalloc(&mRGBA[n], size)) ) 95 | { 96 | printf(LOG_CUDA "gstCamera -- failed to allocate memory for %ux%u RGBA texture\n", mWidth, mHeight); 97 | return false; 98 | } 99 | } 100 | } 101 | 102 | printf(LOG_CUDA "gstreamer camera -- allocated %u RGBA ringbuffers\n", NUM_RINGBUFFERS); 103 | } 104 | 105 | if( onboardCamera() ) 106 | { 107 | // onboard camera is NV12 108 | if( CUDA_FAILED(cudaNV12ToRGBAf((uint8_t*)input, (float4*)mRGBA[mLatestRGBA], mWidth, mHeight)) ) 109 | return false; 110 | } 111 | else 112 | { 113 | // USB webcam is RGB 114 | if( CUDA_FAILED(cudaRGBToRGBAf((uchar3*)input, (float4*)mRGBA[mLatestRGBA], mWidth, mHeight)) ) 115 | return false; 116 | } 117 | 118 | *output = mRGBA[mLatestRGBA]; 119 | mLatestRGBA = (mLatestRGBA + 1) % NUM_RINGBUFFERS; 120 | return true; 121 | } 122 | 123 | 124 | // onEOS 125 | void gstCamera::onEOS(_GstAppSink* sink, void* user_data) 126 | { 127 | printf(LOG_GSTREAMER "gstreamer decoder onEOS\n"); 128 | } 129 | 130 | 131 | // onPreroll 132 | GstFlowReturn gstCamera::onPreroll(_GstAppSink* sink, void* user_data) 133 | { 134 | printf(LOG_GSTREAMER "gstreamer decoder onPreroll\n"); 135 | return GST_FLOW_OK; 136 | } 137 | 138 | 139 | // onBuffer 140 | GstFlowReturn gstCamera::onBuffer(_GstAppSink* sink, void* user_data) 141 | { 142 | //printf(LOG_GSTREAMER "gstreamer decoder onBuffer\n"); 143 | 144 | if( !user_data ) 145 | return GST_FLOW_OK; 146 | 147 | gstCamera* dec = (gstCamera*)user_data; 148 | 149 | dec->checkBuffer(); 150 | dec->checkMsgBus(); 151 | return GST_FLOW_OK; 152 | } 153 | 154 | 155 | // Capture 156 | bool gstCamera::Capture( void** cpu, void** cuda, unsigned long timeout ) 157 | { 158 | mWaitMutex->lock(); 159 | const bool wait_result = mWaitEvent->wait(mWaitMutex, timeout); 160 | mWaitMutex->unlock(); 161 | 162 | if( !wait_result ) 163 | return false; 164 | 165 | mRingMutex->lock(); 166 | const 
uint32_t latest = mLatestRingbuffer; 167 | const bool retrieved = mLatestRetrieved; 168 | mLatestRetrieved = true; 169 | mRingMutex->unlock(); 170 | 171 | // skip if it was already retrieved 172 | if( retrieved ) 173 | return false; 174 | 175 | if( cpu != NULL ) 176 | *cpu = mRingbufferCPU[latest]; 177 | 178 | if( cuda != NULL ) 179 | *cuda = mRingbufferGPU[latest]; 180 | 181 | return true; 182 | } 183 | 184 | 185 | #define release_return { gst_sample_unref(gstSample); return; } 186 | 187 | 188 | // checkBuffer 189 | void gstCamera::checkBuffer() 190 | { 191 | if( !mAppSink ) 192 | return; 193 | 194 | // block waiting for the buffer 195 | GstSample* gstSample = gst_app_sink_pull_sample(mAppSink); 196 | 197 | if( !gstSample ) 198 | { 199 | printf(LOG_GSTREAMER "gstreamer camera -- gst_app_sink_pull_sample() returned NULL...\n"); 200 | return; 201 | } 202 | 203 | GstBuffer* gstBuffer = gst_sample_get_buffer(gstSample); 204 | 205 | if( !gstBuffer ) 206 | { 207 | printf(LOG_GSTREAMER "gstreamer camera -- gst_sample_get_buffer() returned NULL...\n"); 208 | return; 209 | } 210 | 211 | // retrieve 212 | GstMapInfo map; 213 | 214 | if( !gst_buffer_map(gstBuffer, &map, GST_MAP_READ) ) 215 | { 216 | printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer_map() failed...\n"); 217 | return; 218 | } 219 | 220 | //gst_util_dump_mem(map.data, map.size); 221 | 222 | void* gstData = map.data; //GST_BUFFER_DATA(gstBuffer); 223 | const uint32_t gstSize = map.size; //GST_BUFFER_SIZE(gstBuffer); 224 | 225 | if( !gstData ) 226 | { 227 | printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer had NULL data pointer...\n"); 228 | release_return; 229 | } 230 | 231 | // retrieve caps 232 | GstCaps* gstCaps = gst_sample_get_caps(gstSample); 233 | 234 | if( !gstCaps ) 235 | { 236 | printf(LOG_GSTREAMER "gstreamer camera -- gst_buffer had NULL caps...\n"); 237 | release_return; 238 | } 239 | 240 | GstStructure* gstCapsStruct = gst_caps_get_structure(gstCaps, 0); 241 | 242 | if( !gstCapsStruct ) 243 | 
{ 244 | printf(LOG_GSTREAMER "gstreamer camera -- gst_caps had NULL structure...\n"); 245 | release_return; 246 | } 247 | 248 | // get width & height of the buffer 249 | int width = 0; 250 | int height = 0; 251 | 252 | if( !gst_structure_get_int(gstCapsStruct, "width", &width) || 253 | !gst_structure_get_int(gstCapsStruct, "height", &height) ) 254 | { 255 | printf(LOG_GSTREAMER "gstreamer camera -- gst_caps missing width/height...\n"); 256 | release_return; 257 | } 258 | 259 | if( width < 1 || height < 1 ) 260 | release_return; 261 | 262 | mWidth = width; 263 | mHeight = height; 264 | mDepth = (gstSize * 8) / (width * height); 265 | mSize = gstSize; 266 | 267 | //printf(LOG_GSTREAMER "gstreamer camera recieved %ix%i frame (%u bytes, %u bpp)\n", width, height, gstSize, mDepth); 268 | 269 | // make sure ringbuffer is allocated 270 | if( !mRingbufferCPU[0] ) 271 | { 272 | for( uint32_t n=0; n < NUM_RINGBUFFERS; n++ ) 273 | { 274 | if( !cudaAllocMapped(&mRingbufferCPU[n], &mRingbufferGPU[n], gstSize) ) 275 | printf(LOG_CUDA "gstreamer camera -- failed to allocate ringbuffer %u (size=%u)\n", n, gstSize); 276 | } 277 | 278 | printf(LOG_CUDA "gstreamer camera -- allocated %u ringbuffers, %u bytes each\n", NUM_RINGBUFFERS, gstSize); 279 | } 280 | 281 | // copy to next ringbuffer 282 | const uint32_t nextRingbuffer = (mLatestRingbuffer + 1) % NUM_RINGBUFFERS; 283 | 284 | //printf(LOG_GSTREAMER "gstreamer camera -- using ringbuffer #%u for next frame\n", nextRingbuffer); 285 | memcpy(mRingbufferCPU[nextRingbuffer], gstData, gstSize); 286 | gst_buffer_unmap(gstBuffer, &map); 287 | //gst_buffer_unref(gstBuffer); 288 | gst_sample_unref(gstSample); 289 | 290 | 291 | // update and signal sleeping threads 292 | mRingMutex->lock(); 293 | mLatestRingbuffer = nextRingbuffer; 294 | mLatestRetrieved = false; 295 | mRingMutex->unlock(); 296 | mWaitEvent->wakeAll(); 297 | } 298 | 299 | 300 | 301 | // buildLaunchStr 302 | bool gstCamera::buildLaunchStr() 303 | { 304 | // gst-launch-1.0 
nvcamerasrc fpsRange="30.0 30.0" ! 'video/x-raw(memory:NVMM), width=(int)1920, height=(int)1080, format=(string)I420, framerate=(fraction)30/1' ! \ 305 | // nvvidconv flip-method=2 ! 'video/x-raw(memory:NVMM), format=(string)I420' ! fakesink silent=false -v 306 | std::ostringstream ss; 307 | 308 | //#define CAPS_STR "video/x-raw(memory:NVMM), width=(int)2592, height=(int)1944, format=(string)I420, framerate=(fraction)30/1" 309 | //#define CAPS_STR "video/x-raw(memory:NVMM), width=(int)1920, height=(int)1080, format=(string)I420, framerate=(fraction)30/1" 310 | 311 | if( onboardCamera() ) 312 | { 313 | ss << "nvcamerasrc fpsRange=\"30.0 30.0\" ! video/x-raw(memory:NVMM), width=(int)" << mWidth << ", height=(int)" << mHeight << ", format=(string)NV12 ! nvvidconv flip-method=2 ! "; //'video/x-raw(memory:NVMM), width=(int)1920, height=(int)1080, format=(string)I420, framerate=(fraction)30/1' ! "; 314 | ss << "video/x-raw ! appsink name=mysink"; 315 | } 316 | else 317 | { 318 | ss << "v4l2src device=/dev/video" << mV4L2Device << " ! "; 319 | ss << "video/x-raw, width=(int)" << mWidth << ", height=(int)" << mHeight << ", "; 320 | ss << "format=RGB ! videoconvert ! video/x-raw, format=RGB ! videoconvert !"; 321 | ss << "appsink name=mysink"; 322 | } 323 | 324 | mLaunchStr = ss.str(); 325 | 326 | printf(LOG_GSTREAMER "gstreamer decoder pipeline string:\n"); 327 | printf("%s\n", mLaunchStr.c_str()); 328 | return true; 329 | } 330 | 331 | 332 | // Create 333 | gstCamera* gstCamera::Create( uint32_t width, uint32_t height, int v4l2_device ) 334 | { 335 | if( !gstreamerInit() ) 336 | { 337 | printf(LOG_GSTREAMER "failed to initialize gstreamer API\n"); 338 | return NULL; 339 | } 340 | 341 | gstCamera* cam = new gstCamera(); 342 | 343 | if( !cam ) 344 | return NULL; 345 | 346 | cam->mV4L2Device = v4l2_device; 347 | cam->mWidth = width; 348 | cam->mHeight = height; 349 | cam->mDepth = cam->onboardCamera() ? 
12 : 24; // NV12 or RGB 350 | cam->mSize = (width * height * cam->mDepth) / 8; 351 | 352 | if( !cam->init() ) 353 | { 354 | printf(LOG_GSTREAMER "failed to init gstCamera\n"); 355 | return NULL; 356 | } 357 | 358 | return cam; 359 | } 360 | 361 | 362 | // Create 363 | gstCamera* gstCamera::Create( int v4l2_device ) 364 | { 365 | return Create( DefaultWidth, DefaultHeight, v4l2_device ); 366 | } 367 | 368 | 369 | // init 370 | bool gstCamera::init() 371 | { 372 | GError* err = NULL; 373 | 374 | // build pipeline string 375 | if( !buildLaunchStr() ) 376 | { 377 | printf(LOG_GSTREAMER "gstreamer decoder failed to build pipeline string\n"); 378 | return false; 379 | } 380 | 381 | // launch pipeline 382 | mPipeline = gst_parse_launch(mLaunchStr.c_str(), &err); 383 | 384 | if( err != NULL ) 385 | { 386 | printf(LOG_GSTREAMER "gstreamer decoder failed to create pipeline\n"); 387 | printf(LOG_GSTREAMER " (%s)\n", err->message); 388 | g_error_free(err); 389 | return false; 390 | } 391 | 392 | GstPipeline* pipeline = GST_PIPELINE(mPipeline); 393 | 394 | if( !pipeline ) 395 | { 396 | printf(LOG_GSTREAMER "gstreamer failed to cast GstElement into GstPipeline\n"); 397 | return false; 398 | } 399 | 400 | // retrieve pipeline bus 401 | /*GstBus**/ mBus = gst_pipeline_get_bus(pipeline); 402 | 403 | if( !mBus ) 404 | { 405 | printf(LOG_GSTREAMER "gstreamer failed to retrieve GstBus from pipeline\n"); 406 | return false; 407 | } 408 | 409 | // add watch for messages (disabled when we poll the bus ourselves, instead of gmainloop) 410 | //gst_bus_add_watch(mBus, (GstBusFunc)gst_message_print, NULL); 411 | 412 | // get the appsrc 413 | GstElement* appsinkElement = gst_bin_get_by_name(GST_BIN(pipeline), "mysink"); 414 | GstAppSink* appsink = GST_APP_SINK(appsinkElement); 415 | 416 | if( !appsinkElement || !appsink) 417 | { 418 | printf(LOG_GSTREAMER "gstreamer failed to retrieve AppSink element from pipeline\n"); 419 | return false; 420 | } 421 | 422 | mAppSink = appsink; 423 | 424 | // 
setup callbacks 425 | GstAppSinkCallbacks cb; 426 | memset(&cb, 0, sizeof(GstAppSinkCallbacks)); 427 | 428 | cb.eos = onEOS; 429 | cb.new_preroll = onPreroll; 430 | cb.new_sample = onBuffer; 431 | 432 | gst_app_sink_set_callbacks(mAppSink, &cb, (void*)this, NULL); 433 | 434 | return true; 435 | } 436 | 437 | 438 | // Open 439 | bool gstCamera::Open() 440 | { 441 | // transition pipline to STATE_PLAYING 442 | printf(LOG_GSTREAMER "gstreamer transitioning pipeline to GST_STATE_PLAYING\n"); 443 | 444 | const GstStateChangeReturn result = gst_element_set_state(mPipeline, GST_STATE_PLAYING); 445 | 446 | if( result == GST_STATE_CHANGE_ASYNC ) 447 | { 448 | #if 0 449 | GstMessage* asyncMsg = gst_bus_timed_pop_filtered(mBus, 5 * GST_SECOND, 450 | (GstMessageType)(GST_MESSAGE_ASYNC_DONE|GST_MESSAGE_ERROR)); 451 | 452 | if( asyncMsg != NULL ) 453 | { 454 | gst_message_print(mBus, asyncMsg, this); 455 | gst_message_unref(asyncMsg); 456 | } 457 | else 458 | printf(LOG_GSTREAMER "gstreamer NULL message after transitioning pipeline to PLAYING...\n"); 459 | #endif 460 | } 461 | else if( result != GST_STATE_CHANGE_SUCCESS ) 462 | { 463 | printf(LOG_GSTREAMER "gstreamer failed to set pipeline state to PLAYING (error %u)\n", result); 464 | return false; 465 | } 466 | 467 | checkMsgBus(); 468 | usleep(100*1000); 469 | checkMsgBus(); 470 | 471 | return true; 472 | } 473 | 474 | 475 | // Close 476 | void gstCamera::Close() 477 | { 478 | // stop pipeline 479 | printf(LOG_GSTREAMER "gstreamer transitioning pipeline to GST_STATE_NULL\n"); 480 | 481 | const GstStateChangeReturn result = gst_element_set_state(mPipeline, GST_STATE_NULL); 482 | 483 | if( result != GST_STATE_CHANGE_SUCCESS ) 484 | printf(LOG_GSTREAMER "gstreamer failed to set pipeline state to PLAYING (error %u)\n", result); 485 | 486 | usleep(250*1000); 487 | } 488 | 489 | 490 | // checkMsgBus 491 | void gstCamera::checkMsgBus() 492 | { 493 | while(true) 494 | { 495 | GstMessage* msg = gst_bus_pop(mBus); 496 | 497 | if( !msg 
) 498 | break; 499 | 500 | gst_message_print(mBus, msg, this); 501 | gst_message_unref(msg); 502 | } 503 | } 504 | -------------------------------------------------------------------------------- /util/camera/gstCamera.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #ifndef __GSTREAMER_CAMERA_H__ 6 | #define __GSTREAMER_CAMERA_H__ 7 | 8 | #include 9 | #include 10 | 11 | 12 | struct _GstAppSink; 13 | class QWaitCondition; 14 | class QMutex; 15 | 16 | 17 | /** 18 | * gstreamer CSI camera using nvcamerasrc (or optionally v4l2src) 19 | * @ingroup util 20 | */ 21 | class gstCamera 22 | { 23 | public: 24 | // Create camera 25 | static gstCamera* Create( int v4l2_device=-1 ); // use onboard camera by default (>=0 for V4L2) 26 | static gstCamera* Create( uint32_t width, uint32_t height, int v4l2_device=-1 ); 27 | 28 | // Destroy 29 | ~gstCamera(); 30 | 31 | // Start/stop streaming 32 | bool Open(); 33 | void Close(); 34 | 35 | // Capture YUV (NV12) 36 | bool Capture( void** cpu, void** cuda, unsigned long timeout=ULONG_MAX ); 37 | 38 | // Takes in captured YUV-NV12 CUDA image, converts to float4 RGBA (with pixel intensity 0-255) 39 | // Set zeroCopy to true if you need to access ConvertRGBA from CPU, otherwise it will be CUDA only. 
40 | bool ConvertRGBA( void* input, void** output, bool zeroCopy=false ); 41 | 42 | // Image dimensions 43 | inline uint32_t GetWidth() const { return mWidth; } 44 | inline uint32_t GetHeight() const { return mHeight; } 45 | inline uint32_t GetPixelDepth() const { return mDepth; } 46 | inline uint32_t GetSize() const { return mSize; } 47 | 48 | // Default resolution, unless otherwise specified during Create() 49 | static const uint32_t DefaultWidth = 1280; 50 | static const uint32_t DefaultHeight = 720; 51 | 52 | private: 53 | static void onEOS(_GstAppSink* sink, void* user_data); 54 | static GstFlowReturn onPreroll(_GstAppSink* sink, void* user_data); 55 | static GstFlowReturn onBuffer(_GstAppSink* sink, void* user_data); 56 | 57 | gstCamera(); 58 | 59 | bool init(); 60 | bool buildLaunchStr(); 61 | void checkMsgBus(); 62 | void checkBuffer(); 63 | 64 | _GstBus* mBus; 65 | _GstAppSink* mAppSink; 66 | _GstElement* mPipeline; 67 | 68 | std::string mLaunchStr; 69 | 70 | uint32_t mWidth; 71 | uint32_t mHeight; 72 | uint32_t mDepth; 73 | uint32_t mSize; 74 | 75 | static const uint32_t NUM_RINGBUFFERS = 16; 76 | 77 | void* mRingbufferCPU[NUM_RINGBUFFERS]; 78 | void* mRingbufferGPU[NUM_RINGBUFFERS]; 79 | 80 | QWaitCondition* mWaitEvent; 81 | 82 | QMutex* mWaitMutex; 83 | QMutex* mRingMutex; 84 | 85 | uint32_t mLatestRGBA; 86 | uint32_t mLatestRingbuffer; 87 | bool mLatestRetrieved; 88 | 89 | void* mRGBA[NUM_RINGBUFFERS]; 90 | int mV4L2Device; // -1 for onboard, >=0 for V4L2 device 91 | 92 | inline bool onboardCamera() const { return (mV4L2Device < 0); } 93 | }; 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /util/camera/gstUtility.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #include "gstUtility.h" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | inline const char* gst_debug_level_str( GstDebugLevel level ) 13 | { 14 | switch 
(level) 15 | { 16 | case GST_LEVEL_NONE: return "GST_LEVEL_NONE "; 17 | case GST_LEVEL_ERROR: return "GST_LEVEL_ERROR "; 18 | case GST_LEVEL_WARNING: return "GST_LEVEL_WARNING"; 19 | case GST_LEVEL_INFO: return "GST_LEVEL_INFO "; 20 | case GST_LEVEL_DEBUG: return "GST_LEVEL_DEBUG "; 21 | case GST_LEVEL_LOG: return "GST_LEVEL_LOG "; 22 | case GST_LEVEL_FIXME: return "GST_LEVEL_FIXME "; 23 | #ifdef GST_LEVEL_TRACE 24 | case GST_LEVEL_TRACE: return "GST_LEVEL_TRACE "; 25 | #endif 26 | case GST_LEVEL_MEMDUMP: return "GST_LEVEL_MEMDUMP"; 27 | default: return " "; 28 | } 29 | } 30 | 31 | #define SEP " " 32 | // rilog_debug_function -- GStreamer log hook; prints messages whose level is at most GST_LEVEL_WARNING and drops the rest 33 | void rilog_debug_function(GstDebugCategory* category, GstDebugLevel level, 34 | const gchar* file, const char* function, 35 | gint line, GObject* object, GstDebugMessage* message, 36 | gpointer data) 37 | { 38 | if( level > GST_LEVEL_WARNING /*GST_LEVEL_INFO*/ ) 39 | return; 40 | 41 | //gchar* name = NULL; 42 | //if( object != NULL ) 43 | // g_object_get(object, "name", &name, NULL); 44 | 45 | const char* typeName = " "; 46 | // (no class-name lookup: it was never printed, and G_OBJECT_CLASS_NAME() expects a GObjectClass*, not an instance) 47 | 48 | if( object != NULL ) 49 | { 50 | typeName = G_OBJECT_TYPE_NAME(object); 51 | 52 | } 53 | 54 | printf(LOG_GSTREAMER "%s %s %s\n" SEP "%s:%i %s\n" SEP "%s\n", 55 | gst_debug_level_str(level), typeName, 56 | gst_debug_category_get_name(category), file, line, function, 57 | gst_debug_message_get(message)); 58 | 59 | } 60 | 61 | 62 | bool gstreamerInit() 63 | { 64 | int argc = 0; 65 | //char* argv[] = { "none" }; 66 | 67 | if( !gst_init_check(&argc, NULL, NULL) ) 68 | { 69 | printf(LOG_GSTREAMER "failed to initialize gstreamer library with gst_init()\n"); 70 | return false; 71 | } 72 | 73 | uint32_t ver[] = { 0, 0, 0, 0 }; 74 | gst_version( &ver[0], &ver[1], &ver[2], &ver[3] ); 75 | 76 | printf(LOG_GSTREAMER "initialized gstreamer, version %u.%u.%u.%u\n", ver[0], ver[1], ver[2], ver[3]); 77 | 78 | 79 | // debugging 80 | 
gst_debug_remove_log_function(gst_debug_log_default); 81 | 82 | if( true ) 83 | { 84 | gst_debug_add_log_function(rilog_debug_function, NULL, NULL); 85 | 86 | gst_debug_set_active(true); 87 | gst_debug_set_colored(false); 88 | } 89 | 90 | return true; 91 | } 92 | //--------------------------------------------------------------------------------------------- 93 | 94 | static void gst_print_one_tag(const GstTagList * list, const gchar * tag, gpointer user_data) 95 | { 96 | int i, num; 97 | 98 | num = gst_tag_list_get_tag_size (list, tag); 99 | for (i = 0; i < num; ++i) { 100 | const GValue *val; 101 | 102 | /* Note: when looking for specific tags, use the gst_tag_list_get_xyz() API, 103 | * we only use the GValue approach here because it is more generic */ 104 | val = gst_tag_list_get_value_index (list, tag, i); 105 | if (G_VALUE_HOLDS_STRING (val)) { 106 | printf("\t%20s : %s\n", tag, g_value_get_string (val)); 107 | } else if (G_VALUE_HOLDS_UINT (val)) { 108 | printf("\t%20s : %u\n", tag, g_value_get_uint (val)); 109 | } else if (G_VALUE_HOLDS_DOUBLE (val)) { 110 | printf("\t%20s : %g\n", tag, g_value_get_double (val)); 111 | } else if (G_VALUE_HOLDS_BOOLEAN (val)) { 112 | printf("\t%20s : %s\n", tag, 113 | (g_value_get_boolean (val)) ? 
"true" : "false"); 114 | } else if (GST_VALUE_HOLDS_BUFFER (val)) { 115 | //GstBuffer *buf = gst_value_get_buffer (val); 116 | //guint buffer_size = GST_BUFFER_SIZE(buf); 117 | 118 | printf("\t%20s : buffer of size %u\n", tag, /*buffer_size*/0); 119 | } /*else if (GST_VALUE_HOLDS_DATE_TIME (val)) { 120 | GstDateTime *dt = (GstDateTime*)g_value_get_boxed (val); 121 | gchar *dt_str = gst_date_time_to_iso8601_string (dt); 122 | 123 | printf("\t%20s : %s\n", tag, dt_str); 124 | g_free (dt_str); 125 | }*/ else { 126 | printf("\t%20s : tag of type '%s'\n", tag, G_VALUE_TYPE_NAME (val)); 127 | } 128 | } 129 | } 130 | 131 | static const char* gst_stream_status_string( GstStreamStatusType status ) 132 | { 133 | switch(status) 134 | { 135 | case GST_STREAM_STATUS_TYPE_CREATE: return "CREATE"; 136 | case GST_STREAM_STATUS_TYPE_ENTER: return "ENTER"; 137 | case GST_STREAM_STATUS_TYPE_LEAVE: return "LEAVE"; 138 | case GST_STREAM_STATUS_TYPE_DESTROY: return "DESTROY"; 139 | case GST_STREAM_STATUS_TYPE_START: return "START"; 140 | case GST_STREAM_STATUS_TYPE_PAUSE: return "PAUSE"; 141 | case GST_STREAM_STATUS_TYPE_STOP: return "STOP"; 142 | default: return "UNKNOWN"; 143 | } 144 | } 145 | 146 | // gst_message_print 147 | gboolean gst_message_print(GstBus* bus, GstMessage* message, gpointer user_data) 148 | { 149 | 150 | switch (GST_MESSAGE_TYPE (message)) 151 | { 152 | case GST_MESSAGE_ERROR: 153 | { 154 | GError *err = NULL; 155 | gchar *dbg_info = NULL; 156 | 157 | gst_message_parse_error (message, &err, &dbg_info); 158 | printf(LOG_GSTREAMER "gstreamer %s ERROR %s\n", GST_OBJECT_NAME (message->src), err->message); 159 | printf(LOG_GSTREAMER "gstreamer Debugging info: %s\n", (dbg_info) ? 
dbg_info : "none"); 160 | 161 | g_error_free(err); 162 | g_free(dbg_info); 163 | //g_main_loop_quit (app->loop); 164 | break; 165 | } 166 | case GST_MESSAGE_EOS: 167 | { 168 | printf(LOG_GSTREAMER "gstreamer %s recieved EOS signal...\n", GST_OBJECT_NAME(message->src)); 169 | //g_main_loop_quit (app->loop); // TODO trigger plugin Close() upon error 170 | break; 171 | } 172 | case GST_MESSAGE_STATE_CHANGED: 173 | { 174 | GstState old_state, new_state; 175 | 176 | gst_message_parse_state_changed(message, &old_state, &new_state, NULL); 177 | 178 | printf(LOG_GSTREAMER "gstreamer changed state from %s to %s ==> %s\n", 179 | gst_element_state_get_name(old_state), 180 | gst_element_state_get_name(new_state), 181 | GST_OBJECT_NAME(message->src)); 182 | break; 183 | } 184 | case GST_MESSAGE_STREAM_STATUS: 185 | { 186 | GstStreamStatusType streamStatus; 187 | gst_message_parse_stream_status(message, &streamStatus, NULL); 188 | 189 | printf(LOG_GSTREAMER "gstreamer stream status %s ==> %s\n", 190 | gst_stream_status_string(streamStatus), 191 | GST_OBJECT_NAME(message->src)); 192 | break; 193 | } 194 | case GST_MESSAGE_TAG: 195 | { 196 | GstTagList *tags = NULL; 197 | 198 | gst_message_parse_tag(message, &tags); 199 | 200 | #ifdef gst_tag_list_to_string 201 | gchar* txt = gst_tag_list_to_string(tags); 202 | #else 203 | gchar* txt = "missing gst_tag_list_to_string()"; 204 | #endif 205 | 206 | printf(LOG_GSTREAMER "gstreamer %s %s\n", GST_OBJECT_NAME(message->src), txt); 207 | 208 | g_free(txt); 209 | //gst_tag_list_foreach(tags, gst_print_one_tag, NULL); 210 | gst_tag_list_free(tags); 211 | break; 212 | } 213 | default: 214 | { 215 | printf(LOG_GSTREAMER "gstreamer msg %s ==> %s\n", gst_message_type_get_name(GST_MESSAGE_TYPE(message)), GST_OBJECT_NAME(message->src)); 216 | break; 217 | } 218 | } 219 | 220 | return TRUE; 221 | } 222 | 223 | -------------------------------------------------------------------------------- /util/camera/gstUtility.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #ifndef __GSTREAMER_UTILITY_H__ 6 | #define __GSTREAMER_UTILITY_H__ 7 | 8 | 9 | #include 10 | 11 | 12 | /** 13 | * LOG_GSTREAMER printf prefix 14 | * @ingroup util 15 | */ 16 | #define LOG_GSTREAMER "[gstreamer] " 17 | 18 | 19 | /** 20 | * gstreamerInit 21 | * @ingroup util 22 | */ 23 | bool gstreamerInit(); 24 | 25 | 26 | /** 27 | * gst_message_print 28 | * @ingroup util 29 | */ 30 | gboolean gst_message_print(_GstBus* bus, _GstMessage* message, void* user_data); 31 | 32 | 33 | 34 | #endif 35 | 36 | -------------------------------------------------------------------------------- /util/camera/v4l2-console/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | file(GLOB v4l2ConsoleSources *.cpp) 3 | file(GLOB v4l2ConsoleIncludes *.h ) 4 | 5 | add_executable(v4l2-console ${v4l2ConsoleSources}) 6 | target_link_libraries(v4l2-console jetson-inference) 7 | -------------------------------------------------------------------------------- /util/camera/v4l2-console/v4l2-console.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #include "v4l2Camera.h" 6 | 7 | #include 8 | #include 9 | //#include 10 | #include 11 | 12 | 13 | bool signal_recieved = false; 14 | 15 | void sig_handler(int signo) 16 | { 17 | if( signo == SIGINT ) 18 | { 19 | printf("received SIGINT\n"); 20 | signal_recieved = true; 21 | } 22 | } 23 | 24 | 25 | 26 | int main( int argc, char** argv ) 27 | { 28 | printf("v4l2-console\n args (%i): ", argc); 29 | 30 | /* 31 | * verify parameters 32 | */ 33 | for( int i=0; i < argc; i++ ) 34 | printf("%i [%s] ", i, argv[i]); 35 | 36 | printf("\n"); 37 | 38 | if( argc < 2 ) 39 | { 40 | printf("v4l2-console: 0 arguments were supplied.\n"); 41 | printf("usage: v4l2-console \n"); 42 | printf(" ./v4l2-console /dev/video0\n"); 43 
| 44 | return 0; 45 | } 46 | 47 | const char* dev_path = argv[1]; 48 | printf("v4l2-console: attempting to initialize video device '%s'\n\n", dev_path); 49 | 50 | if( signal(SIGINT, sig_handler) == SIG_ERR ) 51 | printf("\ncan't catch SIGINT\n"); 52 | 53 | /* 54 | * create the camera device 55 | */ 56 | v4l2Camera* camera = v4l2Camera::Create(dev_path); 57 | 58 | if( !camera ) 59 | { 60 | printf("\nv4l2-console: failed to initialize video device '%s'\n", dev_path); 61 | return 0; 62 | } 63 | 64 | printf("\nv4l2-console: successfully initialized video device '%s'\n", dev_path); 65 | printf(" width: %u\n", camera->GetWidth()); 66 | printf(" height: %u\n", camera->GetHeight()); 67 | printf(" depth: %u (bpp)\n", camera->GetPixelDepth()); 68 | 69 | 70 | /* 71 | * start streaming 72 | */ 73 | if( !camera->Open() ) 74 | { 75 | printf("\nv4l2-console: failed to open camera '%s' for streaming\n", dev_path); 76 | delete camera; /* release the device before bailing out */ return 0; 77 | } 78 | 79 | printf("\nv4l2-console: camera '%s' open for streaming\n", dev_path); 80 | 81 | /* grab frames until SIGINT; every received frame is written out as a grayscale JPEG */ 82 | while( !signal_recieved ) 83 | { 84 | uint8_t* img = (uint8_t*)camera->Capture(500); 85 | 86 | if( !img ) 87 | { 88 | //printf("got NULL image from camera capture\n"); 89 | continue; 90 | } 91 | else 92 | { 93 | printf("recieved new video frame\n"); 94 | 95 | static int num_frames = 0; 96 | 97 | const int width = camera->GetWidth(); 98 | const int height = camera->GetHeight(); 99 | 100 | QImage qImg(width, height, QImage::Format_RGB32); 101 | 102 | for( int y=0; y < height; y++ ) 103 | { 104 | for( int x=0; x < width; x++ ) 105 | { 106 | const int value = img[y * width + x]; 107 | if( value != 0 ) 108 | printf("%i %i %i\n", x, y, value); 109 | qImg.setPixel(x, y, qRgb(value, value, value)); 110 | } 111 | } 112 | 113 | char output_filename[64]; 114 | sprintf(output_filename, "camera-%i.jpg", num_frames); 115 | 116 | qImg.save(QString(output_filename)); 117 | num_frames++; 118 | } 119 | 120 | } 121 | 122 | printf("\nv4l2-console: un-initializing video device 
'%s'\n", dev_path); 123 | 124 | 125 | /* 126 | * shutdown the camera device 127 | */ 128 | if( camera != NULL ) 129 | { 130 | delete camera; 131 | camera = NULL; 132 | } 133 | 134 | printf("v4l2-console: video device '%s' has been un-initialized.\n", dev_path); 135 | printf("v4l2-console: this concludes the test of video device '%s'\n", dev_path); 136 | return 0; 137 | } -------------------------------------------------------------------------------- /util/camera/v4l2-display/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | file(GLOB v4l2DisplaySources *.cpp) 3 | file(GLOB v4l2DisplayIncludes *.h ) 4 | 5 | add_executable(v4l2-display ${v4l2DisplaySources}) 6 | target_link_libraries(v4l2-display jetson-inference) 7 | -------------------------------------------------------------------------------- /util/camera/v4l2-display/v4l2-display.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #include "v4l2Camera.h" 6 | #include "glDisplay.h" 7 | #include "cudaMappedMemory.h" 8 | 9 | #include 10 | 11 | 12 | int main( int argc, char** argv ) 13 | { 14 | printf("v4l2-display\n args (%i): ", argc); 15 | 16 | /* 17 | * verify parameters 18 | */ 19 | for( int i=0; i < argc; i++ ) 20 | printf("%i [%s] ", i, argv[i]); 21 | 22 | printf("\n"); 23 | 24 | if( argc < 2 ) 25 | { 26 | printf("v4l2-display: 0 arguments were supplied.\n"); 27 | printf("usage: v4l2-display \n"); 28 | printf(" ./v4l2-display /dev/video0\n"); 29 | 30 | return 0; 31 | } 32 | 33 | const char* dev_path = argv[1]; 34 | printf("v4l2-display: attempting to initialize video device '%s'\n\n", dev_path); 35 | 36 | 37 | /* 38 | * create the camera device 39 | */ 40 | v4l2Camera* camera = v4l2Camera::Create(dev_path); 41 | 42 | if( !camera ) 43 | { 44 | printf("\nv4l2-display: failed to initialize video device '%s'\n", dev_path); 45 | return 0; 46 | } 47 | 48 | printf("\nv4l2-display: 
successfully initialized video device '%s'\n", dev_path); 49 | printf(" width: %u\n", camera->GetWidth()); 50 | printf(" height: %u\n", camera->GetHeight()); 51 | printf(" depth: %u (bpp)\n", camera->GetPixelDepth()); 52 | 53 | 54 | 55 | 56 | /* 57 | * create openGL window 58 | */ 59 | glDisplay* display = glDisplay::Create(); 60 | 61 | if( !display ) 62 | { 63 | printf("\nv4l2-display: failed to create openGL display\n"); 64 | delete camera; /* release the device opened above */ return 0; 65 | } 66 | 67 | glTexture* tex = glTexture::Create(camera->GetWidth(), camera->GetHeight(), GL_LUMINANCE8); 68 | 69 | if( !tex ) 70 | { 71 | printf("v4l2-display: failed to create %ux%u openGL texture\n", camera->GetWidth(), camera->GetHeight()); 72 | delete display; delete camera; /* release what was created so far */ return 0; 73 | } 74 | 75 | printf("v4l2-display: initialized %u x %u openGL texture (%u bytes)\n", tex->GetWidth(), tex->GetHeight(), tex->GetSize()); 76 | 77 | 78 | printf("\nv4l2-display: un-initializing video device '%s'\n", dev_path); 79 | 80 | /* 81 | * shutdown 82 | */ 83 | if( display != NULL ) 84 | { 85 | delete display; 86 | display = NULL; 87 | } 88 | 89 | if( camera != NULL ) 90 | { 91 | delete camera; 92 | camera = NULL; 93 | } 94 | 95 | printf("v4l2-display: video device '%s' has been un-initialized.\n", dev_path); 96 | printf("v4l2-display: this concludes the test of video device '%s'\n", dev_path); 97 | return 0; 98 | } -------------------------------------------------------------------------------- /util/camera/v4l2Camera.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #include "v4l2Camera.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | 21 | 22 | #define REQUESTED_RINGBUFFERS 4 23 | 24 | 25 | 26 | // ioctl 27 | static int xioctl(int fd, int request, void* arg) 28 | { 29 | int status; 30 | do { status = ioctl (fd, request, arg); } while (-1==status && EINTR==errno); 
31 | return status; 32 | } 33 | 34 | 35 | 36 | // constructor 37 | v4l2Camera::v4l2Camera( const char* device_path ) : mDevicePath(device_path) 38 | { 39 | mFD = -1; 40 | 41 | mBuffersMMap = NULL; 42 | mBufferCountMMap = 0; 43 | mRequestWidth = 0; 44 | mRequestHeight = 0; 45 | mRequestFormat = 1; 46 | //mRequestFormat = -1; // index into V4L2 format table 47 | 48 | mWidth = 0; 49 | mHeight = 0; 50 | mPitch = 0; 51 | mPixelDepth = 0; 52 | } 53 | 54 | 55 | // destructor 56 | v4l2Camera::~v4l2Camera() 57 | { 58 | // close file 59 | if( mFD >= 0 ) 60 | { 61 | close(mFD); 62 | mFD = -1; 63 | } 64 | } 65 | 66 | 67 | // ProcessEmit 68 | void* v4l2Camera::Capture( size_t timeout ) 69 | { 70 | fd_set fds; 71 | FD_ZERO(&fds); 72 | FD_SET(mFD, &fds); 73 | 74 | struct timeval tv; 75 | 76 | tv.tv_sec = 0; 77 | tv.tv_usec = 0; 78 | 79 | const bool threaded = true; //false; 80 | 81 | if( timeout > 0 ) 82 | { 83 | tv.tv_sec = timeout / 1000; 84 | tv.tv_usec = (timeout - (tv.tv_sec * 1000)) * 1000; 85 | } 86 | 87 | // 88 | const int result = select(mFD + 1, &fds, NULL, NULL, &tv); 89 | 90 | 91 | if( result == -1 ) 92 | { 93 | //if (EINTR == errno) 94 | printf("v4l2 -- select() failed (errno=%i) (%s)\n", errno, strerror(errno)); 95 | return NULL; 96 | } 97 | else if( result == 0 ) 98 | { 99 | if( timeout > 0 ) 100 | printf("v4l2 -- select() timed out...\n"); 101 | return NULL; // timeout, not necessarily an error (TRY_AGAIN) 102 | } 103 | 104 | // dequeue input buffer from V4L2 105 | struct v4l2_buffer buf; 106 | memset(&buf, 0, sizeof(v4l2_buffer)); 107 | 108 | buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 109 | buf.memory = V4L2_MEMORY_MMAP; //V4L2_MEMORY_USERPTR; 110 | 111 | if( xioctl(mFD, VIDIOC_DQBUF, &buf) < 0 ) 112 | { 113 | printf("v4l2 -- ioctl(VIDIOC_DQBUF) failed (errno=%i) (%s)\n", errno, strerror(errno)); 114 | return NULL; 115 | } 116 | 117 | if( buf.index >= mBufferCountMMap ) 118 | { 119 | printf("v4l2 -- invalid mmap buffer index (%u)\n", buf.index); 120 | return NULL; 
121 | } 122 | 123 | // emit ringbuffer entry 124 | //printf("v4l2 -- recieved %ux%u video frame (index=%u)\n", mWidth, mHeight, (uint32_t)buf.index); 125 | 126 | void* image_ptr = mBuffersMMap[buf.index].ptr; 127 | 128 | // re-queue buffer to V4L2 129 | if( xioctl(mFD, VIDIOC_QBUF, &buf) < 0 ) 130 | printf("v4l2 -- ioctl(VIDIOC_QBUF) failed (errno=%i) (%s)\n", errno, strerror(errno)); 131 | 132 | return image_ptr; 133 | } 134 | 135 | 136 | 137 | // initMMap 138 | bool v4l2Camera::initMMap() 139 | { 140 | struct v4l2_requestbuffers req; 141 | memset(&req, 0, sizeof(v4l2_requestbuffers)); 142 | 143 | req.count = REQUESTED_RINGBUFFERS; 144 | req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 145 | req.memory = V4L2_MEMORY_MMAP; 146 | 147 | if( xioctl(mFD, VIDIOC_REQBUFS, &req) < 0 ) 148 | { 149 | printf("v4l2 -- does not support mmap (errno=%i) (%s)\n", errno, strerror(errno)); 150 | return false; 151 | } 152 | 153 | if( req.count < 2 ) 154 | { 155 | printf("v4l2 -- insufficient mmap memory\n"); 156 | return false; 157 | } 158 | 159 | mBuffersMMap = (v4l2_mmap*)malloc( req.count * sizeof(v4l2_mmap) ); 160 | 161 | if( !mBuffersMMap ) 162 | return false; 163 | 164 | memset(mBuffersMMap, 0, req.count * sizeof(v4l2_mmap)); 165 | 166 | for( size_t n=0; n < req.count; n++ ) 167 | { 168 | mBuffersMMap[n].buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 169 | mBuffersMMap[n].buf.memory = V4L2_MEMORY_MMAP; 170 | mBuffersMMap[n].buf.index = n; 171 | 172 | if( xioctl(mFD, VIDIOC_QUERYBUF, &mBuffersMMap[n].buf) < 0 ) 173 | { 174 | printf( "v4l2 -- failed retrieve mmap buffer info (errno=%i) (%s)\n", errno, strerror(errno)); 175 | return false; 176 | } 177 | 178 | mBuffersMMap[n].ptr = mmap(NULL, mBuffersMMap[n].buf.length, 179 | PROT_READ|PROT_WRITE, MAP_SHARED, 180 | mFD, mBuffersMMap[n].buf.m.offset); 181 | 182 | if( mBuffersMMap[n].ptr == MAP_FAILED ) 183 | { 184 | printf( "v4l2 -- failed to mmap buffer (errno=%i) (%s)\n", errno, strerror(errno)); 185 | return false; 186 | } 187 | 188 | if( 
xioctl(mFD, VIDIOC_QBUF, &mBuffersMMap[n].buf) < 0 ) 189 | { 190 | printf( "v4l2 -- failed to queue mmap buffer (errno=%i) (%s)\n", errno, strerror(errno)); 191 | return false; 192 | } 193 | } 194 | 195 | mBufferCountMMap = req.count; 196 | printf("v4l2 -- mapped %zu capture buffers with mmap\n", mBufferCountMMap); 197 | return true; 198 | } 199 | 200 | // v4l2_format_str -- printable name for the Bayer pixel formats listed below, "UNKNOWN" for anything else 201 | inline const char* v4l2_format_str( uint32_t fmt ) 202 | { 203 | if( fmt == V4L2_PIX_FMT_SBGGR8 ) return "SBGGR8 (V4L2_PIX_FMT_SBGGR8)"; 204 | else if( fmt == V4L2_PIX_FMT_SGBRG8 ) return "SGBRG8 (V4L2_PIX_FMT_SGBRG8)"; 205 | else if( fmt == V4L2_PIX_FMT_SGRBG8 ) return "SGRBG8 (V4L2_PIX_FMT_SGRBG8)"; 206 | else if( fmt == V4L2_PIX_FMT_SRGGB8 ) return "SRGGB8 (V4L2_PIX_FMT_SRGGB8)"; 207 | else if( fmt == V4L2_PIX_FMT_SBGGR16 ) return "BYR2 (V4L2_PIX_FMT_SBGGR16)"; 208 | else if( fmt == V4L2_PIX_FMT_SRGGB10 ) return "RG10 (V4L2_PIX_FMT_SRGGB10)"; 209 | 210 | return "UNKNOWN"; 211 | } 212 | 213 | // v4l2_print_format -- dump the pix fields of a v4l2_format under the heading `text` 214 | inline void v4l2_print_format( const v4l2_format& fmt, const char* text ) 215 | { 216 | printf("v4l2 -- %s\n", text); 217 | printf("v4l2 -- width %u\n", fmt.fmt.pix.width); 218 | printf("v4l2 -- height %u\n", fmt.fmt.pix.height); 219 | printf("v4l2 -- pitch %u\n", fmt.fmt.pix.bytesperline); 220 | printf("v4l2 -- size %u\n", fmt.fmt.pix.sizeimage); 221 | printf("v4l2 -- format 0x%X %s\n", fmt.fmt.pix.pixelformat, v4l2_format_str(fmt.fmt.pix.pixelformat)); 222 | printf("v4l2 -- color 0x%X\n", fmt.fmt.pix.colorspace); 223 | printf("v4l2 -- field 0x%X\n", fmt.fmt.pix.field); 224 | } 225 | 226 | // v4l2_print_formatdesc -- dump one enumerated format; flags is a bitmask, so only the COMPRESSED bit is tested 227 | inline void v4l2_print_formatdesc( const v4l2_fmtdesc& desc ) 228 | { 229 | printf("v4l2 -- format #%u\n", desc.index); 230 | printf("v4l2 -- desc %s\n", desc.description); 231 | printf("v4l2 -- flags %s\n", ((desc.flags & V4L2_FMT_FLAG_COMPRESSED) == 0 ? 
"V4L2_FMT_FLAG_UNCOMPRESSED" : "V4L2_FMT_FLAG_COMPRESSED")); 232 | printf("v4l2 -- fourcc 0x%X %s\n", desc.pixelformat, v4l2_format_str(desc.pixelformat)); 233 | 234 | } 235 | 236 | // initFormats -- enumerate the device's capture formats into mFormats, printing each one 237 | bool v4l2Camera::initFormats() 238 | { 239 | struct v4l2_fmtdesc desc; 240 | memset(&desc, 0, sizeof(v4l2_fmtdesc)); 241 | 242 | desc.index = 0; 243 | desc.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 244 | /* xioctl retries on EINTR, like every other ioctl in this file; the loop ends on the first failing index */ 245 | while( xioctl(mFD, VIDIOC_ENUM_FMT, &desc) == 0 ) 246 | { 247 | mFormats.push_back(desc); 248 | v4l2_print_formatdesc( desc ); 249 | desc.index++; 250 | } 251 | 252 | return true; 253 | } 254 | 255 | 256 | // initStream 257 | bool v4l2Camera::initStream() 258 | { 259 | struct v4l2_format fmt; 260 | memset(&fmt, 0, sizeof(v4l2_format)); 261 | fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 262 | 263 | // retrieve existing video format 264 | if( xioctl(mFD, VIDIOC_G_FMT, &fmt) < 0 ) 265 | { 266 | const int err = errno; 267 | printf( "v4l2 -- failed to get video format of device (errno=%i) (%s)\n", err, strerror(err)); 268 | return false; 269 | } 270 | 271 | v4l2_print_format(fmt, "preexisting format"); 272 | 273 | #if 1 274 | // setup new format 275 | struct v4l2_format new_fmt; 276 | memset(&new_fmt, 0, sizeof(v4l2_format)); 277 | 278 | new_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 279 | new_fmt.fmt.pix.width = fmt.fmt.pix.width; 280 | new_fmt.fmt.pix.height = fmt.fmt.pix.height; 281 | new_fmt.fmt.pix.pixelformat = fmt.fmt.pix.pixelformat; 282 | new_fmt.fmt.pix.field = fmt.fmt.pix.field; 283 | new_fmt.fmt.pix.colorspace = fmt.fmt.pix.colorspace; 284 | 285 | if( mRequestWidth > 0 && mRequestHeight > 0 ) 286 | { 287 | new_fmt.fmt.pix.width = mRequestWidth; 288 | new_fmt.fmt.pix.height = mRequestHeight; 289 | } 290 | 291 | if( mRequestFormat >= 0 && mRequestFormat < mFormats.size() ) 292 | new_fmt.fmt.pix.pixelformat = mFormats[mRequestFormat].pixelformat; 293 | 294 | v4l2_print_format(new_fmt, "setting new format..."); 295 | 296 | if( xioctl(mFD, VIDIOC_S_FMT, &new_fmt) < 0 ) 297 | { 298 | 
const int err = errno; 299 | printf( "v4l2 -- failed to set video format of device (errno=%i) (%s)\n", errno, strerror(errno)); 300 | return false; 301 | } 302 | 303 | 304 | // re-retrieve the current format, with detailed info like line pitch/ect. 305 | memset(&fmt, 0, sizeof(v4l2_format)); 306 | fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 307 | 308 | if( xioctl(mFD, VIDIOC_G_FMT, &fmt) < 0 ) 309 | { 310 | const int err = errno; 311 | printf( "v4l2 -- failed to get video format of device (errno=%i) (%s)\n", errno, strerror(errno)); 312 | return false; 313 | } 314 | 315 | v4l2_print_format(fmt, "confirmed new format"); 316 | #endif 317 | 318 | mWidth = fmt.fmt.pix.width; 319 | mHeight = fmt.fmt.pix.height; 320 | mPitch = fmt.fmt.pix.bytesperline; 321 | mPixelDepth = (mPitch * 8) / mWidth; 322 | 323 | // initMMap 324 | if( !initMMap() ) // initUserPtr() 325 | return false; 326 | 327 | return true; 328 | } 329 | 330 | 331 | // Create 332 | v4l2Camera* v4l2Camera::Create( const char* device_path ) 333 | { 334 | v4l2Camera* cam = new v4l2Camera(device_path); 335 | 336 | if( !cam->init() ) 337 | { 338 | printf("v4l2 -- failed to create instance %s\n", device_path); 339 | delete cam; 340 | return NULL; 341 | } 342 | 343 | return cam; 344 | } 345 | 346 | 347 | // Init 348 | bool v4l2Camera::init() 349 | { 350 | // locate the /dev/event* path for this device 351 | mFD = open(mDevicePath.c_str(), O_RDWR | O_NONBLOCK, 0 ); 352 | 353 | if( mFD < 0 ) 354 | { 355 | printf( "v4l2 -- failed to open %s\n", mDevicePath.c_str()); 356 | return false; 357 | } 358 | 359 | // initialize 360 | if( !initCaps() ) 361 | return false; 362 | 363 | if( !initFormats() ) 364 | return false; 365 | 366 | if( !initStream() ) 367 | return false; 368 | 369 | return true; 370 | } 371 | 372 | 373 | // Open 374 | bool v4l2Camera::Open() 375 | { 376 | printf( "v4l2Camera::Open(%s)\n", mDevicePath.c_str()); 377 | 378 | // begin streaming 379 | enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 380 | 381 | 
printf( "v4l2 -- starting streaming %s with ioctl(VIDIOC_STREAMON)...\n", mDevicePath.c_str()); 382 | 383 | if( xioctl(mFD, VIDIOC_STREAMON, &type) < 0 ) 384 | { 385 | printf( "v4l2 -- failed to start streaming (errno=%i) (%s)\n", errno, strerror(errno)); 386 | return false; 387 | } 388 | 389 | return true; 390 | } 391 | 392 | 393 | // Close 394 | bool v4l2Camera::Close() 395 | { 396 | // stop streaming 397 | enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 398 | 399 | printf( "v4l2 -- stopping streaming %s with ioctl(VIDIOC_STREAMOFF)...\n", mDevicePath.c_str()); 400 | 401 | if( xioctl(mFD, VIDIOC_STREAMOFF, &type) < 0 ) 402 | { 403 | printf( "v4l2 -- failed to stop streaming (errno=%i) (%s)\n", errno, strerror(errno)); 404 | //return false; 405 | } 406 | 407 | return true; 408 | } 409 | 410 | 411 | 412 | // initCaps 413 | bool v4l2Camera::initCaps() 414 | { 415 | struct v4l2_capability caps; 416 | 417 | if( xioctl(mFD, VIDIOC_QUERYCAP, &caps) < 0 ) 418 | { 419 | printf( "v4l2 -- failed to query caps (xioctl VIDIOC_QUERYCAP) for %s\n", mDevicePath.c_str()); 420 | return false; 421 | } 422 | 423 | #define PRINT_CAP(x) printf( "v4l2 -- %-18s %s\n", #x, (caps.capabilities & x) ? 
"yes" : "no") 424 | 425 | PRINT_CAP(V4L2_CAP_VIDEO_CAPTURE); 426 | PRINT_CAP(V4L2_CAP_READWRITE); 427 | PRINT_CAP(V4L2_CAP_ASYNCIO); 428 | PRINT_CAP(V4L2_CAP_STREAMING); 429 | 430 | if( !(caps.capabilities & V4L2_CAP_VIDEO_CAPTURE) ) 431 | { 432 | printf( "v4l2 -- %s is not a video capture device\n", mDevicePath.c_str()); 433 | return false; 434 | } 435 | 436 | return true; 437 | } 438 | 439 | 440 | // initUserPtr 441 | bool v4l2Camera::initUserPtr() 442 | { 443 | // request buffers 444 | struct v4l2_requestbuffers req; 445 | memset(&req, 0, sizeof(v4l2_requestbuffers)); 446 | 447 | req.count = REQUESTED_RINGBUFFERS; 448 | req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 449 | req.memory = V4L2_MEMORY_USERPTR; 450 | 451 | if ( xioctl(mFD, VIDIOC_REQBUFS, &req) < 0 ) 452 | { 453 | const int err = errno; 454 | printf( "v4l2 -- failed to request buffers (errno=%i) (%s)\n", errno, strerror(errno)); 455 | return false; 456 | } 457 | 458 | // queue ringbuffer 459 | #if 0 460 | for( size_t n=0; n < mRingbuffer.size(); n++ ) 461 | { 462 | struct v4l2_buffer buf; 463 | memset(&buf, 0, sizeof(v4l2_buffer)); 464 | 465 | buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 466 | buf.memory = V4L2_MEMORY_USERPTR; 467 | buf.index = n; 468 | buf.length = mRingbuffer[n]->GetSize(); 469 | 470 | buf.m.userptr = (unsigned long)mRingbuffer[n]->GetCPU(); 471 | 472 | if( xioctl(mFD, VIDIOC_QBUF, &buf) < 0 ) 473 | { 474 | printf( "v4l2 -- failed to queue buffer %zu (errno=%i) (%s)\n", n, errno, strerror(errno)); 475 | return false; 476 | } 477 | } 478 | #endif 479 | 480 | return true; 481 | } -------------------------------------------------------------------------------- /util/camera/v4l2Camera.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #ifndef __V4L2_CAPTURE_H 6 | #define __V4L2_CAPTURE_H 7 | 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | 17 | struct v4l2_mmap 18 | { 19 | struct v4l2_buffer 
buf; 20 | void* ptr; 21 | }; 22 | 23 | 24 | /** 25 | * Video4Linux2 camera capture streaming. 26 | * @ingroup util 27 | */ 28 | class v4l2Camera 29 | { 30 | public: 31 | /** 32 | * Create V4L2 interface 33 | * @param path Filename of the video device (e.g. /dev/video0) 34 | */ 35 | static v4l2Camera* Create( const char* device_path ); 36 | 37 | /** 38 | * Destructor 39 | */ 40 | ~v4l2Camera(); 41 | 42 | /** 43 | * Start streaming 44 | */ 45 | bool Open(); 46 | 47 | /** 48 | * Stop streaming 49 | */ 50 | bool Close(); 51 | 52 | /** 53 | * Return the next image. 54 | */ 55 | void* Capture( size_t timeout=0 ); 56 | 57 | /** 58 | * Get width, in pixels, of camera image. 59 | */ 60 | inline uint32_t GetWidth() const { return mWidth; } 61 | 62 | /** 63 | * Retrieve height, in pixels, of camera image. 64 | */ 65 | inline uint32_t GetHeight() const { return mHeight; } 66 | 67 | /** 68 | * Return the size in bytes of one line of the image. 69 | */ 70 | inline uint32_t GetPitch() const { return mPitch; } 71 | 72 | /** 73 | * Return the bit depth per pixel. 
74 | */ 75 | inline uint32_t GetPixelDepth() const { return mPixelDepth; } 76 | 77 | private: 78 | 79 | v4l2Camera( const char* device_path ); 80 | 81 | bool init(); 82 | bool initCaps(); 83 | bool initFormats(); 84 | bool initStream(); 85 | 86 | bool initUserPtr(); 87 | bool initMMap(); 88 | 89 | int mFD; 90 | int mRequestFormat; 91 | uint32_t mRequestWidth; 92 | uint32_t mRequestHeight; 93 | uint32_t mWidth; 94 | uint32_t mHeight; 95 | uint32_t mPitch; 96 | uint32_t mPixelDepth; 97 | 98 | v4l2_mmap* mBuffersMMap; 99 | size_t mBufferCountMMap; 100 | 101 | std::vector mFormats; 102 | std::string mDevicePath; 103 | }; 104 | 105 | 106 | #endif 107 | 108 | 109 | -------------------------------------------------------------------------------- /util/commandLine.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #include "commandLine.h" 6 | 7 | #include // atoi 8 | #include 9 | #include 10 | 11 | 12 | 13 | // strRemoveDelimiter 14 | static inline int strRemoveDelimiter(char delimiter, const char *string) 15 | { 16 | int string_start = 0; 17 | 18 | while (string[string_start] == delimiter) 19 | { 20 | string_start++; 21 | } 22 | 23 | if (string_start >= (int)strlen(string)-1) 24 | { 25 | return 0; 26 | } 27 | 28 | return string_start; 29 | } 30 | 31 | 32 | // constructor 33 | commandLine::commandLine( const int pArgc, char** pArgv ) 34 | { 35 | argc = pArgc; 36 | argv = pArgv; 37 | } 38 | 39 | 40 | // GetInt 41 | int commandLine::GetInt( const char* string_ref ) 42 | { 43 | if( argc < 1 ) 44 | return 0; 45 | 46 | bool bFound = false; 47 | int value = -1; 48 | 49 | for( int i=1; i < argc; i++ ) 50 | { 51 | int string_start = strRemoveDelimiter('-', argv[i]); 52 | const char *string_argv = &argv[i][string_start]; 53 | int length = (int)strlen(string_ref); 54 | 55 | if (!strncasecmp(string_argv, string_ref, length)) 56 | { 57 | if (length+1 <= (int)strlen(string_argv)) 
58 | { 59 | int auto_inc = (string_argv[length] == '=') ? 1 : 0; 60 | value = atoi(&string_argv[length + auto_inc]); 61 | } 62 | else 63 | { 64 | value = 0; 65 | } 66 | 67 | bFound = true; 68 | continue; 69 | } 70 | } 71 | 72 | 73 | if (bFound) 74 | return value; 75 | 76 | return 0; 77 | } 78 | 79 | 80 | // GetFloat 81 | float commandLine::GetFloat( const char* string_ref ) 82 | { 83 | if( argc < 1 ) 84 | return 0; 85 | 86 | bool bFound = false; 87 | float value = -1; 88 | 89 | for (int i=1; i < argc; i++) 90 | { 91 | int string_start = strRemoveDelimiter('-', argv[i]); 92 | const char *string_argv = &argv[i][string_start]; 93 | int length = (int)strlen(string_ref); 94 | 95 | if (!strncasecmp(string_argv, string_ref, length)) 96 | { 97 | if (length+1 <= (int)strlen(string_argv)) 98 | { 99 | int auto_inc = (string_argv[length] == '=') ? 1 : 0; 100 | value = (float)atof(&string_argv[length + auto_inc]); 101 | } 102 | else 103 | { 104 | value = 0.f; 105 | } 106 | 107 | bFound = true; 108 | continue; 109 | } 110 | } 111 | 112 | if( bFound ) 113 | return value; 114 | 115 | return 0; 116 | } 117 | 118 | 119 | // GetString -- returns the text following "<string_ref>=" in the first matching argument, or NULL if there is none 120 | const char* commandLine::GetString( const char* string_ref ) 121 | { 122 | if( argc < 1 ) 123 | return 0; 124 | 125 | for (int i=1; i < argc; i++) 126 | { 127 | int string_start = strRemoveDelimiter('-', argv[i]); 128 | char *string_argv = (char *)&argv[i][string_start]; 129 | int length = (int)strlen(string_ref); 130 | // require the '=' separator: for a bare "--foo" the old length+1 offset pointed past the terminating NUL, and "--foobar=x" must not match "foo" 131 | if (!strncasecmp(string_argv, string_ref, length) && string_argv[length] == '=') 132 | return (string_argv + length + 1); 133 | //*string_retval = &string_argv[length+1]; 134 | } 135 | 136 | return NULL; 137 | } 138 | 139 | 140 | // GetFlag 141 | bool commandLine::GetFlag( const char* string_ref ) 142 | { 143 | if( argc < 1 ) 144 | return false; 145 | 146 | for (int i=1; i < argc; i++) 147 | { 148 | int string_start = strRemoveDelimiter('-', argv[i]); 149 | const char *string_argv = &argv[i][string_start]; 150 | 151 | const char *equal_pos = 
strchr(string_argv, '='); 152 | int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv); 153 | 154 | int length = (int)strlen(string_ref); 155 | 156 | if( length == argv_length && !strncasecmp(string_argv, string_ref, length) ) 157 | return true; 158 | } 159 | 160 | return false; 161 | } 162 | 163 | 164 | -------------------------------------------------------------------------------- /util/commandLine.h: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #ifndef __COMMAND_LINE_H_ 6 | #define __COMMAND_LINE_H_ 7 | 8 | 9 | /** 10 | * commandLine parser class 11 | * @ingroup util 12 | */ 13 | class commandLine 14 | { 15 | public: 16 | /** 17 | * constructor 18 | */ 19 | commandLine( const int argc, char** argv ); 20 | 21 | 22 | /** 23 | * Checks to see whether the specified flag was included on the 24 | * command line. For example, if argv contained "--foo", then 25 | * GetFlag("foo") would return true. 26 | * 27 | * @returns true, if the flag with argName was found 28 | * false, if the flag with argName was not found 29 | */ 30 | bool GetFlag( const char* argName ); 31 | 32 | 33 | /** 34 | * Get float argument. For example if argv contained "--foo=3.14159", 35 | * then GetFloat("foo") would return 3.14159f. 36 | * 37 | * @returns 0, if the argument could not be found. 38 | * Otherwise, returns the value of the argument. 39 | */ 40 | float GetFloat( const char* argName ); 41 | 42 | 43 | /** 44 | * Get integer argument. For example if argv contained "--foo=100", 45 | * then GetInt("foo") would return 100. 46 | * 47 | * @returns 0, if the argument could not be found. 48 | * Otherwise, returns the value of the argument. 49 | */ 50 | int GetInt( const char* argName ); 51 | 52 | 53 | /** 54 | * Get string argument. For example if argv contained "--foo=bar", 55 | * then GetString("foo") would return "bar". 
56 | * 57 | * @returns NULL, if the argument could not be found. 58 | * Otherwise, returns a pointer to the argument value string 59 | * from the argv array. 60 | */ 61 | const char* GetString( const char* argName ); 62 | 63 | 64 | protected: 65 | 66 | int argc; 67 | char** argv; 68 | }; 69 | 70 | 71 | 72 | #endif 73 | 74 | -------------------------------------------------------------------------------- /util/cuda/cudaFont.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #include "cudaFont.h" 6 | #include "cudaMappedMemory.h" 7 | 8 | #include "loadImage.h" 9 | 10 | 11 | // constructor 12 | cudaFont::cudaFont() 13 | { 14 | mCommandCPU = NULL; 15 | mCommandGPU = NULL; 16 | mCmdEntries = 0; 17 | 18 | mFontMapCPU = NULL; 19 | mFontMapGPU = NULL; 20 | 21 | mFontMapWidth = 0; 22 | mFontMapHeight = 0; 23 | 24 | //mFontCellSize = make_int2(24,32); 25 | mFontCellSize = make_int2(24,32); 26 | } 27 | 28 | 29 | 30 | // destructor -- releases the font bitmap and the glyph command buffer 31 | cudaFont::~cudaFont() 32 | { 33 | if( mFontMapCPU != NULL ) 34 | { 35 | CUDA(cudaFreeHost(mFontMapCPU)); 36 | mFontMapCPU = NULL; 37 | mFontMapGPU = NULL; 38 | } 39 | 40 | // init() obtains the command buffer from cudaAllocMapped() (cudaHostAlloc), so it has to be released with cudaFreeHost as well 41 | if( mCommandCPU != NULL ) { CUDA(cudaFreeHost(mCommandCPU)); mCommandCPU = NULL; mCommandGPU = NULL; } 42 | } 43 | // Create -- returns a ready-to-use font object, or NULL if init() fails 44 | cudaFont* cudaFont::Create( const char* bitmap_path ) 45 | { 46 | cudaFont* c = new cudaFont(); 47 | 48 | if( !c ) 49 | return NULL; 50 | 51 | if( !c->init(bitmap_path) ) 52 | { 53 | delete c; // don't leak the instance (or whatever init() already allocated) on failure 54 | return NULL; 55 | } 56 | return c; 57 | } 58 | // init 59 | bool cudaFont::init( const char* bitmap_path ) 60 | { 61 | if( !loadImageRGBA(bitmap_path, &mFontMapCPU, &mFontMapGPU, &mFontMapWidth, &mFontMapHeight) ) 62 | return false; 63 | 64 | if( !cudaAllocMapped((void**)&mCommandCPU, (void**)&mCommandGPU, sizeof(short4) * MaxCommands) ) 65 | return false; 66 | 67 | return true; 68 | } 69 | 70 | 71 | inline __host__ __device__ float4 operator*(float4 a, float4 b) 72 | { 73 | return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); 74 | } 75 | 76 | 
template<typename T> 77 | __global__ void gpuOverlayText( T* font, int fontWidth, short4* text, 78 | T* output, int width, int height, float4 color ) 79 | { 80 | const short4 t = text[blockIdx.x]; 81 | // one block per glyph command: (t.x,t.y) is the output position, (t.z,t.w) the glyph origin inside the font map 82 | //printf("%i %hi %hi %hi %hi\n", blockIdx.x, t.x, t.y, t.z, t.w); 83 | 84 | const int x = t.x + threadIdx.x; 85 | const int y = t.y + threadIdx.y; 86 | 87 | if( x < 0 || y < 0 || x >= width || y >= height ) 88 | return; 89 | 90 | const int u = t.z + threadIdx.x; 91 | const int v = t.w + threadIdx.y; 92 | 93 | //printf("%i %i %i %i %i\n", blockIdx.x, x, y, u, v); 94 | 95 | const T px_font = font[v * fontWidth + u] * color; 96 | T px_out = output[y * width + x]; // fixme: add proper input support 97 | 98 | const float alpha = px_font.w / 255.0f; 99 | const float ialph = 1.0f - alpha; 100 | 101 | px_out.x = alpha * px_font.x + ialph * px_out.x; 102 | px_out.y = alpha * px_font.y + ialph * px_out.y; 103 | px_out.z = alpha * px_font.z + ialph * px_out.z; 104 | 105 | output[y * width + x] = px_out; 106 | } 107 | 108 | 109 | // cudaOverlayText -- launches one thread block per glyph command, with one thread per pixel of the font cell 110 | template<typename T> 111 | cudaError_t cudaOverlayText( T* font, const int2& fontCellSize, size_t fontMapWidth, 112 | const float4& fontColor, short4* text, size_t length, 113 | T* output, size_t width, size_t height) 114 | { 115 | if( !font || !text || !output || length == 0 || width == 0 || height == 0 ) 116 | return cudaErrorInvalidValue; 117 | 118 | const float4 color_scale = make_float4( fontColor.x / 255.0f, fontColor.y / 255.0f, fontColor.z / 255.0f, fontColor.w / 255.0f ); 119 | 120 | // setup arguments 121 | const dim3 block(fontCellSize.x, fontCellSize.y); 122 | const dim3 grid(length); 123 | 124 | gpuOverlayText<T><<<grid, block>>>(font, fontMapWidth, text, output, width, height, color_scale); 125 | 126 | return cudaGetLastError(); 127 | } 128 | 129 | 130 | // RenderOverlay 131 | bool cudaFont::RenderOverlay( float4* input, float4* output, uint32_t width, uint32_t height, const std::vector< std::pair< std::string, int2 > >& text, const float4& color ) 132 
| { 133 | if( !input || !output || width == 0 || height == 0 || text.size() == 0 ) 134 | return false; 135 | 136 | const uint32_t cellsPerRow = mFontMapWidth / mFontCellSize.x; 137 | const uint32_t numText = text.size(); 138 | 139 | for( uint32_t t=0; t < numText; t++ ) 140 | { 141 | const uint32_t numChars = text[t].first.size(); 142 | 143 | int2 pos = text[t].second; 144 | 145 | for( uint32_t n=0; n < numChars; n++ ) 146 | { 147 | char c = text[t].first[n]; 148 | 149 | if( c < 32 || c > 126 ) 150 | continue; 151 | 152 | c -= 32; 153 | 154 | const uint32_t font_y = c / cellsPerRow; 155 | const uint32_t font_x = c - (font_y * cellsPerRow); 156 | 157 | mCommandCPU[mCmdEntries++] = make_short4( pos.x, pos.y, 158 | font_x * (mFontCellSize.x + 1), 159 | font_y * (mFontCellSize.y + 1) ); 160 | 161 | pos.x += mFontCellSize.x; 162 | } 163 | } 164 | 165 | CUDA(cudaOverlayText( mFontMapGPU, mFontCellSize, mFontMapWidth, color, 166 | mCommandGPU, mCmdEntries, 167 | output, width, height)); 168 | 169 | mCmdEntries = 0; 170 | return true; 171 | } 172 | 173 | 174 | bool cudaFont::RenderOverlay( float4* input, float4* output, uint32_t width, uint32_t height, 175 | const char* str, int x, int y, const float4& color ) 176 | { 177 | if( !str ) 178 | return NULL; 179 | 180 | std::vector< std::pair< std::string, int2 > > list; 181 | 182 | list.push_back( std::pair< std::string, int2 >( str, make_int2(x,y) )); 183 | 184 | return RenderOverlay(input, output, width, height, list, color); 185 | } 186 | 187 | 188 | -------------------------------------------------------------------------------- /util/cuda/cudaFont.h: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #ifndef __CUDA_FONT_H__ 6 | #define __CUDA_FONT_H__ 7 | 8 | #include "cudaUtility.h" 9 | 10 | #include 11 | #include 12 | 13 | 14 | /** 15 | * Font overlay rendering using CUDA 16 | * @ingroup util 17 | */ 18 | class cudaFont 
19 | { 20 | public: 21 | /** 22 | * Create new CUDA font overlay object using textured fonts 23 | */ 24 | static cudaFont* Create( const char* font_bitmap="/home/nvidia/Face-Recognition/data/fontmapA.png" ); 25 | 26 | /** 27 | * Destructor 28 | */ 29 | ~cudaFont(); 30 | 31 | /** 32 | * Draw font overlay onto image 33 | */ 34 | bool RenderOverlay( float4* input, float4* output, uint32_t width, uint32_t height, 35 | const char* str, int x, int y, const float4& color=make_float4(0, 0, 0, 255)); 36 | 37 | /** 38 | * Draw font overlay onto image 39 | */ 40 | bool RenderOverlay( float4* input, float4* output, uint32_t width, uint32_t height, 41 | const std::vector< std::pair< std::string, int2 > >& text, 42 | const float4& color=make_float4(0.0f, 0.0f, 0.0f, 255.0f)); 43 | 44 | protected: 45 | cudaFont(); 46 | bool init( const char* bitmap_path ); 47 | 48 | float4* mFontMapCPU; 49 | float4* mFontMapGPU; 50 | 51 | int mFontMapWidth; 52 | int mFontMapHeight; 53 | int2 mFontCellSize; 54 | 55 | short4* mCommandCPU; 56 | short4* mCommandGPU; 57 | int mCmdEntries; 58 | 59 | static const uint32_t MaxCommands = 1024; 60 | }; 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /util/cuda/cudaMappedMemory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #ifndef __CUDA_MAPPED_MEMORY_H_ 6 | #define __CUDA_MAPPED_MEMORY_H_ 7 | 8 | 9 | #include "cudaUtility.h" 10 | 11 | 12 | /** 13 | * Allocate ZeroCopy mapped memory, shared between CUDA and CPU. 
14 | * @ingroup util 15 | */ 16 | inline bool cudaAllocMapped( void** cpuPtr, void** gpuPtr, size_t size ) 17 | { 18 | if( !cpuPtr || !gpuPtr || size == 0 ) 19 | return false; 20 | 21 | //CUDA(cudaSetDeviceFlags(cudaDeviceMapHost)); 22 | 23 | if( CUDA_FAILED(cudaHostAlloc(cpuPtr, size, cudaHostAllocMapped)) ) 24 | return false; 25 | 26 | if( CUDA_FAILED(cudaHostGetDevicePointer(gpuPtr, *cpuPtr, 0)) ) 27 | return false; 28 | 29 | memset(*cpuPtr, 0, size); 30 | printf("[cuda] cudaAllocMapped %zu bytes, CPU %p GPU %p\n", size, *cpuPtr, *gpuPtr); 31 | return true; 32 | } 33 | 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /util/cuda/cudaNormalize.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #include "cudaNormalize.h" 6 | 7 | 8 | 9 | // gpuNormalize 10 | template 11 | __global__ void gpuNormalize( T* input, T* output, int width, int height, float scaling_factor ) 12 | { 13 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 14 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 15 | 16 | if( x >= width || y >= height ) 17 | return; 18 | 19 | const T px = input[ y * width + x ]; 20 | 21 | output[y*width+x] = make_float4(px.x * scaling_factor, 22 | px.y * scaling_factor, 23 | px.z * scaling_factor, 24 | px.w * scaling_factor); 25 | } 26 | 27 | 28 | // cudaNormalizeRGBA 29 | cudaError_t cudaNormalizeRGBA( float4* input, const float2& input_range, 30 | float4* output, const float2& output_range, 31 | size_t width, size_t height ) 32 | { 33 | if( !input || !output ) 34 | return cudaErrorInvalidDevicePointer; 35 | 36 | if( width == 0 || height == 0 ) 37 | return cudaErrorInvalidValue; 38 | 39 | const float multiplier = output_range.y / input_range.y; 40 | 41 | // launch kernel 42 | const dim3 blockDim(8, 8); 43 | const dim3 gridDim(iDivUp(width,blockDim.x), iDivUp(height,blockDim.y)); 44 | 45 | gpuNormalize<<>>(input, output, width, 
height, multiplier); 46 | 47 | return CUDA(cudaGetLastError()); 48 | } 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /util/cuda/cudaNormalize.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #ifndef __CUDA_NORMALIZE_H__ 6 | #define __CUDA_NORMALIZE_H__ 7 | 8 | 9 | #include "cudaUtility.h" 10 | 11 | 12 | /** 13 | * Rebase the pixel intensities of an image between two scales. 14 | * For example, convert an image with values 0.0-255 to 0.0-1.0. 15 | * @ingroup util 16 | */ 17 | cudaError_t cudaNormalizeRGBA( float4* input, const float2& input_range, 18 | float4* output, const float2& output_range, 19 | size_t width, size_t height ); 20 | 21 | #endif 22 | 23 | -------------------------------------------------------------------------------- /util/cuda/cudaOverlay.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #include "cudaOverlay.h" 6 | 7 | 8 | static inline __device__ __host__ bool eq_less( float a, float b, float epsilon ) 9 | { 10 | return (a > (b - epsilon) && a < (b + epsilon)) ? 
true : false; 11 | } 12 | 13 | template 14 | __global__ void gpuRectOutlines( T* input, T* output, int width, int height, 15 | float4* rects, int numRects, float4 color ) 16 | { 17 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 18 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 19 | 20 | if( x >= width || y >= height ) 21 | return; 22 | 23 | const T px_in = input[ y * width + x ]; 24 | T px_out = px_in; 25 | 26 | const float fx = x; 27 | const float fy = y; 28 | 29 | const float thick = 10.0f; 30 | const float alpha = color.w / 255.0f; 31 | const float ialph = 1.0f - alpha; 32 | 33 | for( int nr=0; nr < numRects; nr++ ) 34 | { 35 | const float4 r = rects[nr]; 36 | 37 | //printf("%i %i %i %f %f %f %f\n", numRects, x, y, r.x, r.y, r.z, r.w); 38 | 39 | if( fy >= r.y && fy <= r.w /*&& (eq_less(fx, r.x, ep) || eq_less(fx, r.z, ep))*/ ) 40 | { 41 | if( fx >= r.x && fx <= r.z /*&& (eq_less(fy, r.y, ep) || eq_less(fy, r.w, ep))*/ ) 42 | { 43 | //printf("cuda rect %i %i\n", x, y); 44 | 45 | px_out.x = alpha * color.x + ialph * px_out.x; 46 | px_out.y = alpha * color.y + ialph * px_out.y; 47 | px_out.z = alpha * color.z + ialph * px_out.z; 48 | } 49 | } 50 | } 51 | 52 | output[y * width + x] = px_out; 53 | } 54 | 55 | 56 | cudaError_t cudaRectOutlineOverlay( float4* input, float4* output, uint32_t width, uint32_t height, float4* boundingBoxes, int numBoxes, const float4& color ) 57 | { 58 | if( !input || !output || width == 0 || height == 0 || !boundingBoxes || numBoxes == 0 ) 59 | return cudaErrorInvalidValue; 60 | 61 | // launch kernel 62 | const dim3 blockDim(8, 8); 63 | const dim3 gridDim(iDivUp(width,blockDim.x), iDivUp(height,blockDim.y)); 64 | 65 | gpuRectOutlines<<>>(input, output, width, height, boundingBoxes, numBoxes, color); 66 | 67 | return cudaGetLastError(); 68 | } 69 | -------------------------------------------------------------------------------- /util/cuda/cudaOverlay.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #ifndef __CUDA_OVERLAY_H__ 6 | #define __CUDA_OVERLAY_H__ 7 | 8 | #include "cudaUtility.h" 9 | 10 | 11 | /** 12 | * cudaRectOutlineOverlay 13 | * @ingroup util 14 | */ 15 | cudaError_t cudaRectOutlineOverlay( float4* input, float4* output, uint32_t width, uint32_t height, float4* boundingBoxes, int numBoxes, const float4& color ); 16 | 17 | 18 | /** 19 | * cudaRectFillOverlay 20 | * @ingroup util 21 | */ 22 | //cudaError_t cudaRectFillOverlay( float4* input, float4* output, uint32_t width, uint32_t height, float4* boundingBoxes, int numBoxes, const float4& color ); 23 | 24 | 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /util/cuda/cudaRGB.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv 3 | */ 4 | 5 | #include "cudaRGB.h" 6 | 7 | //------------------------------------------------------------------------------------------------------------------------- 8 | 9 | __global__ void RGBToRGBAf(uchar3* srcImage, 10 | float4* dstImage, 11 | uint32_t width, uint32_t height) 12 | { 13 | int x, y, pixel; 14 | 15 | x = (blockIdx.x * blockDim.x) + threadIdx.x; 16 | y = (blockIdx.y * blockDim.y) + threadIdx.y; 17 | 18 | pixel = y * width + x; 19 | 20 | if (x >= width) 21 | return; 22 | 23 | if (y >= height) 24 | return; 25 | 26 | // printf("cuda thread %i %i %i %i pixel %i \n", x, y, width, height, pixel); 27 | 28 | const float s = 1.0f; 29 | const uchar3 px = srcImage[pixel]; 30 | 31 | dstImage[pixel] = make_float4(px.x * s, px.y * s, px.z * s, 255.0f * s); 32 | } 33 | 34 | cudaError_t cudaRGBToRGBAf( uchar3* srcDev, float4* destDev, size_t width, size_t height ) 35 | { 36 | if( !srcDev || !destDev ) 37 | return cudaErrorInvalidDevicePointer; 38 | 39 | const dim3 blockDim(8,8,1); 40 | const dim3 
gridDim(iDivUp(width,blockDim.x), iDivUp(height,blockDim.y), 1); 41 | 42 | RGBToRGBAf<<>>( srcDev, destDev, width, height ); 43 | 44 | return CUDA(cudaGetLastError()); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /util/cuda/cudaRGB.h: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #ifndef __CUDA_RGB_CONVERT_H 6 | #define __CUDA_RGB_CONVERT_H 7 | 8 | 9 | #include "cudaUtility.h" 10 | #include 11 | 12 | 13 | /** 14 | * Convert 8-bit fixed-point RGB image to 32-bit floating-point RGBA image 15 | * @ingroup util 16 | */ 17 | cudaError_t cudaRGBToRGBAf( uchar3* input, float4* output, size_t width, size_t height ); 18 | 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /util/cuda/cudaResize.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #include "cudaResize.h" 6 | 7 | 8 | 9 | // gpuResample 10 | template 11 | __global__ void gpuResize( float2 scale, T* input, int iWidth, T* output, int oWidth, int oHeight ) 12 | { 13 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 14 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 15 | 16 | if( x >= oWidth || y >= oHeight ) 17 | return; 18 | 19 | const int dx = ((float)x * scale.x); 20 | const int dy = ((float)y * scale.y); 21 | 22 | const T px = input[ dy * iWidth + dx ]; 23 | 24 | output[y*oWidth+x] = px; 25 | } 26 | 27 | 28 | // cudaResize 29 | cudaError_t cudaResize( float* input, size_t inputWidth, size_t inputHeight, 30 | float* output, size_t outputWidth, size_t outputHeight ) 31 | { 32 | if( !input || !output ) 33 | return cudaErrorInvalidDevicePointer; 34 | 35 | if( inputWidth == 0 || outputWidth == 0 || inputHeight == 0 || outputHeight == 0 ) 36 | return cudaErrorInvalidValue; 37 | 38 | const float2 scale = make_float2( 
float(inputWidth) / float(outputWidth), 39 | float(inputHeight) / float(outputHeight) ); 40 | 41 | // launch kernel 42 | const dim3 blockDim(8, 8); 43 | const dim3 gridDim(iDivUp(outputWidth,blockDim.x), iDivUp(outputHeight,blockDim.y)); 44 | 45 | gpuResize<<>>(scale, input, inputWidth, output, outputWidth, outputHeight); 46 | 47 | return CUDA(cudaGetLastError()); 48 | } 49 | 50 | 51 | // cudaResizeRGBA 52 | cudaError_t cudaResizeRGBA( float4* input, size_t inputWidth, size_t inputHeight, 53 | float4* output, size_t outputWidth, size_t outputHeight ) 54 | { 55 | if( !input || !output ) 56 | return cudaErrorInvalidDevicePointer; 57 | 58 | if( inputWidth == 0 || outputWidth == 0 || inputHeight == 0 || outputHeight == 0 ) 59 | return cudaErrorInvalidValue; 60 | 61 | const float2 scale = make_float2( float(inputWidth) / float(outputWidth), 62 | float(inputHeight) / float(outputHeight) ); 63 | 64 | // launch kernel 65 | const dim3 blockDim(8, 8); 66 | const dim3 gridDim(iDivUp(outputWidth,blockDim.x), iDivUp(outputHeight,blockDim.y)); 67 | 68 | gpuResize<<>>(scale, input, inputWidth, output, outputWidth, outputHeight); 69 | 70 | return CUDA(cudaGetLastError()); 71 | } 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /util/cuda/cudaResize.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #ifndef __CUDA_RESIZE_H__ 6 | #define __CUDA_RESIZE_H__ 7 | 8 | 9 | #include "cudaUtility.h" 10 | 11 | 12 | /** 13 | * Function for increasing or decreasing the size of an image on the GPU. 14 | * @ingroup util 15 | */ 16 | cudaError_t cudaResize( float* input, size_t inputWidth, size_t inputHeight, 17 | float* output, size_t outputWidth, size_t outputHeight ); 18 | 19 | 20 | /** 21 | * Function for increasing or decreasing the size of an image on the GPU. 
22 | * @ingroup util 23 | */ 24 | cudaError_t cudaResizeRGBA( float4* input, size_t inputWidth, size_t inputHeight, 25 | float4* output, size_t outputWidth, size_t outputHeight ); 26 | 27 | 28 | 29 | 30 | #endif 31 | 32 | -------------------------------------------------------------------------------- /util/cuda/cudaUtility.h: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #ifndef __CUDA_UTILITY_H_ 6 | #define __CUDA_UTILITY_H_ 7 | 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | 15 | /** 16 | * Execute a CUDA call and print out any errors 17 | * @return the original cudaError_t result 18 | * @ingroup util 19 | */ 20 | #define CUDA(x) cudaCheckError((x), #x, __FILE__, __LINE__) 21 | 22 | /** 23 | * Evaluates to true on success 24 | * @ingroup util 25 | */ 26 | #define CUDA_SUCCESS(x) (CUDA(x) == cudaSuccess) 27 | 28 | /** 29 | * Evaluates to true on failure 30 | * @ingroup util 31 | */ 32 | #define CUDA_FAILED(x) (CUDA(x) != cudaSuccess) 33 | 34 | /** 35 | * Return from the boolean function if CUDA call fails 36 | * @ingroup util 37 | */ 38 | #define CUDA_VERIFY(x) if(CUDA_FAILED(x)) return false; 39 | 40 | /** 41 | * LOG_CUDA string. 
42 | * @ingroup util 43 | */ 44 | #define LOG_CUDA "[cuda] " 45 | 46 | /* 47 | * define this if you want all cuda calls to be printed 48 | */ 49 | //#define CUDA_TRACE 50 | 51 | 52 | 53 | /** 54 | * cudaCheckError - print the call text, error string and file:line when retval is not cudaSuccess (with CUDA_TRACE defined the call text is printed for every call), then return retval unchanged 55 | * @ingroup util 56 | */ 57 | inline cudaError_t cudaCheckError(cudaError_t retval, const char* txt, const char* file, int line ) 58 | { 59 | #if !defined(CUDA_TRACE) 60 | if( retval == cudaSuccess) 61 | return cudaSuccess; 62 | #endif 63 | 64 | //int activeDevice = -1; 65 | //cudaGetDevice(&activeDevice); 66 | 67 | //Log("[cuda] device %i - %s\n", activeDevice, txt); 68 | 69 | printf(LOG_CUDA "%s\n", txt); 70 | 71 | 72 | if( retval != cudaSuccess ) 73 | { 74 | printf(LOG_CUDA " %s (error %u) (hex 0x%02X)\n", cudaGetErrorString(retval), retval, retval); 75 | printf(LOG_CUDA " %s:%i\n", file, line); 76 | } 77 | 78 | return retval; 79 | } 80 | 81 | 82 | /** 83 | * iDivUp - integer division of a by b, rounded up when there is a remainder 84 | * @ingroup util 85 | */ 86 | inline __device__ __host__ int iDivUp( int a, int b ) { return (a % b != 0) ?
(a / b + 1) : (a / b); } 87 | 88 | 89 | 90 | #endif 91 | -------------------------------------------------------------------------------- /util/cuda/cudaYUV-NV12.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #include "cudaYUV.h" 6 | 7 | 8 | #define COLOR_COMPONENT_MASK 0x3FF 9 | #define COLOR_COMPONENT_BIT_SIZE 10 10 | 11 | #define FIXED_DECIMAL_POINT 24 12 | #define FIXED_POINT_MULTIPLIER 1.0f 13 | #define FIXED_COLOR_COMPONENT_MASK 0xffffffff 14 | 15 | #define MUL(x,y) (x*y) 16 | 17 | 18 | 19 | __constant__ uint32_t constAlpha; 20 | __constant__ float constHueColorSpaceMat[9]; 21 | 22 | 23 | // YUV2RGB - convert one pixel from yuvi[0..2] = {Y, U, V} to red/green/blue; U and V are re-centred by subtracting 512 24 | __device__ void YUV2RGB(uint32_t *yuvi, float *red, float *green, float *blue) 25 | { 26 | 27 | 28 | // Disabled alternative: hue-adjusted conversion through constHueColorSpaceMat 29 | /* 30 | float luma, chromaCb, chromaCr; 31 | 32 | luma = (float)yuvi[0]; 33 | chromaCb = (float)((int)yuvi[1] - 512.0f); 34 | chromaCr = (float)((int)yuvi[2] - 512.0f); 35 | 36 | // Convert YUV To RGB with hue adjustment 37 | *red = MUL(luma, constHueColorSpaceMat[0]) + 38 | MUL(chromaCb, constHueColorSpaceMat[1]) + 39 | MUL(chromaCr, constHueColorSpaceMat[2]); 40 | *green= MUL(luma, constHueColorSpaceMat[3]) + 41 | MUL(chromaCb, constHueColorSpaceMat[4]) + 42 | MUL(chromaCr, constHueColorSpaceMat[5]); 43 | *blue = MUL(luma, constHueColorSpaceMat[6]) + 44 | MUL(chromaCb, constHueColorSpaceMat[7]) + 45 | MUL(chromaCr, constHueColorSpaceMat[8]);*/ 46 | 47 | const float luma = float(yuvi[0]); 48 | const float u = float(yuvi[1]) - 512.0f; 49 | const float v = float(yuvi[2]) - 512.0f; 50 | 51 | /*R = Y + 1.140V 52 | G = Y - 0.395U - 0.581V 53 | B = Y + 2.032U*/ 54 | 55 | /**green = luma + 1.140f * v; 56 | *blue = luma - 0.395f * u - 0.581f * v; 57 | *red = luma + 2.032f * u;*/ 58 | 59 | *red = luma + 1.140f * v; 60 | *green = luma - 0.395f * u - 0.581f * v; 61 | *blue = luma + 2.032f * u; 62 | } 63 | 64 | 65 | __device__ uint32_t RGBAPACK_8bit(float red, float
green, float blue, uint32_t alpha) 66 | { 67 | uint32_t ARGBpixel = 0; 68 | 69 | // Clamp final 8 bit results 70 | red = min(max(red, 0.0f), 255.0f); 71 | green = min(max(green, 0.0f), 255.0f); 72 | blue = min(max(blue, 0.0f), 255.0f); 73 | 74 | // Convert to 8 bit unsigned integers per color component 75 | ARGBpixel = ((((uint32_t)red) << 24) | 76 | (((uint32_t)green) << 16) | 77 | (((uint32_t)blue) << 8) | (uint32_t)alpha); 78 | 79 | return ARGBpixel; 80 | } 81 | 82 | 83 | __device__ uint32_t RGBAPACK_10bit(float red, float green, float blue, uint32_t alpha) 84 | { 85 | uint32_t ARGBpixel = 0; 86 | 87 | // Clamp final 10 bit results 88 | red = min(max(red, 0.0f), 1023.f); 89 | green = min(max(green, 0.0f), 1023.f); 90 | blue = min(max(blue, 0.0f), 1023.f); 91 | 92 | // Convert to 8 bit unsigned integers per color component (drop the two low bits of each 10 bit value) 93 | ARGBpixel = ((((uint32_t)red >> 2) << 24) | 94 | (((uint32_t)green >> 2) << 16) | 95 | (((uint32_t)blue >> 2) << 8) | (uint32_t)alpha); 96 | 97 | return ARGBpixel; 98 | } 99 | 100 | 101 | // CUDA kernel that writes only the NV12 luma to the output, replicated into all three color channels 102 | /*extern "C"*/ 103 | __global__ void Passthru(uint32_t *srcImage, size_t nSourcePitch, 104 | uint32_t *dstImage, size_t nDestPitch, 105 | uint32_t width, uint32_t height) 106 | { 107 | int x, y; 108 | uint32_t yuv101010Pel[2]; 109 | uint32_t processingPitch = ((width) + 63) & ~63; 110 | uint32_t dstImagePitch = nDestPitch >> 2; 111 | uint8_t *srcImageU8 = (uint8_t *)srcImage; 112 | 113 | processingPitch = nSourcePitch; 114 | 115 | // Each thread processes 2 horizontally adjacent pixels, hence the factor of 2 in x; out-of-range threads simply return 116 | x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); 117 | y = blockIdx.y * blockDim.y + threadIdx.y; 118 | 119 | if (x >= width) 120 | return; //x = width - 1; 121 | 122 | if (y >= height) 123 | return; // y = height - 1; 124 | 125 | // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way.
126 | // if we move to texture we could read 4 luminance values 127 | yuv101010Pel[0] = (srcImageU8[y * processingPitch + x ]); 128 | yuv101010Pel[1] = (srcImageU8[y * processingPitch + x + 1]); 129 | 130 | // this steps performs the color conversion 131 | float luma[2]; 132 | 133 | luma[0] = (yuv101010Pel[0] & 0x00FF); 134 | luma[1] = (yuv101010Pel[1] & 0x00FF); 135 | 136 | // Clamp the results to RGBA 137 | dstImage[y * dstImagePitch + x ] = RGBAPACK_8bit(luma[0], luma[0], luma[0], constAlpha); 138 | dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_8bit(luma[1], luma[1], luma[1], constAlpha); 139 | } 140 | 141 | 142 | // CUDA kernel for outputing the final ARGB output from NV12; 143 | /*extern "C"*/ 144 | __global__ void NV12ToARGB(uint32_t *srcImage, size_t nSourcePitch, 145 | uint32_t *dstImage, size_t nDestPitch, 146 | uint32_t width, uint32_t height) 147 | { 148 | int x, y; 149 | uint32_t yuv101010Pel[2]; 150 | uint32_t processingPitch = ((width) + 63) & ~63; 151 | uint32_t dstImagePitch = nDestPitch >> 2; 152 | uint8_t *srcImageU8 = (uint8_t *)srcImage; 153 | 154 | processingPitch = nSourcePitch; 155 | 156 | // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread 157 | x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); 158 | y = blockIdx.y * blockDim.y + threadIdx.y; 159 | 160 | if (x >= width) 161 | return; //x = width - 1; 162 | 163 | if (y >= height) 164 | return; // y = height - 1; 165 | 166 | // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way. 167 | // if we move to texture we could read 4 luminance values 168 | yuv101010Pel[0] = (srcImageU8[y * processingPitch + x ]) << 2; 169 | yuv101010Pel[1] = (srcImageU8[y * processingPitch + x + 1]) << 2; 170 | 171 | uint32_t chromaOffset = processingPitch * height; 172 | int y_chroma = y >> 1; 173 | 174 | if (y & 1) // odd scanline ? 
175 | { 176 | uint32_t chromaCb; 177 | uint32_t chromaCr; 178 | 179 | chromaCb = srcImageU8[chromaOffset + y_chroma * processingPitch + x ]; 180 | chromaCr = srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1]; 181 | 182 | if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically 183 | { 184 | chromaCb = (chromaCb + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x ] + 1) >> 1; 185 | chromaCr = (chromaCr + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x + 1] + 1) >> 1; 186 | } 187 | 188 | yuv101010Pel[0] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); 189 | yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); 190 | 191 | yuv101010Pel[1] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); 192 | yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); 193 | } 194 | else 195 | { 196 | yuv101010Pel[0] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x ] << (COLOR_COMPONENT_BIT_SIZE + 2)); 197 | yuv101010Pel[0] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); 198 | 199 | yuv101010Pel[1] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x ] << (COLOR_COMPONENT_BIT_SIZE + 2)); 200 | yuv101010Pel[1] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); 201 | } 202 | 203 | // this steps performs the color conversion 204 | uint32_t yuvi[6]; 205 | float red[2], green[2], blue[2]; 206 | 207 | yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK); 208 | yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); 209 | yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); 210 | 211 | yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK); 212 | yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); 213 | yuvi[5] = ((yuv101010Pel[1] >> 
(COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); 214 | 215 | // YUV to RGB Transformation conversion 216 | YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]); 217 | YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]); 218 | 219 | // Clamp the results to RGBA 220 | dstImage[y * dstImagePitch + x ] = RGBAPACK_10bit(red[0], green[0], blue[0], constAlpha); 221 | dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_10bit(red[1], green[1], blue[1], constAlpha); 222 | } 223 | 224 | 225 | bool nv12ColorspaceSetup = false; 226 | 227 | 228 | // cudaNV12ToRGBA - validate arguments, set up the colorspace constants on first use, then launch NV12ToARGB 229 | cudaError_t cudaNV12ToRGBA( uint8_t* srcDev, size_t srcPitch, uchar4* destDev, size_t destPitch, size_t width, size_t height ) 230 | { 231 | if( !srcDev || !destDev ) 232 | return cudaErrorInvalidDevicePointer; 233 | 234 | if( srcPitch == 0 || destPitch == 0 || width == 0 || height == 0 ) 235 | return cudaErrorInvalidValue; 236 | 237 | if( !nv12ColorspaceSetup ) 238 | cudaNV12SetupColorspace(); 239 | 240 | const dim3 blockDim(32,16,1); // each thread converts 2 pixels, so one block spans 2*blockDim.x columns 241 | const dim3 gridDim((width+(2*blockDim.x-1))/(2*blockDim.x), (height+(blockDim.y-1))/blockDim.y, 1); 242 | 243 | NV12ToARGB<<<gridDim, blockDim>>>( (uint32_t*)srcDev, srcPitch, (uint32_t*)destDev, destPitch, width, height ); 244 | 245 | return CUDA(cudaGetLastError()); 246 | } 247 | 248 | cudaError_t cudaNV12ToRGBA( uint8_t* srcDev, uchar4* destDev, size_t width, size_t height ) 249 | { 250 | return cudaNV12ToRGBA(srcDev, width * sizeof(uint8_t), destDev, width * sizeof(uchar4), width, height); 251 | } 252 | 253 | 254 | //------------------------------------------------------------------------------------------------------------------------- 255 | 256 | __global__ void NV12ToRGBAf(uint32_t* srcImage, size_t nSourcePitch, 257 | float4* dstImage, size_t nDestPitch, 258 | uint32_t width, uint32_t height) 259 | { 260 | int x, y; 261 | uint32_t yuv101010Pel[2]; 262 | uint32_t processingPitch = ((width) + 63) & ~63; 263 | uint8_t *srcImageU8 = (uint8_t *)srcImage; 264 | 265 | processingPitch =
nSourcePitch; 266 | 267 | // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread 268 | x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); 269 | y = blockIdx.y * blockDim.y + threadIdx.y; 270 | 271 | if (x >= width) 272 | return; //x = width - 1; 273 | 274 | if (y >= height) 275 | return; // y = height - 1; 276 | 277 | #if 1 278 | // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way. 279 | // if we move to texture we could read 4 luminance values 280 | yuv101010Pel[0] = (srcImageU8[y * processingPitch + x ]) << 2; 281 | yuv101010Pel[1] = (srcImageU8[y * processingPitch + x + 1]) << 2; 282 | 283 | uint32_t chromaOffset = processingPitch * height; 284 | int y_chroma = y >> 1; 285 | 286 | if (y & 1) // odd scanline ? 287 | { 288 | uint32_t chromaCb; 289 | uint32_t chromaCr; 290 | 291 | chromaCb = srcImageU8[chromaOffset + y_chroma * processingPitch + x ]; 292 | chromaCr = srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1]; 293 | 294 | if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically 295 | { 296 | chromaCb = (chromaCb + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x ] + 1) >> 1; 297 | chromaCr = (chromaCr + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x + 1] + 1) >> 1; 298 | } 299 | 300 | yuv101010Pel[0] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); 301 | yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); 302 | 303 | yuv101010Pel[1] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); 304 | yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); 305 | } 306 | else 307 | { 308 | yuv101010Pel[0] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x ] << (COLOR_COMPONENT_BIT_SIZE + 2)); 309 | yuv101010Pel[0] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); 310 | 311 | yuv101010Pel[1] |= 
((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x ] << (COLOR_COMPONENT_BIT_SIZE + 2)); 312 | yuv101010Pel[1] |= ((uint32_t)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); 313 | } 314 | 315 | // this steps performs the color conversion 316 | uint32_t yuvi[6]; 317 | float red[2], green[2], blue[2]; 318 | 319 | yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK); 320 | yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); 321 | yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); 322 | 323 | yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK); 324 | yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); 325 | yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); 326 | 327 | // YUV to RGB Transformation conversion 328 | YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]); 329 | YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]); 330 | 331 | // Clamp the results to RGBA 332 | //printf("cuda thread %i %i %f %f %f\n", x, y, red[0], green[0], blue[0]); 333 | 334 | const float s = 1.0f / 1024.0f * 255.0f; 335 | 336 | dstImage[y * width + x] = make_float4(red[0] * s, green[0] * s, blue[0] * s, 1.0f); 337 | dstImage[y * width + x + 1] = make_float4(red[1] * s, green[1] * s, blue[1] * s, 1.0f); 338 | #else 339 | //printf("cuda thread %i %i %i %i \n", x, y, width, height); 340 | 341 | dstImage[y * width + x] = make_float4(1.0f, 0.0f, 0.0f, 1.0f); 342 | dstImage[y * width + x + 1] = make_float4(1.0f, 0.0f, 0.0f, 1.0f); 343 | #endif 344 | } 345 | 346 | 347 | 348 | // cudaNV12ToRGBA 349 | cudaError_t cudaNV12ToRGBAf( uint8_t* srcDev, size_t srcPitch, float4* destDev, size_t destPitch, size_t width, size_t height ) 350 | { 351 | if( !srcDev || !destDev ) 352 | return cudaErrorInvalidDevicePointer; 353 | 354 | if( srcPitch == 0 || destPitch == 0 || width == 0 || height == 0 ) 355 | return 
cudaErrorInvalidValue; 356 | 357 | if( !nv12ColorspaceSetup ) 358 | cudaNV12SetupColorspace(); 359 | 360 | const dim3 blockDim(8,8,1); 361 | // NV12ToRGBAf converts 2 horizontally adjacent pixels per thread, so one block spans 2*blockDim.x columns 362 | const dim3 gridDim(iDivUp(width, 2*blockDim.x), iDivUp(height, blockDim.y), 1); 363 | 364 | NV12ToRGBAf<<<gridDim, blockDim>>>( (uint32_t*)srcDev, srcPitch, destDev, destPitch, width, height ); 365 | 366 | return CUDA(cudaGetLastError()); 367 | } 368 | 369 | cudaError_t cudaNV12ToRGBAf( uint8_t* srcDev, float4* destDev, size_t width, size_t height ) 370 | { 371 | return cudaNV12ToRGBAf(srcDev, width * sizeof(uint8_t), destDev, width * sizeof(float4), width, height); 372 | } 373 | 374 | 375 | // cudaNV12SetupColorspace - fill the constant-memory color matrix (rotated by hue) and alpha value, then mark setup as done 376 | cudaError_t cudaNV12SetupColorspace( float hue ) 377 | { 378 | const float hueSin = sin(hue); 379 | const float hueCos = cos(hue); 380 | 381 | float hueCSC[9]; 382 | 383 | const bool itu601 = false; 384 | 385 | if( itu601 /*CSC == ITU601*/) 386 | { 387 | //CCIR 601 388 | hueCSC[0] = 1.1644f; 389 | hueCSC[1] = hueSin * 1.5960f; 390 | hueCSC[2] = hueCos * 1.5960f; 391 | hueCSC[3] = 1.1644f; 392 | hueCSC[4] = (hueCos * -0.3918f) - (hueSin * 0.8130f); 393 | hueCSC[5] = (hueSin * 0.3918f) - (hueCos * 0.8130f); 394 | hueCSC[6] = 1.1644f; 395 | hueCSC[7] = hueCos * 2.0172f; 396 | hueCSC[8] = hueSin * -2.0172f; 397 | } 398 | else /*if(CSC == ITU709)*/ 399 | { 400 | //CCIR 709 401 | hueCSC[0] = 1.0f; 402 | hueCSC[1] = hueSin * 1.57480f; 403 | hueCSC[2] = hueCos * 1.57480f; 404 | hueCSC[3] = 1.0f; 405 | hueCSC[4] = (hueCos * -0.18732f) - (hueSin * 0.46812f); 406 | hueCSC[5] = (hueSin * 0.18732f) - (hueCos * 0.46812f); 407 | hueCSC[6] = 1.0f; 408 | hueCSC[7] = hueCos * 1.85560f; 409 | hueCSC[8] = hueSin * -1.85560f; 410 | } 411 | 412 | 413 | if( CUDA_FAILED(cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, sizeof(float) * 9)) ) 414 | return cudaErrorInvalidSymbol; 415 | 416 | uint32_t cudaAlpha = ((uint32_t)0xff<< 24); 417 | 418 | if(
CUDA_FAILED(cudaMemcpyToSymbol(constAlpha, &cudaAlpha, sizeof(uint32_t))) ) 419 | return cudaErrorInvalidSymbol; 420 | 421 | nv12ColorspaceSetup = true; 422 | return cudaSuccess; 423 | } 424 | 425 | -------------------------------------------------------------------------------- /util/cuda/cudaYUV-YUYV.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #include "cudaYUV.h" 6 | 7 | 8 | inline __device__ __host__ float clamp(float f, float a, float b) 9 | { 10 | return fmaxf(a, fminf(f, b)); 11 | } 12 | 13 | 14 | /* From RGB to YUV 15 | 16 | Y = 0.299R + 0.587G + 0.114B 17 | U = 0.492 (B-Y) 18 | V = 0.877 (R-Y) 19 | 20 | It can also be represented as: 21 | 22 | Y = 0.299R + 0.587G + 0.114B 23 | U = -0.147R - 0.289G + 0.436B 24 | V = 0.615R - 0.515G - 0.100B 25 | 26 | From YUV to RGB 27 | 28 | R = Y + 1.140V 29 | G = Y - 0.395U - 0.581V 30 | B = Y + 2.032U 31 | */ 32 | 33 | struct __align__(8) uchar8 34 | { 35 | uint8_t a0, a1, a2, a3, a4, a5, a6, a7; 36 | }; 37 | static __host__ __device__ __forceinline__ uchar8 make_uchar8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7) 38 | { 39 | uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; 40 | return val; 41 | } 42 | 43 | 44 | //----------------------------------------------------------------------------------- 45 | // YUYV/UYVY to RGBA (formatUYVY selects the UYVY byte order, otherwise YUYV); one thread converts one 2-pixel macropixel 46 | //----------------------------------------------------------------------------------- 47 | template<bool formatUYVY> 48 | __global__ void yuyvToRgba( uchar4* src, int srcAlignedWidth, uchar8* dst, int dstAlignedWidth, int width, int height ) 49 | { 50 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 51 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 52 | 53 | if( x >= srcAlignedWidth || y >= height ) 54 | return; 55 | 56 | const uchar4 macroPx = src[y * srcAlignedWidth + x]; 57 | 58 | // Y0 is the brightness of pixel 0, Y1 the brightness of
pixel 1. 59 | // U0 and V0 are the color of both pixels. 60 | // UYVY [ U0 | Y0 | V0 | Y1 ] 61 | // YUYV [ Y0 | U0 | Y1 | V0 ] 62 | const float y0 = formatUYVY ? macroPx.y : macroPx.x; 63 | const float y1 = formatUYVY ? macroPx.w : macroPx.z; 64 | const float u = (formatUYVY ? macroPx.x : macroPx.y) - 128.0f; 65 | const float v = (formatUYVY ? macroPx.z : macroPx.w) - 128.0f; 66 | 67 | const float4 px0 = make_float4( y0 + 1.4065f * v, 68 | y0 - 0.3455f * u - 0.7169f * v, 69 | y0 + 1.7790f * u, 255.0f ); 70 | 71 | const float4 px1 = make_float4( y1 + 1.4065f * v, 72 | y1 - 0.3455f * u - 0.7169f * v, 73 | y1 + 1.7790f * u, 255.0f ); 74 | 75 | dst[y * dstAlignedWidth + x] = make_uchar8( clamp(px0.x, 0.0f, 255.0f), 76 | clamp(px0.y, 0.0f, 255.0f), 77 | clamp(px0.z, 0.0f, 255.0f), 78 | clamp(px0.w, 0.0f, 255.0f), 79 | clamp(px1.x, 0.0f, 255.0f), 80 | clamp(px1.y, 0.0f, 255.0f), 81 | clamp(px1.z, 0.0f, 255.0f), 82 | clamp(px1.w, 0.0f, 255.0f) ); 83 | } 84 | 85 | template<bool formatUYVY> 86 | cudaError_t launchYUYV( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height) 87 | { 88 | if( !input || !inputPitch || !output || !outputPitch || !width || !height ) 89 | return cudaErrorInvalidValue; 90 | 91 | const dim3 block(8,8); 92 | const dim3 grid(iDivUp(width/2, block.x), iDivUp(height, block.y)); 93 | 94 | const int srcAlignedWidth = inputPitch / sizeof(uchar4); // normally would be uchar2, but we're doubling up pixels 95 | const int dstAlignedWidth = outputPitch / sizeof(uchar8); // normally would be uchar4 ^^^ 96 | 97 | //printf("yuyvToRgba %zu %zu %i %i %i %i %i\n", width, height, (int)formatUYVY, srcAlignedWidth, dstAlignedWidth, grid.x, grid.y); 98 | 99 | yuyvToRgba<formatUYVY><<<grid, block>>>((uchar4*)input, srcAlignedWidth, (uchar8*)output, dstAlignedWidth, width, height); 100 | 101 | return CUDA(cudaGetLastError()); 102 | } 103 | 104 | 105 | cudaError_t cudaUYVYToRGBA( uchar2* input, uchar4* output, size_t width, size_t height ) 106 | { 107 | return
cudaUYVYToRGBA(input, width * sizeof(uchar2), output, width * sizeof(uchar4), width, height); 108 | } 109 | // UYVY with explicit row pitches in bytes: formatUYVY = true 110 | cudaError_t cudaUYVYToRGBA( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height ) 111 | { 112 | return launchYUYV<true>(input, inputPitch, output, outputPitch, width, height); 113 | } 114 | 115 | cudaError_t cudaYUYVToRGBA( uchar2* input, uchar4* output, size_t width, size_t height ) 116 | { 117 | return cudaYUYVToRGBA(input, width * sizeof(uchar2), output, width * sizeof(uchar4), width, height); 118 | } 119 | // YUYV with explicit row pitches in bytes: formatUYVY = false 120 | cudaError_t cudaYUYVToRGBA( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height ) 121 | { 122 | return launchYUYV<false>(input, inputPitch, output, outputPitch, width, height); 123 | } 124 | 125 | 126 | //----------------------------------------------------------------------------------- 127 | // YUYV/UYVY to grayscale (keeps only the two luma samples of each macropixel, scaled by 1/255) 128 | //----------------------------------------------------------------------------------- 129 | 130 | template<bool formatUYVY> 131 | __global__ void yuyvToGray( uchar4* src, int srcAlignedWidth, float2* dst, int dstAlignedWidth, int width, int height ) 132 | { 133 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 134 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 135 | 136 | if( x >= srcAlignedWidth || y >= height ) 137 | return; 138 | 139 | const uchar4 macroPx = src[y * srcAlignedWidth + x]; 140 | 141 | const float y0 = formatUYVY ? macroPx.y : macroPx.x; 142 | const float y1 = formatUYVY ?
macroPx.w : macroPx.z; 143 | 144 | dst[y * dstAlignedWidth + x] = make_float2(y0/255.0f, y1/255.0f); 145 | } 146 | 147 | template<bool formatUYVY> 148 | cudaError_t launchGrayYUYV( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height) 149 | { 150 | if( !input || !inputPitch || !output || !outputPitch || !width || !height ) 151 | return cudaErrorInvalidValue; 152 | 153 | const dim3 block(8,8); 154 | const dim3 grid(iDivUp(width/2, block.x), iDivUp(height, block.y)); 155 | 156 | const int srcAlignedWidth = inputPitch / sizeof(uchar4); // normally would be uchar2, but we're doubling up pixels 157 | const int dstAlignedWidth = outputPitch / sizeof(float2); // normally would be float ^^^ 158 | 159 | yuyvToGray<formatUYVY><<<grid, block>>>((uchar4*)input, srcAlignedWidth, (float2*)output, dstAlignedWidth, width, height); 160 | 161 | return CUDA(cudaGetLastError()); 162 | } 163 | 164 | cudaError_t cudaUYVYToGray( uchar2* input, float* output, size_t width, size_t height ) 165 | { 166 | return cudaUYVYToGray(input, width * sizeof(uchar2), output, width * sizeof(float), width, height); // output rows hold floats, so the pitch is width * sizeof(float) 167 | } 168 | 169 | cudaError_t cudaUYVYToGray( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height ) 170 | { 171 | return launchGrayYUYV<true>(input, inputPitch, output, outputPitch, width, height); 172 | } 173 | 174 | cudaError_t cudaYUYVToGray( uchar2* input, float* output, size_t width, size_t height ) 175 | { 176 | return cudaYUYVToGray(input, width * sizeof(uchar2), output, width * sizeof(float), width, height); 177 | } 178 | 179 | cudaError_t cudaYUYVToGray( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height ) 180 | { 181 | return launchGrayYUYV<false>(input, inputPitch, output, outputPitch, width, height); 182 | } 183 | 184 | -------------------------------------------------------------------------------- /util/cuda/cudaYUV-YV12.cu:
-------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #include "cudaYUV.h" 6 | 7 | 8 | 9 | 10 | // rgb_to_y - integer luma approximation: (30*r + 59*g + 11*b) / 100 11 | inline __device__ void rgb_to_y(const uint8_t r, const uint8_t g, const uint8_t b, uint8_t& y) 12 | { 13 | y = static_cast<uint8_t>(((int)(30 * r) + (int)(59 * g) + (int)(11 * b)) / 100); 14 | } 15 | // rgb_to_yuv - luma as in rgb_to_y, plus integer chroma approximations offset by 128 (12800 / 100) 16 | inline __device__ void rgb_to_yuv(const uint8_t r, const uint8_t g, const uint8_t b, uint8_t& y, uint8_t& u, uint8_t& v) 17 | { 18 | rgb_to_y(r, g, b, y); 19 | u = static_cast<uint8_t>(((int)(-17 * r) - (int)(33 * g) + (int)(50 * b) + 12800) / 100); 20 | v = static_cast<uint8_t>(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100); 21 | } 22 | // RGB_to_YV12 - each thread converts a 2x2 pixel quad: four luma samples and one U/V pair taken from the bottom-right pixel 23 | template<typename T, bool formatYV12> 24 | __global__ void RGB_to_YV12( T* src, int srcAlignedWidth, uint8_t* dst, int dstPitch, int width, int height ) 25 | { 26 | const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2; 27 | const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2; 28 | 29 | const int x1 = x + 1; 30 | const int y1 = y + 1; 31 | 32 | if( x1 >= width || y1 >= height ) 33 | return; 34 | 35 | const int planeSize = height * dstPitch; 36 | 37 | uint8_t* y_plane = dst; 38 | uint8_t* u_plane; 39 | uint8_t* v_plane; 40 | 41 | if( formatYV12 ) 42 | { 43 | u_plane = y_plane + planeSize; 44 | v_plane = u_plane + (planeSize / 4); // size of U & V planes is 25% of Y plane 45 | } 46 | else 47 | { 48 | v_plane = y_plane + planeSize; // in I420, order of U & V planes is reversed 49 | u_plane = v_plane + (planeSize / 4); 50 | } 51 | 52 | T px; 53 | uint8_t y_val, u_val, v_val; 54 | 55 | px = src[y * srcAlignedWidth + x]; 56 | rgb_to_y(px.x, px.y, px.z, y_val); 57 | y_plane[y * dstPitch + x] = y_val; 58 | 59 | px = src[y * srcAlignedWidth + x1]; 60 | rgb_to_y(px.x, px.y, px.z, y_val); 61 | y_plane[y * dstPitch + x1] = y_val; 62 | 63 | px = src[y1 * srcAlignedWidth + x]; 64 | rgb_to_y(px.x, px.y, px.z, y_val); 65 | y_plane[y1 * dstPitch + x] = y_val; 66 | 67 | px = src[y1 * srcAlignedWidth + x1];
68 | rgb_to_yuv(px.x, px.y, px.z, y_val, u_val, v_val); 69 | y_plane[y1 * dstPitch + x1] = y_val; 70 | 71 | const int uvPitch = dstPitch / 2; 72 | const int uvIndex = (y / 2) * uvPitch + (x / 2); 73 | 74 | u_plane[uvIndex] = u_val; 75 | v_plane[uvIndex] = v_val; 76 | } 77 | // launch420 - validate arguments and launch RGB_to_YV12 with one thread per 2x2 pixel quad 78 | template<typename T, bool formatYV12> 79 | cudaError_t launch420( T* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height) 80 | { 81 | if( !input || !inputPitch || !output || !outputPitch || !width || !height ) 82 | return cudaErrorInvalidValue; 83 | 84 | const dim3 block(32, 8); 85 | const dim3 grid(iDivUp(width, block.x * 2), iDivUp(height, block.y * 2)); 86 | 87 | const int inputAlignedWidth = inputPitch / sizeof(T); 88 | 89 | RGB_to_YV12<T, formatYV12><<<grid, block>>>(input, inputAlignedWidth, output, outputPitch, width, height); 90 | 91 | return CUDA(cudaGetLastError()); 92 | } 93 | 94 | 95 | // NOTE(review): the flag value below selects the kernel branch that writes the V plane before the U plane (YV12 plane order) - confirm against the original file 96 | // cudaRGBAToYV12 97 | cudaError_t cudaRGBAToYV12( uchar4* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height ) 98 | { 99 | return launch420<uchar4,false>( input, inputPitch, output, outputPitch, width, height ); 100 | } 101 | 102 | // cudaRGBAToYV12 103 | cudaError_t cudaRGBAToYV12( uchar4* input, uint8_t* output, size_t width, size_t height ) 104 | { 105 | return cudaRGBAToYV12( input, width * sizeof(uchar4), output, width * sizeof(uint8_t), width, height ); 106 | } 107 | // NOTE(review): the flag value below selects the kernel branch that writes the U plane before the V plane (I420 plane order) - confirm against the original file 108 | // cudaRGBAToI420 109 | cudaError_t cudaRGBAToI420( uchar4* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height ) 110 | { 111 | return launch420<uchar4,true>( input, inputPitch, output, outputPitch, width, height ); 112 | } 113 | 114 | // cudaRGBAToI420 115 | cudaError_t cudaRGBAToI420( uchar4* input, uint8_t* output, size_t width, size_t height ) 116 | { 117 | return cudaRGBAToI420( input, width * sizeof(uchar4), output, width * sizeof(uint8_t), width, height ); 118 | } 119 | 120 | 121 | 122 | #if 0 123 | __global__ void Gray_to_YV12(const GlobPtrSz src, GlobPtr dst) 124 | { 125 |
const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2; 126 | const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2; 127 | 128 | if (x + 1 >= src.cols || y + 1 >= src.rows) 129 | return; 130 | 131 | // get pointers to the data 132 | const size_t planeSize = src.rows * dst.step; 133 | GlobPtr y_plane = globPtr(dst.data, dst.step); 134 | GlobPtr u_plane = globPtr(y_plane.data + planeSize, dst.step / 2); 135 | GlobPtr v_plane = globPtr(u_plane.data + (planeSize / 4), dst.step / 2); 136 | 137 | uint8_t pix; 138 | uint8_t y_val, u_val, v_val; 139 | 140 | pix = src(y, x); 141 | rgb_to_y(pix, pix, pix, y_val); 142 | y_plane(y, x) = y_val; 143 | 144 | pix = src(y, x + 1); 145 | rgb_to_y(pix, pix, pix, y_val); 146 | y_plane(y, x + 1) = y_val; 147 | 148 | pix = src(y + 1, x); 149 | rgb_to_y(pix, pix, pix, y_val); 150 | y_plane(y + 1, x) = y_val; 151 | 152 | pix = src(y + 1, x + 1); 153 | rgb_to_yuv(pix, pix, pix, y_val, u_val, v_val); 154 | y_plane(y + 1, x + 1) = y_val; 155 | u_plane(y / 2, x / 2) = u_val; 156 | v_plane(y / 2, x / 2) = v_val; 157 | } 158 | #endif 159 | 160 | -------------------------------------------------------------------------------- /util/cuda/cudaYUV.h: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #ifndef __CUDA_YUV_CONVERT_H 6 | #define __CUDA_YUV_CONVERT_H 7 | 8 | 9 | #include "cudaUtility.h" 10 | #include 11 | 12 | 13 | ////////////////////////////////////////////////////////////////////////////////// 14 | /// @name RGBA to YUV 4:2:0 planar (I420 & YV12) 15 | /// @ingroup util 16 | ////////////////////////////////////////////////////////////////////////////////// 17 | 18 | ///@{ 19 | 20 | /** 21 | * Convert an RGBA uchar4 buffer into YUV I420 planar. 22 | */ 23 | cudaError_t cudaRGBAToI420( uchar4* input, uint8_t* output, size_t width, size_t height ); 24 | 25 | /** 26 | * Convert an RGBA uchar4 texture into YUV I420 planar. 
27 | */ 28 | cudaError_t cudaRGBAToI420( uchar4* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height ); 29 | 30 | /** 31 | * Convert an RGBA uchar4 buffer into YUV YV12 planar. 32 | */ 33 | cudaError_t cudaRGBAToYV12( uchar4* input, uint8_t* output, size_t width, size_t height ); 34 | 35 | /** 36 | * Convert an RGBA uchar4 texture into YUV YV12 planar. 37 | */ 38 | cudaError_t cudaRGBAToYV12( uchar4* input, size_t inputPitch, uint8_t* output, size_t outputPitch, size_t width, size_t height ); 39 | 40 | ///@} 41 | 42 | 43 | ////////////////////////////////////////////////////////////////////////////////// 44 | /// @name YUV 4:2:2 packed (UYVY & YUYV) to RGBA 45 | /// @ingroup util 46 | ////////////////////////////////////////////////////////////////////////////////// 47 | 48 | ///@{ 49 | 50 | /** 51 | * Convert a UYVY 422 packed image into RGBA uchar4. 52 | */ 53 | cudaError_t cudaUYVYToRGBA( uchar2* input, uchar4* output, size_t width, size_t height ); 54 | 55 | /** 56 | * Convert a UYVY 422 packed image into RGBA uchar4. 57 | */ 58 | cudaError_t cudaUYVYToRGBA( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height ); 59 | 60 | /** 61 | * Convert a YUYV 422 packed image into RGBA uchar4. 62 | */ 63 | cudaError_t cudaYUYVToRGBA( uchar2* input, uchar4* output, size_t width, size_t height ); 64 | 65 | /** 66 | * Convert a YUYV 422 packed image into RGBA uchar4. 67 | */ 68 | cudaError_t cudaYUYVToRGBA( uchar2* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height ); 69 | 70 | ///@} 71 | 72 | 73 | ////////////////////////////////////////////////////////////////////////////////// 74 | /// @name YUV 4:2:2 packed (UYVY & YUYV) to grayscale 75 | /// @ingroup util 76 | ////////////////////////////////////////////////////////////////////////////////// 77 | 78 | ///@{ 79 | 80 | /** 81 | * Convert a UYVY 422 packed image into a float grayscale image. 
82 | */ 83 | cudaError_t cudaUYVYToGray( uchar2* input, float* output, size_t width, size_t height ); 84 | 85 | /** 86 | * Convert a UYVY 422 packed image into a float grayscale image. 87 | */ 88 | cudaError_t cudaUYVYToGray( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height ); 89 | 90 | /** 91 | * Convert a YUYV 422 packed image into a float grayscale image. 92 | */ 93 | cudaError_t cudaYUYVToGray( uchar2* input, float* output, size_t width, size_t height ); 94 | 95 | /** 96 | * Convert a YUYV 422 packed image into a float grayscale image. 97 | */ 98 | cudaError_t cudaYUYVToGray( uchar2* input, size_t inputPitch, float* output, size_t outputPitch, size_t width, size_t height ); 99 | 100 | ///@} 101 | 102 | 103 | ////////////////////////////////////////////////////////////////////////////////// 104 | /// @name YUV NV12 to RGBA 105 | /// @ingroup util 106 | ////////////////////////////////////////////////////////////////////////////////// 107 | 108 | ///@{ 109 | 110 | /** 111 | * Convert an NV12 texture (semi-planar 4:2:0) to ARGB uchar4 format. 112 | * NV12 = 8-bit Y plane followed by an interleaved U/V plane with 2x2 subsampling. 113 | */ 114 | cudaError_t cudaNV12ToRGBA( uint8_t* input, size_t inputPitch, uchar4* output, size_t outputPitch, size_t width, size_t height ); 115 | cudaError_t cudaNV12ToRGBA( uint8_t* input, uchar4* output, size_t width, size_t height ); 116 | 117 | cudaError_t cudaNV12ToRGBAf( uint8_t* input, size_t inputPitch, float4* output, size_t outputPitch, size_t width, size_t height ); 118 | cudaError_t cudaNV12ToRGBAf( uint8_t* input, float4* output, size_t width, size_t height ); 119 | 120 | /** 121 | * Setup NV12 color conversion constants. 122 | * cudaNV12SetupColorspace() isn't necessary for the user to call, it will be 123 | * called automatically by cudaNV12ToRGBA() with a hue of 0.0. 
124 | * However if you want to setup custom constants (ie with a hue different than 0), 125 | * then you can call cudaNV12SetupColorspace() at any time, overriding the default. 126 | */ 127 | cudaError_t cudaNV12SetupColorspace( float hue = 0.0f ); 128 | 129 | ///@} 130 | 131 | #endif 132 | 133 | -------------------------------------------------------------------------------- /util/display/glDisplay.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #include "glDisplay.h" 6 | 7 | 8 | 9 | // Constructor 10 | glDisplay::glDisplay() 11 | { 12 | mWindowX = 0; 13 | mScreenX = NULL; 14 | mVisualX = NULL; 15 | mContextGL = NULL; 16 | mDisplayX = NULL; 17 | mWidth = 0; 18 | mHeight = 0; 19 | mAvgTime = 1.0f; 20 | 21 | clock_gettime(CLOCK_REALTIME, &mLastTime); 22 | } 23 | 24 | 25 | // Destructor 26 | glDisplay::~glDisplay() 27 | { 28 | glXDestroyContext(mDisplayX, mContextGL); 29 | } 30 | 31 | 32 | // Create 33 | glDisplay* glDisplay::Create() 34 | { 35 | glDisplay* vp = new glDisplay(); 36 | 37 | if( !vp ) 38 | return NULL; 39 | 40 | if( !vp->initWindow() ) 41 | { 42 | printf("[OpenGL] failed to create X11 Window.\n"); 43 | delete vp; 44 | return NULL; 45 | } 46 | 47 | if( !vp->initGL() ) 48 | { 49 | printf("[OpenGL] failed to initialize OpenGL.\n"); 50 | delete vp; 51 | return NULL; 52 | } 53 | 54 | GLenum err = glewInit(); 55 | 56 | if (GLEW_OK != err) 57 | { 58 | printf("[OpenGL] GLEW Error: %s\n", glewGetErrorString(err)); 59 | delete vp; 60 | return NULL; 61 | } 62 | 63 | printf("[OpenGL] glDisplay display window initialized\n"); 64 | return vp; 65 | } 66 | 67 | 68 | // initWindow 69 | bool glDisplay::initWindow() 70 | { 71 | if( !mDisplayX ) 72 | mDisplayX = XOpenDisplay(0); 73 | 74 | if( !mDisplayX ) 75 | { 76 | printf( "[OpenGL] failed to open X11 server connection." ); 77 | return false; 78 | } 79 | 80 | 81 | if( !mDisplayX ) 82 | { 83 | printf( "InitWindow() - no X11 server connection." 
); 84 | return false; 85 | } 86 | 87 | // retrieve screen info 88 | const int screenIdx = DefaultScreen(mDisplayX); 89 | const int screenWidth = DisplayWidth(mDisplayX, screenIdx); 90 | const int screenHeight = DisplayHeight(mDisplayX, screenIdx); 91 | 92 | printf("default X screen %i: %i x %i\n", screenIdx, screenWidth, screenHeight); 93 | 94 | Screen* screen = XScreenOfDisplay(mDisplayX, screenIdx); 95 | 96 | if( !screen ) 97 | { 98 | printf("failed to retrieve default Screen instance\n"); 99 | return false; 100 | } 101 | 102 | Window winRoot = XRootWindowOfScreen(screen); 103 | 104 | // get framebuffer format 105 | static int fbAttribs[] = 106 | { 107 | GLX_X_RENDERABLE, True, 108 | GLX_DRAWABLE_TYPE, GLX_WINDOW_BIT, 109 | GLX_RENDER_TYPE, GLX_RGBA_BIT, 110 | GLX_X_VISUAL_TYPE, GLX_TRUE_COLOR, 111 | GLX_RED_SIZE, 8, 112 | GLX_GREEN_SIZE, 8, 113 | GLX_BLUE_SIZE, 8, 114 | GLX_ALPHA_SIZE, 8, 115 | GLX_DEPTH_SIZE, 24, 116 | GLX_STENCIL_SIZE, 8, 117 | GLX_DOUBLEBUFFER, True, 118 | GLX_SAMPLE_BUFFERS, 0, 119 | GLX_SAMPLES, 0, 120 | None 121 | }; 122 | 123 | int fbCount = 0; 124 | GLXFBConfig* fbConfig = glXChooseFBConfig(mDisplayX, screenIdx, fbAttribs, &fbCount); 125 | 126 | if( !fbConfig || fbCount == 0 ) 127 | return false; 128 | 129 | // get a 'visual' 130 | XVisualInfo* visual = glXGetVisualFromFBConfig(mDisplayX, fbConfig[0]); 131 | 132 | if( !visual ) 133 | return false; 134 | 135 | // populate windows attributes 136 | XSetWindowAttributes winAttr; 137 | winAttr.colormap = XCreateColormap(mDisplayX, winRoot, visual->visual, AllocNone); 138 | winAttr.background_pixmap = None; 139 | winAttr.border_pixel = 0; 140 | winAttr.event_mask = StructureNotifyMask|KeyPressMask|KeyReleaseMask|PointerMotionMask|ButtonPressMask|ButtonReleaseMask; 141 | 142 | 143 | // create window 144 | Window win = XCreateWindow(mDisplayX, winRoot, 0, 0, screenWidth, screenHeight, 0, 145 | visual->depth, InputOutput, visual->visual, CWBorderPixel|CWColormap|CWEventMask, &winAttr); 146 | 147 
| if( !win ) 148 | return false; 149 | 150 | XStoreName(mDisplayX, win, "NVIDIA Jetson TX1 | L4T R24.1 aarch64 | Ubuntu 14.04 LTS"); 151 | XMapWindow(mDisplayX, win); 152 | 153 | // cleanup 154 | mWindowX = win; 155 | mScreenX = screen; 156 | mVisualX = visual; 157 | mWidth = screenWidth; 158 | mHeight = screenHeight; 159 | 160 | XFree(fbConfig); 161 | return true; 162 | } 163 | 164 | 165 | void glDisplay::SetTitle( const char* str ) 166 | { 167 | XStoreName(mDisplayX, mWindowX, str); 168 | } 169 | 170 | // initGL 171 | bool glDisplay::initGL() 172 | { 173 | mContextGL = glXCreateContext(mDisplayX, mVisualX, 0, True); 174 | 175 | if( !mContextGL ) 176 | return false; 177 | 178 | GL(glXMakeCurrent(mDisplayX, mWindowX, mContextGL)); 179 | 180 | return true; 181 | } 182 | 183 | 184 | // MakeCurrent 185 | void glDisplay::BeginRender() 186 | { 187 | GL(glXMakeCurrent(mDisplayX, mWindowX, mContextGL)); 188 | 189 | GL(glClearColor(0.05f, 0.05f, 0.05f, 1.0f)); 190 | GL(glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT|GL_STENCIL_BUFFER_BIT)); 191 | 192 | GL(glViewport(0, 0, mWidth, mHeight)); 193 | GL(glMatrixMode(GL_PROJECTION)); 194 | GL(glLoadIdentity()); 195 | GL(glOrtho(0.0f, mWidth, mHeight, 0.0f, 0.0f, 1.0f)); 196 | } 197 | 198 | 199 | // timeDiff 200 | static timespec timeDiff( const timespec& start, const timespec& end) 201 | { 202 | timespec temp; 203 | if ((end.tv_nsec-start.tv_nsec)<0) { 204 | temp.tv_sec = end.tv_sec-start.tv_sec-1; 205 | temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec; 206 | } else { 207 | temp.tv_sec = end.tv_sec-start.tv_sec; 208 | temp.tv_nsec = end.tv_nsec-start.tv_nsec; 209 | } 210 | return temp; 211 | } 212 | 213 | 214 | // Refresh 215 | void glDisplay::EndRender() 216 | { 217 | glXSwapBuffers(mDisplayX, mWindowX); 218 | 219 | // measure framerate 220 | timespec currTime; 221 | clock_gettime(CLOCK_REALTIME, &currTime); 222 | 223 | const timespec diffTime = timeDiff(mLastTime, currTime); 224 | const float ns = 1000000000 * 
diffTime.tv_sec + diffTime.tv_nsec; 225 | 226 | mAvgTime = mAvgTime * 0.8f + ns * 0.2f; 227 | mLastTime = currTime; 228 | } 229 | 230 | 231 | #define MOUSE_MOVE 0 232 | #define MOUSE_BUTTON 1 233 | #define MOUSE_WHEEL 2 234 | #define MOUSE_DOUBLE 3 235 | #define KEY_STATE 4 236 | #define KEY_CHAR 5 237 | 238 | 239 | // OnEvent 240 | void glDisplay::onEvent( uint msg, int a, int b ) 241 | { 242 | switch(msg) 243 | { 244 | case MOUSE_MOVE: 245 | { 246 | //mMousePos.Set(a,b); 247 | break; 248 | } 249 | case MOUSE_BUTTON: 250 | { 251 | /*if( mMouseButton[a] != (bool)b ) 252 | { 253 | mMouseButton[a] = b; 254 | 255 | if( b ) 256 | mMouseDownEvent = true; 257 | 258 | // ignore right-mouse up events 259 | if( !(a == 1 && !b) ) 260 | mMouseEvent = true; 261 | }*/ 262 | 263 | break; 264 | } 265 | case MOUSE_DOUBLE: 266 | { 267 | /*mMouseDblClick = b; 268 | 269 | if( b ) 270 | { 271 | mMouseEvent = true; 272 | mMouseDownEvent = true; 273 | }*/ 274 | 275 | break; 276 | } 277 | case MOUSE_WHEEL: 278 | { 279 | //mMouseWheel = a; 280 | break; 281 | } 282 | case KEY_STATE: 283 | { 284 | //mKeys[a] = b; 285 | break; 286 | } 287 | case KEY_CHAR: 288 | { 289 | //mKeyText = a; 290 | break; 291 | } 292 | } 293 | 294 | //if( msg == MOUSE_MOVE || msg == MOUSE_BUTTON || msg == MOUSE_DOUBLE || msg == MOUSE_WHEEL ) 295 | // mMouseEventLast = time(); 296 | } 297 | 298 | 299 | // UserEvents() 300 | void glDisplay::UserEvents() 301 | { 302 | // reset input states 303 | /*mMouseEvent = false; 304 | mMouseDownEvent = false; 305 | mMouseDblClick = false; 306 | mMouseWheel = 0; 307 | mKeyText = 0;*/ 308 | 309 | 310 | XEvent evt; 311 | 312 | while( XEventsQueued(mDisplayX, QueuedAlready) > 0 ) 313 | { 314 | XNextEvent(mDisplayX, &evt); 315 | 316 | switch( evt.type ) 317 | { 318 | case KeyPress: onEvent(KEY_STATE, evt.xkey.keycode, 1); break; 319 | case KeyRelease: onEvent(KEY_STATE, evt.xkey.keycode, 0); break; 320 | case ButtonPress: onEvent(MOUSE_BUTTON, evt.xbutton.button, 1); break; 321 | case 
ButtonRelease: onEvent(MOUSE_BUTTON, evt.xbutton.button, 0); break; 322 | case MotionNotify: 323 | { 324 | XWindowAttributes attr; 325 | XGetWindowAttributes(mDisplayX, evt.xmotion.root, &attr); 326 | onEvent(MOUSE_MOVE, evt.xmotion.x_root + attr.x, evt.xmotion.y_root + attr.y); 327 | break; 328 | } 329 | } 330 | } 331 | } 332 | 333 | -------------------------------------------------------------------------------- /util/display/glDisplay.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #ifndef __GL_VIEWPORT_H__ 6 | #define __GL_VIEWPORT_H__ 7 | 8 | 9 | #include "glUtility.h" 10 | #include "glTexture.h" 11 | 12 | #include 13 | 14 | 15 | /** 16 | * OpenGL display window / video viewer 17 | */ 18 | class glDisplay 19 | { 20 | public: 21 | /** 22 | * Create a new maximized openGL display window. 23 | */ 24 | static glDisplay* Create(); 25 | 26 | /** 27 | * Destroy window 28 | */ 29 | ~glDisplay(); 30 | 31 | /** 32 | * Clear window and begin rendering a frame. 33 | */ 34 | void BeginRender(); 35 | 36 | /** 37 | * Finish rendering and refresh / flip the backbuffer. 38 | */ 39 | void EndRender(); 40 | 41 | /** 42 | * Process UI events. 43 | */ 44 | void UserEvents(); 45 | 46 | /** 47 | * UI event handler. 48 | */ 49 | void onEvent( uint msg, int a, int b ); 50 | 51 | /** 52 | * Set the window title string. 53 | */ 54 | void SetTitle( const char* str ); 55 | 56 | /** 57 | * Get the average framerate in frames per second (1e9 divided by the smoothed frame time, which EndRender() tracks in nanoseconds). 
58 | */ 59 | inline float GetFPS() { return 1000000000.0f / mAvgTime; } 60 | 61 | protected: 62 | glDisplay(); 63 | 64 | bool initWindow(); 65 | bool initGL(); 66 | 67 | static const int screenIdx = 0; 68 | 69 | Display* mDisplayX; 70 | Screen* mScreenX; 71 | XVisualInfo* mVisualX; 72 | Window mWindowX; 73 | GLXContext mContextGL; 74 | 75 | uint32_t mWidth; 76 | uint32_t mHeight; 77 | 78 | timespec mLastTime; 79 | float mAvgTime; 80 | }; 81 | 82 | #endif 83 | 84 | -------------------------------------------------------------------------------- /util/display/glTexture.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #include "glUtility.h" 6 | #include "glTexture.h" 7 | 8 | #include "cudaMappedMemory.h" 9 | 10 | 11 | //----------------------------------------------------------------------------------- 12 | inline uint32_t glTextureLayout( uint32_t format ) 13 | { 14 | switch(format) 15 | { 16 | case GL_LUMINANCE8: 17 | case GL_LUMINANCE16: 18 | case GL_LUMINANCE32UI_EXT: 19 | case GL_LUMINANCE8I_EXT: 20 | case GL_LUMINANCE16I_EXT: 21 | case GL_LUMINANCE32I_EXT: 22 | case GL_LUMINANCE16F_ARB: 23 | case GL_LUMINANCE32F_ARB: return GL_LUMINANCE; 24 | 25 | case GL_LUMINANCE8_ALPHA8: 26 | case GL_LUMINANCE16_ALPHA16: 27 | case GL_LUMINANCE_ALPHA32UI_EXT: 28 | case GL_LUMINANCE_ALPHA8I_EXT: 29 | case GL_LUMINANCE_ALPHA16I_EXT: 30 | case GL_LUMINANCE_ALPHA32I_EXT: 31 | case GL_LUMINANCE_ALPHA16F_ARB: 32 | case GL_LUMINANCE_ALPHA32F_ARB: return GL_LUMINANCE_ALPHA; 33 | 34 | case GL_RGB8: 35 | case GL_RGB16: 36 | case GL_RGB32UI: 37 | case GL_RGB8I: 38 | case GL_RGB16I: 39 | case GL_RGB32I: 40 | case GL_RGB16F_ARB: 41 | case GL_RGB32F_ARB: return GL_RGB; 42 | 43 | case GL_RGBA8: 44 | case GL_RGBA16: 45 | case GL_RGBA32UI: 46 | case GL_RGBA8I: 47 | case GL_RGBA16I: 48 | case GL_RGBA32I: 49 | //case GL_RGBA_FLOAT32: 50 | case GL_RGBA16F_ARB: 51 | case GL_RGBA32F_ARB: return GL_RGBA; 52 | } 53 | 54 | 
return 0; 55 | } 56 | 57 | 58 | inline uint32_t glTextureLayoutChannels( uint32_t format ) 59 | { 60 | const uint layout = glTextureLayout(format); 61 | 62 | switch(layout) 63 | { 64 | case GL_LUMINANCE: return 1; 65 | case GL_LUMINANCE_ALPHA: return 2; 66 | case GL_RGB: return 3; 67 | case GL_RGBA: return 4; 68 | } 69 | 70 | return 0; 71 | } 72 | 73 | 74 | inline uint32_t glTextureType( uint32_t format ) 75 | { 76 | switch(format) 77 | { 78 | case GL_LUMINANCE8: 79 | case GL_LUMINANCE8_ALPHA8: 80 | case GL_RGB8: 81 | case GL_RGBA8: return GL_UNSIGNED_BYTE; 82 | 83 | case GL_LUMINANCE16: 84 | case GL_LUMINANCE16_ALPHA16: 85 | case GL_RGB16: 86 | case GL_RGBA16: return GL_UNSIGNED_SHORT; 87 | 88 | case GL_LUMINANCE32UI_EXT: 89 | case GL_LUMINANCE_ALPHA32UI_EXT: 90 | case GL_RGB32UI: 91 | case GL_RGBA32UI: return GL_UNSIGNED_INT; 92 | 93 | case GL_LUMINANCE8I_EXT: 94 | case GL_LUMINANCE_ALPHA8I_EXT: 95 | case GL_RGB8I: 96 | case GL_RGBA8I: return GL_BYTE; 97 | 98 | case GL_LUMINANCE16I_EXT: 99 | case GL_LUMINANCE_ALPHA16I_EXT: 100 | case GL_RGB16I: 101 | case GL_RGBA16I: return GL_SHORT; 102 | 103 | case GL_LUMINANCE32I_EXT: 104 | case GL_LUMINANCE_ALPHA32I_EXT: 105 | case GL_RGB32I: 106 | case GL_RGBA32I: return GL_INT; 107 | 108 | 109 | case GL_LUMINANCE16F_ARB: 110 | case GL_LUMINANCE_ALPHA16F_ARB: 111 | case GL_RGB16F_ARB: 112 | case GL_RGBA16F_ARB: return GL_FLOAT; 113 | 114 | case GL_LUMINANCE32F_ARB: 115 | case GL_LUMINANCE_ALPHA32F_ARB: 116 | //case GL_RGBA_FLOAT32: 117 | case GL_RGB32F_ARB: 118 | case GL_RGBA32F_ARB: return GL_FLOAT; 119 | } 120 | 121 | return 0; 122 | } 123 | 124 | 125 | inline uint glTextureTypeSize( uint32_t format ) 126 | { 127 | const uint type = glTextureType(format); 128 | 129 | switch(type) 130 | { 131 | case GL_UNSIGNED_BYTE: 132 | case GL_BYTE: return 1; 133 | 134 | case GL_UNSIGNED_SHORT: 135 | case GL_SHORT: return 2; 136 | 137 | case GL_UNSIGNED_INT: 138 | case GL_INT: 139 | case GL_FLOAT: return 4; 140 | } 141 | 142 | return 0; 
143 | } 144 | //----------------------------------------------------------------------------------- 145 | 146 | // constructor - zeroes all GL object ids, dimensions and interop pointers 147 | glTexture::glTexture() 148 | { 149 | mID = 0; 150 | mDMA = 0; 151 | mWidth = 0; 152 | mHeight = 0; 153 | mFormat = 0; 154 | mSize = 0; 155 | 156 | mInteropCUDA = NULL; 157 | mInteropHost = NULL; 158 | mInteropDevice = NULL; 159 | } 160 | 161 | 162 | // destructor - deletes the GL texture object named by mID 163 | glTexture::~glTexture() 164 | { 165 | GL(glDeleteTextures(1, &mID)); 166 | } 167 | 168 | 169 | // Create - allocates and initializes a texture; when init() fails the object is freed and NULL is returned 170 | glTexture* glTexture::Create( uint32_t width, uint32_t height, uint32_t format, void* data ) 171 | { 172 | glTexture* tex = new glTexture(); 173 | 174 | if( !tex->init(width, height, format, data) ) 175 | { 176 | printf("[OpenGL] failed to create %ux%u texture\n", width, height); 177 | delete tex; return NULL; 178 | } 179 | 180 | return tex; 181 | } 182 | 183 | 184 | // Alloc - creates the GL texture and its pixel-unpack buffer; returns false when the format or dimensions give a zero byte size 185 | bool glTexture::init( uint32_t width, uint32_t height, uint32_t format, void* data ) 186 | { 187 | const uint32_t size = width * height * glTextureLayoutChannels(format) * glTextureTypeSize(format); 188 | 189 | if( size == 0 ) 190 | return false; 191 | 192 | // generate texture objects 193 | uint32_t id = 0; 194 | 195 | GL(glEnable(GL_TEXTURE_2D)); 196 | GL(glGenTextures(1, &id)); 197 | GL(glBindTexture(GL_TEXTURE_2D, id)); 198 | 199 | // set default texture parameters 200 | GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)); 201 | GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)); 202 | GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR)); 203 | GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR)); 204 | 205 | printf("[OpenGL] creating %ux%u texture\n", width, height); 206 | 207 | // allocate texture 208 | GL_VERIFYN(glTexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, glTextureLayout(format), glTextureType(format), data)); 209 | GL(glBindTexture(GL_TEXTURE_2D, 0)); 210 | 211 | // allocate DMA PBO 212 | uint32_t dma = 
0; 213 | 214 | GL(glGenBuffers(1, &dma)); 215 | GL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, dma)); 216 | GL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, size, NULL, GL_DYNAMIC_DRAW_ARB)); 217 | GL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0)); 218 | 219 | 220 | mID = id; 221 | mDMA = dma; 222 | mWidth = width; 223 | mHeight = height; 224 | mFormat = format; 225 | mSize = size; 226 | return true; 227 | } 228 | 229 | 230 | // MapCUDA 231 | void* glTexture::MapCUDA() 232 | { 233 | if( !mInteropCUDA ) 234 | { 235 | if( CUDA_FAILED(cudaGraphicsGLRegisterBuffer(&mInteropCUDA, mDMA, cudaGraphicsRegisterFlagsWriteDiscard)) ) 236 | return NULL; 237 | 238 | printf( "[cuda] registered %u byte openGL texture for interop access (%ux%u)\n", mSize, mWidth, mHeight); 239 | } 240 | 241 | if( CUDA_FAILED(cudaGraphicsMapResources(1, &mInteropCUDA)) ) 242 | return NULL; 243 | 244 | void* devPtr = NULL; 245 | size_t mappedSize = 0; 246 | 247 | if( CUDA_FAILED(cudaGraphicsResourceGetMappedPointer(&devPtr, &mappedSize, mInteropCUDA)) ) 248 | { 249 | CUDA(cudaGraphicsUnmapResources(1, &mInteropCUDA)); 250 | return NULL; 251 | } 252 | 253 | if( mSize != mappedSize ) 254 | printf("[OpenGL] glTexture::MapCUDA() -- size mismatch %zu bytes (expected=%u)\n", mappedSize, mSize); 255 | 256 | return devPtr; 257 | } 258 | 259 | 260 | // Unmap 261 | void glTexture::Unmap() 262 | { 263 | if( !mInteropCUDA ) 264 | return; 265 | 266 | CUDA(cudaGraphicsUnmapResources(1, &mInteropCUDA)); 267 | 268 | GL(glEnable(GL_TEXTURE_2D)); 269 | GL(glBindTexture(GL_TEXTURE_2D, mID)); 270 | GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, mDMA)); 271 | GL(glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mWidth, mHeight, glTextureLayout(mFormat), glTextureType(mFormat), NULL)); 272 | 273 | GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0)); 274 | GL(glBindTexture(GL_TEXTURE_2D, 0)); 275 | GL(glDisable(GL_TEXTURE_2D)); 276 | } 277 | 278 | 279 | // Upload 280 | bool glTexture::UploadCPU( void* data ) 281 | { 282 | // activate 
texture & pbo 283 | GL(glEnable(GL_TEXTURE_2D)); 284 | GL(glActiveTextureARB(GL_TEXTURE0_ARB)); 285 | GL(glBindTexture(GL_TEXTURE_2D, mID)); 286 | GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0)); 287 | GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, mDMA)); 288 | 289 | //GL(glPixelStorei(GL_UNPACK_ALIGNMENT, 1)); 290 | //GL(glPixelStorei(GL_UNPACK_ROW_LENGTH, img->GetWidth())); 291 | //GL(glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, img->GetHeight())); 292 | 293 | // hint to driver to double-buffer 294 | // glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, mImage->GetSize(), NULL, GL_STREAM_DRAW_ARB); 295 | 296 | // map PBO 297 | GLubyte* ptr = (GLubyte*)glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY_ARB); 298 | 299 | if( !ptr ) 300 | { 301 | GL_CHECK("glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY_ARB)"); 302 | return NULL; 303 | } 304 | 305 | memcpy(ptr, data, mSize); 306 | 307 | GL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB)); 308 | 309 | //GL(glEnable(GL_TEXTURE_2D)); 310 | //GL(glBindTexture(GL_TEXTURE_2D, mID)); 311 | //GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, mDMA)); 312 | GL(glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mWidth, mHeight, glTextureLayout(mFormat), glTextureType(mFormat), NULL)); 313 | 314 | GL(glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0)); 315 | GL(glBindTexture(GL_TEXTURE_2D, 0)); 316 | GL(glDisable(GL_TEXTURE_2D)); 317 | 318 | /*if( !mInteropHost || !mInteropDevice ) 319 | { 320 | if( !cudaAllocMapped(&mInteropHost, &mInteropDevice, mSize) ) 321 | return false; 322 | } 323 | 324 | memcpy(mInteropHost, data, mSize); 325 | 326 | void* devGL = MapCUDA(); 327 | 328 | if( !devGL ) 329 | return false; 330 | 331 | CUDA(cudaMemcpy(devGL, mInteropDevice, mSize, cudaMemcpyDeviceToDevice)); 332 | Unmap();*/ 333 | 334 | return true; 335 | } 336 | 337 | 338 | // Render 339 | void glTexture::Render( const float4& rect ) 340 | { 341 | GL(glEnable(GL_TEXTURE_2D)); 342 | GL(glBindTexture(GL_TEXTURE_2D, mID)); 343 | 344 | glBegin(GL_QUADS); 345 | 
346 | glColor4f(1.0f,1.0f,1.0f,1.0f); 347 | 348 | glTexCoord2f(0.0f, 0.0f); 349 | glVertex2d(rect.x, rect.y); 350 | 351 | glTexCoord2f(1.0f, 0.0f); 352 | glVertex2d(rect.z, rect.y); 353 | 354 | glTexCoord2f(1.0f, 1.0f); 355 | glVertex2d(rect.z, rect.w); 356 | 357 | glTexCoord2f(0.0f, 1.0f); 358 | glVertex2d(rect.x, rect.w); 359 | 360 | glEnd(); 361 | 362 | GL(glBindTexture(GL_TEXTURE_2D, 0)); 363 | } 364 | 365 | 366 | 367 | void glTexture::Render( float x, float y ) 368 | { 369 | Render(x, y, mWidth, mHeight); 370 | } 371 | 372 | void glTexture::Render( float x, float y, float width, float height ) 373 | { 374 | Render(make_float4(x, y, x + width, y + height)); 375 | } 376 | 377 | 378 | -------------------------------------------------------------------------------- /util/display/glTexture.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inference-101 3 | */ 4 | 5 | #ifndef __GL_TEXTURE_H__ 6 | #define __GL_TEXTURE_H__ 7 | 8 | 9 | #include "cudaUtility.h" 10 | #include "cuda_gl_interop.h" 11 | 12 | 13 | /** 14 | * OpenGL texture 15 | */ 16 | class glTexture 17 | { 18 | public: 19 | static glTexture* Create( uint32_t width, uint32_t height, uint32_t format, void* data=NULL ); 20 | ~glTexture(); 21 | 22 | void Render( float x, float y ); 23 | void Render( float x, float y, float width, float height ); 24 | void Render( const float4& rect ); 25 | 26 | inline uint32_t GetID() const { return mID; } 27 | inline uint32_t GetWidth() const { return mWidth; } 28 | inline uint32_t GetHeight() const { return mHeight; } 29 | inline uint32_t GetFormat() const { return mFormat; } 30 | inline uint32_t GetSize() const { return mSize; } 31 | 32 | void* MapCUDA(); 33 | void Unmap(); 34 | 35 | bool UploadCPU( void* data ); 36 | 37 | private: 38 | glTexture(); 39 | bool init(uint32_t width, uint32_t height, uint32_t format, void* data); 40 | 41 | uint32_t mID; 42 | uint32_t mDMA; 43 | uint32_t mWidth; 44 | uint32_t mHeight; 45 | uint32_t 
mFormat; 46 | uint32_t mSize; 47 | 48 | cudaGraphicsResource* mInteropCUDA; 49 | void* mInteropHost; 50 | void* mInteropDevice; 51 | }; 52 | 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /util/display/glUtility.h: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #ifndef __OPENGL_UTILITY_H 6 | #define __OPENGL_UTILITY_H 7 | 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | 15 | /** 16 | * LOG_GL printf prefix. 17 | * @ingroup renderGL 18 | */ 19 | #define LOG_GL "[openGL] " 20 | 21 | 22 | 23 | #define GL(x) { x; glCheckError( #x, __FILE__, __LINE__ ); } 24 | #define GL_VERIFY(x) { x; if(glCheckError( #x, __FILE__, __LINE__ )) return false; } 25 | #define GL_VERIFYN(x) { x; if(glCheckError( #x, __FILE__, __LINE__ )) return NULL; } 26 | #define GL_CHECK(msg) { glCheckError(msg, __FILE__, __LINE__); } 27 | 28 | 29 | /** 30 | * openGL error logging macros 31 | * @ingroup renderGL 32 | */ 33 | inline bool glCheckError(const char* msg, const char* file, int line) 34 | { 35 | GLenum err = glGetError(); 36 | 37 | if( err == GL_NO_ERROR ) 38 | return false; 39 | 40 | const char* e = NULL; 41 | 42 | switch(err) 43 | { 44 | case GL_INVALID_ENUM: e = "invalid enum"; break; 45 | case GL_INVALID_VALUE: e = "invalid value"; break; 46 | case GL_INVALID_OPERATION: e = "invalid operation"; break; 47 | case GL_STACK_OVERFLOW: e = "stack overflow"; break; 48 | case GL_STACK_UNDERFLOW: e = "stack underflow"; break; 49 | case GL_OUT_OF_MEMORY: e = "out of memory"; break; 50 | #ifdef GL_TABLE_TOO_LARGE_EXT 51 | case GL_TABLE_TOO_LARGE_EXT: e = "table too large"; break; 52 | #endif 53 | #ifdef GL_TEXTURE_TOO_LARGE_EXT 54 | case GL_TEXTURE_TOO_LARGE_EXT: e = "texture too large"; break; 55 | #endif 56 | default: e = "unknown error"; 57 | } 58 | 59 | printf(LOG_GL "Error %i - '%s'\n", (uint)err, e); 60 | printf(LOG_GL " 
%s::%i\n", file, line ); 61 | printf(LOG_GL " %s\n", msg ); 62 | 63 | return true; 64 | } 65 | 66 | 67 | /** 68 | * openGL error check + logging 69 | * @ingroup renderGL 70 | */ 71 | inline bool glCheckError(const char* msg) 72 | { 73 | GLenum err = glGetError(); 74 | 75 | if( err == GL_NO_ERROR ) 76 | return false; 77 | 78 | const char* e = NULL; 79 | 80 | switch(err) 81 | { 82 | case GL_INVALID_ENUM: e = "invalid enum"; break; 83 | case GL_INVALID_VALUE: e = "invalid value"; break; 84 | case GL_INVALID_OPERATION: e = "invalid operation"; break; 85 | case GL_STACK_OVERFLOW: e = "stack overflow"; break; 86 | case GL_STACK_UNDERFLOW: e = "stack underflow"; break; 87 | case GL_OUT_OF_MEMORY: e = "out of memory"; break; 88 | #ifdef GL_TABLE_TOO_LARGE_EXT 89 | case GL_TABLE_TOO_LARGE_EXT: e = "table too large"; break; 90 | #endif 91 | #ifdef GL_TEXTURE_TOO_LARGE_EXT 92 | case GL_TEXTURE_TOO_LARGE_EXT: e = "texture too large"; break; 93 | #endif 94 | default: e = "unknown error"; 95 | } 96 | 97 | printf(LOG_GL "%s (error %i - %s)\n", msg, (uint)err, e); 98 | return true; 99 | } 100 | 101 | 102 | 103 | #define GL_GPU_MEM_INFO_TOTAL_AVAILABLE_MEM_NVX 0x9048 104 | #define GL_GPU_MEM_INFO_CURRENT_AVAILABLE_MEM_NVX 0x9049 105 | 106 | 107 | /** 108 | * glPrintFreeMem 109 | * @ingroup renderGL 110 | */ 111 | inline void glPrintFreeMem() 112 | { 113 | GLint total_mem_kb = 0; 114 | GLint cur_avail_mem_kb = 0; 115 | 116 | glGetIntegerv(GL_GPU_MEM_INFO_TOTAL_AVAILABLE_MEM_NVX, &total_mem_kb); 117 | glGetIntegerv(GL_GPU_MEM_INFO_CURRENT_AVAILABLE_MEM_NVX,&cur_avail_mem_kb); 118 | 119 | printf("[openGL] GPU memory free %i / %i kb\n", cur_avail_mem_kb, total_mem_kb); 120 | } 121 | 122 | 123 | 124 | #endif 125 | 126 | -------------------------------------------------------------------------------- /util/loadImage.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #include 
"loadImage.h" 6 | #include "cudaMappedMemory.h" 7 | 8 | #include 9 | 10 | 11 | 12 | bool saveImageRGBA( const char* filename, float4* cpu, int width, int height, float max_pixel ) 13 | { 14 | if( !filename || !cpu || !width || !height ) 15 | { 16 | printf("saveImageRGBA - invalid parameter\n"); 17 | return false; 18 | } 19 | 20 | const float scale = 255.0f / max_pixel; 21 | QImage img(width, height, QImage::Format_RGB32); 22 | 23 | for( int y=0; y < height; y++ ) 24 | { 25 | for( int x=0; x < width; x++ ) 26 | { 27 | const float4 px = cpu[y * width + x]; 28 | //printf("%03u %03u %f\n", x, y, normPx); 29 | img.setPixel(x, y, qRgb(px.x * scale, px.y * scale, px.z * scale)); 30 | } 31 | } 32 | 33 | 34 | /* 35 | * save file 36 | */ 37 | if( !img.save(filename/*, "PNG", 100*/) ) 38 | { 39 | printf("failed to save %ix%i output image to %s\n", width, height, filename); 40 | return false; 41 | } 42 | 43 | return true; 44 | } 45 | 46 | 47 | // loadImageRGBA 48 | bool loadImageRGBA( const char* filename, float4** cpu, float4** gpu, int* width, int* height ) 49 | { 50 | if( !filename || !cpu || !gpu || !width || !height ) 51 | { 52 | printf("loadImageRGBA - invalid parameter\n"); 53 | return false; 54 | } 55 | 56 | // load original image 57 | QImage qImg; 58 | 59 | if( !qImg.load(filename) ) 60 | { 61 | printf("failed to load image %s\n", filename); 62 | return false; 63 | } 64 | 65 | if( *width != 0 && *height != 0 ) 66 | qImg = qImg.scaled(*width, *height, Qt::IgnoreAspectRatio); 67 | 68 | const uint32_t imgWidth = qImg.width(); 69 | const uint32_t imgHeight = qImg.height(); 70 | const uint32_t imgPixels = imgWidth * imgHeight; 71 | const size_t imgSize = imgWidth * imgHeight * sizeof(float) * 4; 72 | 73 | printf("loaded image %s (%u x %u) %zu bytes\n", filename, imgWidth, imgHeight, imgSize); 74 | 75 | // allocate buffer for the image 76 | if( !cudaAllocMapped((void**)cpu, (void**)gpu, imgSize) ) 77 | { 78 | printf(LOG_CUDA "failed to allocated %zu bytes for image %s\n", 
imgSize, filename); 79 | return false; 80 | } 81 | 82 | float4* cpuPtr = *cpu; 83 | 84 | for( uint32_t y=0; y < imgHeight; y++ ) 85 | { 86 | for( uint32_t x=0; x < imgWidth; x++ ) 87 | { 88 | const QRgb rgb = qImg.pixel(x,y); 89 | const float4 px = make_float4(float(qRed(rgb)), 90 | float(qGreen(rgb)), 91 | float(qBlue(rgb)), 92 | float(qAlpha(rgb))); 93 | 94 | cpuPtr[y*imgWidth+x] = px; 95 | } 96 | } 97 | 98 | *width = imgWidth; 99 | *height = imgHeight; 100 | return true; 101 | } 102 | 103 | 104 | // loadImageRGB 105 | bool loadImageRGB( const char* filename, float3** cpu, float3** gpu, int* width, int* height, const float3& mean ) 106 | { 107 | if( !filename || !cpu || !gpu || !width || !height ) 108 | { 109 | printf("loadImageRGB - invalid parameter\n"); 110 | return false; 111 | } 112 | 113 | // load original image 114 | QImage qImg; 115 | 116 | if( !qImg.load(filename) ) 117 | { 118 | printf("failed to load image %s\n", filename); 119 | return false; 120 | } 121 | 122 | if( *width != 0 && *height != 0 ) 123 | qImg = qImg.scaled(*width, *height, Qt::IgnoreAspectRatio); 124 | 125 | const uint32_t imgWidth = qImg.width(); 126 | const uint32_t imgHeight = qImg.height(); 127 | const uint32_t imgPixels = imgWidth * imgHeight; 128 | const size_t imgSize = imgWidth * imgHeight * sizeof(float) * 3; 129 | 130 | printf("loaded image %s (%u x %u) %zu bytes\n", filename, imgWidth, imgHeight, imgSize); 131 | 132 | // allocate buffer for the image 133 | if( !cudaAllocMapped((void**)cpu, (void**)gpu, imgSize) ) 134 | { 135 | printf(LOG_CUDA "failed to allocated %zu bytes for image %s\n", imgSize, filename); 136 | return false; 137 | } 138 | 139 | float* cpuPtr = (float*)*cpu; 140 | 141 | for( uint32_t y=0; y < imgHeight; y++ ) 142 | { 143 | for( uint32_t x=0; x < imgWidth; x++ ) 144 | { 145 | const QRgb rgb = qImg.pixel(x,y); 146 | const float mul = 1.0f; //1.0f / 255.0f; 147 | const float3 px = make_float3((float(qRed(rgb)) - mean.x) * mul, 148 | (float(qGreen(rgb)) - 
mean.y) * mul, 149 | (float(qBlue(rgb)) - mean.z) * mul ); 150 | 151 | // note: caffe/GIE is band-sequential (as opposed to the typical Band Interleaved by Pixel) 152 | cpuPtr[imgPixels * 0 + y * imgWidth + x] = px.x; 153 | cpuPtr[imgPixels * 1 + y * imgWidth + x] = px.y; 154 | cpuPtr[imgPixels * 2 + y * imgWidth + x] = px.z; 155 | } 156 | } 157 | 158 | *width = imgWidth; 159 | *height = imgHeight; 160 | return true; 161 | } 162 | 163 | 164 | // loadImageBGR 165 | bool loadImageBGR( const char* filename, float3** cpu, float3** gpu, int* width, int* height, const float3& mean ) 166 | { 167 | if( !filename || !cpu || !gpu || !width || !height ) 168 | { 169 | printf("loadImageRGB - invalid parameter\n"); 170 | return false; 171 | } 172 | 173 | // load original image 174 | QImage qImg; 175 | 176 | if( !qImg.load(filename) ) 177 | { 178 | printf("failed to load image %s\n", filename); 179 | return false; 180 | } 181 | 182 | if( *width != 0 && *height != 0 ) 183 | qImg = qImg.scaled(*width, *height, Qt::IgnoreAspectRatio); 184 | 185 | const uint32_t imgWidth = qImg.width(); 186 | const uint32_t imgHeight = qImg.height(); 187 | const uint32_t imgPixels = imgWidth * imgHeight; 188 | const size_t imgSize = imgWidth * imgHeight * sizeof(float) * 3; 189 | 190 | printf("loaded image %s (%u x %u) %zu bytes\n", filename, imgWidth, imgHeight, imgSize); 191 | 192 | // allocate buffer for the image 193 | if( !cudaAllocMapped((void**)cpu, (void**)gpu, imgSize) ) 194 | { 195 | printf(LOG_CUDA "failed to allocated %zu bytes for image %s\n", imgSize, filename); 196 | return false; 197 | } 198 | 199 | float* cpuPtr = (float*)*cpu; 200 | 201 | for( uint32_t y=0; y < imgHeight; y++ ) 202 | { 203 | for( uint32_t x=0; x < imgWidth; x++ ) 204 | { 205 | const QRgb rgb = qImg.pixel(x,y); 206 | const float mul = 1.0f; //1.0f / 255.0f; 207 | const float3 px = make_float3((float(qBlue(rgb)) - mean.x) * mul, 208 | (float(qGreen(rgb)) - mean.y) * mul, 209 | (float(qRed(rgb)) - mean.z) * mul ); 210 
| 211 | // note: caffe/GIE is band-sequential (as opposed to the typical Band Interleaved by Pixel) 212 | cpuPtr[imgPixels * 0 + y * imgWidth + x] = px.x; 213 | cpuPtr[imgPixels * 1 + y * imgWidth + x] = px.y; 214 | cpuPtr[imgPixels * 2 + y * imgWidth + x] = px.z; 215 | } 216 | } 217 | 218 | return true; 219 | } 220 | -------------------------------------------------------------------------------- /util/loadImage.h: -------------------------------------------------------------------------------- 1 | /* 2 | * http://github.com/dusty-nv/jetson-inference 3 | */ 4 | 5 | #ifndef __IMAGE_LOADER_H_ 6 | #define __IMAGE_LOADER_H_ 7 | 8 | 9 | #include "cudaUtility.h" 10 | 11 | 12 | /** 13 | * Load a color image from disk into CUDA memory with alpha. 14 | * This function loads the image into shared CPU/GPU memory, using the functions from cudaMappedMemory.h 15 | * 16 | * @param filename Path to the image file on disk. 17 | * @param cpu Pointer to CPU buffer allocated containing the image. 18 | * @param gpu Pointer to CUDA device buffer residing on GPU containing image. 19 | * @param width Variable containing width in pixels of the image. 20 | * @param height Variable containing height in pixels of the image. 21 | * 22 | * @ingroup util 23 | */ 24 | bool loadImageRGBA( const char* filename, float4** cpu, float4** gpu, int* width, int* height ); 25 | 26 | 27 | /** 28 | * Save an image to disk 29 | * @ingroup util 30 | */ 31 | bool saveImageRGBA( const char* filename, float4* cpu, int width, int height, float max_pixel=255.0f ); 32 | 33 | 34 | /** 35 | * Load a color image from disk into CUDA memory. 36 | * This function loads the image into shared CPU/GPU memory, using the functions from cudaMappedMemory.h 37 | * 38 | * @param filename Path to the image file on disk. 39 | * @param cpu Pointer to CPU buffer allocated containing the image. 40 | * @param gpu Pointer to CUDA device buffer residing on GPU containing image. 
41 | * @param width Variable containing width in pixels of the image. 42 | * @param height Variable containing height in pixels of the image. 43 | * 44 | * @ingroup util 45 | */ 46 | bool loadImageRGB( const char* filename, float3** cpu, float3** gpu, int* width, int* height, const float3& mean=make_float3(0,0,0) ); 47 | 48 | 49 | /** 50 | * Load a color image from disk into CUDA memory. 51 | * This function loads the image into shared CPU/GPU memory, using the functions from cudaMappedMemory.h 52 | * 53 | * @param filename Path to the image file on disk. 54 | * @param cpu Pointer to CPU buffer allocated containing the image. 55 | * @param gpu Pointer to CUDA device buffer residing on GPU containing image. 56 | * @param width Variable containing width in pixels of the image. 57 | * @param height Variable containing height in pixels of the image. 58 | * 59 | * @ingroup util 60 | */ 61 | bool loadImageBGR( const char* filename, float3** cpu, float3** gpu, int* width, int* height, const float3& mean=make_float3(0,0,0) ); 62 | 63 | 64 | 65 | #endif 66 | --------------------------------------------------------------------------------