├── .gitignore ├── README.md ├── build ├── CMakeLists.txt ├── release.bat └── release.sh └── src ├── test-nvidia-decode-v0.cpp ├── test-nvidia-decode-v1.cpp ├── test-nvidia-decode-v2.cpp └── test-nvidia-decode-v3.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | build 35 | install -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NVDECODE EXPERIMENTS 2 | 3 | Experimenting with NVDECODE. Not production ready, purely meant 4 | as a first look into the NVDECODE API. Leaving this on Github only 5 | for people to have a peek at the code. This was tested on Arch Linux. 6 | 7 | www.roxlu.com 8 | 9 | ## How to build on Windows 10 | 11 | - Tested this with Cuda 9.1 12 | - Make sure the *CUDA_PATH* environment variable is set. 13 | - Make sure that _nvcuvid.dll_ is stored in C:\Windows\system32 14 | - Install cmake 15 | - [download and install CUDA](https://developer.nvidia.com/cuda-download) 16 | - [download nvidia Video Codec SDK](https://developer.nvidia.com/nvidia-video-codec-sdk#Download). 
Extract the *Video Codec SDK* into:

    [repos-dir]/extern/Video_Codec_SDK/

- Open a terminal and:

    cd build
    release.bat release


## How to build on Linux

- Install CUDA using your package manager
- Open a terminal and:

    cd build
    ./release.sh

--------------------------------------------------------------------------------
/build/CMakeLists.txt:
--------------------------------------------------------------------------------
# Builds the NVDECODE experiment executables (one per src/test-*.cpp file)
# and installs them into <CMAKE_INSTALL_PREFIX>/bin, next to the sample
# H264 stream they read at runtime.
cmake_minimum_required(VERSION 3.4)
project(nvidia-h264-decode C CXX)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# bd: repository root (this file lives in <root>/build), sd: sources,
# nvcodec_dir: NvCodec sample tree of the extracted Video Codec SDK.
get_filename_component(bd "${CMAKE_CURRENT_LIST_DIR}/.." ABSOLUTE)
set(sd "${bd}/src")
set(nvcodec_dir "${bd}/extern/Video_Codec_SDK/Samples/NvCodec")

# Find CUDA which sets:
# - CUDA_INCLUDE_DIRS
# - CUDA_LIBRARIES
# REQUIRED: without CUDA both variables are empty and the build would only
# fail much later with missing headers / unresolved symbols.
find_package(CUDA REQUIRED)

# Libraries every test executable links against.
set(libs "")
if(WIN32)
  # The import libraries are referenced by full path on Windows; CUDA_PATH
  # is read at configure time, so fail early when it is not set.
  if("$ENV{CUDA_PATH}" STREQUAL "")
    message(FATAL_ERROR "The CUDA_PATH environment variable is not set.")
  endif()
  file(TO_CMAKE_PATH "$ENV{CUDA_PATH}" cuda_path)
  list(APPEND libs
    "${cuda_path}/lib/x64/cuda.lib"
    "${nvcodec_dir}/Lib/x64/nvcuvid.lib"
    )
elseif(UNIX)
  list(APPEND libs
    nvcuvid
    cuda
    )
endif()

list(APPEND libs
  ${CUDA_LIBRARIES}
  )

# Fetch the sample stream the tests open as ./moonlight.264 from the
# install bin directory. A failed download is reported and the (possibly
# empty or partial) file is removed, so that the NOT EXISTS guard retries on
# the next configure instead of treating a broken file as present.
set(sample_file "${CMAKE_INSTALL_PREFIX}/bin/moonlight.264")
if(NOT EXISTS "${sample_file}")
  file(DOWNLOAD
    http://samples.mplayerhq.hu/V-codecs/h264/moonlight.264
    "${sample_file}"
    STATUS download_status
    )
  list(GET download_status 0 download_code)
  if(NOT download_code EQUAL 0)
    list(GET download_status 1 download_message)
    file(REMOVE "${sample_file}")
    message(WARNING
      "Failed to download moonlight.264 (${download_message}). "
      "The tests need this file in ${CMAKE_INSTALL_PREFIX}/bin at runtime.")
  endif()
endif()

# Debug builds get a "_debug" suffix; the release scripts rely on this name.
# NOTE: CMAKE_BUILD_TYPE is only meaningful when it is passed explicitly
# (the release scripts always pass it, also for multi-config generators).
set(debug_flag "")
if("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
  set(debug_flag "_debug")
endif()

# create_test(<name>)
#
# Adds the executable `test-<name>[_debug]` built from `src/test-<name>.cpp`,
# links it against the CUDA / NVDECODE libraries and installs it into bin/.
#
# Example: create_test("nvidia-decode-v0")
function(create_test name)
  set(test_name "test-${name}${debug_flag}")
  add_executable(${test_name} "${sd}/test-${name}.cpp")
  target_include_directories(${test_name} PRIVATE
    "${sd}"
    ${CUDA_INCLUDE_DIRS}
    "${nvcodec_dir}"
    )
  target_link_libraries(${test_name} PRIVATE ${libs})
  install(TARGETS ${test_name} DESTINATION bin)
endfunction()

create_test("nvidia-decode-v0")
create_test("nvidia-decode-v1")
create_test("nvidia-decode-v2")
create_test("nvidia-decode-v3")

--------------------------------------------------------------------------------
/build/release.bat:
--------------------------------------------------------------------------------
@echo off
rem Configures, builds and installs the tests with Visual Studio 2017 (x64)
rem and then runs test-nvidia-decode-v2 from the install directory.
rem
rem Usage: release.bat [debug, release]
rem
rem The optional cmake_opt environment variable is passed on to cmake.

rem The `set "name=value"` form keeps the quotes out of the value; the
rem quotes are added where the value is used so paths with spaces work.
set "d=%CD%"
set "bd=win-x86_64"
set "id=%d%\..\install"
set "type=%~1"
set "cmake_bt=Release"
set "cmake_gen=Visual Studio 15 2017 Win64"
set "debug_flag="

if "%type%" == "" (
  echo "Usage: release.bat [debug, release]"
  exit /b 2
)

rem Debug builds use their own build directory and the CMake project adds
rem a "_debug" suffix to the executables.
if "%type%" == "debug" (
  set "bd=%bd%d"
  set "cmake_bt=Debug"
  set "debug_flag=_debug"
)

if not exist "%d%\%bd%" (
  mkdir "%d%\%bd%"
)

cd /d "%d%\%bd%"
cmake -DCMAKE_BUILD_TYPE=%cmake_bt% ^
      -DCMAKE_INSTALL_PREFIX="%id%" ^
      -G "%cmake_gen%" ^
      %cmake_opt% ..

if errorlevel 1 (
  echo Failed to configure
  cd /d "%d%"
  exit /b 1
)

cmake --build . ^
      --target install ^
      --config %cmake_bt%

if errorlevel 1 (
  echo Failed to build
  cd /d "%d%"
  exit /b 1
)

cd /d "%id%\bin"
test-nvidia-decode-v2%debug_flag%.exe
cd /d "%d%"
--------------------------------------------------------------------------------
/build/release.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Using gist: https://gist.github.com/roxlu/2ac1aa06222ef788f9df235a5b2fbf7c
d=${PWD}
bd=${d}/../
sd=${bd}/src/
id=${bd}/install
ed=${d}/../
rd=${d}/../reference/
d=${PWD}
is_debug="n"
build_dir="build_unix"
cmake_build_type="Release"
cmake_config="Release"
debug_flag=""
debugger=""
os_debugger=""
parallel_builds=""
cmake_generator=""

# Detect OS.
21 | if [ "$(uname)" == "Darwin" ]; then 22 | if [ "${cmake_generator}" = "" ] ; then 23 | cmake_generator="Unix Makefiles" 24 | fi 25 | os="mac" 26 | os_debugger="lldb" 27 | parallel_builds="-j8" 28 | elif [ "$(expr substr $(uname -s) 1 5)" = "Linux" ]; then 29 | if [ "${cmake_generator}" = "" ] ; then 30 | cmake_generator="Unix Makefiles" 31 | fi 32 | os="linux" 33 | os_debugger="gdb" 34 | parallel_builds="-j8" 35 | else 36 | if [ "${cmake_generator}" = "" ] ; then 37 | cmake_generator="Visual Studio 15 2017 Win64" 38 | build_dir="build_vs2017" 39 | fi 40 | os="win" 41 | os_debugger="cdb" 42 | parallel_builds="/verbosity:q /maxcpucount:8" 43 | fi 44 | 45 | # Detect Command Line Options 46 | for var in "$@" 47 | do 48 | if [ "${var}" = "debug" ] ; then 49 | is_debug="y" 50 | cmake_build_type="Debug" 51 | cmake_config="Debug" 52 | debug_flag="_debug" 53 | debugger="${os_debugger}" 54 | 55 | elif [ "${var}" = "xcode" ] ; then 56 | build_dir="build_xcode" 57 | cmake_generator="Xcode" 58 | build_dir="build_xcode" 59 | parallel_builds="" 60 | fi 61 | done 62 | 63 | # Create unique name for this build type. 64 | bd="${d}/${build_dir}.${cmake_build_type}" 65 | 66 | if [ ! -d ${bd} ] ; then 67 | mkdir ${bd} 68 | fi 69 | 70 | # Compile the library. 71 | cd ${bd} 72 | cmake -DCMAKE_INSTALL_PREFIX=${id} \ 73 | -DCMAKE_BUILD_TYPE=${cmake_build_type} \ 74 | -G "${cmake_generator}" \ 75 | .. 76 | 77 | if [ $? -ne 0 ] ; then 78 | echo "Failed to configure" 79 | exit 80 | fi 81 | 82 | cmake --build . \ 83 | --target install \ 84 | --config ${cmake_build_type} \ 85 | -- ${parallel_builds} 86 | 87 | if [ $? 
-ne 0 ] ; then 88 | echo "Failed to build" 89 | exit 90 | fi 91 | 92 | cd ${id}/bin 93 | # ${debugger} ./test-nvidia-decode-v0${debug_flag} 94 | #${debugger} ./test-nvidia-decode-v1${debug_flag} 95 | ${debugger} ./test-nvidia-decode-v2${debug_flag} 96 | #${debugger} ./test-nvidia-decode-v3${debug_flag} 97 | 98 | -------------------------------------------------------------------------------- /src/test-nvidia-decode-v0.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | NVIDIA DECODE EXPERIMENTS 4 | ========================= 5 | 6 | GENERAL INFO: 7 | 8 | This repository contains a collection of experiments using 9 | the NVDECODE SDK to decode H264 using hardware 10 | acceleration. These tests are meant to be minimal and should 11 | not be used in production environments. The code was written 12 | while diving into the APIs so things might be incorrect. 13 | 14 | QUESTIONS: 15 | 16 | Q1: Should I use the CUVIDDECODECREATEINFO.vidLock .. and when? 17 | A1: ... 18 | 19 | Q2: What are the video parser callbacks supposed to return? 20 | A2: .... 21 | 22 | Q3: When calling a cuvidCreateVideoParser(), do I need to provide `pExtVideoInfo` ? 23 | A3: I tested this by setting the pExtVideoInfo member to 24 | nullptr in the cudaDecodeGL example and things were 25 | working fine w/o. 
26 | 27 | REFERENCES: 28 | 29 | [0]: http://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf "Cuda C Programming Guide" 30 | [1]: https://github.com/gpac/gpac/blob/9bf9d23283553bf8214d13b286ce759ddd216be0/modules/nvdec/nvdec.c "GPAC implementation of NVDECODE" 31 | 32 | */ 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | /* ------------------------------------------------ */ 41 | 42 | static void print_cuvid_decode_caps(CUVIDDECODECAPS* caps); 43 | static void print_cuvid_parser_disp_info(CUVIDPARSERDISPINFO* info); 44 | static void print_cuvid_pic_params(CUVIDPICPARAMS* pic); 45 | static int parser_sequence_callback(void* user, CUVIDEOFORMAT* fmt); 46 | static int parser_decode_picture_callback(void* user, CUVIDPICPARAMS* pic); 47 | static int parser_display_picture_callback(void* user, CUVIDPARSERDISPINFO* info); 48 | 49 | /* ------------------------------------------------ */ 50 | 51 | CUcontext context = { 0 }; 52 | CUvideodecoder decoder = nullptr; 53 | CUdevice device = { 0 }; 54 | 55 | /* ------------------------------------------------ */ 56 | 57 | int main() { 58 | 59 | printf("\n\nnvidia decode test v0.\n\n"); 60 | 61 | CUresult r = CUDA_SUCCESS; 62 | const char* err_str = nullptr; 63 | 64 | /* Initialize cuda, must be done before anything else. */ 65 | r = cuInit(0); 66 | if (CUDA_SUCCESS != r) { 67 | cuGetErrorString(r, &err_str); 68 | printf("Failed to initialize cuda: %s. (exiting).\n", err_str); 69 | exit(EXIT_FAILURE); 70 | } 71 | 72 | int device_count = 0; 73 | r = cuDeviceGetCount(&device_count); 74 | if (CUDA_SUCCESS != r) { 75 | cuGetErrorString(r, &err_str); 76 | printf("Failed to get the cuda device count: %s. (exiting).\n", err_str); 77 | exit(EXIT_FAILURE); 78 | } 79 | 80 | printf("We have %d cuda device(s).\n", device_count); 81 | 82 | r = cuDeviceGet(&device, 0); 83 | if (CUDA_SUCCESS != r) { 84 | cuGetErrorString(r, &err_str); 85 | printf("Failed to get a handle to the cuda device: %s. 
(exiting).\n", err_str); 86 | exit(EXIT_FAILURE); 87 | } 88 | 89 | char name[80] = { 0 }; 90 | r = cuDeviceGetName(name, sizeof(name), device); 91 | if (CUDA_SUCCESS != r) { 92 | cuGetErrorString(r, &err_str); 93 | printf("Failed to get the cuda device name: %s. (exiting).\n", err_str); 94 | exit(EXIT_FAILURE); 95 | } 96 | 97 | printf("Cuda device: %s.\n", name); 98 | 99 | r = cuCtxCreate(&context, 0, device); 100 | if (CUDA_SUCCESS != r) { 101 | cuGetErrorString(r, &err_str); 102 | printf("Failed to create a cuda context: %s. (exiting).\n", err_str); 103 | exit(EXIT_FAILURE); 104 | } 105 | 106 | /* Query capabilities. */ 107 | CUVIDDECODECAPS decode_caps = {}; 108 | decode_caps.eCodecType = cudaVideoCodec_H264; 109 | decode_caps.eChromaFormat = cudaVideoChromaFormat_420; 110 | decode_caps.nBitDepthMinus8 = 0; 111 | 112 | r = cuvidGetDecoderCaps(&decode_caps); 113 | if (CUDA_SUCCESS != r) { 114 | cuGetErrorString(r, &err_str); 115 | printf("Failed to get decoder caps: %s (exiting).\n", err_str); 116 | exit(EXIT_FAILURE); 117 | } 118 | 119 | /* Create decoder context. */ 120 | CUVIDDECODECREATEINFO create_info = { 0 }; 121 | create_info.CodecType = decode_caps.eCodecType; /* cudaVideoCodex_XXX */ 122 | create_info.ChromaFormat = decode_caps.eChromaFormat; /* cudaVideoChromaFormat_XXX */ 123 | create_info.OutputFormat = cudaVideoSurfaceFormat_NV12; /* cudaVideoSurfaceFormat_XXX */ 124 | create_info.ulCreationFlags = cudaVideoCreate_PreferCUVID; /* cudaVideoCreate_XXX */ 125 | create_info.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave; /* cudaVideoDeinterlaceMode_XXX */ 126 | create_info.bitDepthMinus8 = decode_caps.nBitDepthMinus8;; 127 | create_info.ulNumOutputSurfaces = 2; /* Maximum number of internal decode surfaces. */ 128 | create_info.ulNumDecodeSurfaces = 4; /* @todo from NvDecoder.cpp, assuming worst case here ... Maximum number of internal decode surfaces. */ 129 | create_info.ulIntraDecodeOnly = 0; /* @todo this seems like an interesting flag. 
*/ 130 | 131 | /* Size is specific for the moonlight.264 file. */ 132 | create_info.ulWidth = 512; /* Coded sequence width in pixels. */ 133 | create_info.ulHeight = 384; /* Coded sequence height in pixels. */ 134 | create_info.ulTargetWidth = create_info.ulWidth; /* Post-processed output width (should be aligned to 2). */ 135 | create_info.ulTargetHeight = create_info.ulHeight; /* Post-processed output height (should be aligned to 2). */ 136 | 137 | 138 | /* @todo do we need this? */ 139 | /* create_info.vidLock = ...*/ 140 | 141 | r = cuvidCreateDecoder(&decoder, &create_info); 142 | if (CUDA_SUCCESS != r) { 143 | cuGetErrorString(r, &err_str); 144 | printf("Failed to create the decoder: %s. (exiting).\n", err_str); 145 | exit(EXIT_FAILURE); 146 | } 147 | 148 | /* Create a video parser that gives us the CUVIDPICPARAMS structures. */ 149 | CUVIDPARSERPARAMS parser_params; 150 | memset((void*)&parser_params, 0x00, sizeof(parser_params)); 151 | parser_params.CodecType = create_info.CodecType; 152 | parser_params.ulMaxNumDecodeSurfaces = create_info.ulNumDecodeSurfaces; 153 | parser_params.ulClockRate = 0; 154 | parser_params.ulErrorThreshold = 0; 155 | parser_params.ulMaxDisplayDelay = 1; 156 | parser_params.pUserData = nullptr; 157 | parser_params.pfnSequenceCallback = parser_sequence_callback; 158 | parser_params.pfnDecodePicture = parser_decode_picture_callback; 159 | parser_params.pfnDisplayPicture = parser_display_picture_callback; 160 | 161 | CUvideoparser parser = nullptr; 162 | r = cuvidCreateVideoParser(&parser, &parser_params); 163 | 164 | if (CUDA_SUCCESS != r) { 165 | cuGetErrorString(r, &err_str); 166 | printf("Failed to create a video parser: %s (exiting).\n", err_str); 167 | exit(EXIT_FAILURE); 168 | } 169 | 170 | /* Load our h264 nal parser. */ 171 | std::string filename = ""; 172 | filename = "./moonlight.264"; 173 | 174 | /* Instead of reading the file one nal at a time, we just read a huge chunk and feed that into the decoder. 
*/ 175 | std::ifstream ifs(filename.c_str(), std::ios::in | std::ios::binary); 176 | if (!ifs.is_open()) { 177 | printf("Failed to open the input .h264 file: %s. (exiting).\n", filename.c_str()); 178 | exit(EXIT_FAILURE); 179 | } 180 | 181 | ifs.seekg(0, std::ifstream::end); 182 | size_t ifs_size = ifs.tellg(); 183 | ifs.seekg(0, std::ifstream::beg); 184 | printf("Loaded %s which holds %zu bytes.\n", filename.c_str(), ifs_size); 185 | 186 | char* ifs_buf = (char*)malloc(ifs_size); 187 | ifs.read(ifs_buf, ifs_size); 188 | 189 | CUVIDSOURCEDATAPACKET pkt; 190 | pkt.flags = 0; 191 | pkt.payload_size = ifs_size; 192 | pkt.payload = (uint8_t*)ifs_buf; 193 | pkt.timestamp = 0; 194 | 195 | r = cuvidParseVideoData(parser, &pkt); 196 | if (CUDA_SUCCESS != r) { 197 | cuGetErrorString(r, &err_str); 198 | printf("Failed to parse h264 packet: %s (exiting).\n", err_str); 199 | exit(EXIT_FAILURE); 200 | } 201 | 202 | /* Cleanup */ 203 | /* ------------------------------------------------------ */ 204 | 205 | r = cuCtxDestroy(context); 206 | if (CUDA_SUCCESS != r) { 207 | cuGetErrorString(r, &err_str); 208 | printf("Failed to cleanly destroy the cuda context: %s (exiting).\n", err_str); 209 | exit(EXIT_FAILURE); 210 | } 211 | 212 | r = cuvidDestroyDecoder(decoder); 213 | if (CUDA_SUCCESS != r) { 214 | cuGetErrorString(r, &err_str); 215 | printf("Failed to cleanly destroy the decoder context: %s. (exiting).\n", err_str); 216 | exit(EXIT_FAILURE); 217 | } 218 | 219 | if (nullptr != parser) { 220 | r = cuvidDestroyVideoParser(parser); 221 | if (CUDA_SUCCESS != r) { 222 | cuGetErrorString(r, &err_str); 223 | printf("Failed to the video parser context: %s. 
(exiting).\n", err_str); 224 | exit(EXIT_FAILURE); 225 | } 226 | } 227 | 228 | context = nullptr; 229 | decoder = nullptr; 230 | parser = nullptr; 231 | 232 | return 0; 233 | } 234 | 235 | /* ------------------------------------------------ */ 236 | 237 | static int parser_sequence_callback(void* user, CUVIDEOFORMAT* fmt) { 238 | printf("CUVIDEOFORMAT.Coded size: %d x %d\n", fmt->coded_width, fmt->coded_height); 239 | printf("CUVIDEOFORMAT.Display area: %d %d %d %d\n", fmt->display_area.left, fmt->display_area.top, fmt->display_area.right, fmt->display_area.bottom); 240 | printf("CUVIDEOFORMAT.Bitrate: %u\n", fmt->bitrate); 241 | return 0; 242 | } 243 | 244 | static int parser_decode_picture_callback(void* user, CUVIDPICPARAMS* pic) { 245 | 246 | CUresult r = CUDA_SUCCESS; 247 | 248 | if (nullptr == decoder) { 249 | printf("decoder is nullptr. (exiting)."); 250 | exit(EXIT_FAILURE); 251 | } 252 | 253 | r = cuvidDecodePicture(decoder, pic); 254 | if (CUDA_SUCCESS != r) { 255 | printf("Failed to decode the picture."); 256 | } 257 | 258 | return 1; 259 | } 260 | 261 | static int parser_display_picture_callback(void* user, CUVIDPARSERDISPINFO* info) { 262 | 263 | const char* err_str = nullptr; 264 | CUresult r = CUDA_SUCCESS; 265 | CUVIDPROCPARAMS vpp = { 0 }; 266 | unsigned int pitch = 0; 267 | int to_map = info->picture_index; 268 | 269 | vpp.progressive_frame = info->progressive_frame; 270 | vpp.top_field_first = info->top_field_first; 271 | vpp.unpaired_field = (info->repeat_first_field < 0); 272 | vpp.second_field = 0; 273 | 274 | r = cuvidMapVideoFrame(decoder, to_map, (unsigned long long*)&device, &pitch, &vpp); 275 | 276 | if (CUDA_SUCCESS != r) { 277 | cuGetErrorString(r, &err_str); 278 | printf("- mapping: %u failed: %s\n", to_map, err_str); 279 | return 0; 280 | } 281 | 282 | printf("+ mapping: %u succeeded\n", to_map); 283 | 284 | r = cuvidUnmapVideoFrame(decoder, (unsigned long long)device); 285 | if (CUDA_SUCCESS != r) { 286 | cuGetErrorString(r, 
&err_str); 287 | printf("- failed to unmap the video frame: %s, %d\n", err_str, to_map); 288 | return 0; 289 | } 290 | 291 | return 1; 292 | } 293 | 294 | /* ------------------------------------------------ */ 295 | 296 | static void print_cuvid_decode_caps(CUVIDDECODECAPS* caps) { 297 | 298 | if (nullptr == caps) { 299 | printf("Cannot print the cuvid decode caps as the given pointer is a nullptr."); 300 | return; 301 | } 302 | 303 | printf("CUVIDDECODECAPS.nBitDepthMinus8: %u\n", caps->nBitDepthMinus8); 304 | printf("CUVIDDECODECAPS.bIsSupported: %u\n", caps->bIsSupported); 305 | printf("CUVIDDECODECAPS.nMaxWidth: %u\n", caps->nMaxWidth); 306 | printf("CUVIDDECODECAPS.nMaxHeight: %u\n", caps->nMaxHeight); 307 | printf("CUVIDDECODECAPS.nMaxMBCount: %u\n", caps->nMaxMBCount); 308 | printf("CUVIDDECODECAPS.nMinWidth: %u\n", caps->nMinWidth); 309 | printf("CUVIDDECODECAPS.nMinHeight: %u\n", caps->nMinHeight); 310 | } 311 | 312 | static void print_cuvid_parser_disp_info(CUVIDPARSERDISPINFO* info) { 313 | 314 | if (nullptr == info) { 315 | printf("Cannot print the cuvid parser disp info, nullptr given."); 316 | return; 317 | } 318 | 319 | printf("CUVIDPARSERDISPINFO.picture_index: %d\n", info->picture_index); 320 | printf("CUVIDPARSERDISPINFO.progressive_frame: %d\n", info->progressive_frame); 321 | printf("CUVIDPARSERDISPINFO.top_field_first: %d\n", info->top_field_first); 322 | printf("CUVIDPARSERDISPINFO.repeat_first_field: %d\n", info->repeat_first_field); 323 | printf("CUVIDPARSERDISPINFO.timestamp: %lld\n", info->timestamp); 324 | } 325 | 326 | static void print_cuvid_pic_params(CUVIDPICPARAMS* pic) { 327 | 328 | if (nullptr == pic) { 329 | printf("Cannot print the cuvid pic params, nullptr given."); 330 | return; 331 | } 332 | 333 | printf("CUVIDPICPARAMS.PicWithInMbs: %d\n", pic->PicWidthInMbs); 334 | printf("CUVIDPICPARAMS.FrameHeightInMbs: %d\n", pic->FrameHeightInMbs); 335 | printf("CUVIDPICPARAMS.CurrPicIdx: %d\n", pic->CurrPicIdx); 336 | 
printf("CUVIDPICPARAMS.field_pic_flag: %d\n", pic->field_pic_flag); 337 | printf("CUVIDPICPARAMS.bottom_field_flag: %d\n", pic->bottom_field_flag); 338 | printf("CUVIDPICPARAMS.second_field: %d\n", pic->second_field); 339 | printf("CUVIDPICPARAMS.nBitstreamDataLen: %u\n", pic->nBitstreamDataLen); 340 | printf("CUVIDPICPARAMS.nNumSlices: %u\n", pic->nNumSlices); 341 | printf("CUVIDPICPARAMS.ref_pic_flag: %d\n", pic->ref_pic_flag); 342 | printf("CUVIDPICPARAMS.intra_pic_flag: %d\n", pic->intra_pic_flag); 343 | } 344 | 345 | /* ------------------------------------------------ */ 346 | 347 | -------------------------------------------------------------------------------- /src/test-nvidia-decode-v1.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | NVIDIA DECODE EXPERIMENTS 4 | ========================= 5 | 6 | GENERAL INFO: 7 | 8 | This repository contains a collection of experiments using 9 | the NVDECODE SDK to decode H264 using hardware 10 | acceleration. These tests are meant to be minimal and should 11 | not be used in production environments. The code was written 12 | while diving into the APIs so things might be incorrect. 13 | 14 | QUESTIONS: 15 | 16 | Q1: Should I use the CUVIDDECODECREATEINFO.vidLock .. and when? 17 | A1: ... 18 | 19 | Q2: What are the video parser callbacks supposed to return? 20 | A2: .... 21 | 22 | Q3: When calling a cuvidCreateVideoParser(), do I need to provide `pExtVideoInfo` ? 23 | A3: I tested this by setting the pExtVideoInfo member to 24 | nullptr in the cudaDecodeGL example and things were 25 | working fine w/o. 
26 | 27 | REFERENCES: 28 | 29 | [0]: http://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf "Cuda C Programming Guide" 30 | [1]: https://github.com/gpac/gpac/blob/9bf9d23283553bf8214d13b286ce759ddd216be0/modules/nvdec/nvdec.c "GPAC implementation of NVDECODE" 31 | 32 | */ 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | /* ------------------------------------------------ */ 41 | 42 | static void print_cuvid_decode_caps(CUVIDDECODECAPS* caps); 43 | static void print_cuvid_parser_disp_info(CUVIDPARSERDISPINFO* info); 44 | static void print_cuvid_pic_params(CUVIDPICPARAMS* pic); 45 | static int parser_sequence_callback(void* user, CUVIDEOFORMAT* fmt); 46 | static int parser_decode_picture_callback(void* user, CUVIDPICPARAMS* pic); 47 | static int parser_display_picture_callback(void* user, CUVIDPARSERDISPINFO* info); 48 | 49 | /* ------------------------------------------------ */ 50 | 51 | CUcontext context = nullptr; 52 | CUvideodecoder decoder = nullptr; 53 | CUdevice device = 0; 54 | 55 | /* ------------------------------------------------ */ 56 | 57 | int main() { 58 | 59 | printf("\n\nnvidia decode test v1.\n\n"); 60 | 61 | CUresult r = CUDA_SUCCESS; 62 | const char* err_str = nullptr; 63 | 64 | /* Initialize cuda, must be done before anything else. */ 65 | r = cuInit(0); 66 | if (CUDA_SUCCESS != r) { 67 | cuGetErrorString(r, &err_str); 68 | printf("Failed to initialize cuda: %s. (exiting).\n", err_str); 69 | exit(EXIT_FAILURE); 70 | } 71 | 72 | int device_count = 0; 73 | r = cuDeviceGetCount(&device_count); 74 | if (CUDA_SUCCESS != r) { 75 | cuGetErrorString(r, &err_str); 76 | printf("Failed to get the cuda device count: %s. (exiting).\n", err_str); 77 | exit(EXIT_FAILURE); 78 | } 79 | 80 | printf("We have %d cuda device(s).\n", device_count); 81 | 82 | r = cuDeviceGet(&device, 0); 83 | if (CUDA_SUCCESS != r) { 84 | cuGetErrorString(r, &err_str); 85 | printf("Failed to get a handle to the cuda device: %s. 
(exiting).\n", err_str); 86 | exit(EXIT_FAILURE); 87 | } 88 | 89 | char name[80] = { 0 }; 90 | r = cuDeviceGetName(name, sizeof(name), device); 91 | if (CUDA_SUCCESS != r) { 92 | cuGetErrorString(r, &err_str); 93 | printf("Failed to get the cuda device name: %s. (exiting).\n", err_str); 94 | exit(EXIT_FAILURE); 95 | } 96 | 97 | printf("Cuda device: %s.\n", name); 98 | 99 | r = cuCtxCreate(&context, 0, device); 100 | if (CUDA_SUCCESS != r) { 101 | cuGetErrorString(r, &err_str); 102 | printf("Failed to create a cuda context: %s. (exiting).\n", err_str); 103 | exit(EXIT_FAILURE); 104 | } 105 | 106 | /* Create a video parser that gives us the CUVIDPICPARAMS structures. */ 107 | CUVIDPARSERPARAMS parser_params; 108 | memset((void*)&parser_params, 0x00, sizeof(parser_params)); 109 | parser_params.CodecType = cudaVideoCodec_H264; 110 | parser_params.ulMaxNumDecodeSurfaces = 1; 111 | parser_params.ulMaxDisplayDelay = 0; 112 | parser_params.ulClockRate = 0; 113 | parser_params.ulErrorThreshold = 0; 114 | parser_params.pUserData = nullptr; 115 | parser_params.pfnSequenceCallback = parser_sequence_callback; 116 | parser_params.pfnDecodePicture = parser_decode_picture_callback; 117 | parser_params.pfnDisplayPicture = parser_display_picture_callback; 118 | 119 | CUvideoparser parser = nullptr; 120 | r = cuvidCreateVideoParser(&parser, &parser_params); 121 | 122 | if (CUDA_SUCCESS != r) { 123 | cuGetErrorString(r, &err_str); 124 | printf("Failed to create a video parser: %s (exiting).\n", err_str); 125 | exit(EXIT_FAILURE); 126 | } 127 | 128 | /* Load our h264 nal parser. */ 129 | std::string filename = ""; 130 | filename = "./moonlight.264"; 131 | 132 | /* Instead of reading the file one nal at a time, we just read a huge chunk and feed that into the decoder. */ 133 | std::ifstream ifs(filename.c_str(), std::ios::in | std::ios::binary); 134 | if (!ifs.is_open()) { 135 | printf("Failed to open the input .h264 file: %s. 
(exiting).\n", filename.c_str()); 136 | exit(EXIT_FAILURE); 137 | } 138 | 139 | ifs.seekg(0, std::ifstream::end); 140 | size_t ifs_size = ifs.tellg(); 141 | ifs.seekg(0, std::ifstream::beg); 142 | printf("Loaded %s which holds %zu bytes.\n", filename.c_str(), ifs_size); 143 | 144 | char* ifs_buf = (char*)malloc(ifs_size); 145 | ifs.read(ifs_buf, ifs_size); 146 | 147 | CUVIDSOURCEDATAPACKET pkt; 148 | pkt.flags = 0; 149 | pkt.payload_size = ifs_size; 150 | pkt.payload = (uint8_t*)ifs_buf; 151 | pkt.timestamp = 0; 152 | 153 | r = cuvidParseVideoData(parser, &pkt); 154 | if (CUDA_SUCCESS != r) { 155 | cuGetErrorString(r, &err_str); 156 | printf("Failed to parse h264 packet: %s (exiting).\n", err_str); 157 | exit(EXIT_FAILURE); 158 | } 159 | 160 | if (nullptr == decoder) { 161 | printf("Error: no decoder created yet, should have been done inside the sequence callback. (exiting).\n"); 162 | exit(EXIT_FAILURE); 163 | } 164 | 165 | /* Cleanup */ 166 | /* ------------------------------------------------------ */ 167 | 168 | if (nullptr != context) { 169 | r = cuCtxDestroy(context); 170 | if (CUDA_SUCCESS != r) { 171 | cuGetErrorString(r, &err_str); 172 | printf("Failed to cleanly destroy the cuda context: %s (exiting).\n", err_str); 173 | exit(EXIT_FAILURE); 174 | } 175 | } 176 | 177 | if (nullptr != decoder) { 178 | r = cuvidDestroyDecoder(decoder); 179 | if (CUDA_SUCCESS != r) { 180 | cuGetErrorString(r, &err_str); 181 | printf("Failed to cleanly destroy the decoder context: %s. (exiting).\n", err_str); 182 | exit(EXIT_FAILURE); 183 | } 184 | } 185 | 186 | if (nullptr != parser) { 187 | r = cuvidDestroyVideoParser(parser); 188 | if (CUDA_SUCCESS != r) { 189 | cuGetErrorString(r, &err_str); 190 | printf("Failed to the video parser context: %s. 
(exiting).\n", err_str); 191 | exit(EXIT_FAILURE); 192 | } 193 | } 194 | 195 | context = nullptr; 196 | decoder = nullptr; 197 | parser = nullptr; 198 | 199 | return 0; 200 | } 201 | 202 | /* ------------------------------------------------ */ 203 | 204 | static int parser_sequence_callback(void* user, CUVIDEOFORMAT* fmt) { 205 | 206 | const char* err_str = nullptr; 207 | 208 | if (nullptr == context) { 209 | printf("The CUcontext is nullptr, you should initialize it before kicking off the decoder.\n"); 210 | exit(EXIT_FAILURE); 211 | } 212 | 213 | printf("CUVIDEOFORMAT.Coded size: %d x %d\n", fmt->coded_width, fmt->coded_height); 214 | printf("CUVIDEOFORMAT.Display area: %d %d %d %d\n", fmt->display_area.left, fmt->display_area.top, fmt->display_area.right, fmt->display_area.bottom); 215 | printf("CUVIDEOFORMAT.Bitrate: %u\n", fmt->bitrate); 216 | 217 | CUVIDDECODECAPS decode_caps; 218 | memset((char*)&decode_caps, 0x00, sizeof(decode_caps)); 219 | decode_caps.eCodecType = fmt->codec; 220 | decode_caps.eChromaFormat = fmt->chroma_format; 221 | decode_caps.nBitDepthMinus8 = fmt->bit_depth_luma_minus8; 222 | 223 | CUresult r = cuvidGetDecoderCaps(&decode_caps); 224 | if (CUDA_SUCCESS != r) { 225 | cuGetErrorString(r, &err_str); 226 | printf("Failed to get decoder caps: %s (exiting).\n", err_str); 227 | exit(EXIT_FAILURE); 228 | } 229 | 230 | if (!decode_caps.bIsSupported) { 231 | printf("The video file format is not supported by NVDECODE. (exiting).\n"); 232 | exit(EXIT_FAILURE); 233 | } 234 | 235 | /* Create decoder context. */ 236 | CUVIDDECODECREATEINFO create_info = { 0 }; 237 | create_info.CodecType = fmt->codec; 238 | create_info.ChromaFormat = fmt->chroma_format; 239 | create_info.OutputFormat = (fmt->bit_depth_luma_minus8) ? 
cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12; 240 | create_info.bitDepthMinus8 = fmt->bit_depth_luma_minus8; 241 | create_info.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave; 242 | create_info.ulNumOutputSurfaces = 2; 243 | create_info.ulNumDecodeSurfaces = 20; 244 | create_info.ulCreationFlags = cudaVideoCreate_PreferCUVID; 245 | create_info.vidLock = nullptr; 246 | create_info.ulIntraDecodeOnly = 0; 247 | create_info.ulTargetWidth = fmt->coded_width; 248 | create_info.ulTargetHeight = fmt->coded_height; 249 | create_info.ulWidth = fmt->coded_width; 250 | create_info.ulHeight = fmt->coded_height; 251 | 252 | cuCtxPushCurrent(context); 253 | { 254 | r = cuvidCreateDecoder(&decoder, &create_info); 255 | if (CUDA_SUCCESS != r) { 256 | cuGetErrorString(r, &err_str); 257 | printf("Failed to create the decoder: %s. (exiting).\n", err_str); 258 | exit(EXIT_FAILURE); 259 | } 260 | } 261 | cuCtxPopCurrent(nullptr); 262 | 263 | printf("Created the decoder.\n"); 264 | 265 | return create_info.ulNumDecodeSurfaces; 266 | } 267 | 268 | static int parser_decode_picture_callback(void* user, CUVIDPICPARAMS* pic) { 269 | 270 | CUresult r = CUDA_SUCCESS; 271 | 272 | if (nullptr == decoder) { 273 | printf("decoder is nullptr. 
(exiting)."); 274 | exit(EXIT_FAILURE); 275 | } 276 | 277 | r = cuvidDecodePicture(decoder, pic); 278 | if (CUDA_SUCCESS != r) { 279 | printf("Failed to decode the picture."); 280 | } 281 | 282 | return 1; 283 | } 284 | 285 | static int parser_display_picture_callback(void* user, CUVIDPARSERDISPINFO* info) { 286 | 287 | const char* err_str = nullptr; 288 | CUresult r = CUDA_SUCCESS; 289 | unsigned int pitch = 0; 290 | CUdeviceptr src_frame = 0; 291 | 292 | CUVIDPROCPARAMS vpp = { 0 }; 293 | vpp.progressive_frame = info->progressive_frame; 294 | vpp.second_field = info->repeat_first_field + 1; 295 | vpp.top_field_first = info->top_field_first; 296 | vpp.unpaired_field = (info->repeat_first_field < 0); 297 | vpp.output_stream = nullptr; /* @todo do we need to set this to something? */ 298 | 299 | r = cuvidMapVideoFrame(decoder, info->picture_index, &src_frame, &pitch, &vpp); 300 | 301 | if (CUDA_SUCCESS != r) { 302 | cuGetErrorString(r, &err_str); 303 | printf("- mapping: %u failed: %s\n", info->picture_index, err_str); 304 | return 0; 305 | } 306 | 307 | printf("+ mapping: %u succeeded\n", info->picture_index); 308 | 309 | r = cuvidUnmapVideoFrame(decoder, src_frame); 310 | if (CUDA_SUCCESS != r) { 311 | cuGetErrorString(r, &err_str); 312 | printf("- failed to unmap the video frame: %s, %d\n", err_str, info->picture_index); 313 | return 0; 314 | } 315 | 316 | return 1; 317 | } 318 | 319 | /* ------------------------------------------------ */ 320 | 321 | static void print_cuvid_decode_caps(CUVIDDECODECAPS* caps) { 322 | 323 | if (nullptr == caps) { 324 | printf("Cannot print the cuvid decode caps as the given pointer is a nullptr."); 325 | return; 326 | } 327 | 328 | printf("CUVIDDECODECAPS.nBitDepthMinus8: %u\n", caps->nBitDepthMinus8); 329 | printf("CUVIDDECODECAPS.bIsSupported: %u\n", caps->bIsSupported); 330 | printf("CUVIDDECODECAPS.nMaxWidth: %u\n", caps->nMaxWidth); 331 | printf("CUVIDDECODECAPS.nMaxHeight: %u\n", caps->nMaxHeight); 332 | 
printf("CUVIDDECODECAPS.nMaxMBCount: %u\n", caps->nMaxMBCount); 333 | printf("CUVIDDECODECAPS.nMinWidth: %u\n", caps->nMinWidth); 334 | printf("CUVIDDECODECAPS.nMinHeight: %u\n", caps->nMinHeight); 335 | } 336 | 337 | static void print_cuvid_parser_disp_info(CUVIDPARSERDISPINFO* info) { 338 | 339 | if (nullptr == info) { 340 | printf("Cannot print the cuvid parser disp info, nullptr given."); 341 | return; 342 | } 343 | 344 | printf("CUVIDPARSERDISPINFO.picture_index: %d\n", info->picture_index); 345 | printf("CUVIDPARSERDISPINFO.progressive_frame: %d\n", info->progressive_frame); 346 | printf("CUVIDPARSERDISPINFO.top_field_first: %d\n", info->top_field_first); 347 | printf("CUVIDPARSERDISPINFO.repeat_first_field: %d\n", info->repeat_first_field); 348 | printf("CUVIDPARSERDISPINFO.timestamp: %lld\n", info->timestamp); 349 | } 350 | 351 | static void print_cuvid_pic_params(CUVIDPICPARAMS* pic) { 352 | 353 | if (nullptr == pic) { 354 | printf("Cannot print the cuvid pic params, nullptr given."); 355 | return; 356 | } 357 | 358 | printf("CUVIDPICPARAMS.PicWithInMbs: %d\n", pic->PicWidthInMbs); 359 | printf("CUVIDPICPARAMS.FrameHeightInMbs: %d\n", pic->FrameHeightInMbs); 360 | printf("CUVIDPICPARAMS.CurrPicIdx: %d\n", pic->CurrPicIdx); 361 | printf("CUVIDPICPARAMS.field_pic_flag: %d\n", pic->field_pic_flag); 362 | printf("CUVIDPICPARAMS.bottom_field_flag: %d\n", pic->bottom_field_flag); 363 | printf("CUVIDPICPARAMS.second_field: %d\n", pic->second_field); 364 | printf("CUVIDPICPARAMS.nBitstreamDataLen: %u\n", pic->nBitstreamDataLen); 365 | printf("CUVIDPICPARAMS.nNumSlices: %u\n", pic->nNumSlices); 366 | printf("CUVIDPICPARAMS.ref_pic_flag: %d\n", pic->ref_pic_flag); 367 | printf("CUVIDPICPARAMS.intra_pic_flag: %d\n", pic->intra_pic_flag); 368 | } 369 | 370 | /* ------------------------------------------------ */ 371 | 372 | -------------------------------------------------------------------------------- /src/test-nvidia-decode-v2.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | NVIDIA DECODE EXPERIMENTS 3 | ========================= 4 | 5 | GENERAL INFO: 6 | 7 | This repository contains a collection of experiments using 8 | the NVDECODE SDK to decode H264 using hardware 9 | acceleration. These tests are meant to be minimal and should 10 | not be used in production environments. The code was written 11 | while diving into the APIs so things might be incorrect. 12 | 13 | This particular test writes the decoded YUV into a file which 14 | can be played back with ffplay. Though atm only videos with 15 | IDR only frames are working correctly. 16 | 17 | QUESTIONS: 18 | 19 | Q1: Should I use the CUVIDDECODECREATEINFO.vidLock .. and when? 20 | A1: ... 21 | 22 | Q2: What are the video parser callbacks supposed to return? 23 | A2: .... 24 | 25 | Q3: When calling a cuvidCreateVideoParser(), do I need to provide `pExtVideoInfo` ? 26 | A3: I tested this by setting the pExtVideoInfo member to nullptr in the cudaDecodeGL example 27 | and things were working fine w/o. 
28 | 29 | REFERENCES: 30 | 31 | [0]: http://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf "Cuda C Programming Guide" 32 | [1]: https://github.com/gpac/gpac/blob/9bf9d23283553bf8214d13b286ce759ddd216be0/modules/nvdec/nvdec.c "GPAC implementation of NVDECODE" 33 | 34 | */ 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | /* ------------------------------------------------ */ 43 | 44 | static void print_cuvid_decode_caps(CUVIDDECODECAPS* caps); 45 | static void print_cuvid_parser_disp_info(CUVIDPARSERDISPINFO* info); 46 | static void print_cuvid_pic_params(CUVIDPICPARAMS* pic); 47 | static int parser_sequence_callback(void* user, CUVIDEOFORMAT* fmt); 48 | static int parser_decode_picture_callback(void* user, CUVIDPICPARAMS* pic); 49 | static int parser_display_picture_callback(void* user, CUVIDPARSERDISPINFO* info); 50 | 51 | /* ------------------------------------------------ */ 52 | 53 | CUcontext context = { 0 }; 54 | CUvideodecoder decoder = nullptr; 55 | CUdevice device = { 0 }; 56 | std::ofstream ofs; 57 | 58 | char* yuv_buffer = nullptr; 59 | int yuv_nbytes_needed = 0; 60 | int coded_width = 0; 61 | int coded_height = 0; 62 | 63 | /* ------------------------------------------------ */ 64 | 65 | int main() { 66 | 67 | printf("\n\nnvidia decode test v2.\n\n"); 68 | 69 | CUresult r = CUDA_SUCCESS; 70 | const char* err_str = nullptr; 71 | 72 | ofs.open("out.nv12", std::ios::out | std::ios::binary); 73 | if (!ofs.is_open()) { 74 | printf("Failed to open output file. (exiting).\n"); 75 | exit(EXIT_FAILURE); 76 | } 77 | 78 | /* Initialize cuda, must be done before anything else. */ 79 | r = cuInit(0); 80 | if (CUDA_SUCCESS != r) { 81 | cuGetErrorString(r, &err_str); 82 | printf("Failed to initialize cuda: %s. 
(exiting).\n", err_str); 83 | exit(EXIT_FAILURE); 84 | } 85 | 86 | int device_count = 0; 87 | r = cuDeviceGetCount(&device_count); 88 | if (CUDA_SUCCESS != r) { 89 | cuGetErrorString(r, &err_str); 90 | printf("Failed to get the cuda device count: %s. (exiting).\n", err_str); 91 | exit(EXIT_FAILURE); 92 | } 93 | 94 | printf("We have %d cuda device(s).\n", device_count); 95 | 96 | r = cuDeviceGet(&device, 0); 97 | if (CUDA_SUCCESS != r) { 98 | cuGetErrorString(r, &err_str); 99 | printf("Failed to get a handle to the cuda device: %s. (exiting).\n", err_str); 100 | exit(EXIT_FAILURE); 101 | } 102 | 103 | char name[80] = { 0 }; 104 | r = cuDeviceGetName(name, sizeof(name), device); 105 | if (CUDA_SUCCESS != r) { 106 | cuGetErrorString(r, &err_str); 107 | printf("Failed to get the cuda device name: %s. (exiting).\n", err_str); 108 | exit(EXIT_FAILURE); 109 | } 110 | 111 | printf("Cuda device: %s.\n", name); 112 | 113 | r = cuCtxCreate(&context, 0, device); 114 | if (CUDA_SUCCESS != r) { 115 | cuGetErrorString(r, &err_str); 116 | printf("Failed to create a cuda context: %s. (exiting).\n", err_str); 117 | exit(EXIT_FAILURE); 118 | } 119 | 120 | /* Create a video parser that gives us the CUVIDPICPARAMS structures. 
*/ 121 | CUVIDPARSERPARAMS parser_params; 122 | memset((void*)&parser_params, 0x00, sizeof(parser_params)); 123 | parser_params.CodecType = cudaVideoCodec_H264; 124 | parser_params.ulMaxNumDecodeSurfaces = 4; 125 | parser_params.ulClockRate = 0; 126 | parser_params.ulErrorThreshold = 0; 127 | parser_params.ulMaxDisplayDelay = 1; 128 | parser_params.pUserData = nullptr; 129 | parser_params.pfnSequenceCallback = parser_sequence_callback; 130 | parser_params.pfnDecodePicture = parser_decode_picture_callback; 131 | parser_params.pfnDisplayPicture = parser_display_picture_callback; 132 | 133 | CUvideoparser parser = nullptr; 134 | r = cuvidCreateVideoParser(&parser, &parser_params); 135 | 136 | if (CUDA_SUCCESS != r) { 137 | cuGetErrorString(r, &err_str); 138 | printf("Failed to create a video parser: %s (exiting).\n", err_str); 139 | exit(EXIT_FAILURE); 140 | } 141 | 142 | /* Load our h264 nal parser. */ 143 | std::string filename = ""; 144 | filename = "./moonlight.264"; 145 | 146 | /* Instead of reading the file one nal at a time, we just read a huge chunk and feed that into the decoder. */ 147 | std::ifstream ifs(filename.c_str(), std::ios::in | std::ios::binary); 148 | if (!ifs.is_open()) { 149 | printf("Failed to open the file: %s. 
(exiting).\n", filename.c_str()); 150 | exit(EXIT_FAILURE); 151 | } 152 | 153 | ifs.seekg(0, std::ifstream::end); 154 | size_t ifs_size = ifs.tellg(); 155 | ifs.seekg(0, std::ifstream::beg); 156 | printf("Loaded %s which holds %zu bytes.\n", filename.c_str(), ifs_size); 157 | 158 | char* ifs_buf = (char*)malloc(ifs_size); 159 | ifs.read(ifs_buf, ifs_size); 160 | 161 | CUVIDSOURCEDATAPACKET pkt; 162 | pkt.flags = 0; 163 | pkt.payload_size = ifs_size; 164 | pkt.payload = (uint8_t*)ifs_buf; 165 | pkt.timestamp = 0; 166 | 167 | r = cuvidParseVideoData(parser, &pkt); 168 | if (CUDA_SUCCESS != r) { 169 | cuGetErrorString(r, &err_str); 170 | printf("Failed to parse h264 packet: %s (exiting).\n", err_str); 171 | exit(EXIT_FAILURE); 172 | } 173 | 174 | /* Cleanup */ 175 | /* ------------------------------------------------------ */ 176 | 177 | printf("Cleaning up.\n"); 178 | 179 | if (nullptr != parser) { 180 | printf("Destroying video parser.\n"); 181 | r = cuvidDestroyVideoParser(parser); 182 | if (CUDA_SUCCESS != r) { 183 | cuGetErrorString(r, &err_str); 184 | printf("Failed to the video parser context: %s. (exiting).\n", err_str); 185 | exit(EXIT_FAILURE); 186 | } 187 | } 188 | 189 | if (nullptr != decoder) { 190 | printf("Destroying decoder.\n"); 191 | r = cuvidDestroyDecoder(decoder); 192 | if (CUDA_SUCCESS != r) { 193 | cuGetErrorString(r, &err_str); 194 | printf("Failed to cleanly destroy the decoder context: %s. 
(exiting).\n", err_str); 195 | exit(EXIT_FAILURE); 196 | } 197 | } 198 | 199 | if (nullptr != context) { 200 | printf("Destroying context.\n"); 201 | r = cuCtxDestroy(context); 202 | if (CUDA_SUCCESS != r) { 203 | cuGetErrorString(r, &err_str); 204 | printf("Failed to cleanly destroy the cuda context: %s (exiting).\n", err_str); 205 | exit(EXIT_FAILURE); 206 | } 207 | printf("Context destroyed.\n"); 208 | } 209 | 210 | if (nullptr != yuv_buffer) { 211 | cuMemFreeHost(yuv_buffer); 212 | printf("Freeing yuv buffer.\n"); 213 | yuv_buffer = nullptr; 214 | yuv_nbytes_needed = 0; 215 | } 216 | 217 | printf("Playback with: "); 218 | printf("ffplay -f rawvideo -pix_fmt nv12 -s %dx%d -i out.nv12\n", coded_width, coded_height); 219 | 220 | printf("Resetting state.\n"); 221 | context = nullptr; 222 | decoder = nullptr; 223 | parser = nullptr; 224 | coded_width = 0; 225 | coded_height = 0; 226 | 227 | if (ofs.is_open()) { 228 | ofs.close(); 229 | } 230 | 231 | return 0; 232 | } 233 | 234 | /* ------------------------------------------------ */ 235 | 236 | static int parser_sequence_callback(void* user, CUVIDEOFORMAT* fmt) { 237 | 238 | if (nullptr == context) { 239 | printf("The CUcontext is nullptr, you should initialize it before kicking off the decoder.\n"); 240 | exit(EXIT_FAILURE); 241 | } 242 | 243 | coded_width = fmt->coded_width; 244 | coded_height = fmt->coded_height; 245 | 246 | const char* err_str = nullptr; 247 | 248 | printf("CUVIDEOFORMAT.Coded size: %d x %d\n", fmt->coded_width, fmt->coded_height); 249 | printf("CUVIDEOFORMAT.Display area: %d %d %d %d\n", fmt->display_area.left, fmt->display_area.top, fmt->display_area.right, fmt->display_area.bottom); 250 | printf("CUVIDEOFORMAT.Bitrate: %u\n", fmt->bitrate); 251 | 252 | CUVIDDECODECAPS decode_caps; 253 | memset((char*)&decode_caps, 0x00, sizeof(decode_caps)); 254 | decode_caps.eCodecType = fmt->codec; 255 | decode_caps.eChromaFormat = fmt->chroma_format; 256 | decode_caps.nBitDepthMinus8 = 
fmt->bit_depth_luma_minus8; 257 | 258 | CUresult r = cuvidGetDecoderCaps(&decode_caps); 259 | if (CUDA_SUCCESS != r) { 260 | cuGetErrorString(r, &err_str); 261 | printf("Failed to get decoder caps: %s (exiting).\n", err_str); 262 | exit(EXIT_FAILURE); 263 | } 264 | 265 | if (!decode_caps.bIsSupported) { 266 | printf("The video file format is not supported by NVDECODE. (exiting).\n"); 267 | exit(EXIT_FAILURE); 268 | } 269 | 270 | /* Create decoder context. */ 271 | CUVIDDECODECREATEINFO create_info = { 0 }; 272 | create_info.CodecType = fmt->codec; 273 | create_info.ChromaFormat = fmt->chroma_format; 274 | create_info.OutputFormat = (fmt->bit_depth_luma_minus8) ? cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12; 275 | create_info.bitDepthMinus8 = fmt->bit_depth_luma_minus8; 276 | create_info.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave; 277 | create_info.ulNumOutputSurfaces = 1; 278 | create_info.ulNumDecodeSurfaces = 20; 279 | create_info.ulCreationFlags = cudaVideoCreate_PreferCUVID; 280 | create_info.vidLock = nullptr; 281 | create_info.ulIntraDecodeOnly = 0; /* Set to 1 when the source only has intra frames; memory will be optimized. */ 282 | create_info.ulTargetWidth = fmt->coded_width; 283 | create_info.ulTargetHeight = fmt->coded_height; 284 | create_info.ulWidth = fmt->coded_width; 285 | create_info.ulHeight = fmt->coded_height; 286 | 287 | cuCtxPushCurrent(context); 288 | { 289 | r = cuvidCreateDecoder(&decoder, &create_info); 290 | if (CUDA_SUCCESS != r) { 291 | cuGetErrorString(r, &err_str); 292 | printf("Failed to create the decoder: %s. (exiting).\n", err_str); 293 | exit(EXIT_FAILURE); 294 | } 295 | } 296 | cuCtxPopCurrent(nullptr); 297 | 298 | printf("Created the decoder.\n"); 299 | 300 | return 1; 301 | } 302 | 303 | static int parser_decode_picture_callback(void* user, CUVIDPICPARAMS* pic) { 304 | 305 | CUresult r = CUDA_SUCCESS; 306 | 307 | if (nullptr == decoder) { 308 | printf("decoder is nullptr. 
(exiting)."); 309 | exit(EXIT_FAILURE); 310 | } 311 | 312 | r = cuvidDecodePicture(decoder, pic); 313 | if (CUDA_SUCCESS != r) { 314 | printf("Failed to decode the picture."); 315 | } 316 | 317 | return 1; 318 | } 319 | 320 | static int parser_display_picture_callback(void* user, CUVIDPARSERDISPINFO* info) { 321 | 322 | const char* err_str = nullptr; 323 | CUresult r = CUDA_SUCCESS; 324 | CUVIDPROCPARAMS vpp = { 0 }; 325 | unsigned int pitch = 0; 326 | int to_map = info->picture_index; 327 | CUdeviceptr device_ptr = 0; 328 | 329 | vpp.progressive_frame = info->progressive_frame; 330 | vpp.top_field_first = info->top_field_first; 331 | vpp.unpaired_field = (info->repeat_first_field < 0); 332 | vpp.second_field = 0; 333 | 334 | r = cuvidMapVideoFrame(decoder, to_map, &device_ptr, &pitch, &vpp); 335 | 336 | if (CUDA_SUCCESS != r) { 337 | cuGetErrorString(r, &err_str); 338 | printf("- mapping: %u failed: %s\n", to_map, err_str); 339 | return 0; 340 | } 341 | 342 | if (nullptr == yuv_buffer) { 343 | printf("Allocating yuv buffer.\n"); 344 | yuv_nbytes_needed = (pitch * coded_height) + (pitch * coded_height * 0.5); 345 | r = cuMemAllocHost((void**)&yuv_buffer, yuv_nbytes_needed); 346 | if (CUDA_SUCCESS != r) { 347 | printf("Failed to allocate the buffer for the decoded yuv frames. (exiting).\n"); 348 | exit(EXIT_FAILURE); 349 | } 350 | } 351 | 352 | if (nullptr == yuv_buffer 353 | || 0 == yuv_nbytes_needed) 354 | { 355 | printf("No valid yuf buffer. (exiting).\n"); 356 | exit(EXIT_FAILURE); 357 | } 358 | 359 | r = cuMemcpyDtoH(yuv_buffer, device_ptr, yuv_nbytes_needed); 360 | if (CUDA_SUCCESS != r) { 361 | printf("Failed to copy the decode frame into our (cpu) buffer. (exiting).\n"); 362 | exit(EXIT_FAILURE); 363 | } 364 | 365 | if (false == ofs.is_open()) { 366 | printf("The output file is not opened. 
(exiting).\n"); 367 | exit(EXIT_FAILURE); 368 | } 369 | 370 | for (int j = 0; j < coded_height; ++j) { 371 | ofs.write(yuv_buffer + j * pitch, coded_width); 372 | } 373 | 374 | int half_height = coded_height * 0.5; 375 | for (int j = 0; j < half_height; ++j) { 376 | ofs.write(yuv_buffer + (coded_height * pitch) + j * pitch, coded_width); 377 | } 378 | 379 | ofs.flush(); 380 | 381 | printf("+ mapping: %u succeeded, device_ptr: %d\n", to_map, device_ptr); 382 | 383 | r = cuvidUnmapVideoFrame(decoder, device_ptr); 384 | if (CUDA_SUCCESS != r) { 385 | cuGetErrorString(r, &err_str); 386 | printf("- failed to unmap the video frame: %s, %d\n", err_str, to_map); 387 | return 0; 388 | } 389 | 390 | return 1; 391 | } 392 | 393 | /* ------------------------------------------------ */ 394 | 395 | static void print_cuvid_decode_caps(CUVIDDECODECAPS* caps) { 396 | 397 | if (nullptr == caps) { 398 | printf("Cannot print the cuvid decode caps as the given pointer is a nullptr."); 399 | return; 400 | } 401 | 402 | printf("CUVIDDECODECAPS.nBitDepthMinus8: %u\n", caps->nBitDepthMinus8); 403 | printf("CUVIDDECODECAPS.bIsSupported: %u\n", caps->bIsSupported); 404 | printf("CUVIDDECODECAPS.nMaxWidth: %u\n", caps->nMaxWidth); 405 | printf("CUVIDDECODECAPS.nMaxHeight: %u\n", caps->nMaxHeight); 406 | printf("CUVIDDECODECAPS.nMaxMBCount: %u\n", caps->nMaxMBCount); 407 | printf("CUVIDDECODECAPS.nMinWidth: %u\n", caps->nMinWidth); 408 | printf("CUVIDDECODECAPS.nMinHeight: %u\n", caps->nMinHeight); 409 | } 410 | 411 | static void print_cuvid_parser_disp_info(CUVIDPARSERDISPINFO* info) { 412 | 413 | if (nullptr == info) { 414 | printf("Cannot print the cuvid parser disp info, nullptr given."); 415 | return; 416 | } 417 | 418 | printf("CUVIDPARSERDISPINFO.picture_index: %d\n", info->picture_index); 419 | printf("CUVIDPARSERDISPINFO.progressive_frame: %d\n", info->progressive_frame); 420 | printf("CUVIDPARSERDISPINFO.top_field_first: %d\n", info->top_field_first); 421 | 
printf("CUVIDPARSERDISPINFO.repeat_first_field: %d\n", info->repeat_first_field); 422 | printf("CUVIDPARSERDISPINFO.timestamp: %lld\n", info->timestamp); 423 | } 424 | 425 | static void print_cuvid_pic_params(CUVIDPICPARAMS* pic) { 426 | 427 | if (nullptr == pic) { 428 | printf("Cannot print the cuvid pic params, nullptr given."); 429 | return; 430 | } 431 | 432 | printf("CUVIDPICPARAMS.PicWithInMbs: %d\n", pic->PicWidthInMbs); 433 | printf("CUVIDPICPARAMS.FrameHeightInMbs: %d\n", pic->FrameHeightInMbs); 434 | printf("CUVIDPICPARAMS.CurrPicIdx: %d\n", pic->CurrPicIdx); 435 | printf("CUVIDPICPARAMS.field_pic_flag: %d\n", pic->field_pic_flag); 436 | printf("CUVIDPICPARAMS.bottom_field_flag: %d\n", pic->bottom_field_flag); 437 | printf("CUVIDPICPARAMS.second_field: %d\n", pic->second_field); 438 | printf("CUVIDPICPARAMS.nBitstreamDataLen: %u\n", pic->nBitstreamDataLen); 439 | printf("CUVIDPICPARAMS.nNumSlices: %u\n", pic->nNumSlices); 440 | printf("CUVIDPICPARAMS.ref_pic_flag: %d\n", pic->ref_pic_flag); 441 | printf("CUVIDPICPARAMS.intra_pic_flag: %d\n", pic->intra_pic_flag); 442 | } 443 | 444 | /* ------------------------------------------------ */ 445 | 446 | -------------------------------------------------------------------------------- /src/test-nvidia-decode-v3.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | NVIDIA DECODE EXPERIMENTS 3 | ========================= 4 | 5 | GENERAL INFO: 6 | 7 | This repository contains a collection of experiments using 8 | the NVDECODE SDK to decode H264 using hardware 9 | acceleration. These tests are meant to be minimal and should 10 | not be used in production environments. The code was written 11 | while diving into the APIs so things might be incorrect. 12 | 13 | This particular test does the same thing as v2, but it will 14 | use a queue that get's filled up first and once it's full 15 | we start mapping the frames. 
16 | 17 | QUESTIONS: 18 | 19 | Q1: Should I use the CUVIDDECODECREATEINFO.vidLock .. and when? 20 | A1: ... 21 | 22 | Q2: What are the video parser callbacks supposed to return? 23 | A2: .... 24 | 25 | Q3: When calling a cuvidCreateVideoParser(), do I need to provide `pExtVideoInfo` ? 26 | A3: I tested this by setting the pExtVideoInfo member to nullptr in the cudaDecodeGL example 27 | and things were working fine w/o. 28 | 29 | REFERENCES: 30 | 31 | [0]: http://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf "Cuda C Programming Guide" 32 | [1]: https://github.com/gpac/gpac/blob/9bf9d23283553bf8214d13b286ce759ddd216be0/modules/nvdec/nvdec.c "GPAC implementation of NVDECODE" 33 | 34 | */ 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | #define QUEUE_SIZE 3 43 | 44 | /* ------------------------------------------------ */ 45 | 46 | static void print_cuvid_decode_caps(CUVIDDECODECAPS* caps); 47 | static void print_cuvid_parser_disp_info(CUVIDPARSERDISPINFO* info); 48 | static void print_cuvid_pic_params(CUVIDPICPARAMS* pic); 49 | static int parser_sequence_callback(void* user, CUVIDEOFORMAT* fmt); 50 | static int parser_decode_picture_callback(void* user, CUVIDPICPARAMS* pic); 51 | static int parser_display_picture_callback(void* user, CUVIDPARSERDISPINFO* info); 52 | static int map_picture(CUVIDPARSERDISPINFO* info); 53 | 54 | /* ------------------------------------------------ */ 55 | 56 | CUcontext context = { 0 }; 57 | CUvideodecoder decoder = nullptr; 58 | CUdevice device = { 0 }; 59 | CUVIDPARSERDISPINFO queue[QUEUE_SIZE]; 60 | int queue_write_dx = 0; 61 | std::ofstream ofs; 62 | 63 | char* yuv_buffer = nullptr; 64 | int yuv_nbytes_needed = 0; 65 | int coded_width = 0; 66 | int coded_height = 0; 67 | 68 | /* ------------------------------------------------ */ 69 | 70 | int main() { 71 | 72 | printf("\n\nnvidia decode test v3.\n\n"); 73 | 74 | CUresult r = CUDA_SUCCESS; 75 | const char* err_str = nullptr; 76 | 77 
| ofs.open("out.nv12", std::ios::out | std::ios::binary); 78 | if (!ofs.is_open()) { 79 | printf("Failed to open output file. (exiting).\n"); 80 | exit(EXIT_FAILURE); 81 | } 82 | 83 | /* Initialize cuda, must be done before anything else. */ 84 | r = cuInit(0); 85 | if (CUDA_SUCCESS != r) { 86 | cuGetErrorString(r, &err_str); 87 | printf("Failed to initialize cuda: %s. (exiting).\n", err_str); 88 | exit(EXIT_FAILURE); 89 | } 90 | 91 | int device_count = 0; 92 | r = cuDeviceGetCount(&device_count); 93 | if (CUDA_SUCCESS != r) { 94 | cuGetErrorString(r, &err_str); 95 | printf("Failed to get the cuda device count: %s. (exiting).\n", err_str); 96 | exit(EXIT_FAILURE); 97 | } 98 | 99 | printf("We have %d cuda device(s).\n", device_count); 100 | 101 | r = cuDeviceGet(&device, 0); 102 | if (CUDA_SUCCESS != r) { 103 | cuGetErrorString(r, &err_str); 104 | printf("Failed to get a handle to the cuda device: %s. (exiting).\n", err_str); 105 | exit(EXIT_FAILURE); 106 | } 107 | 108 | char name[80] = { 0 }; 109 | r = cuDeviceGetName(name, sizeof(name), device); 110 | if (CUDA_SUCCESS != r) { 111 | cuGetErrorString(r, &err_str); 112 | printf("Failed to get the cuda device name: %s. (exiting).\n", err_str); 113 | exit(EXIT_FAILURE); 114 | } 115 | 116 | printf("Cuda device: %s.\n", name); 117 | 118 | r = cuCtxCreate(&context, 0, device); 119 | if (CUDA_SUCCESS != r) { 120 | cuGetErrorString(r, &err_str); 121 | printf("Failed to create a cuda context: %s. (exiting).\n", err_str); 122 | exit(EXIT_FAILURE); 123 | } 124 | 125 | /* Initialize are queue. We set the picture_index member to -1 which means that the slot is free. */ 126 | for (int i = 0; i < QUEUE_SIZE; ++i) { 127 | queue[i].picture_index = -1; 128 | } 129 | 130 | /* Create a video parser that gives us the CUVIDPICPARAMS structures. 
*/ 131 | CUVIDPARSERPARAMS parser_params; 132 | memset((void*)&parser_params, 0x00, sizeof(parser_params)); 133 | parser_params.CodecType = cudaVideoCodec_H264; 134 | parser_params.ulMaxNumDecodeSurfaces = 4; 135 | parser_params.ulClockRate = 0; 136 | parser_params.ulErrorThreshold = 0; 137 | parser_params.ulMaxDisplayDelay = 1; 138 | parser_params.pUserData = nullptr; 139 | parser_params.pfnSequenceCallback = parser_sequence_callback; 140 | parser_params.pfnDecodePicture = parser_decode_picture_callback; 141 | parser_params.pfnDisplayPicture = parser_display_picture_callback; 142 | 143 | CUvideoparser parser = nullptr; 144 | r = cuvidCreateVideoParser(&parser, &parser_params); 145 | 146 | if (CUDA_SUCCESS != r) { 147 | cuGetErrorString(r, &err_str); 148 | printf("Failed to create a video parser: %s (exiting).\n", err_str); 149 | exit(EXIT_FAILURE); 150 | } 151 | 152 | /* Load our h264 nal parser. */ 153 | std::string filename = ""; 154 | filename = "./moonlight.264"; 155 | 156 | /* Instead of reading the file one nal at a time, we just read a huge chunk and feed that into the decoder. */ 157 | std::ifstream ifs(filename.c_str(), std::ios::in | std::ios::binary); 158 | if (!ifs.is_open()) { 159 | printf("Failed to open the file: %s. 
(exiting).\n", filename.c_str()); 160 | exit(EXIT_FAILURE); 161 | } 162 | 163 | ifs.seekg(0, std::ifstream::end); 164 | size_t ifs_size = ifs.tellg(); 165 | ifs.seekg(0, std::ifstream::beg); 166 | printf("Loaded %s which holds %zu bytes.\n", filename.c_str(), ifs_size); 167 | 168 | char* ifs_buf = (char*)malloc(ifs_size); 169 | ifs.read(ifs_buf, ifs_size); 170 | 171 | CUVIDSOURCEDATAPACKET pkt; 172 | pkt.flags = 0; 173 | pkt.payload_size = ifs_size; 174 | pkt.payload = (uint8_t*)ifs_buf; 175 | pkt.timestamp = 0; 176 | 177 | r = cuvidParseVideoData(parser, &pkt); 178 | if (CUDA_SUCCESS != r) { 179 | cuGetErrorString(r, &err_str); 180 | printf("Failed to parse h264 packet: %s (exiting).\n", err_str); 181 | exit(EXIT_FAILURE); 182 | } 183 | 184 | /* Cleanup */ 185 | /* ------------------------------------------------------ */ 186 | 187 | printf("Cleaning up.\n"); 188 | 189 | if (nullptr != parser) { 190 | printf("Destroying video parser.\n"); 191 | r = cuvidDestroyVideoParser(parser); 192 | if (CUDA_SUCCESS != r) { 193 | cuGetErrorString(r, &err_str); 194 | printf("Failed to the video parser context: %s. (exiting).\n", err_str); 195 | exit(EXIT_FAILURE); 196 | } 197 | } 198 | 199 | if (nullptr != decoder) { 200 | printf("Destroying decoder.\n"); 201 | r = cuvidDestroyDecoder(decoder); 202 | if (CUDA_SUCCESS != r) { 203 | cuGetErrorString(r, &err_str); 204 | printf("Failed to cleanly destroy the decoder context: %s. 
(exiting).\n", err_str); 205 | exit(EXIT_FAILURE); 206 | } 207 | } 208 | 209 | if (nullptr != context) { 210 | printf("Destroying context.\n"); 211 | r = cuCtxDestroy(context); 212 | if (CUDA_SUCCESS != r) { 213 | cuGetErrorString(r, &err_str); 214 | printf("Failed to cleanly destroy the cuda context: %s (exiting).\n", err_str); 215 | exit(EXIT_FAILURE); 216 | } 217 | printf("Context destroyed.\n"); 218 | } 219 | 220 | if (nullptr != yuv_buffer) { 221 | cuMemFreeHost(yuv_buffer); 222 | printf("Freeing yuv buffer.\n"); 223 | yuv_buffer = nullptr; 224 | yuv_nbytes_needed = 0; 225 | } 226 | 227 | printf("Playback with: "); 228 | printf("ffplay -f rawvideo -pix_fmt nv12 -s %dx%d -i out.nv12\n", coded_width, coded_height); 229 | 230 | printf("Resetting state.\n"); 231 | context = nullptr; 232 | decoder = nullptr; 233 | parser = nullptr; 234 | coded_width = 0; 235 | coded_height = 0; 236 | 237 | if (ofs.is_open()) { 238 | ofs.close(); 239 | } 240 | 241 | return 0; 242 | } 243 | 244 | /* ------------------------------------------------ */ 245 | 246 | static int parser_sequence_callback(void* user, CUVIDEOFORMAT* fmt) { 247 | 248 | if (nullptr == context) { 249 | printf("The CUcontext is nullptr, you should initialize it before kicking off the decoder.\n"); 250 | exit(EXIT_FAILURE); 251 | } 252 | 253 | coded_width = fmt->coded_width; 254 | coded_height = fmt->coded_height; 255 | 256 | const char* err_str = nullptr; 257 | 258 | printf("CUVIDEOFORMAT.Coded size: %d x %d\n", fmt->coded_width, fmt->coded_height); 259 | printf("CUVIDEOFORMAT.Display area: %d %d %d %d\n", fmt->display_area.left, fmt->display_area.top, fmt->display_area.right, fmt->display_area.bottom); 260 | printf("CUVIDEOFORMAT.Bitrate: %u\n", fmt->bitrate); 261 | 262 | CUVIDDECODECAPS decode_caps; 263 | memset((char*)&decode_caps, 0x00, sizeof(decode_caps)); 264 | decode_caps.eCodecType = fmt->codec; 265 | decode_caps.eChromaFormat = fmt->chroma_format; 266 | decode_caps.nBitDepthMinus8 = 
fmt->bit_depth_luma_minus8; 267 | 268 | CUresult r = cuvidGetDecoderCaps(&decode_caps); 269 | if (CUDA_SUCCESS != r) { 270 | cuGetErrorString(r, &err_str); 271 | printf("Failed to get decoder caps: %s (exiting).\n", err_str); 272 | exit(EXIT_FAILURE); 273 | } 274 | 275 | if (!decode_caps.bIsSupported) { 276 | printf("The video file format is not supported by NVDECODE. (exiting).\n"); 277 | exit(EXIT_FAILURE); 278 | } 279 | 280 | /* Create decoder context. */ 281 | CUVIDDECODECREATEINFO create_info = { 0 }; 282 | create_info.CodecType = fmt->codec; 283 | create_info.ChromaFormat = fmt->chroma_format; 284 | create_info.OutputFormat = (fmt->bit_depth_luma_minus8) ? cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12; 285 | create_info.bitDepthMinus8 = fmt->bit_depth_luma_minus8; 286 | create_info.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave; 287 | create_info.ulNumOutputSurfaces = 1; 288 | create_info.ulNumDecodeSurfaces = 20; 289 | create_info.ulCreationFlags = cudaVideoCreate_PreferCUVID; 290 | create_info.vidLock = nullptr; 291 | create_info.ulIntraDecodeOnly = 0; /* Set to 1 when the source only has intra frames; memory will be optimized. */ 292 | create_info.ulTargetWidth = fmt->coded_width; 293 | create_info.ulTargetHeight = fmt->coded_height; 294 | create_info.ulWidth = fmt->coded_width; 295 | create_info.ulHeight = fmt->coded_height; 296 | 297 | cuCtxPushCurrent(context); 298 | { 299 | r = cuvidCreateDecoder(&decoder, &create_info); 300 | if (CUDA_SUCCESS != r) { 301 | cuGetErrorString(r, &err_str); 302 | printf("Failed to create the decoder: %s. (exiting).\n", err_str); 303 | exit(EXIT_FAILURE); 304 | } 305 | } 306 | cuCtxPopCurrent(nullptr); 307 | 308 | printf("Created the decoder.\n"); 309 | 310 | return 1; 311 | } 312 | 313 | static int parser_decode_picture_callback(void* user, CUVIDPICPARAMS* pic) { 314 | 315 | CUresult r = CUDA_SUCCESS; 316 | 317 | if (nullptr == decoder) { 318 | printf("decoder is nullptr. 
(exiting)."); 319 | exit(EXIT_FAILURE); 320 | } 321 | 322 | r = cuvidDecodePicture(decoder, pic); 323 | if (CUDA_SUCCESS != r) { 324 | printf("Failed to decode the picture."); 325 | } 326 | 327 | return 1; 328 | } 329 | 330 | static int parser_display_picture_callback(void* user, CUVIDPARSERDISPINFO* info) { 331 | 332 | /* Perform a delayed write. */ 333 | if (-1 != queue[queue_write_dx].picture_index) { 334 | map_picture(&queue[queue_write_dx]); 335 | queue[queue_write_dx].picture_index = -1; 336 | } 337 | 338 | queue[queue_write_dx] = *info; 339 | queue_write_dx = (queue_write_dx + 1) % QUEUE_SIZE; 340 | 341 | return 1; 342 | } 343 | 344 | static int map_picture(CUVIDPARSERDISPINFO* info) { 345 | 346 | if (nullptr == info) { 347 | printf("Cannot map the picture; nullptr given. (exiting).\n"); 348 | exit(EXIT_FAILURE); 349 | } 350 | 351 | const char* err_str = nullptr; 352 | CUresult r = CUDA_SUCCESS; 353 | CUVIDPROCPARAMS vpp = { 0 }; 354 | unsigned int pitch = 0; 355 | int to_map = info->picture_index; 356 | CUdeviceptr device_ptr = 0; 357 | 358 | //memset((char*)&vpp, 0x00, sizeof(vpp)); 359 | vpp.progressive_frame = info->progressive_frame; 360 | vpp.top_field_first = info->top_field_first; 361 | //vpp.unpaired_field = (info->repeat_first_field < 0); 362 | //vpp.second_field = 0; 363 | 364 | r = cuvidMapVideoFrame(decoder, to_map, &device_ptr, &pitch, &vpp); 365 | 366 | //usleep(100 * 1e3); 367 | 368 | if (CUDA_SUCCESS != r) { 369 | cuGetErrorString(r, &err_str); 370 | printf("- mapping: %u failed: %s\n", to_map, err_str); 371 | return 0; 372 | } 373 | 374 | if (nullptr == yuv_buffer) { 375 | printf("Allocating yuv buffer.\n"); 376 | yuv_nbytes_needed = pitch * (coded_height + coded_height / 2); 377 | r = cuMemAllocHost((void**)&yuv_buffer, yuv_nbytes_needed); 378 | if (CUDA_SUCCESS != r) { 379 | printf("Failed to allocate the buffer for the decoded yuv frames. 
(exiting).\n"); 380 | exit(EXIT_FAILURE); 381 | } 382 | } 383 | 384 | if (nullptr == yuv_buffer 385 | || 0 == yuv_nbytes_needed) 386 | { 387 | printf("No valid yuf buffer. (exiting).\n"); 388 | exit(EXIT_FAILURE); 389 | } 390 | 391 | r = cuMemcpyDtoH(yuv_buffer, device_ptr, yuv_nbytes_needed); 392 | if (CUDA_SUCCESS != r) { 393 | printf("Failed to copy the decode frame into our (cpu) buffer. (exiting).\n"); 394 | exit(EXIT_FAILURE); 395 | } 396 | 397 | printf("Mapping Picture Index: %d (%u), YUV buffer size: %d\n", info->picture_index, device_ptr, yuv_nbytes_needed); 398 | 399 | r = cuvidUnmapVideoFrame(decoder, device_ptr); 400 | if (CUDA_SUCCESS != r) { 401 | cuGetErrorString(r, &err_str); 402 | printf("- failed to unmap the video frame: %s, %d (exiting)\n", err_str, to_map); 403 | exit(EXIT_FAILURE); 404 | } 405 | 406 | if (false == ofs.is_open()) { 407 | printf("The output file is not opened. (exiting).\n"); 408 | exit(EXIT_FAILURE); 409 | } 410 | 411 | for (int j = 0; j < coded_height; ++j) { 412 | ofs.write(yuv_buffer + j * pitch, coded_width); 413 | } 414 | 415 | int half_height = coded_height * 0.5; 416 | for (int j = 0; j < half_height; ++j) { 417 | ofs.write(yuv_buffer + (coded_height * pitch) + j * pitch, coded_width); 418 | } 419 | 420 | ofs.flush(); 421 | 422 | return 0; 423 | } 424 | 425 | /* ------------------------------------------------ */ 426 | 427 | static void print_cuvid_decode_caps(CUVIDDECODECAPS* caps) { 428 | 429 | if (nullptr == caps) { 430 | printf("Cannot print the cuvid decode caps as the given pointer is a nullptr."); 431 | return; 432 | } 433 | 434 | printf("CUVIDDECODECAPS.nBitDepthMinus8: %u\n", caps->nBitDepthMinus8); 435 | printf("CUVIDDECODECAPS.bIsSupported: %u\n", caps->bIsSupported); 436 | printf("CUVIDDECODECAPS.nMaxWidth: %u\n", caps->nMaxWidth); 437 | printf("CUVIDDECODECAPS.nMaxHeight: %u\n", caps->nMaxHeight); 438 | printf("CUVIDDECODECAPS.nMaxMBCount: %u\n", caps->nMaxMBCount); 439 | printf("CUVIDDECODECAPS.nMinWidth: 
%u\n", caps->nMinWidth); 440 | printf("CUVIDDECODECAPS.nMinHeight: %u\n", caps->nMinHeight); 441 | } 442 | 443 | static void print_cuvid_parser_disp_info(CUVIDPARSERDISPINFO* info) { 444 | 445 | if (nullptr == info) { 446 | printf("Cannot print the cuvid parser disp info, nullptr given."); 447 | return; 448 | } 449 | 450 | printf("CUVIDPARSERDISPINFO.picture_index: %d\n", info->picture_index); 451 | printf("CUVIDPARSERDISPINFO.progressive_frame: %d\n", info->progressive_frame); 452 | printf("CUVIDPARSERDISPINFO.top_field_first: %d\n", info->top_field_first); 453 | printf("CUVIDPARSERDISPINFO.repeat_first_field: %d\n", info->repeat_first_field); 454 | printf("CUVIDPARSERDISPINFO.timestamp: %lld\n", info->timestamp); 455 | } 456 | 457 | static void print_cuvid_pic_params(CUVIDPICPARAMS* pic) { 458 | 459 | if (nullptr == pic) { 460 | printf("Cannot print the cuvid pic params, nullptr given."); 461 | return; 462 | } 463 | 464 | printf("CUVIDPICPARAMS.PicWithInMbs: %d\n", pic->PicWidthInMbs); 465 | printf("CUVIDPICPARAMS.FrameHeightInMbs: %d\n", pic->FrameHeightInMbs); 466 | printf("CUVIDPICPARAMS.CurrPicIdx: %d\n", pic->CurrPicIdx); 467 | printf("CUVIDPICPARAMS.field_pic_flag: %d\n", pic->field_pic_flag); 468 | printf("CUVIDPICPARAMS.bottom_field_flag: %d\n", pic->bottom_field_flag); 469 | printf("CUVIDPICPARAMS.second_field: %d\n", pic->second_field); 470 | printf("CUVIDPICPARAMS.nBitstreamDataLen: %u\n", pic->nBitstreamDataLen); 471 | printf("CUVIDPICPARAMS.nNumSlices: %u\n", pic->nNumSlices); 472 | printf("CUVIDPICPARAMS.ref_pic_flag: %d\n", pic->ref_pic_flag); 473 | printf("CUVIDPICPARAMS.intra_pic_flag: %d\n", pic->intra_pic_flag); 474 | } 475 | 476 | /* ------------------------------------------------ */ 477 | 478 | --------------------------------------------------------------------------------