├── .gitignore
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── nvcodec-python.cpp
├── setup.py
├── src
│   ├── cuvid
│   │   ├── AppDecUtils.h
│   │   ├── Logger.cpp
│   │   ├── NvDecoder
│   │   │   ├── NvDecoder.cpp
│   │   │   └── NvDecoder.h
│   │   ├── NvEncoder
│   │   │   ├── NvEncoder.cpp
│   │   │   ├── NvEncoder.h
│   │   │   ├── NvEncoderCuda.cpp
│   │   │   └── NvEncoderCuda.h
│   │   ├── Utils
│   │   │   ├── BitDepth.cu
│   │   │   ├── ColorSpace.cu
│   │   │   ├── ColorSpace.h
│   │   │   ├── FFmpegDemuxer.h
│   │   │   ├── FFmpegStreamer.h
│   │   │   ├── Logger.h
│   │   │   ├── NvCodecUtils.h
│   │   │   ├── NvEncoderCLIOptions.h
│   │   │   ├── Resize.cu
│   │   │   └── crc.cu
│   │   ├── cuviddec.h
│   │   ├── nvEncodeAPI.h
│   │   └── nvcuvid.h
│   ├── decoder.cpp
│   ├── decoder.h
│   ├── encoder.cpp
│   ├── encoder.h
│   ├── source.cpp
│   └── source.h
└── tests
    ├── cpp
    │   ├── decode.cpp
    │   ├── encode.cpp
    │   └── read_source.cpp
    └── python
        ├── read_source_opencv.py
        └── read_source_sdl.py

/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | dist
3 | build
4 | pynvcodec.egg-info
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2020-2021 Jason Dsouza
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include src/**
2 | include src/**/**
3 | include src/**/**/**
4 | include tests/**/**
5 | include Makefile
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | CFLAGS=-Wall -O2 -lstdc++ -pthread -lm -fPIC
2 | ifdef DEBUG
3 | CFLAGS=-g -Wall -O0 -D DEBUG -lstdc++ -pthread -lm -fPIC
4 | endif
5 | 
6 | ifndef CUDA_PATH
7 | CUDA_PATH=/usr/local/cuda
8 | endif
9 | 
10 | ifndef PYTHON_VERSION
11 | PYTHON_VERSION=$(shell python3 -c "import sys; print('%d.%d' % (sys.version_info.major, sys.version_info.minor,))")
12 | endif
13 | 
14 | ifndef PYTHON_INCLUDE_PATH
15 | PYTHON_INCLUDE_PATH=/usr/include/python${PYTHON_VERSION}
16 | endif
17 | 
18 | ifndef PYTHON_BIN
19 | PYTHON_BIN=python${PYTHON_VERSION}
20 | endif
21 | 
22 | ifndef PREFIX
23 | PREFIX=/usr/local
24 | ifdef VIRTUAL_ENV
25 | PREFIX=${VIRTUAL_ENV}
26 | endif
27 | endif
28 | 
29 | lib: build/lib/libnvcodec.a
30 | test: build/tests/read_source build/tests/decode build/tests/encode
31 | 
32 | python: lib
33 | 	${PYTHON_BIN} setup.py build
34 | 
35 | out:
36 | 	mkdir -p build/tests
37 | 	mkdir -p build/lib
38 | 
39 | SRC_FILES=$(wildcard src/*.cpp) $(wildcard src/cuvid/*.cpp) $(wildcard src/cuvid/NvDecoder/*.cpp) $(wildcard src/cuvid/Utils/*.cpp) src/cuvid/NvEncoder/NvEncoder.cpp src/cuvid/NvEncoder/NvEncoderCuda.cpp
40 | OPENCV_LIB=-I/usr/local/include/opencv4 -L/usr/local/lib -lopencv_core -lopencv_highgui
41 | 
42 | lib_cuda: build/lib/libcolor_space.a
43 | 
44 | build/lib/libcolor_space.a: src/cuvid/Utils/ColorSpace.cu out
45 | 	nvcc -DCUDNN --compiler-options "-fPIC -lstdc++ -pthread -lm" -c src/cuvid/Utils/ColorSpace.cu -o build/lib/libcolor_space.a
46 | 
47 | 
48 | FLAGS=-L${CUDA_PATH}/lib64 -Lbuild/lib -lavformat -lavcodec -lavutil -lcudart -lnvcuvid -lnvidia-encode -lcuda -Isrc -I${CUDA_PATH}/include -Isrc/cuvid ${CFLAGS}
49 | 
50 | # build/lib/libnvcodec.so: lib_cuda out
51 | # 	g++ -o build/lib/libnvcodec.so -shared ${SRC_FILES} -lcolor_space ${FLAGS} -fPIC
52 | 
53 | 
54 | build/tests/read_source: tests/cpp/read_source.cpp lib
55 | 	g++ -o build/tests/read_source tests/cpp/read_source.cpp -lnvcodec ${FLAGS}
56 | 
57 | build/lib/libnvcodec.a: lib_cuda out
58 | 	# g++ -o build/lib/libnvcodec.so -shared ${SRC_FILES} -lcolor_space ${FLAGS} -fPIC
59 | 	mkdir -p build/object
60 | 	cd build/object; g++ -c ../../src/*.cpp ../../src/**/*.cpp ../../src/**/**/*.cpp -I../../src -I../../src/cuvid -I${CUDA_PATH}/include
61 | 	ar rcs build/lib/libnvcodec.a build/object/*.o
62 | 
63 | 
64 | build/tests/decode: tests/cpp/decode.cpp lib
65 | 	g++ -o build/tests/decode tests/cpp/decode.cpp -lnvcodec ${OPENCV_LIB} ${FLAGS}
66 | 
67 | build/tests/encode: tests/cpp/encode.cpp lib
68 | 	g++ -o build/tests/encode tests/cpp/encode.cpp -lnvcodec ${OPENCV_LIB} ${FLAGS}
69 | 
70 | clean:
71 | 	rm -rf build
72 | 	rm -rf pynvcodec.egg-info
73 | 	rm -rf dist
74 | 
75 | python-interface:
76 | 	${PYTHON_BIN} setup.py build
77 | 
78 | release: clean python-interface
79 | 	${PYTHON_BIN} setup.py sdist
80 | 	${PYTHON_BIN} -m twine upload dist/*
81 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | NvCodec - Python
2 | ---------------------------
3 | 
4 | ## Requirements
5 | * cuda >= 11.2
6 | * numpy >= 1.17
7 | * python >= 3.6
8 | * gcc >= 7.5
9 | * make >= 4.1
10 | 
11 | ## Install
12 | ```shell
13 | pip install pynvcodec
14 | ```
15 | 
16 | ## Usage
17 | 
18 | ### 0. Import PyNvCodec
19 | ```python
20 | from nvcodec import VideoSource, VideoDecoder, VideoEncoder
21 | ```
22 | 
23 | ### 1. Use VideoSource
24 | 
25 | ```python
26 | source = VideoSource("rtmp://RTMP_URL")
27 | h264_data = source.read()
28 | ```
29 | 
30 | ### 2. Use VideoDecoder
31 | ```python
32 | decoder = VideoDecoder()
33 | # output OpenCV format frame (height x width x 4)
34 | frames = decoder.decode(h264_data)
35 | # output SDL format frame (width x height x 4)
36 | frames = decoder.decode(h264_data, 1)
37 | ```
38 | 
39 | ### 3. Use VideoEncoder
40 | ```python
41 | encoder = VideoEncoder(width, height)
42 | h264_data = encoder.encode(frame)
43 | ```
44 | 
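### 4. Full pipeline (read, decode, re-encode)

A minimal end-to-end sketch combining the three classes. This is illustrative
only: it assumes a reachable stream URL, and that `decode` returns frames as
`height x width x 4` numpy arrays (the OpenCV-style output shown above).

```python
from nvcodec import VideoSource, VideoDecoder, VideoEncoder

source = VideoSource("rtmp://RTMP_URL")
decoder = VideoDecoder()
encoder = None

while True:
    h264_data = source.read()   # returns None when no packet is available
    if h264_data is None:
        break
    for frame in decoder.decode(h264_data):
        if encoder is None:     # size the encoder from the first decoded frame
            height, width = frame.shape[:2]
            encoder = VideoEncoder(width, height)
        packet = encoder.encode(frame)
```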
--------------------------------------------------------------------------------
/nvcodec-python.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <structmember.h>
4 | #include <string.h>
5 | 
6 | #include <libavformat/avformat.h>
7 | #include <libavcodec/avcodec.h>
8 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
9 | #include <numpy/arrayobject.h>
10 | 
11 | #include "source.h"
12 | #include "decoder.h"
13 | #include "encoder.h"
14 | 
15 | 
16 | typedef struct
17 | {
18 |     PyObject_HEAD
19 |     long long m_handle;
20 | }NvCodec;
21 | 
22 | static PyMemberDef NvCodec_DataMembers[] =
23 | {
24 |     {(char*)"m_handle", T_LONGLONG, offsetof(NvCodec, m_handle), 0, (char*)"NvCodec handle ptr"},
25 |     {NULL, 0, 0, 0, NULL}
26 | };
27 | 
28 | /* ----------- VideoSource Part --------------- */
29 | 
30 | static PyObject* VideoSource_read(NvCodec* Self)
31 | {
32 |     videoSourceHandle m_handle = (videoSourceHandle)Self->m_handle;
33 |     AVPacket *packet = av_packet_alloc();
34 |     if(videoSource_read(m_handle, packet) < 0){
35 |         av_packet_free(&packet);
36 |         Py_RETURN_NONE; /* Py_None must be returned with a new reference */
37 |     }
38 |     PyObject* rtn = PyBytes_FromStringAndSize((const char*)packet->data, packet->size);
39 |     av_packet_free(&packet);
40 |     return rtn;
41 | }
42 | 
43 | static PyMethodDef VideoSource_MethodMembers[] =
44 | {
45 |     {"read", (PyCFunction)VideoSource_read, METH_NOARGS, "read h264 from video source"},
46 |     {NULL, NULL, 0, NULL}
47 | };
48 | 
49 | static void VideoSource_Destruct(NvCodec* Self)
50 | {
51 |     videoSourceHandle m_handle = (videoSourceHandle)(Self->m_handle);
52 |     videoSource_destroy(m_handle);
53 |     Py_TYPE(Self)->tp_free((PyObject*)Self);
54 | }
55 | 
56 | 
57 | static PyObject* VideoSource_Str(NvCodec* Self)
58 | {
59 |     return Py_BuildValue("s", "");
60 | }
61 | 
62 | static PyObject* VideoSource_Repr(NvCodec* Self)
63 | {
64 |     return VideoSource_Str(Self);
65 | }
66 | 
67 | static int VideoSource_init(NvCodec* Self, PyObject* pArgs, PyObject* kwds)
68 | {
69 |     char* url;
70 |     unsigned int listen = 0;
71 |     if(!PyArg_ParseTuple(pArgs, "s|I", &url, &listen)){
72 |         PyErr_SetString(PyExc_ValueError, "Parse the argument FAILED! You should pass a url string!");
73 |         return -1; /* signal the failure to the type machinery */
74 |     }
75 | 
76 |     Self->m_handle = (long long)(videoSource_init(url, listen));
77 |     return 0;
78 | }
79 | 
80 | static PyTypeObject VideoSource_ClassInfo =
81 | {
82 |     PyVarObject_HEAD_INIT(NULL, 0)"NvCodec.VideoSource",
83 |     sizeof(NvCodec),
84 |     0,
85 |     (destructor)VideoSource_Destruct,
86 |     NULL,NULL,NULL,NULL,
87 |     (reprfunc)VideoSource_Repr,
88 |     NULL,NULL,NULL,NULL,NULL,
89 |     (reprfunc)VideoSource_Str,
90 |     NULL,NULL,NULL,
91 |     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
92 |     "NvCodec Python VideoSource objects --- extended by nvcodec",
93 |     NULL,NULL,NULL,0,NULL,NULL,
94 |     VideoSource_MethodMembers,
95 |     NvCodec_DataMembers,
96 |     NULL,NULL,NULL,NULL,NULL,0,
97 |     (initproc)VideoSource_init,
98 |     NULL,
99 | };
100 | 
101 | /* ----------- Decoder Part --------------- */
102 | 
103 | static PyObject* VideoDecoder_decode(NvCodec* Self, PyObject* pArgs)
104 | {
105 |     videoDecoderHandle m_handle = (videoDecoderHandle)Self->m_handle;
106 | 
107 |     unsigned char* data;
108 |     Py_ssize_t len;
109 |     unsigned int type = 0;
110 |     if(!PyArg_ParseTuple(pArgs, "y#|I", &data, &len, &type)){
111 |         PyErr_SetString(PyExc_ValueError, "Parse the argument FAILED! You should pass video byte data!");
112 |         return NULL; /* an exception is set, so NULL (not Py_None) must be returned */
113 |     }
114 | 
115 |     PyObject* rtn = Py_BuildValue("[]");
116 |     char error_str[128] = ""; /* make the "no error" state well defined */
117 |     videoFrameList* list = videoDecoder_decode(m_handle, data, (int)len, error_str);
118 |     if(list == NULL){
119 |         if(error_str[0] != '\0'){
120 |             PyErr_Format(PyExc_ValueError, "%s", error_str);
121 |             Py_DECREF(rtn); return NULL;
122 |         }
123 |         return rtn;
124 |     }
125 | 
126 |     npy_intp dims[3] = {(npy_intp)(list->height), (npy_intp)(list->width), 4};
127 |     PyObject* tempFrame;
128 |     for(int i = 0;i<list->length;i++){
129 |         tempFrame = PyArray_SimpleNew(3, dims, NPY_UINT8); /* allocate an array owning its own memory ... */
130 |         memcpy(PyArray_DATA((PyArrayObject*)tempFrame), list->pFrames + (i*(list->perFrameSize)), list->perFrameSize); /* ... and copy the frame out, so the list can be freed safely below */
131 |         if(type != 0){
132 |             PyObject* swapped = PyArray_SwapAxes((PyArrayObject*)tempFrame, 0, 1); Py_DECREF(tempFrame); tempFrame = swapped;
133 |         }
134 |         PyList_Append(rtn, tempFrame); Py_DECREF(tempFrame); /* the list holds its own reference */
135 |     }
136 |     videoFrameList_destory(&list);
137 |     return rtn;
138 | }
139 | 
140 | static PyMethodDef VideoDecoder_MethodMembers[] =
141 | {
142 |     {"decode", (PyCFunction)VideoDecoder_decode, METH_VARARGS, "decode video frame"},
143 |     {NULL, NULL, 0, NULL}
144 | };
145 | 
146 | static void VideoDecoder_Destruct(NvCodec* Self)
147 | {
148 |     videoDecoderHandle m_handle = (videoDecoderHandle)(Self->m_handle);
149 |     videoDecoder_destroy(m_handle);
150 |     Py_TYPE(Self)->tp_free((PyObject*)Self);
151 | }
152 | 
153 | 
154 | static PyObject* VideoDecoder_Str(NvCodec* Self)
155 | {
156 |     return Py_BuildValue("s", "");
157 | }
158 | 
159 | static PyObject* VideoDecoder_Repr(NvCodec* Self)
160 | {
161 |     return VideoDecoder_Str(Self);
162 | }
163 | 
164 | static int VideoDecoder_init(NvCodec* Self, PyObject* pArgs, PyObject* kwds)
165 | {
166 |     unsigned int format = AV_CODEC_ID_H264;
167 |     if(!PyArg_ParseTuple(pArgs, "|I", &format)){
168 |         PyErr_SetString(PyExc_ValueError, "Parse the argument FAILED! You should pass an AV_CODEC_ID!");
169 |         return -1;
170 |     }
171 |     Self->m_handle = (long long)(videoDecoder_init((enum AVCodecID)format)); return 0;
172 | }
173 | 
174 | static PyTypeObject VideoDecoder_ClassInfo =
175 | {
176 |     PyVarObject_HEAD_INIT(NULL, 0)"NvCodec.VideoDecoder",
177 |     sizeof(NvCodec),
178 |     0,
179 |     (destructor)VideoDecoder_Destruct,
180 |     NULL,NULL,NULL,NULL,
181 |     (reprfunc)VideoDecoder_Repr,
182 |     NULL,NULL,NULL,NULL,NULL,
183 |     (reprfunc)VideoDecoder_Str,
184 |     NULL,NULL,NULL,
185 |     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
186 |     "NvCodec Python VideoDecoder objects --- extended by nvcodec",
187 |     NULL,NULL,NULL,0,NULL,NULL,
188 |     VideoDecoder_MethodMembers,
189 |     NvCodec_DataMembers,
190 |     NULL,NULL,NULL,NULL,NULL,0,
191 |     (initproc)VideoDecoder_init,
192 |     NULL,
193 | };
194 | 
195 | /* ----------- Encoder Part --------------- */
196 | 
197 | static PyObject* VideoEncoder_encode(NvCodec* Self, PyObject* pArgs)
198 | {
199 |     PyArrayObject *vecin;
200 |     if (!PyArg_ParseTuple(pArgs, "O!", &PyArray_Type, &vecin)){
201 |         PyErr_SetString(PyExc_ValueError, "Parse the argument FAILED! You should pass an ABGR image numpy array!");
202 |         return NULL;
203 |     }
204 | 
205 |     if (NULL == vecin){
206 |         Py_INCREF(Py_None);
207 |         return Py_None;
208 |     }
209 | 
210 |     if (PyArray_NDIM(vecin) != 3){ /* a single height x width x channel frame */
211 |         PyErr_SetString(PyExc_ValueError, "Parse the argument FAILED! You should pass an ABGR image numpy array shaped height*width*channel!");
212 |         return NULL;
213 |     }
214 | 
215 |     videoEncoderHandle m_handle = (videoEncoderHandle)Self->m_handle;
216 | 
217 |     PyObject* bytes = PyObject_CallMethod((PyObject*)vecin, "tobytes", NULL);
218 |     if (bytes == NULL) return NULL;
219 |     char* data = NULL;
220 |     Py_ssize_t length = 0;
221 |     PyBytes_AsStringAndSize(bytes, &data, &length); /* safer than PyArg_Parse(bytes, "y#", ...) */
222 | 
223 |     videoEncodedBuffer* buffer = videoEncoder_encode(m_handle, (unsigned char*)data); Py_DECREF(bytes);
224 |     if(buffer == NULL){
225 |         Py_INCREF(Py_None);
226 |         return Py_None;
227 |     }
228 | 
229 |     PyObject* rtn = PyBytes_FromStringAndSize((const char*)buffer->data, buffer->size);
230 |     videoEncodedBuffer_destory(&buffer);
231 |     return rtn;
232 | }
233 | 
234 | static PyMethodDef VideoEncoder_MethodMembers[] =
235 | {
236 |     {"encode", (PyCFunction)VideoEncoder_encode, METH_VARARGS, "encode video frame"},
237 |     {NULL, NULL, 0, NULL}
238 | };
239 | 
240 | static void VideoEncoder_Destruct(NvCodec* Self)
241 | {
242 |     videoEncoderHandle m_handle = (videoEncoderHandle)(Self->m_handle);
243 |     videoEncoder_destroy(m_handle);
244 |     Py_TYPE(Self)->tp_free((PyObject*)Self);
245 | }
246 | 
247 | 
248 | static PyObject* VideoEncoder_Str(NvCodec* Self)
249 | {
250 |     return Py_BuildValue("s", "");
251 | }
252 | 
253 | static PyObject* VideoEncoder_Repr(NvCodec* Self)
254 | {
255 |     return VideoEncoder_Str(Self);
256 | }
257 | 
258 | static int VideoEncoder_init(NvCodec* Self, PyObject* pArgs, PyObject* kwds)
259 | {
260 |     unsigned int width,height;
261 |     if(!PyArg_ParseTuple(pArgs, "II", &width, &height)){
262 |         PyErr_SetString(PyExc_ValueError, "Parse the argument FAILED! You should pass width and height!");
263 |         return -1;
264 |     }
265 |     Self->m_handle = (long long)(videoEncoder_init(width, height)); return 0;
266 | }
267 | 
268 | static PyTypeObject VideoEncoder_ClassInfo =
269 | {
270 |     PyVarObject_HEAD_INIT(NULL, 0)"NvCodec.VideoEncoder",
271 |     sizeof(NvCodec),
272 |     0,
273 |     (destructor)VideoEncoder_Destruct,
274 |     NULL,NULL,NULL,NULL,
275 |     (reprfunc)VideoEncoder_Repr,
276 |     NULL,NULL,NULL,NULL,NULL,
277 |     (reprfunc)VideoEncoder_Str,
278 |     NULL,NULL,NULL,
279 |     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
280 |     "NvCodec Python VideoEncoder objects --- extended by nvcodec",
281 |     NULL,NULL,NULL,0,NULL,NULL,
282 |     VideoEncoder_MethodMembers,
283 |     NvCodec_DataMembers,
284 |     NULL,NULL,NULL,NULL,NULL,0,
285 |     (initproc)VideoEncoder_init,
286 |     NULL,
287 | };
288 | 
289 | 
290 | 
291 | 
292 | void NvCodec_module_destroy(void *_){
293 |     // Pass
294 | }
295 | 
296 | static PyModuleDef ModuleInfo =
297 | {
298 |     PyModuleDef_HEAD_INIT,
299 |     "nvcodec", /* module name, matching the PyInit_nvcodec entry point */
300 |     "Encode/Decode H264 with Nvidia GPU Hardware Acceleration.",
301 |     -1,
302 |     NULL, NULL, NULL, NULL,
303 |     NvCodec_module_destroy
304 | };
305 | 
306 | PyMODINIT_FUNC
307 | PyInit_nvcodec(void) {
308 |     PyObject * pReturn = NULL;
309 |     import_array(); /* initialize numpy's C-API before any PyArray_* call */
310 | 
311 |     VideoSource_ClassInfo.tp_new = PyType_GenericNew;
312 |     if(PyType_Ready(&VideoSource_ClassInfo) < 0)
313 |         return NULL;
314 | 
315 |     VideoDecoder_ClassInfo.tp_new = PyType_GenericNew;
316 |     if(PyType_Ready(&VideoDecoder_ClassInfo) < 0)
317 |         return NULL;
318 | 
319 |     VideoEncoder_ClassInfo.tp_new = PyType_GenericNew;
320 |     if(PyType_Ready(&VideoEncoder_ClassInfo) < 0)
321 |         return NULL;
322 | 
323 |     pReturn = PyModule_Create(&ModuleInfo);
324 |     if(pReturn == NULL)
325 |         return NULL;
326 | 
327 |     Py_INCREF(&VideoSource_ClassInfo); /* PyModule_AddObject steals a reference */
328 |     PyModule_AddObject(pReturn, "VideoSource", (PyObject*)&VideoSource_ClassInfo);
329 |     Py_INCREF(&VideoDecoder_ClassInfo); PyModule_AddObject(pReturn, "VideoDecoder", (PyObject*)&VideoDecoder_ClassInfo);
330 |     Py_INCREF(&VideoEncoder_ClassInfo); PyModule_AddObject(pReturn, "VideoEncoder", (PyObject*)&VideoEncoder_ClassInfo);
331 |     return pReturn;
332 | }
333 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import sys
3 | import os
4 | import glob
5 | from setuptools import setup, find_packages, Extension
6 | from distutils.command.build_ext import build_ext
7 | import numpy as np
8 | 
9 | libpath = os.path.abspath(os.path.join(os.path.dirname(__file__), './build/lib'))
10 | 
11 | class custom_build_ext(build_ext):
12 |     def build_extensions(self):
13 |         os.system('make lib_cuda')
14 |         build_ext.build_extensions(self)
15 | 
16 | 
17 | with open("README.md", "r", encoding="utf-8") as fh:
18 |     long_description = fh.read()
19 | 
20 | nvcodec_dir = '/usr/local/lib'
21 | if 'VIRTUAL_ENV' in os.environ:
22 |     nvcodec_dir = os.path.join(os.environ['VIRTUAL_ENV'], 'lib')
23 | 
24 | sources = ['nvcodec-python.cpp'] + glob.glob('src/**/*.cpp', recursive=True)
25 | 
26 | module = Extension('nvcodec', sources=sources, language='c++',
27 |     include_dirs=['src', 'src/cuvid', '/usr/local/cuda/include', np.get_include()],
28 |     library_dirs=['build/lib', '/usr/local/cuda-11.2/targets/x86_64-linux/lib'],
29 |     libraries=['avformat', 'avcodec', 'avutil', 'nvcuvid', 'nvidia-encode', 'cuda', 'stdc++', 'm', 'cudart', 'color_space'],
30 | )
31 | 
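# NOTE: library_dirs above pins the CUDA 11.2 toolkit path; if your toolkit is
# installed elsewhere, point it at that version's targets/x86_64-linux/lib (or
# at the /usr/local/cuda symlink) instead.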
32 | setup(name='pynvcodec',
33 |     version='0.0.6',
34 |     ext_modules=[module],
35 |     cmdclass={'build_ext': custom_build_ext},
36 |     author="Usingnet",
37 |     author_email="developer@usingnet.com",
38 |     license="MIT",
39 |     description="Python interface for nvcodec. Encode/Decode H264 with Nvidia GPU Hardware Acceleration.",
40 |     long_description=long_description,
41 |     long_description_content_type="text/markdown",
42 |     url="https://github.com/UsingNet/nvcodec-python",
43 |     # packages=setuptools.find_packages(),
44 |     classifiers=[
45 |         "Development Status :: 4 - Beta",
46 |         "Programming Language :: Python :: 3 :: Only",
47 |         "License :: OSI Approved :: MIT License",
48 |         "Operating System :: POSIX :: Linux",
49 |         "Environment :: GPU :: NVIDIA CUDA :: 11.0",
50 |     ],
51 |     keywords=[
52 |         "pynvcodec",
53 |         "nvcodec",
54 |         "h264",
55 |         "encode",
56 |         "decode",
57 |         "h264 encode",
58 |         "h264 decode",
59 |         "gpu",
60 |         "nvidia"
61 |     ],
62 |     python_requires=">=3.6",
63 |     project_urls={
64 |         'Source': 'https://github.com/UsingNet/nvcodec-python',
65 |         'Tracker': 'https://github.com/UsingNet/nvcodec-python/issues',
66 |     },
67 |     install_requires=['numpy>=1.17']
68 | )
--------------------------------------------------------------------------------
/src/cuvid/AppDecUtils.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
3 | *
4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
5 | * with this source code for terms and conditions that govern your use of
6 | * this software. Any use, reproduction, disclosure, or distribution of
7 | * this software and related documentation outside the terms of the EULA
8 | * is strictly prohibited.
9 | *
10 | */
11 | 
12 | //---------------------------------------------------------------------------
13 | //! \file AppDecUtils.h
14 | //! \brief Header file containing definitions of miscellaneous functions used by Decode samples
15 | //---------------------------------------------------------------------------
16 | 
17 | #pragma once
18 | #include <sstream>
19 | #include <iomanip>
20 | 
21 | static void ShowHelpAndExit(const char *szBadOption, char *szOutputFileName, bool *pbVerbose, int *piD3d)
22 | {
23 |     std::ostringstream oss;
24 |     bool bThrowError = false;
25 |     if (szBadOption) {
26 |         bThrowError = true; /* a bad option is an error, not a plain help request */
27 |         oss << "Error parsing \"" << szBadOption << "\"" << std::endl;
28 |     }
29 |     oss << "Options:" << std::endl
30 |         << "-i             Input file path" << std::endl
31 |         << (szOutputFileName ? "-o             Output file path\n" : "")
32 |         << "-gpu           Ordinal of GPU to use" << std::endl
33 |         << (pbVerbose ? "-v             Verbose message\n" : "")
34 |         << (piD3d ? 
"-d3d 9 (default): display with D3D9; 11: display with D3D11\n" : "") 35 | ; 36 | if (bThrowError) { 37 | throw std::invalid_argument(oss.str()); 38 | } 39 | else { 40 | std::cout << oss.str(); 41 | exit(0); 42 | } 43 | } 44 | 45 | static void ParseCommandLine(int argc, char *argv[], char *szInputFileName, 46 | char *szOutputFileName, int &iGpu, bool *pbVerbose = NULL, int *piD3d = NULL) 47 | { 48 | std::ostringstream oss; 49 | int i; 50 | for (i = 1; i < argc; i++) { 51 | if (!_stricmp(argv[i], "-h")) { 52 | ShowHelpAndExit(NULL, szOutputFileName, pbVerbose, piD3d); 53 | } 54 | if (!_stricmp(argv[i], "-i")) { 55 | if (++i == argc) { 56 | ShowHelpAndExit("-i", szOutputFileName, pbVerbose, piD3d); 57 | } 58 | sprintf(szInputFileName, "%s", argv[i]); 59 | continue; 60 | } 61 | if (!_stricmp(argv[i], "-o")) { 62 | if (++i == argc || !szOutputFileName) { 63 | ShowHelpAndExit("-o", szOutputFileName, pbVerbose, piD3d); 64 | } 65 | sprintf(szOutputFileName, "%s", argv[i]); 66 | continue; 67 | } 68 | if (!_stricmp(argv[i], "-gpu")) { 69 | if (++i == argc) { 70 | ShowHelpAndExit("-gpu", szOutputFileName, pbVerbose, piD3d); 71 | } 72 | iGpu = atoi(argv[i]); 73 | continue; 74 | } 75 | if (!_stricmp(argv[i], "-v")) { 76 | if (!pbVerbose) { 77 | ShowHelpAndExit("-v", szOutputFileName, pbVerbose, piD3d); 78 | } 79 | *pbVerbose = true; 80 | continue; 81 | } 82 | if (!_stricmp(argv[i], "-d3d")) { 83 | if (++i == argc || !piD3d) { 84 | ShowHelpAndExit("-d3d", szOutputFileName, pbVerbose, piD3d); 85 | } 86 | *piD3d = atoi(argv[i]); 87 | continue; 88 | } 89 | ShowHelpAndExit(argv[i], szOutputFileName, pbVerbose, piD3d); 90 | } 91 | } 92 | 93 | /** 94 | * @brief Function to generate space-separated list of supported video surface formats 95 | * @param nOutputFormatMask - Bit mask to represent supported cudaVideoSurfaceFormat in decoder 96 | * @param OutputFormats - Variable into which output string is written 97 | */ 98 | static void getOutputFormatNames(unsigned short nOutputFormatMask, char *OutputFormats) 99 | { 100 | if (nOutputFormatMask == 0) { 101 | strcpy(OutputFormats, "N/A"); 102 | return; 103 | } 104 | 105 | if (nOutputFormatMask & (1U << cudaVideoSurfaceFormat_NV12)) { 106 | strcat(OutputFormats, "NV12 "); 107 | } 108 | 109 | if (nOutputFormatMask & (1U << cudaVideoSurfaceFormat_P016)) { 110 | strcat(OutputFormats, "P016 "); 111 | } 112 | 113 | if (nOutputFormatMask & (1U << cudaVideoSurfaceFormat_YUV444)) { 114 | strcat(OutputFormats, "YUV444 "); 115 | } 116 | 117 | if (nOutputFormatMask & (1U << cudaVideoSurfaceFormat_YUV444_16Bit)) { 118 | strcat(OutputFormats, "YUV444P16 "); 119 | } 120 | return; 121 | } 122 | 123 | /** 124 | * @brief Utility function to create CUDA context 125 | * @param cuContext - Pointer to CUcontext. Updated by this function. 
126 | * @param iGpu - Device number to get handle for 127 | */ 128 | static void createCudaContext(CUcontext* cuContext, int iGpu, unsigned int flags) 129 | { 130 | CUdevice cuDevice = 0; 131 | ck(cuDeviceGet(&cuDevice, iGpu)); 132 | char szDeviceName[80]; 133 | ck(cuDeviceGetName(szDeviceName, sizeof(szDeviceName), cuDevice)); 134 | // std::cout << "GPU in use: " << szDeviceName << std::endl; 135 | ck(cuCtxCreate(cuContext, flags, cuDevice)); 136 | } 137 | 138 | /** 139 | * @brief Print decoder capabilities on std::cout 140 | */ 141 | static void ShowDecoderCapability() 142 | { 143 | ck(cuInit(0)); 144 | int nGpu = 0; 145 | ck(cuDeviceGetCount(&nGpu)); 146 | std::cout << "Decoder Capability" << std::endl << std::endl; 147 | const char *aszCodecName[] = {"JPEG", "MPEG1", "MPEG2", "MPEG4", "H264", "HEVC", "HEVC", "HEVC", "HEVC", "HEVC", "HEVC", "VC1", "VP8", "VP9", "VP9", "VP9", "AV1", "AV1", "AV1", "AV1"}; 148 | const char *aszChromaFormat[] = { "4:0:0", "4:2:0", "4:2:2", "4:4:4" }; 149 | char strOutputFormats[64]; 150 | cudaVideoCodec aeCodec[] = { cudaVideoCodec_JPEG, cudaVideoCodec_MPEG1, cudaVideoCodec_MPEG2, cudaVideoCodec_MPEG4, cudaVideoCodec_H264, cudaVideoCodec_HEVC, 151 | cudaVideoCodec_HEVC, cudaVideoCodec_HEVC, cudaVideoCodec_HEVC, cudaVideoCodec_HEVC, cudaVideoCodec_HEVC, cudaVideoCodec_VC1, cudaVideoCodec_VP8, 152 | cudaVideoCodec_VP9, cudaVideoCodec_VP9, cudaVideoCodec_VP9, cudaVideoCodec_AV1, cudaVideoCodec_AV1, cudaVideoCodec_AV1, cudaVideoCodec_AV1 }; 153 | int anBitDepthMinus8[] = {0, 0, 0, 0, 0, 0, 2, 4, 0, 2, 4, 0, 0, 0, 2, 4, 0, 2, 0, 2}; 154 | 155 | cudaVideoChromaFormat aeChromaFormat[] = { cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, 156 | cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_444, cudaVideoChromaFormat_444, 157 | cudaVideoChromaFormat_444, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, 158 | cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_Monochrome, cudaVideoChromaFormat_Monochrome }; 159 | 160 | for (int iGpu = 0; iGpu < nGpu; iGpu++) { 161 | 162 | CUcontext cuContext = NULL; 163 | createCudaContext(&cuContext, iGpu, 0); 164 | 165 | for (int i = 0; i < sizeof(aeCodec) / sizeof(aeCodec[0]); i++) { 166 | 167 | CUVIDDECODECAPS decodeCaps = {}; 168 | decodeCaps.eCodecType = aeCodec[i]; 169 | decodeCaps.eChromaFormat = aeChromaFormat[i]; 170 | decodeCaps.nBitDepthMinus8 = anBitDepthMinus8[i]; 171 | 172 | cuvidGetDecoderCaps(&decodeCaps); 173 | 174 | strOutputFormats[0] = '\0'; 175 | getOutputFormatNames(decodeCaps.nOutputFormatMask, strOutputFormats); 176 | 177 | // setw() width = maximum_width_of_string + 2 spaces 178 | std::cout << "Codec " << std::left << std::setw(7) << aszCodecName[i] << 179 | "BitDepth " << std::setw(4) << decodeCaps.nBitDepthMinus8 + 8 << 180 | "ChromaFormat " << std::setw(7) << aszChromaFormat[decodeCaps.eChromaFormat] << 181 | "Supported " << std::setw(3) << (int)decodeCaps.bIsSupported << 182 | "MaxWidth " << std::setw(7) << decodeCaps.nMaxWidth << 183 | "MaxHeight " << std::setw(7) << decodeCaps.nMaxHeight << 184 | "MaxMBCount " << std::setw(10) << decodeCaps.nMaxMBCount << 185 | "MinWidth " << std::setw(5) << decodeCaps.nMinWidth << 186 | "MinHeight " << std::setw(5) << decodeCaps.nMinHeight << 187 | "SurfaceFormat " << std::setw(11) << strOutputFormats << std::endl; 188 | 
}
189 | 
190 |         std::cout << std::endl;
191 | 
192 |         ck(cuCtxDestroy(cuContext));
193 |     }
194 | }
195 | 
--------------------------------------------------------------------------------
/src/cuvid/Logger.cpp:
--------------------------------------------------------------------------------
1 | #include "cuvid/Utils/Logger.h"
2 | 
3 | simplelogger::Logger *logger = simplelogger::LoggerFactory::CreateConsoleLogger();
4 | 
--------------------------------------------------------------------------------
/src/cuvid/NvDecoder/NvDecoder.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
3 | *
4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
5 | * with this source code for terms and conditions that govern your use of
6 | * this software. Any use, reproduction, disclosure, or distribution of
7 | * this software and related documentation outside the terms of the EULA
8 | * is strictly prohibited.
9 | *
10 | */
11 | 
12 | #pragma once
13 | 
14 | #include <assert.h>
15 | #include <stdint.h>
16 | #include <mutex>
17 | #include <vector>
18 | #include <string>
19 | #include <iostream>
20 | #include <sstream>
21 | #include <string.h>
22 | #include "../nvcuvid.h"
23 | #include "../Utils/NvCodecUtils.h"
24 | 
25 | /**
26 | * @brief Exception class for error reporting from the decode API.
27 | */
28 | class NVDECException : public std::exception
29 | {
30 | public:
31 |     NVDECException(const std::string& errorStr, const CUresult errorCode)
32 |         : m_errorString(errorStr), m_errorCode(errorCode) {}
33 | 
34 |     virtual ~NVDECException() throw() {}
35 |     virtual const char* what() const throw() { return m_errorString.c_str(); }
36 |     CUresult getErrorCode() const { return m_errorCode; }
37 |     const std::string& getErrorString() const { return m_errorString; }
38 |     static NVDECException makeNVDECException(const std::string& errorStr, const CUresult errorCode,
39 |         const std::string& functionName, const std::string& fileName, int lineNo);
40 | private:
41 |     std::string m_errorString;
42 |     CUresult m_errorCode;
43 | };
44 | 
45 | inline NVDECException NVDECException::makeNVDECException(const std::string& errorStr, const CUresult errorCode, const std::string& functionName,
46 |     const std::string& fileName, int lineNo)
47 | {
48 |     std::ostringstream errorLog;
49 |     errorLog << functionName << " : " << errorStr << " at " << fileName << ":" << lineNo << std::endl;
50 |     NVDECException exception(errorLog.str(), errorCode);
51 |     return exception;
52 | }
53 | 
54 | #define NVDEC_THROW_ERROR( errorStr, errorCode )                                                         \
55 |     do                                                                                                   \
56 |     {                                                                                                    \
57 |         throw NVDECException::makeNVDECException(errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__); \
58 |     } while (0)
59 | 
60 | 
61 | #define NVDEC_API_CALL( cuvidAPI )                                                                                 \
62 |     do                                                                                                             \
63 |     {                                                                                                              \
64 |         CUresult errorCode = cuvidAPI;                                                                             \
65 |         if( errorCode != CUDA_SUCCESS)                                                                             \
66 |         {                                                                                                          \
67 |             std::ostringstream errorLog;                                                                           \
68 |             errorLog << #cuvidAPI << " returned error " << errorCode;                                              \
69 |             throw NVDECException::makeNVDECException(errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__); \
70 |         }                                                                                                          \
71 |     } while (0)
72 | 
73 | struct Rect {
74 |     int l, t, r, b;
75 | };
76 | 
77 | struct Dim {
78 |     int w, h;
79 | };
80 | 
81 | /**
82 | * @brief Base class for decoder interface.
83 | */
84 | class NvDecoder {
85 | public:
86 |     int *decodeResult = nullptr;
87 | public:
88 |     /**
89 |     *  @brief This function is used to initialize the decoder session.
90 |     *  Application must call this function to initialize the decoder, before
91 |     *  starting to decode any frames.
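    *
    *  A minimal decode-loop sketch (illustrative only, based on the Decode()/
    *  GetFrame() contract documented below; error handling omitted):
    *
    *      NvDecoder dec(cuContext, false, cudaVideoCodec_H264);
    *      int nFrameReturned = dec.Decode(pData, nSize); // one demuxed packet in
    *      for (int i = 0; i < nFrameReturned; i++) {
    *          uint8_t *pFrame = dec.GetFrame();          // drain before the next Decode()
    *      }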
92 | */ 93 | NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec, bool bLowLatency = false, 94 | bool bDeviceFramePitched = false, const Rect *pCropRect = NULL, const Dim *pResizeDim = NULL, 95 | int maxWidth = 0, int maxHeight = 0, unsigned int clkRate = 1000); 96 | ~NvDecoder(); 97 | 98 | /** 99 | * @brief This function is used to get the current CUDA context. 100 | */ 101 | CUcontext GetContext() { return m_cuContext; } 102 | 103 | /** 104 | * @brief This function is used to get the output frame width. 105 | * NV12/P016 output format width is 2 byte aligned because of U and V interleave 106 | */ 107 | int GetWidth() { assert(m_nWidth); return (m_eOutputFormat == cudaVideoSurfaceFormat_NV12 || m_eOutputFormat == cudaVideoSurfaceFormat_P016) 108 | ? (m_nWidth + 1) & ~1 : m_nWidth; } 109 | 110 | /** 111 | * @brief This function is used to get the actual decode width 112 | */ 113 | int GetDecodeWidth() { assert(m_nWidth); return m_nWidth; } 114 | 115 | /** 116 | * @brief This function is used to get the output frame height (Luma height). 117 | */ 118 | int GetHeight() { assert(m_nLumaHeight); return m_nLumaHeight; } 119 | 120 | /** 121 | * @brief This function is used to get the current chroma height. 122 | */ 123 | int GetChromaHeight() { assert(m_nChromaHeight); return m_nChromaHeight; } 124 | 125 | /** 126 | * @brief This function is used to get the number of chroma planes. 127 | */ 128 | int GetNumChromaPlanes() { assert(m_nNumChromaPlanes); return m_nNumChromaPlanes; } 129 | 130 | /** 131 | * @brief This function is used to get the current frame size based on pixel format. 132 | */ 133 | int GetFrameSize() { assert(m_nWidth); return GetWidth() * (m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes)) * m_nBPP; } 134 | 135 | /** 136 | * @brief This function is used to get the current frame Luma plane size. 137 | */ 138 | int GetLumaPlaneSize() { assert(m_nWidth); return GetWidth() * m_nLumaHeight * m_nBPP; } 139 | 140 | /** 141 | * @brief This function is used to get the current frame chroma plane size. 142 | */ 143 | int GetChromaPlaneSize() { assert(m_nWidth); return GetWidth() * (m_nChromaHeight * m_nNumChromaPlanes) * m_nBPP; } 144 | 145 | /** 146 | * @brief This function is used to get the pitch of the device buffer holding the decoded frame. 147 | */ 148 | int GetDeviceFramePitch() { assert(m_nWidth); return m_nDeviceFramePitch ? (int)m_nDeviceFramePitch : GetWidth() * m_nBPP; } 149 | 150 | /** 151 | * @brief This function is used to get the bit depth associated with the pixel format. 152 | */ 153 | int GetBitDepth() { assert(m_nWidth); return m_nBitDepthMinus8 + 8; } 154 | 155 | /** 156 | * @brief This function is used to get the bytes used per pixel. 
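    *  (1 for 8-bit output formats such as NV12, 2 for 16-bit formats such as P016.)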
157 | */ 158 | int GetBPP() { assert(m_nWidth); return m_nBPP; } 159 | 160 | /** 161 | * @brief This function is used to get the YUV chroma format 162 | */ 163 | cudaVideoSurfaceFormat GetOutputFormat() { return m_eOutputFormat; } 164 | 165 | /** 166 | * @brief This function is used to get information about the video stream (codec, display parameters etc) 167 | */ 168 | CUVIDEOFORMAT GetVideoFormatInfo() { assert(m_nWidth); return m_videoFormat; } 169 | 170 | /** 171 | * @brief This function is used to get codec string from codec id 172 | */ 173 | const char *GetCodecString(cudaVideoCodec eCodec); 174 | 175 | /** 176 | * @brief This function is used to print information about the video stream 177 | */ 178 | std::string GetVideoInfo() const { return m_videoInfo.str(); } 179 | 180 | /** 181 | * @brief This function decodes a frame and returns the number of frames that are available for 182 | * display. All frames that are available for display should be read before making a subsequent decode call. 183 | * @param pData - pointer to the data buffer that is to be decoded 184 | * @param nSize - size of the data buffer in bytes 185 | * @param nFlags - CUvideopacketflags for setting decode options 186 | * @param nTimestamp - presentation timestamp 187 | */ 188 | int Decode(const uint8_t *pData, int nSize, int nFlags = 0, int64_t nTimestamp = 0); 189 | 190 | /** 191 | * @brief This function returns a decoded frame and timestamp. This function should be called in a loop for 192 | * fetching all the frames that are available for display. 193 | */ 194 | uint8_t* GetFrame(int64_t* pTimestamp = nullptr); 195 | 196 | 197 | /** 198 | * @brief This function decodes a frame and returns the locked frame buffers 199 | * This makes the buffers available for use by the application without the buffers 200 | * getting overwritten, even if subsequent decode calls are made. 
The frame buffers 201 | * remain locked, until UnlockFrame() is called 202 | */ 203 | uint8_t* GetLockedFrame(int64_t* pTimestamp = nullptr); 204 | 205 | /** 206 | * @brief This function unlocks the frame buffer and makes the frame buffers available for write again 207 | * @param ppFrame - pointer to array of frames that are to be unlocked 208 | * @param nFrame - number of frames to be unlocked 209 | */ 210 | void UnlockFrame(uint8_t **pFrame); 211 | 212 | /** 213 | * @brief This function allows app to set decoder reconfig params 214 | * @param pCropRect - cropping rectangle coordinates 215 | * @param pResizeDim - width and height of resized output 216 | */ 217 | int setReconfigParams(const Rect * pCropRect, const Dim * pResizeDim); 218 | 219 | /** 220 | * @brief This function allows app to set operating point for AV1 SVC clips 221 | * @param opPoint - operating point of an AV1 scalable bitstream 222 | * @param bDispAllLayers - Output all decoded frames of an AV1 scalable bitstream 223 | */ 224 | void SetOperatingPoint(const uint32_t opPoint, const bool bDispAllLayers) { m_nOperatingPoint = opPoint; m_bDispAllLayers = bDispAllLayers; } 225 | 226 | // start a timer 227 | void startTimer() { m_stDecode_time.Start(); } 228 | 229 | // stop the timer 230 | double stopTimer() { return m_stDecode_time.Stop(); } 231 | private: 232 | /** 233 | * @brief Callback function to be registered for getting a callback when decoding of sequence starts 234 | */ 235 | static int CUDAAPI HandleVideoSequenceProc(void *pUserData, CUVIDEOFORMAT *pVideoFormat) { return ((NvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat); } 236 | 237 | /** 238 | * @brief Callback function to be registered for getting a callback when a decoded frame is ready to be decoded 239 | */ 240 | static int CUDAAPI HandlePictureDecodeProc(void *pUserData, CUVIDPICPARAMS *pPicParams) { return ((NvDecoder *)pUserData)->HandlePictureDecode(pPicParams); } 241 | 242 | /** 243 | * @brief Callback function to be registered for getting a callback when a decoded frame is available for display 244 | */ 245 | static int CUDAAPI HandlePictureDisplayProc(void *pUserData, CUVIDPARSERDISPINFO *pDispInfo) { return ((NvDecoder *)pUserData)->HandlePictureDisplay(pDispInfo); } 246 | 247 | /** 248 | * @brief Callback function to be registered for getting a callback to get operating point when AV1 SVC sequence header start. 249 | */ 250 | static int CUDAAPI HandleOperatingPointProc(void *pUserData, CUVIDOPERATINGPOINTINFO *pOPInfo) { return ((NvDecoder *)pUserData)->GetOperatingPoint(pOPInfo); } 251 | 252 | /** 253 | * @brief This function gets called when a sequence is ready to be decoded. The function also gets called 254 | when there is format change 255 | */ 256 | int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat); 257 | 258 | /** 259 | * @brief This function gets called when a picture is ready to be decoded. cuvidDecodePicture is called from this function 260 | * to decode the picture 261 | */ 262 | int HandlePictureDecode(CUVIDPICPARAMS *pPicParams); 263 | 264 | /** 265 | * @brief This function gets called after a picture is decoded and available for display. 
Frames are fetched and stored in
266 |    an internal buffer
267 |    */
268 |     int HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo);
269 | 
270 |     /**
271 |     *  @brief This function gets called when an AV1 sequence encounters more than one operating point
272 |     */
273 |     int GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo);
274 |     /**
275 |     *  @brief This function reconfigures the decoder if there is a change in sequence params.
276 |     */
277 |     int ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat);
278 | 
279 | private:
280 |     CUcontext m_cuContext = NULL;
281 |     CUvideoctxlock m_ctxLock;
282 |     CUvideoparser m_hParser = NULL;
283 |     CUvideodecoder m_hDecoder = NULL;
284 |     bool m_bUseDeviceFrame;
285 |     // dimension of the output
286 |     unsigned int m_nWidth = 0, m_nLumaHeight = 0, m_nChromaHeight = 0;
287 |     unsigned int m_nNumChromaPlanes = 0;
288 |     // height of the mapped surface
289 |     int m_nSurfaceHeight = 0;
290 |     int m_nSurfaceWidth = 0;
291 |     cudaVideoCodec m_eCodec = cudaVideoCodec_NumCodecs;
292 |     cudaVideoChromaFormat m_eChromaFormat = cudaVideoChromaFormat_420;
293 |     cudaVideoSurfaceFormat m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
294 |     int m_nBitDepthMinus8 = 0;
295 |     int m_nBPP = 1;
296 |     CUVIDEOFORMAT m_videoFormat = {};
297 |     Rect m_displayRect = {};
298 |     // stock of frames
299 |     std::vector<uint8_t *> m_vpFrame;
300 |     // timestamps of decoded frames
301 |     std::vector<int64_t> m_vTimestamp;
302 |     int m_nDecodedFrame = 0, m_nDecodedFrameReturned = 0;
303 |     int m_nDecodePicCnt = 0, m_nPicNumInDecodeOrder[32];
304 |     bool m_bEndDecodeDone = false;
305 |     std::mutex m_mtxVPFrame;
306 |     int m_nFrameAlloc = 0;
307 |     CUstream m_cuvidStream = 0;
308 |     bool m_bDeviceFramePitched = false;
309 |     size_t m_nDeviceFramePitch = 0;
310 |     Rect m_cropRect = {};
311 |     Dim m_resizeDim = {};
312 | 
313 |     std::ostringstream m_videoInfo;
314 |     unsigned int m_nMaxWidth = 0, m_nMaxHeight = 0;
315 |     bool m_bReconfigExternal = false;
316 |     bool m_bReconfigExtPPChange = false;
317 |     StopWatch m_stDecode_time;
318 | 
319 |     unsigned int m_nOperatingPoint = 0;
320 |     bool m_bDispAllLayers = false;
321 | };
322 | 
--------------------------------------------------------------------------------
/src/cuvid/NvEncoder/NvEncoder.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
3 | *
4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
5 | * with this source code for terms and conditions that govern your use of
6 | * this software. Any use, reproduction, disclosure, or distribution of
7 | * this software and related documentation outside the terms of the EULA
8 | * is strictly prohibited.
9 | *
10 | */
11 | 
12 | #pragma once
13 | 
14 | #include <vector>
15 | #include "nvEncodeAPI.h"
16 | #include <stdint.h>
17 | #include <mutex>
18 | #include <string>
19 | #include <iostream>
20 | #include <sstream>
21 | #include <string.h>
22 | 
23 | /**
24 | * @brief Exception class for error reporting from NvEncodeAPI calls.
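*
* Errors surface through the NVENC_API_CALL() / NVENC_THROW_ERROR() macros
* defined below; a typical caller can catch them as, for example:
*
*     try { enc.EncodeFrame(vPacket); }
*     catch (const NVENCException &e) { std::cerr << e.what() << std::endl; }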
25 | */ 26 | class NVENCException : public std::exception 27 | { 28 | public: 29 | NVENCException(const std::string& errorStr, const NVENCSTATUS errorCode) 30 | : m_errorString(errorStr), m_errorCode(errorCode) {} 31 | 32 | virtual ~NVENCException() throw() {} 33 | virtual const char* what() const throw() { return m_errorString.c_str(); } 34 | NVENCSTATUS getErrorCode() const { return m_errorCode; } 35 | const std::string& getErrorString() const { return m_errorString; } 36 | static NVENCException makeNVENCException(const std::string& errorStr, const NVENCSTATUS errorCode, 37 | const std::string& functionName, const std::string& fileName, int lineNo); 38 | private: 39 | std::string m_errorString; 40 | NVENCSTATUS m_errorCode; 41 | }; 42 | 43 | inline NVENCException NVENCException::makeNVENCException(const std::string& errorStr, const NVENCSTATUS errorCode, const std::string& functionName, 44 | const std::string& fileName, int lineNo) 45 | { 46 | std::ostringstream errorLog; 47 | errorLog << functionName << " : " << errorStr << " at " << fileName << ":" << lineNo << std::endl; 48 | NVENCException exception(errorLog.str(), errorCode); 49 | return exception; 50 | } 51 | 52 | #define NVENC_THROW_ERROR( errorStr, errorCode ) \ 53 | do \ 54 | { \ 55 | throw NVENCException::makeNVENCException(errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__); \ 56 | } while (0) 57 | 58 | 59 | #define NVENC_API_CALL( nvencAPI ) \ 60 | do \ 61 | { \ 62 | NVENCSTATUS errorCode = nvencAPI; \ 63 | if( errorCode != NV_ENC_SUCCESS) \ 64 | { \ 65 | std::ostringstream errorLog; \ 66 | errorLog << #nvencAPI << " returned error " << errorCode; \ 67 | throw NVENCException::makeNVENCException(errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__); \ 68 | } \ 69 | } while (0) 70 | 71 | struct NvEncInputFrame 72 | { 73 | void* inputPtr = nullptr; 74 | uint32_t chromaOffsets[2]; 75 | uint32_t numChromaPlanes; 76 | uint32_t pitch; 77 | uint32_t chromaPitch; 78 | NV_ENC_BUFFER_FORMAT bufferFormat; 79 | NV_ENC_INPUT_RESOURCE_TYPE resourceType; 80 | }; 81 | 82 | /** 83 | * @brief Shared base class for different encoder interfaces. 84 | */ 85 | class NvEncoder 86 | { 87 | public: 88 | /** 89 | * @brief This function is used to initialize the encoder session. 90 | * Application must call this function to initialize the encoder, before 91 | * starting to encode any frames. 92 | */ 93 | void CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncodeParams); 94 | 95 | /** 96 | * @brief This function is used to destroy the encoder session. 97 | * Application must call this function to destroy the encoder session and 98 | * clean up any allocated resources. The application must call EndEncode() 99 | * function to get any queued encoded frames before calling DestroyEncoder(). 100 | */ 101 | void DestroyEncoder(); 102 | 103 | /** 104 | * @brief This function is used to reconfigure an existing encoder session. 105 | * Application can use this function to dynamically change the bitrate, 106 | * resolution and other QOS parameters. If the application changes the 107 | * resolution, it must set NV_ENC_RECONFIGURE_PARAMS::forceIDR. 108 | */ 109 | bool Reconfigure(const NV_ENC_RECONFIGURE_PARAMS *pReconfigureParams); 110 | 111 | /** 112 | * @brief This function is used to get the next available input buffer. 113 | * Applications must call this function to obtain a pointer to the next 114 | * input buffer. The application must copy the uncompressed data to the 115 | * input buffer and then call EncodeFrame() function to encode it. 
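    *
    *  A typical frame loop, per the contract above (sketch only; the copy step
    *  depends on the derived class, e.g. NvEncoderCuda::CopyToDeviceFrame()):
    *
    *      std::vector<std::vector<uint8_t>> vPacket;
    *      const NvEncInputFrame* encoderInputFrame = enc.GetNextInputFrame();
    *      // ... copy one uncompressed frame into encoderInputFrame->inputPtr ...
    *      enc.EncodeFrame(vPacket);  // yields zero or more encoded packets
    *      // after the last frame:
    *      enc.EndEncode(vPacket);    // flush whatever the encoder still holds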
116 |     */
117 |     const NvEncInputFrame* GetNextInputFrame();
118 | 
119 | 
120 |     /**
121 |     *  @brief This function is used to encode a frame.
122 |     *  Applications must call EncodeFrame() function to encode the uncompressed
123 |     *  data, which has been copied to an input buffer obtained from the
124 |     *  GetNextInputFrame() function.
125 |     */
126 |     void EncodeFrame(std::vector<std::vector<uint8_t>> &vPacket, NV_ENC_PIC_PARAMS *pPicParams = nullptr);
127 | 
128 |     /**
129 |     *  @brief This function is used to flush the encoder queue.
130 |     *  The encoder might be queuing frames for B picture encoding or lookahead;
131 |     *  the application must call EndEncode() to get all the queued encoded frames
132 |     *  from the encoder. The application must call this function before destroying
133 |     *  an encoder session.
134 |     */
135 |     void EndEncode(std::vector<std::vector<uint8_t>> &vPacket);
136 | 
137 |     /**
138 |     *  @brief This function is used to query hardware encoder capabilities.
139 |     *  Applications can call this function to query capabilities like maximum encode
140 |     *  dimensions, support for lookahead or the ME-only mode etc.
141 |     */
142 |     int GetCapabilityValue(GUID guidCodec, NV_ENC_CAPS capsToQuery);
143 | 
144 |     /**
145 |     *  @brief This function is used to get the current device on which encoder is running.
146 |     */
147 |     void *GetDevice() const { return m_pDevice; }
148 | 
149 |     /**
150 |     *  @brief This function is used to get the current device type which encoder is running.
151 |     */
152 |     NV_ENC_DEVICE_TYPE GetDeviceType() const { return m_eDeviceType; }
153 | 
154 |     /**
155 |     *  @brief This function is used to get the current encode width.
156 |     *  The encode width can be modified by Reconfigure() function.
157 |     */
158 |     int GetEncodeWidth() const { return m_nWidth; }
159 | 
160 |     /**
161 |     *  @brief This function is used to get the current encode height.
162 |     *  The encode height can be modified by Reconfigure() function.
163 |     */
164 |     int GetEncodeHeight() const { return m_nHeight; }
165 | 
166 |     /**
167 |     *  @brief This function is used to get the current frame size based on pixel format.
168 |     */
169 |     int GetFrameSize() const;
170 | 
171 |     /**
172 |     *  @brief This function is used to initialize config parameters based on
173 |     *         given codec and preset guids.
174 |     *  The application can call this function to get the default configuration
175 |     *  for a certain preset. The application can either use these parameters
176 |     *  directly or override them with application-specific settings before
177 |     *  using them in CreateEncoder() function.
178 |     */
179 |     void CreateDefaultEncoderParams(NV_ENC_INITIALIZE_PARAMS* pIntializeParams, GUID codecGuid, GUID presetGuid, NV_ENC_TUNING_INFO tuningInfo = NV_ENC_TUNING_INFO_UNDEFINED);
180 | 
181 |     /**
182 |     *  @brief This function is used to get the current initialization parameters,
183 |     *         which had been used to configure the encoder session.
184 |     *  The initialization parameters are modified if the application calls
185 |     *  Reconfigure() function.
186 |     */
187 |     void GetInitializeParams(NV_ENC_INITIALIZE_PARAMS *pInitializeParams);
188 | 
189 |     /**
190 |     *  @brief This function is used to run motion estimation.
191 |     *  This is used to run motion estimation on a pair of frames. The
192 |     *  application must copy the reference frame data to the buffer obtained
193 |     *  by calling GetNextReferenceFrame(), and copy the input frame data to
194 |     *  the buffer obtained by calling GetNextInputFrame() before calling the
195 |     *  RunMotionEstimation() function.
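    *
    *  Sketch of that call order (ME-only mode, i.e. the encoder was constructed
    *  with bMotionEstimationOnly = true):
    *
    *      // fill GetNextReferenceFrame()->inputPtr with the reference frame
    *      // fill GetNextInputFrame()->inputPtr with the input frame
    *      std::vector<uint8_t> mvData;
    *      enc.RunMotionEstimation(mvData);  // motion vectors land in mvData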
196 |     */
197 |     void RunMotionEstimation(std::vector<uint8_t> &mvData);
198 | 
199 |     /**
200 |     *  @brief This function is used to get an available reference frame.
201 |     *  Application must call this function to get a pointer to a reference buffer,
202 |     *  to be used in the subsequent RunMotionEstimation() function.
203 |     */
204 |     const NvEncInputFrame* GetNextReferenceFrame();
205 | 
206 |     /**
207 |     *  @brief This function is used to get sequence and picture parameter headers.
208 |     *  Application can call this function after encoder is initialized to get SPS and PPS
209 |     *  nalus for the current encoder instance. The sequence header data might change when
210 |     *  application calls Reconfigure() function.
211 |     */
212 |     void GetSequenceParams(std::vector<uint8_t> &seqParams);
213 | 
214 |     /**
215 |     *  @brief NvEncoder class virtual destructor.
216 |     */
217 |     virtual ~NvEncoder();
218 | 
219 | public:
220 |     /**
221 |     *  @brief This is a static function to get chroma offsets for YUV planar formats.
222 |     */
223 |     static void GetChromaSubPlaneOffsets(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t pitch,
224 |                                          const uint32_t height, std::vector<uint32_t>& chromaOffsets);
225 |     /**
226 |     *  @brief This is a static function to get the chroma plane pitch for YUV planar formats.
227 |     */
228 |     static uint32_t GetChromaPitch(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaPitch);
229 | 
230 |     /**
231 |     *  @brief This is a static function to get the number of chroma planes for YUV planar formats.
232 |     */
233 |     static uint32_t GetNumChromaPlanes(const NV_ENC_BUFFER_FORMAT bufferFormat);
234 | 
235 |     /**
236 |     *  @brief This is a static function to get the chroma plane width in bytes for YUV planar formats.
237 |     */
238 |     static uint32_t GetChromaWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaWidth);
239 | 
240 |     /**
241 |     *  @brief This is a static function to get the chroma planes height in bytes for YUV planar formats.
242 |     */
243 |     static uint32_t GetChromaHeight(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaHeight);
244 | 
245 | 
246 |     /**
247 |     *  @brief This is a static function to get the width in bytes for the frame.
248 |     *  For YUV planar format this is the width in bytes of the luma plane.
249 |     */
250 |     static uint32_t GetWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t width);
251 | 
252 |     /**
253 |     *  @brief This function returns the number of allocated buffers.
254 |     */
255 |     uint32_t GetEncoderBufferCount() const { return m_nEncoderBuffer; }
256 | protected:
257 | 
258 |     /**
259 |     *  @brief NvEncoder class constructor.
260 |     *  NvEncoder class constructor cannot be called directly by the application.
261 |     */
262 |     NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void *pDevice, uint32_t nWidth, uint32_t nHeight,
263 |               NV_ENC_BUFFER_FORMAT eBufferFormat, uint32_t nOutputDelay, bool bMotionEstimationOnly, bool bOutputInVideoMemory = false);
264 | 
265 |     /**
266 |     *  @brief This function is used to check if hardware encoder is properly initialized.
267 |     */
268 |     bool IsHWEncoderInitialized() const { return m_hEncoder != NULL && m_bEncoderInitialized; }
269 | 
270 |     /**
271 |     *  @brief This function is used to register CUDA, D3D or OpenGL input buffers with NvEncodeAPI.
272 |     *  This is a non-public function and is called by the derived class for allocating
273 |     *  and registering input buffers.
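    *  (NvEncoderCuda::AllocateInputBuffers() in NvEncoderCuda.cpp is an example caller.)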
274 |     */
275 |     void RegisterInputResources(std::vector<void*> inputframes, NV_ENC_INPUT_RESOURCE_TYPE eResourceType,
276 |         int width, int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat, bool bReferenceFrame = false);
277 | 
278 |     /**
279 |     *  @brief This function is used to unregister resources which had been previously registered for encoding
280 |     *         using RegisterInputResources() function.
281 |     */
282 |     void UnregisterInputResources();
283 | 
284 |     /**
285 |     *  @brief This function is used to register CUDA, D3D or OpenGL input or output buffers with NvEncodeAPI.
286 |     */
287 |     NV_ENC_REGISTERED_PTR RegisterResource(void *pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType,
288 |         int width, int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat, NV_ENC_BUFFER_USAGE bufferUsage = NV_ENC_INPUT_IMAGE);
289 | 
290 |     /**
291 |     *  @brief This function returns maximum width used to open the encoder session.
292 |     *  All encode input buffers are allocated using maximum dimensions.
293 |     */
294 |     uint32_t GetMaxEncodeWidth() const { return m_nMaxEncodeWidth; }
295 | 
296 |     /**
297 |     *  @brief This function returns maximum height used to open the encoder session.
298 |     *  All encode input buffers are allocated using maximum dimensions.
299 |     */
300 |     uint32_t GetMaxEncodeHeight() const { return m_nMaxEncodeHeight; }
301 | 
302 |     /**
303 |     *  @brief This function returns the completion event.
304 |     */
305 |     void* GetCompletionEvent(uint32_t eventIdx) { return (m_vpCompletionEvent.size() == m_nEncoderBuffer) ? m_vpCompletionEvent[eventIdx] : nullptr; }
306 | 
307 |     /**
308 |     *  @brief This function returns the current pixel format.
309 |     */
310 |     NV_ENC_BUFFER_FORMAT GetPixelFormat() const { return m_eBufferFormat; }
311 | 
312 |     /**
313 |     *  @brief This function is used to submit the encode commands to the
314 |     *         NVENC hardware.
315 |     */
316 |     NVENCSTATUS DoEncode(NV_ENC_INPUT_PTR inputBuffer, NV_ENC_OUTPUT_PTR outputBuffer, NV_ENC_PIC_PARAMS *pPicParams);
317 | 
318 |     /**
319 |     *  @brief This function is used to submit the encode commands to the
320 |     *         NVENC hardware for ME only mode.
321 |     */
322 |     NVENCSTATUS DoMotionEstimation(NV_ENC_INPUT_PTR inputBuffer, NV_ENC_INPUT_PTR inputBufferForReference, NV_ENC_OUTPUT_PTR outputBuffer);
323 | 
324 |     /**
325 |     *  @brief This function is used to map the input buffers to NvEncodeAPI.
326 |     */
327 |     void MapResources(uint32_t bfrIdx);
328 | 
329 |     /**
330 |     *  @brief This function is used to wait for completion of encode command.
331 |     */
332 |     void WaitForCompletionEvent(int iEvent);
333 | 
334 |     /**
335 |     *  @brief This function is used to send EOS to HW encoder.
336 |     */
337 |     void SendEOS();
338 | 
339 | private:
340 |     /**
341 |     *  @brief This is a private function which is used to check if there is any
342 |     *         buffering done by the encoder.
343 |     *  The encoder generally buffers data to encode B frames or for lookahead
344 |     *  or pipelining.
345 |     */
346 |     bool IsZeroDelay() { return m_nOutputDelay == 0; }
347 | 
348 |     /**
349 |     *  @brief This is a private function which is used to load the encode api shared library.
350 |     */
351 |     void LoadNvEncApi();
352 | 
353 |     /**
354 |     *  @brief This is a private function which is used to get the output packets
355 |     *         from the encoder HW.
356 |     *  This is called by DoEncode() function. If there is buffering enabled,
357 |     *  this may return without any output data.
358 |     */
359 |     void GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR> &vOutputBuffer, std::vector<std::vector<uint8_t>> &vPacket, bool bOutputDelay);
360 | 
361 |     /**
362 |     *  @brief This is a private function which is used to initialize the bitstream buffers.
363 |     *  This is only used in the encoding mode.
364 |     */
365 |     void InitializeBitstreamBuffer();
366 | 
367 |     /**
368 |     *  @brief This is a private function which is used to destroy the bitstream buffers.
369 |     *  This is only used in the encoding mode.
370 |     */
371 |     void DestroyBitstreamBuffer();
372 | 
373 |     /**
374 |     *  @brief This is a private function which is used to initialize MV output buffers.
375 |     *  This is only used in ME-only Mode.
376 |     */
377 |     void InitializeMVOutputBuffer();
378 | 
379 |     /**
380 |     *  @brief This is a private function which is used to destroy MV output buffers.
381 |     *  This is only used in ME-only Mode.
382 |     */
383 |     void DestroyMVOutputBuffer();
384 | 
385 |     /**
386 |     *  @brief This is a private function which is used to destroy HW encoder.
387 |     */
388 |     void DestroyHWEncoder();
389 | 
390 |     /**
391 |     *  @brief This function is used to flush the encoder queue.
392 |     */
393 |     void FlushEncoder();
394 | 
395 | private:
396 |     /**
397 |     *  @brief This is a pure virtual function which is used to allocate input buffers.
398 |     *  The derived classes must implement this function.
399 |     */
400 |     virtual void AllocateInputBuffers(int32_t numInputBuffers) = 0;
401 | 
402 |     /**
403 |     *  @brief This is a pure virtual function which is used to destroy input buffers.
404 |     *  The derived classes must implement this function.
405 |     */
406 |     virtual void ReleaseInputBuffers() = 0;
407 | 
408 | protected:
409 |     bool m_bMotionEstimationOnly = false;
410 |     bool m_bOutputInVideoMemory = false;
411 |     void *m_hEncoder = nullptr;
412 |     NV_ENCODE_API_FUNCTION_LIST m_nvenc;
413 |     std::vector<NvEncInputFrame> m_vInputFrames;
414 |     std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResources;
415 |     std::vector<NvEncInputFrame> m_vReferenceFrames;
416 |     std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResourcesForReference;
417 |     std::vector<NV_ENC_INPUT_PTR> m_vMappedInputBuffers;
418 |     std::vector<NV_ENC_INPUT_PTR> m_vMappedRefBuffers;
419 |     std::vector<void *> m_vpCompletionEvent;
420 | 
421 |     int32_t m_iToSend = 0;
422 |     int32_t m_iGot = 0;
423 |     int32_t m_nEncoderBuffer = 0;
424 |     int32_t m_nOutputDelay = 0;
425 | 
426 | private:
427 |     uint32_t m_nWidth;
428 |     uint32_t m_nHeight;
429 |     NV_ENC_BUFFER_FORMAT m_eBufferFormat;
430 |     void *m_pDevice;
431 |     NV_ENC_DEVICE_TYPE m_eDeviceType;
432 |     NV_ENC_INITIALIZE_PARAMS m_initializeParams = {};
433 |     NV_ENC_CONFIG m_encodeConfig = {};
434 |     bool m_bEncoderInitialized = false;
435 |     uint32_t m_nExtraOutputDelay = 3; // To ensure encode and graphics can work in parallel, m_nExtraOutputDelay should be set to at least 1
436 |     std::vector<NV_ENC_OUTPUT_PTR> m_vBitstreamOutputBuffer;
437 |     std::vector<NV_ENC_OUTPUT_PTR> m_vMVDataOutputBuffer;
438 |     uint32_t m_nMaxEncodeWidth = 0;
439 |     uint32_t m_nMaxEncodeHeight = 0;
440 | };
441 | 
--------------------------------------------------------------------------------
/src/cuvid/NvEncoder/NvEncoderCuda.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
3 | *
4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
5 | * with this source code for terms and conditions that govern your use of
6 | * this software. Any use, reproduction, disclosure, or distribution of
7 | * this software and related documentation outside the terms of the EULA
8 | * is strictly prohibited.
9 | * 10 | */ 11 | 12 | #include "NvEncoder/NvEncoderCuda.h" 13 | 14 | 15 | NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat, 16 | uint32_t nExtraOutputDelay, bool bMotionEstimationOnly, bool bOutputInVideoMemory): 17 | NvEncoder(NV_ENC_DEVICE_TYPE_CUDA, cuContext, nWidth, nHeight, eBufferFormat, nExtraOutputDelay, bMotionEstimationOnly, bOutputInVideoMemory), 18 | m_cuContext(cuContext) 19 | { 20 | if (!m_hEncoder) 21 | { 22 | NVENC_THROW_ERROR("Encoder Initialization failed", NV_ENC_ERR_INVALID_DEVICE); 23 | } 24 | 25 | if (!m_cuContext) 26 | { 27 | NVENC_THROW_ERROR("Invalid Cuda Context", NV_ENC_ERR_INVALID_DEVICE); 28 | } 29 | } 30 | 31 | NvEncoderCuda::~NvEncoderCuda() 32 | { 33 | ReleaseCudaResources(); 34 | } 35 | 36 | void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) 37 | { 38 | if (!IsHWEncoderInitialized()) 39 | { 40 | NVENC_THROW_ERROR("Encoder initialization failed", NV_ENC_ERR_ENCODER_NOT_INITIALIZED); 41 | } 42 | 43 | // for ME-only mode we need to allocate a separate set of buffers for reference frames 44 | int numCount = m_bMotionEstimationOnly ? 2 : 1; 45 | 46 | for (int count = 0; count < numCount; count++) 47 | { 48 | CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); 49 | std::vector<void*> inputFrames; 50 | for (int i = 0; i < numInputBuffers; i++) 51 | { 52 | CUdeviceptr pDeviceFrame; 53 | uint32_t chromaHeight = GetNumChromaPlanes(GetPixelFormat()) * GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight()); 54 | if (GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 || GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV) 55 | chromaHeight = GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight()); 56 | CUDA_DRVAPI_CALL(cuMemAllocPitch((CUdeviceptr *)&pDeviceFrame, 57 | &m_cudaPitch, 58 | GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()), 59 | GetMaxEncodeHeight() + chromaHeight, 16)); 60 | inputFrames.push_back((void*)pDeviceFrame); 61 | } 62 | CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); 63 | 64 | RegisterInputResources(inputFrames, 65 | NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR, 66 | GetMaxEncodeWidth(), 67 | GetMaxEncodeHeight(), 68 | (int)m_cudaPitch, 69 | GetPixelFormat(), 70 | (count == 1) ?
true : false); 71 | } 72 | } 73 | 74 | void NvEncoderCuda::SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream) 75 | { 76 | NVENC_API_CALL(m_nvenc.nvEncSetIOCudaStreams(m_hEncoder, inputStream, outputStream)); 77 | } 78 | 79 | void NvEncoderCuda::ReleaseInputBuffers() 80 | { 81 | ReleaseCudaResources(); 82 | } 83 | 84 | void NvEncoderCuda::ReleaseCudaResources() 85 | { 86 | if (!m_hEncoder) 87 | { 88 | return; 89 | } 90 | 91 | if (!m_cuContext) 92 | { 93 | return; 94 | } 95 | 96 | UnregisterInputResources(); 97 | 98 | cuCtxPushCurrent(m_cuContext); 99 | 100 | for (uint32_t i = 0; i < m_vInputFrames.size(); ++i) 101 | { 102 | if (m_vInputFrames[i].inputPtr) 103 | { 104 | cuMemFree(reinterpret_cast(m_vInputFrames[i].inputPtr)); 105 | } 106 | } 107 | m_vInputFrames.clear(); 108 | 109 | for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i) 110 | { 111 | if (m_vReferenceFrames[i].inputPtr) 112 | { 113 | cuMemFree(reinterpret_cast(m_vReferenceFrames[i].inputPtr)); 114 | } 115 | } 116 | m_vReferenceFrames.clear(); 117 | 118 | cuCtxPopCurrent(NULL); 119 | m_cuContext = nullptr; 120 | } 121 | 122 | void NvEncoderCuda::CopyToDeviceFrame(CUcontext device, 123 | void* pSrcFrame, 124 | uint32_t nSrcPitch, 125 | CUdeviceptr pDstFrame, 126 | uint32_t dstPitch, 127 | int width, 128 | int height, 129 | CUmemorytype srcMemoryType, 130 | NV_ENC_BUFFER_FORMAT pixelFormat, 131 | const uint32_t dstChromaOffsets[], 132 | uint32_t numChromaPlanes, 133 | bool bUnAlignedDeviceCopy, 134 | CUstream stream) 135 | { 136 | if (srcMemoryType != CU_MEMORYTYPE_HOST && srcMemoryType != CU_MEMORYTYPE_DEVICE) 137 | { 138 | NVENC_THROW_ERROR("Invalid source memory type for copy", NV_ENC_ERR_INVALID_PARAM); 139 | } 140 | 141 | CUDA_DRVAPI_CALL(cuCtxPushCurrent(device)); 142 | 143 | uint32_t srcPitch = nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width); 144 | CUDA_MEMCPY2D m = { 0 }; 145 | m.srcMemoryType = srcMemoryType; 146 | if (srcMemoryType == CU_MEMORYTYPE_HOST) 147 | { 148 | m.srcHost = pSrcFrame; 149 | } 150 | else 151 | { 152 | m.srcDevice = (CUdeviceptr)pSrcFrame; 153 | } 154 | m.srcPitch = srcPitch; 155 | m.dstMemoryType = CU_MEMORYTYPE_DEVICE; 156 | m.dstDevice = pDstFrame; 157 | m.dstPitch = dstPitch; 158 | m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width); 159 | m.Height = height; 160 | if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) 161 | { 162 | CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m)); 163 | } 164 | else 165 | { 166 | CUDA_DRVAPI_CALL(stream == NULL? 
cuMemcpy2D(&m) : cuMemcpy2DAsync(&m, stream)); 167 | } 168 | 169 | std::vector srcChromaOffsets; 170 | NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, height, srcChromaOffsets); 171 | uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, height); 172 | uint32_t destChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, dstPitch); 173 | uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch); 174 | uint32_t chromaWidthInBytes = NvEncoder::GetChromaWidthInBytes(pixelFormat, width); 175 | 176 | for (uint32_t i = 0; i < numChromaPlanes; ++i) 177 | { 178 | if (chromaHeight) 179 | { 180 | if (srcMemoryType == CU_MEMORYTYPE_HOST) 181 | { 182 | m.srcHost = ((uint8_t *)pSrcFrame + srcChromaOffsets[i]); 183 | } 184 | else 185 | { 186 | m.srcDevice = (CUdeviceptr)((uint8_t *)pSrcFrame + srcChromaOffsets[i]); 187 | } 188 | m.srcPitch = srcChromaPitch; 189 | 190 | m.dstDevice = (CUdeviceptr)((uint8_t *)pDstFrame + dstChromaOffsets[i]); 191 | m.dstPitch = destChromaPitch; 192 | m.WidthInBytes = chromaWidthInBytes; 193 | m.Height = chromaHeight; 194 | if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) 195 | { 196 | CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m)); 197 | } 198 | else 199 | { 200 | CUDA_DRVAPI_CALL(stream == NULL? cuMemcpy2D(&m) : cuMemcpy2DAsync(&m, stream)); 201 | } 202 | } 203 | } 204 | CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); 205 | } 206 | 207 | void NvEncoderCuda::CopyToDeviceFrame(CUcontext device, 208 | void* pSrcFrame, 209 | uint32_t nSrcPitch, 210 | CUdeviceptr pDstFrame, 211 | uint32_t dstPitch, 212 | int width, 213 | int height, 214 | CUmemorytype srcMemoryType, 215 | NV_ENC_BUFFER_FORMAT pixelFormat, 216 | CUdeviceptr dstChromaDevicePtrs[], 217 | uint32_t dstChromaPitch, 218 | uint32_t numChromaPlanes, 219 | bool bUnAlignedDeviceCopy) 220 | { 221 | if (srcMemoryType != CU_MEMORYTYPE_HOST && srcMemoryType != CU_MEMORYTYPE_DEVICE) 222 | { 223 | NVENC_THROW_ERROR("Invalid source memory type for copy", NV_ENC_ERR_INVALID_PARAM); 224 | } 225 | 226 | CUDA_DRVAPI_CALL(cuCtxPushCurrent(device)); 227 | 228 | uint32_t srcPitch = nSrcPitch ? 
nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width); 229 | CUDA_MEMCPY2D m = { 0 }; 230 | m.srcMemoryType = srcMemoryType; 231 | if (srcMemoryType == CU_MEMORYTYPE_HOST) 232 | { 233 | m.srcHost = pSrcFrame; 234 | } 235 | else 236 | { 237 | m.srcDevice = (CUdeviceptr)pSrcFrame; 238 | } 239 | m.srcPitch = srcPitch; 240 | m.dstMemoryType = CU_MEMORYTYPE_DEVICE; 241 | m.dstDevice = pDstFrame; 242 | m.dstPitch = dstPitch; 243 | m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width); 244 | m.Height = height; 245 | if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) 246 | { 247 | CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m)); 248 | } 249 | else 250 | { 251 | CUDA_DRVAPI_CALL(cuMemcpy2D(&m)); 252 | } 253 | 254 | std::vector srcChromaOffsets; 255 | NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, height, srcChromaOffsets); 256 | uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, height); 257 | uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch); 258 | uint32_t chromaWidthInBytes = NvEncoder::GetChromaWidthInBytes(pixelFormat, width); 259 | 260 | for (uint32_t i = 0; i < numChromaPlanes; ++i) 261 | { 262 | if (chromaHeight) 263 | { 264 | if (srcMemoryType == CU_MEMORYTYPE_HOST) 265 | { 266 | m.srcHost = ((uint8_t *)pSrcFrame + srcChromaOffsets[i]); 267 | } 268 | else 269 | { 270 | m.srcDevice = (CUdeviceptr)((uint8_t *)pSrcFrame + srcChromaOffsets[i]); 271 | } 272 | m.srcPitch = srcChromaPitch; 273 | 274 | m.dstDevice = dstChromaDevicePtrs[i]; 275 | m.dstPitch = dstChromaPitch; 276 | m.WidthInBytes = chromaWidthInBytes; 277 | m.Height = chromaHeight; 278 | if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) 279 | { 280 | CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m)); 281 | } 282 | else 283 | { 284 | CUDA_DRVAPI_CALL(cuMemcpy2D(&m)); 285 | } 286 | } 287 | } 288 | CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); 289 | } 290 | -------------------------------------------------------------------------------- /src/cuvid/NvEncoder/NvEncoderCuda.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017-2020 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "NvEncoder.h" 19 | 20 | #define CUDA_DRVAPI_CALL( call ) \ 21 | do \ 22 | { \ 23 | CUresult err__ = call; \ 24 | if (err__ != CUDA_SUCCESS) \ 25 | { \ 26 | const char *szErrName = NULL; \ 27 | cuGetErrorName(err__, &szErrName); \ 28 | std::ostringstream errorLog; \ 29 | errorLog << "CUDA driver API error " << szErrName ; \ 30 | throw NVENCException::makeNVENCException(errorLog.str(), NV_ENC_ERR_GENERIC, __FUNCTION__, __FILE__, __LINE__); \ 31 | } \ 32 | } \ 33 | while (0) 34 | 35 | /** 36 | * @brief Encoder for CUDA device memory. 
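 *
 * Minimal usage sketch (illustrative only; CreateDefaultEncoderParams(),
 * CreateEncoder(), GetNextInputFrame(), EncodeFrame(), EndEncode() and
 * DestroyEncoder() are assumed from the NvEncoder base interface, and
 * pHostNv12 is a hypothetical host-memory NV12 frame):
 * @code
 * NvEncoderCuda enc(cuContext, 1920, 1080, NV_ENC_BUFFER_FORMAT_NV12);
 * NV_ENC_INITIALIZE_PARAMS initParams = { NV_ENC_INITIALIZE_PARAMS_VER };
 * NV_ENC_CONFIG encodeConfig = { NV_ENC_CONFIG_VER };
 * initParams.encodeConfig = &encodeConfig;
 * enc.CreateDefaultEncoderParams(&initParams, NV_ENC_CODEC_H264_GUID, NV_ENC_PRESET_DEFAULT_GUID);
 * enc.CreateEncoder(&initParams);
 * std::vector<std::vector<uint8_t>> vPacket;
 * const NvEncInputFrame *frame = enc.GetNextInputFrame();
 * NvEncoderCuda::CopyToDeviceFrame(cuContext, pHostNv12, 0,
 *     (CUdeviceptr)frame->inputPtr, frame->pitch, 1920, 1080,
 *     CU_MEMORYTYPE_HOST, NV_ENC_BUFFER_FORMAT_NV12,
 *     frame->chromaOffsets, frame->numChromaPlanes);
 * enc.EncodeFrame(vPacket);  // may return zero or more packets (output delay)
 * enc.EndEncode(vPacket);    // flush the remaining buffered packets
 * enc.DestroyEncoder();
 * @endcode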
37 | */ 38 | class NvEncoderCuda : public NvEncoder 39 | { 40 | public: 41 | NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat, 42 | uint32_t nExtraOutputDelay = 3, bool bMotionEstimationOnly = false, bool bOPInVideoMemory = false); 43 | virtual ~NvEncoderCuda(); 44 | 45 | /** 46 | * @brief This is a static function to copy input data from host memory to device memory. 47 | * This function assumes YUV plane is a single contiguous memory segment. 48 | */ 49 | static void CopyToDeviceFrame(CUcontext device, 50 | void* pSrcFrame, 51 | uint32_t nSrcPitch, 52 | CUdeviceptr pDstFrame, 53 | uint32_t dstPitch, 54 | int width, 55 | int height, 56 | CUmemorytype srcMemoryType, 57 | NV_ENC_BUFFER_FORMAT pixelFormat, 58 | const uint32_t dstChromaOffsets[], 59 | uint32_t numChromaPlanes, 60 | bool bUnAlignedDeviceCopy = false, 61 | CUstream stream = NULL); 62 | 63 | /** 64 | * @brief This is a static function to copy input data from host memory to device memory. 65 | * Application must pass a separate device pointer for each YUV plane. 66 | */ 67 | static void CopyToDeviceFrame(CUcontext device, 68 | void* pSrcFrame, 69 | uint32_t nSrcPitch, 70 | CUdeviceptr pDstFrame, 71 | uint32_t dstPitch, 72 | int width, 73 | int height, 74 | CUmemorytype srcMemoryType, 75 | NV_ENC_BUFFER_FORMAT pixelFormat, 76 | CUdeviceptr dstChromaPtr[], 77 | uint32_t dstChromaPitch, 78 | uint32_t numChromaPlanes, 79 | bool bUnAlignedDeviceCopy = false); 80 | 81 | /** 82 | * @brief This function sets input and output CUDA streams. 83 | */ 84 | void SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream); 85 | 86 | protected: 87 | /** 88 | * @brief This function is used to release the input buffers allocated for encoding. 89 | * This function is an override of virtual function NvEncoder::ReleaseInputBuffers(). 90 | */ 91 | virtual void ReleaseInputBuffers() override; 92 | 93 | private: 94 | /** 95 | * @brief This function is used to allocate input buffers for encoding. 96 | * This function is an override of virtual function NvEncoder::AllocateInputBuffers(). 97 | */ 98 | virtual void AllocateInputBuffers(int32_t numInputBuffers) override; 99 | 100 | private: 101 | /** 102 | * @brief This is a private function to release CUDA device memory used for encoding. 103 | */ 104 | void ReleaseCudaResources(); 105 | 106 | protected: 107 | CUcontext m_cuContext; 108 | 109 | private: 110 | size_t m_cudaPitch = 0; 111 | }; 112 | -------------------------------------------------------------------------------- /src/cuvid/Utils/BitDepth.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017-2020 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited.
9 | * 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | static __global__ void ConvertUInt8ToUInt16Kernel(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight) 17 | { 18 | int x = blockIdx.x * blockDim.x + threadIdx.x, 19 | y = blockIdx.y * blockDim.y + threadIdx.y; 20 | 21 | if (x >= nWidth || y >= nHeight) 22 | { 23 | return; 24 | } 25 | int destStrideInPixels = nDestPitch / (sizeof(uint16_t)); 26 | *(uchar2 *)&dpUInt16[y * destStrideInPixels + x] = uchar2{ 0, dpUInt8[y * nSrcPitch + x] }; 27 | } 28 | 29 | static __global__ void ConvertUInt16ToUInt8Kernel(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight) 30 | { 31 | int x = blockIdx.x * blockDim.x + threadIdx.x, 32 | y = blockIdx.y * blockDim.y + threadIdx.y; 33 | 34 | if (x >= nWidth || y >= nHeight) 35 | { 36 | return; 37 | } 38 | int srcStrideInPixels = nSrcPitch / (sizeof(uint16_t)); 39 | dpUInt8[y * nDestPitch + x] = ((uchar2 *)&dpUInt16[y * srcStrideInPixels + x])->y; 40 | } 41 | 42 | void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight) 43 | { 44 | dim3 blockSize(16, 16, 1); 45 | dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1); 46 | ConvertUInt8ToUInt16Kernel <<< gridSize, blockSize >>>(dpUInt8, dpUInt16, nSrcPitch, nDestPitch, nWidth, nHeight); 47 | } 48 | 49 | void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight) 50 | { 51 | dim3 blockSize(16, 16, 1); 52 | dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1); 53 | ConvertUInt16ToUInt8Kernel <<< gridSize, blockSize >>>(dpUInt16, dpUInt8, nSrcPitch, nDestPitch, nWidth, nHeight); 54 | } 55 | -------------------------------------------------------------------------------- /src/cuvid/Utils/ColorSpace.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017-2020 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited.
9 | * 10 | */ 11 | 12 | #include 13 | #include "ColorSpace.h" 14 | 15 | __constant__ float matYuv2Rgb[3][3]; 16 | __constant__ float matRgb2Yuv[3][3]; 17 | 18 | 19 | void inline GetConstants(int iMatrix, float &wr, float &wb, int &black, int &white, int &max) { 20 | black = 16; white = 235; 21 | max = 255; 22 | 23 | switch (iMatrix) 24 | { 25 | case ColorSpaceStandard_BT709: 26 | default: 27 | wr = 0.2126f; wb = 0.0722f; 28 | break; 29 | 30 | case ColorSpaceStandard_FCC: 31 | wr = 0.30f; wb = 0.11f; 32 | break; 33 | 34 | case ColorSpaceStandard_BT470: 35 | case ColorSpaceStandard_BT601: 36 | wr = 0.2990f; wb = 0.1140f; 37 | break; 38 | 39 | case ColorSpaceStandard_SMPTE240M: 40 | wr = 0.212f; wb = 0.087f; 41 | break; 42 | 43 | case ColorSpaceStandard_BT2020: 44 | case ColorSpaceStandard_BT2020C: 45 | wr = 0.2627f; wb = 0.0593f; 46 | // 10-bit only 47 | black = 64 << 6; white = 940 << 6; 48 | max = (1 << 16) - 1; 49 | break; 50 | } 51 | } 52 | 53 | void SetMatYuv2Rgb(int iMatrix) { 54 | float wr, wb; 55 | int black, white, max; 56 | GetConstants(iMatrix, wr, wb, black, white, max); 57 | float mat[3][3] = { 58 | 1.0f, 0.0f, (1.0f - wr) / 0.5f, 59 | 1.0f, -wb * (1.0f - wb) / 0.5f / (1 - wb - wr), -wr * (1 - wr) / 0.5f / (1 - wb - wr), 60 | 1.0f, (1.0f - wb) / 0.5f, 0.0f, 61 | }; 62 | for (int i = 0; i < 3; i++) { 63 | for (int j = 0; j < 3; j++) { 64 | mat[i][j] = (float)(1.0 * max / (white - black) * mat[i][j]); 65 | } 66 | } 67 | cudaMemcpyToSymbol(matYuv2Rgb, mat, sizeof(mat)); 68 | } 69 | 70 | void SetMatRgb2Yuv(int iMatrix) { 71 | float wr, wb; 72 | int black, white, max; 73 | GetConstants(iMatrix, wr, wb, black, white, max); 74 | float mat[3][3] = { 75 | wr, 1.0f - wb - wr, wb, 76 | -0.5f * wr / (1.0f - wb), -0.5f * (1 - wb - wr) / (1.0f - wb), 0.5f, 77 | 0.5f, -0.5f * (1.0f - wb - wr) / (1.0f - wr), -0.5f * wb / (1.0f - wr), 78 | }; 79 | for (int i = 0; i < 3; i++) { 80 | for (int j = 0; j < 3; j++) { 81 | mat[i][j] = (float)(1.0 * (white - black) / max * mat[i][j]); 82 | } 83 | } 84 | cudaMemcpyToSymbol(matRgb2Yuv, mat, sizeof(mat)); 85 | } 86 | 87 | template 88 | __device__ static T Clamp(T x, T lower, T upper) { 89 | return x < lower ? lower : (x > upper ? 
upper : x); 90 | } 91 | 92 | template 93 | __device__ inline Rgb YuvToRgbForPixel(YuvUnit y, YuvUnit u, YuvUnit v) { 94 | const int 95 | low = 1 << (sizeof(YuvUnit) * 8 - 4), 96 | mid = 1 << (sizeof(YuvUnit) * 8 - 1); 97 | float fy = (int)y - low, fu = (int)u - mid, fv = (int)v - mid; 98 | const float maxf = (1 << sizeof(YuvUnit) * 8) - 1.0f; 99 | YuvUnit 100 | r = (YuvUnit)Clamp(matYuv2Rgb[0][0] * fy + matYuv2Rgb[0][1] * fu + matYuv2Rgb[0][2] * fv, 0.0f, maxf), 101 | g = (YuvUnit)Clamp(matYuv2Rgb[1][0] * fy + matYuv2Rgb[1][1] * fu + matYuv2Rgb[1][2] * fv, 0.0f, maxf), 102 | b = (YuvUnit)Clamp(matYuv2Rgb[2][0] * fy + matYuv2Rgb[2][1] * fu + matYuv2Rgb[2][2] * fv, 0.0f, maxf); 103 | 104 | Rgb rgb{}; 105 | const int nShift = abs((int)sizeof(YuvUnit) - (int)sizeof(rgb.c.r)) * 8; 106 | if (sizeof(YuvUnit) >= sizeof(rgb.c.r)) { 107 | rgb.c.r = r >> nShift; 108 | rgb.c.g = g >> nShift; 109 | rgb.c.b = b >> nShift; 110 | } else { 111 | rgb.c.r = r << nShift; 112 | rgb.c.g = g << nShift; 113 | rgb.c.b = b << nShift; 114 | } 115 | return rgb; 116 | } 117 | 118 | template 119 | __global__ static void YuvToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) { 120 | int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2; 121 | int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2; 122 | if (x + 1 >= nWidth || y + 1 >= nHeight) { 123 | return; 124 | } 125 | 126 | uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch; 127 | uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch; 128 | 129 | YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc; 130 | YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch); 131 | YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch); 132 | 133 | *(RgbIntx2 *)pDst = RgbIntx2 { 134 | YuvToRgbForPixel(l0.x, ch.x, ch.y).d, 135 | YuvToRgbForPixel(l0.y, ch.x, ch.y).d, 136 | }; 137 | *(RgbIntx2 *)(pDst + nRgbPitch) = RgbIntx2 { 138 | YuvToRgbForPixel(l1.x, ch.x, ch.y).d, 139 | YuvToRgbForPixel(l1.y, ch.x, ch.y).d, 140 | }; 141 | } 142 | 143 | template 144 | __global__ static void Yuv444ToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) { 145 | int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2; 146 | int y = (threadIdx.y + blockIdx.y * blockDim.y); 147 | if (x + 1 >= nWidth || y >= nHeight) { 148 | return; 149 | } 150 | 151 | uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch; 152 | uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch; 153 | 154 | YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc; 155 | YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch)); 156 | YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch)); 157 | 158 | *(RgbIntx2 *)pDst = RgbIntx2{ 159 | YuvToRgbForPixel(l0.x, ch1.x, ch2.x).d, 160 | YuvToRgbForPixel(l0.y, ch1.y, ch2.y).d, 161 | }; 162 | } 163 | 164 | template 165 | __global__ static void YuvToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) { 166 | int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2; 167 | int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2; 168 | if (x + 1 >= nWidth || y + 1 >= nHeight) { 169 | return; 170 | } 171 | 172 | uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch; 173 | 174 | YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc; 175 | YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch); 176 | YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch); 177 | 178 | Rgb rgb0 = YuvToRgbForPixel(l0.x, ch.x, ch.y), 179 | rgb1 = YuvToRgbForPixel(l0.y, ch.x, ch.y), 180 | rgb2 = 
YuvToRgbForPixel(l1.x, ch.x, ch.y), 181 | rgb3 = YuvToRgbForPixel(l1.y, ch.x, ch.y); 182 | 183 | uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch; 184 | *(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.x, rgb1.v.x}; 185 | *(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.x, rgb3.v.x}; 186 | pDst += nRgbpPitch * nHeight; 187 | *(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.y, rgb1.v.y}; 188 | *(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.y, rgb3.v.y}; 189 | pDst += nRgbpPitch * nHeight; 190 | *(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.z, rgb1.v.z}; 191 | *(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.z, rgb3.v.z}; 192 | } 193 | 194 | template 195 | __global__ static void Yuv444ToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) { 196 | int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2; 197 | int y = (threadIdx.y + blockIdx.y * blockDim.y); 198 | if (x + 1 >= nWidth || y >= nHeight) { 199 | return; 200 | } 201 | 202 | uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch; 203 | 204 | YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc; 205 | YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch)); 206 | YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch)); 207 | 208 | Rgb rgb0 = YuvToRgbForPixel(l0.x, ch1.x, ch2.x), 209 | rgb1 = YuvToRgbForPixel(l0.y, ch1.y, ch2.y); 210 | 211 | 212 | uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch; 213 | *(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.x, rgb1.v.x }; 214 | 215 | pDst += nRgbpPitch * nHeight; 216 | *(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.y, rgb1.v.y }; 217 | 218 | pDst += nRgbpPitch * nHeight; 219 | *(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.z, rgb1.v.z }; 220 | } 221 | 222 | template 223 | void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) { 224 | SetMatYuv2Rgb(iMatrix); 225 | YuvToRgbKernel 226 | <<>> 227 | (dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight); 228 | } 229 | 230 | template 231 | void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) { 232 | SetMatYuv2Rgb(iMatrix); 233 | YuvToRgbKernel 234 | <<>> 235 | (dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight); 236 | } 237 | 238 | template 239 | void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) { 240 | SetMatYuv2Rgb(iMatrix); 241 | Yuv444ToRgbKernel 242 | <<>> 243 | (dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight); 244 | } 245 | 246 | template 247 | void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) { 248 | SetMatYuv2Rgb(iMatrix); 249 | Yuv444ToRgbKernel 250 | <<>> 251 | (dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight); 252 | } 253 | 254 | template 255 | void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) { 256 | SetMatYuv2Rgb(iMatrix); 257 | YuvToRgbKernel 258 | <<>> 259 | (dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight); 260 | } 261 | 262 | template 263 | void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) { 264 | SetMatYuv2Rgb(iMatrix); 265 | YuvToRgbKernel 266 | <<>> 267 | (dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight); 268 | } 269 | 270 | template 271 | void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int 
nBgraPitch, int nWidth, int nHeight, int iMatrix) { 272 | SetMatYuv2Rgb(iMatrix); 273 | Yuv444ToRgbKernel 274 | <<>> 275 | (dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight); 276 | } 277 | 278 | template 279 | void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) { 280 | SetMatYuv2Rgb(iMatrix); 281 | Yuv444ToRgbKernel 282 | <<>> 283 | (dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight); 284 | } 285 | 286 | template 287 | void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) { 288 | SetMatYuv2Rgb(iMatrix); 289 | YuvToRgbPlanarKernel 290 | <<>> 291 | (dpNv12, nNv12Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight); 292 | } 293 | 294 | template 295 | void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) { 296 | SetMatYuv2Rgb(iMatrix); 297 | YuvToRgbPlanarKernel 298 | <<>> 299 | (dpP016, nP016Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight); 300 | } 301 | 302 | template 303 | void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) { 304 | SetMatYuv2Rgb(iMatrix); 305 | Yuv444ToRgbPlanarKernel 306 | <<>> 307 | (dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight); 308 | } 309 | 310 | template 311 | void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) { 312 | SetMatYuv2Rgb(iMatrix); 313 | Yuv444ToRgbPlanarKernel 314 | << > > 315 | (dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight); 316 | } 317 | 318 | // Explicit Instantiation 319 | template void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 320 | template void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 321 | template void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 322 | template void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 323 | template void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 324 | template void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 325 | template void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 326 | template void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 327 | template void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 328 | template void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 329 | template void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 330 | template void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 331 | template void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 332 | template void 
YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 333 | template void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 334 | template void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix); 335 | template void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix); 336 | template void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix); 337 | template void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix); 338 | template void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix); 339 | template void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix); 340 | template void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix); 341 | template void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix); 342 | template void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix); 343 | 344 | template 345 | __device__ inline YuvUnit RgbToY(RgbUnit r, RgbUnit g, RgbUnit b) { 346 | const YuvUnit low = 1 << (sizeof(YuvUnit) * 8 - 4); 347 | return matRgb2Yuv[0][0] * r + matRgb2Yuv[0][1] * g + matRgb2Yuv[0][2] * b + low; 348 | } 349 | 350 | template 351 | __device__ inline YuvUnit RgbToU(RgbUnit r, RgbUnit g, RgbUnit b) { 352 | const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1); 353 | return matRgb2Yuv[1][0] * r + matRgb2Yuv[1][1] * g + matRgb2Yuv[1][2] * b + mid; 354 | } 355 | 356 | template 357 | __device__ inline YuvUnit RgbToV(RgbUnit r, RgbUnit g, RgbUnit b) { 358 | const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1); 359 | return matRgb2Yuv[2][0] * r + matRgb2Yuv[2][1] * g + matRgb2Yuv[2][2] * b + mid; 360 | } 361 | 362 | template 363 | __global__ static void RgbToYuvKernel(uint8_t *pRgb, int nRgbPitch, uint8_t *pYuv, int nYuvPitch, int nWidth, int nHeight) { 364 | int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2; 365 | int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2; 366 | if (x + 1 >= nWidth || y + 1 >= nHeight) { 367 | return; 368 | } 369 | 370 | uint8_t *pSrc = pRgb + x * sizeof(Rgb) + y * nRgbPitch; 371 | RgbIntx2 int2a = *(RgbIntx2 *)pSrc; 372 | RgbIntx2 int2b = *(RgbIntx2 *)(pSrc + nRgbPitch); 373 | 374 | Rgb rgb[4] = {int2a.x, int2a.y, int2b.x, int2b.y}; 375 | decltype(Rgb::c.r) 376 | r = (rgb[0].c.r + rgb[1].c.r + rgb[2].c.r + rgb[3].c.r) / 4, 377 | g = (rgb[0].c.g + rgb[1].c.g + rgb[2].c.g + rgb[3].c.g) / 4, 378 | b = (rgb[0].c.b + rgb[1].c.b + rgb[2].c.b + rgb[3].c.b) / 4; 379 | 380 | uint8_t *pDst = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch; 381 | *(YuvUnitx2 *)pDst = YuvUnitx2 { 382 | RgbToY(rgb[0].c.r, rgb[0].c.g, rgb[0].c.b), 383 | RgbToY(rgb[1].c.r, rgb[1].c.g, rgb[1].c.b), 384 | }; 385 | *(YuvUnitx2 *)(pDst + nYuvPitch) = YuvUnitx2 { 386 | RgbToY(rgb[2].c.r, rgb[2].c.g, rgb[2].c.b), 387 | RgbToY(rgb[3].c.r, rgb[3].c.g, rgb[3].c.b), 388 | }; 389 | *(YuvUnitx2 *)(pDst + (nHeight - y / 2) * nYuvPitch) = YuvUnitx2 
{ 390 | RgbToU(r, g, b), 391 | RgbToV(r, g, b), 392 | }; 393 | } 394 | 395 | void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016, int nP016Pitch, int nWidth, int nHeight, int iMatrix) { 396 | SetMatRgb2Yuv(iMatrix); 397 | RgbToYuvKernel 398 | <<>> 399 | (dpBgra, nBgraPitch, dpP016, nP016Pitch, nWidth, nHeight); 400 | } 401 | -------------------------------------------------------------------------------- /src/cuvid/Utils/ColorSpace.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | #include 8 | #include 9 | 10 | typedef enum ColorSpaceStandard { 11 | ColorSpaceStandard_BT709 = 1, 12 | ColorSpaceStandard_Unspecified = 2, 13 | ColorSpaceStandard_Reserved = 3, 14 | ColorSpaceStandard_FCC = 4, 15 | ColorSpaceStandard_BT470 = 5, 16 | ColorSpaceStandard_BT601 = 6, 17 | ColorSpaceStandard_SMPTE240M = 7, 18 | ColorSpaceStandard_YCgCo = 8, 19 | ColorSpaceStandard_BT2020 = 9, 20 | ColorSpaceStandard_BT2020C = 10 21 | } ColorSpaceStandard; 22 | 23 | union BGRA32 { 24 | uint32_t d; 25 | uchar4 v; 26 | struct { 27 | uint8_t b, g, r, a; 28 | } c; 29 | }; 30 | 31 | union RGBA32 { 32 | uint32_t d; 33 | uchar4 v; 34 | struct { 35 | uint8_t r, g, b, a; 36 | } c; 37 | }; 38 | 39 | union BGRA64 { 40 | uint64_t d; 41 | ushort4 v; 42 | struct { 43 | uint16_t b, g, r, a; 44 | } c; 45 | }; 46 | 47 | union RGBA64 { 48 | uint64_t d; 49 | ushort4 v; 50 | struct { 51 | uint16_t r, g, b, a; 52 | } c; 53 | }; 54 | 55 | #ifdef __cplusplus 56 | } 57 | #endif -------------------------------------------------------------------------------- /src/cuvid/Utils/FFmpegDemuxer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017-2020 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 9 | * 10 | */ 11 | #pragma once 12 | 13 | extern "C" { 14 | #include 15 | #include 16 | #include 17 | } 18 | #include "NvCodecUtils.h" 19 | 20 | //--------------------------------------------------------------------------- 21 | //! \file FFmpegDemuxer.h 22 | //! \brief Provides functionality for stream demuxing 23 | //! 24 | //! This header file is used by Decode/Transcode apps to demux input video clips before decoding frames from it. 25 | //--------------------------------------------------------------------------- 26 | 27 | /** 28 | * @brief libavformat wrapper class. Retrieves the elementary encoded stream from the container format. 
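 *
 * Typical demux loop (a sketch; "input.mp4" is a placeholder path):
 * @code
 * FFmpegDemuxer demuxer("input.mp4");
 * uint8_t *pVideo = NULL;
 * int nVideoBytes = 0;
 * int64_t pts = 0;
 * while (demuxer.Demux(&pVideo, &nVideoBytes, &pts)) {
 *     // pVideo/nVideoBytes hold one encoded video packet; the data is only
 *     // valid until the next Demux() call, so consume or copy it here.
 * }
 * @endcode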
29 | */ 30 | class FFmpegDemuxer { 31 | private: 32 | AVFormatContext *fmtc = NULL; 33 | AVIOContext *avioc = NULL; 34 | AVPacket pkt, pktFiltered; /*!< AVPacket stores compressed data typically exported by demuxers and then passed as input to decoders */ 35 | AVBSFContext *bsfc = NULL; 36 | 37 | int iVideoStream; 38 | bool bMp4H264, bMp4HEVC, bMp4MPEG4; 39 | AVCodecID eVideoCodec; 40 | AVPixelFormat eChromaFormat; 41 | int nWidth, nHeight, nBitDepth, nBPP, nChromaHeight; 42 | double timeBase = 0.0; 43 | int64_t userTimeScale = 0; 44 | 45 | uint8_t *pDataWithHeader = NULL; 46 | 47 | unsigned int frameCount = 0; 48 | 49 | public: 50 | class DataProvider { 51 | public: 52 | virtual ~DataProvider() {} 53 | virtual int GetData(uint8_t *pBuf, int nBuf) = 0; 54 | }; 55 | 56 | private: 57 | 58 | /** 59 | * @brief Private constructor to initialize libavformat resources. 60 | * @param fmtc - Pointer to AVFormatContext allocated inside avformat_open_input() 61 | */ 62 | FFmpegDemuxer(AVFormatContext *fmtc, int64_t timeScale = 1000 /*Hz*/) : fmtc(fmtc) { 63 | if (!fmtc) { 64 | LOG(ERROR) << "No AVFormatContext provided."; 65 | return; 66 | } 67 | 68 | LOG(INFO) << "Media format: " << fmtc->iformat->long_name << " (" << fmtc->iformat->name << ")"; 69 | 70 | ck(avformat_find_stream_info(fmtc, NULL)); 71 | iVideoStream = av_find_best_stream(fmtc, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0); 72 | if (iVideoStream < 0) { 73 | LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "Could not find stream in input file"; 74 | return; 75 | } 76 | 77 | //fmtc->streams[iVideoStream]->need_parsing = AVSTREAM_PARSE_NONE; 78 | eVideoCodec = fmtc->streams[iVideoStream]->codecpar->codec_id; 79 | nWidth = fmtc->streams[iVideoStream]->codecpar->width; 80 | nHeight = fmtc->streams[iVideoStream]->codecpar->height; 81 | eChromaFormat = (AVPixelFormat)fmtc->streams[iVideoStream]->codecpar->format; 82 | AVRational rTimeBase = fmtc->streams[iVideoStream]->time_base; 83 | timeBase = av_q2d(rTimeBase); 84 | userTimeScale = timeScale; 85 | 86 | // Set bit depth, chroma height, bits per pixel based on eChromaFormat of input 87 | switch (eChromaFormat) 88 | { 89 | case AV_PIX_FMT_YUV420P10LE: 90 | case AV_PIX_FMT_GRAY10LE: // monochrome is treated as 420 with chroma filled with 0x0 91 | nBitDepth = 10; 92 | nChromaHeight = (nHeight + 1) >> 1; 93 | nBPP = 2; 94 | break; 95 | case AV_PIX_FMT_YUV420P12LE: 96 | nBitDepth = 12; 97 | nChromaHeight = (nHeight + 1) >> 1; 98 | nBPP = 2; 99 | break; 100 | case AV_PIX_FMT_YUV444P10LE: 101 | nBitDepth = 10; 102 | nChromaHeight = nHeight << 1; 103 | nBPP = 2; 104 | break; 105 | case AV_PIX_FMT_YUV444P12LE: 106 | nBitDepth = 12; 107 | nChromaHeight = nHeight << 1; 108 | nBPP = 2; 109 | break; 110 | case AV_PIX_FMT_YUV444P: 111 | nBitDepth = 8; 112 | nChromaHeight = nHeight << 1; 113 | nBPP = 1; 114 | break; 115 | case AV_PIX_FMT_YUV420P: 116 | case AV_PIX_FMT_YUVJ420P: 117 | case AV_PIX_FMT_YUVJ422P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420 118 | case AV_PIX_FMT_YUVJ444P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420 119 | case AV_PIX_FMT_GRAY8: // monochrome is treated as 420 with chroma filled with 0x0 120 | nBitDepth = 8; 121 | nChromaHeight = (nHeight + 1) >> 1; 122 | nBPP = 1; 123 | break; 124 | default: 125 | LOG(WARNING) << "ChromaFormat not recognized. 
Assuming 420"; 126 | eChromaFormat = AV_PIX_FMT_YUV420P; 127 | nBitDepth = 8; 128 | nChromaHeight = (nHeight + 1) >> 1; 129 | nBPP = 1; 130 | } 131 | 132 | bMp4H264 = eVideoCodec == AV_CODEC_ID_H264 && ( 133 | !strcmp(fmtc->iformat->long_name, "QuickTime / MOV") 134 | || !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)") 135 | || !strcmp(fmtc->iformat->long_name, "Matroska / WebM") 136 | ); 137 | bMp4HEVC = eVideoCodec == AV_CODEC_ID_HEVC && ( 138 | !strcmp(fmtc->iformat->long_name, "QuickTime / MOV") 139 | || !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)") 140 | || !strcmp(fmtc->iformat->long_name, "Matroska / WebM") 141 | ); 142 | 143 | bMp4MPEG4 = eVideoCodec == AV_CODEC_ID_MPEG4 && ( 144 | !strcmp(fmtc->iformat->long_name, "QuickTime / MOV") 145 | || !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)") 146 | || !strcmp(fmtc->iformat->long_name, "Matroska / WebM") 147 | ); 148 | 149 | //Initialize packet fields with default values 150 | av_init_packet(&pkt); 151 | pkt.data = NULL; 152 | pkt.size = 0; 153 | av_init_packet(&pktFiltered); 154 | pktFiltered.data = NULL; 155 | pktFiltered.size = 0; 156 | 157 | // Initialize bitstream filter and its required resources 158 | if (bMp4H264) { 159 | const AVBitStreamFilter *bsf = av_bsf_get_by_name("h264_mp4toannexb"); 160 | if (!bsf) { 161 | LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed"; 162 | return; 163 | } 164 | ck(av_bsf_alloc(bsf, &bsfc)); 165 | avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar); 166 | ck(av_bsf_init(bsfc)); 167 | } 168 | if (bMp4HEVC) { 169 | const AVBitStreamFilter *bsf = av_bsf_get_by_name("hevc_mp4toannexb"); 170 | if (!bsf) { 171 | LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed"; 172 | return; 173 | } 174 | ck(av_bsf_alloc(bsf, &bsfc)); 175 | avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar); 176 | ck(av_bsf_init(bsfc)); 177 | } 178 | } 179 | 180 | AVFormatContext *CreateFormatContext(DataProvider *pDataProvider) { 181 | 182 | AVFormatContext *ctx = NULL; 183 | if (!(ctx = avformat_alloc_context())) { 184 | LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__; 185 | return NULL; 186 | } 187 | 188 | uint8_t *avioc_buffer = NULL; 189 | int avioc_buffer_size = 8 * 1024 * 1024; 190 | avioc_buffer = (uint8_t *)av_malloc(avioc_buffer_size); 191 | if (!avioc_buffer) { 192 | LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__; 193 | return NULL; 194 | } 195 | avioc = avio_alloc_context(avioc_buffer, avioc_buffer_size, 196 | 0, pDataProvider, &ReadPacket, NULL, NULL); 197 | if (!avioc) { 198 | LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__; 199 | return NULL; 200 | } 201 | ctx->pb = avioc; 202 | 203 | ck(avformat_open_input(&ctx, NULL, NULL, NULL)); 204 | return ctx; 205 | } 206 | 207 | /** 208 | * @brief Allocate and return AVFormatContext*. 209 | * @param szFilePath - Filepath pointing to input stream. 
210 | * @return Pointer to AVFormatContext 211 | */ 212 | AVFormatContext *CreateFormatContext(const char *szFilePath) { 213 | avformat_network_init(); 214 | 215 | AVFormatContext *ctx = NULL; 216 | ck(avformat_open_input(&ctx, szFilePath, NULL, NULL)); 217 | return ctx; 218 | } 219 | 220 | public: 221 | FFmpegDemuxer(const char *szFilePath, int64_t timescale = 1000 /*Hz*/) : FFmpegDemuxer(CreateFormatContext(szFilePath), timescale) {} 222 | FFmpegDemuxer(DataProvider *pDataProvider) : FFmpegDemuxer(CreateFormatContext(pDataProvider)) {avioc = fmtc->pb;} 223 | ~FFmpegDemuxer() { 224 | 225 | if (!fmtc) { 226 | return; 227 | } 228 | 229 | if (pkt.data) { 230 | av_packet_unref(&pkt); 231 | } 232 | if (pktFiltered.data) { 233 | av_packet_unref(&pktFiltered); 234 | } 235 | 236 | if (bsfc) { 237 | av_bsf_free(&bsfc); 238 | } 239 | 240 | avformat_close_input(&fmtc); 241 | 242 | if (avioc) { 243 | av_freep(&avioc->buffer); 244 | av_freep(&avioc); 245 | } 246 | 247 | if (pDataWithHeader) { 248 | av_free(pDataWithHeader); 249 | } 250 | } 251 | AVCodecID GetVideoCodec() { 252 | return eVideoCodec; 253 | } 254 | AVPixelFormat GetChromaFormat() { 255 | return eChromaFormat; 256 | } 257 | int GetWidth() { 258 | return nWidth; 259 | } 260 | int GetHeight() { 261 | return nHeight; 262 | } 263 | int GetBitDepth() { 264 | return nBitDepth; 265 | } 266 | int GetFrameSize() { 267 | return nWidth * (nHeight + nChromaHeight) * nBPP; 268 | } 269 | bool Demux(uint8_t **ppVideo, int *pnVideoBytes, int64_t *pts = NULL) { 270 | if (!fmtc) { 271 | return false; 272 | } 273 | 274 | *pnVideoBytes = 0; 275 | 276 | if (pkt.data) { 277 | av_packet_unref(&pkt); 278 | } 279 | 280 | int e = 0; 281 | while ((e = av_read_frame(fmtc, &pkt)) >= 0 && pkt.stream_index != iVideoStream) { 282 | av_packet_unref(&pkt); 283 | } 284 | if (e < 0) { 285 | return false; 286 | } 287 | 288 | if (bMp4H264 || bMp4HEVC) { 289 | if (pktFiltered.data) { 290 | av_packet_unref(&pktFiltered); 291 | } 292 | ck(av_bsf_send_packet(bsfc, &pkt)); 293 | ck(av_bsf_receive_packet(bsfc, &pktFiltered)); 294 | *ppVideo = pktFiltered.data; 295 | *pnVideoBytes = pktFiltered.size; 296 | if (pts) 297 | *pts = (int64_t) (pktFiltered.pts * userTimeScale * timeBase); 298 | } else { 299 | 300 | if (bMp4MPEG4 && (frameCount == 0)) { 301 | 302 | int extraDataSize = fmtc->streams[iVideoStream]->codecpar->extradata_size; 303 | 304 | if (extraDataSize > 0) { 305 | 306 | // extradata contains start codes 00 00 01. 
Subtract its size 307 | pDataWithHeader = (uint8_t *)av_malloc(extraDataSize + pkt.size - 3*sizeof(uint8_t)); 308 | 309 | if (!pDataWithHeader) { 310 | LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__; 311 | return false; 312 | } 313 | 314 | memcpy(pDataWithHeader, fmtc->streams[iVideoStream]->codecpar->extradata, extraDataSize); 315 | memcpy(pDataWithHeader+extraDataSize, pkt.data+3, pkt.size - 3*sizeof(uint8_t)); 316 | 317 | *ppVideo = pDataWithHeader; 318 | *pnVideoBytes = extraDataSize + pkt.size - 3*sizeof(uint8_t); 319 | } 320 | 321 | } else { 322 | *ppVideo = pkt.data; 323 | *pnVideoBytes = pkt.size; 324 | } 325 | 326 | if (pts) 327 | *pts = (int64_t)(pkt.pts * userTimeScale * timeBase); 328 | } 329 | 330 | frameCount++; 331 | 332 | return true; 333 | } 334 | 335 | static int ReadPacket(void *opaque, uint8_t *pBuf, int nBuf) { 336 | return ((DataProvider *)opaque)->GetData(pBuf, nBuf); 337 | } 338 | }; 339 | 340 | inline cudaVideoCodec FFmpeg2NvCodecId(AVCodecID id) { 341 | switch (id) { 342 | case AV_CODEC_ID_MPEG1VIDEO : return cudaVideoCodec_MPEG1; 343 | case AV_CODEC_ID_MPEG2VIDEO : return cudaVideoCodec_MPEG2; 344 | case AV_CODEC_ID_MPEG4 : return cudaVideoCodec_MPEG4; 345 | case AV_CODEC_ID_WMV3 : 346 | case AV_CODEC_ID_VC1 : return cudaVideoCodec_VC1; 347 | case AV_CODEC_ID_H264 : return cudaVideoCodec_H264; 348 | case AV_CODEC_ID_HEVC : return cudaVideoCodec_HEVC; 349 | case AV_CODEC_ID_VP8 : return cudaVideoCodec_VP8; 350 | case AV_CODEC_ID_VP9 : return cudaVideoCodec_VP9; 351 | case AV_CODEC_ID_MJPEG : return cudaVideoCodec_JPEG; 352 | case AV_CODEC_ID_AV1 : return cudaVideoCodec_AV1; 353 | default : return cudaVideoCodec_NumCodecs; 354 | } 355 | } 356 | 357 | 358 | -------------------------------------------------------------------------------- /src/cuvid/Utils/FFmpegStreamer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017-2020 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
9 | * 10 | */ 11 | #pragma once 12 | 13 | #include 14 | #include 15 | extern "C" { 16 | #include 17 | #include 18 | #include 19 | }; 20 | #include "Logger.h" 21 | 22 | extern simplelogger::Logger *logger; 23 | 24 | class FFmpegStreamer { 25 | private: 26 | AVFormatContext *oc = NULL; 27 | AVStream *vs = NULL; 28 | int nFps = 0; 29 | 30 | public: 31 | FFmpegStreamer(AVCodecID eCodecId, int nWidth, int nHeight, int nFps, const char *szInFilePath) : nFps(nFps) { 32 | avformat_network_init(); 33 | oc = avformat_alloc_context(); 34 | if (!oc) { 35 | LOG(ERROR) << "FFMPEG: avformat_alloc_context error"; 36 | return; 37 | } 38 | 39 | // Set format on oc 40 | AVOutputFormat *fmt = av_guess_format("mpegts", NULL, NULL); 41 | if (!fmt) { 42 | LOG(ERROR) << "Invalid format"; 43 | return; 44 | } 45 | fmt->video_codec = eCodecId; 46 | 47 | oc->oformat = fmt; 48 | oc->url = av_strdup(szInFilePath); 49 | LOG(INFO) << "Streaming destination: " << oc->url; 50 | 51 | // Add video stream to oc 52 | vs = avformat_new_stream(oc, NULL); 53 | if (!vs) { 54 | LOG(ERROR) << "FFMPEG: Could not alloc video stream"; 55 | return; 56 | } 57 | vs->id = 0; 58 | 59 | // Set video parameters 60 | AVCodecParameters *vpar = vs->codecpar; 61 | vpar->codec_id = fmt->video_codec; 62 | vpar->codec_type = AVMEDIA_TYPE_VIDEO; 63 | vpar->width = nWidth; 64 | vpar->height = nHeight; 65 | 66 | // Everything is ready. Now open the output stream. 67 | if (avio_open(&oc->pb, oc->url, AVIO_FLAG_WRITE) < 0) { 68 | LOG(ERROR) << "FFMPEG: Could not open " << oc->url; 69 | return ; 70 | } 71 | 72 | // Write the container header 73 | if (avformat_write_header(oc, NULL)) { 74 | LOG(ERROR) << "FFMPEG: avformat_write_header error!"; 75 | return; 76 | } 77 | } 78 | ~FFmpegStreamer() { 79 | if (oc) { 80 | av_write_trailer(oc); 81 | avio_close(oc->pb); 82 | avformat_free_context(oc); 83 | } 84 | } 85 | 86 | bool Stream(uint8_t *pData, int nBytes, int nPts) { 87 | AVPacket pkt = {0}; 88 | av_init_packet(&pkt); 89 | pkt.pts = av_rescale_q(nPts++, AVRational {1, nFps}, vs->time_base); 90 | // No B-frames 91 | pkt.dts = pkt.pts; 92 | pkt.stream_index = vs->index; 93 | pkt.data = pData; 94 | pkt.size = nBytes; 95 | 96 | if(!memcmp(pData, "\x00\x00\x00\x01\x67", 5)) { 97 | pkt.flags |= AV_PKT_FLAG_KEY; 98 | } 99 | 100 | // Write the compressed frame into the output 101 | int ret = av_write_frame(oc, &pkt); 102 | av_write_frame(oc, NULL); 103 | if (ret < 0) { 104 | LOG(ERROR) << "FFMPEG: Error while writing video frame"; 105 | } 106 | 107 | return true; 108 | } 109 | }; 110 | -------------------------------------------------------------------------------- /src/cuvid/Utils/Logger.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017-2020 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #ifdef _WIN32 22 | #include 23 | #include 24 | 25 | #pragma comment(lib, "ws2_32.lib") 26 | #undef ERROR 27 | #else 28 | #include 29 | #include 30 | #include 31 | #include 32 | #define SOCKET int 33 | #define INVALID_SOCKET -1 34 | #endif 35 | 36 | enum LogLevel { 37 | TRACE, 38 | INFO, 39 | WARNING, 40 | ERROR, 41 | FATAL 42 | }; 43 | 44 | namespace simplelogger{ 45 | class Logger { 46 | public: 47 | Logger(LogLevel level, bool bPrintTimeStamp) : level(level), bPrintTimeStamp(bPrintTimeStamp) {} 48 | virtual ~Logger() {} 49 | virtual std::ostream& GetStream() = 0; 50 | virtual void FlushStream() {} 51 | bool ShouldLogFor(LogLevel l) { 52 | return l >= level; 53 | } 54 | char* GetLead(LogLevel l, const char *szFile, int nLine, const char *szFunc) { 55 | if (l < TRACE || l > FATAL) { 56 | sprintf(szLead, "[?????] "); 57 | return szLead; 58 | } 59 | const char *szLevels[] = {"TRACE", "INFO", "WARN", "ERROR", "FATAL"}; 60 | if (bPrintTimeStamp) { 61 | time_t t = time(NULL); 62 | struct tm *ptm = localtime(&t); 63 | sprintf(szLead, "[%-5s][%02d:%02d:%02d] ", 64 | szLevels[l], ptm->tm_hour, ptm->tm_min, ptm->tm_sec); 65 | } else { 66 | sprintf(szLead, "[%-5s] ", szLevels[l]); 67 | } 68 | return szLead; 69 | } 70 | void EnterCriticalSection() { 71 | mtx.lock(); 72 | } 73 | void LeaveCriticalSection() { 74 | mtx.unlock(); 75 | } 76 | private: 77 | LogLevel level; 78 | char szLead[80]; 79 | bool bPrintTimeStamp; 80 | std::mutex mtx; 81 | }; 82 | 83 | class LoggerFactory { 84 | public: 85 | static Logger* CreateFileLogger(std::string strFilePath, 86 | LogLevel level = INFO, bool bPrintTimeStamp = true) { 87 | return new FileLogger(strFilePath, level, bPrintTimeStamp); 88 | } 89 | static Logger* CreateConsoleLogger(LogLevel level = INFO, 90 | bool bPrintTimeStamp = true) { 91 | return new ConsoleLogger(level, bPrintTimeStamp); 92 | } 93 | static Logger* CreateUdpLogger(char *szHost, unsigned uPort, LogLevel level = INFO, 94 | bool bPrintTimeStamp = true) { 95 | return new UdpLogger(szHost, uPort, level, bPrintTimeStamp); 96 | } 97 | private: 98 | LoggerFactory() {} 99 | 100 | class FileLogger : public Logger { 101 | public: 102 | FileLogger(std::string strFilePath, LogLevel level, bool bPrintTimeStamp) 103 | : Logger(level, bPrintTimeStamp) { 104 | pFileOut = new std::ofstream(); 105 | pFileOut->open(strFilePath.c_str()); 106 | } 107 | ~FileLogger() { 108 | pFileOut->close(); 109 | } 110 | std::ostream& GetStream() { 111 | return *pFileOut; 112 | } 113 | private: 114 | std::ofstream *pFileOut; 115 | }; 116 | 117 | class ConsoleLogger : public Logger { 118 | public: 119 | ConsoleLogger(LogLevel level, bool bPrintTimeStamp) 120 | : Logger(level, bPrintTimeStamp) {} 121 | std::ostream& GetStream() { 122 | return std::cout; 123 | } 124 | }; 125 | 126 | class UdpLogger : public Logger { 127 | private: 128 | class UdpOstream : public std::ostream { 129 | public: 130 | UdpOstream(char *szHost, unsigned short uPort) : std::ostream(&sb), socket(INVALID_SOCKET){ 131 | #ifdef _WIN32 132 | WSADATA w; 133 | if (WSAStartup(0x0101, &w) != 0) { 134 | fprintf(stderr, "WSAStartup() failed.\n"); 135 | return; 136 | } 137 | #endif 138 | socket = ::socket(AF_INET, SOCK_DGRAM, 0); 139 | if (socket == INVALID_SOCKET) { 140 | #ifdef _WIN32 141 | WSACleanup(); 142 | #endif 143 | fprintf(stderr, "socket() failed.\n"); 144 | return; 145 | } 146 | #ifdef _WIN32 147 | unsigned int b1, b2, b3, 
b4; 148 | sscanf(szHost, "%u.%u.%u.%u", &b1, &b2, &b3, &b4); 149 | struct in_addr addr = {(unsigned char)b1, (unsigned char)b2, (unsigned char)b3, (unsigned char)b4}; 150 | #else 151 | struct in_addr addr = {inet_addr(szHost)}; 152 | #endif 153 | struct sockaddr_in s = {AF_INET, htons(uPort), addr}; 154 | server = s; 155 | } 156 | ~UdpOstream() throw() { 157 | if (socket == INVALID_SOCKET) { 158 | return; 159 | } 160 | #ifdef _WIN32 161 | closesocket(socket); 162 | WSACleanup(); 163 | #else 164 | close(socket); 165 | #endif 166 | } 167 | void Flush() { 168 | if (sendto(socket, sb.str().c_str(), (int)sb.str().length() + 1, 169 | 0, (struct sockaddr *)&server, (int)sizeof(sockaddr_in)) == -1) { 170 | fprintf(stderr, "sendto() failed.\n"); 171 | } 172 | sb.str(""); 173 | } 174 | 175 | private: 176 | std::stringbuf sb; 177 | SOCKET socket; 178 | struct sockaddr_in server; 179 | }; 180 | public: 181 | UdpLogger(char *szHost, unsigned uPort, LogLevel level, bool bPrintTimeStamp) 182 | : Logger(level, bPrintTimeStamp), udpOut(szHost, (unsigned short)uPort) {} 183 | UdpOstream& GetStream() { 184 | return udpOut; 185 | } 186 | virtual void FlushStream() { 187 | udpOut.Flush(); 188 | } 189 | private: 190 | UdpOstream udpOut; 191 | }; 192 | }; 193 | 194 | class LogTransaction { 195 | public: 196 | LogTransaction(Logger *pLogger, LogLevel level, const char *szFile, const int nLine, const char *szFunc) : pLogger(pLogger), level(level) { 197 | if (!pLogger) { 198 | std::cout << "[-----] "; 199 | return; 200 | } 201 | if (!pLogger->ShouldLogFor(level)) { 202 | return; 203 | } 204 | pLogger->EnterCriticalSection(); 205 | pLogger->GetStream() << pLogger->GetLead(level, szFile, nLine, szFunc); 206 | } 207 | ~LogTransaction() { 208 | if (!pLogger) { 209 | std::cout << std::endl; 210 | return; 211 | } 212 | if (!pLogger->ShouldLogFor(level)) { 213 | return; 214 | } 215 | pLogger->GetStream() << std::endl; 216 | pLogger->FlushStream(); 217 | pLogger->LeaveCriticalSection(); 218 | if (level == FATAL) { 219 | exit(1); 220 | } 221 | } 222 | std::ostream& GetStream() { 223 | if (!pLogger) { 224 | return std::cout; 225 | } 226 | if (!pLogger->ShouldLogFor(level)) { 227 | return ossNull; 228 | } 229 | return pLogger->GetStream(); 230 | } 231 | private: 232 | Logger *pLogger; 233 | LogLevel level; 234 | std::ostringstream ossNull; 235 | }; 236 | 237 | } 238 | extern simplelogger::Logger *logger; 239 | #define LOG(level) simplelogger::LogTransaction(logger, level, __FILE__, __LINE__, __FUNCTION__).GetStream() 240 | -------------------------------------------------------------------------------- /src/cuvid/Utils/NvCodecUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017-2020 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 9 | * 10 | */ 11 | 12 | //--------------------------------------------------------------------------- 13 | //! \file NvCodecUtils.h 14 | //! \brief Miscellaneous classes and error checking functions. 15 | //! 16 | //! Used by Transcode/Encode sample apps for reading input files, multithreading, performance measurement or colorspace conversion while decoding.
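//!
//! For example, the ck() macro defined below wraps any call that returns a
//! checkable status and logs the failing file/line on error:
//!     ck(avformat_open_input(&ctx, szFilePath, NULL, NULL));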
17 | //--------------------------------------------------------------------------- 18 | 19 | #pragma once 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include "Logger.h" 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | extern simplelogger::Logger *logger; 34 | 35 | #ifdef __cuda_cuda_h__ 36 | inline bool check(CUresult e, int iLine, const char *szFile) { 37 | if (e != CUDA_SUCCESS) { 38 | const char *szErrName = NULL; 39 | cuGetErrorName(e, &szErrName); 40 | LOG(FATAL) << "CUDA driver API error " << szErrName << " at line " << iLine << " in file " << szFile; 41 | return false; 42 | } 43 | return true; 44 | } 45 | #endif 46 | 47 | #ifdef __CUDA_RUNTIME_H__ 48 | inline bool check(cudaError_t e, int iLine, const char *szFile) { 49 | if (e != cudaSuccess) { 50 | LOG(FATAL) << "CUDA runtime API error " << cudaGetErrorName(e) << " at line " << iLine << " in file " << szFile; 51 | return false; 52 | } 53 | return true; 54 | } 55 | #endif 56 | 57 | #ifdef _NV_ENCODEAPI_H_ 58 | inline bool check(NVENCSTATUS e, int iLine, const char *szFile) { 59 | const char *aszErrName[] = { 60 | "NV_ENC_SUCCESS", 61 | "NV_ENC_ERR_NO_ENCODE_DEVICE", 62 | "NV_ENC_ERR_UNSUPPORTED_DEVICE", 63 | "NV_ENC_ERR_INVALID_ENCODERDEVICE", 64 | "NV_ENC_ERR_INVALID_DEVICE", 65 | "NV_ENC_ERR_DEVICE_NOT_EXIST", 66 | "NV_ENC_ERR_INVALID_PTR", 67 | "NV_ENC_ERR_INVALID_EVENT", 68 | "NV_ENC_ERR_INVALID_PARAM", 69 | "NV_ENC_ERR_INVALID_CALL", 70 | "NV_ENC_ERR_OUT_OF_MEMORY", 71 | "NV_ENC_ERR_ENCODER_NOT_INITIALIZED", 72 | "NV_ENC_ERR_UNSUPPORTED_PARAM", 73 | "NV_ENC_ERR_LOCK_BUSY", 74 | "NV_ENC_ERR_NOT_ENOUGH_BUFFER", 75 | "NV_ENC_ERR_INVALID_VERSION", 76 | "NV_ENC_ERR_MAP_FAILED", 77 | "NV_ENC_ERR_NEED_MORE_INPUT", 78 | "NV_ENC_ERR_ENCODER_BUSY", 79 | "NV_ENC_ERR_EVENT_NOT_REGISTERD", 80 | "NV_ENC_ERR_GENERIC", 81 | "NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY", 82 | "NV_ENC_ERR_UNIMPLEMENTED", 83 | "NV_ENC_ERR_RESOURCE_REGISTER_FAILED", 84 | "NV_ENC_ERR_RESOURCE_NOT_REGISTERED", 85 | "NV_ENC_ERR_RESOURCE_NOT_MAPPED", 86 | }; 87 | if (e != NV_ENC_SUCCESS) { 88 | LOG(FATAL) << "NVENC error " << aszErrName[e] << " at line " << iLine << " in file " << szFile; 89 | return false; 90 | } 91 | return true; 92 | } 93 | #endif 94 | 95 | #ifdef _WINERROR_ 96 | inline bool check(HRESULT e, int iLine, const char *szFile) { 97 | if (e != S_OK) { 98 | std::stringstream stream; 99 | stream << std::hex << std::uppercase << e; 100 | LOG(FATAL) << "HRESULT error 0x" << stream.str() << " at line " << iLine << " in file " << szFile; 101 | return false; 102 | } 103 | return true; 104 | } 105 | #endif 106 | 107 | #if defined(__gl_h_) || defined(__GL_H__) 108 | inline bool check(GLenum e, int iLine, const char *szFile) { 109 | if (e != 0) { 110 | LOG(ERROR) << "GLenum error " << e << " at line " << iLine << " in file " << szFile; 111 | return false; 112 | } 113 | return true; 114 | } 115 | #endif 116 | 117 | inline bool check(int e, int iLine, const char *szFile) { 118 | if (e < 0) { 119 | LOG(ERROR) << "General error " << e << " at line " << iLine << " in file " << szFile; 120 | return false; 121 | } 122 | return true; 123 | } 124 | 125 | #define ck(call) check(call, __LINE__, __FILE__) 126 | 127 | /** 128 | * @brief Wrapper class around std::thread 129 | */ 130 | class NvThread 131 | { 132 | public: 133 | NvThread() = default; 134 | NvThread(const NvThread&) = delete; 135 | NvThread& operator=(const NvThread& other) = delete; 136 | 137 | NvThread(std::thread&& thread) : 
138 |     {
139 | 
140 |     }
141 | 
142 |     NvThread(NvThread&& thread) : t(std::move(thread.t))
143 |     {
144 | 
145 |     }
146 | 
147 |     NvThread& operator=(NvThread&& other)
148 |     {
149 |         t = std::move(other.t);
150 |         return *this;
151 |     }
152 | 
153 |     ~NvThread()
154 |     {
155 |         join();
156 |     }
157 | 
158 |     void join()
159 |     {
160 |         if (t.joinable())
161 |         {
162 |             t.join();
163 |         }
164 |     }
165 | private:
166 |     std::thread t;
167 | };
168 | 
169 | #ifndef _WIN32
170 | #define _stricmp strcasecmp
171 | #define _stat64 stat64
172 | #endif
173 | 
174 | /**
175 | * @brief Utility class to allocate buffer memory. Helps avoid I/O during the encode/decode loop in case of performance tests.
176 | */
177 | class BufferedFileReader {
178 | public:
179 |     /**
180 |     * @brief Constructor function to allocate appropriate memory and copy file contents into it
181 |     */
182 |     BufferedFileReader(const char *szFileName, bool bPartial = false) {
183 |         struct _stat64 st;
184 | 
185 |         if (_stat64(szFileName, &st) != 0) {
186 |             return;
187 |         }
188 | 
189 |         nSize = st.st_size;
190 |         while (nSize) {
191 |             try {
192 |                 pBuf = new uint8_t[(size_t)nSize];
193 |                 if (nSize != st.st_size) {
194 |                     LOG(WARNING) << "File is too large - only " << std::setprecision(4) << 100.0 * nSize / st.st_size << "% is loaded";
195 |                 }
196 |                 break;
197 |             } catch(std::bad_alloc) {
198 |                 if (!bPartial) {
199 |                     LOG(ERROR) << "Failed to allocate memory in BufferedReader";
200 |                     return;
201 |                 }
202 |                 nSize = (uint32_t)(nSize * 0.9);
203 |             }
204 |         }
205 | 
206 |         std::ifstream fpIn(szFileName, std::ifstream::in | std::ifstream::binary);
207 |         if (!fpIn)
208 |         {
209 |             LOG(ERROR) << "Unable to open input file: " << szFileName;
210 |             return;
211 |         }
212 | 
213 |         std::streamsize nRead = fpIn.read(reinterpret_cast<char *>(pBuf), nSize).gcount();
214 |         fpIn.close();
215 | 
216 |         assert(nRead == nSize);
217 |     }
218 |     ~BufferedFileReader() {
219 |         if (pBuf) {
220 |             delete[] pBuf;
221 |         }
222 |     }
223 |     bool GetBuffer(uint8_t **ppBuf, uint64_t *pnSize) {
224 |         if (!pBuf) {
225 |             return false;
226 |         }
227 | 
228 |         *ppBuf = pBuf;
229 |         *pnSize = nSize;
230 |         return true;
231 |     }
232 | 
233 | private:
234 |     uint8_t *pBuf = NULL;
235 |     uint64_t nSize = 0;
236 | };
237 | 
238 | /**
239 | * @brief Template class to facilitate color space conversion
240 | */
241 | template<class T>
242 | class YuvConverter {
243 | public:
244 |     YuvConverter(int nWidth, int nHeight) : nWidth(nWidth), nHeight(nHeight) {
245 |         pQuad = new T[((nWidth + 1) / 2) * ((nHeight + 1) / 2)];
246 |     }
247 |     ~YuvConverter() {
248 |         delete[] pQuad;
249 |     }
250 |     void PlanarToUVInterleaved(T *pFrame, int nPitch = 0) {
251 |         if (nPitch == 0) {
252 |             nPitch = nWidth;
253 |         }
254 | 
255 |         // sizes of source surface plane
256 |         int nSizePlaneY = nPitch * nHeight;
257 |         int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
258 |         int nSizePlaneV = nSizePlaneU;
259 | 
260 |         T *puv = pFrame + nSizePlaneY;
261 |         if (nPitch == nWidth) {
262 |             memcpy(pQuad, puv, nSizePlaneU * sizeof(T));
263 |         } else {
264 |             for (int i = 0; i < (nHeight + 1) / 2; i++) {
265 |                 memcpy(pQuad + ((nWidth + 1) / 2) * i, puv + ((nPitch + 1) / 2) * i, ((nWidth + 1) / 2) * sizeof(T));
266 |             }
267 |         }
268 |         T *pv = puv + nSizePlaneU;
269 |         for (int y = 0; y < (nHeight + 1) / 2; y++) {
270 |             for (int x = 0; x < (nWidth + 1) / 2; x++) {
271 |                 puv[y * nPitch + x * 2] = pQuad[y * ((nWidth + 1) / 2) + x];
272 |                 puv[y * nPitch + x * 2 + 1] = pv[y * ((nPitch + 1) / 2) + x];
273 |             }
274 |         }
275 |     }
276 |     void UVInterleavedToPlanar(T *pFrame, int nPitch = 0) {
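        // The frame arrives as NV12/P016 (a Y plane followed by one interleaved
        // UV plane) and is rearranged in place into planar Y, U, V, with pQuad
        // as scratch space for the V samples. A minimal usage sketch, assuming
        // an 8-bit, tightly packed 1920x1080 surface and <vector> available:
        //
        //     std::vector<uint8_t> frame(1920 * 1080 * 3 / 2);   // NV12 bytes
        //     YuvConverter<uint8_t> conv(1920, 1080);
        //     conv.UVInterleavedToPlanar(frame.data());          // now I420 (Y, U, V)
        //     conv.PlanarToUVInterleaved(frame.data());          // and back to NV12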
277 |         if (nPitch == 0) {
278 |             nPitch = nWidth;
279 |         }
280 | 
281 |         // sizes of source surface plane
282 |         int nSizePlaneY = nPitch * nHeight;
283 |         int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
284 |         int nSizePlaneV = nSizePlaneU;
285 | 
286 |         T *puv = pFrame + nSizePlaneY,
287 |             *pu = puv,
288 |             *pv = puv + nSizePlaneU;
289 | 
290 |         // split chroma from interleave to planar
291 |         for (int y = 0; y < (nHeight + 1) / 2; y++) {
292 |             for (int x = 0; x < (nWidth + 1) / 2; x++) {
293 |                 pu[y * ((nPitch + 1) / 2) + x] = puv[y * nPitch + x * 2];
294 |                 pQuad[y * ((nWidth + 1) / 2) + x] = puv[y * nPitch + x * 2 + 1];
295 |             }
296 |         }
297 |         if (nPitch == nWidth) {
298 |             memcpy(pv, pQuad, nSizePlaneV * sizeof(T));
299 |         } else {
300 |             for (int i = 0; i < (nHeight + 1) / 2; i++) {
301 |                 memcpy(pv + ((nPitch + 1) / 2) * i, pQuad + ((nWidth + 1) / 2) * i, ((nWidth + 1) / 2) * sizeof(T));
302 |             }
303 |         }
304 |     }
305 | 
306 | private:
307 |     T *pQuad;
308 |     int nWidth, nHeight;
309 | };
310 | 
311 | /**
312 | * @brief Utility class to measure elapsed time in seconds between the block of executed code
313 | */
314 | class StopWatch {
315 | public:
316 |     void Start() {
317 |         t0 = std::chrono::high_resolution_clock::now();
318 |     }
319 |     double Stop() {
320 |         return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch() - t0.time_since_epoch()).count() / 1.0e9;
321 |     }
322 | 
323 | private:
324 |     std::chrono::high_resolution_clock::time_point t0;
325 | };
326 | 
327 | template<typename T>
328 | class ConcurrentQueue
329 | {
330 | public:
331 | 
332 |     ConcurrentQueue() {}
333 |     ConcurrentQueue(size_t size) : maxSize(size) {}
334 |     ConcurrentQueue(const ConcurrentQueue&) = delete;
335 |     ConcurrentQueue& operator=(const ConcurrentQueue&) = delete;
336 | 
337 |     void setSize(size_t s) {
338 |         maxSize = s;
339 |     }
340 | 
341 |     void push_back(const T& value) {
342 |         // Do not use a std::lock_guard here. We will need to explicitly
343 |         // unlock before notify_one as the other waiting thread will
344 |         // automatically try to acquire mutex once it wakes up
345 |         // (which will happen on notify_one)
346 |         std::unique_lock<std::mutex> lock(m_mutex);
347 |         auto wasEmpty = m_List.empty();
348 | 
349 |         while (full()) {
350 |             m_cond.wait(lock);
351 |         }
352 | 
353 |         m_List.push_back(value);
354 |         if (wasEmpty && !m_List.empty()) {
355 |             lock.unlock();
356 |             m_cond.notify_one();
357 |         }
358 |     }
359 | 
360 |     T pop_front() {
361 |         std::unique_lock<std::mutex> lock(m_mutex);
362 | 
363 |         while (m_List.empty()) {
364 |             m_cond.wait(lock);
365 |         }
366 |         auto wasFull = full();
367 |         T data = std::move(m_List.front());
368 |         m_List.pop_front();
369 | 
370 |         if (wasFull && !full()) {
371 |             lock.unlock();
372 |             m_cond.notify_one();
373 |         }
374 | 
375 |         return data;
376 |     }
377 | 
378 |     T front() {
379 |         std::unique_lock<std::mutex> lock(m_mutex);
380 | 
381 |         while (m_List.empty()) {
382 |             m_cond.wait(lock);
383 |         }
384 | 
385 |         return m_List.front();
386 |     }
387 | 
388 |     size_t size() {
389 |         std::unique_lock<std::mutex> lock(m_mutex);
390 |         return m_List.size();
391 |     }
392 | 
393 |     bool empty() {
394 |         std::unique_lock<std::mutex> lock(m_mutex);
395 |         return m_List.empty();
396 |     }
397 |     void clear() {
398 |         std::unique_lock<std::mutex> lock(m_mutex);
399 |         m_List.clear();
400 |     }
401 | 
402 | private:
403 |     bool full() {
404 |         if (m_List.size() == maxSize)
405 |             return true;
406 |         return false;
407 |     }
408 | 
409 | private:
410 |     std::list<T> m_List;
411 |     std::mutex m_mutex;
412 |     std::condition_variable m_cond;
413 |     size_t maxSize;
414 | };
415 | 
416 | inline void CheckInputFile(const char *szInFilePath) {
417 |     std::ifstream fpIn(szInFilePath, std::ios::in | std::ios::binary);
418 |     if (fpIn.fail()) {
419 |         std::ostringstream err;
420 |         err << "Unable to open input file: " << szInFilePath << std::endl;
421 |         throw std::invalid_argument(err.str());
422 |     }
423 | }
424 | 
425 | inline void ValidateResolution(int nWidth, int nHeight) {
426 | 
427 |     if (nWidth <= 0 || nHeight <= 0) {
428 |         std::ostringstream err;
429 |         err << "Please specify positive non zero resolution as -s WxH. Current resolution is " << nWidth << "x" << nHeight << std::endl;
430 |         throw std::invalid_argument(err.str());
431 |     }
432 | }
433 | 
434 | template <class COLOR32>
435 | void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
436 | template <class COLOR64>
437 | void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
438 | 
439 | template <class COLOR32>
440 | void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
441 | template <class COLOR64>
442 | void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
443 | 
444 | template <class COLOR32>
445 | void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
446 | template <class COLOR64>
447 | void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
448 | 
449 | template <class COLOR32>
450 | void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
451 | template <class COLOR64>
452 | void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
453 | 
454 | template <class COLOR32>
455 | void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix = 0);
456 | template <class COLOR32>
457 | void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix = 4);
458 | 
459 | template <class COLOR32>
460 | void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix = 0);
461 | template <class COLOR32>
462 | void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix = 4);
463 | 
464 | void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016, int nP016Pitch, int nWidth, int nHeight, int iMatrix = 4);
465 | 
466 | void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight);
467 | void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight);
468 | 
469 | void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcNv12, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char *dpDstNv12UV = nullptr);
470 | void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcP016, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char *dpDstP016UV = nullptr);
471 | 
472 | void ScaleYUV420(unsigned char *dpDstY, unsigned char* dpDstU, unsigned char* dpDstV, int nDstPitch, int nDstChromaPitch, int nDstWidth, int nDstHeight,
473 |     unsigned char *dpSrcY, unsigned char* dpSrcU, unsigned char* dpSrcV, int nSrcPitch, int nSrcChromaPitch, int nSrcWidth, int nSrcHeight, bool bSemiplanar);
474 | 
475 | #ifdef __cuda_cuda_h__
476 | void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue, CUstream_st *outputCUStream);
477 | #endif
478 | 
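The declarations above are implemented in ColorSpace.cu, BitDepth.cu and Resize.cu. As a hedged sketch of how they compose (dpSrcNv12, dpTmpNv12 and dpBgra are illustrative device pointers, and BGRA32 is the pixel type from ColorSpace.h; error handling is elided), a decoded NV12 surface is typically downscaled first and colour-converted second:

    // dpSrcNv12: decoded NV12 frame (srcW x srcH); dpTmpNv12/dpBgra: scratch buffers
    ResizeNv12(dpTmpNv12, dstW, dstW, dstH, dpSrcNv12, srcPitch, srcW, srcH);
    Nv12ToColor32<BGRA32>(dpTmpNv12, dstW, dpBgra, 4 * dstW, dstW, dstH);

Both calls launch CUDA kernels on the default stream, so a cudaStreamSynchronize() (or a blocking cudaMemcpy, as decoder.cpp later in this repository does) is needed before the result is read back.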
--------------------------------------------------------------------------------
/src/cuvid/Utils/Resize.cu:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
3 |  *
4 |  * Please refer to the NVIDIA end user license agreement (EULA) associated
5 |  * with this source code for terms and conditions that govern your use of
6 |  * this software. Any use, reproduction, disclosure, or distribution of
7 |  * this software and related documentation outside the terms of the EULA
8 |  * is strictly prohibited.
9 |  *
10 |  */
11 | 
12 | #include <cuda_runtime.h>
13 | #include "NvCodecUtils.h"
14 | 
15 | template<typename YuvUnitx2>
16 | static __global__ void Resize(cudaTextureObject_t texY, cudaTextureObject_t texUv,
17 |     uint8_t *pDst, uint8_t *pDstUV, int nPitch, int nWidth, int nHeight,
18 |     float fxScale, float fyScale)
19 | {
20 |     int ix = blockIdx.x * blockDim.x + threadIdx.x,
21 |         iy = blockIdx.y * blockDim.y + threadIdx.y;
22 | 
23 |     if (ix >= nWidth / 2 || iy >= nHeight / 2) {
24 |         return;
25 |     }
26 | 
27 |     int x = ix * 2, y = iy * 2;
28 |     typedef decltype(YuvUnitx2::x) YuvUnit;
29 |     const int MAX = (1 << (sizeof(YuvUnit) * 8)) - 1;
30 |     *(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
31 |         (YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
32 |         (YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
33 |     };
34 |     y++;
35 |     *(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
36 |         (YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
37 |         (YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
38 |     };
39 |     float2 uv = tex2D<float2>(texUv, ix / fxScale, (nHeight + iy) / fyScale + 0.5f);
40 |     *(YuvUnitx2 *)(pDstUV + iy * nPitch + ix * 2 * sizeof(YuvUnit)) = YuvUnitx2{ (YuvUnit)(uv.x * MAX), (YuvUnit)(uv.y * MAX) };
41 | }
42 | 
43 | template<typename YuvUnitx2>
44 | static void Resize(unsigned char *dpDst, unsigned char* dpDstUV, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight) {
45 |     cudaResourceDesc resDesc = {};
46 |     resDesc.resType = cudaResourceTypePitch2D;
47 |     resDesc.res.pitch2D.devPtr = dpSrc;
48 |     resDesc.res.pitch2D.desc = cudaCreateChannelDesc<decltype(YuvUnitx2::x)>();
49 |     resDesc.res.pitch2D.width = nSrcWidth;
50 |     resDesc.res.pitch2D.height = nSrcHeight;
51 |     resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
52 | 
53 |     cudaTextureDesc texDesc = {};
54 |     texDesc.filterMode = cudaFilterModeLinear;
55 |     texDesc.readMode = cudaReadModeNormalizedFloat;
56 | 
57 |     cudaTextureObject_t texY=0;
58 |     ck(cudaCreateTextureObject(&texY, &resDesc, &texDesc, NULL));
59 | 
60 |     resDesc.res.pitch2D.desc = cudaCreateChannelDesc<YuvUnitx2>();
61 |     resDesc.res.pitch2D.width = nSrcWidth / 2;
62 |     resDesc.res.pitch2D.height = nSrcHeight * 3 / 2;
63 | 
64 |     cudaTextureObject_t texUv=0;
65 |     ck(cudaCreateTextureObject(&texUv, &resDesc, &texDesc, NULL));
66 | 
67 |     // one thread per 2x2 luma block, so the grid covers (nDstWidth/2) x (nDstHeight/2)
68 |     Resize<YuvUnitx2> <<<dim3((nDstWidth / 2 + 15) / 16, (nDstHeight / 2 + 15) / 16), dim3(16, 16)>>>(texY, texUv, dpDst, dpDstUV,
69 |         nDstPitch, nDstWidth, nDstHeight, 1.0f * nDstWidth / nSrcWidth, 1.0f * nDstHeight / nSrcHeight);
70 | 
71 |     ck(cudaDestroyTextureObject(texY));
72 |     ck(cudaDestroyTextureObject(texUv));
73 | }
74 | 
75 | void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcNv12, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstNv12UV)
76 | {
77 |     unsigned char* dpDstUV = dpDstNv12UV ? dpDstNv12UV : dpDstNv12 + (nDstPitch*nDstHeight);
78 |     return Resize<uchar2>(dpDstNv12, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcNv12, nSrcPitch, nSrcWidth, nSrcHeight);
79 | }
80 | 
81 | 
82 | void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcP016, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstP016UV)
83 | {
84 |     unsigned char* dpDstUV = dpDstP016UV ? dpDstP016UV : dpDstP016 + (nDstPitch*nDstHeight);
85 |     return Resize<ushort2>(dpDstP016, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcP016, nSrcPitch, nSrcWidth, nSrcHeight);
86 | }
87 | 
88 | static __global__ void Scale(cudaTextureObject_t texSrc,
89 |     uint8_t *pDst, int nPitch, int nWidth, int nHeight,
90 |     float fxScale, float fyScale)
91 | {
92 |     int x = blockIdx.x * blockDim.x + threadIdx.x,
93 |         y = blockIdx.y * blockDim.y + threadIdx.y;
94 | 
95 |     if (x >= nWidth || y >= nHeight)
96 |     {
97 |         return;
98 |     }
99 | 
100 |     *(unsigned char*)(pDst + (y * nPitch) + x) = (unsigned char)(fminf((tex2D<float>(texSrc, x * fxScale, y * fyScale)) * 255.0f, 255.0f));
101 | }
102 | 
103 | static __global__ void Scale_uv(cudaTextureObject_t texSrc,
104 |     uint8_t *pDst, int nPitch, int nWidth, int nHeight,
105 |     float fxScale, float fyScale)
106 | {
107 |     int x = blockIdx.x * blockDim.x + threadIdx.x,
108 |         y = blockIdx.y * blockDim.y + threadIdx.y;
109 | 
110 |     if (x >= nWidth || y >= nHeight)
111 |     {
112 |         return;
113 |     }
114 | 
115 |     float2 uv = tex2D<float2>(texSrc, x * fxScale, y * fyScale);
116 |     uchar2 uvOut = uchar2{ (unsigned char)(fminf(uv.x * 255.0f, 255.0f)), (unsigned char)(fminf(uv.y * 255.0f, 255.0f)) };
117 | 
118 |     *(uchar2*)(pDst + (y * nPitch) + 2 * x) = uvOut;
119 | }
120 | 
121 | void ScaleKernelLaunch(unsigned char *dpDst, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight, bool bUVPlane = false)
122 | {
123 |     cudaResourceDesc resDesc = {};
124 |     resDesc.resType = cudaResourceTypePitch2D;
125 |     resDesc.res.pitch2D.devPtr = dpSrc;
126 |     resDesc.res.pitch2D.desc = bUVPlane ? cudaCreateChannelDesc<uchar2>() : cudaCreateChannelDesc<unsigned char>();
127 |     resDesc.res.pitch2D.width = nSrcWidth;
128 |     resDesc.res.pitch2D.height = nSrcHeight;
129 |     resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
130 | 
131 |     cudaTextureDesc texDesc = {};
132 |     texDesc.filterMode = cudaFilterModeLinear;
133 |     texDesc.readMode = cudaReadModeNormalizedFloat;
134 | 
135 |     texDesc.addressMode[0] = cudaAddressModeClamp;
136 |     texDesc.addressMode[1] = cudaAddressModeClamp;
137 |     texDesc.addressMode[2] = cudaAddressModeClamp;
138 | 
139 |     cudaTextureObject_t texSrc = 0;
140 |     ck(cudaCreateTextureObject(&texSrc, &resDesc, &texDesc, NULL));
141 | 
142 |     dim3 blockSize(16, 16, 1);
143 |     dim3 gridSize(((uint32_t)nDstWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nDstHeight + blockSize.y - 1) / blockSize.y, 1);
144 | 
145 |     if (bUVPlane)
146 |     {
147 |         Scale_uv <<<gridSize, blockSize>>>(texSrc, dpDst,
148 |             nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
149 |     }
150 |     else
151 |     {
152 |         Scale <<<gridSize, blockSize>>>(texSrc, dpDst,
153 |             nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
154 |     }
155 | 
156 |     ck(cudaGetLastError());
157 |     ck(cudaDestroyTextureObject(texSrc));
158 | }
159 | 
160 | void ScaleYUV420(unsigned char *dpDstY,
161 |     unsigned char* dpDstU,
162 |     unsigned char* dpDstV,
163 |     int nDstPitch,
164 |     int nDstChromaPitch,
165 |     int nDstWidth,
166 |     int nDstHeight,
167 |     unsigned char *dpSrcY,
168 |     unsigned char* dpSrcU,
169 |     unsigned char* dpSrcV,
170 |     int nSrcPitch,
171 |     int nSrcChromaPitch,
172 |     int nSrcWidth,
173 |     int nSrcHeight,
174 |     bool bSemiplanar)
175 | {
176 |     int chromaWidthDst = (nDstWidth + 1) / 2;
177 |     int chromaHeightDst = (nDstHeight + 1) / 2;
178 | 
179 |     int chromaWidthSrc = (nSrcWidth + 1) / 2;
180 |     int chromaHeightSrc = (nSrcHeight + 1) / 2;
181 | 
182 |     ScaleKernelLaunch(dpDstY, nDstPitch, nDstWidth, nDstHeight, dpSrcY, nSrcPitch, nSrcWidth, nSrcHeight);
183 | 
184 |     if (bSemiplanar)
185 |     {
186 |         ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc, true);
187 |     }
188 |     else
189 |     {
190 |         ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
191 |         ScaleKernelLaunch(dpDstV, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcV, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
192 |     }
193 | }
--------------------------------------------------------------------------------
/src/cuvid/Utils/crc.cu:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2018-2020 NVIDIA Corporation.  All rights reserved.
3 |  *
4 |  * Please refer to the NVIDIA end user license agreement (EULA) associated
5 |  * with this source code for terms and conditions that govern your use of
6 |  * this software. Any use, reproduction, disclosure, or distribution of
7 |  * this software and related documentation outside the terms of the EULA
8 |  * is strictly prohibited.
9 |  *
10 |  */
11 | 
12 | #include <stdint.h>
13 | #include "NvCodecUtils.h"
14 | 
15 | /*
16 |  * CRC32 lookup table
17 |  * Generated by the following routine
18 |  * int i, j;
19 |  * U032 crc;
20 |  * for (i = 0; i < 256; i++)
21 |  * {
22 |  *     crc = i;
23 |  *     for (j = 0; j < 8; j++) {    // 8 reduction
24 |  *         crc = (crc >> 1) ^ ((crc & 1) ?
0xEDB88320L : 0); 25 | * } 26 | * Crc32Table[i] = crc; 27 | * } 28 | */ 29 | __device__ __constant__ uint32_t Crc32Table[256] = { 30 | 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 31 | 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, 32 | 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 33 | 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 34 | 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 35 | 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 36 | 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 37 | 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, 38 | 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 39 | 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 40 | 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 41 | 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 42 | 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 43 | 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, 44 | 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 45 | 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 46 | 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 47 | 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, 48 | 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 49 | 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, 50 | 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 51 | 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 52 | 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 53 | 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, 54 | 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 55 | 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 56 | 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 57 | 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 58 | 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 59 | 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, 60 | 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 61 | 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 62 | 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 63 | 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 64 | 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 65 | 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 66 | 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 67 | 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, 68 | 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 69 | 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 70 | 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 71 | 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 72 | 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 73 | 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, 74 | 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 75 | 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 76 | 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 77 | 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, 78 | 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 79 | 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, 80 | 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 81 | 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 82 | 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 83 | 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, 84 | 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 85 | 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 86 | 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 87 | 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 88 | 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 89 | 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, 90 | 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 91 | 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, 92 | 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 93 | 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d 94 | }; 95 | 96 | typedef struct 
_NV_ENC_ENCODE_OUT_PARAMS
97 | {
98 |     uint32_t version;                 /**< [out]: Struct version. */
99 |     uint32_t bitstreamSizeInBytes;    /**< [out]: Encoded bitstream size in bytes */
100 |     uint32_t cycleCount;              /**< [out]: Cycle count */
101 |     uint32_t firstPassCycleCount;     /**< [out]: First pass cycle count */
102 |     uint32_t reserved[60];            /**< [out]: Reserved and must be set to 0 */
103 | } NV_ENC_ENCODE_OUT_PARAMS;
104 | 
105 | static __global__ void ComputeCRCKernel(uint8_t *pBuffer, uint32_t *crcValue)
106 | {
107 |     NV_ENC_ENCODE_OUT_PARAMS *outParams = (NV_ENC_ENCODE_OUT_PARAMS *)pBuffer;
108 |     uint32_t bitstreamSize = outParams->bitstreamSizeInBytes;
109 |     uint8_t *pEncStream = pBuffer + sizeof(NV_ENC_ENCODE_OUT_PARAMS);
110 |     uint32_t crc=~0;
111 | 
112 |     for(uint32_t i = 0; i < bitstreamSize; i++)
113 |     {
114 |         crc = (crc >> 8) ^ Crc32Table[((uint8_t)(crc)) ^ (*pEncStream++)];
115 |     }
116 | 
117 |     *crcValue = ~crc;
118 | }
119 | 
120 | void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue, cudaStream_t outputCUStream)
121 | {
122 |     dim3 blockSize(1, 1, 1);
123 |     dim3 gridSize(1, 1, 1);
124 | 
125 |     ComputeCRCKernel <<<gridSize, blockSize, 0, outputCUStream>>>(pBuffer, crcValue);
126 | }
127 | 
--------------------------------------------------------------------------------
/src/cuvid/nvcuvid.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * This copyright notice applies to this header file only:
3 |  *
4 |  * Copyright (c) 2010-2020 NVIDIA Corporation
5 |  *
6 |  * Permission is hereby granted, free of charge, to any person
7 |  * obtaining a copy of this software and associated documentation
8 |  * files (the "Software"), to deal in the Software without
9 |  * restriction, including without limitation the rights to use,
10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
11 |  * copies of the software, and to permit persons to whom the
12 |  * software is furnished to do so, subject to the following
13 |  * conditions:
14 |  *
15 |  * The above copyright notice and this permission notice shall be
16 |  * included in all copies or substantial portions of the Software.
17 |  *
18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 |  * OTHER DEALINGS IN THE SOFTWARE.
26 |  */
27 | 
28 | /********************************************************************************************************************/
29 | //! \file nvcuvid.h
30 | //! NVDECODE API provides video decoding interface to NVIDIA GPU devices.
31 | //! \date 2015-2020
32 | //! This file contains the interface constants, structure definitions and function prototypes.
33 | /********************************************************************************************************************/
34 | 
35 | #if !defined(__NVCUVID_H__)
36 | #define __NVCUVID_H__
37 | 
38 | #include "cuviddec.h"
39 | 
40 | #if defined(__cplusplus)
41 | extern "C" {
42 | #endif /* __cplusplus */
43 | 
44 | 
45 | /***********************************************/
46 | //!
47 | //! High-level helper APIs for video sources
48 | //!
49 | /***********************************************/ 50 | 51 | typedef void *CUvideosource; 52 | typedef void *CUvideoparser; 53 | typedef long long CUvideotimestamp; 54 | 55 | 56 | /************************************************************************/ 57 | //! \enum cudaVideoState 58 | //! Video source state enums 59 | //! Used in cuvidSetVideoSourceState and cuvidGetVideoSourceState APIs 60 | /************************************************************************/ 61 | typedef enum { 62 | cudaVideoState_Error = -1, /**< Error state (invalid source) */ 63 | cudaVideoState_Stopped = 0, /**< Source is stopped (or reached end-of-stream) */ 64 | cudaVideoState_Started = 1 /**< Source is running and delivering data */ 65 | } cudaVideoState; 66 | 67 | /************************************************************************/ 68 | //! \enum cudaAudioCodec 69 | //! Audio compression enums 70 | //! Used in CUAUDIOFORMAT structure 71 | /************************************************************************/ 72 | typedef enum { 73 | cudaAudioCodec_MPEG1=0, /**< MPEG-1 Audio */ 74 | cudaAudioCodec_MPEG2, /**< MPEG-2 Audio */ 75 | cudaAudioCodec_MP3, /**< MPEG-1 Layer III Audio */ 76 | cudaAudioCodec_AC3, /**< Dolby Digital (AC3) Audio */ 77 | cudaAudioCodec_LPCM, /**< PCM Audio */ 78 | cudaAudioCodec_AAC, /**< AAC Audio */ 79 | } cudaAudioCodec; 80 | 81 | /************************************************************************************************/ 82 | //! \ingroup STRUCTS 83 | //! \struct CUVIDEOFORMAT 84 | //! Video format 85 | //! Used in cuvidGetSourceVideoFormat API 86 | /************************************************************************************************/ 87 | typedef struct 88 | { 89 | cudaVideoCodec codec; /**< OUT: Compression format */ 90 | /** 91 | * OUT: frame rate = numerator / denominator (for example: 30000/1001) 92 | */ 93 | struct { 94 | /**< OUT: frame rate numerator (0 = unspecified or variable frame rate) */ 95 | unsigned int numerator; 96 | /**< OUT: frame rate denominator (0 = unspecified or variable frame rate) */ 97 | unsigned int denominator; 98 | } frame_rate; 99 | unsigned char progressive_sequence; /**< OUT: 0=interlaced, 1=progressive */ 100 | unsigned char bit_depth_luma_minus8; /**< OUT: high bit depth luma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */ 101 | unsigned char bit_depth_chroma_minus8; /**< OUT: high bit depth chroma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */ 102 | unsigned char min_num_decode_surfaces; /**< OUT: Minimum number of decode surfaces to be allocated for correct 103 | decoding. The client can send this value in ulNumDecodeSurfaces 104 | (in CUVIDDECODECREATEINFO structure). 105 | This guarantees correct functionality and optimal video memory 106 | usage but not necessarily the best performance, which depends on 107 | the design of the overall application. The optimal number of 108 | decode surfaces (in terms of performance and memory utilization) 109 | should be decided by experimentation for each application, but it 110 | cannot go below min_num_decode_surfaces. 111 | If this value is used for ulNumDecodeSurfaces then it must be 112 | returned to parser during sequence callback. 
*/ 113 | unsigned int coded_width; /**< OUT: coded frame width in pixels */ 114 | unsigned int coded_height; /**< OUT: coded frame height in pixels */ 115 | /** 116 | * area of the frame that should be displayed 117 | * typical example: 118 | * coded_width = 1920, coded_height = 1088 119 | * display_area = { 0,0,1920,1080 } 120 | */ 121 | struct { 122 | int left; /**< OUT: left position of display rect */ 123 | int top; /**< OUT: top position of display rect */ 124 | int right; /**< OUT: right position of display rect */ 125 | int bottom; /**< OUT: bottom position of display rect */ 126 | } display_area; 127 | cudaVideoChromaFormat chroma_format; /**< OUT: Chroma format */ 128 | unsigned int bitrate; /**< OUT: video bitrate (bps, 0=unknown) */ 129 | /** 130 | * OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc) 131 | */ 132 | struct { 133 | int x; 134 | int y; 135 | } display_aspect_ratio; 136 | /** 137 | * Video Signal Description 138 | * Refer section E.2.1 (VUI parameters semantics) of H264 spec file 139 | */ 140 | struct { 141 | unsigned char video_format : 3; /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC, 5-Unspecified */ 142 | unsigned char video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma range */ 143 | unsigned char reserved_zero_bits : 4; /**< Reserved bits */ 144 | unsigned char color_primaries; /**< OUT: chromaticity coordinates of source primaries */ 145 | unsigned char transfer_characteristics; /**< OUT: opto-electronic transfer characteristic of the source picture */ 146 | unsigned char matrix_coefficients; /**< OUT: used in deriving luma and chroma signals from RGB primaries */ 147 | } video_signal_description; 148 | unsigned int seqhdr_data_length; /**< OUT: Additional bytes following (CUVIDEOFORMATEX) */ 149 | } CUVIDEOFORMAT; 150 | 151 | /****************************************************************/ 152 | //! \ingroup STRUCTS 153 | //! \struct CUVIDOPERATINGPOINTINFO 154 | //! Operating point information of scalable bitstream 155 | /****************************************************************/ 156 | typedef struct 157 | { 158 | cudaVideoCodec codec; 159 | union 160 | { 161 | struct 162 | { 163 | unsigned char operating_points_cnt; 164 | unsigned char reserved24_bits[3]; 165 | unsigned short operating_points_idc[32]; 166 | } av1; 167 | unsigned char CodecReserved[1024]; 168 | }; 169 | } CUVIDOPERATINGPOINTINFO; 170 | 171 | /****************************************************************/ 172 | //! \ingroup STRUCTS 173 | //! \struct CUVIDAV1SEQHDR 174 | //! AV1 specific sequence header information 175 | /****************************************************************/ 176 | typedef struct { 177 | unsigned int max_width; 178 | unsigned int max_height; 179 | unsigned char reserved[1016]; 180 | } CUVIDAV1SEQHDR; 181 | 182 | /****************************************************************/ 183 | //! \ingroup STRUCTS 184 | //! \struct CUVIDEOFORMATEX 185 | //! Video format including raw sequence header information 186 | //! Used in cuvidGetSourceVideoFormat API 187 | /****************************************************************/ 188 | typedef struct 189 | { 190 | CUVIDEOFORMAT format; /**< OUT: CUVIDEOFORMAT structure */ 191 | union { 192 | CUVIDAV1SEQHDR av1; 193 | unsigned char raw_seqhdr_data[1024]; /**< OUT: Sequence header data */ 194 | }; 195 | } CUVIDEOFORMATEX; 196 | 197 | /****************************************************************/ 198 | //! \ingroup STRUCTS 199 | //! \struct CUAUDIOFORMAT 200 | //! 
Audio formats 201 | //! Used in cuvidGetSourceAudioFormat API 202 | /****************************************************************/ 203 | typedef struct 204 | { 205 | cudaAudioCodec codec; /**< OUT: Compression format */ 206 | unsigned int channels; /**< OUT: number of audio channels */ 207 | unsigned int samplespersec; /**< OUT: sampling frequency */ 208 | unsigned int bitrate; /**< OUT: For uncompressed, can also be used to determine bits per sample */ 209 | unsigned int reserved1; /**< Reserved for future use */ 210 | unsigned int reserved2; /**< Reserved for future use */ 211 | } CUAUDIOFORMAT; 212 | 213 | 214 | /***************************************************************/ 215 | //! \enum CUvideopacketflags 216 | //! Data packet flags 217 | //! Used in CUVIDSOURCEDATAPACKET structure 218 | /***************************************************************/ 219 | typedef enum { 220 | CUVID_PKT_ENDOFSTREAM = 0x01, /**< Set when this is the last packet for this stream */ 221 | CUVID_PKT_TIMESTAMP = 0x02, /**< Timestamp is valid */ 222 | CUVID_PKT_DISCONTINUITY = 0x04, /**< Set when a discontinuity has to be signalled */ 223 | CUVID_PKT_ENDOFPICTURE = 0x08, /**< Set when the packet contains exactly one frame or one field */ 224 | CUVID_PKT_NOTIFY_EOS = 0x10, /**< If this flag is set along with CUVID_PKT_ENDOFSTREAM, an additional (dummy) 225 | display callback will be invoked with null value of CUVIDPARSERDISPINFO which 226 | should be interpreted as end of the stream. */ 227 | } CUvideopacketflags; 228 | 229 | /*****************************************************************************/ 230 | //! \ingroup STRUCTS 231 | //! \struct CUVIDSOURCEDATAPACKET 232 | //! Data Packet 233 | //! Used in cuvidParseVideoData API 234 | //! IN for cuvidParseVideoData 235 | /*****************************************************************************/ 236 | typedef struct _CUVIDSOURCEDATAPACKET 237 | { 238 | unsigned long flags; /**< IN: Combination of CUVID_PKT_XXX flags */ 239 | unsigned long payload_size; /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */ 240 | const unsigned char *payload; /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */ 241 | CUvideotimestamp timestamp; /**< IN: Presentation time stamp (10MHz clock), only valid if 242 | CUVID_PKT_TIMESTAMP flag is set */ 243 | } CUVIDSOURCEDATAPACKET; 244 | 245 | // Callback for packet delivery 246 | typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *); 247 | 248 | /**************************************************************************************************************************/ 249 | //! \ingroup STRUCTS 250 | //! \struct CUVIDSOURCEPARAMS 251 | //! Describes parameters needed in cuvidCreateVideoSource API 252 | //! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported 253 | //! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed. 
254 | /**************************************************************************************************************************/ 255 | typedef struct _CUVIDSOURCEPARAMS 256 | { 257 | unsigned int ulClockRate; /**< IN: Time stamp units in Hz (0=default=10000000Hz) */ 258 | unsigned int bAnnexb : 1; /**< IN: AV1 annexB stream */ 259 | unsigned int uReserved : 31; /**< Reserved for future use - set to zero */ 260 | unsigned int uReserved1[6]; /**< Reserved for future use - set to zero */ 261 | void *pUserData; /**< IN: User private data passed in to the data handlers */ 262 | PFNVIDSOURCECALLBACK pfnVideoDataHandler; /**< IN: Called to deliver video packets */ 263 | PFNVIDSOURCECALLBACK pfnAudioDataHandler; /**< IN: Called to deliver audio packets. */ 264 | void *pvReserved2[8]; /**< Reserved for future use - set to NULL */ 265 | } CUVIDSOURCEPARAMS; 266 | 267 | 268 | /**********************************************/ 269 | //! \ingroup ENUMS 270 | //! \enum CUvideosourceformat_flags 271 | //! CUvideosourceformat_flags 272 | //! Used in cuvidGetSourceVideoFormat API 273 | /**********************************************/ 274 | typedef enum { 275 | CUVID_FMT_EXTFORMATINFO = 0x100 /**< Return extended format structure (CUVIDEOFORMATEX) */ 276 | } CUvideosourceformat_flags; 277 | 278 | #if !defined(__APPLE__) 279 | /***************************************************************************************************************************/ 280 | //! \ingroup FUNCTS 281 | //! \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams) 282 | //! Create CUvideosource object. CUvideosource spawns demultiplexer thread that provides two callbacks: 283 | //! pfnVideoDataHandler() and pfnAudioDataHandler() 284 | //! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported 285 | //! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed. 286 | /***************************************************************************************************************************/ 287 | CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams); 288 | 289 | /***************************************************************************************************************************/ 290 | //! \ingroup FUNCTS 291 | //! \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams) 292 | //! Create video source 293 | /***************************************************************************************************************************/ 294 | CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams); 295 | 296 | /********************************************************************/ 297 | //! \ingroup FUNCTS 298 | //! \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj) 299 | //! Destroy video source 300 | /********************************************************************/ 301 | CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj); 302 | 303 | /******************************************************************************************/ 304 | //! \ingroup FUNCTS 305 | //! \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state) 306 | //! Set video source state to: 307 | //! 
cudaVideoState_Started - to signal the source to run and deliver data 308 | //! cudaVideoState_Stopped - to stop the source from delivering the data 309 | //! cudaVideoState_Error - invalid source 310 | /******************************************************************************************/ 311 | CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state); 312 | 313 | /******************************************************************************************/ 314 | //! \ingroup FUNCTS 315 | //! \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj) 316 | //! Get video source state 317 | //! Returns: 318 | //! cudaVideoState_Started - if Source is running and delivering data 319 | //! cudaVideoState_Stopped - if Source is stopped or reached end-of-stream 320 | //! cudaVideoState_Error - if Source is in error state 321 | /******************************************************************************************/ 322 | cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj); 323 | 324 | /******************************************************************************************************************/ 325 | //! \ingroup FUNCTS 326 | //! \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags) 327 | //! Gets video source format in pvidfmt, flags is set to combination of CUvideosourceformat_flags as per requirement 328 | /******************************************************************************************************************/ 329 | CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags); 330 | 331 | /**************************************************************************************************************************/ 332 | //! \ingroup FUNCTS 333 | //! \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags) 334 | //! Get audio source format 335 | //! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported 336 | //! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed. 337 | /**************************************************************************************************************************/ 338 | CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags); 339 | 340 | #endif 341 | /**********************************************************************************/ 342 | //! \ingroup STRUCTS 343 | //! \struct CUVIDPARSERDISPINFO 344 | //! Used in cuvidParseVideoData API with PFNVIDDISPLAYCALLBACK pfnDisplayPicture 345 | /**********************************************************************************/ 346 | typedef struct _CUVIDPARSERDISPINFO 347 | { 348 | int picture_index; /**< OUT: Index of the current picture */ 349 | int progressive_frame; /**< OUT: 1 if progressive frame; 0 otherwise */ 350 | int top_field_first; /**< OUT: 1 if top field is displayed first; 0 otherwise */ 351 | int repeat_first_field; /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling, 352 | -1=unpaired field) */ 353 | CUvideotimestamp timestamp; /**< OUT: Presentation time stamp */ 354 | } CUVIDPARSERDISPINFO; 355 | 356 | /***********************************************************************************************************************/ 357 | //! Parser callbacks 358 | //! 
The parser will call these synchronously from within cuvidParseVideoData(), whenever there is sequence change or a picture 359 | //! is ready to be decoded and/or displayed. First argument in functions is "void *pUserData" member of structure CUVIDSOURCEPARAMS 360 | //! Return values from these callbacks are interpreted as below. If the callbacks return failure, it will be propagated by 361 | //! cuvidParseVideoData() to the application. 362 | //! Parser picks default operating point as 0 and outputAllLayers flag as 0 if PFNVIDOPPOINTCALLBACK is not set or return value is 363 | //! -1 or invalid operating point. 364 | //! PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces 365 | //! while creating parser) 366 | //! PFNVIDDECODECALLBACK : 0: fail, >=1: succeeded 367 | //! PFNVIDDISPLAYCALLBACK : 0: fail, >=1: succeeded 368 | //! PFNVIDOPPOINTCALLBACK : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit 10-10: outputAllLayers, bit 11-30: reserved) 369 | /***********************************************************************************************************************/ 370 | typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *); 371 | typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *); 372 | typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *); 373 | typedef int (CUDAAPI *PFNVIDOPPOINTCALLBACK)(void *, CUVIDOPERATINGPOINTINFO*); 374 | 375 | /**************************************/ 376 | //! \ingroup STRUCTS 377 | //! \struct CUVIDPARSERPARAMS 378 | //! Used in cuvidCreateVideoParser API 379 | /**************************************/ 380 | typedef struct _CUVIDPARSERPARAMS 381 | { 382 | cudaVideoCodec CodecType; /**< IN: cudaVideoCodec_XXX */ 383 | unsigned int ulMaxNumDecodeSurfaces; /**< IN: Max # of decode surfaces (parser will cycle through these) */ 384 | unsigned int ulClockRate; /**< IN: Timestamp units in Hz (0=default=10000000Hz) */ 385 | unsigned int ulErrorThreshold; /**< IN: % Error threshold (0-100) for calling pfnDecodePicture (100=always 386 | IN: call pfnDecodePicture even if picture bitstream is fully corrupted) */ 387 | unsigned int ulMaxDisplayDelay; /**< IN: Max display queue delay (improves pipelining of decode with display) 388 | 0=no delay (recommended values: 2..4) */ 389 | unsigned int bAnnexb : 1; /**< IN: AV1 annexB stream */ 390 | unsigned int uReserved : 31; /**< Reserved for future use - set to zero */ 391 | unsigned int uReserved1[4]; /**< IN: Reserved for future use - set to 0 */ 392 | void *pUserData; /**< IN: User data for callbacks */ 393 | PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< IN: Called before decoding frames and/or whenever there is a fmt change */ 394 | PFNVIDDECODECALLBACK pfnDecodePicture; /**< IN: Called when a picture is ready to be decoded (decode order) */ 395 | PFNVIDDISPLAYCALLBACK pfnDisplayPicture; /**< IN: Called whenever a picture is ready to be displayed (display order) */ 396 | PFNVIDOPPOINTCALLBACK pfnGetOperatingPoint; /**< IN: Called from AV1 sequence header to get operating point of a AV1 397 | scalable bitstream */ 398 | void *pvReserved2[6]; /**< Reserved for future use - set to NULL */ 399 | CUVIDEOFORMATEX *pExtVideoInfo; /**< IN: [Optional] sequence header data from system layer */ 400 | } CUVIDPARSERPARAMS; 401 | 402 | /************************************************************************************************/ 403 | //! \ingroup FUNCTS 404 | //! 
\fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
405 | //! Create video parser object and initialize
406 | /************************************************************************************************/
407 | CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);
408 | 
409 | /************************************************************************************************/
410 | //! \ingroup FUNCTS
411 | //! \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
412 | //! Parse the video data from source data packet in pPacket
413 | //! Extracts parameter sets like SPS, PPS, bitstream etc. from pPacket and
414 | //! calls back pfnDecodePicture with CUVIDPICPARAMS data for kicking of HW decoding
415 | //! calls back pfnSequenceCallback with CUVIDEOFORMAT data for initial sequence header or when
416 | //! the decoder encounters a video format change
417 | //! calls back pfnDisplayPicture with CUVIDPARSERDISPINFO data to display a video frame
418 | /************************************************************************************************/
419 | CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);
420 | 
421 | /************************************************************************************************/
422 | //! \ingroup FUNCTS
423 | //! \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
424 | //! Destroy the video parser
425 | /************************************************************************************************/
426 | CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj);
427 | 
428 | /**********************************************************************************************/
429 | 
430 | #if defined(__cplusplus)
431 | }
432 | #endif /* __cplusplus */
433 | 
434 | #endif // __NVCUVID_H__
435 | 
436 | 
437 | 
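nvcuvid.h is consumed almost entirely through NvDecoder.cpp in this repository, but the core calling pattern is small. A hedged sketch of parser setup and feeding (HandleVideoSequence, HandlePictureDecode, HandlePictureDisplay and m_hParser are placeholder names; callback bodies and error handling are omitted):

    CUVIDPARSERPARAMS parserParams = {};
    parserParams.CodecType = cudaVideoCodec_H264;
    parserParams.ulMaxNumDecodeSurfaces = 1;                  // raised later from the sequence callback
    parserParams.pUserData = this;
    parserParams.pfnSequenceCallback = HandleVideoSequence;   // pick surface count, (re)create decoder
    parserParams.pfnDecodePicture = HandlePictureDecode;      // cuvidDecodePicture, decode order
    parserParams.pfnDisplayPicture = HandlePictureDisplay;    // map/unmap frames, display order
    ck(cuvidCreateVideoParser(&m_hParser, &parserParams));

    CUVIDSOURCEDATAPACKET packet = {};
    packet.payload = pData;
    packet.payload_size = nSize;
    if (!pData || nSize == 0) {
        packet.flags = CUVID_PKT_ENDOFSTREAM;                 // flush the parser at EOS
    }
    ck(cuvidParseVideoData(m_hParser, &packet));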
--------------------------------------------------------------------------------
/src/decoder.cpp:
--------------------------------------------------------------------------------
1 | #include "decoder.h"
2 | #include "cuvid/Utils/NvCodecUtils.h"
3 | #include "cuvid/Utils/FFmpegDemuxer.h"
4 | #include "cuvid/Utils/ColorSpace.h"
5 | #include "cuvid/AppDecUtils.h"
6 | #include <cuda.h>
7 | #include "cuvid/Utils/Logger.h"
8 | #include <cuda_runtime.h>
9 | #include "cuvid/NvDecoder/NvDecoder.h"
10 | #include <string.h>
11 | 
12 | #define DEC(handle) ((NvDecoder*)(handle->dec))
13 | 
14 | 
15 | videoDecoderHandle videoDecoder_init(enum AVCodecID codec){
16 |     videoDecoderHandle handle = (videoDecoderHandle)malloc(sizeof(videoDecoder));
17 |     ck(cuInit(0));
18 |     handle->cuContext = nullptr;
19 |     createCudaContext(&(handle->cuContext), 0, 0);
20 |     handle->dec = (void*)(new NvDecoder(handle->cuContext, false, FFmpeg2NvCodecId(codec)));
21 |     return handle;
22 | }
23 | 
24 | int videoDecoder_destroy(videoDecoderHandle handle){
25 |     delete(DEC(handle));    // cast back to NvDecoder* so the destructor actually runs
26 |     cuCtxDestroy(handle->cuContext);
27 |     handle->cuContext = nullptr;
28 |     handle->dec = nullptr;
29 |     return 0;
30 | }
31 | 
32 | void videoFrameList_destory(videoFrameList** list){
33 |     if(*list != NULL){
34 |         if((*list)->pFrames != NULL){
35 |             free((*list)->pFrames);
36 |             (*list)->pFrames = NULL;
37 |         }
38 |         free((*list));
39 |         *list = NULL;
40 |     }
41 | }
42 | 
43 | videoFrameList* videoFrameList_init(int width, int height, int length){
44 |     videoFrameList* frameList = (videoFrameList*)malloc(sizeof(videoFrameList));
45 |     frameList->height = height;
46 |     frameList->width = width;
47 |     frameList->length = length;
48 |     frameList->perFrameSize = height*width*4;    // 32-bit output: 4 bytes per pixel
49 |     frameList->pFrames = (uint8_t*)malloc(frameList->height * frameList->width * 4 * frameList->length);
50 |     return frameList;
51 | }
52 | 
53 | 
54 | videoFrameList* videoDecoder_decode(videoDecoderHandle handle, u_int8_t* in, size_t in_size, char*error){
55 |     int nFrameReturned;
56 |     int i;
57 |     int frameSize;
58 |     uint8_t *pVideo = NULL, *pFrame;
59 |     videoFrameList* frameList = NULL;
60 |     CUdeviceptr dpFrame = 0, nv12Frame = 0;
61 |     if(error!=NULL){
62 |         error[0] = '\0';
63 |     }
64 |     try{
65 |         nFrameReturned = DEC(handle)->Decode(in, in_size);
66 |     }catch(NVDECException& e){
67 |         if(error != NULL){
68 |             strcpy(error, e.what());
69 |         }
70 |         return NULL;
71 |     }
72 |     for (i = 0; i < nFrameReturned; i++) {
73 |         pFrame = DEC(handle)->GetFrame();
74 |         frameSize = DEC(handle)->GetFrameSize();
75 |         if(i == 0){
76 |             frameList = videoFrameList_init(DEC(handle)->GetWidth(), DEC(handle)->GetHeight(), nFrameReturned);
77 |             ck(cuMemAlloc(&dpFrame, frameList->width * frameList->height * 4));
78 |             ck(cuMemAlloc(&nv12Frame, frameSize));
79 |         }
80 |         cudaMemcpy((void*)nv12Frame, pFrame, frameSize, cudaMemcpyHostToDevice);
81 |         // convert the NV12 surface to 32-bit color on the GPU (BGRA32 assumed)
82 |         Nv12ToColor32<BGRA32>((uint8_t*)nv12Frame, frameList->width, (uint8_t *)dpFrame, 4 * frameList->width, frameList->width, frameList->height);
83 |         int output_size = frameList->width * frameList->height * 4;
84 |         cudaMemcpy((void*)(frameList->pFrames+i*frameList->perFrameSize), (uint8_t*)dpFrame, output_size, cudaMemcpyDeviceToHost);
85 |     }
86 |     cuMemFree(dpFrame);
87 |     cuMemFree(nv12Frame);    // release the staging NV12 buffer as well
88 |     return frameList;
89 | }
--------------------------------------------------------------------------------
/src/decoder.h:
--------------------------------------------------------------------------------
1 | #ifndef NVCODEC_PYTHON_VIDEO_DECODER_H
2 | #define NVCODEC_PYTHON_VIDEO_DECODER_H
3 | 
4 | 
5 | 
6 | #ifdef __cplusplus
7 | extern "C" {
8 | #endif
9 | 
10 | #include <libavcodec/avcodec.h>
11 | #include "cuvid/nvcuvid.h"
12 | #include <sys/types.h>
13 | 
14 | 
15 | #ifdef __cplusplus
16 | }
17 | #endif
18 | 
19 | typedef struct
20 | {
21 |     CUcontext cuContext;
22 |     void* dec;
23 | }videoDecoder;
24 | 
25 | typedef videoDecoder* videoDecoderHandle;
26 | 
27 | typedef struct
28 | {
29 |     uint8_t* pFrames;
30 |     int perFrameSize;
31 |     int width;
32 |     int height;
33 |     int length;
34 | }videoFrameList;
35 | 
36 | 
37 | 
38 | videoDecoderHandle videoDecoder_init(enum AVCodecID codec);
39 | int videoDecoder_destroy(videoDecoderHandle handle);
40 | videoFrameList* videoDecoder_decode(videoDecoderHandle handle, u_int8_t* in, size_t in_size, char*error);
41 | void videoFrameList_destory(videoFrameList**);
42 | videoFrameList* videoFrameList_init(int width, int height, int length);
43 | 
44 | 
45 | #endif
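Together, decoder.cpp and decoder.h form a small C API over NvDecoder. A hedged usage sketch (packetData/packetSize are placeholders for one Annex-B H.264 packet, e.g. from videoSource_read below; error handling abbreviated):

    char err[256] = {0};
    videoDecoderHandle dec = videoDecoder_init(AV_CODEC_ID_H264);
    videoFrameList* frames = videoDecoder_decode(dec, packetData, packetSize, err);
    if (frames) {
        for (int i = 0; i < frames->length; i++) {
            // one 32-bit frame of width*height*4 bytes
            uint8_t* pixels = frames->pFrames + i * frames->perFrameSize;
            // ... consume the frame ...
        }
        videoFrameList_destory(&frames);    // spelling follows the header
    }
    videoDecoder_destroy(dec);

Note that videoDecoder_decode may legitimately return NULL with an empty error string when the decoder has buffered input but produced no frames yet.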
--------------------------------------------------------------------------------
/src/encoder.cpp:
--------------------------------------------------------------------------------
1 | #include "encoder.h"
2 | #include "cuvid/NvEncoder/NvEncoderCuda.h"
3 | #include "cuvid/Utils/NvCodecUtils.h"
4 | #include "cuvid/Utils/FFmpegDemuxer.h"
5 | #include "cuvid/Utils/ColorSpace.h"
6 | #include "cuvid/AppDecUtils.h"
7 | #include "cuvid/Utils/NvEncoderCLIOptions.h"
8 | #include "cuvid/nvEncodeAPI.h"
9 | 
10 | #define ENC(handle) ((NvEncoderCuda*)(handle->enc))
11 | 
12 | void _InitializeEncoder(NvEncoder* pEnc, NvEncoderInitParam encodeCLIOptions, NV_ENC_BUFFER_FORMAT eFormat)
13 | {
14 |     NV_ENC_INITIALIZE_PARAMS initializeParams = { NV_ENC_INITIALIZE_PARAMS_VER };
15 |     NV_ENC_CONFIG encodeConfig = { NV_ENC_CONFIG_VER };
16 | 
17 |     initializeParams.encodeConfig = &encodeConfig;
18 | 
19 |     pEnc->CreateDefaultEncoderParams(&initializeParams, encodeCLIOptions.GetEncodeGUID(), encodeCLIOptions.GetPresetGUID(), encodeCLIOptions.GetTuningInfo());
20 |     encodeCLIOptions.SetInitParams(&initializeParams, eFormat);
21 | 
22 |     pEnc->CreateEncoder(&initializeParams);
23 | }
24 | 
25 | 
26 | videoEncoderHandle videoEncoder_init(int width, int height){
27 |     videoEncoderHandle handle = (videoEncoderHandle)malloc(sizeof(videoEncoder));
28 |     ck(cuInit(0));
29 |     handle->cuContext = nullptr;
30 |     createCudaContext(&(handle->cuContext), 0, 0);
31 |     handle->enc = new NvEncoderCuda(handle->cuContext, width, height, NV_ENC_BUFFER_FORMAT_ARGB);
32 | 
33 |     NV_ENC_BUFFER_FORMAT eFormat = NV_ENC_BUFFER_FORMAT_ARGB;
34 |     NvEncoderInitParam encodeCLIOptions;
35 |     _InitializeEncoder(ENC(handle), encodeCLIOptions, eFormat);
36 |     return handle;
37 | }
38 | 
39 | int videoEncoder_destroy(videoEncoderHandle handle){
40 |     ENC(handle)->DestroyEncoder();
41 |     delete(ENC(handle));
42 |     cuCtxDestroy(handle->cuContext);
43 |     handle->cuContext = nullptr;
44 |     handle->enc = nullptr;
45 |     return 0;
46 | }
47 | 
48 | videoEncodedBuffer* videoEncoder_encode_end(videoEncoderHandle handle){
49 |     std::vector<std::vector<uint8_t>> vPacket;
50 |     ENC(handle)->EndEncode(vPacket);
51 |     int currentSize = 0;
52 |     for (std::vector<uint8_t> &packet : vPacket){
53 |         currentSize += packet.size();
54 |     }
55 |     if(currentSize == 0){
56 |         return NULL;
57 |     }
58 |     videoEncodedBuffer* buffer = videoEncodedBuffer_init(currentSize);
59 |     currentSize = 0;
60 |     for (std::vector<uint8_t> &packet : vPacket){
61 |         memcpy(buffer->data+currentSize, reinterpret_cast<uint8_t*>(packet.data()), packet.size());
62 |         currentSize+=packet.size();
63 |     }
64 |     return buffer;
65 | }
66 | 
67 | videoEncodedBuffer* videoEncoder_encode(videoEncoderHandle handle, u_int8_t* in){
68 |     std::vector<std::vector<uint8_t>> vPacket;
69 | 
70 |     const NvEncInputFrame* encoderInputFrame = ENC(handle)->GetNextInputFrame();
71 |     NvEncoderCuda::CopyToDeviceFrame(handle->cuContext, in, ENC(handle)->GetWidthInBytes(NV_ENC_BUFFER_FORMAT_ARGB,ENC(handle)->GetEncodeWidth()), (CUdeviceptr)encoderInputFrame->inputPtr,
72 |         (int)encoderInputFrame->pitch,
73 |         ENC(handle)->GetEncodeWidth(),
74 |         ENC(handle)->GetEncodeHeight(),
75 |         CU_MEMORYTYPE_HOST,
76 |         encoderInputFrame->bufferFormat,
77 |         encoderInputFrame->chromaOffsets,
78 |         encoderInputFrame->numChromaPlanes);
79 |     ENC(handle)->EncodeFrame(vPacket);
80 |     int currentSize = 0;
81 |     for (std::vector<uint8_t> &packet : vPacket){
82 |         currentSize += packet.size();
83 |     }
84 |     if(currentSize == 0){
85 |         return NULL;
86 |     }
87 |     videoEncodedBuffer* buffer = videoEncodedBuffer_init(currentSize);
88 |     currentSize = 0;
89 |     for (std::vector<uint8_t> &packet : vPacket){
90 |         memcpy(buffer->data+currentSize, reinterpret_cast<uint8_t*>(packet.data()), packet.size());
91 |         currentSize+=packet.size();
92 |     }
93 |     return buffer;
94 | }
95 | 
96 | void videoEncodedBuffer_destory(videoEncodedBuffer** buffer){
97 |     if(*buffer != NULL){    // only a non-NULL buffer can be freed
98 |         if((*buffer)->data != NULL){
99 |             free((*buffer)->data);
100 |             (*buffer)->data = NULL;
101 |         }
102 |         free(*buffer);
103 |         (*buffer) = NULL;
104 |     }
105 | }
106 | 
107 | videoEncodedBuffer* videoEncodedBuffer_init(int size){
108 |     videoEncodedBuffer* frame = (videoEncodedBuffer*)malloc(sizeof(videoEncodedBuffer));
109 |     frame->size = size;
110 |     frame->data = (u_int8_t*)malloc(frame->size);
111 |     return frame;
112 | }
113 | 
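The encoder side mirrors the decoder API. A hedged sketch of the intended call sequence (argbFrames, nFrames and fpOut are placeholders; each frame must be width*height*4 bytes of 32-bit ARGB, as implied by NV_ENC_BUFFER_FORMAT_ARGB above):

    videoEncoderHandle enc = videoEncoder_init(width, height);
    for (int i = 0; i < nFrames; i++) {
        videoEncodedBuffer* buf = videoEncoder_encode(enc, argbFrames[i]);
        if (buf) {    // NULL just means the encoder buffered the frame
            fwrite(buf->data, 1, buf->size, fpOut);
            videoEncodedBuffer_destory(&buf);
        }
    }
    videoEncodedBuffer* tail = videoEncoder_encode_end(enc);    // flush delayed packets
    if (tail) {
        fwrite(tail->data, 1, tail->size, fpOut);
        videoEncodedBuffer_destory(&tail);
    }
    videoEncoder_destroy(enc);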
/src/encoder.h: -------------------------------------------------------------------------------- 1 | #ifndef NVCODEC_PYTHON_VIDEO_ENCODER_H 2 | #define NVCODEC_PYTHON_VIDEO_ENCODER_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include <cuda.h> 9 | #include "cuvid/nvcuvid.h" 10 | #include <stdint.h> 11 | 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | typedef struct 18 | { 19 | CUcontext cuContext; 20 | void* enc; 21 | }videoEncoder; 22 | 23 | typedef videoEncoder* videoEncoderHandle; 24 | 25 | 26 | typedef struct 27 | { 28 | uint8_t* data; 29 | int size; 30 | }videoEncodedBuffer; 31 | 32 | 33 | videoEncoderHandle videoEncoder_init(int width, int height); 34 | int videoEncoder_destroy(videoEncoderHandle handle); 35 | videoEncodedBuffer* videoEncoder_encode(videoEncoderHandle handle, u_int8_t* in); 36 | videoEncodedBuffer* videoEncoder_encode_end(videoEncoderHandle handle); 37 | void videoEncodedBuffer_destory(videoEncodedBuffer**); 38 | videoEncodedBuffer* videoEncodedBuffer_init(int size); 39 | 40 | 41 | #endif --------------------------------------------------------------------------------
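source.cpp below wraps FFmpeg demuxing and can act either as an RTMP client or, when listen is non-zero, as a one-shot RTMP server: the "listen" dictionary option makes avformat_open_input block until a publisher connects. A sketch of the server-style usage; the URL is a placeholder:

    // listen=1: wait for an incoming RTMP publish instead of pulling from a remote host
    videoSourceHandle src = videoSource_init((char*)"rtmp://0.0.0.0:8981/app/stream", 1);
    AVPacket packet;
    while (videoSource_read(src, &packet) > 0) {
        // packet.data holds Annex-B H.264; the h264_mp4toannexb filter has already run
    }
    videoSource_destroy(src);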
video stream", 303); 79 | return -303; 80 | } 81 | return 0; 82 | } 83 | 84 | 85 | int videoSource_read(videoSourceHandle handle, AVPacket* packet){ 86 | if(handle->pFormatCtx == NULL){ 87 | if(videoSource_connect(handle)<0){ 88 | return -1; 89 | } 90 | } 91 | while(av_read_frame(handle->pFormatCtx, packet)==0){ 92 | av_bsf_send_packet(handle->bsfc, packet); 93 | av_bsf_receive_packet(handle->bsfc, packet); 94 | if(packet->size>0 && packet->stream_index == handle->video_stream_index){ 95 | return packet->size; 96 | } 97 | } 98 | return -1; 99 | } 100 | 101 | int videoSource_close(videoSourceHandle handle){ 102 | if( handle->pFormatCtx != NULL) { 103 | avformat_close_input(&(handle->pFormatCtx)); 104 | av_free(handle->pFormatCtx); 105 | handle->pFormatCtx = NULL; 106 | } 107 | return 0; 108 | } 109 | 110 | enum AVPixelFormat videoSource_getAVPixelFormat(videoSourceHandle handle){ 111 | if(handle->pFormatCtx != NULL) { 112 | return (AVPixelFormat)(handle->pFormatCtx->streams[handle->video_stream_index]->codecpar->format); 113 | }else{ 114 | return AV_PIX_FMT_NONE; 115 | } 116 | } 117 | 118 | enum AVCodecID videoSource_getVideoCodecId(videoSourceHandle handle){ 119 | if(handle->pFormatCtx != NULL) { 120 | return handle->pFormatCtx->video_codec_id; 121 | }else{ 122 | return AV_CODEC_ID_NONE; 123 | } 124 | } 125 | 126 | int videoSource_isConnect(videoSourceHandle handle){ 127 | return handle->pFormatCtx != NULL; 128 | } 129 | -------------------------------------------------------------------------------- /src/source.h: -------------------------------------------------------------------------------- 1 | #ifndef NVCODEC_PYTHON_VIDEO_SOURCE_H 2 | #define NVCODEC_PYTHON_VIDEO_SOURCE_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | 14 | 15 | typedef struct 16 | { 17 | AVFormatContext *pFormatCtx; 18 | AVDictionary *options; 19 | char* url; 20 | AVBSFContext *bsfc; 21 | int video_stream_index; 22 | }videoSource; 23 | 24 | typedef videoSource* videoSourceHandle; 25 | 26 | enum AVPixelFormat videoSource_getAVPixelFormat(videoSourceHandle handle); 27 | enum AVCodecID videoSource_getVideoCodecId(videoSourceHandle handle); 28 | videoSourceHandle videoSource_init(char* url, int listen); 29 | int videoSource_destroy(videoSourceHandle handle); 30 | int videoSource_connect(videoSourceHandle handle); 31 | int videoSource_read(videoSourceHandle handle, AVPacket* packet); 32 | int videoSource_isConnect(videoSourceHandle handle); 33 | int videoSource_close(videoSourceHandle handle); 34 | 35 | 36 | #endif //NVCODEC_PYTHON_VIDEO_SOURCE_H 37 | -------------------------------------------------------------------------------- /tests/cpp/decode.cpp: -------------------------------------------------------------------------------- 1 | #include "source.h" 2 | #include "decoder.h" 3 | #include "stdio.h" 4 | 5 | #ifndef TEST_RTMP_URL 6 | #define TEST_RTMP_URL "rtmp://58.200.131.2:1935/livetv/hunantv" 7 | #endif 8 | 9 | int main(int argc, char** argv){ 10 | videoSourceHandle videoSource = videoSource_init( (char*)TEST_RTMP_URL, 0); 11 | // videoSourceHandle videoSource = videoSource_init((char*)"rtmp://10.10.1.108:8981/app/video/001", 1); 12 | AVPacket packet; 13 | int width, height, size; 14 | cudaVideoSurfaceFormat format; 15 | videoDecoderHandle videoDecode = videoDecoder_init(AV_CODEC_ID_H264); 16 | videoFrameList* frameList; 17 | while(1){ 18 | if(videoSource_read(videoSource, &packet)<0){ 19 | break; 20 | } 21 | frameList = 
/tests/cpp/decode.cpp: -------------------------------------------------------------------------------- 1 | #include "source.h" 2 | #include "decoder.h" 3 | #include "stdio.h" 4 | 5 | #ifndef TEST_RTMP_URL 6 | #define TEST_RTMP_URL "rtmp://58.200.131.2:1935/livetv/hunantv" 7 | #endif 8 | 9 | int main(int argc, char** argv){ 10 | videoSourceHandle videoSource = videoSource_init( (char*)TEST_RTMP_URL, 0); 11 | // videoSourceHandle videoSource = videoSource_init((char*)"rtmp://10.10.1.108:8981/app/video/001", 1); 12 | AVPacket packet; 13 | int width, height, size; 14 | cudaVideoSurfaceFormat format; 15 | videoDecoderHandle videoDecode = videoDecoder_init(AV_CODEC_ID_H264); 16 | videoFrameList* frameList; 17 | while(1){ 18 | if(videoSource_read(videoSource, &packet)<0){ 19 | break; 20 | } 21 | frameList = videoDecoder_decode(videoDecode, packet.data, packet.size, NULL); // NULL: no error message wanted 22 | if(frameList!=NULL){ 23 | printf("Decode Frame %dx%d, Frames %d\n", frameList->width, frameList->height, frameList->length); 24 | } 25 | videoFrameList_destory(&frameList); 26 | } 27 | videoDecoder_destroy(videoDecode); 28 | videoSource_destroy(videoSource); 29 | return 0; 30 | } -------------------------------------------------------------------------------- /tests/cpp/encode.cpp: -------------------------------------------------------------------------------- 1 | #include "source.h" 2 | #include "decoder.h" 3 | #include "encoder.h" 4 | #include "stdio.h" 5 | 6 | #ifndef TEST_RTMP_URL 7 | #define TEST_RTMP_URL "rtmp://58.200.131.2:1935/livetv/hunantv" 8 | #endif 9 | 10 | int main(int argc, char** argv){ 11 | videoSourceHandle videoSource = videoSource_init( (char*)TEST_RTMP_URL, 0); 12 | // videoSourceHandle videoSource = videoSource_init((char*)"rtmp://10.10.1.108:8981/app/video/001", 1); 13 | AVPacket packet; 14 | int width, height, size; 15 | cudaVideoSurfaceFormat format; 16 | videoDecoderHandle videoDecode = videoDecoder_init(AV_CODEC_ID_H264); 17 | videoEncoderHandle videoEncode = NULL; 18 | videoFrameList* frameList; 19 | videoEncodedBuffer* buffer; 20 | #ifdef DEBUG 21 | FILE* fp = fopen("/tmp/encode.h264", "wb"); 22 | #endif 23 | while(1){ 24 | if(videoSource_read(videoSource, &packet)<0){ 25 | break; 26 | } 27 | frameList = videoDecoder_decode(videoDecode, packet.data, packet.size, NULL); 28 | if(frameList==NULL){ 29 | continue; 30 | } 31 | if(videoEncode==NULL){ 32 | videoEncode = videoEncoder_init(frameList->width, frameList->height); 33 | } 34 | buffer = videoEncoder_encode(videoEncode, frameList->pFrames); // feeds the first frame of the list 35 | videoFrameList_destory(&frameList); 36 | if(buffer == NULL){ 37 | continue; // NVENC is still buffering; no packet produced yet 38 | } 39 | printf("Encode Buffer size: %d\n", buffer->size); 40 | #ifdef DEBUG 41 | fwrite(buffer->data, 1, buffer->size, fp); 42 | #endif 43 | videoEncodedBuffer_destory(&buffer); 44 | } 45 | #ifdef DEBUG 46 | fclose(fp); 47 | #endif 48 | videoDecoder_destroy(videoDecode); 49 | if(videoEncode != NULL){ videoEncoder_destroy(videoEncode); } // never created if nothing was decoded 50 | videoSource_destroy(videoSource); 51 | return 0; 52 | } -------------------------------------------------------------------------------- /tests/cpp/read_source.cpp: -------------------------------------------------------------------------------- 1 | #include "source.h" 2 | #include "stdio.h" 3 | 4 | #ifndef TEST_RTMP_URL 5 | #define TEST_RTMP_URL "rtmp://58.200.131.2:1935/livetv/hunantv" 6 | #endif 7 | 8 | int main(int argc, char** argv){ 9 | videoSourceHandle videoSource = videoSource_init( (char*)TEST_RTMP_URL, 0); 10 | // videoSourceHandle videoSource = videoSource_init((char*)"rtmp://10.10.1.108:8981/app/video/001", 1); 11 | AVPacket packet; 12 | #ifdef DEBUG 13 | FILE* fp = fopen("/tmp/save.h264", "wb"); 14 | #endif 15 | while(1){ 16 | if(videoSource_read(videoSource, &packet)<0){ 17 | break; 18 | } 19 | #ifdef DEBUG 20 | fwrite(packet.data, 1, packet.size, fp); 21 | #endif 22 | printf("Read AVPacket Index: %d Size:%d\n", packet.stream_index, packet.size); 23 | } 24 | #ifdef DEBUG 25 | fclose(fp); 26 | #endif 27 | videoSource_destroy(videoSource); 28 | return 0; 29 | } -------------------------------------------------------------------------------- /tests/python/read_source_opencv.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | 5 | lib_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../build/lib.linux-x86_64-3.6')) 6 | sys.path.append(lib_path) 7 | from nvcodec import VideoSource, VideoDecoder, VideoEncoder 8 | 9 | source = VideoSource("rtmp://58.200.131.2:1935/livetv/hunantv")
VideoSource("rtmp://58.200.131.2:1935/livetv/hunantv") 10 | decoder = VideoDecoder() 11 | while True: 12 | h264_data = source.read() 13 | if not h264_data: 14 | break 15 | frames = decoder.decode(h264_data) 16 | for frame in frames: 17 | cv2.imshow("Demo", frame) 18 | cv2.waitKey(1) 19 | 20 | -------------------------------------------------------------------------------- /tests/python/read_source_sdl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from cv2 import cv2 4 | import sdl2 5 | import sdl2.ext 6 | import numpy 7 | import time 8 | 9 | lib_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../build/lib.linux-x86_64-3.6')) 10 | sys.path.append(lib_path) 11 | from nvcodec import VideoSource, VideoDecoder, VideoEncoder 12 | 13 | windowArray = None 14 | window = None 15 | 16 | def showImage(image): 17 | global windowArray, window 18 | if windowArray is None: 19 | sdl2.ext.init() 20 | window = sdl2.ext.Window("test", size=(image.shape[0],image.shape[1])) 21 | window.show() 22 | windowSurf = sdl2.SDL_GetWindowSurface(window.window) 23 | windowArray = sdl2.ext.pixels3d(windowSurf.contents) 24 | numpy.copyto(windowArray, image) 25 | window.refresh() 26 | 27 | 28 | # source = VideoSource("rtmp://58.200.131.2:1935/livetv/hunantv") 29 | source = VideoSource("/tmp/1.mp4") 30 | decoder = VideoDecoder() 31 | while True: 32 | h264_data = source.read() 33 | if not h264_data: 34 | break 35 | frames = decoder.decode(h264_data, 1) 36 | for frame in frames: 37 | showImage(frame) --------------------------------------------------------------------------------