├── .gitignore
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── nvcodec-python.cpp
├── setup.py
├── src
    ├── cuvid
    │   ├── AppDecUtils.h
    │   ├── Logger.cpp
    │   ├── NvDecoder
    │   │   ├── NvDecoder.cpp
    │   │   └── NvDecoder.h
    │   ├── NvEncoder
    │   │   ├── NvEncoder.cpp
    │   │   ├── NvEncoder.h
    │   │   ├── NvEncoderCuda.cpp
    │   │   └── NvEncoderCuda.h
    │   ├── Utils
    │   │   ├── BitDepth.cu
    │   │   ├── ColorSpace.cu
    │   │   ├── ColorSpace.h
    │   │   ├── FFmpegDemuxer.h
    │   │   ├── FFmpegStreamer.h
    │   │   ├── Logger.h
    │   │   ├── NvCodecUtils.h
    │   │   ├── NvEncoderCLIOptions.h
    │   │   ├── Resize.cu
    │   │   └── crc.cu
    │   ├── cuviddec.h
    │   ├── nvEncodeAPI.h
    │   └── nvcuvid.h
    ├── decoder.cpp
    ├── decoder.h
    ├── encoder.cpp
    ├── encoder.h
    ├── source.cpp
    └── source.h
└── tests
    ├── cpp
        ├── decode.cpp
        ├── encode.cpp
        └── read_source.cpp
    └── python
        ├── read_source_opencv.py
        └── read_source_sdl.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | dist
3 | build
4 | pynvcodec.egg-info
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020-2021 Jason Dsouza
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include src/**
2 | include src/**/**
3 | include src/**/**/**
4 | include tests/**/**
5 | include Makefile


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | CFLAGS=-Wall -O2 -lstdc++ -pthread -lm -fPIC
 2 | ifdef DEBUG
 3 | CFLAGS=-g -Wall -O0 -D DEBUG -lstdc++ -pthread -lm -fPIC
 4 | endif
 5 | 
 6 | ifndef CUDA_PATH
 7 | CUDA_PATH=/usr/local/cuda
 8 | endif
 9 | 
10 | ifndef PYTHON_VERSION
11 | PYTHON_VERSION=$(shell python3 -c "import sys; print('%d.%d' % (sys.version_info.major, sys.version_info.minor,))")
12 | endif
13 | 
14 | ifndef PYTHON_INCLUDE_PATH
15 | PYTHON_INCLUDE_PATH=/usr/include/python${PYTHON_VERSION}
16 | endif
17 | 
18 | ifndef PYTHON_BIN
19 | PYTHON_BIN=python${PYTHON_VERSION}
20 | endif
21 | 
22 | ifndef PREFIX
23 | PREFIX=/usr/local
24 | ifdef VIRTUAL_ENV
25 | PREFIX=${VIRTUAL_ENV}
26 | endif
27 | endif
28 | 
29 | lib: build/lib/libnvcodec.a
30 | test: build/tests/read_source build/tests/decode build/tests/encode
31 | 
32 | python: lib
33 | 	${PYTHON_BIN} setup.py build
34 | 
35 | out:
36 | 	mkdir -p build/tests
37 | 	mkdir -p build/lib
38 | 
39 | SRC_FILES=$(wildcard src/*.cpp) $(wildcard src/cuvid/*.cpp) $(wildcard src/cuvid/NvDecoder/*.cpp) $(wildcard src/cuvid/Utils/*.cpp) src/cuvid/NvEncoder/NvEncoder.cpp src/cuvid/NvEncoder/NvEncoderCuda.cpp
40 | OPENCV_LIB=-I/usr/local/include/opencv4 -L/usr/local/lib -lopencv_core -lopencv_highgui
41 | 
42 | lib_cuda: build/lib/libcolor_space.a
43 | 
44 | build/lib/libcolor_space.a: src/cuvid/Utils/ColorSpace.cu out
45 | 	nvcc -DCUDNN  --compiler-options "-fPIC -lstdc++ -pthread -lm" -c src/cuvid/Utils/ColorSpace.cu -o build/lib/libcolor_space.a
46 | 
47 | 
48 | FLAGS=-L${CUDA_PATH}/lib64 -Lbuild/lib -lavformat -lavcodec -lavutil -lcudart -lnvcuvid -lnvidia-encode -lcuda -Isrc -I${CUDA_PATH}/include -Isrc/cuvid ${CFLAGS} 
49 | 
50 | # build/lib/libnvcodec.so: lib_cuda out
51 | # 	g++ -o build/lib/libnvcodec.so -shared ${SRC_FILES} -lcolor_space ${FLAGS} -fPIC
52 | 
53 | 
54 | build/tests/read_source: tests/cpp/read_source.cpp lib
55 | 	g++ -o build/tests/read_source tests/cpp/read_source.cpp -lnvcodec ${FLAGS}
56 | 
57 | build/lib/libnvcodec.a: lib_cuda out
58 |     # g++ -o build/lib/libnvcodec.so -shared ${SRC_FILES} -lcolor_space ${FLAGS} -fPIC
59 | 	mkdir -p build/object
60 | 	cd build/object; g++ -c ../../src/*.cpp ../../src/**/*.cpp ../../src/**/**/*.cpp -I../../src -I../../src/cuvid -I${CUDA_PATH}/include
61 | 	ar rcs build/lib/libnvcodec.a build/object/*.o
62 | 
63 | 
64 | build/tests/decode: tests/cpp/decode.cpp lib
65 | 	g++ -o build/tests/decode tests/cpp/decode.cpp -lnvcodec  ${OPENCV_LIB} ${FLAGS}
66 | 
67 | build/tests/encode: tests/cpp/encode.cpp lib
68 | 	g++ -o build/tests/encode tests/cpp/encode.cpp -lnvcodec ${OPENCV_LIB} ${FLAGS}
69 | 
70 | clean:
71 | 	rm build -rf
72 | 	rm pynvcodec.egg-info -rf
73 | 	rm dist -rf
74 | 
75 | python-interface:
76 | 	${PYTHON_BIN} setup.py build
77 | 
78 | release: clean python-interface
79 | 	${PYTHON_BIN} setup.py sdist
80 | 	${PYTHON_BIN} -m twine upload dist/*
81 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | NvCodec - Python
 2 | ---------------------------
 3 | 
 4 | ## Require
 5 | * cuda >= 11.2
 6 | * numpy >= 1.7
 7 | * python >= 3.6
 8 | * gcc >= 7.5
 9 | * make >= 4.1
10 | 
11 | ## Install
12 | ```shell
13 | pip install pynvcodec
14 | ```
15 | 
16 | ## Usage
17 | 
18 | ### 0. Init PyNvCodec
19 | ```python
20 | from nvcodec import VideoSource, VideoDecoder, VideoEncoder
21 | ```
22 | 
23 | ### 1. Use VideoSource
24 | 
25 | ```python
26 | source = VideoSource("rtmp://RTMP_URL")
27 | h264_data = source.read()
28 | ```
29 | 
30 | #### 2. Use VideoDecoder
31 | ```python
32 | decoder = VideoDecoder()
33 | # output OpenCV format frame
34 | frames = decoder.decode(h264_data)
35 | # output SDL format frame
36 | frames = decoder.decode(h264_data, 1)
37 | ```
38 | 
39 | #### 3. Use VideoEncoder
40 | ```python
41 | encoder = VideoEncoder(width, height)
42 | h264_data = encoder.encode(frame)
43 | ```
44 | 


--------------------------------------------------------------------------------
/nvcodec-python.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <malloc.h>
  3 | #include <sys/stat.h>
  4 | #include <sys/types.h>
  5 | 
  6 | #include <Python.h>
  7 | #include <structmember.h>
  8 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
  9 | #include <numpy/arrayobject.h>
 10 | 
 11 | #include "source.h"
 12 | #include "decoder.h"
 13 | #include "encoder.h"
 14 | 
 15 | 
 16 | typedef struct
 17 | {
 18 |     PyObject_HEAD
 19 |     long long m_handle;
 20 | }NvCodec;
 21 | 
 22 | static PyMemberDef NvCodec_DataMembers[] =
 23 | {
 24 |         {(char*)"m_handle",   T_LONGLONG, offsetof(NvCodec, m_handle),   0, (char*)"NvCodec handle ptr"},
 25 |         {NULL, 0, 0, 0, NULL}
 26 | };
 27 | 
 28 | /* ----------- VideoSource Part --------------- */
 29 | 
 30 | static PyObject* VideoSource_read(NvCodec* Self)
 31 | {
 32 |     videoSourceHandle m_handle = (videoSourceHandle)Self->m_handle;
 33 |     AVPacket *packet = av_packet_alloc();
 34 |     if(videoSource_read(m_handle, packet) < 0){
 35 |         av_packet_free(&packet);
 36 |         return Py_None;
 37 |     }
 38 |     PyObject* rtn = PyBytes_FromStringAndSize((const char*)packet->data, packet->size);
 39 |     av_packet_free(&packet);
 40 |     return rtn;
 41 | }
 42 | 
 43 | static PyMethodDef VideoSource_MethodMembers[] =
 44 | {
 45 |         {"read", (PyCFunction)VideoSource_read, METH_NOARGS,  "read h264 from video source"},
 46 |         {NULL, NULL, 0, NULL}
 47 | };
 48 | 
 49 | static void VideoSource_Destruct(NvCodec* Self)
 50 | {
 51 |     videoSourceHandle m_handle = (videoSourceHandle)(Self->m_handle);
 52 |     videoSource_destroy(m_handle);
 53 |     Py_TYPE(Self)->tp_free((PyObject*)Self);
 54 | }
 55 | 
 56 | 
 57 | static PyObject* VideoSource_Str(NvCodec* Self)
 58 | {
 59 |     return Py_BuildValue("s", "<nvcodec-python.VideoSource>");
 60 | }
 61 | 
 62 | static PyObject* VideoSource_Repr(NvCodec* Self)
 63 | {
 64 |     return VideoSource_Str(Self);
 65 | }
 66 | 
 67 | static void VideoSource_init(NvCodec* Self, PyObject* pArgs)
 68 | {
 69 |     unsigned char* url;
 70 |     unsigned int listen = 0;
 71 |     if(!PyArg_ParseTuple(pArgs, "s|I", &url, &listen)){
 72 |         PyErr_SetString(PyExc_ValueError, "Parse the argument FAILED! You should pass url string!");
 73 |         return;
 74 |     }
 75 | 
 76 |     Self->m_handle = (long long)(videoSource_init((char*)url, listen));
 77 | }
 78 | 
 79 | static PyTypeObject VideoSource_ClassInfo =
 80 | {
 81 |         PyVarObject_HEAD_INIT(NULL, 0)"NvCodec.VideoSource",
 82 |         sizeof(NvCodec),
 83 |         0,
 84 |         (destructor)VideoSource_Destruct,
 85 |         NULL,NULL,NULL,NULL,
 86 |         (reprfunc)VideoSource_Repr,
 87 |         NULL,NULL,NULL,NULL,NULL,
 88 |         (reprfunc)VideoSource_Str,
 89 |         NULL,NULL,NULL,
 90 |         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
 91 |         "NvCodec Python VideoSource Objects --- Extensioned by nvcodec",
 92 |         NULL,NULL,NULL,0,NULL,NULL,
 93 |         VideoSource_MethodMembers,
 94 |         NvCodec_DataMembers,
 95 |         NULL,NULL,NULL,NULL,NULL,0,
 96 |         (initproc)VideoSource_init,
 97 |         NULL,
 98 | };
 99 | 
100 | /* ----------- Decoder Part --------------- */
101 | 
102 | static PyObject* VideoDecoder_decode(NvCodec* Self, PyObject* pArgs)
103 | {
104 |     videoDecoderHandle m_handle = (videoDecoderHandle)Self->m_handle;
105 |     
106 |     unsigned char* data;
107 |     int len;
108 |     unsigned int type = 0;
109 |     if(!PyArg_ParseTuple(pArgs, "y#|I", &data, &len, &type)){
110 |         PyErr_SetString(PyExc_ValueError, "Parse the argument FAILED! You should video byte data!");
111 |         return Py_None;
112 |     }
113 |     
114 |     PyObject* rtn = Py_BuildValue("[]");
115 |     char error_str[128];
116 |     videoFrameList* list = videoDecoder_decode(m_handle, data, len, error_str);
117 |     if(list == NULL){
118 |         if(error_str[0] != NULL){
119 |             PyErr_Format(PyExc_ValueError, "%s", error_str);
120 |             return NULL;
121 |         }
122 |         return rtn;
123 |     }
124 | 
125 |     npy_intp dims[3] = {(npy_intp)(list->height), (npy_intp)(list->width), 4};
126 |     PyObject* tempFrame;
127 |     for(int i = 0;i<list->length;i++){
128 |         tempFrame = PyArray_SimpleNewFromData(3, dims, NPY_UINT8, list->pFrames + (i*(list->perFrameSize)));
129 |         PyArray_ENABLEFLAGS((PyArrayObject*) tempFrame, NPY_ARRAY_OWNDATA);
130 |         if(type != 0){
131 |             tempFrame = PyArray_SwapAxes((PyArrayObject*)tempFrame, 0, 1);
132 |         }
133 |         PyList_Append(rtn, tempFrame);
134 |     }
135 |     videoFrameList_destory(&list);
136 |     return rtn;
137 | }
138 | 
139 | static PyMethodDef VideoDecoder_MethodMembers[] =
140 | {
141 |         {"decode", (PyCFunction)VideoDecoder_decode, METH_VARARGS,  "decode video frame"},
142 |         {NULL, NULL, 0, NULL}
143 | };
144 | 
145 | static void VideoDecoder_Destruct(NvCodec* Self)
146 | {
147 |     videoDecoderHandle m_handle = (videoDecoderHandle)(Self->m_handle);
148 |     videoDecoder_destroy(m_handle);
149 |     Py_TYPE(Self)->tp_free((PyObject*)Self);
150 | }
151 | 
152 | 
153 | static PyObject* VideoDecoder_Str(NvCodec* Self)
154 | {
155 |     return Py_BuildValue("s", "<nvcodec-python.VideoDecoder>");
156 | }
157 | 
158 | static PyObject* VideoDecoder_Repr(NvCodec* Self)
159 | {
160 |     return VideoDecoder_Str(Self);
161 | }
162 | 
163 | static void VideoDecoder_init(NvCodec* Self, PyObject* pArgs)
164 | {
165 |     unsigned int format = AV_CODEC_ID_H264;
166 |     if(!PyArg_ParseTuple(pArgs, "|I", &format)){
167 |         PyErr_SetString(PyExc_ValueError, "Parse the argument FAILED! You should pass AV_CODEC_ID!");
168 |         return;
169 |     }
170 |     Self->m_handle = (long long)(videoDecoder_init((enum AVCodecID)format));
171 | }
172 | 
173 | static PyTypeObject VideoDecoder_ClassInfo =
174 | {
175 |         PyVarObject_HEAD_INIT(NULL, 0)"NvCodec.VideoDecoder",
176 |         sizeof(NvCodec),
177 |         0,
178 |         (destructor)VideoDecoder_Destruct,
179 |         NULL,NULL,NULL,NULL,
180 |         (reprfunc)VideoDecoder_Repr,
181 |         NULL,NULL,NULL,NULL,NULL,
182 |         (reprfunc)VideoDecoder_Str,
183 |         NULL,NULL,NULL,
184 |         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
185 |         "NvCodec Python VideoDecoder Objects --- Extensioned by nvcodec",
186 |         NULL,NULL,NULL,0,NULL,NULL,
187 |         VideoDecoder_MethodMembers,
188 |         NvCodec_DataMembers,
189 |         NULL,NULL,NULL,NULL,NULL,0,
190 |         (initproc)VideoDecoder_init,
191 |         NULL,
192 | };
193 | 
194 | /* ----------- Encoder Part --------------- */
195 | 
196 | static PyObject* VideoEncoder_encode(NvCodec* Self, PyObject* pArgs)
197 | {
198 |     PyArrayObject *vecin;
199 |     if (!PyArg_ParseTuple(pArgs, "O!", &PyArray_Type, &vecin)){
200 |         PyErr_SetString(PyExc_ValueError, "Parse the argument FAILED! You should pass ABGR image numpy array!");
201 |         return NULL;
202 |     }
203 | 
204 |     if (NULL == vecin){
205 |         Py_INCREF(Py_None);
206 |         return Py_None;
207 |     }
208 | 
209 |     if (PyArray_NDIM(vecin) != 4){
210 |         PyErr_SetString(PyExc_ValueError, "Parse the argument FAILED! You should pass ABGR image numpy array by height*width*channel!");
211 |         return NULL;
212 |     }
213 | 
214 |     videoEncoderHandle m_handle = (videoEncoderHandle)Self->m_handle;
215 | 
216 |     PyObject* bytes = PyObject_CallMethod((PyObject*)vecin, "tobytes", NULL);
217 |     
218 |     int length;
219 |     unsigned char* data;
220 |     PyArg_Parse(bytes, "y#", &data, &length);
221 | 
222 |     videoEncodedBuffer* buffer = videoEncoder_encode(m_handle, data);
223 |     if(buffer == NULL){
224 |         Py_INCREF(Py_None);
225 |         return Py_None;
226 |     }
227 | 
228 |     PyObject* rtn = PyBytes_FromStringAndSize((const char*)buffer->data, buffer->size);
229 |     videoEncodedBuffer_destory(&buffer);
230 |     return rtn;
231 | }
232 | 
233 | static PyMethodDef VideoEncoder_MethodMembers[] =
234 | {
235 |         {"encode", (PyCFunction)VideoEncoder_encode, METH_VARARGS,  "encode video frame"},
236 |         {NULL, NULL, 0, NULL}
237 | };
238 | 
239 | static void VideoEncoder_Destruct(NvCodec* Self)
240 | {
241 |     videoEncoderHandle m_handle = (videoEncoderHandle)(Self->m_handle);
242 |     videoEncoder_destroy(m_handle);
243 |     Py_TYPE(Self)->tp_free((PyObject*)Self);
244 | }
245 | 
246 | 
247 | static PyObject* VideoEncoder_Str(NvCodec* Self)
248 | {
249 |     return Py_BuildValue("s", "<nvcodec-python.VideoEncoder>");
250 | }
251 | 
252 | static PyObject* VideoEncoder_Repr(NvCodec* Self)
253 | {
254 |     return VideoEncoder_Str(Self);
255 | }
256 | 
257 | static void VideoEncoder_init(NvCodec* Self, PyObject* pArgs)
258 | {
259 |     unsigned int width,height;
260 |     if(!PyArg_ParseTuple(pArgs, "II", &width, &height)){
261 |         PyErr_SetString(PyExc_ValueError, "Parse the argument FAILED! You should pass width and height!");
262 |         return;
263 |     }
264 |     Self->m_handle = (long long)(videoEncoder_init(width, height));
265 | }
266 | 
267 | static PyTypeObject VideoEncoder_ClassInfo =
268 | {
269 |         PyVarObject_HEAD_INIT(NULL, 0)"NvCodec.VideoEncoder",
270 |         sizeof(NvCodec),
271 |         0,
272 |         (destructor)VideoEncoder_Destruct,
273 |         NULL,NULL,NULL,NULL,
274 |         (reprfunc)VideoEncoder_Repr,
275 |         NULL,NULL,NULL,NULL,NULL,
276 |         (reprfunc)VideoEncoder_Str,
277 |         NULL,NULL,NULL,
278 |         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
279 |         "NvCodec Python VideoEncoder Objects --- Extensioned by nvcodec",
280 |         NULL,NULL,NULL,0,NULL,NULL,
281 |         VideoEncoder_MethodMembers,
282 |         NvCodec_DataMembers,
283 |         NULL,NULL,NULL,NULL,NULL,0,
284 |         (initproc)VideoEncoder_init,
285 |         NULL,
286 | };
287 | 
288 | 
289 | 
290 | 
291 | void NvCodec_module_destroy(void *_){
292 |     // Pass
293 | }
294 | 
295 | static PyModuleDef ModuleInfo =
296 | {
297 |         PyModuleDef_HEAD_INIT,
298 |         "NvCodec Module",
299 |         "NvCodec by NvCodec",
300 |         -1,
301 |         NULL, NULL, NULL, NULL,
302 |         NvCodec_module_destroy
303 | };
304 | 
305 | PyMODINIT_FUNC
306 | PyInit_nvcodec(void) {    
307 |     PyObject * pReturn = NULL;
308 | 
309 |     VideoSource_ClassInfo.tp_new = PyType_GenericNew;
310 |     if(PyType_Ready(&VideoSource_ClassInfo) < 0)
311 |         return NULL;
312 |     
313 |     VideoDecoder_ClassInfo.tp_new = PyType_GenericNew;
314 |     if(PyType_Ready(&VideoDecoder_ClassInfo) < 0)
315 |         return NULL;
316 |     
317 |     VideoEncoder_ClassInfo.tp_new = PyType_GenericNew;
318 |     if(PyType_Ready(&VideoEncoder_ClassInfo) < 0)
319 |         return NULL;
320 | 
321 |     pReturn = PyModule_Create(&ModuleInfo);
322 |     if(pReturn == NULL)
323 |         return NULL;
324 | 
325 |     Py_INCREF(&ModuleInfo);
326 |     PyModule_AddObject(pReturn, "VideoSource", (PyObject*)&VideoSource_ClassInfo);
327 |     PyModule_AddObject(pReturn, "VideoDecoder", (PyObject*)&VideoDecoder_ClassInfo);
328 |     PyModule_AddObject(pReturn, "VideoEncoder", (PyObject*)&VideoEncoder_ClassInfo);
329 |     import_array();
330 |     return pReturn;
331 | }
332 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # #!/usr/bin/env python
 2 | import sys
 3 | import os
 4 | import glob
 5 | from setuptools import setup, find_packages, Extension
 6 | from distutils.command.build_ext import build_ext
 7 | import numpy as np
 8 | 
 9 | libpath = os.path.abspath(os.path.join(os.path.dirname(__file__), './build/lib'))
10 | 
11 | class custom_build_ext(build_ext):
12 |     def build_extensions(self):
13 |         os.system('make lib_cuda')
14 |         build_ext.build_extensions(self)
15 | 
16 | 
17 | with open("README.md", "r", encoding="utf-8") as fh:
18 |     long_description = fh.read()
19 | 
20 | nvcodec_dir = '/usr/local/lib'
21 | if 'VIRTUAL_ENV' in os.environ:
22 |     nvcodec_dir = os.path.join(os.environ['VIRTUAL_ENV'], 'lib')
23 | 
24 | sources = ['nvcodec-python.cpp'] + glob.glob('src/**/*.cpp', recursive=True)
25 | 
26 | module = Extension('nvcodec', sources=sources, language='c++', 
27 | include_dirs=['src', 'src/cuvid', '/usr/local/cuda/include',np.get_include(),], 
28 | library_dirs=['build/lib', '/usr/local/cuda-11.2/targets/x86_64-linux/lib'],
29 | libraries=['avformat', 'avcodec','avutil','nvcuvid','nvidia-encode','cuda', 'stdc++', 'm', 'cudart', 'color_space'],
30 | )
31 | 
32 | from distutils.core import setup, Extension
33 | setup(name='pynvcodec',
34 |     version='0.0.6',
35 |     ext_modules=[module],
36 |     cmdclass={'build_ext': custom_build_ext},
37 |     author="Usingnet",
38 |     author_email="developer@usingnet.com",
39 |     license="MIT",
40 |     description="Python interface for nvcodec. Encode/Decode H264 with Nvidia GPU Hardware Acceleration.",
41 |     long_description=long_description,
42 |     long_description_content_type="text/markdown",
43 |     url="https://github.com/UsingNet/nvcodec-python",
44 |     # packages=setuptools.find_packages(),
45 |     classifiers=[
46 |         "Development Status :: 4 - Beta",
47 |         "Programming Language :: Python :: 3 :: Only",
48 |         "License :: OSI Approved :: MIT License",
49 |         "Operating System :: POSIX :: Linux",
50 |         "Environment :: GPU :: NVIDIA CUDA :: 11.0",
51 |     ],
52 |     keywords=[
53 |         "pynvcodec",
54 |         "nvcodec",
55 |         "h264",
56 |         "encode",
57 |         "decode",
58 |         "h264 encode",
59 |         "h264 decode",
60 |         "gpu",
61 |         "nvidia"
62 |     ],
63 |     python_requires=">=3.6",
64 |     project_urls={
65 |         'Source': 'https://github.com/UsingNet/nvcodec-python',
66 |         'Tracker': 'https://github.com/UsingNet/nvcodec-python/issues',
67 |     },
68 |     install_requires=['numpy>=1.17']
69 | )


--------------------------------------------------------------------------------
/src/cuvid/AppDecUtils.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
  3 | *
  4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 | * with this source code for terms and conditions that govern your use of
  6 | * this software. Any use, reproduction, disclosure, or distribution of
  7 | * this software and related documentation outside the terms of the EULA
  8 | * is strictly prohibited.
  9 | *
 10 | */
 11 | 
 12 | //---------------------------------------------------------------------------
 13 | //! \file AppDecUtils.h
 14 | //! \brief Header file containing definitions of miscellaneous functions used by Decode samples
 15 | //---------------------------------------------------------------------------
 16 | 
 17 | #pragma once
 18 | #include <sstream>
 19 | #include <iostream>
 20 | 
 21 | static void ShowHelpAndExit(const char *szBadOption, char *szOutputFileName, bool *pbVerbose, int *piD3d)
 22 | {
 23 |     std::ostringstream oss;
 24 |     bool bThrowError = false;
 25 |     if (szBadOption) {
 26 |         bThrowError = false;
 27 |         oss << "Error parsing \"" << szBadOption << "\"" << std::endl;
 28 |     }
 29 |     std::cout << "Options:" << std::endl
 30 |         << "-i           Input file path" << std::endl
 31 |         << (szOutputFileName ? "-o           Output file path\n" : "")
 32 |         << "-gpu         Ordinal of GPU to use" << std::endl
 33 |         << (pbVerbose        ? "-v           Verbose message\n" : "")
 34 |         << (piD3d            ? "-d3d         9 (default): display with D3D9; 11: display with D3D11\n" : "")
 35 |         ;
 36 |     if (bThrowError) {
 37 |         throw std::invalid_argument(oss.str());
 38 |     }
 39 |     else {
 40 |         std::cout << oss.str();
 41 |         exit(0);
 42 |     }
 43 | }
 44 | 
 45 | static void ParseCommandLine(int argc, char *argv[], char *szInputFileName,
 46 |     char *szOutputFileName, int &iGpu, bool *pbVerbose = NULL, int *piD3d = NULL) 
 47 | {
 48 |     std::ostringstream oss;
 49 |     int i;
 50 |     for (i = 1; i < argc; i++) {
 51 |         if (!_stricmp(argv[i], "-h")) {
 52 |             ShowHelpAndExit(NULL, szOutputFileName, pbVerbose, piD3d);
 53 |         }
 54 |         if (!_stricmp(argv[i], "-i")) {
 55 |             if (++i == argc) {
 56 |                 ShowHelpAndExit("-i", szOutputFileName, pbVerbose, piD3d);
 57 |             }
 58 |             sprintf(szInputFileName, "%s", argv[i]);
 59 |             continue;
 60 |         }
 61 |         if (!_stricmp(argv[i], "-o")) {
 62 |             if (++i == argc || !szOutputFileName) {
 63 |                 ShowHelpAndExit("-o", szOutputFileName, pbVerbose, piD3d);
 64 |             }
 65 |             sprintf(szOutputFileName, "%s", argv[i]);
 66 |             continue;
 67 |         }
 68 |         if (!_stricmp(argv[i], "-gpu")) {
 69 |             if (++i == argc) {
 70 |                 ShowHelpAndExit("-gpu", szOutputFileName, pbVerbose, piD3d);
 71 |             }
 72 |             iGpu = atoi(argv[i]);
 73 |             continue;
 74 |         }
 75 |         if (!_stricmp(argv[i], "-v")) {
 76 |             if (!pbVerbose) {
 77 |                 ShowHelpAndExit("-v", szOutputFileName, pbVerbose, piD3d);
 78 |             }
 79 |             *pbVerbose = true;
 80 |             continue;
 81 |         }
 82 |         if (!_stricmp(argv[i], "-d3d")) {
 83 |             if (++i == argc || !piD3d) {
 84 |                 ShowHelpAndExit("-d3d", szOutputFileName, pbVerbose, piD3d);
 85 |             }
 86 |             *piD3d = atoi(argv[i]);
 87 |             continue;
 88 |         }
 89 |         ShowHelpAndExit(argv[i], szOutputFileName, pbVerbose, piD3d);
 90 |     }
 91 | }
 92 | 
 93 | /**
 94 | *   @brief  Function to generate space-separated list of supported video surface formats
 95 | *   @param  nOutputFormatMask - Bit mask to represent supported cudaVideoSurfaceFormat in decoder
 96 | *   @param  OutputFormats     - Variable into which output string is written
 97 | */
 98 | static void getOutputFormatNames(unsigned short nOutputFormatMask, char *OutputFormats)
 99 | {
100 |     if (nOutputFormatMask == 0) {
101 |         strcpy(OutputFormats, "N/A");
102 |         return;
103 |     }
104 | 
105 |     if (nOutputFormatMask & (1U << cudaVideoSurfaceFormat_NV12)) {
106 |         strcat(OutputFormats, "NV12 ");
107 |     }
108 | 
109 |     if (nOutputFormatMask & (1U << cudaVideoSurfaceFormat_P016)) {
110 |         strcat(OutputFormats, "P016 ");
111 |     }
112 | 
113 |     if (nOutputFormatMask & (1U << cudaVideoSurfaceFormat_YUV444)) {
114 |         strcat(OutputFormats, "YUV444 ");
115 |     }
116 | 
117 |     if (nOutputFormatMask & (1U << cudaVideoSurfaceFormat_YUV444_16Bit)) {
118 |         strcat(OutputFormats, "YUV444P16 ");
119 |     }
120 |     return;
121 | }
122 | 
123 | /**
124 | *   @brief  Utility function to create CUDA context
125 | *   @param  cuContext - Pointer to CUcontext. Updated by this function.
126 | *   @param  iGpu      - Device number to get handle for
127 | */
128 | static void createCudaContext(CUcontext* cuContext, int iGpu, unsigned int flags)
129 | {
130 |     CUdevice cuDevice = 0;
131 |     ck(cuDeviceGet(&cuDevice, iGpu));
132 |     char szDeviceName[80];
133 |     ck(cuDeviceGetName(szDeviceName, sizeof(szDeviceName), cuDevice));
134 |     // std::cout << "GPU in use: " << szDeviceName << std::endl;
135 |     ck(cuCtxCreate(cuContext, flags, cuDevice));
136 | }
137 | 
138 | /**
139 | *   @brief  Print decoder capabilities on std::cout
140 | */
141 | static void ShowDecoderCapability()
142 | {
143 |     ck(cuInit(0));
144 |     int nGpu = 0;
145 |     ck(cuDeviceGetCount(&nGpu));
146 |     std::cout << "Decoder Capability" << std::endl << std::endl;
147 |     const char *aszCodecName[] = {"JPEG", "MPEG1", "MPEG2", "MPEG4", "H264", "HEVC", "HEVC", "HEVC", "HEVC", "HEVC", "HEVC", "VC1", "VP8", "VP9", "VP9", "VP9", "AV1", "AV1", "AV1", "AV1"};
148 |     const char *aszChromaFormat[] = { "4:0:0", "4:2:0", "4:2:2", "4:4:4" };
149 |     char strOutputFormats[64];
150 |     cudaVideoCodec aeCodec[] = { cudaVideoCodec_JPEG, cudaVideoCodec_MPEG1, cudaVideoCodec_MPEG2, cudaVideoCodec_MPEG4, cudaVideoCodec_H264, cudaVideoCodec_HEVC,
151 |         cudaVideoCodec_HEVC, cudaVideoCodec_HEVC, cudaVideoCodec_HEVC, cudaVideoCodec_HEVC, cudaVideoCodec_HEVC, cudaVideoCodec_VC1, cudaVideoCodec_VP8,
152 |         cudaVideoCodec_VP9, cudaVideoCodec_VP9, cudaVideoCodec_VP9, cudaVideoCodec_AV1, cudaVideoCodec_AV1, cudaVideoCodec_AV1, cudaVideoCodec_AV1 };
153 |     int anBitDepthMinus8[] = {0, 0, 0, 0, 0, 0, 2, 4, 0, 2, 4, 0, 0, 0, 2, 4, 0, 2, 0, 2};
154 | 
155 |     cudaVideoChromaFormat aeChromaFormat[] = { cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420,
156 |         cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_444, cudaVideoChromaFormat_444,
157 |         cudaVideoChromaFormat_444, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_420,
158 |         cudaVideoChromaFormat_420, cudaVideoChromaFormat_420, cudaVideoChromaFormat_Monochrome, cudaVideoChromaFormat_Monochrome };
159 | 
160 |     for (int iGpu = 0; iGpu < nGpu; iGpu++) {
161 | 
162 |         CUcontext cuContext = NULL;
163 |         createCudaContext(&cuContext, iGpu, 0);
164 | 
165 |         for (int i = 0; i < sizeof(aeCodec) / sizeof(aeCodec[0]); i++) {
166 | 
167 |             CUVIDDECODECAPS decodeCaps = {};
168 |             decodeCaps.eCodecType = aeCodec[i];
169 |             decodeCaps.eChromaFormat = aeChromaFormat[i];
170 |             decodeCaps.nBitDepthMinus8 = anBitDepthMinus8[i];
171 | 
172 |             cuvidGetDecoderCaps(&decodeCaps);
173 | 
174 |             strOutputFormats[0] = '\0';
175 |             getOutputFormatNames(decodeCaps.nOutputFormatMask, strOutputFormats);
176 | 
177 |             // setw() width = maximum_width_of_string + 2 spaces
178 |             std::cout << "Codec  " << std::left << std::setw(7) << aszCodecName[i] <<
179 |                 "BitDepth  " << std::setw(4) << decodeCaps.nBitDepthMinus8 + 8 <<
180 |                 "ChromaFormat  " << std::setw(7) << aszChromaFormat[decodeCaps.eChromaFormat] <<
181 |                 "Supported  " << std::setw(3) << (int)decodeCaps.bIsSupported <<
182 |                 "MaxWidth  " << std::setw(7) << decodeCaps.nMaxWidth <<
183 |                 "MaxHeight  " << std::setw(7) << decodeCaps.nMaxHeight <<
184 |                 "MaxMBCount  " << std::setw(10) << decodeCaps.nMaxMBCount <<
185 |                 "MinWidth  " << std::setw(5) << decodeCaps.nMinWidth <<
186 |                 "MinHeight  " << std::setw(5) << decodeCaps.nMinHeight <<
187 |                 "SurfaceFormat  " << std::setw(11) << strOutputFormats << std::endl;
188 |         }
189 | 
190 |         std::cout << std::endl;
191 | 
192 |         ck(cuCtxDestroy(cuContext));
193 |     }
194 | }
195 | 


--------------------------------------------------------------------------------
/src/cuvid/Logger.cpp:
--------------------------------------------------------------------------------
1 | #include "cuvid/Utils/Logger.h"
2 | 
3 | extern simplelogger::Logger *logger = simplelogger::LoggerFactory::CreateConsoleLogger();
4 | 


--------------------------------------------------------------------------------
/src/cuvid/NvDecoder/NvDecoder.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
  3 | *
  4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 | * with this source code for terms and conditions that govern your use of
  6 | * this software. Any use, reproduction, disclosure, or distribution of
  7 | * this software and related documentation outside the terms of the EULA
  8 | * is strictly prohibited.
  9 | *
 10 | */
 11 | 
 12 | #pragma once
 13 | 
 14 | #include <assert.h>
 15 | #include <stdint.h>
 16 | #include <mutex>
 17 | #include <vector>
 18 | #include <string>
 19 | #include <iostream>
 20 | #include <sstream>
 21 | #include <string.h>
 22 | #include "../nvcuvid.h"
 23 | #include "../Utils/NvCodecUtils.h"
 24 | 
 25 | /**
 26 | * @brief Exception class for error reporting from the decode API.
 27 | */
 28 | class NVDECException : public std::exception
 29 | {
 30 | public:
 31 |     NVDECException(const std::string& errorStr, const CUresult errorCode)
 32 |         : m_errorString(errorStr), m_errorCode(errorCode) {}
 33 | 
 34 |     virtual ~NVDECException() throw() {}
 35 |     virtual const char* what() const throw() { return m_errorString.c_str(); }
 36 |     CUresult  getErrorCode() const { return m_errorCode; }
 37 |     const std::string& getErrorString() const { return m_errorString; }
 38 |     static NVDECException makeNVDECException(const std::string& errorStr, const CUresult errorCode,
 39 |         const std::string& functionName, const std::string& fileName, int lineNo);
 40 | private:
 41 |     std::string m_errorString;
 42 |     CUresult m_errorCode;
 43 | };
 44 | 
 45 | inline NVDECException NVDECException::makeNVDECException(const std::string& errorStr, const CUresult errorCode, const std::string& functionName,
 46 |     const std::string& fileName, int lineNo)
 47 | {
 48 |     std::ostringstream errorLog;
 49 |     errorLog << functionName << " : " << errorStr << " at " << fileName << ":" << lineNo << std::endl;
 50 |     NVDECException exception(errorLog.str(), errorCode);
 51 |     return exception;
 52 | }
 53 | 
 54 | #define NVDEC_THROW_ERROR( errorStr, errorCode )                                                         \
 55 |     do                                                                                                   \
 56 |     {                                                                                                    \
 57 |         throw NVDECException::makeNVDECException(errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__); \
 58 |     } while (0)
 59 | 
 60 | 
 61 | #define NVDEC_API_CALL( cuvidAPI )                                                                                 \
 62 |     do                                                                                                             \
 63 |     {                                                                                                              \
 64 |         CUresult errorCode = cuvidAPI;                                                                             \
 65 |         if( errorCode != CUDA_SUCCESS)                                                                             \
 66 |         {                                                                                                          \
 67 |             std::ostringstream errorLog;                                                                           \
 68 |             errorLog << #cuvidAPI << " returned error " << errorCode;                                              \
 69 |             throw NVDECException::makeNVDECException(errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__); \
 70 |         }                                                                                                          \
 71 |     } while (0)
 72 | 
 73 | struct Rect {
 74 |     int l, t, r, b;
 75 | };
 76 | 
 77 | struct Dim {
 78 |     int w, h;
 79 | };
 80 | 
 81 | /**
 82 | * @brief Base class for decoder interface.
 83 | */
 84 | class NvDecoder {
 85 | public:
 86 |     int *decodeResult = nullptr;
 87 | public:
 88 |     /**
 89 |     *  @brief This function is used to initialize the decoder session.
 90 |     *  Application must call this function to initialize the decoder, before
 91 |     *  starting to decode any frames.
 92 |     */
 93 |     NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec, bool bLowLatency = false,
 94 |               bool bDeviceFramePitched = false, const Rect *pCropRect = NULL, const Dim *pResizeDim = NULL,
 95 |               int maxWidth = 0, int maxHeight = 0, unsigned int clkRate = 1000);
 96 |     ~NvDecoder();
 97 | 
 98 |     /**
 99 |     *  @brief  This function is used to get the current CUDA context.
100 |     */
101 |     CUcontext GetContext() { return m_cuContext; }
102 | 
103 |     /**
104 |     *  @brief  This function is used to get the output frame width.
105 |     *  NV12/P016 output format width is 2 byte aligned because of U and V interleave
106 |     */
107 |     int GetWidth() { assert(m_nWidth); return (m_eOutputFormat == cudaVideoSurfaceFormat_NV12 || m_eOutputFormat == cudaVideoSurfaceFormat_P016) 
108 |                                                 ? (m_nWidth + 1) & ~1 : m_nWidth; }
109 | 
110 |     /**
111 |     *  @brief  This function is used to get the actual decode width
112 |     */
113 |     int GetDecodeWidth() { assert(m_nWidth); return m_nWidth; }
114 | 
115 |     /**
116 |     *  @brief  This function is used to get the output frame height (Luma height).
117 |     */
118 |     int GetHeight() { assert(m_nLumaHeight); return m_nLumaHeight; }
119 | 
120 |     /**
121 |     *  @brief  This function is used to get the current chroma height.
122 |     */
123 |     int GetChromaHeight() { assert(m_nChromaHeight); return m_nChromaHeight; }
124 | 
125 |     /**
126 |     *  @brief  This function is used to get the number of chroma planes.
127 |     */
128 |     int GetNumChromaPlanes() { assert(m_nNumChromaPlanes); return m_nNumChromaPlanes; }
129 |     
130 |     /**
131 |     *   @brief  This function is used to get the current frame size based on pixel format.
132 |     */
133 |     int GetFrameSize() { assert(m_nWidth); return GetWidth() * (m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes)) * m_nBPP; }
134 | 
135 |     /**
136 |     *   @brief  This function is used to get the current frame Luma plane size.
137 |     */
138 |     int GetLumaPlaneSize() { assert(m_nWidth); return GetWidth() * m_nLumaHeight * m_nBPP; }
139 | 
140 |     /**
141 |     *   @brief  This function is used to get the current frame chroma plane size.
142 |     */
143 |     int GetChromaPlaneSize() { assert(m_nWidth); return GetWidth() *  (m_nChromaHeight * m_nNumChromaPlanes) * m_nBPP; }
144 | 
145 |     /**
146 |     *  @brief  This function is used to get the pitch of the device buffer holding the decoded frame.
147 |     */
148 |     int GetDeviceFramePitch() { assert(m_nWidth); return m_nDeviceFramePitch ? (int)m_nDeviceFramePitch : GetWidth() * m_nBPP; }
149 | 
150 |     /**
151 |     *   @brief  This function is used to get the bit depth associated with the pixel format.
152 |     */
153 |     int GetBitDepth() { assert(m_nWidth); return m_nBitDepthMinus8 + 8; }
154 | 
155 |     /**
156 |     *   @brief  This function is used to get the bytes used per pixel.
157 |     */
158 |     int GetBPP() { assert(m_nWidth); return m_nBPP; }
159 | 
160 |     /**
161 |     *   @brief  This function is used to get the YUV chroma format
162 |     */
163 |     cudaVideoSurfaceFormat GetOutputFormat() { return m_eOutputFormat; }
164 | 
165 |     /**
166 |     *   @brief  This function is used to get information about the video stream (codec, display parameters etc)
167 |     */
168 |     CUVIDEOFORMAT GetVideoFormatInfo() { assert(m_nWidth); return m_videoFormat; }
169 | 
170 |     /**
171 |     *   @brief  This function is used to get codec string from codec id
172 |     */
173 |     const char *GetCodecString(cudaVideoCodec eCodec);
174 | 
175 |     /**
176 |     *   @brief  This function is used to print information about the video stream
177 |     */
178 |     std::string GetVideoInfo() const { return m_videoInfo.str(); }
179 | 
180 |     /**
181 |     *   @brief  This function decodes a frame and returns the number of frames that are available for
182 |     *   display. All frames that are available for display should be read before making a subsequent decode call.
183 |     *   @param  pData - pointer to the data buffer that is to be decoded
184 |     *   @param  nSize - size of the data buffer in bytes
185 |     *   @param  nFlags - CUvideopacketflags for setting decode options
186 |     *   @param  nTimestamp - presentation timestamp
187 |     */
188 |     int Decode(const uint8_t *pData, int nSize, int nFlags = 0, int64_t nTimestamp = 0);
189 | 
190 |     /**
191 |     *   @brief  This function returns a decoded frame and timestamp. This function should be called in a loop for
192 |     *   fetching all the frames that are available for display.
193 |     */
194 |     uint8_t* GetFrame(int64_t* pTimestamp = nullptr);
195 | 
196 | 
197 |     /**
198 |     *   @brief  This function decodes a frame and returns the locked frame buffers
199 |     *   This makes the buffers available for use by the application without the buffers
200 |     *   getting overwritten, even if subsequent decode calls are made. The frame buffers
201 |     *   remain locked, until UnlockFrame() is called
202 |     */
203 |     uint8_t* GetLockedFrame(int64_t* pTimestamp = nullptr);
204 | 
205 |     /**
206 |     *   @brief  This function unlocks the frame buffer and makes the frame buffers available for write again
207 |     *   @param  ppFrame - pointer to array of frames that are to be unlocked	
208 |     *   @param  nFrame - number of frames to be unlocked
209 |     */
210 |     void UnlockFrame(uint8_t **pFrame);
211 | 
212 |     /**
213 |     *   @brief  This function allows app to set decoder reconfig params
214 |     *   @param  pCropRect - cropping rectangle coordinates
215 |     *   @param  pResizeDim - width and height of resized output
216 |     */
217 |     int setReconfigParams(const Rect * pCropRect, const Dim * pResizeDim);
218 | 
219 |     /**
220 |     *   @brief  This function allows app to set operating point for AV1 SVC clips
221 |     *   @param  opPoint - operating point of an AV1 scalable bitstream
222 |     *   @param  bDispAllLayers - Output all decoded frames of an AV1 scalable bitstream
223 |     */
224 |     void SetOperatingPoint(const uint32_t opPoint, const bool bDispAllLayers) { m_nOperatingPoint = opPoint; m_bDispAllLayers = bDispAllLayers; }
225 | 
226 |     // start a timer
227 |     void   startTimer() { m_stDecode_time.Start(); }
228 | 
229 |     // stop the timer
230 |     double stopTimer() { return m_stDecode_time.Stop(); }
231 | private:
232 |     /**
233 |     *   @brief  Callback function to be registered for getting a callback when decoding of sequence starts
234 |     */
235 |     static int CUDAAPI HandleVideoSequenceProc(void *pUserData, CUVIDEOFORMAT *pVideoFormat) { return ((NvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat); }
236 | 
237 |     /**
238 |     *   @brief  Callback function to be registered for getting a callback when a decoded frame is ready to be decoded
239 |     */
240 |     static int CUDAAPI HandlePictureDecodeProc(void *pUserData, CUVIDPICPARAMS *pPicParams) { return ((NvDecoder *)pUserData)->HandlePictureDecode(pPicParams); }
241 | 
242 |     /**
243 |     *   @brief  Callback function to be registered for getting a callback when a decoded frame is available for display
244 |     */
245 |     static int CUDAAPI HandlePictureDisplayProc(void *pUserData, CUVIDPARSERDISPINFO *pDispInfo) { return ((NvDecoder *)pUserData)->HandlePictureDisplay(pDispInfo); }
246 | 
247 |     /**
248 |     *   @brief  Callback function to be registered for getting a callback to get operating point when AV1 SVC sequence header start.
249 |     */
250 |     static int CUDAAPI HandleOperatingPointProc(void *pUserData, CUVIDOPERATINGPOINTINFO *pOPInfo) { return ((NvDecoder *)pUserData)->GetOperatingPoint(pOPInfo); }
251 | 
252 |     /**
253 |     *   @brief  This function gets called when a sequence is ready to be decoded. The function also gets called
254 |         when there is format change
255 |     */
256 |     int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);
257 | 
258 |     /**
259 |     *   @brief  This function gets called when a picture is ready to be decoded. cuvidDecodePicture is called from this function
260 |     *   to decode the picture
261 |     */
262 |     int HandlePictureDecode(CUVIDPICPARAMS *pPicParams);
263 | 
264 |     /**
265 |     *   @brief  This function gets called after a picture is decoded and available for display. Frames are fetched and stored in 
266 |         internal buffer
267 |     */
268 |     int HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo);
269 | 
270 |     /**
271 |     *   @brief  This function gets called when AV1 sequence encounter more than one operating points
272 |     */
273 |     int GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo);
274 |     /**
275 |     *   @brief  This function reconfigure decoder if there is a change in sequence params.
276 |     */
277 |     int ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat);
278 | 
279 | private:
280 |     CUcontext m_cuContext = NULL;
281 |     CUvideoctxlock m_ctxLock;
282 |     CUvideoparser m_hParser = NULL;
283 |     CUvideodecoder m_hDecoder = NULL;
284 |     bool m_bUseDeviceFrame;
285 |     // dimension of the output
286 |     unsigned int m_nWidth = 0, m_nLumaHeight = 0, m_nChromaHeight = 0;
287 |     unsigned int m_nNumChromaPlanes = 0;
288 |     // height of the mapped surface 
289 |     int m_nSurfaceHeight = 0;
290 |     int m_nSurfaceWidth = 0;
291 |     cudaVideoCodec m_eCodec = cudaVideoCodec_NumCodecs;
292 |     cudaVideoChromaFormat m_eChromaFormat = cudaVideoChromaFormat_420;
293 |     cudaVideoSurfaceFormat m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
294 |     int m_nBitDepthMinus8 = 0;
295 |     int m_nBPP = 1;
296 |     CUVIDEOFORMAT m_videoFormat = {};
297 |     Rect m_displayRect = {};
298 |     // stock of frames
299 |     std::vector<uint8_t *> m_vpFrame;
300 |     // timestamps of decoded frames
301 |     std::vector<int64_t> m_vTimestamp;
302 |     int m_nDecodedFrame = 0, m_nDecodedFrameReturned = 0;
303 |     int m_nDecodePicCnt = 0, m_nPicNumInDecodeOrder[32];
304 |     bool m_bEndDecodeDone = false;
305 |     std::mutex m_mtxVPFrame;
306 |     int m_nFrameAlloc = 0;
307 |     CUstream m_cuvidStream = 0;
308 |     bool m_bDeviceFramePitched = false;
309 |     size_t m_nDeviceFramePitch = 0;
310 |     Rect m_cropRect = {};
311 |     Dim m_resizeDim = {};
312 | 
313 |     std::ostringstream m_videoInfo;
314 |     unsigned int m_nMaxWidth = 0, m_nMaxHeight = 0;
315 |     bool m_bReconfigExternal = false;
316 |     bool m_bReconfigExtPPChange = false;
317 |     StopWatch m_stDecode_time;
318 | 
319 |     unsigned int m_nOperatingPoint = 0;
320 |     bool  m_bDispAllLayers = false;
321 | };
322 | 


--------------------------------------------------------------------------------
/src/cuvid/NvEncoder/NvEncoder.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
  3 | *
  4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 | * with this source code for terms and conditions that govern your use of
  6 | * this software. Any use, reproduction, disclosure, or distribution of
  7 | * this software and related documentation outside the terms of the EULA
  8 | * is strictly prohibited.
  9 | *
 10 | */
 11 | 
 12 | #pragma once
 13 | 
 14 | #include <vector>
 15 | #include "nvEncodeAPI.h"
 16 | #include <stdint.h>
 17 | #include <mutex>
 18 | #include <string>
 19 | #include <iostream>
 20 | #include <sstream>
 21 | #include <string.h>
 22 | 
 23 | /**
 24 | * @brief Exception class for error reporting from NvEncodeAPI calls.
 25 | */
 26 | class NVENCException : public std::exception
 27 | {
 28 | public:
 29 |     NVENCException(const std::string& errorStr, const NVENCSTATUS errorCode)
 30 |         : m_errorString(errorStr), m_errorCode(errorCode) {}
 31 | 
 32 |     virtual ~NVENCException() throw() {}
 33 |     virtual const char* what() const throw() { return m_errorString.c_str(); }
 34 |     NVENCSTATUS  getErrorCode() const { return m_errorCode; }
 35 |     const std::string& getErrorString() const { return m_errorString; }
 36 |     static NVENCException makeNVENCException(const std::string& errorStr, const NVENCSTATUS errorCode,
 37 |         const std::string& functionName, const std::string& fileName, int lineNo);
 38 | private:
 39 |     std::string m_errorString;
 40 |     NVENCSTATUS m_errorCode;
 41 | };
 42 | 
 43 | inline NVENCException NVENCException::makeNVENCException(const std::string& errorStr, const NVENCSTATUS errorCode, const std::string& functionName,
 44 |     const std::string& fileName, int lineNo)
 45 | {
 46 |     std::ostringstream errorLog;
 47 |     errorLog << functionName << " : " << errorStr << " at " << fileName << ":" << lineNo << std::endl;
 48 |     NVENCException exception(errorLog.str(), errorCode);
 49 |     return exception;
 50 | }
 51 | 
 52 | #define NVENC_THROW_ERROR( errorStr, errorCode )                                                         \
 53 |     do                                                                                                   \
 54 |     {                                                                                                    \
 55 |         throw NVENCException::makeNVENCException(errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__); \
 56 |     } while (0)
 57 | 
 58 | 
 59 | #define NVENC_API_CALL( nvencAPI )                                                                                 \
 60 |     do                                                                                                             \
 61 |     {                                                                                                              \
 62 |         NVENCSTATUS errorCode = nvencAPI;                                                                          \
 63 |         if( errorCode != NV_ENC_SUCCESS)                                                                           \
 64 |         {                                                                                                          \
 65 |             std::ostringstream errorLog;                                                                           \
 66 |             errorLog << #nvencAPI << " returned error " << errorCode;                                              \
 67 |             throw NVENCException::makeNVENCException(errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__); \
 68 |         }                                                                                                          \
 69 |     } while (0)
 70 | 
 71 | struct NvEncInputFrame
 72 | {
 73 |     void* inputPtr = nullptr;
 74 |     uint32_t chromaOffsets[2];
 75 |     uint32_t numChromaPlanes;
 76 |     uint32_t pitch;
 77 |     uint32_t chromaPitch;
 78 |     NV_ENC_BUFFER_FORMAT bufferFormat;
 79 |     NV_ENC_INPUT_RESOURCE_TYPE resourceType;
 80 | };
 81 | 
 82 | /**
 83 | * @brief Shared base class for different encoder interfaces.
 84 | */
 85 | class NvEncoder
 86 | {
 87 | public:
 88 |     /**
 89 |     *  @brief This function is used to initialize the encoder session.
 90 |     *  Application must call this function to initialize the encoder, before
 91 |     *  starting to encode any frames.
 92 |     */
 93 |     void CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncodeParams);
 94 | 
 95 |     /**
 96 |     *  @brief  This function is used to destroy the encoder session.
 97 |     *  Application must call this function to destroy the encoder session and
 98 |     *  clean up any allocated resources. The application must call EndEncode()
 99 |     *  function to get any queued encoded frames before calling DestroyEncoder().
100 |     */
101 |     void DestroyEncoder();
102 | 
103 |     /**
104 |     *  @brief  This function is used to reconfigure an existing encoder session.
105 |     *  Application can use this function to dynamically change the bitrate,
106 |     *  resolution and other QOS parameters. If the application changes the
107 |     *  resolution, it must set NV_ENC_RECONFIGURE_PARAMS::forceIDR.
108 |     */
109 |     bool Reconfigure(const NV_ENC_RECONFIGURE_PARAMS *pReconfigureParams);
110 | 
111 |     /**
112 |     *  @brief  This function is used to get the next available input buffer.
113 |     *  Applications must call this function to obtain a pointer to the next
114 |     *  input buffer. The application must copy the uncompressed data to the
115 |     *  input buffer and then call EncodeFrame() function to encode it.
116 |     */
117 |     const NvEncInputFrame* GetNextInputFrame();
118 | 
119 | 
120 |     /**
121 |     *  @brief  This function is used to encode a frame.
122 |     *  Applications must call EncodeFrame() function to encode the uncompressed
123 |     *  data, which has been copied to an input buffer obtained from the
124 |     *  GetNextInputFrame() function.
125 |     */
126 |     void EncodeFrame(std::vector<std::vector<uint8_t>> &vPacket, NV_ENC_PIC_PARAMS *pPicParams = nullptr);
127 | 
128 |     /**
129 |     *  @brief  This function to flush the encoder queue.
130 |     *  The encoder might be queuing frames for B picture encoding or lookahead;
131 |     *  the application must call EndEncode() to get all the queued encoded frames
132 |     *  from the encoder. The application must call this function before destroying
133 |     *  an encoder session.
134 |     */
135 |     void EndEncode(std::vector<std::vector<uint8_t>> &vPacket);
136 | 
137 |     /**
138 |     *  @brief  This function is used to query hardware encoder capabilities.
139 |     *  Applications can call this function to query capabilities like maximum encode
140 |     *  dimensions, support for lookahead or the ME-only mode etc.
141 |     */
142 |     int GetCapabilityValue(GUID guidCodec, NV_ENC_CAPS capsToQuery);
143 | 
144 |     /**
145 |     *  @brief  This function is used to get the current device on which encoder is running.
146 |     */
147 |     void *GetDevice() const { return m_pDevice; }
148 | 
149 |     /**
150 |     *  @brief  This function is used to get the current device type which encoder is running.
151 |     */
152 |     NV_ENC_DEVICE_TYPE GetDeviceType() const { return m_eDeviceType; }
153 | 
154 |     /**
155 |     *  @brief  This function is used to get the current encode width.
156 |     *  The encode width can be modified by Reconfigure() function.
157 |     */
158 |     int GetEncodeWidth() const { return m_nWidth; }
159 | 
160 |     /**
161 |     *  @brief  This function is used to get the current encode height.
162 |     *  The encode height can be modified by Reconfigure() function.
163 |     */
164 |     int GetEncodeHeight() const { return m_nHeight; }
165 | 
166 |     /**
167 |     *   @brief  This function is used to get the current frame size based on pixel format.
168 |     */
169 |     int GetFrameSize() const;
170 | 
171 |     /**
172 |     *  @brief  This function is used to initialize config parameters based on
173 |     *          given codec and preset guids.
174 |     *  The application can call this function to get the default configuration
175 |     *  for a certain preset. The application can either use these parameters
176 |     *  directly or override them with application-specific settings before
177 |     *  using them in CreateEncoder() function.
178 |     */
179 |     void CreateDefaultEncoderParams(NV_ENC_INITIALIZE_PARAMS* pIntializeParams, GUID codecGuid, GUID presetGuid, NV_ENC_TUNING_INFO tuningInfo = NV_ENC_TUNING_INFO_UNDEFINED);
180 | 
181 |     /**
182 |     *  @brief  This function is used to get the current initialization parameters,
183 |     *          which had been used to configure the encoder session.
184 |     *  The initialization parameters are modified if the application calls
185 |     *  Reconfigure() function.
186 |     */
187 |     void GetInitializeParams(NV_ENC_INITIALIZE_PARAMS *pInitializeParams);
188 | 
189 |     /**
190 |     *  @brief  This function is used to run motion estimation
191 |     *  This is used to run motion estimation on a a pair of frames. The
192 |     *  application must copy the reference frame data to the buffer obtained
193 |     *  by calling GetNextReferenceFrame(), and copy the input frame data to
194 |     *  the buffer obtained by calling GetNextInputFrame() before calling the
195 |     *  RunMotionEstimation() function.
196 |     */
197 |     void RunMotionEstimation(std::vector<uint8_t> &mvData);
198 | 
199 |     /**
200 |     *  @brief This function is used to get an available reference frame.
201 |     *  Application must call this function to get a pointer to reference buffer,
202 |     *  to be used in the subsequent RunMotionEstimation() function.
203 |     */
204 |     const NvEncInputFrame* GetNextReferenceFrame();
205 | 
206 |     /**
207 |     *  @brief This function is used to get sequence and picture parameter headers.
208 |     *  Application can call this function after encoder is initialized to get SPS and PPS
209 |     *  nalus for the current encoder instance. The sequence header data might change when
210 |     *  application calls Reconfigure() function.
211 |     */
212 |     void GetSequenceParams(std::vector<uint8_t> &seqParams);
213 | 
214 |     /**
215 |     *  @brief  NvEncoder class virtual destructor.
216 |     */
217 |     virtual ~NvEncoder();
218 | 
219 | public:
220 |     /**
221 |     *  @brief This a static function to get chroma offsets for YUV planar formats.
222 |     */
223 |     static void GetChromaSubPlaneOffsets(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t pitch,
224 |                                         const uint32_t height, std::vector<uint32_t>& chromaOffsets);
225 |     /**
226 |     *  @brief This a static function to get the chroma plane pitch for YUV planar formats.
227 |     */
228 |     static uint32_t GetChromaPitch(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaPitch);
229 | 
230 |     /**
231 |     *  @brief This a static function to get the number of chroma planes for YUV planar formats.
232 |     */
233 |     static uint32_t GetNumChromaPlanes(const NV_ENC_BUFFER_FORMAT bufferFormat);
234 | 
235 |     /**
236 |     *  @brief This a static function to get the chroma plane width in bytes for YUV planar formats.
237 |     */
238 |     static uint32_t GetChromaWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaWidth);
239 | 
240 |     /**
241 |     *  @brief This a static function to get the chroma planes height in bytes for YUV planar formats.
242 |     */
243 |     static uint32_t GetChromaHeight(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaHeight);
244 | 
245 | 
246 |     /**
247 |     *  @brief This a static function to get the width in bytes for the frame.
248 |     *  For YUV planar format this is the width in bytes of the luma plane.
249 |     */
250 |     static uint32_t GetWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t width);
251 | 
252 |     /**
253 |     *  @brief This function returns the number of allocated buffers.
254 |     */
255 |     uint32_t GetEncoderBufferCount() const { return m_nEncoderBuffer; }
256 | protected:
257 | 
258 |     /**
259 |     *  @brief  NvEncoder class constructor.
260 |     *  NvEncoder class constructor cannot be called directly by the application.
261 |     */
262 |     NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void *pDevice, uint32_t nWidth, uint32_t nHeight,
263 |         NV_ENC_BUFFER_FORMAT eBufferFormat, uint32_t nOutputDelay, bool bMotionEstimationOnly, bool bOutputInVideoMemory = false);
264 | 
265 |     /**
266 |     *  @brief This function is used to check if hardware encoder is properly initialized.
267 |     */
268 |     bool IsHWEncoderInitialized() const { return m_hEncoder != NULL && m_bEncoderInitialized; }
269 | 
270 |     /**
271 |     *  @brief This function is used to register CUDA, D3D or OpenGL input buffers with NvEncodeAPI.
272 |     *  This is non public function and is called by derived class for allocating
273 |     *  and registering input buffers.
274 |     */
275 |     void RegisterInputResources(std::vector<void*> inputframes, NV_ENC_INPUT_RESOURCE_TYPE eResourceType,
276 |         int width, int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat, bool bReferenceFrame = false);
277 | 
278 |     /**
279 |     *  @brief This function is used to unregister resources which had been previously registered for encoding
280 |     *         using RegisterInputResources() function.
281 |     */
282 |     void UnregisterInputResources();
283 | 
284 |     /**
285 |     *  @brief This function is used to register CUDA, D3D or OpenGL input or output buffers with NvEncodeAPI.
286 |     */
287 |     NV_ENC_REGISTERED_PTR RegisterResource(void *pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType,
288 |         int width, int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat, NV_ENC_BUFFER_USAGE bufferUsage = NV_ENC_INPUT_IMAGE);
289 | 
290 |     /**
291 |     *  @brief This function returns maximum width used to open the encoder session.
292 |     *  All encode input buffers are allocated using maximum dimensions.
293 |     */
294 |     uint32_t GetMaxEncodeWidth() const { return m_nMaxEncodeWidth; }
295 | 
296 |     /**
297 |     *  @brief This function returns maximum height used to open the encoder session.
298 |     *  All encode input buffers are allocated using maximum dimensions.
299 |     */
300 |     uint32_t GetMaxEncodeHeight() const { return m_nMaxEncodeHeight; }
301 | 
302 |     /**
303 |     *  @brief This function returns the completion event.
304 |     */
305 |     void* GetCompletionEvent(uint32_t eventIdx) { return (m_vpCompletionEvent.size() == m_nEncoderBuffer) ? m_vpCompletionEvent[eventIdx] : nullptr; }
306 | 
307 |     /**
308 |     *  @brief This function returns the current pixel format.
309 |     */
310 |     NV_ENC_BUFFER_FORMAT GetPixelFormat() const { return m_eBufferFormat; }
311 | 
312 |     /**
313 |     *  @brief This function is used to submit the encode commands to the  
314 |     *         NVENC hardware.
315 |     */
316 |     NVENCSTATUS DoEncode(NV_ENC_INPUT_PTR inputBuffer, NV_ENC_OUTPUT_PTR outputBuffer, NV_ENC_PIC_PARAMS *pPicParams);
317 | 
318 |     /**
319 |     *  @brief This function is used to submit the encode commands to the 
320 |     *         NVENC hardware for ME only mode.
321 |     */
322 |     NVENCSTATUS DoMotionEstimation(NV_ENC_INPUT_PTR inputBuffer, NV_ENC_INPUT_PTR inputBufferForReference, NV_ENC_OUTPUT_PTR outputBuffer);
323 | 
324 |     /**
325 |     *  @brief This function is used to map the input buffers to NvEncodeAPI.
326 |     */
327 |     void MapResources(uint32_t bfrIdx);
328 | 
329 |     /**
330 |     *  @brief This function is used to wait for completion of encode command.
331 |     */
332 |     void WaitForCompletionEvent(int iEvent);
333 | 
334 |     /**
335 |     *  @brief This function is used to send EOS to HW encoder.
336 |     */
337 |     void SendEOS();
338 | 
339 | private:
340 |     /**
341 |     *  @brief This is a private function which is used to check if there is any
342 |               buffering done by encoder.
343 |     *  The encoder generally buffers data to encode B frames or for lookahead
344 |     *  or pipelining.
345 |     */
346 |     bool IsZeroDelay() { return m_nOutputDelay == 0; }
347 | 
348 |     /**
349 |     *  @brief This is a private function which is used to load the encode api shared library.
350 |     */
351 |     void LoadNvEncApi();
352 | 
353 |     /**
354 |     *  @brief This is a private function which is used to get the output packets
355 |     *         from the encoder HW.
356 |     *  This is called by DoEncode() function. If there is buffering enabled,
357 |     *  this may return without any output data.
358 |     */
359 |     void GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR> &vOutputBuffer, std::vector<std::vector<uint8_t>> &vPacket, bool bOutputDelay);
360 | 
361 |     /**
362 |     *  @brief This is a private function which is used to initialize the bitstream buffers.
363 |     *  This is only used in the encoding mode.
364 |     */
365 |     void InitializeBitstreamBuffer();
366 | 
367 |     /**
368 |     *  @brief This is a private function which is used to destroy the bitstream buffers.
369 |     *  This is only used in the encoding mode.
370 |     */
371 |     void DestroyBitstreamBuffer();
372 | 
373 |     /**
374 |     *  @brief This is a private function which is used to initialize MV output buffers.
375 |     *  This is only used in ME-only Mode.
376 |     */
377 |     void InitializeMVOutputBuffer();
378 | 
379 |     /**
380 |     *  @brief This is a private function which is used to destroy MV output buffers.
381 |     *  This is only used in ME-only Mode.
382 |     */
383 |     void DestroyMVOutputBuffer();
384 | 
385 |     /**
386 |     *  @brief This is a private function which is used to destroy HW encoder.
387 |     */
388 |     void DestroyHWEncoder();
389 | 
390 |     /**
391 |     *  @brief This function is used to flush the encoder queue.
392 |     */
393 |     void FlushEncoder();
394 | 
395 | private:
396 |     /**
397 |     *  @brief This is a pure virtual function which is used to allocate input buffers.
398 |     *  The derived classes must implement this function.
399 |     */
400 |     virtual void AllocateInputBuffers(int32_t numInputBuffers) = 0;
401 | 
402 |     /**
403 |     *  @brief This is a pure virtual function which is used to destroy input buffers.
404 |     *  The derived classes must implement this function.
405 |     */
406 |     virtual void ReleaseInputBuffers() = 0;
407 | 
408 | protected:
409 |     bool m_bMotionEstimationOnly = false;
410 |     bool m_bOutputInVideoMemory = false;
411 |     void *m_hEncoder = nullptr;
412 |     NV_ENCODE_API_FUNCTION_LIST m_nvenc;
413 |     std::vector<NvEncInputFrame> m_vInputFrames;
414 |     std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResources;
415 |     std::vector<NvEncInputFrame> m_vReferenceFrames;
416 |     std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResourcesForReference;
417 |     std::vector<NV_ENC_INPUT_PTR> m_vMappedInputBuffers;
418 |     std::vector<NV_ENC_INPUT_PTR> m_vMappedRefBuffers;
419 |     std::vector<void *> m_vpCompletionEvent;
420 | 
421 |     int32_t m_iToSend = 0;
422 |     int32_t m_iGot = 0;
423 |     int32_t m_nEncoderBuffer = 0;
424 |     int32_t m_nOutputDelay = 0;
425 | 
426 | private:
427 |     uint32_t m_nWidth;
428 |     uint32_t m_nHeight;
429 |     NV_ENC_BUFFER_FORMAT m_eBufferFormat;
430 |     void *m_pDevice;
431 |     NV_ENC_DEVICE_TYPE m_eDeviceType;
432 |     NV_ENC_INITIALIZE_PARAMS m_initializeParams = {};
433 |     NV_ENC_CONFIG m_encodeConfig = {};
434 |     bool m_bEncoderInitialized = false;
435 |     uint32_t m_nExtraOutputDelay = 3; // To ensure encode and graphics can work in parallel, m_nExtraOutputDelay should be set to at least 1
436 |     std::vector<NV_ENC_OUTPUT_PTR> m_vBitstreamOutputBuffer;
437 |     std::vector<NV_ENC_OUTPUT_PTR> m_vMVDataOutputBuffer;
438 |     uint32_t m_nMaxEncodeWidth = 0;
439 |     uint32_t m_nMaxEncodeHeight = 0;
440 | };
441 | 


--------------------------------------------------------------------------------
/src/cuvid/NvEncoder/NvEncoderCuda.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
  3 | *
  4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 | * with this source code for terms and conditions that govern your use of
  6 | * this software. Any use, reproduction, disclosure, or distribution of
  7 | * this software and related documentation outside the terms of the EULA
  8 | * is strictly prohibited.
  9 | *
 10 | */
 11 | 
 12 | #include "NvEncoder/NvEncoderCuda.h"
 13 | 
 14 | 
 15 | NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat,
 16 |     uint32_t nExtraOutputDelay, bool bMotionEstimationOnly, bool bOutputInVideoMemory):
 17 |     NvEncoder(NV_ENC_DEVICE_TYPE_CUDA, cuContext, nWidth, nHeight, eBufferFormat, nExtraOutputDelay, bMotionEstimationOnly, bOutputInVideoMemory),
 18 |     m_cuContext(cuContext)
 19 | {
 20 |     if (!m_hEncoder) 
 21 |     {
 22 |         NVENC_THROW_ERROR("Encoder Initialization failed", NV_ENC_ERR_INVALID_DEVICE);
 23 |     }
 24 | 
 25 |     if (!m_cuContext)
 26 |     {
 27 |         NVENC_THROW_ERROR("Invalid Cuda Context", NV_ENC_ERR_INVALID_DEVICE);
 28 |     }
 29 | }
 30 | 
 31 | NvEncoderCuda::~NvEncoderCuda()
 32 | {
 33 |     ReleaseCudaResources();
 34 | }
 35 | 
 36 | void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers)
 37 | {
 38 |     if (!IsHWEncoderInitialized())
 39 |     {
 40 |         NVENC_THROW_ERROR("Encoder intialization failed", NV_ENC_ERR_ENCODER_NOT_INITIALIZED);
 41 |     }
 42 | 
 43 |     // for MEOnly mode we need to allocate seperate set of buffers for reference frame
 44 |     int numCount = m_bMotionEstimationOnly ? 2 : 1;
 45 | 
 46 |     for (int count = 0; count < numCount; count++)
 47 |     {
 48 |         CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
 49 |         std::vector<void*> inputFrames;
 50 |         for (int i = 0; i < numInputBuffers; i++)
 51 |         {
 52 |             CUdeviceptr pDeviceFrame;
 53 |             uint32_t chromaHeight = GetNumChromaPlanes(GetPixelFormat()) * GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
 54 |             if (GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 || GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV)
 55 |                 chromaHeight = GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
 56 |             CUDA_DRVAPI_CALL(cuMemAllocPitch((CUdeviceptr *)&pDeviceFrame,
 57 |                 &m_cudaPitch,
 58 |                 GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()),
 59 |                 GetMaxEncodeHeight() + chromaHeight, 16));
 60 |             inputFrames.push_back((void*)pDeviceFrame);
 61 |         }
 62 |         CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
 63 | 
 64 |         RegisterInputResources(inputFrames,
 65 |             NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR,
 66 |             GetMaxEncodeWidth(),
 67 |             GetMaxEncodeHeight(),
 68 |             (int)m_cudaPitch,
 69 |             GetPixelFormat(),
 70 |             (count == 1) ? true : false);
 71 |     }
 72 | }
 73 | 
 74 | void NvEncoderCuda::SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream)
 75 | {
 76 |     NVENC_API_CALL(m_nvenc.nvEncSetIOCudaStreams(m_hEncoder, inputStream, outputStream));
 77 | }
 78 | 
 79 | void NvEncoderCuda::ReleaseInputBuffers()
 80 | {
 81 |     ReleaseCudaResources();
 82 | }
 83 | 
 84 | void NvEncoderCuda::ReleaseCudaResources()
 85 | {
 86 |     if (!m_hEncoder)
 87 |     {
 88 |         return;
 89 |     }
 90 | 
 91 |     if (!m_cuContext)
 92 |     {
 93 |         return;
 94 |     }
 95 | 
 96 |     UnregisterInputResources();
 97 | 
 98 |     cuCtxPushCurrent(m_cuContext);
 99 | 
100 |     for (uint32_t i = 0; i < m_vInputFrames.size(); ++i)
101 |     {
102 |         if (m_vInputFrames[i].inputPtr)
103 |         {
104 |             cuMemFree(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
105 |         }
106 |     }
107 |     m_vInputFrames.clear();
108 | 
109 |     for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i)
110 |     {
111 |         if (m_vReferenceFrames[i].inputPtr)
112 |         {
113 |             cuMemFree(reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
114 |         }
115 |     }
116 |     m_vReferenceFrames.clear();
117 | 
118 |     cuCtxPopCurrent(NULL);
119 |     m_cuContext = nullptr;
120 | }
121 | 
122 | void NvEncoderCuda::CopyToDeviceFrame(CUcontext device,
123 |     void* pSrcFrame,
124 |     uint32_t nSrcPitch,
125 |     CUdeviceptr pDstFrame,
126 |     uint32_t dstPitch,
127 |     int width,
128 |     int height,
129 |     CUmemorytype srcMemoryType,
130 |     NV_ENC_BUFFER_FORMAT pixelFormat,
131 |     const uint32_t dstChromaOffsets[],
132 |     uint32_t numChromaPlanes,
133 |     bool bUnAlignedDeviceCopy,
134 |     CUstream stream)
135 | {
136 |     if (srcMemoryType != CU_MEMORYTYPE_HOST && srcMemoryType != CU_MEMORYTYPE_DEVICE)
137 |     {
138 |         NVENC_THROW_ERROR("Invalid source memory type for copy", NV_ENC_ERR_INVALID_PARAM);
139 |     }
140 | 
141 |     CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
142 | 
143 |     uint32_t srcPitch = nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
144 |     CUDA_MEMCPY2D m = { 0 };
145 |     m.srcMemoryType = srcMemoryType;
146 |     if (srcMemoryType == CU_MEMORYTYPE_HOST)
147 |     {
148 |         m.srcHost = pSrcFrame;
149 |     }
150 |     else
151 |     {
152 |         m.srcDevice = (CUdeviceptr)pSrcFrame;
153 |     }
154 |     m.srcPitch = srcPitch;
155 |     m.dstMemoryType = CU_MEMORYTYPE_DEVICE;
156 |     m.dstDevice = pDstFrame;
157 |     m.dstPitch = dstPitch;
158 |     m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
159 |     m.Height = height;
160 |     if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE)
161 |     {
162 |         CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
163 |     }
164 |     else
165 |     {
166 |         CUDA_DRVAPI_CALL(stream == NULL? cuMemcpy2D(&m) : cuMemcpy2DAsync(&m, stream));
167 |     }
168 | 
169 |     std::vector<uint32_t> srcChromaOffsets;
170 |     NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, height, srcChromaOffsets);
171 |     uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, height);
172 |     uint32_t destChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, dstPitch);
173 |     uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch);
174 |     uint32_t chromaWidthInBytes = NvEncoder::GetChromaWidthInBytes(pixelFormat, width);
175 | 
176 |     for (uint32_t i = 0; i < numChromaPlanes; ++i)
177 |     {
178 |         if (chromaHeight)
179 |         {
180 |             if (srcMemoryType == CU_MEMORYTYPE_HOST)
181 |             {
182 |                 m.srcHost = ((uint8_t *)pSrcFrame + srcChromaOffsets[i]);
183 |             }
184 |             else
185 |             {
186 |                 m.srcDevice = (CUdeviceptr)((uint8_t *)pSrcFrame + srcChromaOffsets[i]);
187 |             }
188 |             m.srcPitch = srcChromaPitch;
189 | 
190 |             m.dstDevice = (CUdeviceptr)((uint8_t *)pDstFrame + dstChromaOffsets[i]);
191 |             m.dstPitch = destChromaPitch;
192 |             m.WidthInBytes = chromaWidthInBytes;
193 |             m.Height = chromaHeight;
194 |             if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE)
195 |             {
196 |                 CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
197 |             }
198 |             else
199 |             {
200 |                 CUDA_DRVAPI_CALL(stream == NULL? cuMemcpy2D(&m) : cuMemcpy2DAsync(&m, stream));
201 |             }
202 |         }
203 |     }
204 |     CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
205 | }
206 | 
207 | void NvEncoderCuda::CopyToDeviceFrame(CUcontext device,
208 |     void* pSrcFrame,
209 |     uint32_t nSrcPitch,
210 |     CUdeviceptr pDstFrame,
211 |     uint32_t dstPitch,
212 |     int width,
213 |     int height,
214 |     CUmemorytype srcMemoryType,
215 |     NV_ENC_BUFFER_FORMAT pixelFormat,
216 |     CUdeviceptr dstChromaDevicePtrs[],
217 |     uint32_t dstChromaPitch,
218 |     uint32_t numChromaPlanes,
219 |     bool bUnAlignedDeviceCopy)
220 | {
221 |     if (srcMemoryType != CU_MEMORYTYPE_HOST && srcMemoryType != CU_MEMORYTYPE_DEVICE)
222 |     {
223 |         NVENC_THROW_ERROR("Invalid source memory type for copy", NV_ENC_ERR_INVALID_PARAM);
224 |     }
225 | 
226 |     CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
227 | 
228 |     uint32_t srcPitch = nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
229 |     CUDA_MEMCPY2D m = { 0 };
230 |     m.srcMemoryType = srcMemoryType;
231 |     if (srcMemoryType == CU_MEMORYTYPE_HOST)
232 |     {
233 |         m.srcHost = pSrcFrame;
234 |     }
235 |     else
236 |     {
237 |         m.srcDevice = (CUdeviceptr)pSrcFrame;
238 |     }
239 |     m.srcPitch = srcPitch;
240 |     m.dstMemoryType = CU_MEMORYTYPE_DEVICE;
241 |     m.dstDevice = pDstFrame;
242 |     m.dstPitch = dstPitch;
243 |     m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
244 |     m.Height = height;
245 |     if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE)
246 |     {
247 |         CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
248 |     }
249 |     else
250 |     {
251 |         CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
252 |     }
253 | 
254 |     std::vector<uint32_t> srcChromaOffsets;
255 |     NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, height, srcChromaOffsets);
256 |     uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, height);
257 |     uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch);
258 |     uint32_t chromaWidthInBytes = NvEncoder::GetChromaWidthInBytes(pixelFormat, width);
259 | 
260 |     for (uint32_t i = 0; i < numChromaPlanes; ++i)
261 |     {
262 |         if (chromaHeight)
263 |         {
264 |             if (srcMemoryType == CU_MEMORYTYPE_HOST)
265 |             {
266 |                 m.srcHost = ((uint8_t *)pSrcFrame + srcChromaOffsets[i]);
267 |             }
268 |             else
269 |             {
270 |                 m.srcDevice = (CUdeviceptr)((uint8_t *)pSrcFrame + srcChromaOffsets[i]);
271 |             }
272 |             m.srcPitch = srcChromaPitch;
273 | 
274 |             m.dstDevice = dstChromaDevicePtrs[i];
275 |             m.dstPitch = dstChromaPitch;
276 |             m.WidthInBytes = chromaWidthInBytes;
277 |             m.Height = chromaHeight;
278 |             if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE)
279 |             {
280 |                 CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
281 |             }
282 |             else
283 |             {
284 |                 CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
285 |             }
286 |         }
287 |     }
288 |     CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
289 | }
290 | 


--------------------------------------------------------------------------------
/src/cuvid/NvEncoder/NvEncoderCuda.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
  3 | *
  4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 | * with this source code for terms and conditions that govern your use of
  6 | * this software. Any use, reproduction, disclosure, or distribution of
  7 | * this software and related documentation outside the terms of the EULA
  8 | * is strictly prohibited.
  9 | *
 10 | */
 11 | 
 12 | #pragma once
 13 | 
 14 | #include <vector>
 15 | #include <stdint.h>
 16 | #include <mutex>
 17 | #include <cuda.h>
 18 | #include "NvEncoder.h"
 19 | 
 20 | #define CUDA_DRVAPI_CALL( call )                                                                                                 \
 21 |     do                                                                                                                           \
 22 |     {                                                                                                                            \
 23 |         CUresult err__ = call;                                                                                                   \
 24 |         if (err__ != CUDA_SUCCESS)                                                                                               \
 25 |         {                                                                                                                        \
 26 |             const char *szErrName = NULL;                                                                                        \
 27 |             cuGetErrorName(err__, &szErrName);                                                                                   \
 28 |             std::ostringstream errorLog;                                                                                         \
 29 |             errorLog << "CUDA driver API error " << szErrName ;                                                                  \
 30 |             throw NVENCException::makeNVENCException(errorLog.str(), NV_ENC_ERR_GENERIC, __FUNCTION__, __FILE__, __LINE__);      \
 31 |         }                                                                                                                        \
 32 |     }                                                                                                                            \
 33 |     while (0)
 34 | 
 35 | /**
 36 | *  @brief Encoder for CUDA device memory.
 37 | */
 38 | class NvEncoderCuda : public NvEncoder
 39 | {
 40 | public:
 41 |     NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat,
 42 |         uint32_t nExtraOutputDelay = 3, bool bMotionEstimationOnly = false, bool bOPInVideoMemory = false);
 43 |     virtual ~NvEncoderCuda();
 44 | 
 45 |     /**
 46 |     *  @brief This is a static function to copy input data from host memory to device memory.
 47 |     *  This function assumes YUV plane is a single contiguous memory segment.
 48 |     */
 49 |     static void CopyToDeviceFrame(CUcontext device,
 50 |         void* pSrcFrame,
 51 |         uint32_t nSrcPitch,
 52 |         CUdeviceptr pDstFrame,
 53 |         uint32_t dstPitch,
 54 |         int width,
 55 |         int height,
 56 |         CUmemorytype srcMemoryType,
 57 |         NV_ENC_BUFFER_FORMAT pixelFormat,
 58 |         const uint32_t dstChromaOffsets[],
 59 |         uint32_t numChromaPlanes,
 60 |         bool bUnAlignedDeviceCopy = false,
 61 |         CUstream stream = NULL);
 62 | 
 63 |     /**
 64 |     *  @brief This is a static function to copy input data from host memory to device memory.
 65 |     *  Application must pass a seperate device pointer for each YUV plane.
 66 |     */
 67 |     static void CopyToDeviceFrame(CUcontext device,
 68 |         void* pSrcFrame,
 69 |         uint32_t nSrcPitch,
 70 |         CUdeviceptr pDstFrame,
 71 |         uint32_t dstPitch,
 72 |         int width,
 73 |         int height,
 74 |         CUmemorytype srcMemoryType,
 75 |         NV_ENC_BUFFER_FORMAT pixelFormat,
 76 |         CUdeviceptr dstChromaPtr[],
 77 |         uint32_t dstChromaPitch,
 78 |         uint32_t numChromaPlanes,
 79 |         bool bUnAlignedDeviceCopy = false);
 80 | 
 81 |     /**
 82 |     *  @brief This function sets input and output CUDA streams
 83 |     */
 84 |     void SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream);
 85 | 
 86 | protected:
 87 |     /**
 88 |     *  @brief This function is used to release the input buffers allocated for encoding.
 89 |     *  This function is an override of virtual function NvEncoder::ReleaseInputBuffers().
 90 |     */
 91 |     virtual void ReleaseInputBuffers() override;
 92 | 
 93 | private:
 94 |     /**
 95 |     *  @brief This function is used to allocate input buffers for encoding.
 96 |     *  This function is an override of virtual function NvEncoder::AllocateInputBuffers().
 97 |     */
 98 |     virtual void AllocateInputBuffers(int32_t numInputBuffers) override;
 99 | 
100 | private:
101 |     /**
102 |     *  @brief This is a private function to release CUDA device memory used for encoding.
103 |     */
104 |     void ReleaseCudaResources();
105 | 
106 | protected:
107 |     CUcontext m_cuContext;
108 | 
109 | private:
110 |     size_t m_cudaPitch = 0;
111 | };
112 | 


--------------------------------------------------------------------------------
/src/cuvid/Utils/BitDepth.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 | * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
 3 | *
 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
 5 | * with this source code for terms and conditions that govern your use of
 6 | * this software. Any use, reproduction, disclosure, or distribution of
 7 | * this software and related documentation outside the terms of the EULA
 8 | * is strictly prohibited.
 9 | *
10 | */
11 | 
12 | #include <cuda_runtime.h>
13 | #include <stdint.h>
14 | #include <stdio.h>
15 | 
16 | static __global__ void ConvertUInt8ToUInt16Kernel(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
17 | {
18 |     int x = blockIdx.x * blockDim.x + threadIdx.x,
19 |         y = blockIdx.y * blockDim.y + threadIdx.y;
20 | 
21 |     if (x >= nWidth || y >= nHeight)
22 |     {
23 |         return;
24 |     }
25 |     int destStrideInPixels = nDestPitch / (sizeof(uint16_t));
26 |     *(uchar2 *)&dpUInt16[y * destStrideInPixels + x] = uchar2{ 0, dpUInt8[y * nSrcPitch + x] };
27 | }
28 | 
29 | static __global__ void ConvertUInt16ToUInt8Kernel(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
30 | {
31 |     int x = blockIdx.x * blockDim.x + threadIdx.x,
32 |         y = blockIdx.y * blockDim.y + threadIdx.y;
33 | 
34 |     if (x >= nWidth || y >= nHeight)
35 |     {
36 |         return;
37 |     }
38 |     int srcStrideInPixels = nSrcPitch / (sizeof(uint16_t));
39 |     dpUInt8[y * nDestPitch + x] = ((uchar2 *)&dpUInt16[y * srcStrideInPixels + x])->y;
40 | }
41 | 
42 | void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
43 | {
44 |     dim3 blockSize(16, 16, 1);
45 |     dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1);
46 |     ConvertUInt8ToUInt16Kernel <<< gridSize, blockSize >>>(dpUInt8, dpUInt16, nSrcPitch, nDestPitch, nWidth, nHeight);
47 | }
48 | 
49 | void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
50 | {
51 |     dim3 blockSize(16, 16, 1);
52 |     dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1);
53 |     ConvertUInt16ToUInt8Kernel <<<gridSize, blockSize >>>(dpUInt16, dpUInt8, nSrcPitch, nDestPitch, nWidth, nHeight);
54 | }
55 | 


--------------------------------------------------------------------------------
/src/cuvid/Utils/ColorSpace.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
  3 | *
  4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 | * with this source code for terms and conditions that govern your use of
  6 | * this software. Any use, reproduction, disclosure, or distribution of
  7 | * this software and related documentation outside the terms of the EULA
  8 | * is strictly prohibited.
  9 | *
 10 | */
 11 | 
 12 | #include <cuda_runtime.h>
 13 | #include "ColorSpace.h"
 14 | 
 15 | __constant__ float matYuv2Rgb[3][3];
 16 | __constant__ float matRgb2Yuv[3][3];
 17 | 
 18 | 
 19 | void inline GetConstants(int iMatrix, float &wr, float &wb, int &black, int &white, int &max) {
 20 |     black = 16; white = 235;
 21 |     max = 255;
 22 | 
 23 |     switch (iMatrix)
 24 |     {
 25 |     case ColorSpaceStandard_BT709:
 26 |     default:
 27 |         wr = 0.2126f; wb = 0.0722f;
 28 |         break;
 29 | 
 30 |     case ColorSpaceStandard_FCC:
 31 |         wr = 0.30f; wb = 0.11f;
 32 |         break;
 33 | 
 34 |     case ColorSpaceStandard_BT470:
 35 |     case ColorSpaceStandard_BT601:
 36 |         wr = 0.2990f; wb = 0.1140f;
 37 |         break;
 38 | 
 39 |     case ColorSpaceStandard_SMPTE240M:
 40 |         wr = 0.212f; wb = 0.087f;
 41 |         break;
 42 | 
 43 |     case ColorSpaceStandard_BT2020:
 44 |     case ColorSpaceStandard_BT2020C:
 45 |         wr = 0.2627f; wb = 0.0593f;
 46 |         // 10-bit only
 47 |         black = 64 << 6; white = 940 << 6;
 48 |         max = (1 << 16) - 1;
 49 |         break;
 50 |     }
 51 | }
 52 | 
 53 | void SetMatYuv2Rgb(int iMatrix) {
 54 |     float wr, wb;
 55 |     int black, white, max;
 56 |     GetConstants(iMatrix, wr, wb, black, white, max);
 57 |     float mat[3][3] = {
 58 |         1.0f, 0.0f, (1.0f - wr) / 0.5f,
 59 |         1.0f, -wb * (1.0f - wb) / 0.5f / (1 - wb - wr), -wr * (1 - wr) / 0.5f / (1 - wb - wr),
 60 |         1.0f, (1.0f - wb) / 0.5f, 0.0f,
 61 |     };
 62 |     for (int i = 0; i < 3; i++) {
 63 |         for (int j = 0; j < 3; j++) {
 64 |             mat[i][j] = (float)(1.0 * max / (white - black) * mat[i][j]);
 65 |         }
 66 |     }
 67 |     cudaMemcpyToSymbol(matYuv2Rgb, mat, sizeof(mat));
 68 | }
 69 | 
 70 | void SetMatRgb2Yuv(int iMatrix) {
 71 |     float wr, wb;
 72 |     int black, white, max;
 73 |     GetConstants(iMatrix, wr, wb, black, white, max);
 74 |     float mat[3][3] = {
 75 |         wr, 1.0f - wb - wr, wb,
 76 |         -0.5f * wr / (1.0f - wb), -0.5f * (1 - wb - wr) / (1.0f - wb), 0.5f,
 77 |         0.5f, -0.5f * (1.0f - wb - wr) / (1.0f - wr), -0.5f * wb / (1.0f - wr),
 78 |     };
 79 |     for (int i = 0; i < 3; i++) {
 80 |         for (int j = 0; j < 3; j++) {
 81 |             mat[i][j] = (float)(1.0 * (white - black) / max * mat[i][j]);
 82 |         }
 83 |     }
 84 |     cudaMemcpyToSymbol(matRgb2Yuv, mat, sizeof(mat));
 85 | }
 86 | 
 87 | template<class T>
 88 | __device__ static T Clamp(T x, T lower, T upper) {
 89 |     return x < lower ? lower : (x > upper ? upper : x);
 90 | }
 91 | 
 92 | template<class Rgb, class YuvUnit>
 93 | __device__ inline Rgb YuvToRgbForPixel(YuvUnit y, YuvUnit u, YuvUnit v) {
 94 |     const int 
 95 |         low = 1 << (sizeof(YuvUnit) * 8 - 4),
 96 |         mid = 1 << (sizeof(YuvUnit) * 8 - 1);
 97 |     float fy = (int)y - low, fu = (int)u - mid, fv = (int)v - mid;
 98 |     const float maxf = (1 << sizeof(YuvUnit) * 8) - 1.0f;
 99 |     YuvUnit 
100 |         r = (YuvUnit)Clamp(matYuv2Rgb[0][0] * fy + matYuv2Rgb[0][1] * fu + matYuv2Rgb[0][2] * fv, 0.0f, maxf),
101 |         g = (YuvUnit)Clamp(matYuv2Rgb[1][0] * fy + matYuv2Rgb[1][1] * fu + matYuv2Rgb[1][2] * fv, 0.0f, maxf),
102 |         b = (YuvUnit)Clamp(matYuv2Rgb[2][0] * fy + matYuv2Rgb[2][1] * fu + matYuv2Rgb[2][2] * fv, 0.0f, maxf);
103 |     
104 |     Rgb rgb{};
105 |     const int nShift = abs((int)sizeof(YuvUnit) - (int)sizeof(rgb.c.r)) * 8;
106 |     if (sizeof(YuvUnit) >= sizeof(rgb.c.r)) {
107 |         rgb.c.r = r >> nShift;
108 |         rgb.c.g = g >> nShift;
109 |         rgb.c.b = b >> nShift;
110 |     } else {
111 |         rgb.c.r = r << nShift;
112 |         rgb.c.g = g << nShift;
113 |         rgb.c.b = b << nShift;
114 |     }
115 |     return rgb;
116 | }
117 | 
118 | template<class YuvUnitx2, class Rgb, class RgbIntx2>
119 | __global__ static void YuvToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) {
120 |     int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
121 |     int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
122 |     if (x + 1 >= nWidth || y + 1 >= nHeight) {
123 |         return;
124 |     }
125 | 
126 |     uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
127 |     uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
128 | 
129 |     YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
130 |     YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch);
131 |     YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch);
132 | 
133 |     *(RgbIntx2 *)pDst = RgbIntx2 {
134 |         YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y).d,
135 |         YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y).d,
136 |     };
137 |     *(RgbIntx2 *)(pDst + nRgbPitch) = RgbIntx2 {
138 |         YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y).d, 
139 |         YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y).d,
140 |     };
141 | }
142 | 
143 | template<class YuvUnitx2, class Rgb, class RgbIntx2>
144 | __global__ static void Yuv444ToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) {
145 |     int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
146 |     int y = (threadIdx.y + blockIdx.y * blockDim.y);
147 |     if (x + 1 >= nWidth || y  >= nHeight) {
148 |         return;
149 |     }
150 | 
151 |     uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
152 |     uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
153 | 
154 |     YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
155 |     YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch));
156 |     YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch));
157 | 
158 |     *(RgbIntx2 *)pDst = RgbIntx2{
159 |         YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x).d,
160 |         YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y).d,
161 |     };
162 | }
163 | 
164 | template<class YuvUnitx2, class Rgb, class RgbUnitx2>
165 | __global__ static void YuvToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) {
166 |     int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
167 |     int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
168 |     if (x + 1 >= nWidth || y + 1 >= nHeight) {
169 |         return;
170 |     }
171 | 
172 |     uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
173 | 
174 |     YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
175 |     YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch);
176 |     YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch);
177 | 
178 |     Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y),
179 |         rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y),
180 |         rgb2 = YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y),
181 |         rgb3 = YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y);
182 | 
183 |     uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
184 |     *(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.x, rgb1.v.x};
185 |     *(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.x, rgb3.v.x};
186 |     pDst += nRgbpPitch * nHeight;
187 |     *(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.y, rgb1.v.y};
188 |     *(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.y, rgb3.v.y};
189 |     pDst += nRgbpPitch * nHeight;
190 |     *(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.z, rgb1.v.z};
191 |     *(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.z, rgb3.v.z};
192 | }
193 | 
194 | template<class YuvUnitx2, class Rgb, class RgbUnitx2>
195 | __global__ static void Yuv444ToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) {
196 |     int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
197 |     int y = (threadIdx.y + blockIdx.y * blockDim.y);
198 |     if (x + 1 >= nWidth || y >= nHeight) {
199 |         return;
200 |     }
201 | 
202 |     uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
203 | 
204 |     YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
205 |     YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch));
206 |     YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch));
207 | 
208 |     Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x),
209 |         rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y);
210 | 
211 | 
212 |     uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
213 |     *(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.x, rgb1.v.x };
214 | 
215 |     pDst += nRgbpPitch * nHeight;
216 |     *(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.y, rgb1.v.y };
217 | 
218 |     pDst += nRgbpPitch * nHeight;
219 |     *(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.z, rgb1.v.z };
220 | }
221 | 
222 | template <class COLOR32>
223 | void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
224 |     SetMatYuv2Rgb(iMatrix);
225 |     YuvToRgbKernel<uchar2, COLOR32, uint2>
226 |         <<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
227 |         (dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
228 | }
229 | 
230 | template <class COLOR64>
231 | void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
232 |     SetMatYuv2Rgb(iMatrix);
233 |     YuvToRgbKernel<uchar2, COLOR64, ulonglong2>
234 |         <<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
235 |         (dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
236 | }
237 | 
238 | template <class COLOR32>
239 | void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
240 |     SetMatYuv2Rgb(iMatrix);
241 |     Yuv444ToRgbKernel<uchar2, COLOR32, uint2>
242 |         <<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
243 |         (dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
244 | }
245 | 
246 | template <class COLOR64>
247 | void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
248 |     SetMatYuv2Rgb(iMatrix);
249 |     Yuv444ToRgbKernel<uchar2, COLOR64, ulonglong2>
250 |         <<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
251 |         (dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
252 | }
253 | 
254 | template <class COLOR32>
255 | void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
256 |     SetMatYuv2Rgb(iMatrix);
257 |     YuvToRgbKernel<ushort2, COLOR32, uint2>
258 |         <<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
259 |         (dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
260 | }
261 | 
262 | template <class COLOR64>
263 | void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
264 |     SetMatYuv2Rgb(iMatrix);
265 |     YuvToRgbKernel<ushort2, COLOR64, ulonglong2>
266 |         <<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
267 |         (dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
268 | }
269 | 
270 | template <class COLOR32>
271 | void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
272 |     SetMatYuv2Rgb(iMatrix);
273 |     Yuv444ToRgbKernel<ushort2, COLOR32, uint2>
274 |         <<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
275 |         (dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
276 | }
277 | 
278 | template <class COLOR64>
279 | void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
280 |     SetMatYuv2Rgb(iMatrix);
281 |     Yuv444ToRgbKernel<ushort2, COLOR64, ulonglong2>
282 |         <<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
283 |         (dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
284 | }
285 | 
286 | template <class COLOR32>
287 | void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
288 |     SetMatYuv2Rgb(iMatrix);
289 |     YuvToRgbPlanarKernel<uchar2, COLOR32, uchar2>
290 |         <<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
291 |         (dpNv12, nNv12Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
292 | }
293 | 
294 | template <class COLOR32>
295 | void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
296 |     SetMatYuv2Rgb(iMatrix);
297 |     YuvToRgbPlanarKernel<ushort2, COLOR32, uchar2>
298 |         <<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
299 |         (dpP016, nP016Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
300 | }
301 | 
302 | template <class COLOR32>
303 | void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
304 |     SetMatYuv2Rgb(iMatrix);
305 |     Yuv444ToRgbPlanarKernel<uchar2, COLOR32, uchar2>
306 |         <<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
307 |         (dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
308 | }
309 | 
310 | template <class COLOR32>
311 | void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
312 |     SetMatYuv2Rgb(iMatrix);
313 |     Yuv444ToRgbPlanarKernel<ushort2, COLOR32, uchar2>
314 |         << <dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >> >
315 |         (dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
316 | }
317 | 
318 | // Explicit Instantiation
319 | template void Nv12ToColor32<BGRA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
320 | template void Nv12ToColor32<RGBA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
321 | template void Nv12ToColor64<BGRA64>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
322 | template void Nv12ToColor64<RGBA64>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
323 | template void YUV444ToColor32<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
324 | template void YUV444ToColor32<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
325 | template void YUV444ToColor64<BGRA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
326 | template void YUV444ToColor64<RGBA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
327 | template void P016ToColor32<BGRA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
328 | template void P016ToColor32<RGBA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
329 | template void P016ToColor64<BGRA64>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
330 | template void P016ToColor64<RGBA64>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
331 | template void YUV444P16ToColor32<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
332 | template void YUV444P16ToColor32<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
333 | template void YUV444P16ToColor64<BGRA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
334 | template void YUV444P16ToColor64<RGBA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
335 | template void Nv12ToColorPlanar<BGRA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
336 | template void Nv12ToColorPlanar<RGBA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
337 | template void P016ToColorPlanar<BGRA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
338 | template void P016ToColorPlanar<RGBA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
339 | template void YUV444ToColorPlanar<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
340 | template void YUV444ToColorPlanar<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
341 | template void YUV444P16ToColorPlanar<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
342 | template void YUV444P16ToColorPlanar<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
343 | 
344 | template<class YuvUnit, class RgbUnit>
345 | __device__ inline YuvUnit RgbToY(RgbUnit r, RgbUnit g, RgbUnit b) {
346 |     const YuvUnit low = 1 << (sizeof(YuvUnit) * 8 - 4);
347 |     return matRgb2Yuv[0][0] * r + matRgb2Yuv[0][1] * g + matRgb2Yuv[0][2] * b + low;
348 | }
349 | 
350 | template<class YuvUnit, class RgbUnit>
351 | __device__ inline YuvUnit RgbToU(RgbUnit r, RgbUnit g, RgbUnit b) {
352 |     const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
353 |     return matRgb2Yuv[1][0] * r + matRgb2Yuv[1][1] * g + matRgb2Yuv[1][2] * b + mid;
354 | }
355 | 
356 | template<class YuvUnit, class RgbUnit>
357 | __device__ inline YuvUnit RgbToV(RgbUnit r, RgbUnit g, RgbUnit b) {
358 |     const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
359 |     return matRgb2Yuv[2][0] * r + matRgb2Yuv[2][1] * g + matRgb2Yuv[2][2] * b + mid;
360 | }
361 | 
362 | template<class YuvUnitx2, class Rgb, class RgbIntx2>
363 | __global__ static void RgbToYuvKernel(uint8_t *pRgb, int nRgbPitch, uint8_t *pYuv, int nYuvPitch, int nWidth, int nHeight) {
364 |     int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
365 |     int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
366 |     if (x + 1 >= nWidth || y + 1 >= nHeight) {
367 |         return;
368 |     }
369 | 
370 |     uint8_t *pSrc = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
371 |     RgbIntx2 int2a = *(RgbIntx2 *)pSrc;
372 |     RgbIntx2 int2b = *(RgbIntx2 *)(pSrc + nRgbPitch);
373 | 
374 |     Rgb rgb[4] = {int2a.x, int2a.y, int2b.x, int2b.y};
375 |     decltype(Rgb::c.r)
376 |         r = (rgb[0].c.r + rgb[1].c.r + rgb[2].c.r + rgb[3].c.r) / 4,
377 |         g = (rgb[0].c.g + rgb[1].c.g + rgb[2].c.g + rgb[3].c.g) / 4,
378 |         b = (rgb[0].c.b + rgb[1].c.b + rgb[2].c.b + rgb[3].c.b) / 4;
379 | 
380 |     uint8_t *pDst = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
381 |     *(YuvUnitx2 *)pDst = YuvUnitx2 {
382 |         RgbToY<decltype(YuvUnitx2::x)>(rgb[0].c.r, rgb[0].c.g, rgb[0].c.b),
383 |         RgbToY<decltype(YuvUnitx2::x)>(rgb[1].c.r, rgb[1].c.g, rgb[1].c.b),
384 |     };
385 |     *(YuvUnitx2 *)(pDst + nYuvPitch) = YuvUnitx2 {
386 |         RgbToY<decltype(YuvUnitx2::x)>(rgb[2].c.r, rgb[2].c.g, rgb[2].c.b),
387 |         RgbToY<decltype(YuvUnitx2::x)>(rgb[3].c.r, rgb[3].c.g, rgb[3].c.b),
388 |     };
389 |     *(YuvUnitx2 *)(pDst + (nHeight - y / 2) * nYuvPitch) = YuvUnitx2 {
390 |         RgbToU<decltype(YuvUnitx2::x)>(r, g, b), 
391 |         RgbToV<decltype(YuvUnitx2::x)>(r, g, b),
392 |     };
393 | }
394 | 
395 | void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016, int nP016Pitch, int nWidth, int nHeight, int iMatrix) {
396 |     SetMatRgb2Yuv(iMatrix);
397 |     RgbToYuvKernel<ushort2, BGRA64, ulonglong2>
398 |         <<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
399 |         (dpBgra, nBgraPitch, dpP016, nP016Pitch, nWidth, nHeight);
400 | }
401 | 


--------------------------------------------------------------------------------
/src/cuvid/Utils/ColorSpace.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #ifdef __cplusplus
 4 | extern "C" {
 5 | #endif
 6 | 
 7 | #include <stdint.h>
 8 | #include <vector_types.h>
 9 | 
10 | typedef enum ColorSpaceStandard {
11 |     ColorSpaceStandard_BT709 = 1,
12 |     ColorSpaceStandard_Unspecified = 2,
13 |     ColorSpaceStandard_Reserved = 3,
14 |     ColorSpaceStandard_FCC = 4,
15 |     ColorSpaceStandard_BT470 = 5,
16 |     ColorSpaceStandard_BT601 = 6,
17 |     ColorSpaceStandard_SMPTE240M = 7,
18 |     ColorSpaceStandard_YCgCo = 8,
19 |     ColorSpaceStandard_BT2020 = 9,
20 |     ColorSpaceStandard_BT2020C = 10
21 | } ColorSpaceStandard;
22 | 
23 | union BGRA32 {
24 |     uint32_t d;
25 |     uchar4 v;
26 |     struct {
27 |         uint8_t b, g, r, a;
28 |     } c;
29 | };
30 | 
31 | union RGBA32 {
32 |     uint32_t d;
33 |     uchar4 v;
34 |     struct {
35 |         uint8_t r, g, b, a;
36 |     } c;
37 | };
38 | 
39 | union BGRA64 {
40 |     uint64_t d;
41 |     ushort4 v;
42 |     struct {
43 |         uint16_t b, g, r, a;
44 |     } c;
45 | };
46 | 
47 | union RGBA64 {
48 |     uint64_t d;
49 |     ushort4 v;
50 |     struct {
51 |         uint16_t r, g, b, a;
52 |     } c;
53 | };
54 | 
55 | #ifdef __cplusplus
56 | }
57 | #endif


--------------------------------------------------------------------------------
/src/cuvid/Utils/FFmpegDemuxer.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
  3 | *
  4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 | * with this source code for terms and conditions that govern your use of
  6 | * this software. Any use, reproduction, disclosure, or distribution of
  7 | * this software and related documentation outside the terms of the EULA
  8 | * is strictly prohibited.
  9 | *
 10 | */
 11 | #pragma once
 12 | 
 13 | extern "C" {
 14 | #include <libavformat/avformat.h>
 15 | #include <libavformat/avio.h>
 16 | #include <libavcodec/avcodec.h>
 17 | }
 18 | #include "NvCodecUtils.h"
 19 | 
 20 | //---------------------------------------------------------------------------
 21 | //! \file FFmpegDemuxer.h 
 22 | //! \brief Provides functionality for stream demuxing
 23 | //!
 24 | //! This header file is used by Decode/Transcode apps to demux input video clips before decoding frames from it. 
 25 | //---------------------------------------------------------------------------
 26 | 
 27 | /**
 28 | * @brief libavformat wrapper class. Retrieves the elementary encoded stream from the container format.
 29 | */
 30 | class FFmpegDemuxer {
 31 | private:
 32 |     AVFormatContext *fmtc = NULL;
 33 |     AVIOContext *avioc = NULL;
 34 |     AVPacket pkt, pktFiltered; /*!< AVPacket stores compressed data typically exported by demuxers and then passed as input to decoders */
 35 |     AVBSFContext *bsfc = NULL;
 36 | 
 37 |     int iVideoStream;
 38 |     bool bMp4H264, bMp4HEVC, bMp4MPEG4;
 39 |     AVCodecID eVideoCodec;
 40 |     AVPixelFormat eChromaFormat;
 41 |     int nWidth, nHeight, nBitDepth, nBPP, nChromaHeight;
 42 |     double timeBase = 0.0;
 43 |     int64_t userTimeScale = 0; 
 44 | 
 45 |     uint8_t *pDataWithHeader = NULL;
 46 | 
 47 |     unsigned int frameCount = 0;
 48 | 
 49 | public:
 50 |     class DataProvider {
 51 |     public:
 52 |         virtual ~DataProvider() {}
 53 |         virtual int GetData(uint8_t *pBuf, int nBuf) = 0;
 54 |     };
 55 | 
 56 | private:
 57 | 
 58 |     /**
 59 |     *   @brief  Private constructor to initialize libavformat resources.
 60 |     *   @param  fmtc - Pointer to AVFormatContext allocated inside avformat_open_input()
 61 |     */
 62 |     FFmpegDemuxer(AVFormatContext *fmtc, int64_t timeScale = 1000 /*Hz*/) : fmtc(fmtc) {
 63 |         if (!fmtc) {
 64 |             LOG(ERROR) << "No AVFormatContext provided.";
 65 |             return;
 66 |         }
 67 | 
 68 |         LOG(INFO) << "Media format: " << fmtc->iformat->long_name << " (" << fmtc->iformat->name << ")";
 69 | 
 70 |         ck(avformat_find_stream_info(fmtc, NULL));
 71 |         iVideoStream = av_find_best_stream(fmtc, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
 72 |         if (iVideoStream < 0) {
 73 |             LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "Could not find stream in input file";
 74 |             return;
 75 |         }
 76 | 
 77 |         //fmtc->streams[iVideoStream]->need_parsing = AVSTREAM_PARSE_NONE;
 78 |         eVideoCodec = fmtc->streams[iVideoStream]->codecpar->codec_id;
 79 |         nWidth = fmtc->streams[iVideoStream]->codecpar->width;
 80 |         nHeight = fmtc->streams[iVideoStream]->codecpar->height;
 81 |         eChromaFormat = (AVPixelFormat)fmtc->streams[iVideoStream]->codecpar->format;
 82 |         AVRational rTimeBase = fmtc->streams[iVideoStream]->time_base;
 83 |         timeBase = av_q2d(rTimeBase);
 84 |         userTimeScale = timeScale;
 85 | 
 86 |         // Set bit depth, chroma height, bits per pixel based on eChromaFormat of input
 87 |         switch (eChromaFormat)
 88 |         {
 89 |         case AV_PIX_FMT_YUV420P10LE:
 90 |         case AV_PIX_FMT_GRAY10LE:   // monochrome is treated as 420 with chroma filled with 0x0
 91 |             nBitDepth = 10;
 92 |             nChromaHeight = (nHeight + 1) >> 1;
 93 |             nBPP = 2;
 94 |             break;
 95 |         case AV_PIX_FMT_YUV420P12LE:
 96 |             nBitDepth = 12;
 97 |             nChromaHeight = (nHeight + 1) >> 1;
 98 |             nBPP = 2;
 99 |             break;
100 |         case AV_PIX_FMT_YUV444P10LE:
101 |             nBitDepth = 10;
102 |             nChromaHeight = nHeight << 1;
103 |             nBPP = 2;
104 |             break;
105 |         case AV_PIX_FMT_YUV444P12LE:
106 |             nBitDepth = 12;
107 |             nChromaHeight = nHeight << 1;
108 |             nBPP = 2;
109 |             break;
110 |         case AV_PIX_FMT_YUV444P:
111 |             nBitDepth = 8;
112 |             nChromaHeight = nHeight << 1;
113 |             nBPP = 1;
114 |             break;
115 |         case AV_PIX_FMT_YUV420P:
116 |         case AV_PIX_FMT_YUVJ420P:
117 |         case AV_PIX_FMT_YUVJ422P:   // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
118 |         case AV_PIX_FMT_YUVJ444P:   // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
119 |         case AV_PIX_FMT_GRAY8:      // monochrome is treated as 420 with chroma filled with 0x0
120 |             nBitDepth = 8;
121 |             nChromaHeight = (nHeight + 1) >> 1;
122 |             nBPP = 1;
123 |             break;
124 |         default:
125 |             LOG(WARNING) << "ChromaFormat not recognized. Assuming 420";
126 |             eChromaFormat = AV_PIX_FMT_YUV420P;
127 |             nBitDepth = 8;
128 |             nChromaHeight = (nHeight + 1) >> 1;
129 |             nBPP = 1;
130 |         }
131 | 
132 |         bMp4H264 = eVideoCodec == AV_CODEC_ID_H264 && (
133 |                 !strcmp(fmtc->iformat->long_name, "QuickTime / MOV") 
134 |                 || !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)") 
135 |                 || !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
136 |             );
137 |         bMp4HEVC = eVideoCodec == AV_CODEC_ID_HEVC && (
138 |                 !strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
139 |                 || !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
140 |                 || !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
141 |             );
142 | 
143 |         bMp4MPEG4 = eVideoCodec == AV_CODEC_ID_MPEG4 && (
144 |                 !strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
145 |                 || !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
146 |                 || !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
147 |             );
148 | 
149 |         //Initialize packet fields with default values
150 |         av_init_packet(&pkt);
151 |         pkt.data = NULL;
152 |         pkt.size = 0;
153 |         av_init_packet(&pktFiltered);
154 |         pktFiltered.data = NULL;
155 |         pktFiltered.size = 0;
156 | 
157 |         // Initialize bitstream filter and its required resources
158 |         if (bMp4H264) {
159 |             const AVBitStreamFilter *bsf = av_bsf_get_by_name("h264_mp4toannexb");
160 |             if (!bsf) {
161 |                 LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed";
162 |                 return;
163 |             }
164 |             ck(av_bsf_alloc(bsf, &bsfc));
165 |             avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
166 |             ck(av_bsf_init(bsfc));
167 |         }
168 |         if (bMp4HEVC) {
169 |             const AVBitStreamFilter *bsf = av_bsf_get_by_name("hevc_mp4toannexb");
170 |             if (!bsf) {
171 |                 LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed";
172 |                 return;
173 |             }
174 |             ck(av_bsf_alloc(bsf, &bsfc));
175 |             avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
176 |             ck(av_bsf_init(bsfc));
177 |         }
178 |     }
179 | 
180 |     AVFormatContext *CreateFormatContext(DataProvider *pDataProvider) {
181 | 
182 |         AVFormatContext *ctx = NULL;
183 |         if (!(ctx = avformat_alloc_context())) {
184 |             LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
185 |             return NULL;
186 |         }
187 | 
188 |         uint8_t *avioc_buffer = NULL;
189 |         int avioc_buffer_size = 8 * 1024 * 1024;
190 |         avioc_buffer = (uint8_t *)av_malloc(avioc_buffer_size);
191 |         if (!avioc_buffer) {
192 |             LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
193 |             return NULL;
194 |         }
195 |         avioc = avio_alloc_context(avioc_buffer, avioc_buffer_size,
196 |             0, pDataProvider, &ReadPacket, NULL, NULL);
197 |         if (!avioc) {
198 |             LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
199 |             return NULL;
200 |         }
201 |         ctx->pb = avioc;
202 | 
203 |         ck(avformat_open_input(&ctx, NULL, NULL, NULL));
204 |         return ctx;
205 |     }
206 | 
207 |     /**
208 |     *   @brief  Allocate and return AVFormatContext*.
209 |     *   @param  szFilePath - Filepath pointing to input stream.
210 |     *   @return Pointer to AVFormatContext
211 |     */
212 |      AVFormatContext *CreateFormatContext(const char *szFilePath) {
213 |         avformat_network_init();
214 | 
215 |         AVFormatContext *ctx = NULL;
216 |         ck(avformat_open_input(&ctx, szFilePath, NULL, NULL));
217 |         return ctx;
218 |     }
219 | 
220 | public:
221 |     FFmpegDemuxer(const char *szFilePath, int64_t timescale = 1000 /*Hz*/) : FFmpegDemuxer(CreateFormatContext(szFilePath), timescale) {}
222 |     FFmpegDemuxer(DataProvider *pDataProvider) : FFmpegDemuxer(CreateFormatContext(pDataProvider)) {avioc = fmtc->pb;}
223 |     ~FFmpegDemuxer() {
224 | 
225 |         if (!fmtc) {
226 |             return;
227 |         }
228 | 
229 |         if (pkt.data) {
230 |             av_packet_unref(&pkt);
231 |         }
232 |         if (pktFiltered.data) {
233 |             av_packet_unref(&pktFiltered);
234 |         }
235 | 
236 |         if (bsfc) {
237 |             av_bsf_free(&bsfc);
238 |         }
239 | 
240 |         avformat_close_input(&fmtc);
241 | 
242 |         if (avioc) {
243 |             av_freep(&avioc->buffer);
244 |             av_freep(&avioc);
245 |         }
246 | 
247 |         if (pDataWithHeader) {
248 |             av_free(pDataWithHeader);
249 |         }
250 |     }
251 |     AVCodecID GetVideoCodec() {
252 |         return eVideoCodec;
253 |     }
254 |     AVPixelFormat GetChromaFormat() {
255 |         return eChromaFormat;
256 |     }
257 |     int GetWidth() {
258 |         return nWidth;
259 |     }
260 |     int GetHeight() {
261 |         return nHeight;
262 |     }
263 |     int GetBitDepth() {
264 |         return nBitDepth;
265 |     }
266 |     int GetFrameSize() {
267 |         return nWidth * (nHeight + nChromaHeight) * nBPP;
268 |     }
269 |     bool Demux(uint8_t **ppVideo, int *pnVideoBytes, int64_t *pts = NULL) {
270 |         if (!fmtc) {
271 |             return false;
272 |         }
273 | 
274 |         *pnVideoBytes = 0;
275 | 
276 |         if (pkt.data) {
277 |             av_packet_unref(&pkt);
278 |         }
279 | 
280 |         int e = 0;
281 |         while ((e = av_read_frame(fmtc, &pkt)) >= 0 && pkt.stream_index != iVideoStream) {
282 |             av_packet_unref(&pkt);
283 |         }
284 |         if (e < 0) {
285 |             return false;
286 |         }
287 | 
288 |         if (bMp4H264 || bMp4HEVC) {
289 |             if (pktFiltered.data) {
290 |                 av_packet_unref(&pktFiltered);
291 |             }
292 |             ck(av_bsf_send_packet(bsfc, &pkt));
293 |             ck(av_bsf_receive_packet(bsfc, &pktFiltered));
294 |             *ppVideo = pktFiltered.data;
295 |             *pnVideoBytes = pktFiltered.size;
296 |             if (pts)
297 |                 *pts = (int64_t) (pktFiltered.pts * userTimeScale * timeBase);
298 |         } else {
299 | 
300 |             if (bMp4MPEG4 && (frameCount == 0)) {
301 | 
302 |                 int extraDataSize = fmtc->streams[iVideoStream]->codecpar->extradata_size;
303 | 
304 |                 if (extraDataSize > 0) {
305 | 
306 |                     // extradata contains start codes 00 00 01. Subtract its size
307 |                     pDataWithHeader = (uint8_t *)av_malloc(extraDataSize + pkt.size - 3*sizeof(uint8_t));
308 | 
309 |                     if (!pDataWithHeader) {
310 |                         LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
311 |                         return false;
312 |                     }
313 | 
314 |                     memcpy(pDataWithHeader, fmtc->streams[iVideoStream]->codecpar->extradata, extraDataSize);
315 |                     memcpy(pDataWithHeader+extraDataSize, pkt.data+3, pkt.size - 3*sizeof(uint8_t));
316 | 
317 |                     *ppVideo = pDataWithHeader;
318 |                     *pnVideoBytes = extraDataSize + pkt.size - 3*sizeof(uint8_t);
319 |                 }
320 | 
321 |             } else {
322 |                 *ppVideo = pkt.data;
323 |                 *pnVideoBytes = pkt.size;
324 |             }
325 | 
326 |             if (pts)
327 |                 *pts = (int64_t)(pkt.pts * userTimeScale * timeBase);
328 |         }
329 | 
330 |         frameCount++;
331 | 
332 |         return true;
333 |     }
334 | 
335 |     static int ReadPacket(void *opaque, uint8_t *pBuf, int nBuf) {
336 |         return ((DataProvider *)opaque)->GetData(pBuf, nBuf);
337 |     }
338 | };
339 | 
340 | inline cudaVideoCodec FFmpeg2NvCodecId(AVCodecID id) {
341 |     switch (id) {
342 |     case AV_CODEC_ID_MPEG1VIDEO : return cudaVideoCodec_MPEG1;
343 |     case AV_CODEC_ID_MPEG2VIDEO : return cudaVideoCodec_MPEG2;
344 |     case AV_CODEC_ID_MPEG4      : return cudaVideoCodec_MPEG4;
345 |     case AV_CODEC_ID_WMV3       :
346 |     case AV_CODEC_ID_VC1        : return cudaVideoCodec_VC1;
347 |     case AV_CODEC_ID_H264       : return cudaVideoCodec_H264;
348 |     case AV_CODEC_ID_HEVC       : return cudaVideoCodec_HEVC;
349 |     case AV_CODEC_ID_VP8        : return cudaVideoCodec_VP8;
350 |     case AV_CODEC_ID_VP9        : return cudaVideoCodec_VP9;
351 |     case AV_CODEC_ID_MJPEG      : return cudaVideoCodec_JPEG;
352 |     case AV_CODEC_ID_AV1        : return cudaVideoCodec_AV1;
353 |     default                     : return cudaVideoCodec_NumCodecs;
354 |     }
355 | }
356 | 
357 | 
358 | 


--------------------------------------------------------------------------------
/src/cuvid/Utils/FFmpegStreamer.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
  3 | *
  4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 | * with this source code for terms and conditions that govern your use of
  6 | * this software. Any use, reproduction, disclosure, or distribution of
  7 | * this software and related documentation outside the terms of the EULA
  8 | * is strictly prohibited.
  9 | *
 10 | */
 11 | #pragma once
 12 | 
 13 | #include <thread>
 14 | #include <mutex>
 15 | extern "C" {
 16 | #include <libavformat/avformat.h>
 17 | #include <libavutil/opt.h>
 18 | #include <libswresample/swresample.h>
 19 | };
 20 | #include "Logger.h"
 21 | 
 22 | extern simplelogger::Logger *logger;
 23 | 
 24 | class FFmpegStreamer {
 25 | private:
 26 |     AVFormatContext *oc = NULL;
 27 |     AVStream *vs = NULL;
 28 |     int nFps = 0;
 29 | 
 30 | public:
 31 |     FFmpegStreamer(AVCodecID eCodecId, int nWidth, int nHeight, int nFps, const char *szInFilePath) : nFps(nFps) {
 32 |         avformat_network_init();
 33 |         oc = avformat_alloc_context();
 34 |         if (!oc) {
 35 |             LOG(ERROR) << "FFMPEG: avformat_alloc_context error";
 36 |             return;
 37 |         }
 38 | 
 39 |         // Set format on oc
 40 |         AVOutputFormat *fmt = av_guess_format("mpegts", NULL, NULL);
 41 |         if (!fmt) {
 42 |             LOG(ERROR) << "Invalid format";
 43 |             return;
 44 |         }
 45 |         fmt->video_codec = eCodecId;
 46 | 
 47 |         oc->oformat = fmt;
 48 |         oc->url = av_strdup(szInFilePath);
 49 |         LOG(INFO) << "Streaming destination: " << oc->url;
 50 | 
 51 |         // Add video stream to oc
 52 |         vs = avformat_new_stream(oc, NULL);
 53 |         if (!vs) {
 54 |             LOG(ERROR) << "FFMPEG: Could not alloc video stream";
 55 |             return;
 56 |         }
 57 |         vs->id = 0;
 58 | 
 59 |         // Set video parameters
 60 |         AVCodecParameters *vpar = vs->codecpar;
 61 |         vpar->codec_id = fmt->video_codec;
 62 |         vpar->codec_type = AVMEDIA_TYPE_VIDEO;
 63 |         vpar->width = nWidth;
 64 |         vpar->height = nHeight;
 65 | 
 66 |         // Everything is ready. Now open the output stream.
 67 |         if (avio_open(&oc->pb, oc->url, AVIO_FLAG_WRITE) < 0) {
 68 |             LOG(ERROR) << "FFMPEG: Could not open " << oc->url;
 69 |             return ;
 70 |         }
 71 | 
 72 |         // Write the container header
 73 |         if (avformat_write_header(oc, NULL)) {
 74 |             LOG(ERROR) << "FFMPEG: avformat_write_header error!";
 75 |             return;
 76 |         }
 77 |     }
 78 |     ~FFmpegStreamer() {
 79 |         if (oc) {
 80 |             av_write_trailer(oc);
 81 |             avio_close(oc->pb);
 82 |             avformat_free_context(oc);
 83 |         }
 84 |     }
 85 | 
 86 |     bool Stream(uint8_t *pData, int nBytes, int nPts) {
 87 |         AVPacket pkt = {0};
 88 |         av_init_packet(&pkt);
 89 |         pkt.pts = av_rescale_q(nPts++, AVRational {1, nFps}, vs->time_base);
 90 |         // No B-frames
 91 |         pkt.dts = pkt.pts;
 92 |         pkt.stream_index = vs->index;
 93 |         pkt.data = pData;
 94 |         pkt.size = nBytes;
 95 | 
 96 |         if(!memcmp(pData, "\x00\x00\x00\x01\x67", 5)) {
 97 |             pkt.flags |= AV_PKT_FLAG_KEY;
 98 |         }
 99 | 
100 |         // Write the compressed frame into the output
101 |         int ret = av_write_frame(oc, &pkt);
102 |         av_write_frame(oc, NULL);
103 |         if (ret < 0) {
104 |             LOG(ERROR) << "FFMPEG: Error while writing video frame";
105 |         }
106 | 
107 |         return true;
108 |     }
109 | };
110 | 


--------------------------------------------------------------------------------
/src/cuvid/Utils/Logger.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
  3 | *
  4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 | * with this source code for terms and conditions that govern your use of
  6 | * this software. Any use, reproduction, disclosure, or distribution of
  7 | * this software and related documentation outside the terms of the EULA
  8 | * is strictly prohibited.
  9 | *
 10 | */
 11 | 
 12 | #pragma once
 13 | 
 14 | #include <iostream>
 15 | #include <fstream>
 16 | #include <string>
 17 | #include <sstream>
 18 | #include <mutex>
 19 | #include <time.h>
 20 | 
 21 | #ifdef _WIN32
 22 | #include <winsock.h>
 23 | #include <windows.h>
 24 | 
 25 | #pragma comment(lib, "ws2_32.lib")
 26 | #undef ERROR
 27 | #else
 28 | #include <unistd.h>
 29 | #include <sys/socket.h>
 30 | #include <netinet/in.h>
 31 | #include <arpa/inet.h>
 32 | #define SOCKET int
 33 | #define INVALID_SOCKET -1
 34 | #endif
 35 | 
 36 | enum LogLevel {
 37 |     TRACE,
 38 |     INFO,
 39 |     WARNING,
 40 |     ERROR,
 41 |     FATAL
 42 | };
 43 | 
 44 | namespace simplelogger{
 45 | class Logger {
 46 | public:
 47 |     Logger(LogLevel level, bool bPrintTimeStamp) : level(level), bPrintTimeStamp(bPrintTimeStamp) {}
 48 |     virtual ~Logger() {}
 49 |     virtual std::ostream& GetStream() = 0;
 50 |     virtual void FlushStream() {}
 51 |     bool ShouldLogFor(LogLevel l) {
 52 |         return l >= level;
 53 |     }
 54 |     char* GetLead(LogLevel l, const char *szFile, int nLine, const char *szFunc) {
 55 |         if (l < TRACE || l > FATAL) {
 56 |             sprintf(szLead, "[?????] ");
 57 |             return szLead;
 58 |         }
 59 |         const char *szLevels[] = {"TRACE", "INFO", "WARN", "ERROR", "FATAL"};
 60 |         if (bPrintTimeStamp) {
 61 |             time_t t = time(NULL);
 62 |             struct tm *ptm = localtime(&t);
 63 |             sprintf(szLead, "[%-5s][%02d:%02d:%02d] ", 
 64 |                 szLevels[l], ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
 65 |         } else {
 66 |             sprintf(szLead, "[%-5s] ", szLevels[l]);
 67 |         }
 68 |         return szLead;
 69 |     }
 70 |     void EnterCriticalSection() {
 71 |         mtx.lock();
 72 |     }
 73 |     void LeaveCriticalSection() {
 74 |         mtx.unlock();
 75 |     }
 76 | private:
 77 |     LogLevel level;
 78 |     char szLead[80];
 79 |     bool bPrintTimeStamp;
 80 |     std::mutex mtx;
 81 | };
 82 | 
 83 | class LoggerFactory {
 84 | public:
 85 |     static Logger* CreateFileLogger(std::string strFilePath, 
 86 |             LogLevel level = INFO, bool bPrintTimeStamp = true) {
 87 |         return new FileLogger(strFilePath, level, bPrintTimeStamp);
 88 |     }
 89 |     static Logger* CreateConsoleLogger(LogLevel level = INFO, 
 90 |             bool bPrintTimeStamp = true) {
 91 |         return new ConsoleLogger(level, bPrintTimeStamp);
 92 |     }
 93 |     static Logger* CreateUdpLogger(char *szHost, unsigned uPort, LogLevel level = INFO, 
 94 |             bool bPrintTimeStamp = true) {
 95 |         return new UdpLogger(szHost, uPort, level, bPrintTimeStamp);
 96 |     }
 97 | private:
 98 |     LoggerFactory() {}
 99 | 
100 |     class FileLogger : public Logger {
101 |     public:
102 |         FileLogger(std::string strFilePath, LogLevel level, bool bPrintTimeStamp) 
103 |         : Logger(level, bPrintTimeStamp) {
104 |             pFileOut = new std::ofstream();
105 |             pFileOut->open(strFilePath.c_str());
106 |         }
107 |         ~FileLogger() {
108 |             pFileOut->close();
109 |         }
110 |         std::ostream& GetStream() {
111 |             return *pFileOut;
112 |         }
113 |     private:
114 |         std::ofstream *pFileOut;
115 |     };
116 | 
117 |     class ConsoleLogger : public Logger {
118 |     public:
119 |         ConsoleLogger(LogLevel level, bool bPrintTimeStamp) 
120 |         : Logger(level, bPrintTimeStamp) {}
121 |         std::ostream& GetStream() {
122 |             return std::cout;
123 |         }
124 |     };
125 | 
126 |     class UdpLogger : public Logger {
127 |     private:
128 |         class UdpOstream : public std::ostream {
129 |         public:
130 |             UdpOstream(char *szHost, unsigned short uPort) : std::ostream(&sb), socket(INVALID_SOCKET){
131 | #ifdef _WIN32
132 |                 WSADATA w;
133 |                 if (WSAStartup(0x0101, &w) != 0) {
134 |                     fprintf(stderr, "WSAStartup() failed.\n");
135 |                     return;
136 |                 }
137 | #endif
138 |                 socket = ::socket(AF_INET, SOCK_DGRAM, 0);
139 |                 if (socket == INVALID_SOCKET) {
140 | #ifdef _WIN32
141 |                     WSACleanup();
142 | #endif
143 |                     fprintf(stderr, "socket() failed.\n");
144 |                     return;
145 |                 }
146 | #ifdef _WIN32
147 |                 unsigned int b1, b2, b3, b4;
148 |                 sscanf(szHost, "%u.%u.%u.%u", &b1, &b2, &b3, &b4);
149 |                 struct in_addr addr = {(unsigned char)b1, (unsigned char)b2, (unsigned char)b3, (unsigned char)b4};
150 | #else
151 |                 struct in_addr addr = {inet_addr(szHost)};
152 | #endif
153 |                 struct sockaddr_in s = {AF_INET, htons(uPort), addr};
154 |                 server = s;
155 |             }
156 |             ~UdpOstream() throw() {
157 |                 if (socket == INVALID_SOCKET) {
158 |                     return;
159 |                 }
160 | #ifdef _WIN32
161 |                 closesocket(socket);
162 |                 WSACleanup();
163 | #else
164 |                 close(socket);
165 | #endif
166 |             }
167 |             void Flush() {
168 |                 if (sendto(socket, sb.str().c_str(), (int)sb.str().length() + 1, 
169 |                         0, (struct sockaddr *)&server, (int)sizeof(sockaddr_in)) == -1) {
170 |                     fprintf(stderr, "sendto() failed.\n");
171 |                 }
172 |                 sb.str("");
173 |             }
174 | 
175 |         private:
176 |             std::stringbuf sb;
177 |             SOCKET socket;
178 |             struct sockaddr_in server;
179 |         };
180 |     public:
181 |         UdpLogger(char *szHost, unsigned uPort, LogLevel level, bool bPrintTimeStamp) 
182 |         : Logger(level, bPrintTimeStamp), udpOut(szHost, (unsigned short)uPort) {}
183 |         UdpOstream& GetStream() {
184 |             return udpOut;
185 |         }
186 |         virtual void FlushStream() {
187 |             udpOut.Flush();
188 |         }
189 |     private:
190 |         UdpOstream udpOut;
191 |     };
192 | };
193 | 
194 | class LogTransaction {
195 | public:
196 |     LogTransaction(Logger *pLogger, LogLevel level, const char *szFile, const int nLine, const char *szFunc) : pLogger(pLogger), level(level) {
197 |         if (!pLogger) {
198 |             std::cout << "[-----] ";
199 |             return;
200 |         }
201 |         if (!pLogger->ShouldLogFor(level)) {
202 |             return;
203 |         }
204 |         pLogger->EnterCriticalSection();
205 |         pLogger->GetStream() << pLogger->GetLead(level, szFile, nLine, szFunc);
206 |     }
207 |     ~LogTransaction() {
208 |         if (!pLogger) {
209 |             std::cout << std::endl;
210 |             return;
211 |         }
212 |         if (!pLogger->ShouldLogFor(level)) {
213 |             return;
214 |         }
215 |         pLogger->GetStream() << std::endl;
216 |         pLogger->FlushStream();
217 |         pLogger->LeaveCriticalSection();
218 |         if (level == FATAL) {
219 |             exit(1);
220 |         }
221 |     }
222 |     std::ostream& GetStream() {
223 |         if (!pLogger) {
224 |             return std::cout;
225 |         }
226 |         if (!pLogger->ShouldLogFor(level)) {
227 |             return ossNull;
228 |         }
229 |         return pLogger->GetStream();
230 |     }
231 | private:
232 |     Logger *pLogger;
233 |     LogLevel level;
234 |     std::ostringstream ossNull;
235 | };
236 | 
237 | }
238 | extern simplelogger::Logger *logger;
239 | #define LOG(level) simplelogger::LogTransaction(logger, level, __FILE__, __LINE__, __FUNCTION__).GetStream()
240 | 


--------------------------------------------------------------------------------
/src/cuvid/Utils/NvCodecUtils.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
  3 | *
  4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 | * with this source code for terms and conditions that govern your use of
  6 | * this software. Any use, reproduction, disclosure, or distribution of
  7 | * this software and related documentation outside the terms of the EULA
  8 | * is strictly prohibited.
  9 | *
 10 | */
 11 | 
 12 | //---------------------------------------------------------------------------
 13 | //! \file NvCodecUtils.h
 14 | //! \brief Miscellaneous classes and error checking functions.
 15 | //!
 16 | //! Used by Transcode/Encode samples apps for reading input files, mutithreading, performance measurement or colorspace conversion while decoding.
 17 | //---------------------------------------------------------------------------
 18 | 
 19 | #pragma once
 20 | #include <iomanip>
 21 | #include <chrono>
 22 | #include <sys/stat.h>
 23 | #include <assert.h>
 24 | #include <stdint.h>
 25 | #include <string.h>
 26 | #include "Logger.h"
 27 | #include <ios>
 28 | #include <sstream>
 29 | #include <thread>
 30 | #include <list>
 31 | #include <condition_variable>
 32 | 
 33 | extern simplelogger::Logger *logger;
 34 | 
 35 | #ifdef __cuda_cuda_h__
 36 | inline bool check(CUresult e, int iLine, const char *szFile) {
 37 |     if (e != CUDA_SUCCESS) {
 38 |         const char *szErrName = NULL;
 39 |         cuGetErrorName(e, &szErrName);
 40 |         LOG(FATAL) << "CUDA driver API error " << szErrName << " at line " << iLine << " in file " << szFile;
 41 |         return false;
 42 |     }
 43 |     return true;
 44 | }
 45 | #endif
 46 | 
 47 | #ifdef __CUDA_RUNTIME_H__
 48 | inline bool check(cudaError_t e, int iLine, const char *szFile) {
 49 |     if (e != cudaSuccess) {
 50 |         LOG(FATAL) << "CUDA runtime API error " << cudaGetErrorName(e) << " at line " << iLine << " in file " << szFile;
 51 |         return false;
 52 |     }
 53 |     return true;
 54 | }
 55 | #endif
 56 | 
 57 | #ifdef _NV_ENCODEAPI_H_
 58 | inline bool check(NVENCSTATUS e, int iLine, const char *szFile) {
 59 |     const char *aszErrName[] = {
 60 |         "NV_ENC_SUCCESS",
 61 |         "NV_ENC_ERR_NO_ENCODE_DEVICE",
 62 |         "NV_ENC_ERR_UNSUPPORTED_DEVICE",
 63 |         "NV_ENC_ERR_INVALID_ENCODERDEVICE",
 64 |         "NV_ENC_ERR_INVALID_DEVICE",
 65 |         "NV_ENC_ERR_DEVICE_NOT_EXIST",
 66 |         "NV_ENC_ERR_INVALID_PTR",
 67 |         "NV_ENC_ERR_INVALID_EVENT",
 68 |         "NV_ENC_ERR_INVALID_PARAM",
 69 |         "NV_ENC_ERR_INVALID_CALL",
 70 |         "NV_ENC_ERR_OUT_OF_MEMORY",
 71 |         "NV_ENC_ERR_ENCODER_NOT_INITIALIZED",
 72 |         "NV_ENC_ERR_UNSUPPORTED_PARAM",
 73 |         "NV_ENC_ERR_LOCK_BUSY",
 74 |         "NV_ENC_ERR_NOT_ENOUGH_BUFFER",
 75 |         "NV_ENC_ERR_INVALID_VERSION",
 76 |         "NV_ENC_ERR_MAP_FAILED",
 77 |         "NV_ENC_ERR_NEED_MORE_INPUT",
 78 |         "NV_ENC_ERR_ENCODER_BUSY",
 79 |         "NV_ENC_ERR_EVENT_NOT_REGISTERD",
 80 |         "NV_ENC_ERR_GENERIC",
 81 |         "NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY",
 82 |         "NV_ENC_ERR_UNIMPLEMENTED",
 83 |         "NV_ENC_ERR_RESOURCE_REGISTER_FAILED",
 84 |         "NV_ENC_ERR_RESOURCE_NOT_REGISTERED",
 85 |         "NV_ENC_ERR_RESOURCE_NOT_MAPPED",
 86 |     };
 87 |     if (e != NV_ENC_SUCCESS) {
 88 |         LOG(FATAL) << "NVENC error " << aszErrName[e] << " at line " << iLine << " in file " << szFile;
 89 |         return false;
 90 |     }
 91 |     return true;
 92 | }
 93 | #endif
 94 | 
 95 | #ifdef _WINERROR_
 96 | inline bool check(HRESULT e, int iLine, const char *szFile) {
 97 |     if (e != S_OK) {
 98 |         std::stringstream stream;
 99 |         stream << std::hex << std::uppercase << e;
100 |         LOG(FATAL) << "HRESULT error 0x" << stream.str() << " at line " << iLine << " in file " << szFile;
101 |         return false;
102 |     }
103 |     return true;
104 | }
105 | #endif
106 | 
107 | #if defined(__gl_h_) || defined(__GL_H__)
108 | inline bool check(GLenum e, int iLine, const char *szFile) {
109 |     if (e != 0) {
110 |         LOG(ERROR) << "GLenum error " << e << " at line " << iLine << " in file " << szFile;
111 |         return false;
112 |     }
113 |     return true;
114 | }
115 | #endif
116 | 
117 | inline bool check(int e, int iLine, const char *szFile) {
118 |     if (e < 0) {
119 |         LOG(ERROR) << "General error " << e << " at line " << iLine << " in file " << szFile;
120 |         return false;
121 |     }
122 |     return true;
123 | }
124 | 
125 | #define ck(call) check(call, __LINE__, __FILE__)
126 | 
127 | /**
128 | * @brief Wrapper class around std::thread
129 | */
130 | class NvThread
131 | {
132 | public:
133 |     NvThread() = default;
134 |     NvThread(const NvThread&) = delete;
135 |     NvThread& operator=(const NvThread& other) = delete;
136 | 
137 |     NvThread(std::thread&& thread) : t(std::move(thread))
138 |     {
139 | 
140 |     }
141 | 
142 |     NvThread(NvThread&& thread) : t(std::move(thread.t))
143 |     {
144 | 
145 |     }
146 | 
147 |     NvThread& operator=(NvThread&& other)
148 |     {
149 |         t = std::move(other.t);
150 |         return *this;
151 |     }
152 | 
153 |     ~NvThread()
154 |     {
155 |         join();
156 |     }
157 | 
158 |     void join()
159 |     {
160 |         if (t.joinable())
161 |         {
162 |             t.join();
163 |         }
164 |     }
165 | private:
166 |     std::thread t;
167 | };
168 | 
169 | #ifndef _WIN32
170 | #define _stricmp strcasecmp
171 | #define _stat64 stat64
172 | #endif
173 | 
174 | /**
175 | * @brief Utility class to allocate buffer memory. Helps avoid I/O during the encode/decode loop in case of performance tests.
176 | */
177 | class BufferedFileReader {
178 | public:
179 |     /**
180 |     * @brief Constructor function to allocate appropriate memory and copy file contents into it
181 |     */
182 |     BufferedFileReader(const char *szFileName, bool bPartial = false) {
183 |         struct _stat64 st;
184 | 
185 |         if (_stat64(szFileName, &st) != 0) {
186 |             return;
187 |         }
188 |         
189 |         nSize = st.st_size;
190 |         while (nSize) {
191 |             try {
192 |                 pBuf = new uint8_t[(size_t)nSize];
193 |                 if (nSize != st.st_size) {
194 |                     LOG(WARNING) << "File is too large - only " << std::setprecision(4) << 100.0 * nSize / st.st_size << "% is loaded"; 
195 |                 }
196 |                 break;
197 |             } catch(std::bad_alloc) {
198 |                 if (!bPartial) {
199 |                     LOG(ERROR) << "Failed to allocate memory in BufferedReader";
200 |                     return;
201 |                 }
202 |                 nSize = (uint32_t)(nSize * 0.9);
203 |             }
204 |         }
205 | 
206 |         std::ifstream fpIn(szFileName, std::ifstream::in | std::ifstream::binary);
207 |         if (!fpIn)
208 |         {
209 |             LOG(ERROR) << "Unable to open input file: " << szFileName;
210 |             return;
211 |         }
212 | 
213 |         std::streamsize nRead = fpIn.read(reinterpret_cast<char*>(pBuf), nSize).gcount();
214 |         fpIn.close();
215 | 
216 |         assert(nRead == nSize);
217 |     }
218 |     ~BufferedFileReader() {
219 |         if (pBuf) {
220 |             delete[] pBuf;
221 |         }
222 |     }
223 |     bool GetBuffer(uint8_t **ppBuf, uint64_t *pnSize) {
224 |         if (!pBuf) {
225 |             return false;
226 |         }
227 | 
228 |         *ppBuf = pBuf;
229 |         *pnSize = nSize;
230 |         return true;
231 |     }
232 | 
233 | private:
234 |     uint8_t *pBuf = NULL;
235 |     uint64_t nSize = 0;
236 | };
237 | 
238 | /**
239 | * @brief Template class to facilitate color space conversion
240 | */
241 | template<typename T>
242 | class YuvConverter {
243 | public:
244 |     YuvConverter(int nWidth, int nHeight) : nWidth(nWidth), nHeight(nHeight) {
245 |         pQuad = new T[((nWidth + 1) / 2) * ((nHeight + 1) / 2)];
246 |     }
247 |     ~YuvConverter() {
248 |         delete[] pQuad;
249 |     }
250 |     void PlanarToUVInterleaved(T *pFrame, int nPitch = 0) {
251 |         if (nPitch == 0) {
252 |             nPitch = nWidth;
253 |         }
254 | 
255 |         // sizes of source surface plane
256 |         int nSizePlaneY = nPitch * nHeight;
257 |         int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
258 |         int nSizePlaneV = nSizePlaneU;
259 | 
260 |         T *puv = pFrame + nSizePlaneY;
261 |         if (nPitch == nWidth) {
262 |             memcpy(pQuad, puv, nSizePlaneU * sizeof(T));
263 |         } else {
264 |             for (int i = 0; i < (nHeight + 1) / 2; i++) {
265 |                 memcpy(pQuad + ((nWidth + 1) / 2) * i, puv + ((nPitch + 1) / 2) * i, ((nWidth + 1) / 2) * sizeof(T));
266 |             }
267 |         }
268 |         T *pv = puv + nSizePlaneU;
269 |         for (int y = 0; y < (nHeight + 1) / 2; y++) {
270 |             for (int x = 0; x < (nWidth + 1) / 2; x++) {
271 |                 puv[y * nPitch + x * 2] = pQuad[y * ((nWidth + 1) / 2) + x];
272 |                 puv[y * nPitch + x * 2 + 1] = pv[y * ((nPitch + 1) / 2) + x];
273 |             }
274 |         }
275 |     }
276 |     void UVInterleavedToPlanar(T *pFrame, int nPitch = 0) {
277 |         if (nPitch == 0) {
278 |             nPitch = nWidth;
279 |         }
280 | 
281 |         // sizes of source surface plane
282 |         int nSizePlaneY = nPitch * nHeight;
283 |         int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
284 |         int nSizePlaneV = nSizePlaneU;
285 | 
286 |         T *puv = pFrame + nSizePlaneY,
287 |             *pu = puv, 
288 |             *pv = puv + nSizePlaneU;
289 | 
290 |         // split chroma from interleave to planar
291 |         for (int y = 0; y < (nHeight + 1) / 2; y++) {
292 |             for (int x = 0; x < (nWidth + 1) / 2; x++) {
293 |                 pu[y * ((nPitch + 1) / 2) + x] = puv[y * nPitch + x * 2];
294 |                 pQuad[y * ((nWidth + 1) / 2) + x] = puv[y * nPitch + x * 2 + 1];
295 |             }
296 |         }
297 |         if (nPitch == nWidth) {
298 |             memcpy(pv, pQuad, nSizePlaneV * sizeof(T));
299 |         } else {
300 |             for (int i = 0; i < (nHeight + 1) / 2; i++) {
301 |                 memcpy(pv + ((nPitch + 1) / 2) * i, pQuad + ((nWidth + 1) / 2) * i, ((nWidth + 1) / 2) * sizeof(T));
302 |             }
303 |         }
304 |     }
305 | 
306 | private:
307 |     T *pQuad;
308 |     int nWidth, nHeight;
309 | };
310 | 
311 | /**
312 | * @brief Utility class to measure elapsed time in seconds between the block of executed code
313 | */
314 | class StopWatch {
315 | public:
316 |     void Start() {
317 |         t0 = std::chrono::high_resolution_clock::now();
318 |     }
319 |     double Stop() {
320 |         return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch() - t0.time_since_epoch()).count() / 1.0e9;
321 |     }
322 | 
323 | private:
324 |     std::chrono::high_resolution_clock::time_point t0;
325 | };
326 | 
327 | template<typename T>
328 | class ConcurrentQueue
329 | {
330 |     public:
331 | 
332 |     ConcurrentQueue() {}
333 |     ConcurrentQueue(size_t size) : maxSize(size) {}
334 |     ConcurrentQueue(const ConcurrentQueue&) = delete;
335 |     ConcurrentQueue& operator=(const ConcurrentQueue&) = delete;
336 | 
337 |     void setSize(size_t s) {
338 |         maxSize = s;
339 |     }
340 | 
341 |     void push_back(const T& value) {
342 |         // Do not use a std::lock_guard here. We will need to explicitly
343 |         // unlock before notify_one as the other waiting thread will
344 |         // automatically try to acquire mutex once it wakes up
345 |         // (which will happen on notify_one)
346 |         std::unique_lock<std::mutex> lock(m_mutex);
347 |         auto wasEmpty = m_List.empty();
348 | 
349 |         while (full()) {
350 |             m_cond.wait(lock);
351 |         }
352 | 
353 |         m_List.push_back(value);
354 |         if (wasEmpty && !m_List.empty()) {
355 |             lock.unlock();
356 |             m_cond.notify_one();
357 |         }
358 |     }
359 | 
360 |     T pop_front() {
361 |         std::unique_lock<std::mutex> lock(m_mutex);
362 | 
363 |         while (m_List.empty()) {
364 |             m_cond.wait(lock);
365 |         }
366 |         auto wasFull = full();
367 |         T data = std::move(m_List.front());
368 |         m_List.pop_front();
369 | 
370 |         if (wasFull && !full()) {
371 |             lock.unlock();
372 |             m_cond.notify_one();
373 |         }
374 | 
375 |         return data;
376 |     }
377 | 
378 |     T front() {
379 |         std::unique_lock<std::mutex> lock(m_mutex);
380 | 
381 |         while (m_List.empty()) {
382 |             m_cond.wait(lock);
383 |         }
384 | 
385 |         return m_List.front();
386 |     }
387 | 
388 |     size_t size() {
389 |         std::unique_lock<std::mutex> lock(m_mutex);
390 |         return m_List.size();
391 |     }
392 | 
393 |     bool empty() {
394 |         std::unique_lock<std::mutex> lock(m_mutex);
395 |         return m_List.empty();
396 |     }
397 |     void clear() {
398 |         std::unique_lock<std::mutex> lock(m_mutex);
399 |         m_List.clear();
400 |     }
401 | 
402 | private:
403 |     bool full() {
404 |         if (m_List.size() == maxSize)
405 |             return true;
406 |         return false;
407 |     }
408 | 
409 | private:
410 |     std::list<T> m_List;
411 |     std::mutex m_mutex;
412 |     std::condition_variable m_cond;
413 |     size_t maxSize;
414 | };
415 | 
416 | inline void CheckInputFile(const char *szInFilePath) {
417 |     std::ifstream fpIn(szInFilePath, std::ios::in | std::ios::binary);
418 |     if (fpIn.fail()) {
419 |         std::ostringstream err;
420 |         err << "Unable to open input file: " << szInFilePath << std::endl;
421 |         throw std::invalid_argument(err.str());
422 |     }
423 | }
424 | 
425 | inline void ValidateResolution(int nWidth, int nHeight) {
426 |     
427 |     if (nWidth <= 0 || nHeight <= 0) {
428 |         std::ostringstream err;
429 |         err << "Please specify positive non zero resolution as -s WxH. Current resolution is " << nWidth << "x" << nHeight << std::endl;
430 |         throw std::invalid_argument(err.str());
431 |     }
432 | }
433 | 
434 | template <class COLOR32>
435 | void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
436 | template <class COLOR64>
437 | void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
438 | 
439 | template <class COLOR32>
440 | void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
441 | template <class COLOR64>
442 | void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
443 | 
444 | template <class COLOR32>
445 | void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
446 | template <class COLOR64>
447 | void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
448 | 
449 | template <class COLOR32>
450 | void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
451 | template <class COLOR64>
452 | void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
453 | 
454 | template <class COLOR32>
455 | void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix = 0);
456 | template <class COLOR32>
457 | void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix = 4);
458 | 
459 | template <class COLOR32>
460 | void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix = 0);
461 | template <class COLOR32>
462 | void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix = 4);
463 | 
464 | void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016, int nP016Pitch, int nWidth, int nHeight, int iMatrix = 4);
465 | 
466 | void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight);
467 | void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight);
468 | 
469 | void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcNv12, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char *dpDstNv12UV = nullptr);
470 | void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcP016, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char *dpDstP016UV = nullptr);
471 | 
472 | void ScaleYUV420(unsigned char *dpDstY, unsigned char* dpDstU, unsigned char* dpDstV, int nDstPitch, int nDstChromaPitch, int nDstWidth, int nDstHeight,
473 |     unsigned char *dpSrcY, unsigned char* dpSrcU, unsigned char* dpSrcV, int nSrcPitch, int nSrcChromaPitch, int nSrcWidth, int nSrcHeight, bool bSemiplanar);
474 | 
475 | #ifdef __cuda_cuda_h__
476 | void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue, CUstream_st *outputCUStream);
477 | #endif
478 | 


--------------------------------------------------------------------------------
/src/cuvid/Utils/Resize.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
  3 | *
  4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 | * with this source code for terms and conditions that govern your use of
  6 | * this software. Any use, reproduction, disclosure, or distribution of
  7 | * this software and related documentation outside the terms of the EULA
  8 | * is strictly prohibited.
  9 | *
 10 | */
 11 | 
 12 | #include <cuda_runtime.h>
 13 | #include "NvCodecUtils.h"
 14 | 
 15 | template<typename YuvUnitx2>
 16 | static __global__ void Resize(cudaTextureObject_t texY, cudaTextureObject_t texUv,
 17 |         uint8_t *pDst, uint8_t *pDstUV, int nPitch, int nWidth, int nHeight,
 18 |         float fxScale, float fyScale)
 19 | {
 20 |     int ix = blockIdx.x * blockDim.x + threadIdx.x,
 21 |         iy = blockIdx.y * blockDim.y + threadIdx.y;
 22 | 
 23 |     if (ix >= nWidth / 2 || iy >= nHeight / 2) {
 24 |         return;
 25 |     }
 26 | 
 27 |     int x = ix * 2, y = iy * 2;
 28 |     typedef decltype(YuvUnitx2::x) YuvUnit;
 29 |     const int MAX = (1 << (sizeof(YuvUnit) * 8)) - 1;
 30 |     *(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
 31 |         (YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
 32 |         (YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
 33 |     };
 34 |     y++;
 35 |     *(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
 36 |         (YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
 37 |         (YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
 38 |     };
 39 |     float2 uv = tex2D<float2>(texUv, ix / fxScale, (nHeight + iy) / fyScale + 0.5f);
 40 |     *(YuvUnitx2 *)(pDstUV + iy * nPitch + ix * 2 * sizeof(YuvUnit)) = YuvUnitx2{ (YuvUnit)(uv.x * MAX), (YuvUnit)(uv.y * MAX) };
 41 | }
 42 | 
 43 | template <typename YuvUnitx2>
 44 | static void Resize(unsigned char *dpDst, unsigned char* dpDstUV, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight) {
 45 |     cudaResourceDesc resDesc = {};
 46 |     resDesc.resType = cudaResourceTypePitch2D;
 47 |     resDesc.res.pitch2D.devPtr = dpSrc;
 48 |     resDesc.res.pitch2D.desc = cudaCreateChannelDesc<decltype(YuvUnitx2::x)>();
 49 |     resDesc.res.pitch2D.width = nSrcWidth;
 50 |     resDesc.res.pitch2D.height = nSrcHeight;
 51 |     resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
 52 | 
 53 |     cudaTextureDesc texDesc = {};
 54 |     texDesc.filterMode = cudaFilterModeLinear;
 55 |     texDesc.readMode = cudaReadModeNormalizedFloat;
 56 | 
 57 |     cudaTextureObject_t texY=0;
 58 |     ck(cudaCreateTextureObject(&texY, &resDesc, &texDesc, NULL));
 59 | 
 60 |     resDesc.res.pitch2D.desc = cudaCreateChannelDesc<YuvUnitx2>();
 61 |     resDesc.res.pitch2D.width = nSrcWidth / 2;
 62 |     resDesc.res.pitch2D.height = nSrcHeight * 3 / 2;
 63 | 
 64 |     cudaTextureObject_t texUv=0;
 65 |     ck(cudaCreateTextureObject(&texUv, &resDesc, &texDesc, NULL));
 66 | 
 67 |     Resize<YuvUnitx2> << <dim3((nDstWidth + 31) / 32, (nDstHeight + 31) / 32), dim3(16, 16) >> >(texY, texUv, dpDst, dpDstUV,
 68 |         nDstPitch, nDstWidth, nDstHeight, 1.0f * nDstWidth / nSrcWidth, 1.0f * nDstHeight / nSrcHeight);
 69 | 
 70 |     ck(cudaDestroyTextureObject(texY));
 71 |     ck(cudaDestroyTextureObject(texUv));
 72 | }
 73 | 
 74 | void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcNv12, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstNv12UV)
 75 | {
 76 |     unsigned char* dpDstUV = dpDstNv12UV ? dpDstNv12UV : dpDstNv12 + (nDstPitch*nDstHeight);
 77 |     return Resize<uchar2>(dpDstNv12, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcNv12, nSrcPitch, nSrcWidth, nSrcHeight);
 78 | }
 79 | 
 80 | 
 81 | void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcP016, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstP016UV)
 82 | {
 83 |     unsigned char* dpDstUV = dpDstP016UV ? dpDstP016UV : dpDstP016 + (nDstPitch*nDstHeight);
 84 |     return Resize<ushort2>(dpDstP016, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcP016, nSrcPitch, nSrcWidth, nSrcHeight);
 85 | }
 86 | 
 87 | static __global__ void Scale(cudaTextureObject_t texSrc,
 88 |     uint8_t *pDst, int nPitch, int nWidth, int nHeight,
 89 |     float fxScale, float fyScale)
 90 | {
 91 |     int x = blockIdx.x * blockDim.x + threadIdx.x,
 92 |         y = blockIdx.y * blockDim.y + threadIdx.y;
 93 | 
 94 |     if (x >= nWidth || y >= nHeight)
 95 |     {
 96 |         return;
 97 |     }
 98 | 
 99 |     *(unsigned char*)(pDst + (y * nPitch) + x) = (unsigned char)(fminf((tex2D<float>(texSrc, x * fxScale, y * fyScale)) * 255.0f, 255.0f));
100 | }
101 | 
102 | static __global__ void Scale_uv(cudaTextureObject_t texSrc,
103 |     uint8_t *pDst, int nPitch, int nWidth, int nHeight,
104 |     float fxScale, float fyScale)
105 | {
106 |     int x = blockIdx.x * blockDim.x + threadIdx.x,
107 |         y = blockIdx.y * blockDim.y + threadIdx.y;
108 | 
109 |     if (x >= nWidth || y >= nHeight)
110 |     {
111 |         return;
112 |     }
113 | 
114 |     float2 uv = tex2D<float2>(texSrc, x * fxScale, y * fyScale);
115 |     uchar2 uvOut = uchar2{ (unsigned char)(fminf(uv.x * 255.0f, 255.0f)), (unsigned char)(fminf(uv.y * 255.0f, 255.0f)) };
116 | 
117 |     *(uchar2*)(pDst + (y * nPitch) + 2 * x) = uvOut;
118 | }
119 | 
120 | void ScaleKernelLaunch(unsigned char *dpDst, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight, bool bUVPlane = false) 
121 | {
122 |     cudaResourceDesc resDesc = {};
123 |     resDesc.resType = cudaResourceTypePitch2D;
124 |     resDesc.res.pitch2D.devPtr = dpSrc;
125 |     resDesc.res.pitch2D.desc = bUVPlane ? cudaCreateChannelDesc<uchar2>() : cudaCreateChannelDesc<unsigned char>();
126 |     resDesc.res.pitch2D.width = nSrcWidth;
127 |     resDesc.res.pitch2D.height = nSrcHeight;
128 |     resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
129 | 
130 |     cudaTextureDesc texDesc = {};
131 |     texDesc.filterMode = cudaFilterModeLinear;
132 |     texDesc.readMode = cudaReadModeNormalizedFloat;
133 | 
134 |     texDesc.addressMode[0] = cudaAddressModeClamp;
135 |     texDesc.addressMode[1] = cudaAddressModeClamp;
136 |     texDesc.addressMode[2] = cudaAddressModeClamp;
137 | 
138 |     cudaTextureObject_t texSrc = 0;
139 |     ck(cudaCreateTextureObject(&texSrc, &resDesc, &texDesc, NULL));
140 | 
141 |     dim3 blockSize(16, 16, 1);
142 |     dim3 gridSize(((uint32_t)nDstWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nDstHeight + blockSize.y - 1) / blockSize.y, 1);
143 | 
144 |     if (bUVPlane)
145 |     {
146 |         Scale_uv << <gridSize, blockSize >> >(texSrc, dpDst,
147 |             nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
148 |     }
149 |     else
150 |     {
151 |         Scale << <gridSize, blockSize >> >(texSrc, dpDst,
152 |             nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
153 |     }
154 | 
155 |     ck(cudaGetLastError());
156 |     ck(cudaDestroyTextureObject(texSrc));
157 | }
158 | 
159 | void ScaleYUV420(unsigned char *dpDstY,
160 |                  unsigned char* dpDstU,
161 |                 unsigned char* dpDstV,
162 |                 int nDstPitch,
163 |                 int nDstChromaPitch,
164 |                 int nDstWidth,
165 |                 int nDstHeight,
166 |                 unsigned char *dpSrcY,
167 |                 unsigned char* dpSrcU,
168 |                 unsigned char* dpSrcV, 
169 |                 int nSrcPitch,
170 |                 int nSrcChromaPitch,
171 |                 int nSrcWidth,
172 |                 int nSrcHeight,
173 |                 bool bSemiplanar)
174 | {
175 |     int chromaWidthDst = (nDstWidth + 1) / 2;
176 |     int chromaHeightDst = (nDstHeight + 1) / 2;
177 | 
178 |     int chromaWidthSrc = (nSrcWidth + 1) / 2;
179 |     int chromaHeightSrc = (nSrcHeight + 1) / 2;
180 | 
181 |     ScaleKernelLaunch(dpDstY, nDstPitch, nDstWidth, nDstHeight, dpSrcY, nSrcPitch, nSrcWidth, nSrcHeight);
182 | 
183 |     if (bSemiplanar)
184 |     {
185 |         ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc, true);
186 |     }
187 |     else
188 |     {
189 |         ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
190 |         ScaleKernelLaunch(dpDstV, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcV, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
191 |     }
192 | }
193 | 


--------------------------------------------------------------------------------
/src/cuvid/Utils/crc.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Copyright 2018-2020 NVIDIA Corporation.  All rights reserved.
  3 | *
  4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 | * with this source code for terms and conditions that govern your use of
  6 | * this software. Any use, reproduction, disclosure, or distribution of
  7 | * this software and related documentation outside the terms of the EULA
  8 | * is strictly prohibited.
  9 | *
 10 | */
 11 | 
 12 | #include <cuda_runtime.h>
 13 | #include "NvCodecUtils.h"
 14 | 
 15 | /*
 16 | * CRC32 lookup table
 17 | * Generated by the following routine
 18 | * int i, j;
 19 | * U032 crc;
 20 | * for (i = 0; i < 256; i++) 
 21 | * {
 22 | *    crc = i;
 23 | *    for (j = 0; j < 8; j++) {    // 8 reduction
 24 | *      crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320L : 0);
 25 | *    }
 26 | *    Crc32Table[i] = crc;
 27 | * }
 28 |  */
 29 | __device__ __constant__ uint32_t Crc32Table[256] = {
 30 |     0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
 31 |     0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
 32 |     0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
 33 |     0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
 34 |     0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
 35 |     0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
 36 |     0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
 37 |     0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
 38 |     0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
 39 |     0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
 40 |     0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
 41 |     0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
 42 |     0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
 43 |     0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
 44 |     0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
 45 |     0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
 46 |     0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
 47 |     0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
 48 |     0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
 49 |     0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
 50 |     0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
 51 |     0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
 52 |     0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
 53 |     0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
 54 |     0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
 55 |     0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
 56 |     0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
 57 |     0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
 58 |     0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
 59 |     0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
 60 |     0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
 61 |     0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
 62 |     0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
 63 |     0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
 64 |     0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
 65 |     0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
 66 |     0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
 67 |     0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
 68 |     0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
 69 |     0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
 70 |     0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
 71 |     0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
 72 |     0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
 73 |     0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
 74 |     0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
 75 |     0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
 76 |     0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
 77 |     0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
 78 |     0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
 79 |     0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
 80 |     0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
 81 |     0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
 82 |     0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
 83 |     0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
 84 |     0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
 85 |     0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
 86 |     0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
 87 |     0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
 88 |     0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
 89 |     0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
 90 |     0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
 91 |     0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
 92 |     0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
 93 |     0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
 94 | };
 95 | 
 96 | typedef struct _NV_ENC_ENCODE_OUT_PARAMS
 97 | {
 98 |     uint32_t                  version;                 /**< [out]: Struct version. */
 99 |     uint32_t                  bitstreamSizeInBytes;    /**< [out]: Encoded bitstream size in bytes */
100 |     uint32_t                  cycleCount;              /**< [out]: Cycle count */
101 |     uint32_t                  firstPassCycleCount;     /**< [out]: First pass cycle count */
102 |     uint32_t                  reserved[60];            /**< [out]: Reserved and must be set to 0 */
103 | } NV_ENC_ENCODE_OUT_PARAMS;
104 | 
105 | static __global__ void ComputeCRCKernel(uint8_t *pBuffer, uint32_t *crcValue)
106 | {
107 |     NV_ENC_ENCODE_OUT_PARAMS *outParams = (NV_ENC_ENCODE_OUT_PARAMS *)pBuffer;
108 |     uint32_t bitstreamSize = outParams->bitstreamSizeInBytes;
109 |     uint8_t *pEncStream = pBuffer + sizeof(NV_ENC_ENCODE_OUT_PARAMS);
110 |     uint32_t crc=~0;
111 | 
112 |     for(uint32_t i = 0; i < bitstreamSize; i++)
113 |     {
114 |         crc = (crc >> 8) ^ Crc32Table[((uint8_t)(crc))  ^ (*pEncStream++)];
115 |     }
116 | 
117 |     *crcValue = ~crc;
118 | }
119 | 
120 | void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue, cudaStream_t outputCUStream)
121 | {
122 |     dim3 blockSize(1, 1, 1);
123 |     dim3 gridSize(1, 1, 1);
124 | 
125 |     ComputeCRCKernel <<<gridSize, blockSize, 0, outputCUStream >>>(pBuffer, crcValue);
126 | }
127 | 


--------------------------------------------------------------------------------
/src/cuvid/nvcuvid.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This copyright notice applies to this header file only:
  3 |  *
  4 |  * Copyright (c) 2010-2020 NVIDIA Corporation
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the software, and to permit persons to whom the
 12 |  * software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | /********************************************************************************************************************/
 29 | //! \file nvcuvid.h
 30 | //!   NVDECODE API provides video decoding interface to NVIDIA GPU devices.
 31 | //! \date 2015-2020
 32 | //!  This file contains the interface constants, structure definitions and function prototypes.
 33 | /********************************************************************************************************************/
 34 | 
 35 | #if !defined(__NVCUVID_H__)
 36 | #define __NVCUVID_H__
 37 | 
 38 | #include "cuviddec.h"
 39 | 
 40 | #if defined(__cplusplus)
 41 | extern "C" {
 42 | #endif /* __cplusplus */
 43 | 
 44 | 
 45 | /***********************************************/
 46 | //!
 47 | //! High-level helper APIs for video sources
 48 | //!
 49 | /***********************************************/
 50 | 
 51 | typedef void *CUvideosource;
 52 | typedef void *CUvideoparser;
 53 | typedef long long CUvideotimestamp;
 54 | 
 55 | 
 56 | /************************************************************************/
 57 | //! \enum cudaVideoState
 58 | //! Video source state enums
 59 | //! Used in cuvidSetVideoSourceState and cuvidGetVideoSourceState APIs
 60 | /************************************************************************/
 61 | typedef enum {
 62 |     cudaVideoState_Error   = -1,    /**< Error state (invalid source)                  */
 63 |     cudaVideoState_Stopped = 0,     /**< Source is stopped (or reached end-of-stream)  */
 64 |     cudaVideoState_Started = 1      /**< Source is running and delivering data         */
 65 | } cudaVideoState;
 66 | 
 67 | /************************************************************************/
 68 | //! \enum cudaAudioCodec
 69 | //! Audio compression enums
 70 | //! Used in CUAUDIOFORMAT structure
 71 | /************************************************************************/
 72 | typedef enum {
 73 |     cudaAudioCodec_MPEG1=0,         /**< MPEG-1 Audio               */
 74 |     cudaAudioCodec_MPEG2,           /**< MPEG-2 Audio               */
 75 |     cudaAudioCodec_MP3,             /**< MPEG-1 Layer III Audio     */
 76 |     cudaAudioCodec_AC3,             /**< Dolby Digital (AC3) Audio  */
 77 |     cudaAudioCodec_LPCM,            /**< PCM Audio                  */
 78 |     cudaAudioCodec_AAC,             /**< AAC Audio                  */
 79 | } cudaAudioCodec;
 80 | 
 81 | /************************************************************************************************/
 82 | //! \ingroup STRUCTS
 83 | //! \struct CUVIDEOFORMAT
 84 | //! Video format
 85 | //! Used in cuvidGetSourceVideoFormat API
 86 | /************************************************************************************************/
 87 | typedef struct
 88 | {
 89 |     cudaVideoCodec codec;                   /**< OUT: Compression format          */
 90 |    /**
 91 |     * OUT: frame rate = numerator / denominator (for example: 30000/1001)
 92 |     */
 93 |     struct {
 94 |         /**< OUT: frame rate numerator   (0 = unspecified or variable frame rate) */
 95 |         unsigned int numerator;
 96 |         /**< OUT: frame rate denominator (0 = unspecified or variable frame rate) */
 97 |         unsigned int denominator;
 98 |     } frame_rate;
 99 |     unsigned char progressive_sequence;     /**< OUT: 0=interlaced, 1=progressive                                      */
100 |     unsigned char bit_depth_luma_minus8;    /**< OUT: high bit depth luma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth   */
101 |     unsigned char bit_depth_chroma_minus8;  /**< OUT: high bit depth chroma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */
102 |     unsigned char min_num_decode_surfaces;  /**< OUT: Minimum number of decode surfaces to be allocated for correct
103 |                                                       decoding. The client can send this value in ulNumDecodeSurfaces
104 |                                                       (in CUVIDDECODECREATEINFO structure).
105 |                                                       This guarantees correct functionality and optimal video memory
106 |                                                       usage but not necessarily the best performance, which depends on
107 |                                                       the design of the overall application. The optimal number of
108 |                                                       decode surfaces (in terms of performance and memory utilization)
109 |                                                       should be decided by experimentation for each application, but it
110 |                                                       cannot go below min_num_decode_surfaces.
111 |                                                       If this value is used for ulNumDecodeSurfaces then it must be
112 |                                                       returned to parser during sequence callback.                     */
113 |     unsigned int coded_width;               /**< OUT: coded frame width in pixels                                      */
114 |     unsigned int coded_height;              /**< OUT: coded frame height in pixels                                     */
115 |    /**
116 |     * area of the frame that should be displayed
117 |     * typical example:
118 |     * coded_width = 1920, coded_height = 1088
119 |     * display_area = { 0,0,1920,1080 }
120 |     */
121 |     struct {
122 |         int left;                           /**< OUT: left position of display rect    */
123 |         int top;                            /**< OUT: top position of display rect     */
124 |         int right;                          /**< OUT: right position of display rect   */
125 |         int bottom;                         /**< OUT: bottom position of display rect  */
126 |     } display_area;
127 |     cudaVideoChromaFormat chroma_format;    /**< OUT:  Chroma format                   */
128 |     unsigned int bitrate;                   /**< OUT: video bitrate (bps, 0=unknown)   */
129 |    /**
130 |     * OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc)
131 |     */
132 |     struct {
133 |         int x;
134 |         int y;
135 |     } display_aspect_ratio;
136 |     /**
137 |     * Video Signal Description
138 |     * Refer section E.2.1 (VUI parameters semantics) of H264 spec file
139 |     */
140 |     struct {
141 |         unsigned char video_format          : 3; /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC, 5-Unspecified     */
142 |         unsigned char video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma range           */
143 |         unsigned char reserved_zero_bits    : 4; /**< Reserved bits                                                      */
144 |         unsigned char color_primaries;           /**< OUT: chromaticity coordinates of source primaries                  */
145 |         unsigned char transfer_characteristics;  /**< OUT: opto-electronic transfer characteristic of the source picture */
146 |         unsigned char matrix_coefficients;       /**< OUT: used in deriving luma and chroma signals from RGB primaries   */
147 |     } video_signal_description;
148 |     unsigned int seqhdr_data_length;             /**< OUT: Additional bytes following (CUVIDEOFORMATEX)                  */
149 | } CUVIDEOFORMAT;
150 | 
151 | /****************************************************************/
152 | //! \ingroup STRUCTS
153 | //! \struct CUVIDOPERATINGPOINTINFO
154 | //! Operating point information of scalable bitstream
155 | /****************************************************************/
156 | typedef struct 
157 | {
158 |     cudaVideoCodec codec;
159 |     union 
160 |     {
161 |         struct
162 |         {
163 |             unsigned char  operating_points_cnt;
164 |             unsigned char  reserved24_bits[3];
165 |             unsigned short operating_points_idc[32];
166 |         } av1;
167 |         unsigned char CodecReserved[1024];
168 |     };
169 | } CUVIDOPERATINGPOINTINFO;
170 | 
171 | /****************************************************************/
172 | //! \ingroup STRUCTS
173 | //! \struct CUVIDAV1SEQHDR
174 | //! AV1 specific sequence header information
175 | /****************************************************************/
176 | typedef struct {
177 |     unsigned int max_width;
178 |     unsigned int max_height;
179 |     unsigned char reserved[1016];
180 | } CUVIDAV1SEQHDR;
181 | 
182 | /****************************************************************/
183 | //! \ingroup STRUCTS
184 | //! \struct CUVIDEOFORMATEX
185 | //! Video format including raw sequence header information
186 | //! Used in cuvidGetSourceVideoFormat API
187 | /****************************************************************/
188 | typedef struct
189 | {
190 |     CUVIDEOFORMAT format;                 /**< OUT: CUVIDEOFORMAT structure */
191 |     union {
192 |         CUVIDAV1SEQHDR av1;
193 |         unsigned char raw_seqhdr_data[1024];  /**< OUT: Sequence header data    */
194 |     };
195 | } CUVIDEOFORMATEX;
196 | 
197 | /****************************************************************/
198 | //! \ingroup STRUCTS
199 | //! \struct CUAUDIOFORMAT
200 | //! Audio formats
201 | //! Used in cuvidGetSourceAudioFormat API
202 | /****************************************************************/
203 | typedef struct
204 | {
205 |     cudaAudioCodec codec;       /**< OUT: Compression format                                              */
206 |     unsigned int channels;      /**< OUT: number of audio channels                                        */
207 |     unsigned int samplespersec; /**< OUT: sampling frequency                                              */
208 |     unsigned int bitrate;       /**< OUT: For uncompressed, can also be used to determine bits per sample */
209 |     unsigned int reserved1;     /**< Reserved for future use                                              */
210 |     unsigned int reserved2;     /**< Reserved for future use                                              */
211 | } CUAUDIOFORMAT;
212 | 
213 | 
214 | /***************************************************************/
215 | //! \enum CUvideopacketflags
216 | //! Data packet flags
217 | //! Used in CUVIDSOURCEDATAPACKET structure
218 | /***************************************************************/
219 | typedef enum {
220 |     CUVID_PKT_ENDOFSTREAM   = 0x01,   /**< Set when this is the last packet for this stream                              */
221 |     CUVID_PKT_TIMESTAMP     = 0x02,   /**< Timestamp is valid                                                            */
222 |     CUVID_PKT_DISCONTINUITY = 0x04,   /**< Set when a discontinuity has to be signalled                                  */
223 |     CUVID_PKT_ENDOFPICTURE  = 0x08,   /**< Set when the packet contains exactly one frame or one field                   */
224 |     CUVID_PKT_NOTIFY_EOS    = 0x10,   /**< If this flag is set along with CUVID_PKT_ENDOFSTREAM, an additional (dummy)
225 |                                            display callback will be invoked with null value of CUVIDPARSERDISPINFO which
226 |                                            should be interpreted as end of the stream.                                   */
227 | } CUvideopacketflags;
228 | 
229 | /*****************************************************************************/
230 | //! \ingroup STRUCTS
231 | //! \struct CUVIDSOURCEDATAPACKET
232 | //! Data Packet
233 | //! Used in cuvidParseVideoData API
234 | //! IN for cuvidParseVideoData
235 | /*****************************************************************************/
236 | typedef struct _CUVIDSOURCEDATAPACKET
237 | {
238 |     unsigned long flags;            /**< IN: Combination of CUVID_PKT_XXX flags                              */
239 |     unsigned long payload_size;     /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */
240 |     const unsigned char *payload;   /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */
241 |     CUvideotimestamp timestamp;     /**< IN: Presentation time stamp (10MHz clock), only valid if
242 |                                              CUVID_PKT_TIMESTAMP flag is set                                 */
243 | } CUVIDSOURCEDATAPACKET;
244 | 
245 | // Callback for packet delivery
246 | typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *);
247 | 
248 | /**************************************************************************************************************************/
249 | //! \ingroup STRUCTS
250 | //! \struct CUVIDSOURCEPARAMS
251 | //! Describes parameters needed in cuvidCreateVideoSource API
252 | //! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported
253 | //! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed.
254 | /**************************************************************************************************************************/
255 | typedef struct _CUVIDSOURCEPARAMS
256 | {
257 |     unsigned int ulClockRate;                   /**< IN: Time stamp units in Hz (0=default=10000000Hz)      */
258 |     unsigned int bAnnexb : 1;                   /**< IN: AV1 annexB stream                                  */
259 |     unsigned int uReserved : 31;                /**< Reserved for future use - set to zero                  */
260 |     unsigned int uReserved1[6];                 /**< Reserved for future use - set to zero                  */
261 |     void *pUserData;                            /**< IN: User private data passed in to the data handlers   */
262 |     PFNVIDSOURCECALLBACK pfnVideoDataHandler;   /**< IN: Called to deliver video packets                    */
263 |     PFNVIDSOURCECALLBACK pfnAudioDataHandler;   /**< IN: Called to deliver audio packets.                   */
264 |     void *pvReserved2[8];                       /**< Reserved for future use - set to NULL                  */
265 | } CUVIDSOURCEPARAMS;
266 | 
267 | 
268 | /**********************************************/
269 | //! \ingroup ENUMS
270 | //! \enum CUvideosourceformat_flags
271 | //! CUvideosourceformat_flags
272 | //! Used in cuvidGetSourceVideoFormat API
273 | /**********************************************/
274 | typedef enum {
275 |     CUVID_FMT_EXTFORMATINFO = 0x100             /**< Return extended format structure (CUVIDEOFORMATEX) */
276 | } CUvideosourceformat_flags;
277 | 
278 | #if !defined(__APPLE__)
279 | /***************************************************************************************************************************/
280 | //! \ingroup FUNCTS
281 | //! \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams)
282 | //! Create CUvideosource object. CUvideosource spawns demultiplexer thread that provides two callbacks: 
283 | //! pfnVideoDataHandler() and pfnAudioDataHandler()
284 | //! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported 
285 | //! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed.
286 | /***************************************************************************************************************************/
287 | CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams);
288 | 
289 | /***************************************************************************************************************************/
290 | //! \ingroup FUNCTS
291 | //! \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams)
292 | //! Create video source
293 | /***************************************************************************************************************************/
294 | CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams);
295 | 
296 | /********************************************************************/
297 | //! \ingroup FUNCTS
298 | //! \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj)
299 | //! Destroy video source
300 | /********************************************************************/
301 | CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj);
302 | 
303 | /******************************************************************************************/
304 | //! \ingroup FUNCTS
305 | //! \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state)
306 | //! Set video source state to:
307 | //! cudaVideoState_Started - to signal the source to run and deliver data
308 | //! cudaVideoState_Stopped - to stop the source from delivering the data
309 | //! cudaVideoState_Error   - invalid source
310 | /******************************************************************************************/
311 | CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state);
312 | 
313 | /******************************************************************************************/
314 | //! \ingroup FUNCTS
315 | //! \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj)
316 | //! Get video source state
317 | //! Returns:
318 | //! cudaVideoState_Started - if Source is running and delivering data
319 | //! cudaVideoState_Stopped - if Source is stopped or reached end-of-stream
320 | //! cudaVideoState_Error   - if Source is in error state
321 | /******************************************************************************************/
322 | cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj);
323 | 
324 | /******************************************************************************************************************/
325 | //! \ingroup FUNCTS
326 | //! \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags)
327 | //! Gets video source format in pvidfmt, flags is set to combination of CUvideosourceformat_flags as per requirement
328 | /******************************************************************************************************************/
329 | CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags);
330 | 
331 | /**************************************************************************************************************************/
332 | //! \ingroup FUNCTS
333 | //! \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags)
334 | //! Get audio source format
335 | //! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported 
336 | //! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed.
337 | /**************************************************************************************************************************/
338 | CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags);
339 | 
340 | #endif
341 | /**********************************************************************************/
342 | //! \ingroup STRUCTS
343 | //! \struct CUVIDPARSERDISPINFO
344 | //! Used in cuvidParseVideoData API with PFNVIDDISPLAYCALLBACK pfnDisplayPicture
345 | /**********************************************************************************/
346 | typedef struct _CUVIDPARSERDISPINFO
347 | {
348 |     int picture_index;          /**< OUT: Index of the current picture                                                         */
349 |     int progressive_frame;      /**< OUT: 1 if progressive frame; 0 otherwise                                                  */
350 |     int top_field_first;        /**< OUT: 1 if top field is displayed first; 0 otherwise                                       */
351 |     int repeat_first_field;     /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling, 
352 |                                      -1=unpaired field)                                                                        */
353 |     CUvideotimestamp timestamp; /**< OUT: Presentation time stamp                                                              */
354 | } CUVIDPARSERDISPINFO;
355 | 
356 | /***********************************************************************************************************************/
357 | //! Parser callbacks
358 | //! The parser will call these synchronously from within cuvidParseVideoData(), whenever there is sequence change or a picture
359 | //! is ready to be decoded and/or displayed. First argument in functions is "void *pUserData" member of structure CUVIDSOURCEPARAMS
360 | //! Return values from these callbacks are interpreted as below. If the callbacks return failure, it will be propagated by
361 | //! cuvidParseVideoData() to the application.
362 | //! Parser picks default operating point as 0 and outputAllLayers flag as 0 if PFNVIDOPPOINTCALLBACK is not set or return value is 
363 | //! -1 or invalid operating point.
364 | //! PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces
365 | //! while creating parser)
366 | //! PFNVIDDECODECALLBACK   : 0: fail, >=1: succeeded
367 | //! PFNVIDDISPLAYCALLBACK  : 0: fail, >=1: succeeded
368 | //! PFNVIDOPPOINTCALLBACK  : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit 10-10: outputAllLayers, bit 11-30: reserved)
369 | /***********************************************************************************************************************/
370 | typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
371 | typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
372 | typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
373 | typedef int (CUDAAPI *PFNVIDOPPOINTCALLBACK)(void *, CUVIDOPERATINGPOINTINFO*);
374 | 
375 | /**************************************/
376 | //! \ingroup STRUCTS
377 | //! \struct CUVIDPARSERPARAMS
378 | //! Used in cuvidCreateVideoParser API
379 | /**************************************/
380 | typedef struct _CUVIDPARSERPARAMS
381 | {
382 |     cudaVideoCodec CodecType;                   /**< IN: cudaVideoCodec_XXX                                                  */
383 |     unsigned int ulMaxNumDecodeSurfaces;        /**< IN: Max # of decode surfaces (parser will cycle through these)          */
384 |     unsigned int ulClockRate;                   /**< IN: Timestamp units in Hz (0=default=10000000Hz)                        */
385 |     unsigned int ulErrorThreshold;              /**< IN: % Error threshold (0-100) for calling pfnDecodePicture (100=always 
386 |                                                      IN: call pfnDecodePicture even if picture bitstream is fully corrupted) */
387 |     unsigned int ulMaxDisplayDelay;             /**< IN: Max display queue delay (improves pipelining of decode with display)
388 |                                                          0=no delay (recommended values: 2..4)                               */
389 |     unsigned int bAnnexb : 1;                   /**< IN: AV1 annexB stream                                                   */
390 |     unsigned int uReserved : 31;                /**< Reserved for future use - set to zero                                   */
391 |     unsigned int uReserved1[4];                 /**< IN: Reserved for future use - set to 0                                  */
392 |     void *pUserData;                            /**< IN: User data for callbacks                                             */
393 |     PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< IN: Called before decoding frames and/or whenever there is a fmt change */
394 |     PFNVIDDECODECALLBACK pfnDecodePicture;      /**< IN: Called when a picture is ready to be decoded (decode order)         */
395 |     PFNVIDDISPLAYCALLBACK pfnDisplayPicture;    /**< IN: Called whenever a picture is ready to be displayed (display order)  */
396 |     PFNVIDOPPOINTCALLBACK pfnGetOperatingPoint; /**< IN: Called from AV1 sequence header to get operating point of a AV1 
397 |                                                          scalable bitstream                                                  */
398 |     void *pvReserved2[6];                       /**< Reserved for future use - set to NULL                                   */
399 |     CUVIDEOFORMATEX *pExtVideoInfo;             /**< IN: [Optional] sequence header data from system layer                   */
400 | } CUVIDPARSERPARAMS;
401 | 
402 | /************************************************************************************************/
403 | //! \ingroup FUNCTS
404 | //! \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
405 | //! Create video parser object and initialize
406 | /************************************************************************************************/
407 | CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);
408 | 
409 | /************************************************************************************************/
410 | //! \ingroup FUNCTS
411 | //! \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
412 | //! Parse the video data from source data packet in pPacket 
413 | //! Extracts parameter sets like SPS, PPS, bitstream etc. from pPacket and 
414 | //! calls back pfnDecodePicture with CUVIDPICPARAMS data for kicking of HW decoding
415 | //! calls back pfnSequenceCallback with CUVIDEOFORMAT data for initial sequence header or when
416 | //! the decoder encounters a video format change
417 | //! calls back pfnDisplayPicture with CUVIDPARSERDISPINFO data to display a video frame
418 | /************************************************************************************************/
419 | CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);
420 | 
421 | /************************************************************************************************/
422 | //! \ingroup FUNCTS
423 | //! \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
424 | //! Destroy the video parser
425 | /************************************************************************************************/
426 | CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj);
427 | 
428 | /**********************************************************************************************/
429 | 
430 | #if defined(__cplusplus)
431 | }
432 | #endif /* __cplusplus */
433 | 
434 | #endif // __NVCUVID_H__
435 | 
436 | 
437 | 


--------------------------------------------------------------------------------
/src/decoder.cpp:
--------------------------------------------------------------------------------
 1 | #include "decoder.h"
 2 | #include "cuvid/Utils/NvCodecUtils.h"
 3 | #include "cuvid/Utils/FFmpegDemuxer.h"
 4 | #include "cuvid/Utils/ColorSpace.h"
 5 | #include "cuvid/AppDecUtils.h"
 6 | #include <libavcodec/avcodec.h>
 7 | #include "cuvid/Utils/Logger.h"
 8 | #include <string.h>
 9 | #include "cuvid/NvDecoder/NvDecoder.h"
10 | #include <cuda_runtime.h>
11 | 
12 | #define DEC(handle) ((NvDecoder*)(handle->dec))
13 | 
14 | 
15 | videoDecoderHandle videoDecoder_init(enum AVCodecID codec){
16 |     videoDecoderHandle handle = (videoDecoderHandle)malloc(sizeof(videoDecoder));
17 |     ck(cuInit(0));
18 |     handle->cuContext = nullptr;
19 |     createCudaContext(&(handle->cuContext), 0, 0);
20 |     handle->dec = (void*)(new NvDecoder(handle->cuContext, false, FFmpeg2NvCodecId(codec)));
21 |     return handle;
22 | }
23 | 
24 | int videoDecoder_destroy(videoDecoderHandle handle){
25 |     delete(handle->dec);
26 |     cuCtxDestroy(handle->cuContext);
27 |     handle->cuContext = nullptr;
28 |     handle->dec = nullptr;
29 |     return 0;
30 | }
31 | 
32 | void videoFrameList_destory(videoFrameList** list){
33 |     if(*list != NULL){
34 |         if((*list)->pFrames != NULL){
35 |             free((*list)->pFrames);
36 |             (*list)->pFrames = NULL;
37 |         }
38 |         free((*list));
39 |         *list = NULL;
40 |     }
41 | }
42 | 
43 | videoFrameList* videoFrameList_init(int width, int height, int length){
44 |     videoFrameList* frameList = (videoFrameList*)malloc(sizeof(videoFrameList));
45 |     frameList->height = height;
46 |     frameList->width = width;
47 |     frameList->length = length;
48 |     frameList->perFrameSize = height*width*4;
49 |     frameList->pFrames = (uint8_t*)malloc(frameList->height * frameList->width * 4 * frameList->length);
50 |     return frameList;
51 | }
52 | 
53 | 
54 | videoFrameList* videoDecoder_decode(videoDecoderHandle handle, u_int8_t* in, size_t in_size, char*error){
55 |     int nFrameReturned;
56 |     int i;
57 |     int frameSize;
58 |     uint8_t *pVideo = NULL, *pFrame;
59 |     videoFrameList* frameList = NULL;
60 |     CUdeviceptr dpFrame = 0, nv12Frame = 0;
61 |     if(error!=NULL){
62 |         error[0] = NULL;
63 |     }
64 |     try{
65 |         nFrameReturned = DEC(handle)->Decode(in, in_size);
66 |     }catch(NVDECException e){
67 |         if(error != NULL){
68 |             strcpy(error, e.what());
69 |         }
70 |         return NULL;
71 |     }
72 |     for (i = 0; i < nFrameReturned; i++) {
73 |         pFrame = DEC(handle)->GetFrame();
74 |         frameSize = DEC(handle)->GetFrameSize();
75 |         if(i == 0){
76 |             frameList = videoFrameList_init(DEC(handle)->GetWidth(), DEC(handle)->GetHeight(), nFrameReturned);
77 |             ck(cuMemAlloc(&dpFrame, frameList->width * frameList->height * 4));
78 |             ck(cuMemAlloc(&nv12Frame, frameSize));
79 |         }
80 |         cudaMemcpy((void*)nv12Frame, pFrame, frameSize, cudaMemcpyHostToDevice);
81 |         Nv12ToColor32<BGRA32>((uint8_t*)nv12Frame, frameList->width, (uint8_t *)dpFrame, 4 * frameList->width, frameList->width, frameList->height);
82 |         int output_size = frameList->width * frameList->height * 4;
83 |         cudaMemcpy((void*)(frameList->pFrames+i*frameList->perFrameSize), (uint8_t*)dpFrame, output_size, cudaMemcpyDeviceToHost);
84 |     }
85 |     cuMemFree(dpFrame);
86 |     return frameList;
87 | }


--------------------------------------------------------------------------------
/src/decoder.h:
--------------------------------------------------------------------------------
 1 | #ifndef NVCODEC_PYTHON_VIDEO_DECODER_H
 2 | #define NVCODEC_PYTHON_VIDEO_DECODER_H
 3 | 
 4 | 
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
10 | #include <cuda.h>
11 | #include "cuvid/nvcuvid.h"
12 | #include <libavcodec/avcodec.h>
13 | 
14 | 
15 | #ifdef __cplusplus
16 | }
17 | #endif
18 | 
19 | typedef struct
20 | {
21 |     CUcontext cuContext;
22 |     void* dec;
23 | }videoDecoder;
24 | 
25 | typedef videoDecoder* videoDecoderHandle;
26 | 
27 | typedef struct
28 | {
29 |     uint8_t* pFrames;
30 |     int perFrameSize;
31 |     int width;
32 |     int height;
33 |     int length;
34 | }videoFrameList;
35 | 
36 | 
37 | 
38 | videoDecoderHandle videoDecoder_init(enum AVCodecID codec);
39 | int videoDecoder_destroy(videoDecoderHandle handle);
40 | videoFrameList* videoDecoder_decode(videoDecoderHandle handle, u_int8_t* in, size_t in_size, char*error);
41 | void videoFrameList_destory(videoFrameList**);
42 | videoFrameList* videoFrameList_init(int width, int height, int length);
43 | 
44 | 
45 | #endif


--------------------------------------------------------------------------------
/src/encoder.cpp:
--------------------------------------------------------------------------------
  1 | #include "encoder.h"
  2 | #include "cuvid/NvEncoder/NvEncoderCuda.h"
  3 | #include "cuvid/Utils/NvCodecUtils.h"
  4 | #include "cuvid/Utils/FFmpegDemuxer.h"
  5 | #include "cuvid/Utils/ColorSpace.h"
  6 | #include "cuvid/AppDecUtils.h"
  7 | #include "cuvid/Utils/NvEncoderCLIOptions.h"
  8 | #include "cuvid/nvEncodeAPI.h"
  9 | 
 10 | #define ENC(handle) ((NvEncoderCuda*)(handle->enc))
 11 | 
 12 | void _InitializeEncoder(NvEncoder* pEnc, NvEncoderInitParam encodeCLIOptions, NV_ENC_BUFFER_FORMAT eFormat)
 13 | {
 14 |     NV_ENC_INITIALIZE_PARAMS initializeParams = { NV_ENC_INITIALIZE_PARAMS_VER };
 15 |     NV_ENC_CONFIG encodeConfig = { NV_ENC_CONFIG_VER };
 16 | 
 17 |     initializeParams.encodeConfig = &encodeConfig;
 18 | 
 19 |     pEnc->CreateDefaultEncoderParams(&initializeParams, encodeCLIOptions.GetEncodeGUID(), encodeCLIOptions.GetPresetGUID(), encodeCLIOptions.GetTuningInfo());
 20 |     encodeCLIOptions.SetInitParams(&initializeParams, eFormat);
 21 | 
 22 |     pEnc->CreateEncoder(&initializeParams);
 23 | }
 24 | 
 25 | 
 26 | videoEncoderHandle videoEncoder_init(int width, int height){
 27 |     videoEncoderHandle handle = (videoEncoderHandle)malloc(sizeof(videoEncoder));
 28 |     ck(cuInit(0));
 29 |     handle->cuContext = nullptr;
 30 |     createCudaContext(&(handle->cuContext), 0, 0);
 31 |     handle->enc = new NvEncoderCuda(handle->cuContext, width, height, NV_ENC_BUFFER_FORMAT_ARGB);
 32 | 
 33 |     NV_ENC_BUFFER_FORMAT eFormat = NV_ENC_BUFFER_FORMAT_ARGB;
 34 |     NvEncoderInitParam encodeCLIOptions;
 35 |     _InitializeEncoder(ENC(handle), encodeCLIOptions, eFormat);
 36 |     return handle;
 37 | }
 38 | 
 39 | int videoEncoder_destroy(videoEncoderHandle handle){
 40 |     ENC(handle)->DestroyEncoder();
 41 |     delete(ENC(handle));
 42 |     cuCtxDestroy(handle->cuContext);
 43 |     handle->cuContext = nullptr;
 44 |     handle->enc = nullptr;
 45 |     return 0;
 46 | }
 47 | 
 48 | videoEncodedBuffer* videoEncoder_encode_end(videoEncoderHandle handle){
 49 |     std::vector<std::vector<uint8_t>> vPacket;
 50 |     ENC(handle)->EndEncode(vPacket);
 51 |     int currentSize = 0;
 52 |     for (std::vector<uint8_t> &packet : vPacket){
 53 |         currentSize += packet.size();
 54 |     }
 55 |     if(currentSize == 0){
 56 |         return NULL;
 57 |     }
 58 |     videoEncodedBuffer* buffer = videoEncodedBuffer_init(currentSize);
 59 |     currentSize = 0;
 60 |     for (std::vector<uint8_t> &packet : vPacket){
 61 |         memcpy(buffer->data+currentSize, reinterpret_cast<char*>(packet.data()), packet.size());
 62 |         currentSize+=packet.size();
 63 |     }
 64 |     return buffer;
 65 | }
 66 | 
 67 | videoEncodedBuffer* videoEncoder_encode(videoEncoderHandle handle, u_int8_t* in){
 68 |     std::vector<std::vector<uint8_t>> vPacket;
 69 |     
 70 |     const NvEncInputFrame* encoderInputFrame = ENC(handle)->GetNextInputFrame();
 71 |     NvEncoderCuda::CopyToDeviceFrame(handle->cuContext, in, ENC(handle)->GetWidthInBytes(NV_ENC_BUFFER_FORMAT_ARGB,ENC(handle)->GetEncodeWidth()), (CUdeviceptr)encoderInputFrame->inputPtr,
 72 |         (int)encoderInputFrame->pitch,
 73 |         ENC(handle)->GetEncodeWidth(),
 74 |         ENC(handle)->GetEncodeHeight(),
 75 |         CU_MEMORYTYPE_HOST, 
 76 |         encoderInputFrame->bufferFormat,
 77 |         encoderInputFrame->chromaOffsets,
 78 |         encoderInputFrame->numChromaPlanes);
 79 |     ENC(handle)->EncodeFrame(vPacket);
 80 |     int currentSize = 0;
 81 |     for (std::vector<uint8_t> &packet : vPacket){
 82 |         currentSize += packet.size();
 83 |     }
 84 |     if(currentSize == 0){
 85 |         return NULL;
 86 |     }
 87 |     videoEncodedBuffer* buffer = videoEncodedBuffer_init(currentSize);
 88 |     currentSize = 0;
 89 |     for (std::vector<uint8_t> &packet : vPacket){
 90 |         memcpy(buffer->data+currentSize, reinterpret_cast<char*>(packet.data()), packet.size());
 91 |         currentSize+=packet.size();
 92 |     }
 93 |     return buffer;
 94 | }
 95 | 
 96 | void videoEncodedBuffer_destory(videoEncodedBuffer** buffer){
 97 |     if(*buffer == NULL){
 98 |         if((*buffer)->data != NULL){
 99 |             free((*buffer)->data);
100 |             (*buffer)->data = NULL;
101 |         }
102 |         free(*buffer);
103 |         (*buffer) = NULL;
104 |     }
105 | }
106 | 
107 | videoEncodedBuffer* videoEncodedBuffer_init(int size){
108 |     videoEncodedBuffer* frame = (videoEncodedBuffer*)malloc(sizeof(videoEncodedBuffer));
109 |     frame->size = size;
110 |     frame->data = (u_int8_t*)malloc(frame->size);
111 |     return frame;
112 | }
113 | 


--------------------------------------------------------------------------------
/src/encoder.h:
--------------------------------------------------------------------------------
 1 | #ifndef NVCODEC_PYTHON_VIDEO_ENCODER_H
 2 | #define NVCODEC_PYTHON_VIDEO_ENCODER_H
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #include <cuda.h>
 9 | #include "cuvid/nvcuvid.h"
10 | #include <libavcodec/avcodec.h>
11 | 
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | typedef struct
18 | {
19 |     CUcontext cuContext;
20 |     void* enc;
21 | }videoEncoder;
22 | 
23 | typedef videoEncoder* videoEncoderHandle;
24 | 
25 | 
26 | typedef struct
27 | {
28 |     uint8_t* data;
29 |     int size;
30 | }videoEncodedBuffer;
31 | 
32 | 
33 | videoEncoderHandle videoEncoder_init(int width, int height);
34 | int videoEncoder_destroy(videoEncoderHandle handle);
35 | videoEncodedBuffer* videoEncoder_encode(videoEncoderHandle handle, u_int8_t* in);
36 | videoEncodedBuffer* videoEncoder_encode_end(videoEncoderHandle handle);
37 | void videoEncodedBuffer_destory(videoEncodedBuffer**);
38 | videoEncodedBuffer* videoEncodedBuffer_init(int size);
39 | 
40 | 
41 | #endif


--------------------------------------------------------------------------------
/src/source.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include "source.h"
  3 | 
  4 | videoSourceHandle videoSource_init(char* url, int listen){
  5 | #ifdef DEBUG
  6 |     av_log_set_level(AV_LOG_TRACE);
  7 | #else
  8 |     av_log_set_level(AV_LOG_FATAL);
  9 | #endif
 10 |     av_register_all();
 11 |     avformat_network_init();
 12 |     videoSourceHandle handle = (videoSourceHandle)malloc(sizeof(videoSource));
 13 |     handle->url = (char*)malloc(strlen(url));
 14 |     memcpy(handle->url, url, strlen(url));
 15 |     handle->bsfc = NULL;
 16 |     handle->options = NULL;
 17 |     handle->pFormatCtx = NULL;
 18 |     handle->video_stream_index = 0;
 19 |     
 20 |     av_bsf_alloc(av_bsf_get_by_name("h264_mp4toannexb"), &(handle->bsfc));
 21 | 
 22 |     if(listen){
 23 |         av_dict_set(&(handle->options), "listen", "1", 0);
 24 |     }
 25 |     // av_dict_set(&(handle->options), "timeout", "3", 0);
 26 | 
 27 |     return handle;
 28 | }
 29 | 
 30 | int videoSource_destroy(videoSourceHandle handle){
 31 |     if(handle->pFormatCtx != NULL){
 32 |         videoSource_close(handle);
 33 |     }
 34 |     if(handle->url != NULL){
 35 |         free(handle->url);
 36 |         handle->url = NULL;
 37 |     }
 38 |     if(handle->bsfc != NULL){
 39 |         av_bsf_free(&(handle->bsfc));
 40 |         handle->bsfc = NULL;
 41 |     }
 42 |     free(handle);
 43 |     return 0;
 44 | }
 45 | 
 46 | int videoSource_connect(videoSourceHandle handle){
 47 |     if(handle->pFormatCtx != NULL){
 48 |         return 1;
 49 |     }
 50 | 
 51 |     char error_buf[128] = {0};
 52 |     int error_code;
 53 |     if ((error_code = avformat_open_input(&(handle->pFormatCtx), handle->url, NULL, &(handle->options))) < 0){
 54 |         // AVERROR_INPUT_CHANGED
 55 |         av_make_error_string(error_buf, 127, error_code);
 56 |         printf("RTMP/Listen %d: %s\n", error_code, error_buf);
 57 |         // throw RTMPException(error_buf, 301);
 58 |         return -301;
 59 |     }
 60 |     if ((error_code = avformat_find_stream_info(handle->pFormatCtx, NULL))<0)
 61 |     {
 62 |         printf("Couldn't find stream information\n");
 63 |         return -302;
 64 |     }
 65 |     unsigned int i;
 66 |     for (i = 0; i< handle->pFormatCtx->nb_streams; i++){
 67 |         if (handle->pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
 68 |         {
 69 |             handle->video_stream_index = i;
 70 |             avcodec_parameters_copy(handle->bsfc->par_in,handle->pFormatCtx->streams[i]->codecpar);
 71 |             av_bsf_init(handle->bsfc);
 72 |             break;
 73 |         }
 74 |     }
 75 |     if (handle->pFormatCtx->nb_streams == i)
 76 |     {
 77 |         printf("Didn't find a video stream\n");
 78 |         // throw RTMPException("Didn't find a video stream", 303);
 79 |         return -303;
 80 |     }
 81 |     return 0;
 82 | }
 83 | 
 84 | 
 85 | int videoSource_read(videoSourceHandle handle, AVPacket* packet){
 86 |     if(handle->pFormatCtx == NULL){
 87 |         if(videoSource_connect(handle)<0){
 88 |             return -1;
 89 |         }
 90 |     }
 91 |     while(av_read_frame(handle->pFormatCtx, packet)==0){
 92 |         av_bsf_send_packet(handle->bsfc, packet);
 93 |         av_bsf_receive_packet(handle->bsfc, packet);
 94 |         if(packet->size>0 && packet->stream_index == handle->video_stream_index){
 95 |             return packet->size;
 96 |         }
 97 |     }
 98 |     return -1;
 99 | }
100 | 
101 | int videoSource_close(videoSourceHandle handle){
102 |     if( handle->pFormatCtx != NULL) {
103 |         avformat_close_input(&(handle->pFormatCtx));
104 |         av_free(handle->pFormatCtx);
105 |         handle->pFormatCtx = NULL;
106 |     }
107 |     return 0;
108 | }
109 | 
110 | enum AVPixelFormat videoSource_getAVPixelFormat(videoSourceHandle handle){
111 |     if(handle->pFormatCtx != NULL) {
112 |         return (AVPixelFormat)(handle->pFormatCtx->streams[handle->video_stream_index]->codecpar->format);
113 |     }else{
114 |         return AV_PIX_FMT_NONE;
115 |     }
116 | }
117 | 
118 | enum AVCodecID videoSource_getVideoCodecId(videoSourceHandle handle){
119 |     if(handle->pFormatCtx != NULL) {
120 |         return handle->pFormatCtx->video_codec_id;
121 |     }else{
122 |         return AV_CODEC_ID_NONE;
123 |     }
124 | }
125 | 
126 | int videoSource_isConnect(videoSourceHandle handle){
127 |     return handle->pFormatCtx != NULL;
128 | }
129 | 


--------------------------------------------------------------------------------
/src/source.h:
--------------------------------------------------------------------------------
 1 | #ifndef NVCODEC_PYTHON_VIDEO_SOURCE_H
 2 | #define NVCODEC_PYTHON_VIDEO_SOURCE_H
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #include <libavformat/avformat.h>
 9 | 
10 | #ifdef __cplusplus
11 | }
12 | #endif
13 | 
14 | 
15 | typedef struct
16 | {
17 |     AVFormatContext *pFormatCtx;
18 |     AVDictionary *options;
19 |     char* url;
20 |     AVBSFContext *bsfc;
21 |     int video_stream_index;
22 | }videoSource;
23 | 
24 | typedef videoSource* videoSourceHandle;
25 | 
26 | enum AVPixelFormat videoSource_getAVPixelFormat(videoSourceHandle handle);
27 | enum AVCodecID videoSource_getVideoCodecId(videoSourceHandle handle);
28 | videoSourceHandle videoSource_init(char* url, int listen);
29 | int videoSource_destroy(videoSourceHandle handle);
30 | int videoSource_connect(videoSourceHandle handle);
31 | int videoSource_read(videoSourceHandle handle, AVPacket* packet);
32 | int videoSource_isConnect(videoSourceHandle handle);
33 | int videoSource_close(videoSourceHandle handle);
34 | 
35 | 
36 | #endif //NVCODEC_PYTHON_VIDEO_SOURCE_H
37 | 


--------------------------------------------------------------------------------
/tests/cpp/decode.cpp:
--------------------------------------------------------------------------------
 1 | #include "source.h"
 2 | #include "decoder.h"
 3 | #include "stdio.h"
 4 | 
 5 | #ifndef TEST_RTMP_URL
 6 | #define TEST_RTMP_URL "rtmp://58.200.131.2:1935/livetv/hunantv"
 7 | #endif
 8 | 
 9 | int main(int argc, char** argv){
10 |     videoSourceHandle videoSource = videoSource_init( (char*)TEST_RTMP_URL, 0);
11 |     // videoSourceHandle videoSource = videoSource_init((char*)"rtmp://10.10.1.108:8981/app/video/001", 1);
12 |     AVPacket packet;
13 |     int width, height, size;
14 |     cudaVideoSurfaceFormat format;
15 |     videoDecoderHandle videoDecode = videoDecoder_init(AV_CODEC_ID_H264);
16 |     videoFrameList* frameList;
17 |     while(1){
18 |         if(videoSource_read(videoSource, &packet)<0){
19 |             break;
20 |         }
21 |         frameList = videoDecoder_decode(videoDecode, packet.data, packet.size);
22 |         if(frameList!=NULL){
23 |             printf("Decode Frame %dx%d, Frames %d\n", frameList->width, frameList->height, frameList->length);
24 |         }
25 |         videoFrameList_destory(&frameList);
26 |     }
27 |     videoDecoder_destroy(videoDecode);
28 |     return 0;
29 | }


--------------------------------------------------------------------------------
/tests/cpp/encode.cpp:
--------------------------------------------------------------------------------
 1 | #include "source.h"
 2 | #include "decoder.h"
 3 | #include "encoder.h"
 4 | #include "stdio.h"
 5 | 
 6 | #ifndef TEST_RTMP_URL
 7 | #define TEST_RTMP_URL "rtmp://58.200.131.2:1935/livetv/hunantv"
 8 | #endif
 9 | 
10 | int main(int argc, char** argv){
11 |     videoSourceHandle videoSource = videoSource_init( (char*)TEST_RTMP_URL, 0);
12 |     // videoSourceHandle videoSource = videoSource_init((char*)"rtmp://10.10.1.108:8981/app/video/001", 1);
13 |     AVPacket packet;
14 |     int width, height, size;
15 |     cudaVideoSurfaceFormat format;
16 |     videoDecoderHandle videoDecode = videoDecoder_init(AV_CODEC_ID_H264);
17 |     videoEncoderHandle videoEncode = NULL;
18 |     videoFrameList* frameList;
19 |     videoEncodedBuffer* buffer;
20 | #ifdef DEBUG
21 |     FILE* fp = fopen("/tmp/encode.h264", "wb");
22 | #endif
23 |     while(1){
24 |         if(videoSource_read(videoSource, &packet)<0){
25 |             break;
26 |         }
27 |         frameList = videoDecoder_decode(videoDecode, packet.data, packet.size);
28 |         if(frameList==NULL){
29 |             continue;
30 |         }
31 |         if(videoEncode==NULL){
32 |             videoEncode = videoEncoder_init(frameList->width, frameList->height);
33 |         }
34 |         buffer = videoEncoder_encode(videoEncode, frameList->pFrames);
35 |         videoFrameList_destory(&frameList);
36 |         if(buffer == NULL){
37 |             continue;
38 |         }
39 |         printf("Encode Buffer size: %d\n", buffer->size);
40 | #ifdef DEBUG
41 |         fwrite(buffer->data, 1, buffer->size, fp);
42 | #endif
43 |         videoEncodedBuffer_destory(&buffer);
44 |     }
45 | #ifdef DEBUG
46 |     fclose(fp);
47 | #endif
48 |     videoDecoder_destroy(videoDecode);
49 |     videoEncoder_destroy(videoEncode);
50 |     return 0;
51 | }


--------------------------------------------------------------------------------
/tests/cpp/read_source.cpp:
--------------------------------------------------------------------------------
 1 | #include "source.h"
 2 | #include "stdio.h"
 3 | 
 4 | #ifndef TEST_RTMP_URL
 5 | #define TEST_RTMP_URL "rtmp://58.200.131.2:1935/livetv/hunantv"
 6 | #endif
 7 | 
 8 | int main(int argc, char** argv){
 9 |     videoSourceHandle videoSource = videoSource_init( (char*)TEST_RTMP_URL, 0);
10 |     // videoSourceHandle videoSource = videoSource_init((char*)"rtmp://10.10.1.108:8981/app/video/001", 1);
11 |     AVPacket packet;
12 | #ifdef DEBUG
13 |     FILE* fp = fopen("/tmp/save.h264", "wb");
14 | #endif
15 |     while(1){
16 |         if(videoSource_read(videoSource, &packet)<0){
17 |             break;
18 |         }
19 | #ifdef DEBUG
20 |         fwrite(packet.data, 1, packet.size, fp);
21 | #endif
22 |         printf("Read AVPacket Index: %d Size:%d\n", packet.stream_index, packet.size);
23 |     }
24 | #ifdef DEBUG
25 |     fclose(fp);
26 | #endif
27 |     return 0;
28 | }


--------------------------------------------------------------------------------
/tests/python/read_source_opencv.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import cv2
 4 | 
 5 | lib_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../build/lib.linux-x86_64-3.6'))
 6 | sys.path.append(lib_path)
 7 | from nvcodec import VideoSource, VideoDecoder, VideoEncoder
 8 | 
 9 | source = VideoSource("rtmp://58.200.131.2:1935/livetv/hunantv")
10 | decoder = VideoDecoder()
11 | while True:
12 |     h264_data = source.read()
13 |     if not h264_data:
14 |         break
15 |     frames = decoder.decode(h264_data)
16 |     for frame in frames:
17 |         cv2.imshow("Demo", frame)
18 |         cv2.waitKey(1)
19 | 
20 | 


--------------------------------------------------------------------------------
/tests/python/read_source_sdl.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | from cv2 import cv2
 4 | import sdl2
 5 | import sdl2.ext
 6 | import numpy
 7 | import time
 8 | 
 9 | lib_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../build/lib.linux-x86_64-3.6'))
10 | sys.path.append(lib_path)
11 | from nvcodec import VideoSource, VideoDecoder, VideoEncoder
12 | 
13 | windowArray = None
14 | window = None
15 | 
16 | def showImage(image):
17 |     global windowArray, window
18 |     if windowArray is None:
19 |         sdl2.ext.init()
20 |         window = sdl2.ext.Window("test", size=(image.shape[0],image.shape[1]))
21 |         window.show()
22 |         windowSurf = sdl2.SDL_GetWindowSurface(window.window)
23 |         windowArray = sdl2.ext.pixels3d(windowSurf.contents)
24 |     numpy.copyto(windowArray, image)
25 |     window.refresh()
26 | 
27 | 
28 | # source = VideoSource("rtmp://58.200.131.2:1935/livetv/hunantv")
29 | source = VideoSource("/tmp/1.mp4")
30 | decoder = VideoDecoder()
31 | while True:
32 |     h264_data = source.read()
33 |     if not h264_data:
34 |         break
35 |     frames = decoder.decode(h264_data, 1)
36 |     for frame in frames:
37 |         showImage(frame)


--------------------------------------------------------------------------------