├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.deprecated.md
├── README.md
├── examples
    ├── cmake
    │   └── FindEGL.cmake
    ├── concurrent.cpp
    ├── egl.cpp
    ├── file.cpp
    ├── lossless.cpp
    ├── memory.cpp
    └── utils.h
└── src
    ├── NvPipe.cu
    └── NvPipe.h.in


/.gitignore:
--------------------------------------------------------------------------------
1 | *.user
2 | build
3 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  2 | #
  3 | # Redistribution and use in source and binary forms, with or without
  4 | # modification, are permitted provided that the following conditions
  5 | # are met:
  6 | #  * Redistributions of source code must retain the above copyright
  7 | #    notice, this list of conditions and the following disclaimer.
  8 | #  * Redistributions in binary form must reproduce the above copyright
  9 | #    notice, this list of conditions and the following disclaimer in the
 10 | #    documentation and/or other materials provided with the distribution.
 11 | #  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 | #    contributors may be used to endorse or promote products derived
 13 | #    from this software without specific prior written permission.
 14 | #
 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 | # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | 
 27 | cmake_minimum_required(VERSION 3.2)
 28 | project(NvPipe VERSION 1.0.0 LANGUAGES CXX)
 29 | 
 30 | SET(DEFAULT_BUILD_TYPE "Release")
 31 | 
 32 | if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
 33 |     message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified.")
 34 |     set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE
 35 |         STRING "Choose the type of build." FORCE)
 36 |     # Set the possible values of build type for cmake-gui
 37 |     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
 38 |         "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
 39 | endif()
 40 | 
 41 | find_package(CUDA REQUIRED)
 42 | 
 43 | # Construct path to CUDA driver API lib (not provided by FindCUDA)
 44 | get_filename_component(CUDA_LIB_DIR ${CUDA_cudart_static_LIBRARY} DIRECTORY)
 45 | find_library(CUDA_LIB NAMES cuda HINTS ${CUDA_LIB_DIR})
 46 | 
 47 | # Set C++ standard
 48 | set(CMAKE_CXX_STANDARD 11)
 49 | 
 50 | # Options
 51 | option(NVPIPE_WITH_ENCODER "Enables the NvPipe encoding interface." ON)
 52 | option(NVPIPE_WITH_DECODER "Enables the NvPipe decoding interface." ON)
 53 | option(NVPIPE_WITH_OPENGL "Enables the NvPipe OpenGL interface." ON)
 54 | option(NVPIPE_BUILD_EXAMPLES "Builds the NvPipe example applications (requires both encoder and decoder)." ON)
 55 | 
 56 | # Header
 57 | configure_file(src/NvPipe.h.in include/NvPipe.h @ONLY)
 58 | include_directories(${CMAKE_CURRENT_BINARY_DIR}/include)
 59 | 
 60 | # NvPipe shared library
 61 | list(APPEND NVPIPE_SOURCES
 62 |     src/NvPipe.cu
 63 |     ${NV_VIDEO_CODEC_SDK}/Samples/Utils/ColorSpace.cu
 64 |     )
 65 | list(APPEND NVPIPE_LIBRARIES
 66 |     ${CMAKE_DL_LIBS}
 67 |     ${CUDA_LIBRARIES}
 68 |     ${CUDA_LIB}
 69 |     nvidia-encode
 70 |     )
 71 | 
 72 | if (NVPIPE_WITH_ENCODER)
 73 |     list(APPEND NVPIPE_SOURCES
 74 |         ${NV_VIDEO_CODEC_SDK}/Samples/NvCodec/NvEncoder/NvEncoder.cpp
 75 |         ${NV_VIDEO_CODEC_SDK}/Samples/NvCodec/NvEncoder/NvEncoderCuda.cpp
 76 |         )
 77 | endif()
 78 | 
 79 | if (NVPIPE_WITH_DECODER)
 80 |     list(APPEND NVPIPE_SOURCES
 81 |         ${NV_VIDEO_CODEC_SDK}/Samples/NvCodec/NvDecoder/NvDecoder.cpp
 82 |         )
 83 |     list(APPEND NVPIPE_LIBRARIES
 84 |         nvcuvid
 85 |         )
 86 | 
 87 |     if (WIN32)
 88 |         if (CMAKE_SIZEOF_VOID_P EQUAL 8)
 89 |             link_directories(${NV_VIDEO_CODEC_SDK}/Lib/x64)
 90 |         elseif (CMAKE_SIZEOF_VOID_P EQUAL 4)
 91 |             link_directories(${NV_VIDEO_CODEC_SDK}/Lib/Win32)
 92 |         endif()
 93 |     endif()
 94 | endif()
 95 | 
 96 | include(GNUInstallDirs)
 97 | 
 98 | cuda_add_library(${PROJECT_NAME} SHARED ${NVPIPE_SOURCES})
 99 | target_include_directories(${PROJECT_NAME} PUBLIC
100 |     $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>
101 |     $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
102 |     )
103 | target_include_directories(${PROJECT_NAME} PRIVATE
104 |     $<BUILD_INTERFACE: ${NV_VIDEO_CODEC_SDK}/Samples ${NV_VIDEO_CODEC_SDK}/Samples/NvCodec ${NV_VIDEO_CODEC_SDK}/include ${CUDA_INCLUDE_DIRS}>
105 |     )
106 | target_link_libraries(${PROJECT_NAME} ${NVPIPE_LIBRARIES})
107 | 
108 | set_target_properties(${PROJECT_NAME} PROPERTIES
109 |     VERSION ${PROJECT_VERSION}
110 |     SOVERSION 1)
111 | 
112 | install(TARGETS ${PROJECT_NAME} EXPORT NvPipeConfig
113 |     ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
114 |     LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
115 |     RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
116 | install(FILES ${PROJECT_BINARY_DIR}/include/NvPipe.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
117 | 
118 | install(EXPORT NvPipeConfig DESTINATION share/NvPipe/cmake)
119 | 
120 | export(TARGETS ${PROJECT_NAME} FILE NvPipeConfig.cmake)
121 | 
# Examples
if (NVPIPE_BUILD_EXAMPLES)
    # Encode to / decode from file (built whenever examples are enabled)
    add_executable(nvpExampleFile examples/file.cpp)
    target_link_libraries(nvpExampleFile PRIVATE ${PROJECT_NAME})

    # The following examples are only built when both interfaces are enabled:
    #   Memory     - host/device memory comparison
    #   Concurrent - concurrent test
    #   Lossless   - lossless test
    # Each one is a single source file named after the example in lower case.
    if (NVPIPE_WITH_ENCODER AND NVPIPE_WITH_DECODER)
        foreach (NVPIPE_EXAMPLE Memory Concurrent Lossless)
            string(TOLOWER "${NVPIPE_EXAMPLE}" NVPIPE_EXAMPLE_SOURCE)
            add_executable(nvpExample${NVPIPE_EXAMPLE} examples/${NVPIPE_EXAMPLE_SOURCE}.cpp)
            target_link_libraries(nvpExample${NVPIPE_EXAMPLE} PRIVATE ${PROJECT_NAME})
        endforeach()
    endif()

    # EGL demo: needs encoder, decoder and the OpenGL interface, and is
    # silently skipped when EGL or GLEW cannot be found.
    if (NVPIPE_WITH_ENCODER AND NVPIPE_WITH_DECODER AND NVPIPE_WITH_OPENGL)
        # FindEGL.cmake is shipped with the examples
        list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/examples/cmake)

        find_package(EGL)
        find_package(GLEW)

        if (EGL_FOUND AND GLEW_FOUND)
            add_executable(nvpExampleEGL examples/egl.cpp)
            target_include_directories(nvpExampleEGL PRIVATE
                ${EGL_INCLUDE_DIR}
                ${GLEW_INCLUDE_DIR}
                )
            target_link_libraries(nvpExampleEGL PRIVATE
                ${PROJECT_NAME}
                ${EGL_LIBRARIES}
                ${GLEW_LIBRARIES}
                )
        endif()
    endif()
endif()
156 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions
 5 | are met:
 6 |  * Redistributions of source code must retain the above copyright
 7 |    notice, this list of conditions and the following disclaimer.
 8 |  * Redistributions in binary form must reproduce the above copyright
 9 |    notice, this list of conditions and the following disclaimer in the
10 |    documentation and/or other materials provided with the distribution.
11 |  * Neither the name of NVIDIA CORPORATION nor the names of its
12 |    contributors may be used to endorse or promote products derived
13 |    from this software without specific prior written permission.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | -----
28 | 
29 | The Video Codec SDK has its own license agreement that can be found in src/Video_Codec_SDK.


--------------------------------------------------------------------------------
/README.deprecated.md:
--------------------------------------------------------------------------------
  1 | Introduction
  2 | ============
  3 | 
  4 | NvPipe is a simple and lightweight C API library for low-latency video compression.
  5 | It provides easy-to-use access to NVIDIA's hardware-accelerated H.264 and HEVC video codecs and is a great choice to drastically lower the bandwidth required for your
  6 | networked interactive server/client application.
  7 | 
  8 | Designed for both remote rendering solutions and general compression of arbitrary image data, NvPipe accepts frames in various formats and supports access to host memory, CUDA device memory, OpenGL textures and OpenGL pixel buffer objects.
  9 | 
 10 | Supported formats are 32 bit RGBA frames (8 bit per channel; alpha is not supported by the underlying video codecs and is ignored) and unsigned integer grayscale frames with 4 bit, 8 bit, 16 bit or 32 bit per pixel.
 11 | 
In addition to conventional lossy video compression based on a target bitrate and framerate, fully lossless compression is available, enabling exact reconstruction of the input bit pattern.
 13 | 
 14 | Please note that NvPipe acts as a lightweight synchronous convenience layer around the [NVIDIA Video Codec SDK](https://developer.nvidia.com/nvidia-video-codec-sdk) and doesn't offer all high-performance capabilities.
 15 | If you're looking for ultimate encode/decode performance, you may want to consider using NvCodec directly.
 16 | 
 17 | 
 18 | Usage
 19 | ============
 20 | 
The library is specifically designed to be easy to integrate into existing
low-latency streaming applications.  NvPipe does not take over any of the
network communication aspects, allowing your application to dictate the
server/client scenario it is used in.
 25 | 
 26 | A sample encoding scenario:
 27 | 
 28 | ```c++
 29 | #include <NvPipe.h>
 30 | ...
 31 | 
 32 | uint32_t width = ..., height = ...; // Image resolution
 33 | uint8_t* rgba = ...; // Image data in device or host memory
uint8_t* buffer = ...; // Buffer for compressed output in host memory
uint64_t bufferSize = ...; // Capacity of the output buffer in bytes
 35 | 
 36 | // Create encoder
 37 | NvPipe* encoder = NvPipe_CreateEncoder(NVPIPE_RGBA32, NVPIPE_H264, NVPIPE_LOSSY, 32 * 1000 * 1000, 90, width, height); // 32 Mbps @ 90 Hz
 38 | 
 39 | while (frameAvailable)
 40 | {
 41 |     // Encode next frame
 42 |     uint64_t compressedSize = NvPipe_Encode(encoder, rgba, width * 4, buffer, bufferSize, width, height, false);
 43 | 
 44 |     // Send the frame size and compressed stream to the consuming side
 45 |     send(socket, &compressedSize, sizeof(uint64_t), ...);
 46 |     send(socket, buffer, compressedSize, ...);
 47 |     send(socket, ...) // Other meta data
 48 | }
 49 | 
 50 | // Destroy encode session
 51 | NvPipe_Destroy(encoder);
 52 | ```
 53 | 
 54 | The corresponding decoding scenario:
 55 | 
 56 | ```c++
 57 | #include <NvPipe.h>
 58 | ...
 59 | 
 60 | uint32_t width = ..., height = ...; // Image resolution
 61 | uint8_t* rgba = ...; // Image destination in device or host memory
 62 | uint8_t* buffer = ...; // Buffer for incoming packets
 63 | 
 64 | // Create decoder
 65 | NvPipe* decoder = NvPipe_CreateDecoder(NVPIPE_RGBA32, NVPIPE_H264, width, height);
 66 | 
 67 | while (frameAvailable)
 68 | {
 69 |     // Receive data
 70 |     uint64_t compressedSize;
 71 |     receive(socket, &compressedSize, sizeof(uint64_t), ...);
 72 |     receive(socket, buffer, compressedSize, ...);
 73 |     receive(socket, ...);
 74 | 
 75 |     // Decode frame
 76 |     NvPipe_Decode(decoder, buffer, compressedSize, rgba, width, height);
 77 | 
 78 |     // Use frame (blit/save/...)
 79 |     ...
 80 | }
 81 | 
 82 | // Destroy decode session
 83 | NvPipe_Destroy(decoder);
 84 | ```
 85 | 
 86 | 
 87 | 
 88 | Installation
 89 | ============
NvPipe requires a C++11 compiler and the CUDA toolkit, and uses the standard CMake compilation and installation procedure.
 91 | 
 92 | Download the Video Codec SDK from https://developer.nvidia.com/nvidia-video-codec-sdk (tested with 9.1.23).
 93 | 
 94 | ```bash
 95 | mkdir build && cd build
 96 | cmake PATH_TO_NVPIPE -DNV_VIDEO_CODEC_SDK=PATH_TO_SDK
 97 | make
 98 | make install
 99 | ```
100 | 
101 | It is possible to compile the encoding or decoding interface only using the `NVPIPE_WITH_ENCODER` and `NVPIPE_WITH_DECODER` options (default: `ON`).
102 | 
103 | The OpenGL interface is optional and can be disabled using the `NVPIPE_WITH_OPENGL` option (default: `ON`).
104 | 
105 | The compilation of the included sample applications can be controlled via the `NVPIPE_BUILD_EXAMPLES` CMake option (default: `ON`).
106 | 
107 | Only shared libraries are supported.
108 | 
109 | 
110 | ##### Compiling on Windows using Visual Studio 2017 #####
111 | 
112 | On Windows, NvPipe can be compiled using Visual Studio's built-in CMake support.
113 | 
114 | Just place a `CMakeSettings.json` file with the following contents (adjust paths accordingly) next to NvPipe's `CMakeLists.txt`:
115 | 
116 | ```
117 | {
118 |   "configurations": [
119 |     {
120 |       "name": "x64-Release",
121 |       "generator": "Ninja",
122 |       "configurationType": "Release",
123 |       "inheritEnvironments": [
124 |         "msvc_x64_x64"
125 |       ],
126 |       "buildRoot": "C:\\.build\\NvPipe\\${name}",
127 |       "installRoot": "C:\\.install\\NvPipe\\${name}",
128 |       "cmakeCommandArgs": "",
129 |       "buildCommandArgs": "-v",
130 |       "ctestCommandArgs": "",
131 |       "variables": [
132 |         {
133 |           "name": "GLEW_INCLUDE_DIR",
134 |           "value": "C:\\PATH\\TO\\glew-2.1.0\\include"
135 |         },
136 |         {
137 |           "name": "GLEW_LIBRARY_RELEASE",
138 |           "value": "C:\\PATH\\TO\\glew-2.1.0\\lib\\Release\\x64\\glew32.lib"
139 |         }
140 |       ]
141 |     }
142 |   ]
143 | }
144 | ```
145 | 
146 | 
147 | Examples
148 | =====
149 | 
150 | Two example applications are included that perform encoding and decoding of a sequence of frames, measure performance, and dump image files before and after compression.
151 | Additionally, an example of integer frame compression verifies the bitwise correctness of lossless encoding and decoding.
152 | 
153 | The `memory` example compares the performance of input and output based on host memory vs. CUDA device memory.
154 | As illustrated in the following example output, device memory can be directly accessed by the video codec hardware and is thus faster, whereas host memory entails additional bus transfers.
155 | ```bash
156 | $ ./nvpExampleMemory
157 | NvPipe example application: Comparison of using host/device memory.
158 | 
159 | Resolution: 3840 x 2160
160 | Codec: H.264
161 | Bitrate: 32 Mbps @ 90 Hz
162 | Resolution: 3840 x 2160
163 | 
164 | --- Encode from host memory / Decode to host memory ---
165 | Frame | Encode (ms) | Decode (ms) | Size (KB)
166 |     0 |        57.8 |        42.2 |     31.0
167 |     1 |        15.4 |        13.3 |     12.1
168 |     2 |        16.6 |        13.5 |      5.5
169 |     3 |        16.6 |        13.6 |      8.3
170 |     4 |        16.9 |        13.8 |      3.9
171 |     5 |        17.1 |        13.8 |      3.5
172 |     6 |        16.9 |        13.8 |      3.5
173 |     7 |        17.0 |        13.8 |      3.5
174 |     8 |        17.0 |        13.8 |      3.5
175 |     9 |        16.9 |        14.3 |      3.5
176 | 
177 | --- Encode from device memory / Decode to device memory ---
178 | Frame | Encode (ms) | Decode (ms) | Size (KB)
179 |     0 |        45.9 |        35.0 |     31.0
180 |     1 |        10.5 |         6.9 |     12.1
181 |     2 |        10.2 |         6.8 |      5.5
182 |     3 |        10.1 |         6.8 |      8.3
183 |     4 |        10.2 |         6.8 |      3.9
184 |     5 |        10.2 |         6.8 |      3.5
185 |     6 |        10.1 |         6.9 |      3.5
186 |     7 |        10.2 |         6.8 |      3.5
187 |     8 |        10.1 |         6.9 |      3.5
188 |     9 |        10.1 |         6.8 |      3.5
189 | ```
190 | 
As indicated by the size column, the first frame is an I-frame and thus requires more bandwidth. The subsequent frames, however, are more lightweight P-frames, which only describe differences from previous frames.
192 | 
193 | 
194 | The `egl` example application demonstrates the usage of NvPipe in a server/client remote rendering scenario. An offscreen OpenGL framebuffer is created through EGL which is [ideally suited for remote rendering on headless nodes without X server](https://devblogs.nvidia.com/egl-eye-opengl-visualization-without-x-server/). The rendered frame is encoded by directly accessing the framebuffer's color attachment. After decoding, a fullscreen texture is used to draw the frame to the default framebuffer.
195 | The following example output shows that performance is similar to CUDA device memory access as illustrated above.
196 | ```bash
197 | $ ./nvpExampleEGL
198 | NvPipe example application: Render to offscreen framebuffer using EGL,
199 | encode framebuffer, decode to display texture.
200 | 
201 | Resolution: 3840 x 2160
202 | Codec: H.264
203 | Bitrate: 32 Mbps @ 90 Hz
204 | 
205 | Frame | Encode (ms) | Decode (ms) | Size (KB)
206 |     0 |        54.4 |        36.4 |     38.5
207 |     1 |        11.8 |         7.5 |     29.4
208 |     2 |        11.9 |         7.4 |     28.5
209 |     3 |        11.8 |         7.5 |     28.0
210 |     4 |        11.9 |         7.4 |     27.8
211 |     5 |        11.9 |         7.5 |     27.8
212 |     6 |        11.8 |         7.5 |     27.7
213 |     7 |        11.9 |         7.5 |     27.4
214 |     8 |        11.9 |         7.4 |     27.4
215 |     9 |        11.8 |         7.5 |     27.6
216 | ```
217 | 
218 | Note that the overall compressed sizes differ in the two examples as they use different images for input.
219 | 
220 | The `lossless` example demonstrates the usage of NvPipe for lossless integer data compression. An 8 bit unsigned integer frame is created, compressed, and the result verified for bitwise correctness after decompression.
221 | The following example output shows that while correctness is always guaranteed due to lossless compression, the chosen pixel format has crucial impact on the compressed output size.
222 | ```bash
223 | $ ./nvpExampleLossless
224 | NvPipe example application: Tests lossless compression of a grayscale integer frame.
225 | 
226 | Input: 1024 x 1024 UINT8 (Raw size: 1048.6 KB)
227 |  - [as UINT4]  Size: 181.3 KB, Encode: 14.8 ms, Decode: 17.1 ms - OK
228 |  - [as UINT8]  Size: 45.2 KB, Encode: 14.8 ms, Decode: 15.5 ms - OK
229 |  - [as UINT16]  Size: 57.7 KB, Encode: 16.4 ms, Decode: 11.4 ms - OK
230 | ```
231 | 
The ideal pixel format is highly dependent on the structure of your input data. Keep in mind that video codecs are optimized for spatial and temporal coherence. For instance, interpreting the 8 bit pixel data in the example above as 4 bit pixels results in poor compression because, from the encoder's perspective, it looks like high-frequency noise.
233 | 
234 | 
235 | 
236 | Supported Platforms
237 | ===================
238 | 
239 | NvPipe is supported on both Linux and Windows. OS X support is not plausible in the short term.
240 | 
241 | Please refer to the hardware capability matrices of the [NVIDIA Video Codec SDK](https://developer.nvidia.com/nvidia-video-codec-sdk) for more details on feature availability and driver requirements.
242 | 
243 | Note that NvPipe does not support the Jetson platform, on which the video hardware should be accessed through the [NvMedia API](https://docs.nvidia.com/drive/nvvib_docs/NVIDIA%20DRIVE%20Linux%20SDK%20Development%20Guide/baggage/group__nvmedia__top.html).
244 | 
245 | 
246 | 
247 | Publication
248 | ===================
249 | 
250 | NvPipe was successfully used in the EGPGV 2018 best paper *Hardware-Accelerated Multi-Tile Streaming for Realtime Remote Visualization*, which is available [here](https://hpcvis.org/publications/Streaming-EGPGV2018.pdf).
251 | 
252 | 
253 | 
254 | Feedback
255 | ===================
256 | 
257 | Feedback and pull requests welcome! After starting a PR, remember to sign the CLA.
258 | 
259 | We would love to hear more about your use cases of NvPipe!
260 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Deprecation Notice
 2 | ============
 3 | 
NvPipe is deprecated and will receive no further updates or support.
 5 | 
 6 | For a concise yet versatile convenience wrapper around the low-level NVENC/NVDEC APIs please refer to the `NvCodec` wrapper classes in the official [NVIDIA Video Codec SDK](https://developer.nvidia.com/nvidia-video-codec-sdk).
 7 | 
Several samples are included in the Video Codec SDK. Furthermore, feel free to check out the NvPipe source code for exemplary usage.
 9 | 
10 | Support inquiries and feature requests should be directed to the official [Video Codec SDK Developer Forums](https://forums.developer.nvidia.com/c/professional-graphics-and-rendering/video-technologies/video-codec-and-optical-flow-sdk/189).
11 | 
12 | The original NvPipe README can be found [here](README.deprecated.md).
13 | 


--------------------------------------------------------------------------------
/examples/cmake/FindEGL.cmake:
--------------------------------------------------------------------------------
 1 | #
 2 | # Source: VTK (https://github.com/Kitware/VTK/blob/master/CMake/FindEGL.cmake)
 3 | # Copied and adapted 07/17/2017 (GLdispatch is not needed anymore).
 4 | #
 5 | # This is a temporary solution.
 6 | #
 7 | 
 8 | # Try to find EGL library and include dir.
 9 | # Once done this will define
10 | #
# EGL_FOUND           - true if EGL has been found
# EGL_INCLUDE_DIR     - where EGL/egl.h and KHR/khrplatform.h can be found
# EGL_LIBRARY         - the EGL library; link this to use libEGL.so.1
# EGL_opengl_LIBRARY  - the OpenGL library; link it together with EGL_LIBRARY instead of the gl library
# EGL_LIBRARIES       - all EGL related libraries: EGL, OpenGL
16 | 
17 | 
# Every item is located in two stages, and only while it is still unset:
#   1. if EGL_ROOT is given, search exclusively below it;
#   2. otherwise (or if stage 1 found nothing), run the regular search with
#      /usr/local and /usr as additional locations.

# --- EGL headers ---
if(EGL_ROOT AND NOT EGL_INCLUDE_DIR)
  find_path(EGL_INCLUDE_DIR EGL/egl.h
    PATHS ${EGL_ROOT}/include
    NO_DEFAULT_PATH
  )
endif()
if(NOT EGL_INCLUDE_DIR)
  find_path(EGL_INCLUDE_DIR EGL/egl.h
    PATHS /usr/local/include /usr/include
  )
endif()

# --- EGL library ---
if(EGL_ROOT AND NOT EGL_LIBRARY)
  find_library(EGL_LIBRARY EGL
    PATHS ${EGL_ROOT}/lib
    NO_DEFAULT_PATH
  )
endif()
if(NOT EGL_LIBRARY)
  find_library(EGL_LIBRARY EGL
    PATHS /usr/local/lib /usr/lib
  )
endif()

# --- OpenGL library ---
if(EGL_ROOT AND NOT EGL_opengl_LIBRARY)
  find_library(EGL_opengl_LIBRARY OpenGL
    PATHS ${EGL_ROOT}/lib
    NO_DEFAULT_PATH
  )
endif()
if(NOT EGL_opengl_LIBRARY)
  find_library(EGL_opengl_LIBRARY OpenGL
    PATHS /usr/local/lib /usr/lib
  )
endif()

# Combined list for consumers that want to link everything at once.
set(EGL_LIBRARIES ${EGL_LIBRARY} ${EGL_opengl_LIBRARY})

# Sets EGL_FOUND once both libraries and the include directory are available
# and prints the standard found / not found message.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(EGL
  DEFAULT_MSG
  EGL_LIBRARY
  EGL_opengl_LIBRARY
  EGL_INCLUDE_DIR
)

# Hide the result variables from the default cache view.
mark_as_advanced(
  EGL_DIR
  EGL_INCLUDE_DIR
  EGL_LIBRARY
  EGL_opengl_LIBRARY
)
75 | 


--------------------------------------------------------------------------------
/examples/concurrent.cpp:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
  2 |  *
  3 |  * Redistribution and use in source and binary forms, with or without
  4 |  * modification, are permitted provided that the following conditions
  5 |  * are met:
  6 |  *  * Redistributions of source code must retain the above copyright
  7 |  *    notice, this list of conditions and the following disclaimer.
  8 |  *  * Redistributions in binary form must reproduce the above copyright
  9 |  *    notice, this list of conditions and the following disclaimer in the
 10 |  *    documentation and/or other materials provided with the distribution.
 11 |  *  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 |  *    contributors may be used to endorse or promote products derived
 13 |  *    from this software without specific prior written permission.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 |  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 |  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 |  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 |  */
 27 | 
 28 | 
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

#include <cuda_runtime_api.h>

#include <NvPipe.h>

#include "utils.h"
 40 | 
 41 | 
 42 | void run(void* rgba, uint32_t width, uint32_t height, uint32_t index, std::mutex* printMutex)
 43 | {
 44 |     const NvPipe_Codec codec = NVPIPE_H264;
 45 |     const NvPipe_Compression compression = NVPIPE_LOSSY;
 46 |     const float bitrateMbps = 4;
 47 |     const uint32_t targetFPS = 30;
 48 |     const uint32_t numFrames = 500;
 49 | 
 50 |     std::vector<uint8_t> compressed(width * height * 4);
 51 | 
 52 |     void* decompressedDevice;
 53 |     cudaMalloc(&decompressedDevice, width * height * 4);
 54 | 
 55 |     // Create encoder
 56 |     NvPipe* encoder = NvPipe_CreateEncoder(NVPIPE_RGBA32, codec, compression, bitrateMbps * 1000 * 1000, targetFPS, width, height);
 57 |     if (!encoder)
 58 |     {
 59 |         std::lock_guard<std::mutex> lock(*printMutex);
 60 |         std::cerr << "[" << index << "] Failed to create encoder: " << NvPipe_GetError(NULL) << std::endl;
 61 |         return;
 62 |     }
 63 | 
 64 |     // Create decoder
 65 |     NvPipe* decoder = NvPipe_CreateDecoder(NVPIPE_RGBA32, codec, width, height);
 66 |     if (!decoder)
 67 |     {
 68 |         std::lock_guard<std::mutex> lock(*printMutex);
 69 |         std::cerr << "[" << index << "] Failed to create decoder: " << NvPipe_GetError(NULL) << std::endl;
 70 |         return;
 71 |     }
 72 | 
 73 |     Timer timer;
 74 |     double encodeMs = 0.0;
 75 |     double decodeMs = 0.0;
 76 | 
 77 |     for (uint32_t i = 0; i < numFrames; ++i)
 78 |     {
 79 |         // Encode
 80 |         timer.reset();
 81 |         uint64_t size = NvPipe_Encode(encoder, rgba, width * 4, compressed.data(), compressed.size(), width, height, false);
 82 |         encodeMs += timer.getElapsedMilliseconds();
 83 | 
 84 |         if (0 == size)
 85 |         {
 86 |             std::lock_guard<std::mutex> lock(*printMutex);
 87 |             std::cerr << "[" << index << "] Encode error: " << NvPipe_GetError(encoder) << std::endl;
 88 |             return;
 89 |         }
 90 | 
 91 |         // Decode
 92 |         timer.reset();
 93 |         uint64_t r = NvPipe_Decode(decoder, compressed.data(), size, decompressedDevice, width, height);
 94 |         decodeMs += timer.getElapsedMilliseconds();
 95 | 
 96 |         if (0 == r)
 97 |         {
 98 |             std::lock_guard<std::mutex> lock(*printMutex);
 99 |             std::cerr << "[" << index << "] Decode error: " << NvPipe_GetError(decoder) << std::endl;
100 |             return;
101 |         }
102 |     }
103 | 
104 |     encodeMs /= numFrames;
105 |     decodeMs /= numFrames;
106 | 
107 |     // Clean up
108 |     NvPipe_Destroy(encoder);
109 |     NvPipe_Destroy(decoder);
110 | 
111 |     cudaFree(decompressedDevice);
112 | 
113 |     // Print results
114 |     std::lock_guard<std::mutex> lock(*printMutex);
115 |     std::cout << std::fixed << std::setprecision(1) << "[" << index << "] Average encode: " << encodeMs << " ms / Average decode: " << decodeMs << " ms" << std::endl;
116 | }
117 | 
118 | 
119 | int main(int argc, char* argv[])
120 | {
121 |     std::cout << "NvPipe example application: Concurrent encode/decode sessions." << std::endl;
122 | 
123 |     const uint32_t width = 1920;
124 |     const uint32_t height = 1080;
125 | 
126 |     const uint32_t numWorkersMax = 4;
127 | 
128 | 
129 |     // Allocate dummy frame
130 |     std::vector<uint8_t> rgba(width * height * 4);
131 |     for (uint32_t y = 0; y < height; ++y)
132 |         for (uint32_t x = 0; x < width; ++x)
133 |             rgba[4 * (y * width + x) + 0] = (255.0f * x * y) / (width * height) * (y % 100 < 50);
134 | 
135 |     // Allocate device memory and copy input
136 |     void* rgbaDevice;
137 |     cudaMalloc(&rgbaDevice, rgba.size());
138 |     cudaMemcpy(rgbaDevice, rgba.data(), rgba.size(), cudaMemcpyHostToDevice);
139 | 
140 |     // Multiple runs with increasing worker count
141 |     for (uint32_t numWorkers = 1; numWorkers <= numWorkersMax; ++numWorkers)
142 |     {
143 |         std::cout << std::endl;
144 |         std::cout << "--- " << numWorkers << " threads ---" << std::endl;
145 | 
146 |         std::mutex printMutex;
147 |         std::vector<std::thread> workers;
148 | 
149 |         for (uint32_t i = 0; i < numWorkers; ++i)
150 |             workers.push_back(std::thread(run, rgbaDevice, width, height, i, &printMutex));
151 | 
152 |         for (std::thread& t : workers)
153 |             if (t.joinable())
154 |                 t.join();
155 |     }
156 | 
157 |     // Clean up
158 |     cudaFree(rgbaDevice);
159 | 
160 |     return 0;
161 | }
162 | 


--------------------------------------------------------------------------------
/examples/egl.cpp:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  2 |  *
  3 |  * Redistribution and use in source and binary forms, with or without
  4 |  * modification, are permitted provided that the following conditions
  5 |  * are met:
  6 |  *  * Redistributions of source code must retain the above copyright
  7 |  *    notice, this list of conditions and the following disclaimer.
  8 |  *  * Redistributions in binary form must reproduce the above copyright
  9 |  *    notice, this list of conditions and the following disclaimer in the
 10 |  *    documentation and/or other materials provided with the distribution.
 11 |  *  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 |  *    contributors may be used to endorse or promote products derived
 13 |  *    from this software without specific prior written permission.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 |  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 |  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 |  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 |  */
 27 | 
 28 | #include <NvPipe.h>
 29 | 
 30 | #include "utils.h"
 31 | 
 32 | #include <iostream>
 33 | #include <vector>
 34 | 
 35 | #include <EGL/egl.h>
 36 | #include <GL/glew.h>
 37 | 
 38 | 
 39 | void captureFramebufferPPM(GLuint framebuffer, uint32_t width, uint32_t height, const std::string& path)
 40 | {
 41 |     std::vector<uint8_t> rgba(width * height * 4);
 42 | 
 43 |     glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
 44 |     glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, rgba.data());
 45 | 
 46 |     savePPM(rgba.data(), width, height, path);
 47 | }
 48 | 
 49 | 
 50 | int main(int argc, char* argv[])
 51 | {
 52 |     std::cout << "NvPipe example application: Render to offscreen framebuffer using EGL," << std::endl << "encode framebuffer, decode to display texture." << std::endl << std::endl;
 53 | 
 54 |     const uint32_t width = 3840;
 55 |     const uint32_t height = 2160;
 56 | 
 57 |     const NvPipe_Codec codec = NVPIPE_H264;
 58 |     const float bitrateMbps = 32;
 59 |     const uint32_t targetFPS = 90;
 60 | 
 61 | 
 62 |     std::cout << "Resolution: " << width << " x " << height << std::endl;
 63 |     std::cout << "Codec: " << (codec == NVPIPE_H264 ? "H.264" : "HEVC") << std::endl;
 64 |     std::cout << "Bitrate: " << bitrateMbps << " Mbps @ " << targetFPS << " Hz" << std::endl;
 65 | 
 66 | 
 67 | 
 68 |     // Init EGL/OpenGL
 69 |     EGLDisplay display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
 70 | 
 71 |     EGLint major, minor;
 72 |     eglInitialize(display, &major, &minor);
 73 | 
 74 |     const EGLint configAttribs[] = {
 75 |         EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
 76 |         EGL_BLUE_SIZE, 8,
 77 |         EGL_GREEN_SIZE, 8,
 78 |         EGL_RED_SIZE, 8,
 79 |         EGL_DEPTH_SIZE, 8,
 80 |         EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT,
 81 |         EGL_NONE
 82 |     };
 83 | 
 84 |     EGLint numConfigs;
 85 |     EGLConfig config;
 86 |     eglChooseConfig(display, configAttribs, &config, 1, &numConfigs);
 87 | 
 88 |     const EGLint pbufferAttribs[] = {
 89 |         EGL_WIDTH, width,
 90 |         EGL_HEIGHT, height,
 91 |         EGL_NONE,
 92 |     };
 93 | 
 94 |     EGLSurface surface = eglCreatePbufferSurface(display, config, pbufferAttribs);
 95 | 
 96 |     eglBindAPI(EGL_OPENGL_API);
 97 |     EGLContext context = eglCreateContext(display, config, EGL_NO_CONTEXT, NULL);
 98 |     eglMakeCurrent(display, surface, surface, context);
 99 | 
100 |     glewInit();
101 | 
102 | 
103 |     // Create framebuffer for offscreen rendering
104 |     GLuint serverColorTex;
105 |     glGenTextures(1, &serverColorTex);
106 |     glBindTexture(GL_TEXTURE_2D, serverColorTex);
107 |     glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
108 | 
109 |     GLuint serverDepthTex;
110 |     glGenTextures(1, &serverDepthTex);
111 |     glBindTexture(GL_TEXTURE_2D, serverDepthTex);
112 |     glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, width, height, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL);
113 | 
114 |     GLuint serverFBO;
115 |     glGenFramebuffers(1, &serverFBO);
116 |     glBindFramebuffer(GL_DRAW_FRAMEBUFFER, serverFBO);
117 |     glFramebufferTexture(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, serverColorTex, 0);
118 |     glFramebufferTexture(GL_DRAW_FRAMEBUFFER,  GL_DEPTH_ATTACHMENT, serverDepthTex, 0);
119 | 
120 |     if (glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE)
121 |     {
122 |         std::cerr << "Failed to create framebuffer" << std::endl;
123 |         return 1;
124 |     }
125 | 
126 | 
127 |     // Create shader and texture for fullscreen display of decompressed frame
128 |     const GLchar* clientVertexShader =
129 |             "#version 330\n"
130 |             "void main() {}";
131 | 
132 |     const GLchar* clientGeometryShader =
133 |             "#version 330 core\n"
134 |             "layout(points) in;"
135 |             "layout(triangle_strip, max_vertices = 4) out;"
136 |             "out vec2 texcoord;"
137 |             "void main() {"
138 |             "gl_Position = vec4( 1.0, 1.0, 0.0, 1.0 ); texcoord = vec2( 1.0, 1.0 ); EmitVertex();"
139 |             "gl_Position = vec4(-1.0, 1.0, 0.0, 1.0 ); texcoord = vec2( 0.0, 1.0 ); EmitVertex();"
140 |             "gl_Position = vec4( 1.0,-1.0, 0.0, 1.0 ); texcoord = vec2( 1.0, 0.0 ); EmitVertex();"
141 |             "gl_Position = vec4(-1.0,-1.0, 0.0, 1.0 ); texcoord = vec2( 0.0, 0.0 ); EmitVertex();"
142 |             "EndPrimitive();"
143 |             "}";
144 | 
145 |     const GLchar* clientFragmentShader =
146 |             "#version 330\n"
147 |             "uniform sampler2D tex;"
148 |             "in vec2 texcoord;"
149 |             "out vec4 color;"
150 |             "void main() {"
151 |             "	color = texture(tex, texcoord);"
152 |             "}";
153 | 
154 |     GLuint clientVertexShaderHandle = glCreateShader(GL_VERTEX_SHADER);
155 |     glShaderSource(clientVertexShaderHandle, 1, &clientVertexShader, 0);
156 |     glCompileShader(clientVertexShaderHandle);
157 | 
158 |     GLuint clientGeometryShaderHandle = glCreateShader(GL_GEOMETRY_SHADER);
159 |     glShaderSource(clientGeometryShaderHandle, 1, &clientGeometryShader, 0);
160 |     glCompileShader(clientGeometryShaderHandle);
161 | 
162 |     GLuint clientFragmentShaderHandle = glCreateShader(GL_FRAGMENT_SHADER);
163 |     glShaderSource(clientFragmentShaderHandle, 1, &clientFragmentShader, 0);
164 |     glCompileShader(clientFragmentShaderHandle);
165 | 
166 |     GLuint clientFullscreenQuadProgram = glCreateProgram();
167 |     glAttachShader(clientFullscreenQuadProgram, clientVertexShaderHandle);
168 |     glAttachShader(clientFullscreenQuadProgram, clientGeometryShaderHandle);
169 |     glAttachShader(clientFullscreenQuadProgram, clientFragmentShaderHandle);
170 |     glLinkProgram(clientFullscreenQuadProgram);
171 | 
172 |     GLuint clientFullscreenTextureLocation = glGetUniformLocation(clientFullscreenQuadProgram, "tex");
173 | 
174 |     GLuint clientFullscreenVAO;
175 |     glGenVertexArrays(1, &clientFullscreenVAO);
176 | 
177 |     GLuint clientColorTex;
178 |     glGenTextures(1, &clientColorTex);
179 |     glBindTexture(GL_TEXTURE_2D, clientColorTex);
180 |     glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); // must use RGBA(8) here for CUDA-GL interop
181 |     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
182 |     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
183 | 
184 | 
185 |     // Create encoder
186 |     NvPipe* encoder = NvPipe_CreateEncoder(NVPIPE_RGBA32, codec, NVPIPE_LOSSY, bitrateMbps * 1000 * 1000, targetFPS, width, height);
187 |     if (!encoder)
188 |         std::cerr << "Failed to create encoder: " << NvPipe_GetError(NULL) << std::endl;
189 | 
190 |     // Create decoder
191 |     NvPipe* decoder = NvPipe_CreateDecoder(NVPIPE_RGBA32, codec, width, height);
192 |     if (!decoder)
193 |         std::cerr << "Failed to create decoder: " << NvPipe_GetError(NULL) << std::endl;
194 | 
195 | 
196 |     Timer timer;
197 |     std::cout << std::endl << "Frame | Encode (ms) | Decode (ms) | Size (KB)" << std::endl;
198 | 
199 |     for (uint32_t i = 0; i < 10; ++i)
200 |     {
201 |         // Render dummy scene (Nothing to see here; just some oldschool immediate mode.. urgh)
202 |         glBindFramebuffer(GL_DRAW_FRAMEBUFFER, serverFBO);
203 |         glUseProgram(0);
204 | 
205 |         glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
206 |         glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
207 | 
208 |         glViewport(0, 0, width, height);
209 | 
210 |         glMatrixMode(GL_PROJECTION);
211 |         glLoadIdentity();
212 |         glMatrixMode(GL_MODELVIEW);
213 |         glLoadIdentity();
214 | 
215 |         glRotatef((float) i, 0.0f, 0.0f, 1.0f);
216 | 
217 |         glBegin(GL_TRIANGLES);
218 |         glColor3f(1.0f, 0.0f, 0.0f);
219 |         glVertex3f(0.0f, -0.9f, 0.0f);
220 |         glColor3f(0.0f, 1.0f, 0.0f);
221 |         glVertex3f(-0.9f,0.9f, 0.0f);
222 |         glColor3f(0.0f, 0.0f, 1.0f);
223 |         glVertex3f(0.9f, 0.9f, 0.0f);
224 |         glEnd();
225 | 
226 |         glFinish(); // Make sure rendering is complete before grabbing frame
227 | 
228 |         captureFramebufferPPM(serverFBO, width, height, "egl-input" + std::to_string(i) + ".ppm");
229 | 
230 |         // Encode
231 |         std::vector<uint8_t> compressed(width * height * 4);
232 | 
233 |         timer.reset();
234 |         uint64_t size = NvPipe_EncodeTexture(encoder, serverColorTex, GL_TEXTURE_2D, compressed.data(), compressed.size(), width, height, false);
235 |         double encodeMs = timer.getElapsedMilliseconds();
236 | 
237 |         if (0 == size)
238 |             std::cerr << "Encode error: " << NvPipe_GetError(encoder) << std::endl;
239 | 
240 | 
241 |         // Decode
242 |         timer.reset();
243 |         uint64_t r = NvPipe_DecodeTexture(decoder, compressed.data(), size, clientColorTex, GL_TEXTURE_2D, width, height);
244 |         double decodeMs = timer.getElapsedMilliseconds();
245 | 
246 |         if (0 == r)
247 |             std::cerr << "Decode error: " << NvPipe_GetError(decoder) << std::endl;
248 | 
249 |         double sizeKB = size / 1000.0;
250 |         std::cout << std::fixed << std::setprecision(1) << std::setw(5) << i << " | " << std::setw(11) << encodeMs << " | " <<  std::setw(11) << decodeMs << " | " <<  std::setw(8) << sizeKB << std::endl;
251 | 
252 | 
253 | 
254 |         // Display decoded frame
255 |         glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
256 |         glUseProgram(clientFullscreenQuadProgram);
257 |         glActiveTexture(GL_TEXTURE0);
258 |         glBindTexture(GL_TEXTURE_2D, clientColorTex);
259 |         glUniform1i(clientFullscreenTextureLocation, 0);
260 |         glBindVertexArray(clientFullscreenVAO);
261 |         glDrawArrays(GL_POINTS, 0, 1);
262 | 
263 |         captureFramebufferPPM(0, width, height, "egl-output" + std::to_string(i) + ".ppm");
264 |     }
265 | 
266 |     // Clean up
267 |     NvPipe_Destroy(encoder);
268 |     NvPipe_Destroy(decoder);
269 | 
270 |     eglTerminate(display);
271 | 
272 |     return 0;
273 | }
274 | 


--------------------------------------------------------------------------------
/examples/file.cpp:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  2 |  *
  3 |  * Redistribution and use in source and binary forms, with or without
  4 |  * modification, are permitted provided that the following conditions
  5 |  * are met:
  6 |  *  * Redistributions of source code must retain the above copyright
  7 |  *    notice, this list of conditions and the following disclaimer.
  8 |  *  * Redistributions in binary form must reproduce the above copyright
  9 |  *    notice, this list of conditions and the following disclaimer in the
 10 |  *    documentation and/or other materials provided with the distribution.
 11 |  *  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 |  *    contributors may be used to endorse or promote products derived
 13 |  *    from this software without specific prior written permission.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 |  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 |  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 |  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 |  */
 27 | 
 28 | #include <NvPipe.h>
 29 | 
 30 | #include "utils.h"
 31 | 
 32 | #include <iostream>
 33 | #include <vector>
 34 | #include <fstream>
 35 | 
 36 | int main(int argc, char* argv[])
 37 | {
 38 |     std::cout << "NvPipe example application: Encodes to a file / decodes from a file." << std::endl << "Useful for testing if only encoding or decoding is enabled." << std::endl << std::endl;
 39 | 
 40 |     const uint32_t width = 3840;
 41 |     const uint32_t height = 2160;
 42 | 
 43 |     const NvPipe_Codec codec = NVPIPE_H264;
 44 |     const float bitrateMbps = 32;
 45 |     const uint32_t targetFPS = 90;
 46 | 
 47 | 
 48 |     std::cout << "Resolution: " << width << " x " << height << std::endl;
 49 |     std::cout << "Codec: " << (codec == NVPIPE_H264 ? "H.264" : "HEVC") << std::endl;
 50 |     std::cout << "Bitrate: " << bitrateMbps << " Mbps @ " << targetFPS << " Hz" << std::endl;
 51 | 
 52 |     std::cout << "Resolution: " << width << " x " << height << std::endl;
 53 | 
 54 |     Timer timer;
 55 | 
 56 |     std::vector<uint8_t> rgba(width * height * 4);
 57 |     std::vector<uint8_t> compressed(rgba.size());
 58 | 
 59 | 
 60 |     // Encoding
 61 | #ifdef NVPIPE_WITH_ENCODER
 62 |     // Construct dummy frame
 63 |     for (uint32_t y = 0; y < height; ++y)
 64 |         for (uint32_t x = 0; x < width; ++x)
 65 |             rgba[4 * (y * width + x) + 1] = (255.0f * x* y) / (width * height) * (y % 100 < 50);
 66 | 
 67 |     savePPM(rgba.data(), width, height, "file-input.ppm");
 68 | 
 69 | 
 70 |     NvPipe* encoder = NvPipe_CreateEncoder(NVPIPE_RGBA32, codec, NVPIPE_LOSSY, bitrateMbps * 1000 * 1000, targetFPS, width, height);
 71 |     if (!encoder)
 72 |         std::cerr << "Failed to create encoder: " << NvPipe_GetError(NULL) << std::endl;
 73 | 
 74 |     std::ofstream out("stream.bin", std::ios::out | std::ios::binary);
 75 | 
 76 |     std::cout << std::endl << "Encoding..." << std::endl;
 77 | 
 78 |     for (uint32_t i = 0; i < 10; ++i)
 79 |     {
 80 |         timer.reset();
 81 |         uint64_t size = NvPipe_Encode(encoder, rgba.data(), width * 4, compressed.data(), compressed.size(), width, height, false);
 82 |         double encodeMs = timer.getElapsedMilliseconds();
 83 | 
 84 |         if (0 == size)
 85 |             std::cerr << "Encode error: " << NvPipe_GetError(encoder) << std::endl;
 86 | 
 87 |         out.write((char*) &size, sizeof(uint64_t));
 88 |         out.write((char*) compressed.data(), size);
 89 | 
 90 |         std::cout << i << ": " << encodeMs << " ms" << std::endl;
 91 |     }
 92 | 
 93 |     out.close();
 94 | 
 95 |     NvPipe_Destroy(encoder);
 96 | #endif
 97 | 
 98 | 
 99 |     // Decoding
100 | #ifdef NVPIPE_WITH_DECODER
101 |     NvPipe* decoder = NvPipe_CreateDecoder(NVPIPE_RGBA32, codec, width, height);
102 |     if (!decoder)
103 |         std::cerr << "Failed to create decoder: " << NvPipe_GetError(NULL) << std::endl;
104 | 
105 |     std::ifstream in("stream.bin", std::ios::in | std::ios::binary);
106 |     if (!in)
107 |     {
108 |         std::cerr << std::endl;
109 |         std::cerr << "Error: Failed to open input file \"stream.bin\"." << std::endl;
110 |         std::cerr << "The file can be created using this example with" << std::endl;
111 |         std::cerr << "NvPipe encoding enabled." << std::endl;
112 |         return 1;
113 |     }
114 | 
115 |     std::cout << std::endl << "Decoding..." << std::endl;
116 | 
117 |     for (uint32_t i = 0; i < 10; ++i)
118 |     {
119 |         uint64_t size;
120 |         in.read((char*) &size, sizeof(uint64_t));
121 |         in.read((char*) compressed.data(), size);
122 | 
123 | 
124 |         // Decode
125 |         timer.reset();
126 |         uint64_t r = NvPipe_Decode(decoder, compressed.data(), size, rgba.data(), width, height);
127 |         double decodeMs = timer.getElapsedMilliseconds();
128 | 
129 |         if (r == size)
130 |             std::cerr << "Decode error: " << NvPipe_GetError(decoder) << std::endl;
131 | 
132 |         std::cout << i << ": " << decodeMs << " ms" << std::endl;
133 | 
134 |         if (i == 0)
135 |             savePPM(rgba.data(), width, height, "file-output.ppm");
136 |     }
137 | 
138 |     in.close();
139 | 
140 |     NvPipe_Destroy(decoder);
141 | #endif
142 | 
143 |     return 0;
144 | }
145 | 


--------------------------------------------------------------------------------
/examples/lossless.cpp:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  2 |  *
  3 |  * Redistribution and use in source and binary forms, with or without
  4 |  * modification, are permitted provided that the following conditions
  5 |  * are met:
  6 |  *  * Redistributions of source code must retain the above copyright
  7 |  *    notice, this list of conditions and the following disclaimer.
  8 |  *  * Redistributions in binary form must reproduce the above copyright
  9 |  *    notice, this list of conditions and the following disclaimer in the
 10 |  *    documentation and/or other materials provided with the distribution.
 11 |  *  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 |  *    contributors may be used to endorse or promote products derived
 13 |  *    from this software without specific prior written permission.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 |  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 |  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 |  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 |  */
 27 | 
 28 | #include <NvPipe.h>
 29 | 
 30 | #include <iostream>
 31 | #include <iomanip>
 32 | #include <vector>
 33 | 
 34 | #include "utils.h"
 35 | 
 36 | 
 37 | 
 38 | 
 39 | void test(const uint8_t* data, NvPipe_Format format, uint32_t width, uint32_t height)
 40 | {
 41 |     uint64_t dataSize = width * height;
 42 |     uint64_t dataPitch = width;
 43 |     if (format == NVPIPE_UINT4)
 44 |     {
 45 |         dataSize /= 2;
 46 |         dataPitch /= 2;
 47 |     }
 48 |     else if (format == NVPIPE_UINT16)
 49 |     {
 50 |         dataSize *= 2;
 51 |         dataPitch *= 2;
 52 |     }
 53 |     else if (format == NVPIPE_UINT32)
 54 |     {
 55 |         dataSize *= 4;
 56 |         dataPitch *= 4;
 57 |     }
 58 | 
 59 | 
 60 |     Timer timer;
 61 | 
 62 |     // Encode
 63 |     NvPipe* encoder = NvPipe_CreateEncoder(format, NVPIPE_H264, NVPIPE_LOSSLESS, 0, 0, width, height);
 64 |     if (!encoder)
 65 |     {
 66 |         std::cerr << "Failed to create encoder: " << NvPipe_GetError(NULL) << std::endl;
 67 |         return;
 68 |     }
 69 | 
 70 |     std::vector<uint8_t> buffer(dataSize * 2);
 71 |     timer.reset();
 72 |     uint64_t size = NvPipe_Encode(encoder, data, dataPitch, buffer.data(), buffer.size(), width, height, false);
 73 |     double encodeMs = timer.getElapsedMilliseconds();
 74 |     if (0 == size)
 75 |     {
 76 |         std::cerr << "Encode error: " << NvPipe_GetError(encoder) << std::endl;
 77 |         return;
 78 |     }
 79 | 
 80 |     NvPipe_Destroy(encoder);
 81 | 
 82 |     // Decode
 83 |     NvPipe* decoder = NvPipe_CreateDecoder(format, NVPIPE_H264, width, height);
 84 |     if (!decoder)
 85 |     {
 86 |         std::cerr << "Failed to create decoder: " << NvPipe_GetError(NULL) << std::endl;
 87 |         return;
 88 |     }
 89 | 
 90 |     std::vector<uint8_t> result(dataSize);
 91 |     timer.reset();
 92 |     uint64_t r = NvPipe_Decode(decoder, buffer.data(), size, result.data(), width, height);
 93 |     double decodeMs = timer.getElapsedMilliseconds();
 94 |     if (0 == r)
 95 |     {
 96 |         std::cerr << "Decode error: " << NvPipe_GetError(decoder) << std::endl;
 97 |         return;
 98 |     }
 99 | 
100 |     NvPipe_Destroy(decoder);
101 | 
102 | 
103 |     // Check
104 |     auto bitpattern = [](uint8_t v) -> std::string
105 |     {
106 |         std::string s;
107 | 
108 |         for (uint32_t i = 1; i <= 128; i *= 2)
109 |         {
110 |             if (v & i)
111 |                 s = "1" + s;
112 |             else
113 |                 s = "0" + s;
114 | 
115 |             if (i == 8)
116 |                 s = " " + s;
117 |         }
118 | 
119 |         return s;
120 |     };
121 | 
122 |     bool ok = true;
123 |     std::string mismatch;
124 | 
125 |     for (uint32_t i = 0; i < dataSize; ++i)
126 |     {
127 |         if (data[i] != result[i])
128 |         {
129 |             mismatch = "Byte " + std::to_string(i) + ": " + bitpattern(data[i]) + " -> " + bitpattern(result[i]);
130 |             ok = false;
131 |             break;
132 |         }
133 |     }
134 | 
135 |     // Print result
136 |     if (format == NVPIPE_UINT4)
137 |         std::cout << " - [as UINT4] ";
138 |     else if (format == NVPIPE_UINT8)
139 |         std::cout << " - [as UINT8] ";
140 |     else if (format == NVPIPE_UINT16)
141 |         std::cout << " - [as UINT16] ";
142 |     else if (format == NVPIPE_UINT32)
143 |         std::cout << " - [as UINT32] ";
144 | 
145 |     std::cout << std::fixed << std::setprecision(1) << " Size: " << size * 0.001 << " KB, Encode: " << encodeMs << " ms, Decode: " << decodeMs << " ms - ";
146 | 
147 |     if (ok)
148 |         std::cout << "OK" << std::endl;
149 |     else
150 |         std::cout << "MISMATCH [" << mismatch << "]" << std::endl;
151 | }
152 | 
153 | 
154 | int main(int argc, char* argv[])
155 | {
156 |     std::cout << "NvPipe example application: Tests lossless compression of a grayscale integer frame." << std::endl << std::endl;
157 | 
158 |     uint32_t width = 1024;
159 |     uint32_t height = 1024;
160 | 
161 |     // UINT 8 test
162 |     {
163 |         std::vector<uint8_t> image(width * height);
164 |         for (uint32_t y = 0; y < height; ++y)
165 |             for (uint32_t x = 0; x < width; ++x)
166 |                 image[y * width + x] = (255.0f * x * y) / (width * height) * (y % 100 < 50);
167 | 
168 |         std::cout << std::fixed << std::setprecision(1) << "Input: " << width << " x " << height << " UINT8 (Raw size: " << (width * height)  * 0.001 << " KB)" << std::endl;
169 |         test(image.data(), NVPIPE_UINT4, width * 2, height);
170 |         test(image.data(), NVPIPE_UINT8, width, height);
171 |         test(image.data(), NVPIPE_UINT16, width / 2, height);
172 |         test(image.data(), NVPIPE_UINT32, width / 4, height);
173 |     }
174 | 
175 |     std::cout << std::endl;
176 | 
177 | 
178 |     // UINT32 test
179 |     {
180 |         std::vector<uint32_t> image(width * height);
181 |         for (uint32_t y = 0; y < height; ++y)
182 |             for (uint32_t x = 0; x < width; ++x)
183 |                 image[y * width + x] = (4294967295.0f * x * y) / (width * height) * (y % 100 < 50);
184 | 
185 |         std::cout << std::fixed << std::setprecision(1) << "Input: " << width << " x " << height << " UINT32 (Raw size: " << (width * height * 4)  * 0.001 << " KB)" << std::endl;
186 | //        test((uint8_t*) image.data(), NVPIPE_UINT4, width * 8, height);
187 |         test((uint8_t*) image.data(), NVPIPE_UINT8, width * 4, height);
188 |         test((uint8_t*) image.data(), NVPIPE_UINT16, width * 2, height);
189 |         test((uint8_t*) image.data(), NVPIPE_UINT32, width, height);
190 |     }
191 | 
192 | 
193 |     return 0;
194 | }
195 | 


--------------------------------------------------------------------------------
/examples/memory.cpp:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  2 |  *
  3 |  * Redistribution and use in source and binary forms, with or without
  4 |  * modification, are permitted provided that the following conditions
  5 |  * are met:
  6 |  *  * Redistributions of source code must retain the above copyright
  7 |  *    notice, this list of conditions and the following disclaimer.
  8 |  *  * Redistributions in binary form must reproduce the above copyright
  9 |  *    notice, this list of conditions and the following disclaimer in the
 10 |  *    documentation and/or other materials provided with the distribution.
 11 |  *  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 |  *    contributors may be used to endorse or promote products derived
 13 |  *    from this software without specific prior written permission.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 |  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 |  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 |  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 |  */
 27 | 
 28 | #include <NvPipe.h>
 29 | 
 30 | #include "utils.h"
 31 | 
 32 | #include <iostream>
 33 | #include <vector>
 34 | 
 35 | #include <cuda_runtime_api.h>
 36 | 
 37 | 
 38 | int main(int argc, char* argv[])
 39 | {
 40 |     std::cout << "NvPipe example application: Comparison of using host/device memory." << std::endl << std::endl;
 41 | 
 42 |     const uint32_t width = 3840;
 43 |     const uint32_t height = 2160;
 44 | 
 45 |     const NvPipe_Codec codec = NVPIPE_H264;
 46 |     const NvPipe_Compression compression = NVPIPE_LOSSY;
 47 |     const float bitrateMbps = 32;
 48 |     const uint32_t targetFPS = 90;
 49 | 
 50 | 
 51 |     std::cout << "Resolution: " << width << " x " << height << std::endl;
 52 |     std::cout << "Codec: " << (codec == NVPIPE_H264 ? "H.264" : "HEVC") << std::endl;
 53 |     if (compression == NVPIPE_LOSSY)
 54 |         std::cout << "Bitrate: " << bitrateMbps << " Mbps @ " << targetFPS << " Hz" << std::endl;
 55 | 
 56 | 
 57 |     // Construct dummy frame
 58 |     std::vector<uint8_t> rgba(width * height * 4);
 59 |     for (uint32_t y = 0; y < height; ++y)
 60 |         for (uint32_t x = 0; x < width; ++x)
 61 |             rgba[4 * (y * width + x) + 0] = (255.0f * x * y) / (width * height) * (y % 100 < 50);
 62 | 
 63 |     savePPM(rgba.data(), width, height, "memory-input.ppm");
 64 | 
 65 |     std::cout << "Resolution: " << width << " x " << height << std::endl;
 66 | 
 67 | 
 68 |     std::vector<uint8_t> compressed(rgba.size());
 69 |     std::vector<uint8_t> decompressed(rgba.size());
 70 | 
 71 |     Timer timer;
 72 | 
 73 | 
 74 |     // Host memory benchmark
 75 |     {
 76 |         std::cout << std::endl << "--- Encode from host memory / Decode to host memory ---" << std::endl;
 77 |         std::cout << "Frame | Encode (ms) | Decode (ms) | Size (KB)" << std::endl;
 78 | 
 79 |         // Create encoder
 80 |         NvPipe* encoder = NvPipe_CreateEncoder(NVPIPE_RGBA32, codec, compression, bitrateMbps * 1000 * 1000, targetFPS, width, height);
 81 |         if (!encoder)
 82 |             std::cerr << "Failed to create encoder: " << NvPipe_GetError(NULL) << std::endl;
 83 | 
 84 |         // Create decoder
 85 |         NvPipe* decoder = NvPipe_CreateDecoder(NVPIPE_RGBA32, codec, width, height);
 86 |         if (!decoder)
 87 |             std::cerr << "Failed to create decoder: " << NvPipe_GetError(NULL) << std::endl;
 88 | 
 89 |         // A few frames ...
 90 |         for (uint32_t i = 0; i < 10; ++i)
 91 |         {
 92 |             // Encode
 93 |             timer.reset();
 94 |             uint64_t size = NvPipe_Encode(encoder, rgba.data(), width * 4, compressed.data(), compressed.size(), width, height, false);
 95 |             double encodeMs = timer.getElapsedMilliseconds();
 96 | 
 97 |             if (0 == size)
 98 |                 std::cerr << "Encode error: " << NvPipe_GetError(encoder) << std::endl;
 99 | 
100 |             // Decode
101 |             timer.reset();
102 |             uint64_t r = NvPipe_Decode(decoder, compressed.data(), size, decompressed.data(), width, height);
103 |             double decodeMs = timer.getElapsedMilliseconds();
104 | 
105 |             if (0 == r)
106 |                 std::cerr << "Decode error: " << NvPipe_GetError(decoder) << std::endl;
107 | 
108 |             double sizeKB = size / 1000.0;
109 |             std::cout << std::fixed << std::setprecision(1) << std::setw(5) << i << " | " << std::setw(11) << encodeMs << " | " <<  std::setw(11) << decodeMs << " | " <<  std::setw(8) << sizeKB << std::endl;
110 | 
111 |             if (i == 9)
112 |                 savePPM(decompressed.data(), width, height, "memory-output.ppm");
113 |         }
114 | 
115 |         // Clean up
116 |         NvPipe_Destroy(encoder);
117 |         NvPipe_Destroy(decoder);
118 |     }
119 | 
120 |     // Device memory benchmark
121 |     {
122 |         std::cout << std::endl << "--- Encode from device memory / Decode to device memory ---" << std::endl;
123 |         std::cout << "Frame | Encode (ms) | Decode (ms) | Size (KB)" << std::endl;
124 | 
125 |         // Create encoder
126 |         NvPipe* encoder = NvPipe_CreateEncoder(NVPIPE_RGBA32, codec, compression, bitrateMbps * 1000 * 1000, targetFPS, width, height);
127 |         if (!encoder)
128 |             std::cerr << "Failed to create encoder: " << NvPipe_GetError(NULL) << std::endl;
129 | 
130 |         // Create decoder
131 |         NvPipe* decoder = NvPipe_CreateDecoder(NVPIPE_RGBA32, codec, width, height);
132 |         if (!decoder)
133 |             std::cerr << "Failed to create decoder: " << NvPipe_GetError(NULL) << std::endl;
134 | 
135 |         // Allocate device memory and copy input
136 |         void* rgbaDevice;
137 |         cudaMalloc(&rgbaDevice, rgba.size());
138 |         cudaMemcpy(rgbaDevice, rgba.data(), rgba.size(), cudaMemcpyHostToDevice);
139 | 
140 |         void* decompressedDevice;
141 |         cudaMalloc(&decompressedDevice, rgba.size());
142 | 
143 |         for (uint32_t i = 0; i < 10; ++i)
144 |         {
145 |             // Encode
146 |             timer.reset();
147 |             uint64_t size = NvPipe_Encode(encoder, rgbaDevice, width * 4, compressed.data(), compressed.size(), width, height, false);
148 |             double encodeMs = timer.getElapsedMilliseconds();
149 | 
150 |             if (0 == size)
151 |                 std::cerr << "Encode error: " << NvPipe_GetError(encoder) << std::endl;
152 | 
153 |             // Decode
154 |             timer.reset();
155 |             uint64_t r = NvPipe_Decode(decoder, compressed.data(), size, decompressedDevice, width, height);
156 |             double decodeMs = timer.getElapsedMilliseconds();
157 | 
158 |             if (0 == r)
159 |                 std::cerr << "Decode error: " << NvPipe_GetError(decoder) << std::endl;
160 | 
161 |             double sizeKB = size / 1000.0;
162 |             std::cout << std::fixed << std::setprecision(1) << std::setw(5) << i << " | " << std::setw(11) << encodeMs << " | " <<  std::setw(11) << decodeMs << " | " <<  std::setw(8) << sizeKB << std::endl;
163 |         }
164 | 
165 |         cudaFree(rgbaDevice);
166 |         cudaFree(decompressedDevice);
167 | 
168 |         // Clean up
169 |         NvPipe_Destroy(encoder);
170 |         NvPipe_Destroy(decoder);
171 |     }
172 | 
173 |     return 0;
174 | }
175 | 


--------------------------------------------------------------------------------
/examples/utils.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 2 |  *
 3 |  * Redistribution and use in source and binary forms, with or without
 4 |  * modification, are permitted provided that the following conditions
 5 |  * are met:
 6 |  *  * Redistributions of source code must retain the above copyright
 7 |  *    notice, this list of conditions and the following disclaimer.
 8 |  *  * Redistributions in binary form must reproduce the above copyright
 9 |  *    notice, this list of conditions and the following disclaimer in the
10 |  *    documentation and/or other materials provided with the distribution.
11 |  *  * Neither the name of NVIDIA CORPORATION nor the names of its
12 |  *    contributors may be used to endorse or promote products derived
13 |  *    from this software without specific prior written permission.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 |  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 |  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 |  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |  */
27 | 
28 | #pragma once
29 | 
30 | #include <string>
31 | #include <fstream>
32 | #include <vector>
33 | #include <chrono>
34 | #include <iostream>
35 | #include <iomanip>
36 | 
37 | 
38 | // Saves tightly packed RGBA8 pixels as a binary PPM (P6) file, dropping alpha. Inline because it is defined in a header.
39 | inline void savePPM(uint8_t* rgba, uint32_t width, uint32_t height, const std::string& path)
40 | {
41 |     // Convert to RGB
42 |     const size_t pixels = static_cast<size_t>(width) * height; // widened before multiplying
43 |     std::vector<uint8_t> rgb(pixels * 3);
44 |     for (size_t i = 0; i < pixels; ++i)
45 |         for (size_t j = 0; j < 3; ++j)
46 |             rgb[3 * i + j] = rgba[4 * i + j];
47 | 
48 |     // Write PPM; report the problem instead of failing silently if the file cannot be opened
49 |     std::ofstream outFile(path.c_str(), std::ios::binary);
50 |     if (!outFile) {
51 |         std::cerr << "Failed to open " << path << " for writing" << std::endl;
52 |         return;
53 |     }
54 |     outFile << "P6\n" << width << " " << height << "\n255\n";
55 |     outFile.write(reinterpret_cast<const char*>(rgb.data()), static_cast<std::streamsize>(rgb.size()));
56 | 
57 | 
58 | // Stopwatch reporting the time elapsed since construction or the most recent reset().
59 | class Timer
60 | {
61 |     // steady_clock is monotonic; high_resolution_clock may alias the adjustable system clock
62 |     using Clock = std::chrono::steady_clock;
63 | public:
64 |     Timer() : time(Clock::now()) {}
65 | 
66 |     // Restarts the measurement at the current instant.
67 |     void reset() { this->time = Clock::now(); }
68 | 
69 |     // Elapsed time in seconds (microsecond resolution).
70 |     double getElapsedSeconds() const { return 1.0e-6 * this->elapsedMicroseconds(); }
71 | 
72 |     // Elapsed time in milliseconds (microsecond resolution).
73 |     double getElapsedMilliseconds() const { return 1.0e-3 * this->elapsedMicroseconds(); }
74 | 
75 | private:
76 |     // Whole microseconds between the stored start point and now.
77 |     std::chrono::microseconds::rep elapsedMicroseconds() const
78 |     {
79 |         const auto elapsed = Clock::now() - this->time;
80 |         return std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
81 |     }
82 |     Clock::time_point time;
83 | };
84 | 
85 | 
86 | 
87 | 


--------------------------------------------------------------------------------
/src/NvPipe.cu:
--------------------------------------------------------------------------------
   1 | /* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
   2 |  *
   3 |  * Redistribution and use in source and binary forms, with or without
   4 |  * modification, are permitted provided that the following conditions
   5 |  * are met:
   6 |  *  * Redistributions of source code must retain the above copyright
   7 |  *    notice, this list of conditions and the following disclaimer.
   8 |  *  * Redistributions in binary form must reproduce the above copyright
   9 |  *    notice, this list of conditions and the following disclaimer in the
  10 |  *    documentation and/or other materials provided with the distribution.
  11 |  *  * Neither the name of NVIDIA CORPORATION nor the names of its
  12 |  *    contributors may be used to endorse or promote products derived
  13 |  *    from this software without specific prior written permission.
  14 |  *
  15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
  16 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  18 |  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  19 |  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  20 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  21 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  22 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  23 |  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26 |  */
  27 | 
  28 | #include "NvPipe.h"
  29 | 
  30 | #ifdef NVPIPE_WITH_ENCODER
  31 | #include "NvCodec/NvEncoder/NvEncoderCuda.h"
  32 | #endif
  33 | 
  34 | #ifdef NVPIPE_WITH_DECODER
  35 | #include "NvCodec/NvDecoder/NvDecoder.h"
  36 | #endif
  37 | 
  38 | #include "Utils/ColorSpace.h"
  39 | #include "Utils/NvCodecUtils.h"
  40 | 
  41 | #include <memory>
  42 | #include <iostream>
  43 | #include <string>
  44 | #include <sstream>
  45 | #include <unordered_map>
  46 | #include <mutex>
  47 | 
  48 | #include <cuda.h>
  49 | #include <cuda_runtime_api.h>
  50 | 
  51 | #ifdef NVPIPE_WITH_OPENGL
  52 | #include <cuda_gl_interop.h>
  53 | #endif
  54 | 
  55 | 
  56 | // Error type thrown by the helpers in this file; carries a human-readable description.
  57 | class Exception
  58 | {
  59 | public:
  60 |     Exception(const std::string& msg) : message(msg) {}
  61 |     std::string getErrorString() const { return this->message; }
  62 |     std::string message; // public, so it can also be read directly
  63 | };
  64 | 
  65 | 
  66 | // Throws an Exception combining errorMessage with the CUDA error code and text, unless code is cudaSuccess.
  67 | inline void CUDA_THROW(cudaError_t code, const std::string& errorMessage)
  68 | {
  69 |     if (code == cudaSuccess) return;
  70 |     throw Exception(errorMessage + " (Error " + std::to_string(code) + ": " + std::string(cudaGetErrorString(code)) + ")");
  71 | }
  72 | 
  73 | // True for CUDA device/managed memory. Plain host memory is false, even when the runtime reports it as "unregistered" with cudaSuccess.
  74 | inline bool isDevicePointer(const void* ptr)
  75 | {
  76 |     struct cudaPointerAttributes attr;
  77 |     const cudaError_t perr = cudaPointerGetAttributes(&attr, ptr);
  78 | #if (CUDA_VERSION >= 10000)
  79 |     return (perr == cudaSuccess) && (attr.type != cudaMemoryTypeHost) && (attr.type != cudaMemoryTypeUnregistered);
  80 | #else
  81 |     return (perr == cudaSuccess) && (attr.memoryType != cudaMemoryTypeHost);
  82 | #endif
  83 | }
  84 | 
  85 | // Size in bytes of one tightly packed frame of the given format (0 for unknown formats), computed in 64 bit.
  86 | inline uint64_t getFrameSize(NvPipe_Format format, uint32_t width, uint32_t height)
  87 | {
  88 |     // Widen before multiplying: 32-bit products wrap around for very large frames
  89 |     const uint64_t pixels = static_cast<uint64_t>(width) * height;
  90 |     switch (format)
  91 |     {
  92 |     case NVPIPE_RGBA32: return pixels * 4;
  93 |     case NVPIPE_UINT4:  return pixels / 2;
  94 |     case NVPIPE_UINT8:  return pixels;
  95 |     case NVPIPE_UINT16: return pixels * 2;
  96 |     case NVPIPE_UINT32: return pixels * 4;
  97 |     default:            return 0;
  98 |     }
  99 | }
 100 | 
 101 | 
 102 | // Expands packed 4-bit samples to 8-bit values in the Y plane and zeroes the UV rows behind it, one thread per pixel.
 103 | __global__
 104 | void uint4_to_nv12(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_t dstPitch, uint32_t width, uint32_t height)
 105 | {
 106 |     const uint32_t x = blockIdx.x * blockDim.x + threadIdx.x;
 107 |     const uint32_t y = blockIdx.y * blockDim.y + threadIdx.y;
 108 | 
 109 |     if (x >= width || y >= height)
 110 |         return;
 111 | 
 112 |     const uint8_t packed = src[y * srcPitch + x / 2]; // two pixels share one source byte
 113 |     // Extend 4 bit to 8 bits
 114 |     // Even thread: higher 4 bits, odd thread: lower 4 bits
 115 |     // ('==' binds tighter than '&', so the parity test needs its own parentheses)
 116 |     const bool odd = ((x & 1) == 1);
 117 |     dst[y * dstPitch + x] = odd ? (packed & 0xF) : ((packed & 0xF0) >> 4);
 118 | 
 119 |     // Blank UV channel
 120 |     if (y < height / 2)
 121 |     {
 122 |         uint8_t* uvRow = dst + dstPitch * (height + y);
 123 |         uvRow[x] = 0;
 124 |     }
 125 | }
 126 | 
 127 | // Packs the low 4 bits of two neighboring Y bytes into one output byte; the first pixel fills the high nibble.
 128 | __global__
 129 | void nv12_to_uint4(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_t dstPitch, uint32_t width, uint32_t height)
 130 | {
 131 |     // one thread per TWO pixels
 132 |     const uint32_t x = blockIdx.x * blockDim.x + threadIdx.x;
 133 |     const uint32_t y = blockIdx.y * blockDim.y + threadIdx.y;
 134 | 
 135 |     const uint32_t first = 2 * x; // column of the first pixel of the pair
 136 |     if (first >= width || y >= height)
 137 |         return;
 138 | 
 139 |     const uint32_t srcIndex = y * srcPitch + first;
 140 |     // High nibble comes from the first pixel
 141 |     uint8_t packed = (src[srcIndex] & 0xF) << 4;
 142 |     // Low nibble comes from the second pixel; the last pixel of an odd-width row has no partner
 143 |     if (first + 1 < width)
 144 |         packed = packed | (src[srcIndex + 1] & 0xF);
 145 | 
 146 |     dst[y * dstPitch + x] = packed;
 147 | }
 148 | 
 149 | // Copies an 8-bit grayscale image into the Y plane and zeroes the UV rows behind it, one thread per pixel.
 150 | __global__
 151 | void uint8_to_nv12(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_t dstPitch, uint32_t width, uint32_t height)
 152 | {
 153 |     const uint32_t x = blockIdx.x * blockDim.x + threadIdx.x;
 154 |     const uint32_t y = blockIdx.y * blockDim.y + threadIdx.y;
 155 | 
 156 |     if (x >= width || y >= height)
 157 |         return;
 158 | 
 159 |     const uint32_t srcIndex = y * srcPitch + x;
 160 |     const uint32_t dstIndex = y * dstPitch + x;
 161 |     // Copy grayscale image to Y channel
 162 |     dst[dstIndex] = src[srcIndex];
 163 | 
 164 |     // Blank UV channel
 165 |     if (y < height / 2)
 166 |     {
 167 |         uint8_t* uvRow = dst + dstPitch * (height + y);
 168 |         uvRow[x] = 0;
 169 |     }
 170 | }
 171 | 
 172 | // Copies the Y plane into an 8-bit grayscale image, one thread per pixel.
 173 | __global__
 174 | void nv12_to_uint8(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_t dstPitch, uint32_t width, uint32_t height)
 175 | {
 176 |     const uint32_t x = blockIdx.x * blockDim.x + threadIdx.x;
 177 |     const uint32_t y = blockIdx.y * blockDim.y + threadIdx.y;
 178 | 
 179 |     if (x >= width || y >= height)
 180 |         return;
 181 | 
 182 |     const uint32_t srcIndex = y * srcPitch + x;
 183 |     const uint32_t dstIndex = y * dstPitch + x;
 184 | 
 185 |     // Copy Y channel to grayscale image
 186 |     dst[dstIndex] = src[srcIndex];
 187 | }
 188 | 
 189 | // Splits 16-bit samples into two 8-bit tiles placed side by side in the Y plane and zeroes the UV rows behind it.
 190 | __global__
 191 | void uint16_to_nv12(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_t dstPitch, uint32_t width, uint32_t height)
 192 | {
 193 |     const uint32_t x = blockIdx.x * blockDim.x + threadIdx.x;
 194 |     const uint32_t y = blockIdx.y * blockDim.y + threadIdx.y;
 195 | 
 196 |     if (x >= width || y >= height)
 197 |         return;
 198 | 
 199 |     const uint32_t srcIndex = y * srcPitch + 2 * x; // two source bytes per pixel
 200 |     const uint32_t dstIndex = y * dstPitch + x;
 201 | 
 202 |     // First byte of the sample goes to the left tile of the Y channel
 203 |     dst[dstIndex] = src[srcIndex];
 204 |     // Second byte goes to the right tile, 'width' columns further
 205 |     dst[dstIndex + width] = src[srcIndex + 1];
 206 | 
 207 |     // Blank UV channel (two bytes per pixel column)
 208 |     if (y < height / 2)
 209 |     {
 210 |         uint8_t* uvRow = dst + dstPitch * (height + y);
 211 |         for (uint32_t k = 0; k < 2; ++k)
 212 |             uvRow[2 * x + k] = 0;
 213 |     }
 214 | }
 215 | 
 216 | // Reassembles 16-bit samples from the two side-by-side 8-bit tiles of the Y plane, one thread per pixel.
 217 | __global__
 218 | void nv12_to_uint16(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_t dstPitch, uint32_t width, uint32_t height)
 219 | {
 220 |     const uint32_t x = blockIdx.x * blockDim.x + threadIdx.x;
 221 |     const uint32_t y = blockIdx.y * blockDim.y + threadIdx.y;
 222 | 
 223 |     if (x >= width || y >= height)
 224 |         return;
 225 | 
 226 |     const uint32_t srcIndex = y * srcPitch + x;
 227 |     const uint32_t dstIndex = y * dstPitch + 2 * x; // two output bytes per pixel
 228 | 
 229 |     // First output byte comes from the left tile of the Y channel
 230 |     dst[dstIndex] = src[srcIndex];
 231 |     // Second output byte comes from the right tile, 'width' columns further
 232 |     dst[dstIndex + 1] = src[srcIndex + width];
 233 | }
 234 | 
 235 | // Splits 32-bit samples into four 8-bit tiles placed side by side in the Y plane and zeroes the UV rows behind it.
 236 | __global__
 237 | void uint32_to_nv12(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_t dstPitch, uint32_t width, uint32_t height)
 238 | {
 239 |     const uint32_t x = blockIdx.x * blockDim.x + threadIdx.x;
 240 |     const uint32_t y = blockIdx.y * blockDim.y + threadIdx.y;
 241 | 
 242 |     if (x >= width || y >= height)
 243 |         return;
 244 | 
 245 |     const uint32_t srcIndex = y * srcPitch + 4 * x; // four source bytes per pixel
 246 |     const uint32_t dstIndex = y * dstPitch + x;
 247 | 
 248 |     // Byte k of the sample goes to tile k of the Y channel,
 249 |     // i.e. k * width columns to the right of the first tile
 250 |     for (uint32_t k = 0; k < 4; ++k)
 251 |     {
 252 |         dst[dstIndex + k * width] = src[srcIndex + k];
 253 |     }
 254 | 
 255 |     // Blank UV channel (four bytes per pixel column)
 256 |     if (y < height / 2)
 257 |     {
 258 |         uint8_t* uvRow = dst + dstPitch * (height + y);
 259 |         for (uint32_t k = 0; k < 4; ++k)
 260 |         {
 261 |             uvRow[4 * x + k] = 0;
 262 |         }
 263 |     }
 264 | }
 265 | 
 266 | // Reassembles 32-bit samples from the four side-by-side 8-bit tiles of the Y plane, one thread per pixel.
 267 | __global__
 268 | void nv12_to_uint32(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_t dstPitch, uint32_t width, uint32_t height)
 269 | {
 270 |     const uint32_t x = blockIdx.x * blockDim.x + threadIdx.x;
 271 |     const uint32_t y = blockIdx.y * blockDim.y + threadIdx.y;
 272 | 
 273 |     // Skip threads that fall outside the image
 274 |     if (x >= width || y >= height)
 275 |         return;
 276 | 
 277 |     const uint32_t srcIndex = y * srcPitch + x;
 278 |     const uint32_t dstIndex = y * dstPitch + 4 * x; // four output bytes per pixel
 279 | 
 280 |     // Output byte k comes from tile k of the Y channel,
 281 |     // i.e. k * width columns to the right of the first tile
 282 |     for (uint32_t k = 0; k < 4; ++k)
 283 |     {
 284 |         dst[dstIndex + k] = src[srcIndex + k * width];
 285 |     }
 286 | }
 287 | 
 288 | #ifdef NVPIPE_WITH_OPENGL
 289 | /**
 290 |  * @brief Utility class for managing CUDA-GL interop graphics resources.
 291 |  *        Registrations are cached per texture / PBO id; the destructor releases them without throwing.
 292 |  */
 293 | class GraphicsResourceRegistry
 294 | {
 295 | public:
 296 |     virtual ~GraphicsResourceRegistry()
 297 |     {
 298 |         // Unregister all. Failures are ignored on purpose: a destructor is implicitly
 299 |         // noexcept, so throwing from here (as CUDA_THROW does) would terminate the process.
 300 |         for (auto& r : this->registeredPBOs)
 301 |             if (r.second.graphicsResource)
 302 |                 cudaGraphicsUnregisterResource(r.second.graphicsResource);
 303 | 
 304 |         for (auto& r : this->registeredTextures)
 305 |             if (r.second.graphicsResource)
 306 |                 cudaGraphicsUnregisterResource(r.second.graphicsResource);
 307 |     }
 308 | 
 309 |     // Returns the graphics resource of a GL texture; throws Exception if (re)registration fails.
 310 |     cudaGraphicsResource_t getTextureGraphicsResource(uint32_t texture, uint32_t target, uint32_t width, uint32_t height, uint32_t flags)
 311 |     {
 312 |         RegisteredTexture& reg = this->registeredTextures[texture];
 313 | 
 314 |         // (Re)register on first use, on size/target changes, or when an earlier attempt failed and left no resource
 315 |         if (!reg.graphicsResource || reg.width != width || reg.height != height || reg.target != target) {
 316 |             if (reg.graphicsResource) {
 317 |                 CUDA_THROW(cudaGraphicsUnregisterResource(reg.graphicsResource),
 318 |                     "Failed to unregister texture graphics resource");
 319 |                 reg.graphicsResource = nullptr;
 320 |             }
 321 | 
 322 |             CUDA_THROW(cudaGraphicsGLRegisterImage(&reg.graphicsResource, texture, target, flags),
 323 |                 "Failed to register texture as graphics resource");
 324 |             reg.width = width;
 325 |             reg.height = height;
 326 |             reg.target = target;
 327 |         }
 328 | 
 329 |         return reg.graphicsResource;
 330 |     }
 331 | 
 332 |     // Returns the graphics resource of a GL pixel buffer object; throws Exception if (re)registration fails.
 333 |     cudaGraphicsResource_t getPBOGraphicsResource(uint32_t pbo, uint32_t width, uint32_t height, uint32_t flags)
 334 |     {
 335 |         RegisteredPBO& reg = this->registeredPBOs[pbo];
 336 | 
 337 |         // (Re)register on first use, on size changes, or when an earlier attempt failed and left no resource
 338 |         if (!reg.graphicsResource || reg.width != width || reg.height != height) {
 339 |             if (reg.graphicsResource) {
 340 |                 CUDA_THROW(cudaGraphicsUnregisterResource(reg.graphicsResource),
 341 |                     "Failed to unregister PBO graphics resource");
 342 |                 reg.graphicsResource = nullptr;
 343 |             }
 344 | 
 345 |             CUDA_THROW(cudaGraphicsGLRegisterBuffer(&reg.graphicsResource, pbo, flags),
 346 |                 "Failed to register PBO as graphics resource");
 347 |             reg.width = width;
 348 |             reg.height = height;
 349 |         }
 350 | 
 351 |         return reg.graphicsResource;
 352 |     }
 353 | 
 354 | private:
 355 |     struct RegisteredTexture
 356 |     {
 357 |         cudaGraphicsResource_t graphicsResource = nullptr;
 358 |         uint32_t width = 0;
 359 |         uint32_t height = 0;
 360 |         uint32_t target = 0;
 361 |     };
 362 |     std::unordered_map<uint32_t, RegisteredTexture> registeredTextures;
 363 | 
 364 |     struct RegisteredPBO
 365 |     {
 366 |         cudaGraphicsResource_t graphicsResource = nullptr;
 367 |         uint32_t width = 0;
 368 |         uint32_t height = 0;
 369 |     };
 370 |     std::unordered_map<uint32_t, RegisteredPBO> registeredPBOs;
 371 | };
 372 | #endif
 373 | 
 374 | 
 375 | #ifdef NVPIPE_WITH_ENCODER
 376 | 
 377 | // Maps an NVENC status code to its symbolic name; values outside the table yield "Unknown error code".
 378 | inline std::string EncErrorCodeToString(NVENCSTATUS code)
 379 | {
 380 |     static const char* const errors[] = { // built once instead of allocating a vector of strings per call
 381 |         "NV_ENC_SUCCESS",
 382 |         "NV_ENC_ERR_NO_ENCODE_DEVICE",
 383 |         "NV_ENC_ERR_UNSUPPORTED_DEVICE",
 384 |         "NV_ENC_ERR_INVALID_ENCODERDEVICE",
 385 |         "NV_ENC_ERR_INVALID_DEVICE",
 386 |         "NV_ENC_ERR_DEVICE_NOT_EXIST",
 387 |         "NV_ENC_ERR_INVALID_PTR",
 388 |         "NV_ENC_ERR_INVALID_EVENT",
 389 |         "NV_ENC_ERR_INVALID_PARAM",
 390 |         "NV_ENC_ERR_INVALID_CALL",
 391 |         "NV_ENC_ERR_OUT_OF_MEMORY",
 392 |         "NV_ENC_ERR_ENCODER_NOT_INITIALIZED",
 393 |         "NV_ENC_ERR_UNSUPPORTED_PARAM",
 394 |         "NV_ENC_ERR_LOCK_BUSY",
 395 |         "NV_ENC_ERR_NOT_ENOUGH_BUFFER",
 396 |         "NV_ENC_ERR_INVALID_VERSION",
 397 |         "NV_ENC_ERR_MAP_FAILED",
 398 |         "NV_ENC_ERR_NEED_MORE_INPUT",
 399 |         "NV_ENC_ERR_ENCODER_BUSY",
 400 |         "NV_ENC_ERR_EVENT_NOT_REGISTERD",
 401 |         "NV_ENC_ERR_GENERIC",
 402 |         "NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY",
 403 |         "NV_ENC_ERR_UNIMPLEMENTED",
 404 |         "NV_ENC_ERR_RESOURCE_REGISTER_FAILED",
 405 |         "NV_ENC_ERR_RESOURCE_NOT_REGISTERED",
 406 |         "NV_ENC_ERR_RESOURCE_NOT_MAPPED"
 407 |     };
 408 |     if (code >= 0 && static_cast<size_t>(code) < sizeof(errors) / sizeof(errors[0]))
 409 |         return errors[code];
 410 | 
 411 |     return "Unknown error code";
 412 | }
 413 | 
 414 | /**
 415 |  * @brief Encoder implementation.
 416 |  */
 417 | class Encoder
 418 | {
 419 | public:
 420 |     // Stores the encoding settings and creates the underlying encoder for the initial frame size.
 421 |     Encoder(NvPipe_Format format, NvPipe_Codec codec, NvPipe_Compression compression, uint64_t bitrate, uint32_t targetFrameRate, uint32_t width, uint32_t height)
 422 |         : format(format)
 423 |         , codec(codec)
 424 |         , compression(compression)
 425 |         , bitrate(bitrate)
 426 |         , targetFrameRate(targetFrameRate)
 427 |     {
 428 |         this->recreate(width, height);
 429 |     }
 430 | 
 431 |     // Flushes and destroys the encoder and frees the temporary device buffer without throwing.
 432 |     ~Encoder()
 433 |     {
 434 |         if (this->encoder) {
 435 |             // Swallow errors from the teardown calls: an exception escaping a destructor terminates the process
 436 |             try {
 437 |                 std::vector<std::vector<uint8_t>> tmp;
 438 |                 this->encoder->EndEncode(tmp);
 439 |                 this->encoder->DestroyEncoder();
 440 |             } catch (...) {}
 441 |             this->encoder.reset();
 442 |         }
 443 |         if (this->deviceBuffer)
 444 |             cudaFree(this->deviceBuffer); // temporary device memory
 445 |     }
 446 | 
 447 |     // Applies a new average bitrate (bits per second) and target frame rate to the running encoder.
 448 |     void setBitrate(uint64_t bitrate, uint32_t targetFrameRate)
 449 |     {
 450 |         NV_ENC_CONFIG config;
 451 |         memset(&config, 0, sizeof(config));
 452 |         config.version = NV_ENC_CONFIG_VER;
 453 |         NV_ENC_RECONFIGURE_PARAMS reconfigureParams;
 454 |         memset(&reconfigureParams, 0, sizeof(reconfigureParams));
 455 |         reconfigureParams.version = NV_ENC_RECONFIGURE_PARAMS_VER;
 456 |         reconfigureParams.resetEncoder = 1;
 457 |         reconfigureParams.forceIDR = 1;
 458 |         reconfigureParams.reInitEncodeParams.encodeConfig = &config;
 459 | 
 460 |         // Query first, override afterwards: GetInitializeParams() may replace values written to 'config' earlier
 461 |         encoder->GetInitializeParams(&reconfigureParams.reInitEncodeParams);
 462 |         reconfigureParams.reInitEncodeParams.frameRateNum = targetFrameRate;
 463 |         reconfigureParams.reInitEncodeParams.frameRateDen = 1;
 464 |         reconfigureParams.reInitEncodeParams.encodeConfig->rcParams.averageBitRate = bitrate;
 465 | 
 466 |         encoder->Reconfigure(&reconfigureParams);
 467 |         this->bitrate = bitrate;
 468 |         this->targetFrameRate = targetFrameRate;
 469 |     }
 470 | 
 471 |     // Copies/converts one frame from host or device memory into the encoder input, then returns the result of encode(dst, dstSize, forceIFrame).
 472 |     uint64_t encode(const void* src, uint64_t srcPitch, uint8_t *dst, uint64_t dstSize, uint32_t width, uint32_t height, bool forceIFrame)
 473 |     {
 474 |         // Recreate encoder if size changed. 16 and 32 bit samples are split into two
 475 |         // respectively four adjacent 8 bit tiles in the Y channel, which widens the frame.
 476 |         uint32_t tiles = 1;
 477 |         if (this->format == NVPIPE_UINT16)
 478 |             tiles = 2;
 479 |         else if (this->format == NVPIPE_UINT32)
 480 |             tiles = 4;
 481 | 
 482 |         this->recreate(width * tiles, height);
 483 | 
 484 |         if (this->format == NVPIPE_RGBA32)
 485 |         {
 486 |             // RGBA can be directly copied from host or device
 487 |             const NvEncInputFrame* frame = this->encoder->GetNextInputFrame();
 488 |             const cudaMemcpyKind kind = isDevicePointer(src) ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice;
 489 |             CUDA_THROW(cudaMemcpy2D(frame->inputPtr, frame->pitch, src, srcPitch, width * 4, height, kind),
 490 |                 "Failed to copy input frame");
 491 |         }
 492 |         else
 493 |         {
 494 |             // Other formats need to be copied to the device and converted
 495 |             const uint8_t* deviceSrc = (const uint8_t*) src;
 496 | 
 497 |             // Copy to device if necessary
 498 |             if (!isDevicePointer(src))
 499 |             {
 500 |                 this->recreateDeviceBuffer(width, height);
 501 |                 CUDA_THROW(cudaMemcpy(this->deviceBuffer, src, getFrameSize(this->format, width, height), cudaMemcpyHostToDevice),
 502 |                     "Failed to copy input frame");
 503 |                 deviceSrc = (const uint8_t*) this->deviceBuffer;
 504 |             }
 505 | 
 506 |             // Convert
 507 |             const NvEncInputFrame* frame = this->encoder->GetNextInputFrame();
 508 |             uint8_t* target = (uint8_t*) frame->inputPtr;
 509 | 
 510 |             // Every conversion kernel runs one thread per pixel in blocks of 16x2 threads
 511 |             const dim3 gridSize(width / 16 + 1, height / 2 + 1);
 512 |             const dim3 blockSize(16, 2);
 513 | 
 514 |             switch (this->format)
 515 |             {
 516 |             case NVPIPE_UINT4:
 517 |                 // extract 4 bit and copy to 8 bit
 518 |                 uint4_to_nv12<<<gridSize, blockSize>>>(deviceSrc, srcPitch, target, frame->pitch, width, height);
 519 |                 break;
 520 |             case NVPIPE_UINT8:
 521 |                 // copy 8 bit
 522 |                 uint8_to_nv12<<<gridSize, blockSize>>>(deviceSrc, srcPitch, target, frame->pitch, width, height);
 523 |                 break;
 524 |             case NVPIPE_UINT16:
 525 |                 // split 16 bit into 2x 8 bit
 526 |                 uint16_to_nv12<<<gridSize, blockSize>>>(deviceSrc, srcPitch, target, frame->pitch, width, height);
 527 |                 break;
 528 |             case NVPIPE_UINT32:
 529 |                 // split 32 bit into 4x 8 bit
 530 |                 uint32_to_nv12<<<gridSize, blockSize>>>(deviceSrc, srcPitch, target, frame->pitch, width, height);
 531 |                 break;
 532 |             default:
 533 |                 break;
 534 |             }
 535 |         }
 536 | 
 537 |         // Encode
 538 |         return this->encode(dst, dstSize, forceIFrame);
 539 |     }
 540 | 
 541 | #ifdef NVPIPE_WITH_OPENGL
 542 | 
 543 |     // Encodes the contents of an OpenGL texture (RGBA32 only); the texture is unmapped again even when encoding fails.
 544 |     uint64_t encodeTexture(uint32_t texture, uint32_t target, uint8_t* dst, uint64_t dstSize, uint32_t width, uint32_t height, bool forceIFrame)
 545 |     {
 546 |         if (this->format != NVPIPE_RGBA32)
 547 |             throw Exception("The OpenGL interface only supports the RGBA32 format");
 548 | 
 549 |         // Recreate encoder if size changed
 550 |         this->recreate(width, height);
 551 | 
 552 |         // Map texture and copy input to encoder
 553 |         cudaGraphicsResource_t resource = this->registry.getTextureGraphicsResource(texture, target, width, height, cudaGraphicsRegisterFlagsReadOnly);
 554 |         CUDA_THROW(cudaGraphicsMapResources(1, &resource), "Failed to map texture graphics resource");
 555 |         uint64_t size = 0;
 556 |         try {
 557 |             cudaArray_t array;
 558 |             CUDA_THROW(cudaGraphicsSubResourceGetMappedArray(&array, resource, 0, 0), "Failed get texture graphics resource array");
 559 |             const NvEncInputFrame* f = this->encoder->GetNextInputFrame();
 560 |             CUDA_THROW(cudaMemcpy2DFromArray(f->inputPtr, f->pitch, array, 0, 0, width * 4, height, cudaMemcpyDeviceToDevice), "Failed to copy from texture array");
 561 |             size = this->encode(dst, dstSize, forceIFrame);
 562 |         } catch (...) {
 563 |             // Do not leave the texture mapped when copying or encoding fails; the original error wins
 564 |             cudaGraphicsUnmapResources(1, &resource);
 565 |             throw;
 566 |         }
 567 | 
 568 |         // Unmap texture
 569 |         CUDA_THROW(cudaGraphicsUnmapResources(1, &resource), "Failed to unmap texture graphics resource");
 570 |         return size;
 571 |     }
 572 | 
 573 |     // Encodes the contents of an OpenGL pixel buffer object (RGBA32 only); the PBO is unmapped again even when encoding fails.
 574 |     uint64_t encodePBO(uint32_t pbo, uint8_t* dst, uint64_t dstSize, uint32_t width, uint32_t height, bool forceIFrame)
 575 |     {
 576 |         if (this->format != NVPIPE_RGBA32)
 577 |             throw Exception("The OpenGL interface only supports the RGBA32 format");
 578 |         // Map PBO and copy input to encoder
 579 |         cudaGraphicsResource_t resource = this->registry.getPBOGraphicsResource(pbo, width, height, cudaGraphicsRegisterFlagsReadOnly);
 580 |         CUDA_THROW(cudaGraphicsMapResources(1, &resource), "Failed to map PBO graphics resource");
 581 |         uint64_t size = 0;
 582 |         try {
 583 |             void* pboPointer;
 584 |             size_t pboSize;
 585 |             CUDA_THROW(cudaGraphicsResourceGetMappedPointer(&pboPointer, &pboSize, resource), "Failed to get mapped PBO pointer");
 586 |             size = this->encode(pboPointer, width * 4, dst, dstSize, width, height, forceIFrame);
 587 |         } catch (...) {
 588 |             // Do not leave the PBO mapped when encoding fails; the original error wins
 589 |             cudaGraphicsUnmapResources(1, &resource);
 590 |             throw;
 591 |         }
 592 |         // Unmap PBO
 593 |         CUDA_THROW(cudaGraphicsUnmapResources(1, &resource), "Failed to unmap PBO graphics resource");
 594 |         return size;
 595 |     }
 596 | 
 597 | #endif
 598 | 
 599 | private:
 600 |     void recreate(uint32_t width, uint32_t height)
 601 |     {
 602 |         // (Re)creates the NVENC encoder for the given frame size; does nothing if the size is unchanged.
 603 |         // Throws Exception if the device cannot be synchronized or the encoder cannot be created.
 604 |         std::lock_guard<std::mutex> lock(Encoder::mutex); // static mutex, shared by all Encoder instances
 605 |         if (width == this->width && height == this->height)
 606 |             return;
 607 | 
 608 |         // The lossy rate control below divides by the target frame rate; reject zero instead of dividing by it.
 609 |         if (this->compression == NVPIPE_LOSSY && this->targetFrameRate == 0)
 610 |             throw Exception("Failed to create encoder (target frame rate must not be zero)");
 611 |         // Ensure we have a CUDA context
 612 |         CUDA_THROW(cudaDeviceSynchronize(),
 613 |             "Failed to synchronize device");
 614 |         CUcontext cudaContext;
 615 |         cuCtxGetCurrent(&cudaContext);
 616 | 
 617 |         // Forget the cached size until the new encoder is ready, so that a failed attempt is retried next time.
 618 |         this->width = 0;
 619 |         this->height = 0;
 620 |         // Create encoder
 621 |         try
 622 |         {
 623 |             // Destroy previous encoder
 624 |             if (this->encoder)
 625 |             {
 626 |                 std::vector<std::vector<uint8_t>> tmp;
 627 |                 this->encoder->EndEncode(tmp);
 628 |                 this->encoder->DestroyEncoder();
 629 |                 this->encoder.reset();
 630 |             }
 631 | 
 632 |             NV_ENC_BUFFER_FORMAT bufferFormat = (this->format == NVPIPE_RGBA32) ? NV_ENC_BUFFER_FORMAT_ABGR : NV_ENC_BUFFER_FORMAT_NV12;
 633 |             this->encoder = std::unique_ptr<NvEncoderCuda>(new NvEncoderCuda(cudaContext, width, height, bufferFormat, 0));
 634 | 
 635 |             NV_ENC_INITIALIZE_PARAMS initializeParams = { NV_ENC_INITIALIZE_PARAMS_VER };
 636 |             NV_ENC_CONFIG encodeConfig = { NV_ENC_CONFIG_VER };
 637 |             initializeParams.encodeConfig = &encodeConfig;
 638 |             GUID codecGUID = (this->codec == NVPIPE_HEVC) ? NV_ENC_CODEC_HEVC_GUID : NV_ENC_CODEC_H264_GUID;
 639 |             GUID presetGUID = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
 640 |             if (this->compression == NVPIPE_LOSSLESS)
 641 |                 presetGUID = NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID; // NV_ENC_PRESET_LOSSLESS_HP_GUID
 642 |             this->encoder->CreateDefaultEncoderParams(&initializeParams, codecGUID, presetGUID);
 643 | 
 644 |             initializeParams.encodeWidth = width;
 645 |             initializeParams.encodeHeight = height;
 646 |             initializeParams.frameRateNum = this->targetFrameRate;
 647 |             initializeParams.frameRateDen = 1;
 648 |             initializeParams.enablePTD = 1;
 649 |             encodeConfig.gopLength = NVENC_INFINITE_GOPLENGTH; // No B-frames
 650 |             encodeConfig.frameIntervalP = 1;
 651 |             if (this->codec == NVPIPE_H264)
 652 |                 encodeConfig.encodeCodecConfig.h264Config.idrPeriod = NVENC_INFINITE_GOPLENGTH;
 653 |             else if (this->codec == NVPIPE_HEVC)
 654 |                 encodeConfig.encodeCodecConfig.hevcConfig.idrPeriod = NVENC_INFINITE_GOPLENGTH;
 655 | 
 656 |             if (this->compression == NVPIPE_LOSSY)
 657 |             {
 658 |                 encodeConfig.rcParams.averageBitRate = this->bitrate;
 659 |                 encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ;
 660 |                 encodeConfig.rcParams.vbvBufferSize = encodeConfig.rcParams.averageBitRate * initializeParams.frameRateDen / initializeParams.frameRateNum; // bitrate / framerate = one frame
 661 |                 encodeConfig.rcParams.maxBitRate = encodeConfig.rcParams.averageBitRate;
 662 |                 encodeConfig.rcParams.vbvInitialDelay = encodeConfig.rcParams.vbvBufferSize;
 663 |             }
 664 |             this->encoder->CreateEncoder(&initializeParams);
 665 |             this->width = width; // commit the size only once the encoder is usable
 666 |             this->height = height;
 667 |         }
 668 |         catch (NVENCException& e)
 669 |         {
 670 |             this->encoder.reset(); // never keep a half-initialized encoder around
 671 |             throw Exception("Failed to create encoder (" + e.getErrorString() + ", error " + std::to_string(e.getErrorCode()) + " = " + EncErrorCodeToString(e.getErrorCode()) + ")");
 672 |         }
 673 |     }
 674 | 
 675 |     uint64_t encode(uint8_t* dst, uint64_t dstSize, bool forceIFrame)
 676 |     {
 677 |         // Runs this->encoder->EncodeFrame and copies the packets it returns back to back into dst.
 678 |         // Returns the total number of bytes written.
 679 |         // Throws Exception if NVENC reports an error or the packets do not fit into dstSize.
 680 |         std::vector<std::vector<uint8_t>> encodedPackets;
 681 | 
 682 |         try
 683 |         {
 684 |             if (!forceIFrame)
 685 |             {
 686 |                 this->encoder->EncodeFrame(encodedPackets);
 687 |             }
 688 |             else
 689 |             {
 690 |                 // Force an IDR frame and ask for SPS/PPS to be output with it
 691 |                 NV_ENC_PIC_PARAMS picParams = {};
 692 |                 picParams.encodePicFlags = NV_ENC_PIC_FLAG_FORCEIDR | NV_ENC_PIC_FLAG_OUTPUT_SPSPPS;
 693 |                 this->encoder->EncodeFrame(encodedPackets, &picParams);
 694 |             }
 695 |         }
 696 |         catch (NVENCException& e)
 697 |         {
 698 |             throw Exception("Encode failed (" + e.getErrorString() + ", error " + std::to_string(e.getErrorCode()) + " = " + EncErrorCodeToString(e.getErrorCode()) + ")");
 699 |         }
 700 | 
 701 |         // Append every packet to the output, refusing to write past dstSize
 702 |         uint64_t written = 0;
 703 |         for (auto& packet : encodedPackets)
 704 |         {
 705 |             const uint64_t packetSize = packet.size();
 706 |             if (written + packetSize > dstSize)
 707 |                 throw Exception("Encode output buffer overflow");
 708 | 
 709 |             memcpy(dst + written, packet.data(), packetSize);
 710 |             written += packetSize;
 711 |         }
 712 | 
 713 |         return written;
 714 |     }
 715 | 
 716 |     void recreateDeviceBuffer(uint32_t width, uint32_t height)
 717 |     {
 718 |         // Grows the temporary device buffer to hold one frame of this->format; throws Exception if allocation fails.
 719 |         uint64_t requiredSize = getFrameSize(this->format, width, height);
 720 |         if (this->deviceBufferSize >= requiredSize)
 721 |             return;
 722 |         // Clear pointer and size before allocating, so a failed cudaMalloc leaves neither a dangling pointer nor a stale size.
 723 |         if (this->deviceBuffer)
 724 |             cudaFree(this->deviceBuffer);
 725 |         this->deviceBuffer = nullptr;
 726 |         this->deviceBufferSize = 0;
 727 |         CUDA_THROW(cudaMalloc(&this->deviceBuffer, requiredSize),
 728 |             "Failed to allocate temporary device memory");
 729 |         this->deviceBufferSize = requiredSize;
 730 |     }
 731 | 
 732 | private:
 733 |     NvPipe_Format format; // selects the NVENC buffer format in recreate() and the frame size in recreateDeviceBuffer()
 734 |     NvPipe_Codec codec; // selects the codec GUID (and which idrPeriod is set) in recreate()
 735 |     NvPipe_Compression compression; // selects the preset and whether rate control is configured in recreate()
 736 |     uint64_t bitrate; // average bit rate applied in recreate() for lossy compression
 737 |     uint32_t targetFrameRate; // becomes frameRateNum in recreate() (frameRateDen is 1)
 738 |     uint32_t width = 0; // together with height: the size recreate() compares against to decide whether a new encoder is needed
 739 |     uint32_t height = 0;
 740 | 
 741 |     std::unique_ptr<NvEncoderCuda> encoder; // owned NVENC wrapper, replaced by recreate()
 742 | 
 743 |     void* deviceBuffer = nullptr; // temporary device memory, grown on demand by recreateDeviceBuffer()
 744 |     uint64_t deviceBufferSize = 0; // capacity of deviceBuffer in bytes
 745 | 
 746 |     static std::mutex mutex; // locked by recreate(); static, so shared by all Encoder instances
 747 | 
 748 | #ifdef NVPIPE_WITH_OPENGL
 749 |     GraphicsResourceRegistry registry;
 750 | #endif
 751 | };
 752 | 
 753 | std::mutex Encoder::mutex; // out-of-class definition of the static mutex that Encoder::recreate() locks
 754 | 
 755 | #endif
 756 | 
 757 | 
 758 | #ifdef NVPIPE_WITH_DECODER
 759 | 
 760 | inline std::string DecErrorCodeToString(CUresult code)
 761 | {
 762 |     // Maps a CUDA driver result code to its symbolic name via cuGetErrorName.
 763 |     // Falls back to a fixed placeholder when the name pointer is left unset.
 764 |     const char* errorName = nullptr;
 765 |     cuGetErrorName(code, &errorName);
 766 | 
 767 |     const bool resolved = (errorName != nullptr);
 768 |     return resolved ? std::string(errorName) : std::string("Unknown error code");
 769 | }
 770 | 
 771 | /**
 772 |  * @brief Decoder implementation.
 773 |  */
 774 | class Decoder
 775 | {
 776 | public:
 777 |     Decoder(NvPipe_Format format, NvPipe_Codec codec, uint32_t width, uint32_t height)
 778 |         : format(format), codec(codec)
 779 |     {
 780 |         // Output format and codec are only ever set here; the frame size may
 781 |         // change later, which recreate() takes care of on every decode call.
 782 |         this->recreate(width, height);
 783 |     }
 784 | 
 785 |     ~Decoder()
 786 |     {
 787 |         // Give back the temporary device memory, if any was ever allocated
 788 |         if (nullptr != this->deviceBuffer)
 789 |             cudaFree(this->deviceBuffer);
 790 |     }
 791 | 
 792 |     uint64_t decode(const uint8_t* src, uint64_t srcSize, void* dst, uint32_t width, uint32_t height)
 793 |     {
 794 |         // Decodes one compressed frame into dst and returns the frame size in bytes (0 if no frame came out). The decoder is recreated first if the size changed.
 795 |         if (this->format == NVPIPE_UINT16)
 796 |             this->recreate(width * 2, height); // split into two adjacent tiles in Y channel
 797 |         else if (this->format == NVPIPE_UINT32)
 798 |             this->recreate(width * 4, height); // split into four adjacent tiles in Y channel
 799 |         else
 800 |             this->recreate(width, height);
 801 | 
 802 |         // Decode
 803 |         uint8_t* decoded = this->decode(src, srcSize);
 804 |         // NOTE: the private decode() overload throws when no frame is produced, so the null check below is a second line of defence
 805 |         if (nullptr != decoded)
 806 |         {
 807 |             // Allocate temporary device buffer if we need to copy to the host eventually
 808 |             bool copyToHost = !isDevicePointer(dst);
 809 |             if (copyToHost)
 810 |                 this->recreateDeviceBuffer(width, height);
 811 | 
 812 |             // Convert to output format
 813 |             uint8_t* dstDevice = (uint8_t*)(copyToHost ? this->deviceBuffer : dst);
 814 |             // RGBA32 goes through Nv12ToColor32; the integer formats use the nv12_to_uint* kernels with 16x2 thread blocks
 815 |             if (this->format == NVPIPE_RGBA32)
 816 |             {
 817 |                 Nv12ToColor32<RGBA32>(decoded, width, dstDevice, width * 4, width, height);
 818 |             }
 819 |             else if (this->format == NVPIPE_UINT4)
 820 |             {
 821 |                 // one thread per TWO pixels (merge 2x4 bit to one byte per thread)
 822 |                 dim3 gridSize(width / 16 / 2 + 1, height / 2 + 1);
 823 |                 dim3 blockSize(16, 2);
 824 | 
 825 |                 nv12_to_uint4 << <gridSize, blockSize >> > (decoded, this->decoder->GetDeviceFramePitch(), dstDevice, width / 2, width, height);
 826 |             }
 827 |             else if (this->format == NVPIPE_UINT8)
 828 |             {
 829 |                 // one thread per pixel (copy 8 bit)
 830 |                 dim3 gridSize(width / 16 + 1, height / 2 + 1);
 831 |                 dim3 blockSize(16, 2);
 832 | 
 833 |                 nv12_to_uint8 << <gridSize, blockSize >> > (decoded, this->decoder->GetDeviceFramePitch(), dstDevice, width, width, height);
 834 |             }
 835 |             else if (this->format == NVPIPE_UINT16)
 836 |             {
 837 |                 // one thread per pixel (merge 2x8 bit into 16 bit pixels)
 838 |                 dim3 gridSize(width / 16 + 1, height / 2 + 1);
 839 |                 dim3 blockSize(16, 2);
 840 | 
 841 |                 nv12_to_uint16 << <gridSize, blockSize >> > (decoded, this->decoder->GetDeviceFramePitch(), dstDevice, width * 2, width, height);
 842 |             }
 843 |             else if (this->format == NVPIPE_UINT32)
 844 |             {
 845 |                 // one thread per pixel (merge 4x8 bit into 32 bit pixels)
 846 |                 dim3 gridSize(width / 16 + 1, height / 2 + 1);
 847 |                 dim3 blockSize(16, 2);
 848 | 
 849 |                 nv12_to_uint32 << <gridSize, blockSize >> > (decoded, this->decoder->GetDeviceFramePitch(), dstDevice, width * 4, width, height);
 850 |             }
 851 |             // NOTE(review): none of the kernel launches above is followed by an error check (e.g. cudaGetLastError) - confirm this is intended
 852 |             // Copy to host if necessary
 853 |             if (copyToHost)
 854 |                 CUDA_THROW(cudaMemcpy(dst, this->deviceBuffer, getFrameSize(this->format, width, height), cudaMemcpyDeviceToHost),
 855 |                     "Failed to copy output to host memory");
 856 | 
 857 |             return getFrameSize(this->format, width, height);
 858 |         }
 859 | 
 860 |         return 0;
 861 |     }
 862 | 
 863 | #ifdef NVPIPE_WITH_OPENGL
 864 | 
 865 |     uint64_t decodeTexture(const uint8_t* src, uint64_t srcSize, uint32_t texture, uint32_t target, uint32_t width, uint32_t height)
 866 |     {
 867 |         // Decodes one frame into an OpenGL texture (RGBA32 only). Returns the bytes written, or 0 if no frame was decoded.
 868 |         if (this->format != NVPIPE_RGBA32)
 869 |             throw Exception("The OpenGL interface only supports the RGBA32 format");
 870 | 
 871 |         // Recreate decoder if size changed
 872 |         this->recreate(width, height);
 873 |         uint8_t* decoded = this->decode(src, srcSize);
 874 |         if (nullptr == decoded)
 875 |             return 0;
 876 | 
 877 |         // Convert to RGBA
 878 |         this->recreateDeviceBuffer(width, height);
 879 |         Nv12ToColor32<RGBA32>(decoded, width, (uint8_t*)this->deviceBuffer, width * 4, width, height);
 880 | 
 881 |         // Copy output to texture. The resource must not stay mapped when a copy step throws (OpenGL could not
 882 |         // use the texture and the next map would fail), so it is unmapped on the error path before rethrowing.
 883 |         cudaGraphicsResource_t resource = this->registry.getTextureGraphicsResource(texture, target, width, height, cudaGraphicsRegisterFlagsWriteDiscard);
 884 |         CUDA_THROW(cudaGraphicsMapResources(1, &resource), "Failed to map texture graphics resource");
 885 |         try
 886 |         {
 887 |             cudaArray_t array;
 888 |             CUDA_THROW(cudaGraphicsSubResourceGetMappedArray(&array, resource, 0, 0), "Failed get texture graphics resource array");
 889 |             CUDA_THROW(cudaMemcpy2DToArray(array, 0, 0, this->deviceBuffer, width * 4, width * 4, height, cudaMemcpyDeviceToDevice), "Failed to copy to texture array");
 890 |         }
 891 |         catch (...)
 892 |         {
 893 |             cudaGraphicsUnmapResources(1, &resource); // best effort; the original error is the one reported
 894 |             throw;
 895 |         }
 896 |         CUDA_THROW(cudaGraphicsUnmapResources(1, &resource), "Failed to unmap texture graphics resource");
 897 |         return static_cast<uint64_t>(width) * height * 4; // widened before multiplying so large frames cannot wrap in 32 bit
 898 |     }
 899 | 
 900 |     uint64_t decodePBO(const uint8_t* src, uint64_t srcSize, uint32_t pbo, uint32_t width, uint32_t height)
 901 |     {
 902 |         // Decodes one frame into an OpenGL PBO (RGBA32 only); returns the bytes written or 0. The PBO is mapped for the decode and unmapped on every path, including exceptions.
 903 |         if (this->format != NVPIPE_RGBA32)
 904 |             throw Exception("The OpenGL interface only supports the RGBA32 format");
 905 |         cudaGraphicsResource_t resource = this->registry.getPBOGraphicsResource(pbo, width, height, cudaGraphicsRegisterFlagsWriteDiscard);
 906 |         CUDA_THROW(cudaGraphicsMapResources(1, &resource), "Failed to map PBO graphics resource");
 907 |         uint64_t size = 0;
 908 |         try
 909 |         {
 910 |             void* pboPointer;
 911 |             size_t pboSize;
 912 |             CUDA_THROW(cudaGraphicsResourceGetMappedPointer(&pboPointer, &pboSize, resource), "Failed to get mapped PBO pointer");
 913 |             size = this->decode(src, srcSize, pboPointer, width, height);
 914 |         }
 915 |         catch (...)
 916 |         {
 917 |             cudaGraphicsUnmapResources(1, &resource); // best effort; the original error is the one reported
 918 |             throw;
 919 |         }
 920 |         CUDA_THROW(cudaGraphicsUnmapResources(1, &resource), "Failed to unmap PBO graphics resource");
 921 |         return size;
 922 |     }
 923 | 
 924 | #endif
 925 | 
 926 | private:
 927 |     void recreate(uint32_t width, uint32_t height)
 928 |     {
 929 |         // (Re)creates the NvDecoder for the given size; does nothing if the size is unchanged.
 930 |         // Throws Exception if the device cannot be synchronized or the decoder cannot be created.
 931 |         std::lock_guard<std::mutex> lock(Decoder::mutex); // static mutex, shared by all Decoder instances
 932 |         if (width == this->width && height == this->height)
 933 |             return;
 934 | 
 935 |         // Ensure we have a CUDA context
 936 |         CUDA_THROW(cudaDeviceSynchronize(),
 937 |             "Failed to synchronize device");
 938 |         CUcontext cudaContext;
 939 |         cuCtxGetCurrent(&cudaContext);
 940 | 
 941 |         // Forget the cached size until the new decoder exists. Committing it up front would make a retry with the
 942 |         // same size hit the early return above and then dereference a null decoder.
 943 |         this->width = 0;
 944 |         this->height = 0;
 945 | 
 946 |         // Create decoder
 947 |         try
 948 |         {
 949 |             this->decoder.reset(); // Destroy previous decoder
 950 |             this->decoder = std::unique_ptr<NvDecoder>(new NvDecoder(cudaContext, true, (this->codec == NVPIPE_HEVC) ? cudaVideoCodec_HEVC : cudaVideoCodec_H264,/* &Decoder::mutex*/ nullptr, true));
 951 |         }
 952 |         catch (NVDECException& e)
 953 |         {
 954 |             throw Exception("Failed to create decoder (" + e.getErrorString() + ", error " + std::to_string(e.getErrorCode()) + " = " + DecErrorCodeToString(e.getErrorCode()) + ")");
 955 |         }
 956 | 
 957 |         this->width = width; // commit the size only once the decoder exists
 958 |         this->height = height;
 959 |     }
 960 | 
 961 |     uint8_t* decode(const uint8_t* src, uint64_t srcSize)
 962 |     {
 963 |         // Feeds one compressed frame to the decoder and returns the last frame it produced; throws Exception if decoding fails or yields no frame.
 964 |         uint8_t **frames;
 965 |         int64_t *frameTimeStamps;
 966 |         int frameCount = 0;
 967 |         try
 968 |         {
 969 |             // Some cuvid implementations have one frame latency. Refeed frame into pipeline in this case.
 970 |             const uint32_t maxAttempts = 3;
 971 |             uint32_t attempt = 0;
 972 |             while ((attempt < maxAttempts) && (frameCount <= 0))
 973 |             {
 974 |                 this->decoder->Decode(src, srcSize, &frames, &frameCount, CUVID_PKT_ENDOFPICTURE, &frameTimeStamps, this->n++);
 975 |                 ++attempt;
 976 |             }
 977 |         }
 978 |         catch (NVDECException& e)
 979 |         {
 980 |             throw Exception("Decode failed (" + e.getErrorString() + ", error " + std::to_string(e.getErrorCode()) + " = " + DecErrorCodeToString(e.getErrorCode()) + ")");
 981 |         }
 982 |         if (frameCount <= 0)
 983 |             throw Exception("No frame decoded (Decoder expects encoded bitstream for a single complete frame. Accumulating partial data or combining multiple frames is not supported.)");
 984 |         return frames[frameCount - 1];
 985 |     }
 986 | 
 987 |     void recreateDeviceBuffer(uint32_t width, uint32_t height)
 988 |     {
 989 |         // Grows the temporary device buffer to hold one frame of this->format; throws Exception if allocation fails.
 990 |         uint64_t requiredSize = getFrameSize(this->format, width, height);
 991 |         if (this->deviceBufferSize >= requiredSize)
 992 |             return;
 993 |         // Clear pointer and size before allocating, so a failed cudaMalloc cannot leave a pointer the destructor frees again.
 994 |         if (this->deviceBuffer)
 995 |             cudaFree(this->deviceBuffer);
 996 |         this->deviceBuffer = nullptr;
 997 |         this->deviceBufferSize = 0;
 998 |         CUDA_THROW(cudaMalloc(&this->deviceBuffer, requiredSize),
 999 |             "Failed to allocate temporary device memory");
1000 |         this->deviceBufferSize = requiredSize;
1001 |     }
1002 | 
1003 | private:
1004 |     NvPipe_Format format; // output format, set in the constructor; selects the conversion used by decode()
1005 |     NvPipe_Codec codec; // set in the constructor; selects the cudaVideoCodec when recreate() builds the NvDecoder
1006 |     uint32_t width = 0; // together with height: the size recreate() compares against to decide whether a new decoder is needed
1007 |     uint32_t height = 0;
1008 | 
1009 |     std::unique_ptr<NvDecoder> decoder; // owned NvDecoder, replaced by recreate()
1010 |     int64_t n = 0; // passed (post-incremented) as the last argument of every NvDecoder::Decode call
1011 | 
1012 |     void* deviceBuffer = nullptr; // temporary device memory, grown by recreateDeviceBuffer(), freed in the destructor
1013 |     uint64_t deviceBufferSize = 0; // capacity of deviceBuffer in bytes
1014 | 
1015 |     static std::mutex mutex; // locked by recreate(); static, so shared by all Decoder instances
1016 | 
1017 | #ifdef NVPIPE_WITH_OPENGL
1018 |     GraphicsResourceRegistry registry; // supplies the CUDA graphics resources used by decodeTexture() and decodePBO()
1019 | #endif
1020 | };
1021 | 
1022 | std::mutex Decoder::mutex; // out-of-class definition of the static mutex that Decoder::recreate() locks
1023 | 
1024 | #endif
1025 | 
1026 | 
1027 | 
1028 | 
1029 | // --------- Exported C API ---------
1030 | 
1031 | // NvPipe was originally developed as a C++ library.
1032 | // However, for compatibility reasons its functionality is now exposed as a plain C API.
1033 | 
1034 | struct Instance // object behind the opaque NvPipe* handle: the create functions return it, the other API functions cast the handle back
1035 | {
1036 | #ifdef NVPIPE_WITH_ENCODER
1037 |     std::unique_ptr<Encoder> encoder; // set by NvPipe_CreateEncoder; empty otherwise
1038 | #endif
1039 | 
1040 | #ifdef NVPIPE_WITH_DECODER
1041 |     std::unique_ptr<Decoder> decoder; // set by NvPipe_CreateDecoder; empty otherwise
1042 | #endif
1043 | 
1044 |     std::string error; // message of the last failed call on this instance, returned by NvPipe_GetError
1045 | };
1046 | 
1047 | std::string sharedError; // error message that is not tied to an instance (e.g. failed creation); returned by NvPipe_GetError(NULL). NOT threadsafe
1048 | 
1049 | 
1050 | #ifdef NVPIPE_WITH_ENCODER
1051 | 
1052 | NVPIPE_EXPORT NvPipe* NvPipe_CreateEncoder(NvPipe_Format format, NvPipe_Codec codec, NvPipe_Compression compression, uint64_t bitrate, uint32_t targetFrameRate, uint32_t width, uint32_t height)
1053 | {
1054 |     // Creates an encoder instance. Returns NULL on failure; the message is then available via NvPipe_GetError(NULL).
1055 |     // The instance is held by a unique_ptr until it is handed out, so it is also released when something other
1056 |     // than Exception (e.g. std::bad_alloc) propagates out of the try block.
1057 |     std::unique_ptr<Instance> instance(new Instance());
1058 |     try
1059 |     {
1060 |         instance->encoder = std::unique_ptr<Encoder>(new Encoder(format, codec, compression, bitrate, targetFrameRate, width, height));
1061 |     }
1062 |     catch (Exception& e)
1063 |     {
1064 |         sharedError = e.getErrorString();
1065 |         return nullptr;
1066 |     }
1067 |     return instance.release();
1068 | }
1069 | 
1070 | NVPIPE_EXPORT void NvPipe_SetBitrate(NvPipe* nvp, uint64_t bitrate, uint32_t targetFrameRate)
1071 | {
1072 |     // Forwards the new bitrate and target frame rate to the instance's encoder; failures are stored for NvPipe_GetError.
1073 |     Instance* instance = static_cast<Instance*>(nvp);
1074 |     if (!instance || !instance->encoder)
1075 |     {
1076 |         (instance ? instance->error : sharedError) = "Invalid NvPipe encoder."; // NULL handle: report via the shared message instead of dereferencing it
1077 |         return;
1078 |     }
1079 |     try
1080 |     {
1081 |         instance->encoder->setBitrate(bitrate, targetFrameRate);
1082 |     }
1083 |     catch (Exception& e)
1084 |     {
1085 |         instance->error = e.getErrorString();
1086 |     }
1087 | }
1088 | 
1089 | NVPIPE_EXPORT uint64_t NvPipe_Encode(NvPipe* nvp, const void* src, uint64_t srcPitch, uint8_t* dst, uint64_t dstSize, uint32_t width, uint32_t height, bool forceIFrame)
1090 | {
1091 |     // Encodes one frame through the instance's encoder. Returns the encoded size, or 0 on error with the message stored for NvPipe_GetError.
1092 |     Instance* instance = static_cast<Instance*>(nvp);
1093 |     if (!instance || !instance->encoder)
1094 |     {
1095 |         (instance ? instance->error : sharedError) = "Invalid NvPipe encoder."; // NULL handle: report via the shared message instead of dereferencing it
1096 |         return 0;
1097 |     }
1098 |     try
1099 |     {
1100 |         return instance->encoder->encode(src, srcPitch, dst, dstSize, width, height, forceIFrame);
1101 |     }
1102 |     catch (Exception& e)
1103 |     {
1104 |         instance->error = e.getErrorString();
1105 |         return 0;
1106 |     }
1107 | }
1108 | 
1109 | #ifdef NVPIPE_WITH_OPENGL
1110 | 
1111 | NVPIPE_EXPORT uint64_t NvPipe_EncodeTexture(NvPipe* nvp, uint32_t texture, uint32_t target, uint8_t* dst, uint64_t dstSize, uint32_t width, uint32_t height, bool forceIFrame)
1112 | {
1113 |     // Encodes one frame from an OpenGL texture. Returns the encoded size, or 0 on error with the message stored for NvPipe_GetError.
1114 |     Instance* instance = static_cast<Instance*>(nvp);
1115 |     if (!instance || !instance->encoder)
1116 |     {
1117 |         (instance ? instance->error : sharedError) = "Invalid NvPipe encoder."; // NULL handle: report via the shared message instead of dereferencing it
1118 |         return 0;
1119 |     }
1120 |     try
1121 |     {
1122 |         return instance->encoder->encodeTexture(texture, target, dst, dstSize, width, height, forceIFrame);
1123 |     }
1124 |     catch (Exception& e)
1125 |     {
1126 |         instance->error = e.getErrorString();
1127 |         return 0;
1128 |     }
1129 | }
1130 | 
1131 | NVPIPE_EXPORT uint64_t NvPipe_EncodePBO(NvPipe* nvp, uint32_t pbo, uint8_t* dst, uint64_t dstSize, uint32_t width, uint32_t height, bool forceIFrame)
1132 | {
1133 |     // Encodes one frame from an OpenGL PBO. Returns the encoded size, or 0 on error with the message stored for NvPipe_GetError.
1134 |     Instance* instance = static_cast<Instance*>(nvp);
1135 |     if (!instance || !instance->encoder)
1136 |     {
1137 |         (instance ? instance->error : sharedError) = "Invalid NvPipe encoder."; // NULL handle: report via the shared message instead of dereferencing it
1138 |         return 0;
1139 |     }
1140 |     try
1141 |     {
1142 |         return instance->encoder->encodePBO(pbo, dst, dstSize, width, height, forceIFrame);
1143 |     }
1144 |     catch (Exception& e)
1145 |     {
1146 |         instance->error = e.getErrorString();
1147 |         return 0;
1148 |     }
1149 | }
1150 | 
1151 | #endif
1152 | 
1153 | #endif
1154 | 
1155 | #ifdef NVPIPE_WITH_DECODER
1156 | 
1157 | NVPIPE_EXPORT NvPipe* NvPipe_CreateDecoder(NvPipe_Format format, NvPipe_Codec codec, uint32_t width, uint32_t height)
1158 | {
1159 |     // Creates a decoder instance. Returns NULL on failure; the message is then available via NvPipe_GetError(NULL).
1160 |     // The instance is held by a unique_ptr until it is handed out, so it is also released when something other
1161 |     // than Exception (e.g. std::bad_alloc) propagates out of the try block.
1162 |     std::unique_ptr<Instance> instance(new Instance());
1163 |     try
1164 |     {
1165 |         instance->decoder = std::unique_ptr<Decoder>(new Decoder(format, codec, width, height));
1166 |     }
1167 |     catch (Exception& e)
1168 |     {
1169 |         sharedError = e.getErrorString();
1170 |         return nullptr;
1171 |     }
1172 |     return instance.release();
1173 | }
1174 | 
1175 | NVPIPE_EXPORT uint64_t NvPipe_Decode(NvPipe* nvp, const uint8_t* src, uint64_t srcSize, void* dst, uint32_t width, uint32_t height)
1176 | {
1177 |     // Decodes one frame through the instance's decoder. Returns the decoded size, or 0 on error with the message stored for NvPipe_GetError.
1178 |     Instance* instance = static_cast<Instance*>(nvp);
1179 |     if (!instance || !instance->decoder)
1180 |     {
1181 |         (instance ? instance->error : sharedError) = "Invalid NvPipe decoder."; // NULL handle: report via the shared message instead of dereferencing it
1182 |         return 0;
1183 |     }
1184 |     try
1185 |     {
1186 |         return instance->decoder->decode(src, srcSize, dst, width, height);
1187 |     }
1188 |     catch (Exception& e)
1189 |     {
1190 |         instance->error = e.getErrorString();
1191 |         return 0;
1192 |     }
1193 | }
1194 | 
1195 | #ifdef NVPIPE_WITH_OPENGL
1196 | 
1197 | NVPIPE_EXPORT uint64_t NvPipe_DecodeTexture(NvPipe* nvp, const uint8_t* src, uint64_t srcSize, uint32_t texture, uint32_t target, uint32_t width, uint32_t height)
1198 | {
1199 |     // Decodes one frame into an OpenGL texture. Returns the decoded size, or 0 on error with the message stored for NvPipe_GetError.
1200 |     Instance* instance = static_cast<Instance*>(nvp);
1201 |     if (!instance || !instance->decoder)
1202 |     {
1203 |         (instance ? instance->error : sharedError) = "Invalid NvPipe decoder."; // NULL handle: report via the shared message instead of dereferencing it
1204 |         return 0;
1205 |     }
1206 |     try
1207 |     {
1208 |         return instance->decoder->decodeTexture(src, srcSize, texture, target, width, height);
1209 |     }
1210 |     catch (Exception& e)
1211 |     {
1212 |         instance->error = e.getErrorString();
1213 |         return 0;
1214 |     }
1215 | }
1216 | 
1217 | NVPIPE_EXPORT uint64_t NvPipe_DecodePBO(NvPipe* nvp, const uint8_t* src, uint64_t srcSize, uint32_t pbo, uint32_t width, uint32_t height)
1218 | {
1219 |     // Decodes one frame into an OpenGL PBO. Returns the decoded size, or 0 on error with the message stored for NvPipe_GetError.
1220 |     Instance* instance = static_cast<Instance*>(nvp);
1221 |     if (!instance || !instance->decoder)
1222 |     {
1223 |         (instance ? instance->error : sharedError) = "Invalid NvPipe decoder."; // NULL handle: report via the shared message instead of dereferencing it
1224 |         return 0;
1225 |     }
1226 |     try
1227 |     {
1228 |         return instance->decoder->decodePBO(src, srcSize, pbo, width, height);
1229 |     }
1230 |     catch (Exception& e)
1231 |     {
1232 |         instance->error = e.getErrorString();
1233 |         return 0;
1234 |     }
1235 | }
1236 | 
1237 | #endif
1238 | 
1239 | #endif
1240 | 
1241 | NVPIPE_EXPORT void NvPipe_Destroy(NvPipe* nvp)
1242 | {
1243 |     // Deleting the Instance also destroys the encoder/decoder it owns; a NULL handle is a no-op.
1244 |     delete static_cast<Instance*>(nvp);
1245 | }
1246 | 
1247 | NVPIPE_EXPORT const char* NvPipe_GetError(NvPipe* nvp)
1248 | {
1249 |     // A NULL handle selects the shared error message; any other handle selects
1250 |     // the message stored in that instance.
1251 |     Instance* instance = static_cast<Instance*>(nvp);
1252 |     const std::string& message = (nullptr == instance) ? sharedError : instance->error;
1253 |     return message.c_str();
1254 | }
1255 | 
1256 | 
1257 | 
1258 | 
1259 | 
1260 | 
1261 | 
1262 | 
1263 | 
1264 | 
1265 | 
1266 | 
1267 | 
1268 | 
1269 | 
1270 | 


--------------------------------------------------------------------------------
/src/NvPipe.h.in:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  2 |  *
  3 |  * Redistribution and use in source and binary forms, with or without
  4 |  * modification, are permitted provided that the following conditions
  5 |  * are met:
  6 |  *  * Redistributions of source code must retain the above copyright
  7 |  *    notice, this list of conditions and the following disclaimer.
  8 |  *  * Redistributions in binary form must reproduce the above copyright
  9 |  *    notice, this list of conditions and the following disclaimer in the
 10 |  *    documentation and/or other materials provided with the distribution.
 11 |  *  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 |  *    contributors may be used to endorse or promote products derived
 13 |  *    from this software without specific prior written permission.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 |  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 |  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 |  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 |  */
 27 | 
 28 | #ifndef NVPIPE_H
 29 | #define NVPIPE_H
 30 | 
 31 | #include <stdlib.h>
 32 | #include <stdint.h>
 33 | 
 34 | #cmakedefine NVPIPE_WITH_ENCODER
 35 | #cmakedefine NVPIPE_WITH_DECODER
 36 | #cmakedefine NVPIPE_WITH_OPENGL
 37 | 
 38 | #ifdef _WIN32
 39 | #	define NVPIPE_EXPORT __declspec(dllexport)
 40 | #else
 41 | #	define NVPIPE_EXPORT __attribute__((visibility("default")))
 42 | #endif
 43 | 
 44 | extern "C"
 45 | {
 46 | 
 47 | typedef void NvPipe;
 48 | 
 49 | 
 50 | /**
 51 |  * Available video codecs in NvPipe.
 52 |  */
 53 | typedef enum {
 54 |     NVPIPE_H264,
 55 |     NVPIPE_HEVC
 56 | } NvPipe_Codec;
 57 | 
 58 | 
 59 | /**
 60 |  * Compression type used for encoding. Lossless produces larger output.
 61 |  */
 62 | typedef enum {
 63 |     NVPIPE_LOSSY,
 64 |     NVPIPE_LOSSLESS
 65 | } NvPipe_Compression;
 66 | 
 67 | 
 68 | /**
 69 |  * Format of the input frame.
 70 |  */
 71 | typedef enum {
 72 |     NVPIPE_RGBA32,
 73 |     NVPIPE_UINT4,
 74 |     NVPIPE_UINT8,
 75 |     NVPIPE_UINT16,
 76 |     NVPIPE_UINT32
 77 | } NvPipe_Format;
 78 | 
 79 | 
 80 | #ifdef NVPIPE_WITH_ENCODER
 81 | 
 82 | /**
 83 |  * @brief Creates a new encoder instance.
 84 |  * @param format Format of input frame.
 85 |  * @param codec Possible codecs are H.264 and HEVC if available.
 86 |  * @param compression Lossy or lossless compression.
 87 |  * @param bitrate Bitrate in bit per second, e.g., 32 * 1000 * 1000 = 32 Mbps (for lossy compression only).
 88 |  * @param targetFrameRate At this frame rate the effective data rate approximately equals the bitrate (for lossy compression only).
 89 |  * @param width Initial width of the encoder.
 90 |  * @param height Initial height of the encoder.
 91 |  * @return NULL on error.
 92 |  */
 93 | NVPIPE_EXPORT NvPipe* NvPipe_CreateEncoder(NvPipe_Format format, NvPipe_Codec codec, NvPipe_Compression compression, uint64_t bitrate, uint32_t targetFrameRate, uint32_t width, uint32_t height);
 94 | 
 95 | 
 96 | /**
 97 |  * @brief Reconfigures the encoder with a new bitrate and target frame rate.
 98 |  * @param nvp Encoder instance.
 99 |  * @param bitrate Bitrate in bit per second, e.g., 32 * 1000 * 1000 = 32 Mbps (for lossy compression only).
100 |  * @param targetFrameRate At this frame rate the effective data rate approximately equals the bitrate (for lossy compression only).
101 |  */
102 | NVPIPE_EXPORT void NvPipe_SetBitrate(NvPipe* nvp, uint64_t bitrate, uint32_t targetFrameRate);
103 | 
104 | 
105 | /**
106 |  * @brief Encodes a single frame from device or host memory.
107 |  * @param nvp Encoder instance.
108 |  * @param src Device or host memory pointer.
109 |  * @param srcPitch Pitch of source memory.
110 |  * @param dst Host memory pointer for compressed output.
111 |  * @param dstSize Available space for compressed output.
112 |  * @param width Width of input frame in pixels.
113 |  * @param height Height of input frame in pixels.
114 |  * @param forceIFrame Enforces an I-frame instead of a P-frame.
115 |  * @return Size of encoded data in bytes or 0 on error.
116 |  */
117 | NVPIPE_EXPORT uint64_t NvPipe_Encode(NvPipe* nvp, const void* src, uint64_t srcPitch, uint8_t* dst, uint64_t dstSize, uint32_t width, uint32_t height, bool forceIFrame);
118 | 
119 | 
120 | #ifdef NVPIPE_WITH_OPENGL
121 | 
122 | /**
 123 |  * @brief Encodes a single frame from an OpenGL texture.
124 |  * @param nvp Encoder instance.
125 |  * @param texture OpenGL texture ID.
126 |  * @param target OpenGL texture target.
127 |  * @param dst Host memory pointer for compressed output.
 128 |  * @param dstSize Available space for compressed output (passed by value; the effective compressed size is the return value).
129 |  * @param width Width of frame in pixels.
130 |  * @param height Height of frame in pixels.
131 |  * @param forceIFrame Enforces an I-frame instead of a P-frame.
132 |  * @return Size of encoded data in bytes or 0 on error.
133 |  */
134 | NVPIPE_EXPORT uint64_t NvPipe_EncodeTexture(NvPipe* nvp, uint32_t texture, uint32_t target, uint8_t* dst, uint64_t dstSize, uint32_t width, uint32_t height, bool forceIFrame);
135 | 
136 | 
137 | /**
 138 |  * @brief Encodes a single frame from an OpenGL pixel buffer object (PBO).
139 |  * @param nvp Encoder instance.
140 |  * @param pbo OpenGL PBO ID.
141 |  * @param dst Host memory pointer for compressed output.
 142 |  * @param dstSize Available space for compressed output (passed by value; the effective compressed size is the return value).
143 |  * @param width Width of frame in pixels.
144 |  * @param height Height of frame in pixels.
 145 |  * @param forceIFrame Enforces an I-frame instead of a P-frame.
146 |  * @return Size of encoded data in bytes or 0 on error.
147 |  */
148 | NVPIPE_EXPORT uint64_t NvPipe_EncodePBO(NvPipe* nvp, uint32_t pbo, uint8_t* dst, uint64_t dstSize, uint32_t width, uint32_t height, bool forceIFrame);
149 | 
150 | #endif
151 | 
152 | #endif
153 | 
154 | #ifdef NVPIPE_WITH_DECODER
155 | 
156 | /**
157 |  * @brief Creates a new decoder instance.
158 |  * @param format Format of output frame.
159 |  * @param codec Possible codecs are H.264 and HEVC if available.
160 |  * @param width Initial width of the decoder.
161 |  * @param height Initial height of the decoder.
162 |  * @return NULL on error.
163 |  */
164 | NVPIPE_EXPORT NvPipe* NvPipe_CreateDecoder(NvPipe_Format format, NvPipe_Codec codec, uint32_t width, uint32_t height);
165 | 
166 | 
167 | /**
168 |  * @brief Decodes a single frame to device or host memory.
169 |  * @param nvp Decoder instance.
170 |  * @param src Compressed frame data in host memory.
171 |  * @param srcSize Size of compressed data.
172 |  * @param dst Device or host memory pointer.
173 |  * @param width Width of frame in pixels.
174 |  * @param height Height of frame in pixels.
175 |  * @return Size of decoded data in bytes or 0 on error.
176 |  */
177 | NVPIPE_EXPORT uint64_t NvPipe_Decode(NvPipe* nvp, const uint8_t* src, uint64_t srcSize, void* dst, uint32_t width, uint32_t height);
178 | 
179 | 
180 | #ifdef NVPIPE_WITH_OPENGL
181 | 
182 | /**
183 |  * @brief Decodes a single frame to an OpenGL texture.
184 |  * @param nvp Decoder instance.
185 |  * @param src Compressed frame data in host memory.
186 |  * @param srcSize Size of compressed data.
187 |  * @param texture OpenGL texture ID.
188 |  * @param target OpenGL texture target.
189 |  * @param width Width of frame in pixels.
190 |  * @param height Height of frame in pixels.
191 |  * @return Size of decoded data in bytes or 0 on error.
192 |  */
193 | NVPIPE_EXPORT uint64_t NvPipe_DecodeTexture(NvPipe* nvp, const uint8_t* src, uint64_t srcSize, uint32_t texture, uint32_t target, uint32_t width, uint32_t height);
194 | 
195 | 
196 | /**
197 |  * @brief Decodes a single frame to an OpenGL pixel buffer object (PBO).
198 |  * @param nvp Decoder instance.
199 |  * @param src Compressed frame data in host memory.
200 |  * @param srcSize Size of compressed data.
201 |  * @param pbo OpenGL PBO ID.
202 |  * @param width Width of frame in pixels.
203 |  * @param height Height of frame in pixels.
204 |  * @return Size of decoded data in bytes or 0 on error.
205 |  */
206 | NVPIPE_EXPORT uint64_t NvPipe_DecodePBO(NvPipe* nvp, const uint8_t* src, uint64_t srcSize, uint32_t pbo, uint32_t width, uint32_t height);
207 | 
208 | #endif
209 | 
210 | #endif
211 | 
212 | 
213 | /**
214 |  * @brief Cleans up an encoder or decoder instance.
215 |  * @param nvp The encoder or decoder instance to destroy.
216 |  */
217 | NVPIPE_EXPORT void NvPipe_Destroy(NvPipe* nvp);
218 | 
219 | 
220 | /**
 221 |  * @brief Returns an error message for the last error that occurred.
222 |  * @param nvp Encoder or decoder. Use NULL to get error message if encoder or decoder creation failed.
223 |  * @return Returned string must not be deleted.
224 |  */
225 | NVPIPE_EXPORT const char* NvPipe_GetError(NvPipe* nvp);
226 | 
227 | }
228 | 
229 | #endif
230 | 
231 | 


--------------------------------------------------------------------------------