├── .clang-format ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── auto-commits.yml │ ├── build.yml │ ├── docker.yml │ └── test.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── Makefile ├── README.md ├── docker ├── Dockerfile └── Dockerfile.tensorrt ├── docs ├── Makefile ├── PyNvCodec.rst ├── PytorchNvCodec.rst ├── conf.py └── index.rst ├── package-lock.json ├── pyproject.toml ├── samples ├── SampleCupy.py ├── SampleDecode.py ├── SampleDecodeMultiThread.py ├── SampleDecodeRTSP.py ├── SampleDecodeSw.py ├── SampleDemuxDecode.py ├── SampleEncode.py ├── SampleEncodeMultiThread.py ├── SampleMeasureVideoQuality.py ├── SampleOpenGL.py ├── SamplePyTorch.py ├── SampleRemap.py ├── SampleTensorRTResnet.py ├── SampleTorchResnet.py ├── SampleTorchSegmentation.py ├── SampleTypeConversionTest.py └── utils.py ├── setup.py ├── src ├── CMakeLists.txt ├── PyNvCodec │ ├── CMakeLists.txt │ ├── __init__.py │ ├── __init__.pyi │ ├── inc │ │ ├── PyNvCodec.hpp │ │ └── Version.hpp.in │ └── src │ │ ├── PyBufferUploader.cpp │ │ ├── PyCudaBufferDownloader.cpp │ │ ├── PyFFMpegDecoder.cpp │ │ ├── PyFFMpegDemuxer.cpp │ │ ├── PyFrameUploader.cpp │ │ ├── PyNvCodec.cpp │ │ ├── PyNvDecoder.cpp │ │ ├── PyNvEncoder.cpp │ │ ├── PySurface.cpp │ │ ├── PySurfaceConverter.cpp │ │ ├── PySurfaceDownloader.cpp │ │ ├── PySurfaceRemaper.cpp │ │ └── PySurfaceResizer.cpp ├── PytorchNvCodec │ ├── CMakeLists.txt │ ├── LICENSE │ ├── __init__.py │ ├── pyproject.toml │ ├── setup.py │ └── src │ │ └── PytorchNvCodec.cpp └── TC │ ├── CMakeLists.txt │ ├── TC_CORE │ ├── CMakeLists.txt │ ├── inc │ │ ├── TC_CORE.hpp │ │ └── Version.hpp.in │ └── src │ │ ├── Task.cpp │ │ └── Token.cpp │ ├── has_bsf_compile_test.c │ ├── inc │ ├── CodecsSupport.hpp │ ├── CuvidFunctions.h │ ├── FFmpegDemuxer.h │ ├── MemoryInterfaces.hpp │ ├── NppCommon.hpp │ ├── NvCodecCLIOptions.h │ ├── NvCodecUtils.h │ ├── NvDecoder.h │ ├── NvEncoder.h │ ├── NvEncoderCuda.h │ ├── Tasks.hpp │ ├── Version.hpp.in │ 
├── cuvid_function_pointer.h │ └── tc_dlopen.h │ ├── src │ ├── FFmpegDemuxer.cpp │ ├── FfmpegSwDecoder.cpp │ ├── MemoryInterfaces.cpp │ ├── NppCommon.cpp │ ├── NvCodecCliOptions.cpp │ ├── NvDecoder.cpp │ ├── NvEncoder.cpp │ ├── NvEncoderCuda.cpp │ ├── Resize.cu │ ├── Tasks.cpp │ ├── TasksColorCvt.cpp │ ├── tc_dlopen_unix.cpp │ └── tc_dlopen_windows.cpp │ └── third_party │ ├── cuviddec.h │ ├── nvEncodeAPI.h │ └── nvcuvid.h └── tests ├── CMakeLists.txt ├── test.mp4 ├── test_PyFfmpegDemuxer.py ├── test_PyNvDecoder.py ├── test_PyNvEncoder.py ├── test_PySurface.py ├── test_reported_bugs.py └── test_res_change.h264 /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | # We'll use defaults from the LLVM style, with 2 columns indentation. 3 | BasedOnStyle: LLVM 4 | IndentWidth: 2 5 | BreakBeforeBraces: Linux 6 | --- 7 | Language: Cpp 8 | # Force pointers to the type for C++. 9 | DerivePointerAlignment: false 10 | PointerAlignment: Left -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | 7 | --- 8 | 9 | **Describe the bug** 10 | A clear and concise description of what the bug is. 11 | 12 | **To Reproduce** 13 | Steps to reproduce the behavior: 14 | 1. Go to '...' 15 | 2. Click on '....' 16 | 3. Scroll down to '....' 17 | 4. See error 18 | 19 | **Expected behavior** 20 | A clear and concise description of what you expected to happen. 21 | 22 | **Screenshots** 23 | If applicable, add screenshots to help explain your problem. 24 | 25 | **Desktop (please complete the following information):** 26 | - OS: [e.g. Windows, Linux] 27 | - Nvidia driver version 28 | - CUDA Version [e.g. 10.2] 29 | - Python Version [e.g. 
3.7] 30 | - Video Codec SDK Version (only if using a custom header from the Video Codec SDK via the CMake variable `TC_VIDEO_CODEC_INTERFACE_DIR`) 31 | 32 | **Additional context** 33 | Add any other context about the problem here. 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/auto-commits.yml: -------------------------------------------------------------------------------- 1 | name: auto-commits 2 | 3 | on: 4 | push: 5 | 6 | # Use bash also on Windows (instead of pwsh) 7 | defaults: 8 | run: 9 | shell: bash 10 | 11 | jobs: 12 | build: 13 | strategy: 14 | fail-fast: false 15 | runs-on: ubuntu-22.04 16 | permissions: 17 | # Give the default GITHUB_TOKEN write permission to commit and push the 18 | # added or changed files to the repository. 
19 | contents: write 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | 24 | # already installed on runner: git build-essential python3-dev python3-pip cmake (new version required, use pip if too old) 25 | - name: Install dependencies 26 | run: | 27 | sudo apt-get update 28 | sudo apt-get install \ 29 | libavfilter-dev \ 30 | libavformat-dev \ 31 | libavcodec-dev \ 32 | libswresample-dev \ 33 | libavutil-dev\ 34 | -y 35 | 36 | # CUDA toolkit: the following is sufficient on Ubuntu cuda-libraries-XX-X cuda-compiler-XX-X libnpp-dev-XX-X libnvidia-decode-XXX 37 | - uses: Jimver/cuda-toolkit@v0.2.8 38 | id: cuda-toolkit 39 | with: 40 | cuda: '11.7.0' 41 | sub-packages: '["nvcc", "nvtx", "cudart"]' 42 | method: "network" 43 | 44 | - name: Install CUVID 45 | run: | 46 | # normally you would install them via `apt install cuda` 47 | sudo apt-get install \ 48 | libnpp-dev-11-7 \ 49 | libnvidia-decode-520 \ 50 | -y 51 | 52 | - name: Build and Install Package 53 | run: | 54 | python -mpip install . 
--verbose 55 | 56 | - name: Check Import 57 | # importing does not work on Windows as nvcuda.dll is required which comes with a Nvidia driver 58 | run: | 59 | python -c "import PyNvCodec" 60 | 61 | - name: Generate Stubs 62 | run: 63 | make generate-stubs 64 | 65 | - name: Check Import 66 | uses: stefanzweifel/git-auto-commit-action@v4 67 | with: 68 | commit_message: Automated stub generation 69 | 70 | # TODO: have dedicated jobs for auto format with clang-format 71 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | # Cancel any in-progress CI runs for a PR if it is updated 10 | concurrency: 11 | group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.head_ref || github.sha }} 12 | cancel-in-progress: true 13 | 14 | 15 | # Use bash also on Windows (instead of pwsh) 16 | defaults: 17 | run: 18 | shell: bash 19 | 20 | jobs: 21 | build: 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | os: [ubuntu-20.04, ubuntu-22.04, windows-2022] 26 | runs-on: ${{ matrix.os }} 27 | permissions: 28 | # Give the default GITHUB_TOKEN write permission to commit and push the 29 | # added or changed files to the repository. 
30 | contents: write 31 | 32 | steps: 33 | - uses: actions/checkout@v3 34 | 35 | # already installed on runner: git build-essential python3-dev python3-pip cmake (new version required, use pip if too old) 36 | - name: Install dependencies 37 | if: ${{ matrix.os != 'windows-2022' }} 38 | run: | 39 | sudo apt-get update 40 | sudo apt-get install \ 41 | libavfilter-dev \ 42 | libavformat-dev \ 43 | libavcodec-dev \ 44 | libswresample-dev \ 45 | libavutil-dev \ 46 | python3-testresources\ 47 | -y 48 | python -mpip install -U setuptools wheel pip 49 | 50 | - name: Install dependencies (Windows) 51 | if: ${{ matrix.os == 'windows-2022' }} 52 | run: | 53 | curl -L https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-lgpl-shared.zip --output ffmpeg.zip 54 | unzip ffmpeg.zip 55 | mv ffmpeg*-shared ffmpeg 56 | 57 | # CUDA toolkit: the following is sufficient on Ubuntu cuda-libraries-XX-X cuda-compiler-XX-X libnpp-dev-XX-X libnvidia-decode-XXX 58 | - uses: Jimver/cuda-toolkit@v0.2.8 59 | id: cuda-toolkit 60 | if: ${{ matrix.os != 'windows-2022' }} 61 | with: 62 | cuda: '11.7.0' 63 | sub-packages: '["nvcc", "nvtx", "cudart"]' 64 | method: "network" 65 | 66 | - uses: Jimver/cuda-toolkit@v0.2.8 67 | id: cuda-toolkit-win 68 | if: ${{ matrix.os == 'windows-2022' }} 69 | with: 70 | cuda: '11.7.0' 71 | # https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html#install-cuda-software 72 | sub-packages: '["nvcc", "nvtx", "cudart", "npp_dev", "npp"]' 73 | method: "network" 74 | 75 | - name: Install CUVID 76 | if: ${{ matrix.os != 'windows-2022' }} 77 | run: | 78 | # normally you would install them via `apt install cuda` 79 | sudo apt-get install \ 80 | libnpp-dev-11-7 \ 81 | libnvidia-decode-520 \ 82 | -y 83 | 84 | - name: Build and Install Package 85 | run: | 86 | SKBUILD_CONFIGURE_OPTIONS="-DTC_FFMPEG_ROOT=D:/a/VideoProcessingFramework/VideoProcessingFramework/ffmpeg/" python -mpip install . 
--verbose 87 | 88 | - name: Check Import 89 | # importing does not work on Windows as nvcuda.dll is required which comes with a Nvidia driver 90 | if: ${{ matrix.os != 'windows-2022' }} 91 | run: | 92 | python -c "import PyNvCodec" 93 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: docker 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | # Cancel any in-progress CI runs for a PR if it is updated 10 | concurrency: 11 | group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.head_ref || github.sha }} 12 | cancel-in-progress: true 13 | 14 | 15 | jobs: 16 | docker: 17 | # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. 18 | # You can convert this to a matrix build if you need cross-platform coverage. 19 | # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix 20 | runs-on: self-hosted 21 | 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Build docker containers 25 | run: | 26 | DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile --tag vpf-gpu --build-arg PIP_INSTALL_EXTRAS=torch . 27 | DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.tensorrt --build-arg PIP_INSTALL_EXTRAS=samples --tag vpf-gpu-trt . 
28 | 29 | - name: Run tests 30 | run: | 31 | docker run --gpus all --rm --user $UID -v $GITHUB_WORKSPACE:/cwd --entrypoint "python3" vpf-gpu -m unittest discover /cwd/tests 32 | 33 | - name: Run samples 34 | run: | 35 | docker run --gpus all --rm --user $UID -v $GITHUB_WORKSPACE:/cwd \ 36 | -e TORCH_HOME=/cwd/cache \ 37 | --entrypoint "make" vpf-gpu-trt run_samples_without_docker 38 | 39 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | jobs: 10 | test: 11 | # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. 12 | # You can convert this to a matrix build if you need cross-platform coverage. 13 | # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix 14 | runs-on: self-hosted 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - name: Setup FFmpeg 19 | env: 20 | CXX: /usr/lib/ccache/c++ 21 | CC: /usr/lib/ccache/cc 22 | run: | 23 | cd ~/FFmpeg/ 24 | ./prepare.sh 25 | 26 | - name: Setup pip 27 | run: | 28 | python3 -m venv /tmp/venv_vpf 29 | source /tmp/venv_vpf/bin/activate 30 | 31 | - name: Build and Install Package 32 | env: 33 | PKG_CONFIG_PATH: /home/gh-runner/FFmpeg/build_x64_release_shared/lib/pkgconfig 34 | CXX: /usr/lib/ccache/c++ 35 | CC: /usr/lib/ccache/cc 36 | SKBUILD_CONFIGURE_OPTIONS: "-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache" 37 | run: | 38 | source /tmp/venv_vpf/bin/activate 39 | python3 -mpip install .[samples] --verbose 40 | python3 -mpip install src/PytorchNvCodec --verbose 41 | 42 | - name: Run Tests 43 | run: | 44 | source /tmp/venv_vpf/bin/activate 45 | python3 -munittest discover tests 46 | 47 | - name: Run Samples 48 | run: | 49 | source 
/tmp/venv_vpf/bin/activate 50 | make run_samples_without_docker 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _skbuild 2 | **/*.egg-info 3 | 4 | # IDE files 5 | .devcontainer 6 | .vscode 7 | 8 | # python gitignore: https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | *.whl 19 | *.zip 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .nox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | *.py,cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | cover/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | db.sqlite3-journal 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | .pybuilder/ 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # IPython 92 | profile_default/ 93 | ipython_config.py 94 | 95 | # pyenv 96 | # For a library or package, you might want to ignore these files since the code is 97 | # intended to run in multiple environments; otherwise, check them in: 98 | # .python-version 99 | 100 | # pipenv 101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 103 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 104 | # install all needed dependencies. 105 | #Pipfile.lock 106 | 107 | # pdm 108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 109 | #pdm.lock 110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 111 | # in version control. 112 | # https://pdm.fming.dev/#use-with-ide 113 | .pdm.toml 114 | 115 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 163 | .idea 164 | out 165 | *.dll 166 | *.deb 167 | /samples/.idea/.name 168 | /samples/.idea/workspace.xml 169 | *.bin 170 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2019 NVIDIA Corporation 3 | # Copyright 2021 Kognia Sports Intelligence 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | cmake_minimum_required(VERSION 3.21) 18 | 19 | project(PyNvCodec) 20 | 21 | set(CMAKE_CXX_STANDARD 17) 22 | add_subdirectory(src) 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | run_tests: 2 | python -m unittest discover tests 3 | 4 | run_samples_without_docker: 5 | wget http://www.scikit-video.org/stable/_static/bikes.mp4 6 | 7 | python ./samples/SampleDecode.py -g 0 -e ./bikes.mp4 -r ./tests/test.raw 8 | python ./samples/SampleDecodeSw.py -e ./bikes.mp4 -r ./tests/test.raw 9 | python ./samples/SampleEncodeMultiThread.py 0 848 464 ./tests/test.raw 10 10 | python ./samples/SampleMeasureVideoQuality.py -g 0 -i ./tests/test.raw -o ./tests/test.raw -w 848 -h 464 11 | python ./samples/SamplePyTorch.py 0 ./bikes.mp4 ./tests/out.mp4 12 | python ./samples/SampleTensorRTResnet.py 0 ./bikes.mp4 13 | python ./samples/SampleTorchResnet.py 0 ./bikes.mp4 14 | python ./samples/SampleDecodeMultiThread.py 0 ./bikes.mp4 10 15 | python ./samples/SampleDemuxDecode.py 0 ./tests/test_res_change.h264 ./tests/test.raw 16 | python ./samples/SampleEncode.py 0 ./tests/test.raw ./tests/output.mp4 848 464 17 | ifndef DISPLAY 18 | echo "skipping rendering samples" 19 | else 20 | python ./samples/SampleTorchSegmentation.py 0 ./bikes.mp4 21 | python ./samples/SampleOpenGL.py -g 0 -e ./bikes.mp4 22 | endif 23 | # python ./samples/SampleRemap.py 0 ./bikes.mp4 ./tests/remap.npz 24 | # python 
./samples/SampleDecodeRTSP.py 0 rtsp://localhost:8554/mystream rtsp://localhost:8554/mystream # no rtsp stream available for testing 25 | 26 | generate-stubs: 27 | pip3 install mypy 28 | stubgen -mPyNvCodec._PyNvCodec 29 | cp out/PyNvCodec/_PyNvCodec.pyi src/PyNvCodec/__init__.pyi 30 | 31 | .PHONY: run_tests generate-stubs 32 | 33 | # vim:ft=make 34 | # 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Deprecation notice 2 | 3 | VPF is being replaced by [PyNvVideoCodec](https://pypi.org/project/PyNvVideoCodec/) library with leaner API and `pip install` support. 4 | The library offers easy-to-use Python APIs, granting access to the core C/C++ video encode/decode capabilities of the Video Codec SDK. 5 | 6 | PyNvVideoCodec library is distributed in two formats: binary distribution via [PyPI](https://pypi.org/project/PyNvVideoCodec/) and source code distribution via [NVIDIA NGC](https://catalog.ngc.nvidia.com/orgs/nvidia/resources/pynvvideocodec). In both cases, it can be installed using a single `pip install` command. 7 | The library is distributed under the MIT license and is officially supported by NVIDIA. PyNvVideoCodec supports all features of VPF (except software encode/decode and surface format conversion). 8 | 9 | For more information, please visit the [Get Started with PyNvVideoCodec](https://developer.nvidia.com/pynvvideocodec) page. 10 | 11 | We'd like to thank [Roman Arzumanyan](https://github.com/RomanArzumanyan), original author of VPF, for his support and efforts along these years. Roman will continue his work on [VALI](https://github.com/RomanArzumanyan/VALI) also leveraging NVIDIA GPUs. 12 | 13 | ---- 14 | 15 | # VideoProcessingFramework 16 | 17 | VPF stands for Video Processing Framework. 
It’s a set of C++ libraries and Python bindings which provides full HW acceleration for video processing tasks such as decoding, encoding, transcoding and GPU-accelerated color space and pixel format conversions. 18 | 19 | VPF also supports exporting GPU memory objects such as decoded video frames to PyTorch tensors without Host to Device copies. 20 | 21 | ## Prerequisites 22 | VPF works on Linux(Ubuntu 20.04 and Ubuntu 22.04 only) and Windows 23 | 24 | - NVIDIA display driver: 525.xx.xx or above 25 | - CUDA Toolkit 11.2 or above 26 | - CUDA toolkit has driver bundled with it e.g. CUDA Toolkit 12.0 has driver `530.xx.xx`. During installation of CUDA toolkit you could choose to install or skip installation of the bundled driver. Please choose the appropriate option. 27 | - FFMPEG 28 | - [Compile FFMPEG with shared libraries](https://docs.nvidia.com/video-technologies/video-codec-sdk/12.0/ffmpeg-with-nvidia-gpu/index.html) 29 | - or download pre-compiled binaries from a source you trust. 30 | - During VPF’s “pip install”(mentioned in sections below) you need to provide a path to the directory where FFMPEG got installed. 31 | - or you could install system FFMPEG packages (e.g. ```apt install libavfilter-dev libavformat-dev libavcodec-dev libswresample-dev libavutil-dev``` on Ubuntu) 32 | 33 | - Python 3 and above 34 | - Install a C++ toolchain either via Visual Studio or Tools for Visual Studio. 35 | - Recommended version is Visual Studio 2017 and above 36 | (Windows only) 37 | 38 | ### Linux 39 | 40 | We recommend Ubuntu 20.04 as it comes with recent enough FFmpeg system packages. 
41 | If you want to build FFmpeg from source, you can follow 42 | https://docs.nvidia.com/video-technologies/video-codec-sdk/12.0/ffmpeg-with-nvidia-gpu/index.html 43 | ```bash 44 | # Install dependencies 45 | apt install -y \ 46 | libavfilter-dev \ 47 | libavformat-dev \ 48 | libavcodec-dev \ 49 | libswresample-dev \ 50 | libavutil-dev\ 51 | wget \ 52 | build-essential \ 53 | git 54 | 55 | # Install CUDA Toolkit (if not already present) 56 | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb 57 | sudo dpkg -i cuda-keyring_1.0-1_all.deb 58 | sudo apt-get update 59 | sudo apt-get install -y cuda 60 | # Add nvcc to your $PATH (most commonly already done by the CUDA installation) 61 | export PATH=/usr/local/cuda/bin:$PATH 62 | 63 | # Install VPF 64 | pip3 install git+https://github.com/NVIDIA/VideoProcessingFramework 65 | # or if you cloned this repository 66 | pip3 install . 67 | ``` 68 | 69 | To check whether VPF is correctly installed run the following Python script 70 | ```python 71 | import PyNvCodec 72 | ``` 73 | If using Docker via [Nvidia Container Runtime](https://developer.nvidia.com/nvidia-container-runtime), 74 | please make sure to enable the `video` driver capability: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities via 75 | the `NVIDIA_DRIVER_CAPABILITIES` environment variable in the container or the `--gpus` command line parameter (e.g. 76 | `docker run -it --rm --gpus 'all,"capabilities=compute,utility,video"' nvidia/cuda:12.1.0-base-ubuntu22.04`). 77 | 78 | Please note that some examples have additional dependencies that need to be installed via pip (`pip install .[samples]`). 
79 | Samples using PyTorch will require an optional extension which can be installed via 80 | ```bash 81 | pip install src/PytorchNvCodec # install Torch extension if needed (optional), requires "torch" to be installed before 82 | ``` 83 | 84 | After resolving those you should be able to run `make run_samples_without_docker` using your local pip installation. 85 | 86 | ### Windows 87 | 88 | - Install a C++ toolchain either via Visual Studio or Tools for Visual Studio (https://visualstudio.microsoft.com/downloads/) 89 | - Install the CUDA Toolkit: https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64 90 | - Compile [FFMPEG](https://github.com/FFmpeg/FFmpeg/) with shared libraries or download pre-compiled binaries from a source you trust 91 | - Install from the root directory of this repository indicating the location of the compiled FFMPEG in a Powershell console 92 | ```pwsh 93 | # Indicate path to your FFMPEG installation (with subfolders `bin` with DLLs, `include`, `lib`) 94 | $env:SKBUILD_CONFIGURE_OPTIONS="-DTC_FFMPEG_ROOT=C:/path/to/your/ffmpeg/installation/ffmpeg/" 95 | pip install . 96 | ``` 97 | To check whether VPF is correctly installed run the following Python script 98 | ```python 99 | import PyNvCodec 100 | ``` 101 | Please note that some examples have additional dependencies (`pip install .[samples]`) that need to be installed via pip. 
102 | Samples using PyTorch will require an optional extension which can be installed via 103 | 104 | ```bash 105 | pip install src/PytorchNvCodec # install Torch extension if needed (optional), requires "torch" to be installed before 106 | ``` 107 | 108 | ## Docker 109 | 110 | For convenience, we provide Docker images located at `docker` that you can use to easily install all dependencies for 111 | the samples ([docker](https://docs.docker.com/engine/install/ubuntu/) and [nvidia-docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) 112 | are required) 113 | 114 | 115 | ```bash 116 | DOCKER_BUILDKIT=1 docker build \ 117 | --tag vpf-gpu \ 118 | -f docker/Dockerfile \ 119 | --build-arg PIP_INSTALL_EXTRAS=torch \ 120 | . 121 | docker run -it --rm --gpus=all vpf-gpu 122 | ``` 123 | 124 | `PIP_INSTALL_EXTRAS` can be any subset listed under `project.optional-dependencies` in [pyproject.toml](pyproject.toml). 125 | 126 | ## Documentation 127 | 128 | Documentation for Video Processing Framework can be generated from this repository: 129 | ```bash 130 | pip install . # install Video Processing Framework 131 | pip install src/PytorchNvCodec # install Torch extension if needed (optional), requires "torch" to be installed before 132 | pip install sphinx # install documentation tool sphinx 133 | cd docs 134 | make html 135 | ``` 136 | You can then open `_build/html/index.html` with your browser. 137 | 138 | ## Community Support 139 | If you did not find the information you need or if you have further questions or problems, you are very welcome to join the developer community at [NVIDIA](https://forums.developer.nvidia.com/categories). We have dedicated categories covering diverse topics related to [video processing and codecs](https://forums.developer.nvidia.com/c/gaming-and-visualization-technologies/visualization/video-processing-optical-flow/189). 
140 | 141 | The forums are also a place where we would be happy to hear about how you made use of VPF in your project. 142 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:12.0.1-devel-ubuntu22.04 2 | 3 | ENV NVIDIA_DRIVER_CAPABILITIES=compute,video 4 | 5 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive \ 6 | apt-get -y install \ 7 | libavfilter-dev \ 8 | libavformat-dev \ 9 | libavcodec-dev \ 10 | libswresample-dev \ 11 | libavutil-dev\ 12 | wget \ 13 | build-essential \ 14 | ninja-build \ 15 | cmake \ 16 | git \ 17 | python3 \ 18 | python3-pip \ 19 | python-is-python3 20 | 21 | ARG PIP_INSTALL_EXTRAS="" 22 | # Build vpf 23 | RUN mkdir /src 24 | WORKDIR /src 25 | COPY src src 26 | COPY setup.py setup.py 27 | COPY pyproject.toml pyproject.toml 28 | COPY CMakeLists.txt CMakeLists.txt 29 | RUN python3 -m pip install --no-cache-dir setuptools wheel 30 | RUN python3 -m pip install --no-cache-dir .[$PIP_INSTALL_EXTRAS] 31 | 32 | RUN mkdir /cwd 33 | WORKDIR /cwd 34 | 35 | ENTRYPOINT ["/bin/bash"] 36 | -------------------------------------------------------------------------------- /docker/Dockerfile.tensorrt: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/tensorrt:23.04-py3 2 | 3 | ENV NVIDIA_DRIVER_CAPABILITIES=compute,video 4 | 5 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive \ 6 | apt-get -y install \ 7 | libavfilter-dev \ 8 | libavformat-dev \ 9 | libavcodec-dev \ 10 | libswresample-dev \ 11 | libavutil-dev\ 12 | wget \ 13 | build-essential \ 14 | ninja-build \ 15 | cmake \ 16 | git \ 17 | python3 \ 18 | python3-pip \ 19 | python-is-python3 20 | 21 | ARG PIP_INSTALL_EXTRAS="torch" 22 | # Build vpf 23 | RUN mkdir /src 24 | WORKDIR /src 25 | COPY src src 26 | COPY setup.py setup.py 27 | COPY pyproject.toml pyproject.toml 28 | COPY CMakeLists.txt 
CMakeLists.txt 29 | RUN python3 -m pip install --no-cache-dir .[$PIP_INSTALL_EXTRAS] 30 | 31 | RUN mkdir /cwd 32 | WORKDIR /cwd 33 | 34 | ENTRYPOINT ["/bin/bash"] 35 | -------------------------------------------------------------------------------- /docs/PyNvCodec.rst: -------------------------------------------------------------------------------- 1 | PyNvCodec 2 | ********* 3 | .. automodule:: PyNvCodec._PyNvCodec 4 | -------------------------------------------------------------------------------- /docs/PytorchNvCodec.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: PytorchNvCodec -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | extensions = [ 5 | "sphinx.ext.autodoc", 6 | "sphinx.ext.intersphinx", 7 | "sphinx.ext.autosummary", 8 | "sphinx.ext.napoleon", 9 | ] 10 | 11 | autosummary_generate = True 12 | autosummary_imported_members = True 13 | 14 | autodoc_default_options = {"members": True, "memer-order": "bysource"} 15 | 16 | source_suffix = ".rst" 17 | master_doc = "index" 18 | project = "VPF" 19 | copyright = "2022 NVIDIA Corporation" 20 | author = "" 21 | 22 | language = None 23 | 24 | exclude_patterns = ["_build"] 25 | 26 | pygments_style = "sphinx" 27 | 28 | todo_include_todos = False 29 | 30 | html_theme = "haiku" 31 | htmlhelp_basename = "VPFdoc" 32 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 
# Build configuration: setuptools backend with scikit-build.
[build-system]
requires = [
  "setuptools>=42",
  "scikit-build",
  "numpy",
  # Comment out the next two entries if cmake/ninja should not be installed via pip.
  "cmake>=3.21",
  "ninja; platform_system!='Windows'"
]
build-backend = "setuptools.build_meta"

# pytest defaults: short summary of non-passing tests, quiet output,
# tests collected from the "tests" directory.
[tool.pytest.ini_options]
minversion = "6.0"
addopts = "-ra -q"
testpaths = [
    "tests",
]
20 | import sys 21 | import os 22 | from typing import Any 23 | import PyNvCodec as nvc 24 | import numpy as np 25 | import cupy as cp 26 | 27 | class cconverter: 28 | """ 29 | Colorspace conversion chain. 30 | """ 31 | 32 | def __init__(self, width: int, height: int, gpu_id: int): 33 | self.gpu_id = gpu_id 34 | self.w = width 35 | self.h = height 36 | self.chain = [] 37 | 38 | def add(self, src_fmt: nvc.PixelFormat, dst_fmt: nvc.PixelFormat) -> None: 39 | self.chain.append( 40 | nvc.PySurfaceConverter(self.w, self.h, src_fmt, dst_fmt, self.gpu_id) 41 | ) 42 | 43 | def run(self, src_surface: nvc.Surface) -> nvc.Surface: 44 | surf = src_surface 45 | cc = nvc.ColorspaceConversionContext(nvc.ColorSpace.BT_601, nvc.ColorRange.MPEG) 46 | 47 | for cvt in self.chain: 48 | surf = cvt.Execute(surf, cc) 49 | if surf.Empty(): 50 | raise RuntimeError("Failed to perform color conversion") 51 | 52 | return surf.Clone(self.gpu_id) 53 | 54 | class CupyNVC: 55 | def get_memptr(self, surface: nvc.Surface) -> int: 56 | return surface.PlanePtr().GpuMem() 57 | 58 | def SurfaceToArray(self, surface: nvc.Surface) -> cp.array: 59 | """ 60 | Converts surface to cupy unit8 tensor. 
61 | 62 | - surface: nvc.Surface 63 | - return: cp.array (height, width, 3) 64 | """ 65 | if surface.Format() != nvc.PixelFormat.RGB: 66 | raise RuntimeError("Surface shall be of RGB PLANAR format , got {}".format(surface.Format())) 67 | plane = surface.PlanePtr() 68 | # cuPy array zero copy non ownned 69 | height, width, pitch = (plane.Height(), plane.Width(), plane.Pitch()) 70 | cupy_mem = cp.cuda.UnownedMemory(self.get_memptr(surface), height * width * 1, surface) 71 | cupy_memptr = cp.cuda.MemoryPointer(cupy_mem, 0) 72 | cupy_frame = cp.ndarray((height, width // 3, 3), cp.uint8, cupy_memptr, strides=(pitch, 3, 1)) # RGB 73 | 74 | return cupy_frame 75 | 76 | def _memcpy(self, surface: nvc.Surface, img_array: cp.array) -> None: 77 | cp.cuda.runtime.memcpy2DAsync(self.get_memptr(surface), 78 | surface.Pitch(), 79 | img_array.data.ptr, 80 | surface.Width(), 81 | surface.Width(), 82 | surface.Height()*3, 83 | cp.cuda.runtime.memcpyDeviceToDevice, 84 | 0) # null_stream.ptr: 0 85 | return 86 | 87 | def ArrayToSurface(self, img_array: cp.array, gpu_id: int) -> nvc.Surface: 88 | """ 89 | Converts cupy ndarray to rgb surface. 
def grayscale(img_array: cp.array) -> cp.array:
    """
    Convert a packed RGB (HWC) image to a 3-channel grayscale image.

    The weights 0.299 / 0.587 / 0.114 are applied along the last axis and the
    result is replicated over three channels, so the output keeps the HWC
    layout of the input.
    """
    img_array = cp.matmul(img_array, cp.array([0.299, 0.587, 0.114]).T)
    img_array = cp.expand_dims(img_array, axis=-1)
    img_array = cp.tile(img_array, (1, 1, 3))  # view as 3 channel image (packed RGB: HWC)
    return img_array


def contrast_boost(img_array: cp.array) -> cp.array:
    """
    Percentile-based contrast stretch of a packed (HWC) image.

    Each channel is clipped to its own 5th..95th percentile range and then
    rescaled so that this range maps to 0..255. A channel whose two
    percentiles coincide is mapped to 0 instead of producing NaN.

    - img_array: cp.array of shape (height, width, channels)
    - return: floating point cp.array of the same shape with values in [0, 255]
    """
    # One lower / upper bound per channel, shaped to broadcast over H and W.
    channel_min = cp.quantile(img_array, 0.05, axis=(0, 1)).reshape(1, 1, -1)
    channel_max = cp.quantile(img_array, 0.95, axis=(0, 1)).reshape(1, 1, -1)

    # Guard the division below against flat channels (max == min).
    channel_span = channel_max - channel_min
    channel_span = cp.where(channel_span > 0, channel_span, cp.ones_like(channel_span))

    img_array = img_array.astype(cp.float32)
    # Clip along the channel axis; indexing img_array[c] would select image
    # rows rather than channels for an HWC array.
    img_array = cp.minimum(cp.maximum(img_array, channel_min), channel_max)
    img_array = (img_array - channel_min) / channel_span
    img_array = cp.multiply(img_array, 255.0)
    return img_array


def main(gpu_id: int, encFilePath: str, dstFilePath: str):
    """
    Transcode a video on the GPU, processing every frame with cupy.

    Frames are decoded to NV12, converted to packed RGB, contrast-boosted and
    turned to grayscale as cupy arrays, converted back to NV12 and encoded as
    H.264 into dstFilePath.

    - gpu_id: ordinal of the GPU to run on
    - encFilePath: path to the input video
    - dstFilePath: path of the encoded output file
    """
    # The context manager closes the output file on every exit path.
    with open(dstFilePath, "wb") as dstFile:
        nvDec = nvc.PyNvDecoder(encFilePath, gpu_id)
        cpnvc = CupyNVC()

        w = nvDec.Width()
        h = nvDec.Height()
        res = str(w) + "x" + str(h)
        nvEnc = nvc.PyNvEncoder(
            {"preset": "P4", "codec": "h264", "s": res, "bitrate": "10M"}, gpu_id
        )

        # Surface converters
        to_rgb = cconverter(w, h, gpu_id)
        to_rgb.add(nvc.PixelFormat.NV12, nvc.PixelFormat.YUV420)
        to_rgb.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.RGB)

        to_nv12 = cconverter(w, h, gpu_id)
        to_nv12.add(nvc.PixelFormat.RGB_PLANAR, nvc.PixelFormat.RGB)
        to_nv12.add(nvc.PixelFormat.RGB, nvc.PixelFormat.YUV420)
        to_nv12.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.NV12)

        # Encoded video frame
        encFrame = np.ndarray(shape=(0), dtype=np.uint8)
        while True:
            # Decode NV12 surface
            src_surface = nvDec.DecodeSingleSurface()
            if src_surface.Empty():
                break

            # Convert to packed RGB: HWC , planar CHW
            rgb_sur = to_rgb.run(src_surface)
            if rgb_sur.Empty():
                break

            # PROCESS YOUR ARRAY HERE.
            # This sample processing boosts contrast and converts to grayscale.
            src_array = cpnvc.SurfaceToArray(rgb_sur)
            dst_array = contrast_boost(src_array)
            dst_array = grayscale(dst_array)
            surface_rgb = cpnvc.ArrayToSurface(dst_array, gpu_id)

            # Convert back to NV12
            dst_surface = to_nv12.run(surface_rgb)
            # Check the surface just produced, not the decoder output again.
            if dst_surface.Empty():
                break

            # Encode
            success = nvEnc.EncodeSingleSurface(dst_surface, encFrame)
            if success:
                byteArray = bytearray(encFrame)
                dstFile.write(byteArray)

        # Encoder is asynchronous, so we need to flush it
        while True:
            success = nvEnc.FlushSinglePacket(encFrame)
            if success:
                byteArray = bytearray(encFrame)
                dstFile.write(byteArray)
            else:
                break


if __name__ == "__main__":

    if len(sys.argv) < 4:
        print("This sample transcode and process with cupy an input video on given GPU.")
        print("Provide gpu ID, path to input and output files")
        print("Usage: SampleCupy.py $gpu_id $input_file $output_file.")
        print("Example: \npython3 samples/SampleCupy.py 0 tests/test.mp4 tests/dec_test.mp4")
        sys.exit(1)

    gpu_id = int(sys.argv[1])
    encFilePath = sys.argv[2]
    decFilePath = sys.argv[3]
    main(gpu_id, encFilePath, decFilePath)
# Starting from Python 3.8 DLL search policy has changed.
# We need to add path to CUDA DLLs explicitly.
import sys
import os

if os.name == "nt":
    # Add CUDA_PATH env variable.
    # .get() returns None for a missing variable, so the diagnostic below is
    # reachable; indexing os.environ would raise KeyError first.
    cuda_path = os.environ.get("CUDA_PATH")
    if cuda_path:
        os.add_dll_directory(cuda_path)
    else:
        print("CUDA_PATH environment variable is not set.", file=sys.stderr)
        print("Can't set CUDA DLLs search path.", file=sys.stderr)
        sys.exit(1)

    # Add PATH as well for minor CUDA releases
    sys_path = os.environ.get("PATH")
    if sys_path:
        paths = sys_path.split(";")
        for path in paths:
            if os.path.isdir(path):
                os.add_dll_directory(path)
    else:
        print("PATH environment variable is not set.", file=sys.stderr)
        sys.exit(1)

import pycuda.driver as cuda
import PyNvCodec as nvc
import numpy as np

from threading import Thread
55 | self.ctx = cuda.Device(gpuID).retain_primary_context() 56 | self.ctx.push() 57 | self.str = cuda.Stream() 58 | self.ctx.pop() 59 | 60 | # Create Decoder with given CUDA context & stream. 61 | self.nvDec = nvc.PyNvDecoder(encFile, self.ctx.handle, self.str.handle) 62 | 63 | width, height = self.nvDec.Width(), self.nvDec.Height() 64 | hwidth, hheight = int(width / 2), int(height / 2) 65 | 66 | # Determine colorspace conversion parameters. 67 | # Some video streams don't specify these parameters so default values 68 | # are most widespread bt601 and mpeg. 69 | cspace, crange = self.nvDec.ColorSpace(), self.nvDec.ColorRange() 70 | if nvc.ColorSpace.UNSPEC == cspace: 71 | cspace = nvc.ColorSpace.BT_601 72 | if nvc.ColorRange.UDEF == crange: 73 | crange = nvc.ColorRange.MPEG 74 | self.cc_ctx = nvc.ColorspaceConversionContext(cspace, crange) 75 | print("Color space: ", str(cspace)) 76 | print("Color range: ", str(crange)) 77 | 78 | # Initialize colorspace conversion chain 79 | if self.nvDec.ColorSpace() != nvc.ColorSpace.BT_709: 80 | self.nvYuv = nvc.PySurfaceConverter( 81 | width, 82 | height, 83 | self.nvDec.Format(), 84 | nvc.PixelFormat.YUV420, 85 | self.ctx.handle, 86 | self.str.handle, 87 | ) 88 | else: 89 | self.nvYuv = None 90 | 91 | if self.nvYuv: 92 | self.nvCvt = nvc.PySurfaceConverter( 93 | width, 94 | height, 95 | self.nvYuv.Format(), 96 | nvc.PixelFormat.RGB, 97 | self.ctx.handle, 98 | self.str.handle, 99 | ) 100 | else: 101 | self.nvCvt = nvc.PySurfaceConverter( 102 | width, 103 | height, 104 | self.nvDec.Format(), 105 | nvc.PixelFormat.RGB, 106 | self.ctx.handle, 107 | self.str.handle, 108 | ) 109 | 110 | self.nvRes = nvc.PySurfaceResizer( 111 | hwidth, hheight, self.nvCvt.Format(), self.ctx.handle, self.str.handle 112 | ) 113 | self.nvDwn = nvc.PySurfaceDownloader( 114 | hwidth, hheight, self.nvRes.Format(), self.ctx.handle, self.str.handle 115 | ) 116 | self.num_frame = 0 117 | 118 | def run(self): 119 | try: 120 | while True: 121 | try: 122 | 
self.rawSurface = self.nvDec.DecodeSingleSurface() 123 | if self.rawSurface.Empty(): 124 | print("No more video frames") 125 | break 126 | except nvc.HwResetException: 127 | print("Continue after HW decoder was reset") 128 | continue 129 | 130 | if self.nvYuv: 131 | self.yuvSurface = self.nvYuv.Execute(self.rawSurface, self.cc_ctx) 132 | self.cvtSurface = self.nvCvt.Execute(self.yuvSurface, self.cc_ctx) 133 | else: 134 | self.cvtSurface = self.nvCvt.Execute(self.rawSurface, self.cc_ctx) 135 | if self.cvtSurface.Empty(): 136 | print("Failed to do color conversion") 137 | break 138 | 139 | self.resSurface = self.nvRes.Execute(self.cvtSurface) 140 | if self.resSurface.Empty(): 141 | print("Failed to resize surface") 142 | break 143 | 144 | self.rawFrame = np.ndarray( 145 | shape=(self.resSurface.HostSize()), dtype=np.uint8 146 | ) 147 | success = self.nvDwn.DownloadSingleSurface( 148 | self.resSurface, self.rawFrame 149 | ) 150 | if not (success): 151 | print("Failed to download surface") 152 | break 153 | 154 | self.num_frame += 1 155 | if 0 == self.num_frame % self.nvDec.Framerate(): 156 | print(f"Thread {self.ident} at frame {self.num_frame}") 157 | 158 | except Exception as e: 159 | print(getattr(e, "message", str(e))) 160 | fout.close() 161 | 162 | 163 | def create_threads(gpu_id, input_file, num_threads): 164 | cuda.init() 165 | 166 | thread_pool = [] 167 | for i in range(0, num_threads): 168 | thread = Worker(gpu_id, input_file) 169 | thread.start() 170 | thread_pool.append(thread) 171 | 172 | for thread in thread_pool: 173 | thread.join() 174 | 175 | 176 | if __name__ == "__main__": 177 | 178 | print( 179 | "This sample decodes video streams in parallel threads. It does not save output." 
180 | ) 181 | print("GPU-accelerated color conversion and resize are also applied.") 182 | print("This sample may serve as a stability test.") 183 | print("Usage: python SampleDecodeMultiThread.py $gpu_id $input $num_threads") 184 | 185 | if len(sys.argv) < 4: 186 | print("Provide input CLI arguments as shown above") 187 | exit(1) 188 | 189 | gpu_id = int(sys.argv[1]) 190 | input_file = sys.argv[2] 191 | num_threads = int(sys.argv[3]) 192 | 193 | create_threads(gpu_id, input_file, num_threads) 194 | -------------------------------------------------------------------------------- /samples/SampleDecodeSw.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2020 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # Starting from Python 3.8 DLL search policy has changed. 18 | # We need to add path to CUDA DLLs explicitly. 
def decode(encFilePath, decFilePath):
    """
    Decode a video with the libavcodec SW decoder into a raw file.

    Decoded frames are appended to decFilePath until DecodeSingleFrame
    reports failure.

    - encFilePath: path to the encoded input video
    - decFilePath: path of the raw output file (overwritten)
    """
    # The context manager closes the output file even if decoding raises.
    with open(decFilePath, "wb") as decFile:
        nvDec = nvc.PyFfmpegDecoder(encFilePath, {})
        rawFrameYUV = np.ndarray(shape=(0), dtype=np.uint8)

        while True:
            success = nvDec.DecodeSingleFrame(rawFrameYUV)
            if success:
                bits = bytearray(rawFrameYUV)
                decFile.write(bits)
            else:
                break


if __name__ == "__main__":

    # The first positional argument of ArgumentParser is `prog`, so the text
    # has to be passed as `description` to show up as help text.
    parser = argparse.ArgumentParser(
        description="This sample decodes input video to raw YUV file using libavcodec SW decoder."
    )
    parser.add_argument(
        "-e",
        "--encoded-file-path",
        type=Path,
        required=True,
        help="Encoded video file (read from)",
    )
    parser.add_argument(
        "-r",
        "--raw-file-path",
        type=Path,
        required=True,
        help="Raw YUV video file (write to)",
    )
    parser.add_argument(
        "-v", "--verbose", default=False, action="store_true", help="Verbose"
    )

    args = parser.parse_args()

    decode(args.encoded_file_path.as_posix(), args.raw_file_path.as_posix())
def decode(gpuID, encFilePath, decFilePath):
    """
    Demux and decode a video on the GPU and write raw YUV420 frames to a file.

    Packets come from PyFFmpegDemuxer one at a time and are fed to
    PyNvDecoder; every decoded NV12 surface is converted to YUV420,
    downloaded to host memory and appended to decFilePath. The decoder is
    flushed after the demuxer runs out of packets.

    - gpuID: ordinal of the GPU to decode on
    - encFilePath: path to the encoded input video
    - decFilePath: path of the raw output file (overwritten)
    """
    cuda.init()
    cuda_ctx = cuda.Device(gpuID).retain_primary_context()
    cuda_ctx.push()
    cuda_str = cuda.Stream()
    cuda_ctx.pop()

    # The context manager closes the output file on every exit path.
    with open(decFilePath, "wb") as decFile:
        nvDmx = nvc.PyFFmpegDemuxer(encFilePath)
        nvDec = nvc.PyNvDecoder(
            nvDmx.Width(),
            nvDmx.Height(),
            nvDmx.Format(),
            nvDmx.Codec(),
            cuda_ctx.handle,
            cuda_str.handle,
        )
        nvCvt = nvc.PySurfaceConverter(
            nvDmx.Width(),
            nvDmx.Height(),
            nvDmx.Format(),
            nvc.PixelFormat.YUV420,
            cuda_ctx.handle,
            cuda_str.handle,
        )
        nvDwn = nvc.PySurfaceDownloader(
            nvDmx.Width(), nvDmx.Height(), nvCvt.Format(), cuda_ctx.handle, cuda_str.handle
        )

        packet = np.ndarray(shape=(0), dtype=np.uint8)
        # Buffer sized as width * height * 3 / 2 bytes per frame.
        frameSize = int(nvDmx.Width() * nvDmx.Height() * 3 / 2)
        rawFrame = np.ndarray(shape=(frameSize), dtype=np.uint8)
        pdata_in, pdata_out = nvc.PacketData(), nvc.PacketData()

        # Determine colorspace conversion parameters.
        # Some video streams don't specify these parameters so default values
        # are most widespread bt601 and mpeg.
        cspace, crange = nvDmx.ColorSpace(), nvDmx.ColorRange()
        if nvc.ColorSpace.UNSPEC == cspace:
            cspace = nvc.ColorSpace.BT_601
        if nvc.ColorRange.UDEF == crange:
            crange = nvc.ColorRange.MPEG
        cc_ctx = nvc.ColorspaceConversionContext(cspace, crange)
        print("Color space: ", str(cspace))
        print("Color range: ", str(crange))

        def _save_surface(surface_nv12):
            """Convert, download and write one surface; return False on failure."""
            surface_yuv420 = nvCvt.Execute(surface_nv12, cc_ctx)
            if surface_yuv420.Empty():
                return False
            if not nvDwn.DownloadSingleSurface(surface_yuv420, rawFrame):
                return False
            decFile.write(bytearray(rawFrame))
            return True

        while True:
            # Demuxer has sync design, it returns packet every time it's called.
            # If demuxer can't return packet it usually means EOF.
            if not nvDmx.DemuxSinglePacket(packet):
                break

            # Get last packet data to obtain frame timestamp
            nvDmx.LastPacketData(pdata_in)

            # Decoder is async by design.
            # As it consumes packets from demuxer one at a time it may not return
            # decoded surface every time the decoding function is called.
            surface_nv12 = nvDec.DecodeSurfaceFromPacket(pdata_in, packet, pdata_out)
            if not surface_nv12.Empty() and not _save_surface(surface_nv12):
                break

        # Now we flush decoder to empty decoded frames queue.
        while True:
            surface_nv12 = nvDec.FlushSingleSurface()
            if surface_nv12.Empty():
                break
            if not _save_surface(surface_nv12):
                break


if __name__ == "__main__":

    print("This sample decodes input video to raw YUV420 file on given GPU.")
    print("Usage: SampleDemuxDecode.py $gpu_id $input_file $output_file.")

    if len(sys.argv) < 4:
        print("Provide gpu ID, path to input and output files")
        sys.exit(1)

    gpuID = int(sys.argv[1])
    encFilePath = sys.argv[2]
    decFilePath = sys.argv[3]

    decode(gpuID, encFilePath, decFilePath)
# Upper bound on the number of frames read from the input and written out.
total_num_frames = 444


def encode(gpuID, decFilePath, encFilePath, width, height, codec, format):
    """
    Encode raw frames from a file into a compressed video on the GPU.

    At most total_num_frames frames are read from decFilePath; the encoder is
    flushed afterwards and a short summary is printed.

    - gpuID: ordinal of the GPU to encode on
    - decFilePath: path to the raw input video
    - encFilePath: path of the encoded output file (overwritten)
    - width, height: frame size of the raw input
    - codec: value passed to the encoder as "codec"
    - format: 'yuv444', 'yuv444_10bit' or 'yuv420_10bit'; any other value
      selects NV12 with the "high" profile
    """
    # Context managers close both files on every exit path.
    with open(decFilePath, "rb") as decFile, open(encFilePath, "wb") as encFile:
        res = str(width) + "x" + str(height)

        pixel_format = nvc.PixelFormat.NV12
        profile = "high"
        if format == 'yuv444':
            pixel_format = nvc.PixelFormat.YUV444
            profile = "high_444"
        elif format == 'yuv444_10bit':
            pixel_format = nvc.PixelFormat.YUV444_10bit
            profile = "high_444_10bit"
        elif format == 'yuv420_10bit':
            pixel_format = nvc.PixelFormat.YUV420_10bit
            profile = "high_420_10bit"
        nvEnc = nvc.PyNvEncoder(
            {
                "preset": "P5",
                "tuning_info": "high_quality",
                "codec": codec,
                "profile": profile,
                "s": res,
                "bitrate": "10M",
            },
            gpuID,
            pixel_format
        )

        frameSize = nvEnc.GetFrameSizeInBytes()
        encFrame = np.ndarray(shape=(0), dtype=np.uint8)

        # Number of frames we've sent to encoder
        framesSent = 0
        # Number of frames we've received from encoder
        framesReceived = 0
        # Number of frames we've got from encoder during flush.
        # This number is included in number of received frames.
        # We use separate counter to check if encoder receives packets one by one
        # during flush.
        framesFlushed = 0

        while framesSent < total_num_frames:
            rawFrame = np.fromfile(decFile, np.uint8, count=frameSize)
            if not (rawFrame.size):
                print("No more input frames")
                break

            success = nvEnc.EncodeSingleFrame(rawFrame, encFrame, sync=False)
            framesSent += 1

            if success:
                encByteArray = bytearray(encFrame)
                encFile.write(encByteArray)
                framesReceived += 1

        # Encoder is asynchronous, so we need to flush it
        while True:
            success = nvEnc.FlushSinglePacket(encFrame)
            if (success) and (framesReceived < total_num_frames):
                encByteArray = bytearray(encFrame)
                encFile.write(encByteArray)
                framesReceived += 1
                framesFlushed += 1
            else:
                break

    print(
        framesReceived,
        "/",
        total_num_frames,
        " frames encoded and written to output file.",
    )
    print(framesFlushed, " frame(s) received during encoder flush.")


if __name__ == "__main__":

    # The first positional argument of ArgumentParser is `prog`, so the text
    # has to be passed as `description` to show up as help text.
    parser = argparse.ArgumentParser(
        description="This sample encodes first " + str(total_num_frames)
        + " frames of input raw NV12 file to H.264 video on given GPU."
    )
    parser.add_argument(
        "gpu_id",
        type=int,
        help="GPU id, check nvidia-smi",
    )
    parser.add_argument(
        "raw_file_path",
        type=pathlib.Path,
        help="raw video file (read from)",
    )

    parser.add_argument(
        "encoded_file_path",
        type=pathlib.Path,
        help="encoded video file (write to)",
    )

    parser.add_argument(
        "width",
        type=int,
        help="width",
    )

    parser.add_argument(
        "height",
        type=int,
        help="height",
    )

    parser.add_argument(
        "codec",
        type=str,
        nargs='?',
        default="h264",
        help="supported codec",
    )

    parser.add_argument(
        "surfaceformat",
        type=str,
        nargs='?',
        default="nv12",
        help="supported format",
    )

    args = parser.parse_args()

    gpuID = args.gpu_id
    decFilePath = args.raw_file_path
    encFilePath = args.encoded_file_path
    width = args.width
    height = args.height
    codec = args.codec
    surfaceformat = args.surfaceformat

    encode(gpuID, decFilePath, encFilePath, width, height, codec, surfaceformat)

    sys.exit(0)
class Worker(Thread):
    """
    Thread that uploads, color-converts and encodes frames of one raw YUV file.

    Every worker retains the primary CUDA context of the chosen GPU and
    creates its own stream, uploader, converter and encoder.
    """

    def __init__(self, gpuID: int, width: int, height: int, rawFilePath: str):
        """
        - gpuID: ordinal of the GPU to encode on
        - width, height: frame size of the raw YUV420 input
        - rawFilePath: path to the raw input file, opened for reading here
        """
        Thread.__init__(self)

        res = str(width) + "x" + str(height)

        # Retain primary CUDA device context and create separate stream per thread.
        self.ctx = cuda.Device(gpuID).retain_primary_context()
        self.ctx.push()
        self.str = cuda.Stream()
        self.ctx.pop()

        # Initialize color conversion context.
        # Accurate color rendition doesn't matter in this sample so just use
        # most common bt601 and mpeg.
        self.cc_ctx = nvc.ColorspaceConversionContext(
            color_space=nvc.ColorSpace.BT_601, color_range=nvc.ColorRange.MPEG
        )

        self.nvUpl = nvc.PyFrameUploader(
            width, height, nvc.PixelFormat.YUV420, self.ctx.handle, self.str.handle
        )

        self.nvCvt = nvc.PySurfaceConverter(
            width,
            height,
            nvc.PixelFormat.YUV420,
            nvc.PixelFormat.NV12,
            self.ctx.handle,
            self.str.handle,
        )

        self.nvEnc = nvc.PyNvEncoder(
            {"preset": "P1", "codec": "h264", "s": res},
            self.ctx.handle,
            self.str.handle,
        )

        self.rawFile = open(rawFilePath, "rb")

        self.encFrame = np.ndarray(shape=(0), dtype=np.uint8)

    def run(self):
        """
        Encode the raw file frame by frame; the encoded output is not saved.

        The loop stops at end of input or at the first failed stage. An
        exception is printed rather than propagated, and the input file is
        closed in every case.
        """
        try:
            # The frame size is constant, so compute it once:
            # width * height * 3 / 2 bytes per frame.
            frameSize = int(self.nvEnc.Width() * self.nvEnc.Height() * 3 / 2)

            while True:
                rawFrame = np.fromfile(self.rawFile, np.uint8, count=frameSize)
                if not (rawFrame.size):
                    print("No more video frames.")
                    break

                rawSurface = self.nvUpl.UploadSingleFrame(rawFrame)
                if rawSurface.Empty():
                    print("Failed to upload video frame to GPU.")
                    break

                cvtSurface = self.nvCvt.Execute(rawSurface, self.cc_ctx)
                if cvtSurface.Empty():
                    print("Failed to do color conversion.")
                    break

                self.nvEnc.EncodeSingleSurface(cvtSurface, self.encFrame)

            # Encoder is asynchronous, so we need to flush it
            self.nvEnc.Flush(self.encFrame)

        except Exception as e:
            print(getattr(e, "message", str(e)))
        finally:
            # The file was opened in __init__ and is only consumed here.
            self.rawFile.close()


def create_threads(gpu_id: int, width: int, height: int, input: str, num_threads: int):
    """
    Start num_threads encoding workers on the same input file and wait for them.

    - gpu_id: ordinal of the GPU shared by all workers
    - width, height: frame size of the raw input
    - input: path to the raw YUV file every worker reads
    - num_threads: number of workers to run in parallel
    """
    cuda.init()

    thread_pool = []
    for _ in range(num_threads):
        thread = Worker(gpu_id, width, height, input)
        thread.start()
        thread_pool.append(thread)

    for thread in thread_pool:
        thread.join()


if __name__ == "__main__":
    print("This sample encodes multiple videos simultaneously from same YUV file.")
    print("Usage: SampleEncodeMultiThread.py $gpu_id $width $height $input_file $num_threads")

    if len(sys.argv) < 6:
        print("Provide input CLI arguments as shown above")
        sys.exit(1)

    gpu_id = int(sys.argv[1])
    width = int(sys.argv[2])
    height = int(sys.argv[3])
    input = sys.argv[4]
    num_threads = int(sys.argv[5])

    create_threads(gpu_id, width, height, input, num_threads)
20 | import sys 21 | import os 22 | 23 | if os.name == "nt": 24 | # Add CUDA_PATH env variable 25 | cuda_path = os.environ["CUDA_PATH"] 26 | if cuda_path: 27 | os.add_dll_directory(cuda_path) 28 | else: 29 | print("CUDA_PATH environment variable is not set.", file=sys.stderr) 30 | print("Can't set CUDA DLLs search path.", file=sys.stderr) 31 | exit(1) 32 | 33 | # Add PATH as well for minor CUDA releases 34 | sys_path = os.environ["PATH"] 35 | if sys_path: 36 | paths = sys_path.split(";") 37 | for path in paths: 38 | if os.path.isdir(path): 39 | os.add_dll_directory(path) 40 | else: 41 | print("PATH environment variable is not set.", file=sys.stderr) 42 | exit(1) 43 | 44 | import torch 45 | import torchvision.transforms as T 46 | import PyNvCodec as nvc 47 | try: 48 | import PytorchNvCodec as pnvc 49 | except ImportError as err: 50 | raise (f"""Could not import `PytorchNvCodec`: {err}. 51 | Please make sure it is installed! Run 52 | `pip install git+https://github.com/NVIDIA/VideoProcessingFramework#subdirectory=src/PytorchNvCodec` or 53 | `pip install src/PytorchNvCodec` if using a local copy of the VideoProcessingFramework repository""") # noqa 54 | 55 | import numpy as np 56 | 57 | 58 | class cconverter: 59 | """ 60 | Colorspace conversion chain. 
61 | """ 62 | 63 | def __init__(self, width: int, height: int, gpu_id: int): 64 | self.gpu_id = gpu_id 65 | self.w = width 66 | self.h = height 67 | self.chain = [] 68 | 69 | def add(self, src_fmt: nvc.PixelFormat, dst_fmt: nvc.PixelFormat) -> None: 70 | self.chain.append( 71 | nvc.PySurfaceConverter(self.w, self.h, src_fmt, dst_fmt, self.gpu_id) 72 | ) 73 | 74 | def run(self, src_surface: nvc.Surface) -> nvc.Surface: 75 | surf = src_surface 76 | cc = nvc.ColorspaceConversionContext(nvc.ColorSpace.BT_601, nvc.ColorRange.MPEG) 77 | 78 | for cvt in self.chain: 79 | surf = cvt.Execute(surf, cc) 80 | if surf.Empty(): 81 | raise RuntimeError("Failed to perform color conversion") 82 | 83 | return surf.Clone(self.gpu_id) 84 | 85 | 86 | def surface_to_tensor(surface: nvc.Surface) -> torch.Tensor: 87 | """ 88 | Converts planar rgb surface to cuda float tensor. 89 | """ 90 | if surface.Format() != nvc.PixelFormat.RGB_PLANAR: 91 | raise RuntimeError("Surface shall be of RGB_PLANAR pixel format") 92 | 93 | surf_plane = surface.PlanePtr() 94 | img_tensor = pnvc.DptrToTensor( 95 | surf_plane.GpuMem(), 96 | surf_plane.Width(), 97 | surf_plane.Height(), 98 | surf_plane.Pitch(), 99 | surf_plane.ElemSize(), 100 | ) 101 | if img_tensor is None: 102 | raise RuntimeError("Can not export to tensor.") 103 | 104 | img_tensor.resize_(3, int(surf_plane.Height() / 3), surf_plane.Width()) 105 | img_tensor = img_tensor.type(dtype=torch.cuda.FloatTensor) 106 | img_tensor = torch.divide(img_tensor, 255.0) 107 | img_tensor = torch.clamp(img_tensor, 0.0, 1.0) 108 | 109 | return img_tensor 110 | 111 | 112 | def tensor_to_surface(img_tensor: torch.tensor, gpu_id: int) -> nvc.Surface: 113 | """ 114 | Converts cuda float tensor to planar rgb surface. 
115 | """ 116 | if len(img_tensor.shape) != 3 and img_tensor.shape[0] != 3: 117 | raise RuntimeError("Shape of the tensor must be (3, height, width)") 118 | 119 | tensor_w, tensor_h = img_tensor.shape[2], img_tensor.shape[1] 120 | img = torch.clamp(img_tensor, 0.0, 1.0) 121 | img = torch.multiply(img, 255.0) 122 | img = img.type(dtype=torch.cuda.ByteTensor) 123 | 124 | surface = nvc.Surface.Make(nvc.PixelFormat.RGB_PLANAR, tensor_w, tensor_h, gpu_id) 125 | surf_plane = surface.PlanePtr() 126 | pnvc.TensorToDptr( 127 | img, 128 | surf_plane.GpuMem(), 129 | surf_plane.Width(), 130 | surf_plane.Height(), 131 | surf_plane.Pitch(), 132 | surf_plane.ElemSize(), 133 | ) 134 | 135 | return surface 136 | 137 | 138 | def main(gpu_id, encFilePath, dstFilePath): 139 | dstFile = open(dstFilePath, "wb") 140 | nvDec = nvc.PyNvDecoder(encFilePath, gpu_id) 141 | 142 | w = nvDec.Width() 143 | h = nvDec.Height() 144 | res = str(w) + "x" + str(h) 145 | nvEnc = nvc.PyNvEncoder( 146 | {"preset": "P4", "codec": "h264", "s": res, "bitrate": "10M"}, gpu_id 147 | ) 148 | 149 | # Surface converters 150 | to_rgb = cconverter(w, h, gpu_id) 151 | to_rgb.add(nvc.PixelFormat.NV12, nvc.PixelFormat.YUV420) 152 | to_rgb.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.RGB) 153 | to_rgb.add(nvc.PixelFormat.RGB, nvc.PixelFormat.RGB_PLANAR) 154 | 155 | to_nv12 = cconverter(w, h, gpu_id) 156 | to_nv12.add(nvc.PixelFormat.RGB_PLANAR, nvc.PixelFormat.RGB) 157 | to_nv12.add(nvc.PixelFormat.RGB, nvc.PixelFormat.YUV420) 158 | to_nv12.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.NV12) 159 | 160 | # Encoded video frame 161 | encFrame = np.ndarray(shape=(0), dtype=np.uint8) 162 | 163 | while True: 164 | # Decode NV12 surface 165 | src_surface = nvDec.DecodeSingleSurface() 166 | if src_surface.Empty(): 167 | break 168 | 169 | # Convert to planar RGB 170 | rgb_pln = to_rgb.run(src_surface) 171 | if rgb_pln.Empty(): 172 | break 173 | 174 | # PROCESS YOUR TENSOR HERE. 
175 | # THIS DUMMY PROCESSING JUST ADDS RANDOM ROTATION. 176 | src_tensor = surface_to_tensor(rgb_pln) 177 | dst_tensor = T.RandomRotation(degrees=(-1, 1))(src_tensor) 178 | surface_rgb = tensor_to_surface(dst_tensor, gpu_id) 179 | 180 | # Convert back to NV12 181 | dst_surface = to_nv12.run(surface_rgb) 182 | if src_surface.Empty(): 183 | break 184 | 185 | # Encode 186 | success = nvEnc.EncodeSingleSurface(dst_surface, encFrame) 187 | if success: 188 | byteArray = bytearray(encFrame) 189 | dstFile.write(byteArray) 190 | 191 | # Encoder is asynchronous, so we need to flush it 192 | while True: 193 | success = nvEnc.FlushSinglePacket(encFrame) 194 | if success: 195 | byteArray = bytearray(encFrame) 196 | dstFile.write(byteArray) 197 | else: 198 | break 199 | 200 | 201 | if __name__ == "__main__": 202 | 203 | print("This sample transcode and process with pytorch an input video on given GPU.") 204 | print("Usage: SamplePyTorch.py $gpu_id $input_file $output_file.") 205 | 206 | if len(sys.argv) < 4: 207 | print("Provide gpu ID, path to input and output files") 208 | exit(1) 209 | 210 | gpu_id = int(sys.argv[1]) 211 | encFilePath = sys.argv[2] 212 | decFilePath = sys.argv[3] 213 | main(gpu_id, encFilePath, decFilePath) 214 | -------------------------------------------------------------------------------- /samples/SampleRemap.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2020 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # Starting from Python 3.8 DLL search policy has changed. 18 | # We need to add path to CUDA DLLs explicitly. 19 | import sys 20 | import os 21 | 22 | if os.name == "nt": 23 | # Add CUDA_PATH env variable 24 | cuda_path = os.environ["CUDA_PATH"] 25 | if cuda_path: 26 | os.add_dll_directory(cuda_path) 27 | else: 28 | print("CUDA_PATH environment variable is not set.", file=sys.stderr) 29 | print("Can't set CUDA DLLs search path.", file=sys.stderr) 30 | exit(1) 31 | 32 | # Add PATH as well for minor CUDA releases 33 | sys_path = os.environ["PATH"] 34 | if sys_path: 35 | paths = sys_path.split(";") 36 | for path in paths: 37 | if os.path.isdir(path): 38 | os.add_dll_directory(path) 39 | else: 40 | print("PATH environment variable is not set.", file=sys.stderr) 41 | exit(1) 42 | 43 | import PyNvCodec as nvc 44 | import numpy as np 45 | import cv2 46 | 47 | 48 | def load_remap(remap_file): 49 | remap_x, remap_y = np.load(remap_file, allow_pickle=True).values() 50 | if remap_x.shape != remap_y.shape: 51 | raise ValueError( 52 | "remap_x.shape != remap_y.shape: ", remap_x.shape, " != ", remap_y.shape 53 | ) 54 | 55 | if not remap_x.flags["C_CONTIGUOUS"]: 56 | remap_x = np.ascontiguousarray(remap_x, dtype=remap_x.dtype) 57 | if not remap_y.flags["C_CONTIGUOUS"]: 58 | remap_y = np.ascontiguousarray(remap_y, dtype=remap_y.dtype) 59 | 60 | print("----> load remap_x: ", remap_x.shape, remap_x.dtype, remap_x.strides) 61 | print("----> load remap_y: ", remap_y.shape, remap_y.dtype, remap_y.strides) 62 | return remap_x, remap_y 63 | 64 | 65 | total_num_frames = 4 66 | 67 | 68 | def decode(gpuID, encFilePath, remapFilePath): 69 | 70 | nvDec = nvc.PyNvDecoder(encFilePath, gpuID) 71 | w = nvDec.Width() 72 | h = nvDec.Height() 73 | 74 | to_rgb = nvc.PySurfaceConverter( 75 | w, h, nvc.PixelFormat.NV12, nvc.PixelFormat.RGB, gpuID 76 | ) 77 | cc1 = 
nvc.ColorspaceConversionContext(nvc.ColorSpace.BT_709, nvc.ColorRange.JPEG) 78 | 79 | # init remaper 80 | remap_x, remap_y = load_remap(remapFilePath) 81 | remap_h, remap_w = remap_x.shape 82 | nv_remap = nvc.PySurfaceRemaper(remap_x, remap_y, nvc.PixelFormat.RGB, gpuID) 83 | 84 | nv_dwn = nvc.PySurfaceDownloader(remap_w, remap_h, nvc.PixelFormat.RGB, gpuID) 85 | 86 | dec_frame = 0 87 | while dec_frame < total_num_frames: 88 | rawSurface = nvDec.DecodeSingleSurface() 89 | if rawSurface.Empty(): 90 | print("DecodeSingleSurface Failed.") 91 | break 92 | rgb24_origin = to_rgb.Execute(rawSurface, cc1) 93 | if rgb24_origin.Empty(): 94 | print("Convert to rgb Failed.") 95 | break 96 | rgb24_remap = nv_remap.Execute(rgb24_origin) 97 | if rgb24_remap.Empty(): 98 | print("Remap Failed.") 99 | break 100 | rawFrameRGB = np.ndarray(shape=(remap_h, remap_w, 3), dtype=np.uint8) 101 | if not nv_dwn.DownloadSingleSurface(rgb24_remap, rawFrameRGB): 102 | print("DownloadSingleSurface Failed.") 103 | break 104 | undistort_img = cv2.cvtColor(rawFrameRGB, cv2.COLOR_RGB2BGR) 105 | print("dump image shape: ", undistort_img.shape) 106 | cv2.imwrite("%s.jpg" % dec_frame, undistort_img) 107 | dec_frame += 1 108 | 109 | 110 | if __name__ == "__main__": 111 | 112 | print( 113 | "This sample decodes first ", 114 | total_num_frames, 115 | " frames from input video and undistort them.", 116 | ) 117 | print("Usage: SampleRemap.py $gpu_id $input_file $remap_npz_file") 118 | 119 | if len(sys.argv) < 4: 120 | print("Provide gpu_id, path to input, path to remap file") 121 | exit(1) 122 | 123 | gpu_id = int(sys.argv[1]) 124 | encFilePath = sys.argv[2] 125 | remapFilePath = sys.argv[3] 126 | 127 | decode(gpu_id, encFilePath, remapFilePath) 128 | -------------------------------------------------------------------------------- /samples/SampleTypeConversionTest.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2019 NVIDIA Corporation 3 | # 4 | # 
Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # Starting from Python 3.8 DLL search policy has changed. 18 | # We need to add path to CUDA DLLs explicitly. 19 | import sys 20 | import os 21 | 22 | if os.name == 'nt': 23 | # Add CUDA_PATH env variable 24 | cuda_path = os.environ["CUDA_PATH"] 25 | if cuda_path: 26 | os.add_dll_directory(cuda_path) 27 | else: 28 | print("CUDA_PATH environment variable is not set.", file = sys.stderr) 29 | print("Can't set CUDA DLLs search path.", file = sys.stderr) 30 | exit(1) 31 | 32 | # Add PATH as well for minor CUDA releases 33 | sys_path = os.environ["PATH"] 34 | if sys_path: 35 | paths = sys_path.split(';') 36 | for path in paths: 37 | if os.path.isdir(path): 38 | os.add_dll_directory(path) 39 | else: 40 | print("PATH environment variable is not set.", file = sys.stderr) 41 | exit(1) 42 | 43 | import pycuda.driver as cuda 44 | import PyNvCodec as nvc 45 | import numpy as np 46 | import torch 47 | import nvcv 48 | 49 | def decode(gpuID, encFilePath, decFilePath): 50 | cuda.init() 51 | cuda_ctx = cuda.Device(gpuID).retain_primary_context() 52 | cuda_ctx.push() 53 | cuda_str = cuda.Stream() 54 | cuda_ctx.pop() 55 | 56 | decFile = open(decFilePath, "wb") 57 | 58 | nvDmx = nvc.PyFFmpegDemuxer(encFilePath) 59 | nvDec = nvc.PyNvDecoder(nvDmx.Width(), nvDmx.Height(), nvDmx.Format(), nvDmx.Codec(), cuda_ctx.handle, cuda_str.handle) 60 | 61 | nvDwn = nvc.PySurfaceDownloader(nvDmx.Width(), 
nvDmx.Height(), nvDmx.Format(), cuda_ctx.handle, cuda_str.handle) 62 | 63 | res = str(nvDmx.Width()) + 'x' + str(nvDmx.Height()) 64 | 65 | nvEnc = nvc.PyNvEncoder({'preset': 'P5', 'tuning_info': 'high_quality', 'codec': 'h264', 66 | 'profile': 'high', 's': res, 'bitrate': '10M'}, cuda_ctx.handle, cuda_str.handle) 67 | 68 | packet = np.ndarray(shape=(0), dtype=np.uint8) 69 | frameSize = int(nvDmx.Width() * nvDmx.Height() * 3 / 2) 70 | rawFrame = np.ndarray(shape=(frameSize), dtype=np.uint8) 71 | pdata_in, pdata_out = nvc.PacketData(), nvc.PacketData() 72 | 73 | encFrame = np.ndarray(shape=(0), dtype=np.uint8) 74 | 75 | # Determine colorspace conversion parameters. 76 | # Some video streams don't specify these parameters so default values 77 | # are most widespread bt601 and mpeg. 78 | cspace, crange = nvDmx.ColorSpace(), nvDmx.ColorRange() 79 | if nvc.ColorSpace.UNSPEC == cspace: 80 | cspace = nvc.ColorSpace.BT_601 81 | if nvc.ColorRange.UDEF == crange: 82 | crange = nvc.ColorRange.MPEG 83 | cc_ctx = nvc.ColorspaceConversionContext(cspace, crange) 84 | print('Color space: ', str(cspace)) 85 | print('Color range: ', str(crange)) 86 | 87 | while True: 88 | # Demuxer has sync design, it returns packet every time it's called. 89 | # If demuxer can't return packet it usually means EOF. 90 | torch.cuda.nvtx.range_push("DemuxSinglePacket") 91 | success = nvDmx.DemuxSinglePacket(packet) 92 | torch.cuda.nvtx.range_pop() 93 | if not success: 94 | break 95 | 96 | 97 | # Get last packet data to obtain frame timestamp 98 | torch.cuda.nvtx.range_push("LastPacketData") 99 | nvDmx.LastPacketData(pdata_in) 100 | torch.cuda.nvtx.range_pop() 101 | 102 | # Decoder is async by design. 103 | # As it consumes packets from demuxer one at a time it may not return 104 | # decoded surface every time the decoding function is called. 
105 | torch.cuda.nvtx.range_push("nvDec.DecodeSurfaceFromPacket") 106 | 107 | surface_nv12 = nvDec.DecodeSurfaceFromPacket(pdata_in, packet, pdata_out ,True) 108 | torch.cuda.nvtx.range_pop() 109 | 110 | if surface_nv12.width == 0 and surface_nv12.height == 0: 111 | continue 112 | 113 | torch.cuda.nvtx.range_push("nvEnc.EncodeSingleSurface") 114 | success = nvEnc.EncodeFromNVCVImage(surface_nv12, encFrame) 115 | torch.cuda.nvtx.range_pop() 116 | 117 | if (success): 118 | bits = bytearray(encFrame) 119 | decFile.write(bits) 120 | print("Interop test successfull") 121 | break; 122 | 123 | 124 | 125 | if __name__ == "__main__": 126 | 127 | print("This sample decodes input video to raw YUV420 file on given GPU.") 128 | print("Usage: SampleDecode.py $gpu_id $input_file $output_file.") 129 | 130 | if(len(sys.argv) < 4): 131 | print("Provide gpu ID, path to input and output files") 132 | exit(1) 133 | 134 | gpuID = int(sys.argv[1]) 135 | encFilePath = sys.argv[2] 136 | decFilePath = sys.argv[3] 137 | 138 | decode(gpuID, encFilePath, decFilePath) 139 | -------------------------------------------------------------------------------- /samples/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import logging 3 | 4 | SERVICE_LOGGING_FORMAT = ( 5 | "[{filename:s}][{funcName:s}:{lineno:d}]" + "[{levelname:s}] {message:s}" 6 | ) 7 | SERVICE_LOGGING_STREAM = sys.stdout 8 | 9 | 10 | def get_logger(logger_name, log_level="info"): 11 | 12 | SERVICE_LOGGING_LEVEL = getattr(logging, log_level.upper(), None) 13 | 14 | logger = logging.getLogger(logger_name) 15 | logger.setLevel(SERVICE_LOGGING_LEVEL) 16 | ch = logging.StreamHandler(SERVICE_LOGGING_STREAM) 17 | formatter = logging.Formatter(SERVICE_LOGGING_FORMAT, style="{") 18 | ch.setFormatter(formatter) 19 | ch.setLevel(SERVICE_LOGGING_LEVEL) 20 | logger.addHandler(ch) 21 | logger.propagate = False 22 | 23 | return logger 24 | 25 | 26 | logger = get_logger(__file__) 27 | 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | """ 4 | 5 | import sys 6 | import os 7 | 8 | from pkg_resources import VersionConflict, require 9 | 10 | try: 11 | require("setuptools>=42") 12 | except VersionConflict: 13 | print("Error: version of setuptools is too old (<42)!") 14 | sys.exit(1) 15 | 16 | 17 | if __name__ == "__main__": 18 | import skbuild 19 | 20 | PytorchNvCodec = "PytorchNvCodec @ git+https://github.com/NVIDIA/VideoProcessingFramework.git#subdirectory=src/PytorchNvCodec/" 21 | skbuild.setup( 22 | name="PyNvCodec", 23 | version="2.0", 24 | description="Video Processing Library with full NVENC/NVDEC hardware acceleration", 25 | author="NVIDIA", 26 | license="Apache 2.0", 27 | install_requires=["numpy"], 28 | extras_require={ 29 | # , "PyOpenGL-accelerate" # does not compile on 3.10 30 | "dev": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", "onnx", "tensorrt", f"PytorchNvCodec @ file://{os.getcwd()}/src/PytorchNvCodec/"], 31 | "samples": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", "onnx", "tensorrt", "tqdm", PytorchNvCodec], 32 | "tests": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", PytorchNvCodec], 33 | "torch": ["torch", "torchvision", PytorchNvCodec], 34 | "tensorrt": ["torch", "torchvision", PytorchNvCodec], 35 | }, 36 | dependency_links=[ 37 | "https://pypi.ngc.nvidia.com" 38 | ], 39 | packages=["PyNvCodec"], 40 | package_data={"PyNvCodec": ["__init__.pyi"]}, 41 | package_dir={"": "src"}, 42 | cmake_install_dir="src", 43 | ) 44 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.21) 2 | 3 | project(Video_Processing_Framework) 4 | 5 | option(TRACK_TOKEN_ALLOCATIONS "Debug memory 
allocations within VPF" FALSE ) 6 | 7 | if(TRACK_TOKEN_ALLOCATIONS) 8 | add_definitions(-DTRACK_TOKEN_ALLOCATIONS) 9 | endif(TRACK_TOKEN_ALLOCATIONS) 10 | 11 | #Undef MIN & MAX & C runtime warnings for Windows 12 | if(WIN32) 13 | add_definitions(-DNOMINMAX) 14 | add_definitions(-D_CRT_SECURE_NO_WARNINGS) 15 | endif(WIN32) 16 | 17 | find_package(CUDAToolkit 11.2 REQUIRED) 18 | 19 | find_path( 20 | TC_FFMPEG_ROOT_HINT 21 | NAMES "include/libavcodec/avcodec.h" 22 | ) 23 | set(TC_FFMPEG_ROOT "${TC_FFMPEG_ROOT_HINT}" CACHE PATH "Where to find ffmpeg" PARENT_SCOPE) 24 | 25 | add_subdirectory(PyNvCodec) 26 | # TODO: rename TC to NvCodec or something more descriptive 27 | add_subdirectory(TC) 28 | #add_subdirectory(PytorchNvCodec) 29 | -------------------------------------------------------------------------------- /src/PyNvCodec/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2019 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | cmake_minimum_required(VERSION 3.21) 18 | 19 | project(PyNvCodec VERSION 1.0) 20 | 21 | set(PYNVCODEC_VERSION_MAJOR 1) 22 | set(PYNVCODEC_VERSION_MINOR 0) 23 | 24 | if(POLICY CMP0135) 25 | # https://cmake.org/cmake/help/latest/policy/CMP0135.html 26 | # From docs: 27 | # CMake 3.24 and above prefers to set the timestamps of all extracted contents to the time of the extraction. 
28 | # This ensures that anything that depends on the extracted contents will be rebuilt whenever the URL changes. 29 | cmake_policy(SET CMP0135 NEW) 30 | endif() 31 | 32 | configure_file("inc/Version.hpp.in" "pynvcode_version.h") 33 | 34 | find_package(Python3 3.6 REQUIRED COMPONENTS Interpreter Development) 35 | 36 | option(FETCHCONTENT_QUIET OFF) 37 | include(FetchContent) 38 | fetchcontent_declare( 39 | pybind11 40 | URL https://github.com/pybind/pybind11/archive/v2.10.0.tar.gz 41 | URL_HASH 42 | SHA256=eacf582fa8f696227988d08cfc46121770823839fe9e301a20fbce67e7cd70ec 43 | ) 44 | fetchcontent_makeavailable(pybind11) 45 | 46 | pybind11_add_module(_PyNvCodec MODULE 47 | src/PyBufferUploader.cpp 48 | src/PyCudaBufferDownloader.cpp 49 | src/PyFFMpegDecoder.cpp 50 | src/PyFFMpegDemuxer.cpp 51 | src/PyFrameUploader.cpp 52 | src/PyNvCodec.cpp 53 | src/PyNvDecoder.cpp 54 | src/PyNvEncoder.cpp 55 | src/PySurface.cpp 56 | src/PySurfaceConverter.cpp 57 | src/PySurfaceDownloader.cpp 58 | src/PySurfaceRemaper.cpp 59 | src/PySurfaceResizer.cpp 60 | ) 61 | set_property(TARGET _PyNvCodec PROPERTY CXX_STANDARD 17) 62 | target_include_directories(_PyNvCodec PRIVATE inc) 63 | 64 | target_link_libraries(_PyNvCodec PUBLIC TC_CORE TC) 65 | 66 | include(GNUInstallDirs) 67 | # Install runtime dependencies (i.e. 
FFMPEG, nppi DLLS) on Windows but not Linux 68 | if(WIN32) 69 | message(STATUS "TC_FFMPEG_ROOT/bin/=${TC_FFMPEG_ROOT}/bin/") 70 | install(TARGETS _PyNvCodec 71 | RUNTIME_DEPENDENCIES DIRECTORIES "${TC_FFMPEG_ROOT}/bin/" "${TC_FFMPEG_ROOT}/lib/" ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_LIBRARY_DIR} 72 | PRE_EXCLUDE_REGEXES "api-ms-" "ext-ms-" "python" "nvcuda"# "nppi" 73 | POST_EXCLUDE_REGEXES ".*system32/.*\\.dll" 74 | RUNTIME DESTINATION PyNvCodec 75 | LIBRARY DESTINATION PyNvCodec 76 | ) 77 | else() 78 | install(TARGETS _PyNvCodec 79 | RUNTIME DESTINATION PyNvCodec 80 | LIBRARY DESTINATION PyNvCodec 81 | ) 82 | endif() 83 | -------------------------------------------------------------------------------- /src/PyNvCodec/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | """ 4 | 5 | __author__ = "NVIDIA" 6 | __copyright__ = "Copyright 2022, NVIDIA" 7 | __credits__ = [] 8 | __license__ = "Apache 2.0" 9 | __version__ = "0.1.0" 10 | __maintainer__ = "NVIDIA" 11 | __email__ = "TODO" 12 | __status__ = "Production" 13 | 14 | 15 | try: 16 | # Import native module 17 | from ._PyNvCodec import * # noqa 18 | except ImportError: 19 | import distutils.sysconfig 20 | from os.path import join, dirname 21 | raise RuntimeError("Failed to import native module _PyNvCodec! 
" 22 | f"Please check whether \"{join(dirname(__file__), '_PyNvCodec' + distutils.sysconfig.get_config_var('EXT_SUFFIX'))}\"" # noqa 23 | " exists and can find all library dependencies (CUDA, ffmpeg).\n" 24 | "On Unix systems, you can use `ldd` on the file to see whether it can find all dependencies.\n" 25 | "On Windows, you can use \"dumpbin /dependents\" in a Visual Studio command prompt or\n" 26 | "https://github.com/lucasg/Dependencies/releases.") 27 | -------------------------------------------------------------------------------- /src/PyNvCodec/inc/Version.hpp.in: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2019 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | #define PYNVCODEC_VERSION_MAJOR @PYNVCODEC_VERSION_MAJOR@ 18 | #define PYNVCODEC_VERSION_MINOR @PYNVCODEC_VERSION_MINOR@ -------------------------------------------------------------------------------- /src/PyNvCodec/src/PyBufferUploader.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Copyright 2021 Kognia Sports Intelligence 4 | * Copyright 2021 Videonetics Technology Private Limited 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 
8 | * You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "PyNvCodec.hpp" 18 | 19 | using namespace std; 20 | using namespace VPF; 21 | using namespace chrono; 22 | 23 | namespace py = pybind11; 24 | 25 | constexpr auto TASK_EXEC_SUCCESS = TaskExecStatus::TASK_EXEC_SUCCESS; 26 | constexpr auto TASK_EXEC_FAIL = TaskExecStatus::TASK_EXEC_FAIL; 27 | 28 | PyBufferUploader::PyBufferUploader(uint32_t elemSize, uint32_t numElems, 29 | uint32_t gpu_ID) 30 | { 31 | elem_size = elemSize; 32 | num_elems = numElems; 33 | 34 | uploader.reset(UploadBuffer::Make(CudaResMgr::Instance().GetStream(gpu_ID), 35 | CudaResMgr::Instance().GetCtx(gpu_ID), 36 | elem_size, num_elems)); 37 | } 38 | 39 | PyBufferUploader::PyBufferUploader(uint32_t elemSize, uint32_t numElems, 40 | CUcontext ctx, CUstream str) 41 | { 42 | elem_size = elemSize; 43 | num_elems = numElems; 44 | 45 | uploader.reset(UploadBuffer::Make(str, ctx, elem_size, num_elems)); 46 | } 47 | 48 | shared_ptr 49 | PyBufferUploader::UploadSingleBuffer(py::array_t& frame) 50 | { 51 | auto pRawBuf = Buffer::Make(frame.size(), frame.mutable_data()); 52 | uploader->SetInput(pRawBuf, 0U); 53 | auto res = uploader->Execute(); 54 | delete pRawBuf; 55 | 56 | if (TASK_EXEC_FAIL == res) 57 | throw runtime_error("Error uploading frame to GPU"); 58 | 59 | auto pCudaBuffer = (CudaBuffer*)uploader->GetOutput(0U); 60 | if (!pCudaBuffer) 61 | throw runtime_error("Error uploading frame to GPU"); 62 | 63 | return shared_ptr(pCudaBuffer->Clone()); 64 | } 65 | 66 | void Init_PyBufferUploader(py::module& m) 67 | { 68 | py::class_(m, "PyBufferUploader") 
69 | .def(py::init(), py::arg("elem_size"), 70 | py::arg("num_elems"), py::arg("gpu_id"), 71 | R"pbdoc( 72 | Constructor method. 73 | 74 | :param elem_size: single buffer element size in bytes 75 | :param num_elems: number of elements in buffer 76 | :param gpu_id: GPU to use for memcopy 77 | )pbdoc") 78 | .def(py::init(), py::arg("elem_size"), 79 | py::arg("num_elems"), py::arg("context"), py::arg("stream"), 80 | R"pbdoc( 81 | Constructor method. 82 | 83 | :param elem_size: single buffer element size in bytes 84 | :param num_elems: number of elements in buffer 85 | :param context: CUDA context to use 86 | :param stream: CUDA stream to use 87 | )pbdoc") 88 | .def("UploadSingleBuffer", &PyBufferUploader::UploadSingleBuffer, 89 | py::return_value_policy::take_ownership, 90 | py::call_guard(), py::arg("array"), 91 | R"pbdoc( 92 | Perform HtoD memcopy. 93 | 94 | :param array: output numpy array 95 | :return: True in case of success, False otherwise 96 | )pbdoc"); 97 | } -------------------------------------------------------------------------------- /src/PyNvCodec/src/PyCudaBufferDownloader.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Copyright 2021 Kognia Sports Intelligence 4 | * Copyright 2021 Videonetics Technology Private Limited 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
 */

#include "PyNvCodec.hpp"

using namespace std;
using namespace VPF;
using namespace chrono;

namespace py = pybind11;

constexpr auto TASK_EXEC_SUCCESS = TaskExecStatus::TASK_EXEC_SUCCESS;
constexpr auto TASK_EXEC_FAIL = TaskExecStatus::TASK_EXEC_FAIL;

// Creates a downloader bound to the stream and context that CudaResMgr
// provides for the given GPU.
PyCudaBufferDownloader::PyCudaBufferDownloader(uint32_t elemSize,
                                               uint32_t numElems,
                                               uint32_t gpu_ID)
{
  elem_size = elemSize;
  num_elems = numElems;

  upDownloader.reset(DownloadCudaBuffer::Make(
      CudaResMgr::Instance().GetStream(gpu_ID),
      CudaResMgr::Instance().GetCtx(gpu_ID), elem_size, num_elems));
}

// Creates a downloader bound to a caller-supplied CUDA context and stream.
PyCudaBufferDownloader::PyCudaBufferDownloader(uint32_t elemSize,
                                               uint32_t numElems, CUcontext ctx,
                                               CUstream str)
{
  elem_size = elemSize;
  num_elems = numElems;

  upDownloader.reset(DownloadCudaBuffer::Make(str, ctx, elem_size, num_elems));
}

// Downloads a CUDA buffer into a numpy array.
// Returns false if the download task fails or produces no output, true once
// the data has been copied into np_array.
bool PyCudaBufferDownloader::DownloadSingleCudaBuffer(
    std::shared_ptr buffer, py::array_t& np_array)
{
  upDownloader->SetInput(buffer.get(), 0U);
  if (TASK_EXEC_FAIL == upDownloader->Execute()) {
    return false;
  }

  auto* pRawBuf = (Buffer*)upDownloader->GetOutput(0U);
  if (pRawBuf) {
    auto const downloadSize = pRawBuf->GetRawMemSize();
    // Resize the destination so that it holds exactly downloadSize elements.
    // NOTE(review): the memcpy below copies downloadSize *bytes*, so this
    // presumably relies on a 1-byte element type; the array's element type is
    // not visible in this listing -- confirm.
    if (downloadSize != np_array.size()) {
      np_array.resize({downloadSize}, false);
    }

    memcpy(np_array.mutable_data(), pRawBuf->GetRawMemPtr(), downloadSize);
    return true;
  }

  return false;
}

// Registers the PyCudaBufferDownloader class and its methods in the Python
// module.
void Init_PyCudaBufferDownloader(py::module& m)
{
  py::class_(m, "PyCudaBufferDownloader")
      .def(py::init(), py::arg("elem_size"),
           py::arg("num_elems"), py::arg("gpu_id"),
           R"pbdoc(
        Constructor method.

        :param elem_size: single buffer element size in bytes
        :param num_elems: number of elements in buffer
        :param gpu_id: GPU to use for memcopy
    )pbdoc")
      .def(py::init(), py::arg("elem_size"),
           py::arg("num_elems"), py::arg("context"), py::arg("stream"),
           R"pbdoc(
        Constructor method.

        :param elem_size: single buffer element size in bytes
        :param num_elems: number of elements in buffer
        :param context: CUDA context to use
        :param stream: CUDA stream to use
    )pbdoc")
      .def("DownloadSingleCudaBuffer",
           &PyCudaBufferDownloader::DownloadSingleCudaBuffer,
           py::call_guard(), py::arg("buffer"),
           py::arg("array"),
           R"pbdoc(
        Perform DtoH memcopy.

        :param buffer: input CUDA buffer
        :param array: output numpy array
        :return: True in case of success, False otherwise
    )pbdoc");
}
--------------------------------------------------------------------------------
/src/PyNvCodec/src/PyFrameUploader.cpp:
--------------------------------------------------------------------------------
/*
 * Copyright 2019 NVIDIA Corporation
 * Copyright 2021 Kognia Sports Intelligence
 * Copyright 2021 Videonetics Technology Private Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *    http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
15 | */ 16 | 17 | #include "PyNvCodec.hpp" 18 | 19 | using namespace std; 20 | using namespace VPF; 21 | using namespace chrono; 22 | 23 | namespace py = pybind11; 24 | 25 | constexpr auto TASK_EXEC_SUCCESS = TaskExecStatus::TASK_EXEC_SUCCESS; 26 | constexpr auto TASK_EXEC_FAIL = TaskExecStatus::TASK_EXEC_FAIL; 27 | 28 | PyFrameUploader::PyFrameUploader(uint32_t width, uint32_t height, 29 | Pixel_Format format, uint32_t gpu_ID) 30 | { 31 | surfaceWidth = width; 32 | surfaceHeight = height; 33 | surfaceFormat = format; 34 | 35 | uploader.reset(CudaUploadFrame::Make(CudaResMgr::Instance().GetStream(gpu_ID), 36 | CudaResMgr::Instance().GetCtx(gpu_ID), 37 | surfaceWidth, surfaceHeight, 38 | surfaceFormat)); 39 | } 40 | 41 | PyFrameUploader::PyFrameUploader(uint32_t width, uint32_t height, 42 | Pixel_Format format, CUcontext ctx, 43 | CUstream str) 44 | { 45 | surfaceWidth = width; 46 | surfaceHeight = height; 47 | surfaceFormat = format; 48 | 49 | uploader.reset(CudaUploadFrame::Make(str, ctx, surfaceWidth, surfaceHeight, 50 | surfaceFormat)); 51 | } 52 | 53 | Pixel_Format PyFrameUploader::GetFormat() { return surfaceFormat; } 54 | 55 | shared_ptr 56 | PyFrameUploader::UploadSingleFrame(py::array_t& frame) 57 | { 58 | /* Upload to GPU; 59 | */ 60 | auto pRawFrame = Buffer::Make(frame.size(), frame.mutable_data()); 61 | uploader->SetInput(pRawFrame, 0U); 62 | auto res = uploader->Execute(); 63 | delete pRawFrame; 64 | 65 | if (TASK_EXEC_FAIL == res) { 66 | throw runtime_error("Error uploading frame to GPU"); 67 | } 68 | 69 | /* Get surface; 70 | */ 71 | auto pSurface = (Surface*)uploader->GetOutput(0U); 72 | if (!pSurface) { 73 | throw runtime_error("Error uploading frame to GPU"); 74 | } 75 | 76 | return shared_ptr(pSurface->Clone()); 77 | } 78 | 79 | shared_ptr 80 | PyFrameUploader::UploadSingleFrame(py::array_t& frame) 81 | { 82 | /* Upload to GPU; 83 | */ 84 | auto pRawFrame = 85 | Buffer::Make(frame.size() * sizeof(float), frame.mutable_data()); 86 | 
uploader->SetInput(pRawFrame, 0U); 87 | auto res = uploader->Execute(); 88 | delete pRawFrame; 89 | 90 | if (TASK_EXEC_FAIL == res) { 91 | throw runtime_error("Error uploading frame to GPU"); 92 | } 93 | 94 | /* Get surface; 95 | */ 96 | auto pSurface = (Surface*)uploader->GetOutput(0U); 97 | if (!pSurface) { 98 | throw runtime_error("Error uploading frame to GPU"); 99 | } 100 | 101 | return shared_ptr(pSurface->Clone()); 102 | } 103 | 104 | void Init_PyFrameUploader(py::module& m) 105 | { 106 | py::class_(m, "PyFrameUploader") 107 | .def(py::init(), 108 | py::arg("width"), py::arg("height"), py::arg("format"), 109 | py::arg("gpu_id"), 110 | R"pbdoc( 111 | Constructor method. 112 | 113 | :param width: target Surface width 114 | :param height: target Surface height 115 | :param format: target Surface pixel format 116 | :param gpu_id: what GPU to use for upload. 117 | )pbdoc") 118 | .def(py::init(), 119 | py::arg("width"), py::arg("height"), py::arg("format"), 120 | py::arg("context"), py::arg("stream"), 121 | R"pbdoc( 122 | Constructor method. 123 | 124 | :param width: target Surface width 125 | :param height: target Surface height 126 | :param format: target Surface pixel format 127 | :param context: CUDA context to use for upload 128 | :param stream: CUDA stream to use for upload 129 | )pbdoc") 130 | .def("Format", &PyFrameUploader::GetFormat, 131 | R"pbdoc( 132 | Get pixel format. 133 | )pbdoc") 134 | .def("UploadSingleFrame", 135 | py::overload_cast&>( 136 | &PyFrameUploader::UploadSingleFrame), 137 | py::arg("frame").noconvert(true), 138 | py::return_value_policy::take_ownership, 139 | py::call_guard(), 140 | R"pbdoc( 141 | Perform HtoD memcpy. 
142 | 143 | :param frame: input numpy array 144 | :type frame: numpy.ndarray of type numpy.uint8 145 | :return: Surface 146 | :rtype: PyNvCodec.Surface 147 | )pbdoc") 148 | .def("UploadSingleFrame", 149 | py::overload_cast&>( 150 | &PyFrameUploader::UploadSingleFrame), 151 | py::arg("frame").noconvert(true), 152 | py::return_value_policy::take_ownership, 153 | py::call_guard(), 154 | R"pbdoc( 155 | Perform HtoD memcpy. 156 | 157 | :param frame: input numpy array 158 | :type frame: numpy.ndarray of type numpy.f 159 | :return: Surface 160 | :rtype: PyNvCodec.Surface 161 | )pbdoc"); 162 | } -------------------------------------------------------------------------------- /src/PyNvCodec/src/PySurfaceConverter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Copyright 2021 Kognia Sports Intelligence 4 | * Copyright 2021 Videonetics Technology Private Limited 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "PyNvCodec.hpp" 18 | 19 | using namespace std; 20 | using namespace VPF; 21 | using namespace chrono; 22 | 23 | namespace py = pybind11; 24 | 25 | constexpr auto TASK_EXEC_SUCCESS = TaskExecStatus::TASK_EXEC_SUCCESS; 26 | constexpr auto TASK_EXEC_FAIL = TaskExecStatus::TASK_EXEC_FAIL; 27 | 28 | PySurfaceConverter::PySurfaceConverter(uint32_t width, uint32_t height, 29 | Pixel_Format inFormat, 30 | Pixel_Format outFormat, uint32_t gpuID) 31 | : outputFormat(outFormat) 32 | { 33 | upConverter.reset(ConvertSurface::Make( 34 | width, height, inFormat, outFormat, CudaResMgr::Instance().GetCtx(gpuID), 35 | CudaResMgr::Instance().GetStream(gpuID))); 36 | upCtxBuffer.reset(Buffer::MakeOwnMem(sizeof(ColorspaceConversionContext))); 37 | } 38 | 39 | PySurfaceConverter::PySurfaceConverter(uint32_t width, uint32_t height, 40 | Pixel_Format inFormat, 41 | Pixel_Format outFormat, CUcontext ctx, 42 | CUstream str) 43 | : outputFormat(outFormat) 44 | { 45 | upConverter.reset( 46 | ConvertSurface::Make(width, height, inFormat, outFormat, ctx, str)); 47 | upCtxBuffer.reset(Buffer::MakeOwnMem(sizeof(ColorspaceConversionContext))); 48 | } 49 | 50 | shared_ptr 51 | PySurfaceConverter::Execute(shared_ptr surface, 52 | shared_ptr context) 53 | { 54 | if (!surface) { 55 | return shared_ptr(Surface::Make(outputFormat)); 56 | } 57 | 58 | upConverter->ClearInputs(); 59 | 60 | upConverter->SetInput(surface.get(), 0U); 61 | 62 | if (context) { 63 | upCtxBuffer->CopyFrom(sizeof(ColorspaceConversionContext), context.get()); 64 | upConverter->SetInput((Token*)upCtxBuffer.get(), 1U); 65 | } 66 | 67 | if (TASK_EXEC_SUCCESS != upConverter->Execute()) { 68 | return shared_ptr(Surface::Make(outputFormat)); 69 | } 70 | 71 | auto pSurface = (Surface*)upConverter->GetOutput(0U); 72 | return shared_ptr(pSurface ? 
pSurface->Clone() 73 | : Surface::Make(outputFormat)); 74 | } 75 | 76 | Pixel_Format PySurfaceConverter::GetFormat() { return outputFormat; } 77 | 78 | void Init_PySurfaceConverter(py::module& m) 79 | { 80 | py::class_(m, "PySurfaceConverter") 81 | .def(py::init(), 82 | py::arg("width"), py::arg("height"), py::arg("src_format"), 83 | py::arg("dst_format"), py::arg("gpu_id"), 84 | R"pbdoc( 85 | Constructor method. 86 | 87 | :param width: target Surface width 88 | :param height: target Surface height 89 | :param src_format: input Surface pixel format 90 | :param dst_format: output Surface pixel format 91 | :param gpu_id: what GPU to run conversion on 92 | )pbdoc") 93 | .def(py::init(), 95 | py::arg("width"), py::arg("height"), py::arg("src_format"), 96 | py::arg("dst_format"), py::arg("context"), py::arg("stream"), 97 | R"pbdoc( 98 | Constructor method. 99 | 100 | :param width: target Surface width 101 | :param height: target Surface height 102 | :param src_format: input Surface pixel format 103 | :param dst_format: output Surface pixel format 104 | :param context: CUDA context to use for conversion 105 | :param stream: CUDA stream to use for conversion 106 | )pbdoc") 107 | .def("Format", &PySurfaceConverter::GetFormat, R"pbdoc( 108 | Get pixel format. 109 | )pbdoc") 110 | .def("Execute", &PySurfaceConverter::Execute, py::arg("src"), 111 | py::arg("cc_ctx"), py::return_value_policy::take_ownership, 112 | py::call_guard(), 113 | R"pbdoc( 114 | Perform pixel format conversion. 115 | 116 | :param src: input Surface. Must be of same format class instance was created with. 117 | :param cc_ctx: colorspace conversion context. Describes color space and color range used for conversion. 
118 | :return: Surface of pixel format equal to given to ctor 119 | :rtype: PyNvCodec.Surface 120 | )pbdoc"); 121 | } -------------------------------------------------------------------------------- /src/PyNvCodec/src/PySurfaceDownloader.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Copyright 2021 Kognia Sports Intelligence 4 | * Copyright 2021 Videonetics Technology Private Limited 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "PyNvCodec.hpp" 18 | 19 | using namespace std; 20 | using namespace VPF; 21 | using namespace chrono; 22 | 23 | namespace py = pybind11; 24 | 25 | constexpr auto TASK_EXEC_SUCCESS = TaskExecStatus::TASK_EXEC_SUCCESS; 26 | constexpr auto TASK_EXEC_FAIL = TaskExecStatus::TASK_EXEC_FAIL; 27 | 28 | PySurfaceDownloader::PySurfaceDownloader(uint32_t width, uint32_t height, 29 | Pixel_Format format, uint32_t gpu_ID) 30 | { 31 | surfaceWidth = width; 32 | surfaceHeight = height; 33 | surfaceFormat = format; 34 | 35 | upDownloader.reset( 36 | CudaDownloadSurface::Make(CudaResMgr::Instance().GetStream(gpu_ID), 37 | CudaResMgr::Instance().GetCtx(gpu_ID), 38 | surfaceWidth, surfaceHeight, surfaceFormat)); 39 | } 40 | 41 | PySurfaceDownloader::PySurfaceDownloader(uint32_t width, uint32_t height, 42 | Pixel_Format format, CUcontext ctx, 43 | CUstream str) 44 | { 45 | surfaceWidth = width; 46 | surfaceHeight = height; 47 | surfaceFormat = format; 48 | 49 | upDownloader.reset(CudaDownloadSurface::Make(str, ctx, surfaceWidth, 50 | surfaceHeight, surfaceFormat)); 51 | } 52 | 53 | Pixel_Format PySurfaceDownloader::GetFormat() { return surfaceFormat; } 54 | 55 | bool PySurfaceDownloader::DownloadSingleSurface(shared_ptr surface, 56 | py::array_t& frame) 57 | { 58 | upDownloader->SetInput(surface.get(), 0U); 59 | if (TASK_EXEC_FAIL == upDownloader->Execute()) { 60 | return false; 61 | } 62 | 63 | auto* pRawFrame = (Buffer*)upDownloader->GetOutput(0U); 64 | if (pRawFrame) { 65 | auto const downloadSize = pRawFrame->GetRawMemSize(); 66 | if (downloadSize != frame.size()) { 67 | frame.resize({downloadSize}, false); 68 | } 69 | 70 | memcpy(frame.mutable_data(), pRawFrame->GetRawMemPtr(), downloadSize); 71 | return true; 72 | } 73 | 74 | return false; 75 | } 76 | 77 | bool PySurfaceDownloader::DownloadSingleSurface(shared_ptr surface, 78 | py::array_t& frame) 79 | { 80 | upDownloader->SetInput(surface.get(), 0U); 81 | if (TASK_EXEC_FAIL == upDownloader->Execute()) 
{ 82 | return false; 83 | } 84 | 85 | auto* pRawFrame = (Buffer*)upDownloader->GetOutput(0U); 86 | if (pRawFrame) { 87 | auto const downloadSize = pRawFrame->GetRawMemSize(); 88 | if (downloadSize != frame.size() * sizeof(float)) { 89 | frame.resize({downloadSize}, false); 90 | } 91 | memcpy(frame.mutable_data(), pRawFrame->GetRawMemPtr(), downloadSize); 92 | return true; 93 | } 94 | 95 | return false; 96 | } 97 | 98 | bool PySurfaceDownloader::DownloadSingleSurface(shared_ptr surface, 99 | py::array_t& frame) 100 | { 101 | upDownloader->SetInput(surface.get(), 0U); 102 | if (TASK_EXEC_FAIL == upDownloader->Execute()) { 103 | return false; 104 | } 105 | 106 | auto* pRawFrame = (Buffer*)upDownloader->GetOutput(0U); 107 | if (pRawFrame) { 108 | auto const downloadSize = pRawFrame->GetRawMemSize(); 109 | if (downloadSize != frame.size() * sizeof(uint16_t)) { 110 | frame.resize({downloadSize / sizeof(uint16_t)}, false); 111 | } 112 | memcpy(frame.mutable_data(), pRawFrame->GetRawMemPtr(), downloadSize); 113 | return true; 114 | } 115 | 116 | return false; 117 | } 118 | 119 | void Init_PySurfaceDownloader(py::module& m) 120 | { 121 | py::class_(m, "PySurfaceDownloader") 122 | .def(py::init(), 123 | py::arg("width"), py::arg("height"), py::arg("format"), 124 | py::arg("gpu_id"), 125 | R"pbdoc( 126 | Constructor method. 127 | 128 | :param width: Surface width 129 | :param height: Surface height 130 | :param format: Surface pixel format 131 | :param gpu_id: what GPU does Surface belong to 132 | )pbdoc") 133 | .def(py::init(), 134 | py::arg("width"), py::arg("height"), py::arg("format"), 135 | py::arg("context"), py::arg("stream"), 136 | R"pbdoc( 137 | Constructor method. 
138 | 139 | :param width: Surface width 140 | :param height: Surface height 141 | :param format: Surface pixel format 142 | :param context: CUDA context to use for HtoD memcopy 143 | :param stream: CUDA stream to use for HtoD memcopy 144 | )pbdoc") 145 | .def("Format", &PySurfaceDownloader::GetFormat, 146 | R"pbdoc( 147 | Get pixel format. 148 | )pbdoc") 149 | .def("DownloadSingleSurface", 150 | py::overload_cast, py::array_t&>( 151 | &PySurfaceDownloader::DownloadSingleSurface), 152 | py::arg("surface"), py::arg("frame").noconvert(true), 153 | R"pbdoc( 154 | Perform DtoH memcpy. 155 | 156 | :param src: input Surface 157 | :param frame: output numpy array 158 | :type frame: numpy.ndarray of type numpy.uint8 159 | :return: True in case of success False otherwise 160 | :rtype: Bool 161 | )pbdoc") 162 | .def("DownloadSingleSurface", 163 | py::overload_cast, py::array_t&>( 164 | &PySurfaceDownloader::DownloadSingleSurface), 165 | py::arg("surface"), py::arg("frame").noconvert(true), 166 | py::call_guard(), 167 | R"pbdoc( 168 | Perform DtoH memcpy. 169 | 170 | :param src: input Surface 171 | :param frame: output numpy array 172 | :type frame: numpy.ndarray of type numpy.f 173 | :return: True in case of success False otherwise 174 | :rtype: Bool 175 | )pbdoc") 176 | .def("DownloadSingleSurface", 177 | py::overload_cast, py::array_t&>( 178 | &PySurfaceDownloader::DownloadSingleSurface), 179 | py::arg("surface"), py::arg("frame").noconvert(true), 180 | py::call_guard(), 181 | R"pbdoc( 182 | Perform DtoH memcpy. 
183 | 184 | :param src: input Surface 185 | :param frame: output numpy array 186 | :type frame: numpy.ndarray of type numpy.uint16 187 | :return: True in case of success False otherwise 188 | :rtype: Bool 189 | )pbdoc"); 190 | } -------------------------------------------------------------------------------- /src/PyNvCodec/src/PySurfaceRemaper.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Copyright 2021 Kognia Sports Intelligence 4 | * Copyright 2021 Videonetics Technology Private Limited 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "PyNvCodec.hpp" 18 | 19 | using namespace std; 20 | using namespace VPF; 21 | using namespace chrono; 22 | 23 | namespace py = pybind11; 24 | 25 | constexpr auto TASK_EXEC_SUCCESS = TaskExecStatus::TASK_EXEC_SUCCESS; 26 | constexpr auto TASK_EXEC_FAIL = TaskExecStatus::TASK_EXEC_FAIL; 27 | 28 | PySurfaceRemaper::PySurfaceRemaper(py::array_t& x_map, 29 | py::array_t& y_map, 30 | Pixel_Format format, size_t ctx, size_t str) 31 | : outputFormat(format) 32 | { 33 | upRemaper.reset(RemapSurface::Make(x_map.data(), y_map.data(), x_map.shape(1), 34 | x_map.shape(0), format, (CUcontext)ctx, 35 | (CUstream)str)); 36 | } 37 | 38 | PySurfaceRemaper::PySurfaceRemaper(py::array_t& x_map, 39 | py::array_t& y_map, 40 | Pixel_Format format, uint32_t gpuID) 41 | : outputFormat(format) 42 | { 43 | upRemaper.reset(RemapSurface::Make(x_map.data(), y_map.data(), x_map.shape(1), 44 | x_map.shape(0), format, 45 | CudaResMgr::Instance().GetCtx(gpuID), 46 | CudaResMgr::Instance().GetStream(gpuID))); 47 | } 48 | 49 | Pixel_Format PySurfaceRemaper::GetFormat() { return outputFormat; } 50 | 51 | shared_ptr PySurfaceRemaper::Execute(shared_ptr surface) 52 | { 53 | if (!surface) { 54 | return shared_ptr(Surface::Make(outputFormat)); 55 | } 56 | 57 | upRemaper->SetInput(surface.get(), 0U); 58 | 59 | if (TASK_EXEC_SUCCESS != upRemaper->Execute()) { 60 | return shared_ptr(Surface::Make(outputFormat)); 61 | } 62 | 63 | auto pSurface = (Surface*)upRemaper->GetOutput(0U); 64 | return shared_ptr(pSurface ? pSurface->Clone() 65 | : Surface::Make(outputFormat)); 66 | } 67 | 68 | void Init_PySurfaceRemaper(py::module& m) 69 | { 70 | py::class_(m, "PySurfaceRemaper") 71 | .def(py::init&, py::array_t&, Pixel_Format, 72 | uint32_t>(), 73 | py::arg("x_map"), py::arg("y_map"), py::arg("format"), 74 | py::arg("gpu_id"), R"pbdoc( 75 | Constructor method. 
76 | 77 | :param x_map: x axis map 78 | :param y_map: y axis map 79 | :param format: target Surface pixel format 80 | :param gpu_id: what GPU to run resize on 81 | )pbdoc") 82 | .def(py::init&, py::array_t&, Pixel_Format, 83 | size_t, size_t>(), 84 | py::arg("x_map"), py::arg("y_map"), py::arg("format"), 85 | py::arg("context"), py::arg("stream"), 86 | R"pbdoc( 87 | Constructor method. 88 | 89 | :param x_map: x axis map 90 | :param y_map: y axis map 91 | :param format: target Surface pixel format 92 | :param context: CUDA context to use for remap 93 | :param stream: CUDA stream to use for remap 94 | )pbdoc") 95 | .def("Format", &PySurfaceRemaper::GetFormat, R"pbdoc( 96 | Get pixel format. 97 | )pbdoc") 98 | .def("Execute", &PySurfaceRemaper::Execute, py::arg("src"), 99 | py::return_value_policy::take_ownership, 100 | py::call_guard(), 101 | R"pbdoc( 102 | Remap input Surface. 103 | 104 | :param src: input Surface. Must be of same format class instance was created with. 105 | :return: remapped Surface of dimensions equal to given to ctor 106 | :rtype: PyNvCodec.Surface 107 | )pbdoc"); 108 | } 109 | -------------------------------------------------------------------------------- /src/PyNvCodec/src/PySurfaceResizer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Copyright 2021 Kognia Sports Intelligence 4 | * Copyright 2021 Videonetics Technology Private Limited 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "PyNvCodec.hpp" 18 | 19 | using namespace std; 20 | using namespace VPF; 21 | using namespace chrono; 22 | 23 | namespace py = pybind11; 24 | 25 | constexpr auto TASK_EXEC_SUCCESS = TaskExecStatus::TASK_EXEC_SUCCESS; 26 | constexpr auto TASK_EXEC_FAIL = TaskExecStatus::TASK_EXEC_FAIL; 27 | 28 | PySurfaceResizer::PySurfaceResizer(uint32_t width, uint32_t height, 29 | Pixel_Format format, CUcontext ctx, 30 | CUstream str) 31 | : outputFormat(format) 32 | { 33 | upResizer.reset(ResizeSurface::Make(width, height, format, ctx, str)); 34 | } 35 | 36 | PySurfaceResizer::PySurfaceResizer(uint32_t width, uint32_t height, 37 | Pixel_Format format, uint32_t gpuID) 38 | : outputFormat(format) 39 | { 40 | upResizer.reset(ResizeSurface::Make(width, height, format, 41 | CudaResMgr::Instance().GetCtx(gpuID), 42 | CudaResMgr::Instance().GetStream(gpuID))); 43 | } 44 | 45 | Pixel_Format PySurfaceResizer::GetFormat() { return outputFormat; } 46 | 47 | shared_ptr PySurfaceResizer::Execute(shared_ptr surface) 48 | { 49 | if (!surface) { 50 | return shared_ptr(Surface::Make(outputFormat)); 51 | } 52 | 53 | upResizer->SetInput(surface.get(), 0U); 54 | 55 | if (TASK_EXEC_SUCCESS != upResizer->Execute()) { 56 | return shared_ptr(Surface::Make(outputFormat)); 57 | } 58 | 59 | auto pSurface = (Surface*)upResizer->GetOutput(0U); 60 | return shared_ptr(pSurface ? pSurface->Clone() 61 | : Surface::Make(outputFormat)); 62 | } 63 | 64 | void Init_PySurfaceResizer(py::module& m) 65 | { 66 | py::class_(m, "PySurfaceResizer") 67 | .def(py::init(), 68 | py::arg("width"), py::arg("height"), py::arg("format"), 69 | py::arg("gpu_id"), 70 | R"pbdoc( 71 | Constructor method. 
72 | 73 | :param width: target Surface width 74 | :param height: target Surface height 75 | :param format: target Surface pixel format 76 | :param gpu_id: what GPU to run resize on 77 | )pbdoc") 78 | .def(py::init(), 79 | py::arg("width"), py::arg("height"), py::arg("format"), 80 | py::arg("context"), py::arg("stream"), 81 | R"pbdoc( 82 | Constructor method. 83 | 84 | :param width: target Surface width 85 | :param height: target Surface height 86 | :param format: target Surface pixel format 87 | :param context: CUDA context to use for resize 88 | :param stream: CUDA stream to use for resize 89 | )pbdoc") 90 | .def("Format", &PySurfaceResizer::GetFormat, R"pbdoc( 91 | Get pixel format. 92 | )pbdoc") 93 | .def("Execute", &PySurfaceResizer::Execute, py::arg("src"), 94 | py::return_value_policy::take_ownership, 95 | py::call_guard(), 96 | R"pbdoc( 97 | Resize input Surface. 98 | 99 | :param src: input Surface. Must be of same format class instance was created with. 100 | :return: Surface of dimensions equal to given to ctor 101 | :rtype: PyNvCodec.Surface 102 | )pbdoc"); 103 | } -------------------------------------------------------------------------------- /src/PytorchNvCodec/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2020 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | cmake_minimum_required(VERSION 3.10) 18 | 19 | project(PytorchNvCodec) 20 | 21 | #Add src directory; 22 | set (src_dir ${CMAKE_CURRENT_SOURCE_DIR}/src) 23 | 24 | #Do version stuff; 25 | set (PYTORCH_NVCODEC_VERSION_MAJOR 1) 26 | set (PYTORCH_NVCODEC_VERSION_MINOR 0) 27 | 28 | #Set up python bindings; 29 | set(GENERATE_PYTORCH_EXTENSION FALSE CACHE BOOL "Generate VPF Pytorch extension") 30 | 31 | if(GENERATE_PYTHON_BINDINGS) 32 | if(GENERATE_PYTORCH_EXTENSION) 33 | set(PYTORCH_EXTENSION_SOURCES_DIR ${src_dir} PARENT_SCOPE) 34 | endif(GENERATE_PYTORCH_EXTENSION) 35 | endif(GENERATE_PYTHON_BINDINGS) 36 | 37 | 38 | #Promote variables to parent scope; 39 | set (GENERATE_PYTORCH_EXTENSION ${GENERATE_PYTORCH_EXTENSION} PARENT_SCOPE) 40 | -------------------------------------------------------------------------------- /src/PytorchNvCodec/LICENSE: -------------------------------------------------------------------------------- 1 | From PyTorch: 2 | 3 | Copyright (c) 2016- Facebook, Inc (Adam Paszke) 4 | Copyright (c) 2014- Facebook, Inc (Soumith Chintala) 5 | Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) 6 | Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) 7 | Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) 8 | Copyright (c) 2011-2013 NYU (Clement Farabet) 9 | Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) 10 | Copyright (c) 2006 Idiap Research Institute (Samy Bengio) 11 | Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) 12 | 13 | From Caffe2: 14 | 15 | Copyright (c) 2016-present, Facebook Inc. All rights reserved. 16 | 17 | All contributions by Facebook: 18 | Copyright (c) 2016 Facebook Inc. 19 | 20 | All contributions by Google: 21 | Copyright (c) 2015 Google Inc. 22 | All rights reserved. 
23 | 24 | All contributions by Yangqing Jia: 25 | Copyright (c) 2015 Yangqing Jia 26 | All rights reserved. 27 | 28 | All contributions from Caffe: 29 | Copyright(c) 2013, 2014, 2015, the respective contributors 30 | All rights reserved. 31 | 32 | All other contributions: 33 | Copyright(c) 2015, 2016 the respective contributors 34 | All rights reserved. 35 | 36 | Caffe2 uses a copyright model similar to Caffe: each contributor holds 37 | copyright over their contributions to Caffe2. The project versioning records 38 | all such contribution and copyright details. If a contributor wants to further 39 | mark their specific copyright on a particular contribution, they should 40 | indicate their copyright solely in the commit message of the change when it is 41 | committed. 42 | 43 | All rights reserved. 44 | 45 | Redistribution and use in source and binary forms, with or without 46 | modification, are permitted provided that the following conditions are met: 47 | 48 | 1. Redistributions of source code must retain the above copyright 49 | notice, this list of conditions and the following disclaimer. 50 | 51 | 2. Redistributions in binary form must reproduce the above copyright 52 | notice, this list of conditions and the following disclaimer in the 53 | documentation and/or other materials provided with the distribution. 54 | 55 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America 56 | and IDIAP Research Institute nor the names of its contributors may be 57 | used to endorse or promote products derived from this software without 58 | specific prior written permission. 59 | 60 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 61 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 62 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 63 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 64 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 65 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 66 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 67 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 68 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 69 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 70 | POSSIBILITY OF SUCH DAMAGE. 71 | -------------------------------------------------------------------------------- /src/PytorchNvCodec/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | """ 4 | 5 | __author__ = "NVIDIA" 6 | __copyright__ = "Copyright 2022, NVIDIA" 7 | __credits__ = [] 8 | __license__ = "Apache 2.0" 9 | __version__ = "0.1.0" 10 | __maintainer__ = "NVIDIA" 11 | __email__ = "TODO" 12 | __status__ = "Production" 13 | 14 | 15 | try: 16 | import torch # has to be imported to have libc10 available 17 | # Import native module 18 | from _PytorchNvCodec import * 19 | except ImportError: 20 | import distutils.sysconfig 21 | from os.path import join, dirname 22 | raise RuntimeError("Failed to import native module _PytorchNvCodec! 
" 23 | f"Please check whether \"{join(dirname(__file__), '_PytorchNvCodec' + distutils.sysconfig.get_config_var('EXT_SUFFIX'))}\"" # noqa 24 | " exists and can find all library dependencies (CUDA, ffmpeg).\n" 25 | "On Unix systems, you can use `ldd` on the file to see whether it can find all dependencies.\n" 26 | "On Windows, you can use \"dumpbin /dependents\" in a Visual Studio command prompt or\n" 27 | "https://github.com/lucasg/Dependencies/releases.") 28 | -------------------------------------------------------------------------------- /src/PytorchNvCodec/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "torch" 5 | ] 6 | -------------------------------------------------------------------------------- /src/PytorchNvCodec/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | 5 | if __name__ == "__main__": 6 | 7 | setup( 8 | name="PytorchNvCodec", 9 | install_requires=["torch"], 10 | ext_modules=[CUDAExtension("_PytorchNvCodec", ["src/PytorchNvCodec.cpp"])], 11 | packages=["PytorchNvCodec"], 12 | cmdclass={"build_ext": BuildExtension}, 13 | package_dir={"": "../"}, 14 | cmake_install_dir="../", 15 | ) 16 | -------------------------------------------------------------------------------- /src/TC/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2019 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | cmake_minimum_required(VERSION 3.20) 18 | 19 | # This must be set before enabling CUDA. Otherwise it will be set to the default ARCH by nvcc (e.g. 52 by nvcc 11.7) 20 | if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) 21 | # Define Volta / Turing / Ampere / Ada if not set by the user 22 | set(CMAKE_CUDA_ARCHITECTURES 23 | 60 24 | 70 25 | 72 26 | 75 27 | 80 28 | 86 29 | #89 # Require CUDA 11.8 for CC 89 or cmake 3.23 for CMAKE_CUDA_ARCHITECTURES_{ALL,ALL_MAJOR} 30 | ) 31 | endif() 32 | 33 | project(TC LANGUAGES CXX CUDA) 34 | 35 | add_subdirectory(TC_CORE) 36 | 37 | find_package(CUDAToolkit 11.2 REQUIRED) 38 | include(GenerateExportHeader) 39 | 40 | add_library( 41 | TC 42 | src/MemoryInterfaces.cpp 43 | src/Tasks.cpp 44 | src/TasksColorCvt.cpp 45 | src/FFmpegDemuxer.cpp 46 | src/NvDecoder.cpp 47 | src/NvEncoder.cpp 48 | src/NvEncoderCuda.cpp 49 | src/NppCommon.cpp 50 | src/NvCodecCliOptions.cpp 51 | src/FfmpegSwDecoder.cpp 52 | src/Resize.cu 53 | ) 54 | 55 | if (WIN32) 56 | target_sources(TC PRIVATE 57 | src/tc_dlopen_windows.cpp 58 | ) 59 | else() 60 | target_sources(TC PRIVATE 61 | src/tc_dlopen_unix.cpp 62 | ) 63 | endif() 64 | 65 | set(TC_VERSION_MAJOR 1) 66 | set(TC_VERSION_MINOR 0) 67 | 68 | configure_file(inc/Version.hpp.in tc_version.h) 69 | 70 | option(USE_NVTX "Use NVTX for profiling" FALSE) 71 | if(USE_NVTX) 72 | add_definitions(-DUSE_NVTX) 73 | ## Replace when requiring CMAKE 3.25: 74 | # target_link_libraries(TC PUBLIC CUDA::nvtx3) 75 | target_link_libraries(TC PUBLIC ${CMAKE_DL_LIBRARIES}) 76 | endif() 
77 | 78 | generate_export_header(TC) 79 | 80 | target_link_libraries( 81 | TC 82 | PUBLIC 83 | CUDA::cuda_driver 84 | CUDA::cudart 85 | CUDA::npps 86 | CUDA::nppig 87 | CUDA::nppial 88 | CUDA::nppicc 89 | CUDA::nppidei 90 | TC_CORE 91 | ) 92 | 93 | target_compile_features(TC PRIVATE cxx_std_17) 94 | set_property( 95 | TARGET TC 96 | PROPERTY 97 | # required for shared Python modules in case library is build statically on Unix 98 | POSITION_INDEPENDENT_CODE 99 | ON 100 | ) 101 | 102 | set(TC_VIDEO_CODEC_INTERFACE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party" 103 | CACHE PATH "Path to Video Codec SDK interface headers" 104 | ) 105 | target_include_directories( 106 | TC 107 | PUBLIC 108 | inc 109 | ${TC_VIDEO_CODEC_INTERFACE_DIR} # TODO: check if it can be made private! 110 | ${CMAKE_CURRENT_BINARY_DIR} 111 | ) 112 | if(NOT EXISTS ${TC_VIDEO_CODEC_INTERFACE_DIR}/nvEncodeAPI.h) 113 | message( 114 | FATAL_ERROR 115 | "Could not find nvEncodeAPI.h! " 116 | "Please set TC_VIDEO_CODEC_INTERFACE_DIR=\"${TC_VIDEO_CODEC_INTERFACE_DIR}\" to a location that contains nvEncodeAPI.h!" 117 | ) 118 | endif() 119 | 120 | if(UNIX) 121 | target_link_libraries(TC PUBLIC pthread) 122 | endif(UNIX) 123 | 124 | find_package(PkgConfig) 125 | if(PKG_CONFIG_FOUND AND (NOT WIN32)) # pkgconfig works on Windows, but can not handle runtime dependencies 126 | pkg_check_modules( 127 | LIBAV 128 | REQUIRED 129 | IMPORTED_TARGET 130 | # versions taken from CI 22.04 131 | #libavfilter>=7.110.100 132 | #libavformat>=58.76.100 133 | #libavcodec>=58.134.100 134 | #libswresample>=3.9.100 135 | #libavutil>=56.70.100 136 | # versions taken from CI. 
Ubuntu 20.04 system packages 137 | libavfilter>=7.57.100 138 | libavformat>=58.29.100 139 | libavcodec>=58.54.100 140 | libswresample>=3.5.100 141 | libavutil>=56.31.100 142 | ) 143 | target_link_libraries(TC_CORE PUBLIC PkgConfig::LIBAV) 144 | try_compile(HAS_BSF ${PROJECT_BINARY_DIR} SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/has_bsf_compile_test.c LINK_LIBRARIES PkgConfig::LIBAV) 145 | else() 146 | set(TC_FFMPEG_INCLUDE_DIR "" CACHE PATH "Where to find ffmpeg includes") 147 | set(TC_FFMPEG_LIBRARY_DIR "" CACHE PATH "Where to find ffmpeg libraries") 148 | message(STATUS "TC_FFMPEG_INCLUDE_DIR=\"${TC_FFMPEG_INCLUDE_DIR}\"") 149 | message(STATUS "TC_FFMPEG_ROOT=\"${TC_FFMPEG_ROOT}\"") 150 | message(STATUS "TC_FFMPEG_LIBRARY_DIR=\"${TC_FFMPEG_LIBRARY_DIR}\"") 151 | 152 | macro(link_av_component target lib_name) 153 | find_path( 154 | ${lib_name}_INCLUDE_DIR 155 | NAMES "${lib_name}/${lib_name}.h" "lib${lib_name}/${lib_name}.h" 156 | HINTS ${TC_FFMPEG_INCLUDE_DIR} "${TC_FFMPEG_ROOT}/include" 157 | ) 158 | find_library( 159 | ${lib_name}_LIBRARY 160 | NAMES "${lib_name}" 161 | HINTS ${TC_FFMPEG_LIBRARY_DIR} "${TC_FFMPEG_ROOT}/lib" 162 | ) 163 | if(NOT ${lib_name}_LIBRARY) 164 | message( 165 | FATAL_ERROR 166 | "Could not find ${lib_name}_LIBRARY! Please set TC_FFMPEG_ROOT to indicate its location!" 167 | ) 168 | endif() 169 | if(NOT ${lib_name}_INCLUDE_DIR) 170 | message( 171 | FATAL_ERROR 172 | "Could not find ${lib_name}_INCLUDE_DIR! Please set TC_FFMPEG_ROOT to indicate its location!"
173 | ) 174 | endif() 175 | target_link_libraries(${target} PUBLIC ${${lib_name}_LIBRARY}) 176 | target_include_directories(${target} PUBLIC ${${lib_name}_INCLUDE_DIR}) 177 | endmacro() 178 | link_av_component(TC avfilter) 179 | link_av_component(TC avformat) 180 | link_av_component(TC avcodec) 181 | link_av_component(TC swresample) 182 | link_av_component(TC avutil) 183 | find_path( 184 | BSF_INCLUDE_DIR 185 | NAMES "libavcodec/bsf.h" 186 | HINTS ${TC_FFMPEG_INCLUDE_DIR} "${TC_FFMPEG_ROOT}/include" 187 | ) 188 | if(BSF_INCLUDE_DIR) 189 | set(HAS_BSF TRUE) 190 | else() 191 | set(HAS_BSF FALSE) 192 | message(WARNING "Could not find \"libavcodec/bsf.h\" while other ffmpeg includes could be found." 193 | " This likely means that your FFMPEG installation is old. Still trying to compile VPF!") 194 | endif() 195 | endif() 196 | 197 | if(HAS_BSF) 198 | target_compile_definitions(TC PUBLIC -DHAS_BSF=1) 199 | endif() 200 | message(STATUS "HAS_BSF=${HAS_BSF}") 201 | -------------------------------------------------------------------------------- /src/TC/TC_CORE/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2019 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | # 16 | 17 | cmake_minimum_required(VERSION 3.20) 18 | 19 | project(TC_CORE VERSION 1.0.0 LANGUAGES CXX) 20 | include(GenerateExportHeader) 21 | 22 | set(TC_CORE_VERSION_MAJOR 1) 23 | set(TC_CORE_VERSION_MINOR 0) 24 | 25 | configure_file(inc/Version.hpp.in tc_core_version.h) 26 | 27 | add_library(TC_CORE src/Task.cpp src/Token.cpp) 28 | target_include_directories(TC_CORE PUBLIC inc ${CMAKE_CURRENT_BINARY_DIR}) 29 | 30 | generate_export_header(TC_CORE) 31 | target_compile_features(TC_CORE PRIVATE cxx_std_17) 32 | set_property( 33 | TARGET TC_CORE 34 | PROPERTY 35 | # required for shared Python modules in case library is build statically on Unix 36 | POSITION_INDEPENDENT_CODE 37 | ON 38 | ) 39 | 40 | #install(TARGETS TC_CORE) # must be installed when shared library 41 | -------------------------------------------------------------------------------- /src/TC/TC_CORE/inc/TC_CORE.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * Unless required by applicable law or agreed to in writing, software 8 | * distributed under the License is distributed on an "AS IS" BASIS, 9 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | * See the License for the specific language governing permissions and 11 | * limitations under the License. 
12 | */ 13 | 14 | #pragma once 15 | 16 | #include "tc_core_export.h" // generated by CMake 17 | #include "tc_core_version.h" 18 | #include 19 | #include 20 | 21 | namespace VPF { 22 | 23 | /* Interface for data exchange; 24 | * It represents memory object (CPU- or GPU-side memory etc.); 25 | */ 26 | class TC_CORE_EXPORT Token { 27 | public: 28 | Token &operator=(const Token &other) = delete; 29 | Token(const Token &other) = delete; 30 | 31 | virtual ~Token(); 32 | 33 | protected: 34 | Token(); 35 | }; 36 | 37 | enum class TaskExecStatus { TASK_EXEC_SUCCESS, TASK_EXEC_FAIL }; 38 | 39 | /* Synchronization call which will be done after a blocking task; 40 | */ 41 | typedef void (*p_sync_call)(void *p_args); 42 | 43 | /* Task is unit of processing; Inherit from this class to add user-defined 44 | * processing stage; 45 | */ 46 | class TC_CORE_EXPORT Task { 47 | public: 48 | Task() = delete; 49 | Task(const Task &other) = delete; 50 | Task &operator=(const Task &other) = delete; 51 | 52 | virtual ~Task(); 53 | 54 | /* Method to be overridden in ancestors; 55 | */ 56 | virtual TaskExecStatus Run(); 57 | 58 | /* Call this method to run the task; 59 | */ 60 | virtual TaskExecStatus Execute(); 61 | 62 | /* Sets given token as input; 63 | * Doesn't take ownership of object passed by pointer, only stores it 64 | * within inplementation; 65 | */ 66 | bool SetInput(Token *input, uint32_t input_num); 67 | 68 | /* Sets given token as output; 69 | * Doesn't take ownership of object passed by pointer, only stores it 70 | * within inplementation; 71 | */ 72 | bool SetOutput(Token *output, uint32_t output_num); 73 | 74 | /* Sets all inputs to nullptr; 75 | */ 76 | void ClearInputs(); 77 | 78 | /* Sets all outputs to nullptr; 79 | */ 80 | void ClearOutputs(); 81 | 82 | /* Returns pointer to task input in case of success, nullptr otherwise; 83 | */ 84 | Token *GetInput(uint32_t num_input = 0); 85 | 86 | /* Returns pointer to task output in case of success, nullptr otherwise; 87 | */ 88 
| Token *GetOutput(uint32_t num_output = 0); 89 | 90 | /* Returns number of outputs; 91 | */ 92 | uint64_t GetNumOutputs() const; 93 | 94 | /* Returns number of inputs; 95 | */ 96 | uint64_t GetNumInputs() const; 97 | 98 | /* Returns task name; 99 | */ 100 | const char* GetName() const; 101 | 102 | protected: 103 | Task(const char *str_name, uint32_t num_inputs, uint32_t num_outputs, 104 | p_sync_call sync_call = nullptr, void *p_args = nullptr); 105 | 106 | /* Hidden implementation; 107 | */ 108 | struct TaskImpl *p_impl = nullptr; 109 | }; 110 | } // namespace VPF 111 | -------------------------------------------------------------------------------- /src/TC/TC_CORE/inc/Version.hpp.in: -------------------------------------------------------------------------------- 1 | #define TC_CORE_VERSION_MAJOR @TC_CORE_VERSION_MAJOR@ 2 | #define TC_CORE_VERSION_MINOR @TC_CORE_VERSION_MINOR@ 3 | -------------------------------------------------------------------------------- /src/TC/TC_CORE/src/Task.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * Unless required by applicable law or agreed to in writing, software 8 | * distributed under the License is distributed on an "AS IS" BASIS, 9 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | * See the License for the specific language governing permissions and 11 | * limitations under the License. 
12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "TC_CORE.hpp" 20 | 21 | using namespace std; 22 | using namespace VPF; 23 | 24 | namespace VPF { 25 | struct TaskImpl { 26 | string name; 27 | vector inputs; 28 | vector outputs; 29 | 30 | p_sync_call call; 31 | void *args; 32 | 33 | TaskImpl() = delete; 34 | TaskImpl(const TaskImpl &other) = delete; 35 | TaskImpl &operator=(const TaskImpl &other) = delete; 36 | 37 | TaskImpl(const char *str_name, uint32_t num_inputs, uint32_t num_outputs, 38 | p_sync_call sync_call, void *p_args) 39 | : name(str_name), inputs(num_inputs), outputs(num_outputs), 40 | call(sync_call), args(p_args) {} 41 | }; 42 | } // namespace VPF 43 | 44 | Task::Task(const char *str_name, uint32_t num_inputs, uint32_t num_outputs, 45 | p_sync_call sync_call, void *p_args) 46 | : p_impl(new TaskImpl(str_name, num_inputs, num_outputs, sync_call, p_args)) {} 47 | 48 | TaskExecStatus Task::Run() { return TaskExecStatus::TASK_EXEC_SUCCESS; } 49 | 50 | TaskExecStatus Task::Execute() { 51 | auto const ret = Run(); 52 | if (p_impl->call && p_impl->args) { 53 | p_impl->call(p_impl->args); 54 | } 55 | 56 | return ret; 57 | } 58 | 59 | bool Task::SetInput(Token *p_input, uint32_t num_input) { 60 | if (num_input < p_impl->inputs.size()) { 61 | p_impl->inputs[num_input] = p_input; 62 | return true; 63 | } 64 | 65 | return false; 66 | } 67 | 68 | void Task::ClearInputs() { 69 | for (auto i = 0U; i < GetNumInputs(); i++) { 70 | SetInput(nullptr, i); 71 | } 72 | } 73 | 74 | Token *Task::GetInput(uint32_t num_input) { 75 | if (num_input < p_impl->inputs.size()) { 76 | return p_impl->inputs[num_input]; 77 | } 78 | 79 | return nullptr; 80 | } 81 | 82 | bool Task::SetOutput(Token *p_output, uint32_t num_output) { 83 | if (num_output < p_impl->outputs.size()) { 84 | p_impl->outputs[num_output] = p_output; 85 | return true; 86 | } 87 | return false; 88 | } 89 | 90 | void Task::ClearOutputs() { 91 | for (auto i = 0U; i < 
GetNumOutputs(); i++) { 92 | SetOutput(nullptr, i); 93 | } 94 | } 95 | 96 | Token *Task::GetOutput(uint32_t num_output) { 97 | if (num_output < p_impl->outputs.size()) { 98 | return p_impl->outputs[num_output]; 99 | } 100 | 101 | return nullptr; 102 | } 103 | 104 | const char* Task::GetName() const { return p_impl->name.c_str(); } 105 | 106 | Task::~Task() { delete p_impl; } 107 | // Return type matches the uint64_t declaration in TC_CORE.hpp. 108 | uint64_t Task::GetNumOutputs() const { return p_impl->outputs.size(); } 109 | // Return type matches the uint64_t declaration in TC_CORE.hpp. 110 | uint64_t Task::GetNumInputs() const { return p_impl->inputs.size(); } 111 | -------------------------------------------------------------------------------- /src/TC/TC_CORE/src/Token.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * Unless required by applicable law or agreed to in writing, software 8 | * distributed under the License is distributed on an "AS IS" BASIS, 9 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | * See the License for the specific language governing permissions and 11 | * limitations under the License.
12 | */ 13 | 14 | #include "TC_CORE.hpp" 15 | using namespace VPF; 16 | 17 | Token::Token() = default; 18 | 19 | Token::~Token() = default; -------------------------------------------------------------------------------- /src/TC/has_bsf_compile_test.c: -------------------------------------------------------------------------------- 1 | #include "libavcodec/bsf.h" 2 | int main(int argc, const char** argv) { return 0; } 3 | -------------------------------------------------------------------------------- /src/TC/inc/CodecsSupport.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * Unless required by applicable law or agreed to in writing, software 8 | * distributed under the License is distributed on an "AS IS" BASIS, 9 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | * See the License for the specific language governing permissions and 11 | * limitations under the License. 
12 | */ 13 | 14 | #pragma once 15 | #include "MemoryInterfaces.hpp" 16 | #include "cuviddec.h" 17 | #include 18 | 19 | struct PacketData { 20 | int32_t key; 21 | int64_t pts; 22 | int64_t dts; 23 | uint64_t pos; 24 | uint64_t bsl; 25 | uint64_t duration; 26 | }; 27 | 28 | struct VideoContext { 29 | uint32_t width; 30 | uint32_t height; 31 | uint32_t gop_size; 32 | uint32_t num_frames; 33 | uint32_t is_vfr; 34 | double frameRate; 35 | double avgFrameRate; 36 | double timeBase; 37 | uint32_t streamIndex; 38 | cudaVideoCodec codec; 39 | Pixel_Format format; 40 | ColorSpace color_space; 41 | ColorRange color_range; 42 | }; 43 | 44 | struct AudioContext { 45 | // Reserved for future use; 46 | }; 47 | 48 | struct MuxingParams { 49 | VideoContext videoContext; 50 | AudioContext audioContext; 51 | }; 52 | -------------------------------------------------------------------------------- /src/TC/inc/CuvidFunctions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "nvcuvid.h" 4 | #include "tc_dlopen.h" 5 | #include 6 | 7 | typedef struct CuvidFunctions { 8 | TC_LIB lib; 9 | CUresult (*cuvidGetDecoderCaps)(CUVIDDECODECAPS* pdc); 10 | CUresult (*cuvidCreateDecoder)(CUvideodecoder* phDecoder, 11 | CUVIDDECODECREATEINFO* pdci); 12 | CUresult (*cuvidDestroyDecoder)(CUvideodecoder hDecoder); 13 | CUresult (*cuvidDecodePicture)(CUvideodecoder hDecoder, 14 | CUVIDPICPARAMS* pPicParams); 15 | CUresult (*cuvidGetDecodeStatus)(CUvideodecoder hDecoder, int nPicIdx, 16 | CUVIDGETDECODESTATUS* pDecodeStatus); 17 | CUresult (*cuvidReconfigureDecoder)( 18 | CUvideodecoder hDecoder, CUVIDRECONFIGUREDECODERINFO* pDecReconfigParams); 19 | // CUresult (*cuvidMapVideoFrame)(CUvideodecoder hDecoder, int nPicIdx, 20 | // unsigned int* pDevPtr, unsigned int* pPitch, 21 | // CUVIDPROCPARAMS* pVPP); 22 | // CUresult (*cuvidUnmapVideoFrame)(CUvideodecoder hDecoder, 23 | // unsigned int DevPtr); 24 | CUresult 
(*cuvidMapVideoFrame64)(CUvideodecoder hDecoder, int nPicIdx, 25 | CUdeviceptr* pDevPtr, unsigned int* pPitch, 26 | CUVIDPROCPARAMS* pVPP); 27 | CUresult (*cuvidUnmapVideoFrame64)(CUvideodecoder hDecoder, 28 | unsigned long long DevPtr); 29 | CUresult (*cuvidCtxLockCreate)(CUvideoctxlock* pLock, CUcontext ctx); 30 | CUresult (*cuvidCtxLockDestroy)(CUvideoctxlock lck); 31 | CUresult (*cuvidCtxLock)(CUvideoctxlock lck, unsigned int reserved_flags); 32 | CUresult (*cuvidCtxUnlock)(CUvideoctxlock lck, unsigned int reserved_flags); 33 | 34 | CUresult (*cuvidCreateVideoParser)(CUvideoparser* pObj, 35 | CUVIDPARSERPARAMS* pParams); 36 | CUresult (*cuvidParseVideoData)(CUvideoparser obj, 37 | CUVIDSOURCEDATAPACKET* pPacket); 38 | CUresult (*cuvidDestroyVideoParser)(CUvideoparser obj); 39 | 40 | CUresult (*cuvidCreateVideoSource)(CUvideosource* pObj, 41 | const char* pszFileName, 42 | CUVIDSOURCEPARAMS* pParams); 43 | CUresult (*cuvidCreateVideoSourceW)(CUvideosource* pObj, 44 | const wchar_t* pwszFileName, 45 | CUVIDSOURCEPARAMS* pParams); 46 | CUresult (*cuvidDestroyVideoSource)(CUvideosource obj); 47 | CUresult (*cuvidSetVideoSourceState)(CUvideosource obj, cudaVideoState state); 48 | cudaVideoState (*cuvidGetVideoSourceState)(CUvideosource obj); 49 | CUresult (*cuvidGetSourceVideoFormat)(CUvideosource obj, 50 | CUVIDEOFORMAT* pvidfmt, 51 | unsigned int flags); 52 | CUresult (*cuvidGetSourceAudioFormat)(CUvideosource obj, 53 | CUAUDIOFORMAT* paudfmt, 54 | unsigned int flags); 55 | } CuvidFunctions; 56 | 57 | #define CUVID_LOAD_STRINGIFY(s) _CUVID_LOAD_STRINGIFY(s) 58 | #define _CUVID_LOAD_STRINGIFY(s) #s 59 | 60 | #define CUVID_LOAD_LIBRARY(api, symbol) \ 61 | (api).symbol = (decltype((api).symbol))tc_dlsym( \ 62 | (api).lib, CUVID_LOAD_STRINGIFY(symbol)); \ 63 | if (!(api).symbol) { \ 64 | err = "Could not load function \"" CUVID_LOAD_STRINGIFY(symbol) "\""; \ 65 | goto err; \ 66 | } 67 | #define CUVID_UNLOAD_LIBRARY(api, symbol) (api).symbol = NULL; 68 | 69 | static 
const char* unloadCuvidSymbols(CuvidFunctions* cuvidApi) 70 | { 71 | const char* err = NULL; 72 | if (!cuvidApi) { 73 | return NULL; 74 | } 75 | 76 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidGetDecoderCaps); 77 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidCreateDecoder); 78 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidDestroyDecoder); 79 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidDecodePicture); 80 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidGetDecodeStatus); 81 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidReconfigureDecoder); 82 | // CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidMapVideoFrame); 83 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidMapVideoFrame64); 84 | // CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidUnmapVideoFrame); 85 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidUnmapVideoFrame64); 86 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidCtxLockCreate); 87 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidCtxLockDestroy); 88 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidCtxLock); 89 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidCtxUnlock); 90 | 91 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidCreateVideoParser); 92 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidParseVideoData); 93 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidDestroyVideoParser); 94 | 95 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidCreateVideoSource); 96 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidCreateVideoSourceW); 97 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidDestroyVideoSource); 98 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidSetVideoSourceState); 99 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidGetVideoSourceState); 100 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidGetSourceVideoFormat); 101 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidGetSourceAudioFormat); 102 | 103 | if (tc_dlclose(cuvidApi->lib) != 0) { 104 | return "Failed to close library handle"; 105 | }; 106 | cuvidApi->lib = 0; 107 | return NULL; 108 | } 109 | 110 | static const char* loadCuvidSymbols(CuvidFunctions* cuvidApi, const char* path) 111 | { 112 | const char* err = NULL; 113 | cuvidApi->lib = tc_dlopen(path); 114 | if (!cuvidApi->lib) { 115 | return "Failed to open 
dynamic library: cuvid"; 116 | } 117 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidGetDecoderCaps); 118 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidCreateDecoder); 119 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidDestroyDecoder); 120 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidDecodePicture); 121 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidGetDecodeStatus); 122 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidReconfigureDecoder); 123 | // CUVID_LOAD_LIBRARY(*cuvidApi, cuvidMapVideoFrame); 124 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidMapVideoFrame64); 125 | // CUVID_LOAD_LIBRARY(*cuvidApi, cuvidUnmapVideoFrame); 126 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidUnmapVideoFrame64); 127 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidCtxLockCreate); 128 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidCtxLockDestroy); 129 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidCtxLock); 130 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidCtxUnlock); 131 | 132 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidCreateVideoParser); 133 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidParseVideoData); 134 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidDestroyVideoParser); 135 | 136 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidCreateVideoSource); 137 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidCreateVideoSourceW); 138 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidDestroyVideoSource); 139 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidSetVideoSourceState); 140 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidGetVideoSourceState); 141 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidGetSourceVideoFormat); 142 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidGetSourceAudioFormat); 143 | return NULL; 144 | 145 | err: 146 | unloadCuvidSymbols(cuvidApi); 147 | return err; 148 | } 149 | -------------------------------------------------------------------------------- /src/TC/inc/FFmpegDemuxer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * Unless required by applicable law or agreed to in writing, software 8 | * distributed under the License is distributed on an "AS IS" BASIS, 9 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | * See the License for the specific language governing permissions and 11 | * limitations under the License. 12 | */ 13 | 14 | #pragma once 15 | 16 | #include "tc_export.h" // generated by CMake 17 | 18 | extern "C" { 19 | #include "libavcodec/avcodec.h" 20 | #include "libavformat/avformat.h" 21 | #include "libavformat/avio.h" 22 | // Work around for old FFmpeg version which doesn't ship libavcodec/bsf.h but still has the require bitstream filters 23 | #if HAS_BSF 24 | #include "libavcodec/bsf.h" 25 | #endif 26 | } 27 | 28 | #include "CodecsSupport.hpp" 29 | #include "NvCodecUtils.h" 30 | #include "cuviddec.h" 31 | #include "nvcuvid.h" 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | namespace VPF 38 | { 39 | enum SeekMode { 40 | /* Seek for exact frame number. 41 | * Suited for standalone demuxer seek. */ 42 | EXACT_FRAME = 0, 43 | /* Seek for previous key frame in past. 44 | * Suitable for seek & decode. */ 45 | PREV_KEY_FRAME = 1, 46 | 47 | SEEK_MODE_NUM_ELEMS 48 | }; 49 | 50 | struct SeekContext { 51 | /* Will be set to false for default ctor, true otherwise; 52 | */ 53 | bool use_seek; 54 | 55 | /* Frame number we want to get. Set by user. 56 | */ 57 | int64_t seek_frame; 58 | 59 | /* Timestamp (s) we want to get. Set by user. 60 | */ 61 | double seek_tssec; 62 | 63 | /* Mode in which we seek. */ 64 | SeekMode mode; 65 | 66 | /* PTS of frame found after seek. */ 67 | int64_t out_frame_pts; 68 | 69 | /* Duration of frame found after seek. */ 70 | int64_t out_frame_duration; 71 | 72 | /* Number of frames that were decoded during seek. 
*/ 73 | int64_t num_frames_decoded; 74 | 75 | SeekContext() 76 | : use_seek(false), seek_frame(-1), seek_tssec(-1.0), mode(PREV_KEY_FRAME), 77 | out_frame_pts(-1), out_frame_duration(-1), num_frames_decoded(-1) 78 | { 79 | } 80 | 81 | SeekContext(int64_t frame_id) 82 | : use_seek(true), seek_frame(frame_id), seek_tssec(-1.0), 83 | mode(PREV_KEY_FRAME), out_frame_pts(-1), out_frame_duration(-1), 84 | num_frames_decoded(-1) 85 | { 86 | } 87 | 88 | SeekContext(double frame_ts) 89 | : use_seek(true), seek_tssec(frame_ts), seek_frame(-1), 90 | mode(PREV_KEY_FRAME), out_frame_pts(-1), out_frame_duration(-1), 91 | num_frames_decoded(-1) 92 | { 93 | } 94 | 95 | SeekContext(int64_t frame_num, SeekMode seek_mode) 96 | : use_seek(true), seek_frame(frame_num), seek_tssec(-1.0), mode(seek_mode), 97 | out_frame_pts(-1), out_frame_duration(-1), num_frames_decoded(-1) 98 | { 99 | } 100 | 101 | SeekContext(double frame_ts, SeekMode seek_mode) 102 | : use_seek(true), seek_tssec(frame_ts), mode(seek_mode), seek_frame(-1), 103 | out_frame_pts(-1), out_frame_duration(-1), num_frames_decoded(-1) 104 | { 105 | } 106 | 107 | SeekContext(const SeekContext& other) 108 | : use_seek(other.use_seek), seek_frame(other.seek_frame), 109 | seek_tssec(other.seek_tssec), mode(other.mode), 110 | out_frame_pts(other.out_frame_pts), 111 | out_frame_duration(other.out_frame_duration), 112 | num_frames_decoded(other.num_frames_decoded) 113 | { 114 | } 115 | 116 | SeekContext& operator=(const SeekContext& other) 117 | { 118 | use_seek = other.use_seek; 119 | seek_frame = other.seek_frame; 120 | seek_tssec = other.seek_tssec; 121 | mode = other.mode; 122 | out_frame_pts = other.out_frame_pts; 123 | out_frame_duration = other.out_frame_duration; 124 | num_frames_decoded = other.num_frames_decoded; 125 | return *this; 126 | } 127 | 128 | bool IsByNumber() const { return 0 <= seek_frame; } 129 | bool IsByTimestamp() const { return 0.0 <= seek_tssec; } 130 | }; 131 | 132 | } // namespace VPF 133 | 134 | class 
DataProvider 135 | { 136 | public: 137 | DataProvider(std::istream& istr); 138 | 139 | virtual ~DataProvider() = default; 140 | 141 | virtual int GetData(uint8_t* pBuf, int nBuf); 142 | 143 | private: 144 | std::istream& i_str; 145 | }; 146 | 147 | class TC_EXPORT FFmpegDemuxer 148 | { 149 | AVIOContext* avioc = nullptr; 150 | AVBSFContext *bsfc_annexb = nullptr, *bsfc_sei = nullptr; 151 | AVFormatContext* fmtc = nullptr; 152 | 153 | AVPacket pktSrc, pktDst, pktSei; 154 | AVCodecID eVideoCodec = AV_CODEC_ID_NONE; 155 | AVPixelFormat eChromaFormat; 156 | AVColorSpace color_space; 157 | AVColorRange color_range; 158 | 159 | uint32_t width; 160 | uint32_t height; 161 | uint32_t gop_size; 162 | uint64_t nb_frames; 163 | double framerate; 164 | double avg_framerate; 165 | double timebase; 166 | 167 | int videoStream = -1; 168 | 169 | bool is_seekable; 170 | bool is_mp4H264; 171 | bool is_mp4HEVC; 172 | bool is_VP9; 173 | bool is_EOF = false; 174 | 175 | PacketData last_packet_data; 176 | 177 | std::vector annexbBytes; 178 | std::vector seiBytes; 179 | 180 | explicit FFmpegDemuxer(AVFormatContext* fmtcx); 181 | 182 | AVFormatContext* 183 | CreateFormatContext(DataProvider& pDataProvider, 184 | const std::map& ffmpeg_options); 185 | 186 | AVFormatContext* 187 | CreateFormatContext(const char* szFilePath, 188 | const std::map& ffmpeg_options); 189 | 190 | public: 191 | explicit FFmpegDemuxer( 192 | const char* szFilePath, 193 | const std::map& ffmpeg_options); 194 | explicit FFmpegDemuxer( 195 | DataProvider& pDataProvider, 196 | const std::map& ffmpeg_options); 197 | ~FFmpegDemuxer(); 198 | 199 | AVCodecID GetVideoCodec() const; 200 | 201 | uint32_t GetWidth() const; 202 | 203 | uint32_t GetHeight() const; 204 | 205 | uint32_t GetGopSize() const; 206 | 207 | uint32_t GetNumFrames() const; 208 | 209 | double GetFramerate() const; 210 | 211 | double GetAvgFramerate() const; 212 | 213 | bool IsVFR() const; 214 | 215 | double GetTimebase() const; 216 | 217 | int64_t 
TsFromTime(double ts_sec); 218 | 219 | int64_t TsFromFrameNumber(int64_t frame_num); 220 | 221 | uint32_t GetVideoStreamIndex() const; 222 | 223 | AVPixelFormat GetPixelFormat() const; 224 | 225 | AVColorSpace GetColorSpace() const; 226 | 227 | AVColorRange GetColorRange() const; 228 | 229 | bool Demux(uint8_t*& pVideo, size_t& rVideoBytes, PacketData& pktData, 230 | uint8_t** ppSEI = nullptr, size_t* pSEIBytes = nullptr); 231 | 232 | bool Seek(VPF::SeekContext& seek_ctx, uint8_t*& pVideo, size_t& rVideoBytes, 233 | PacketData& pktData, uint8_t** ppSEI = nullptr, 234 | size_t* pSEIBytes = nullptr); 235 | 236 | void Flush(); 237 | 238 | static int ReadPacket(void* opaque, uint8_t* pBuf, int nBuf); 239 | }; 240 | 241 | inline cudaVideoCodec FFmpeg2NvCodecId(AVCodecID id) 242 | { 243 | switch (id) { 244 | case AV_CODEC_ID_MPEG1VIDEO: 245 | return cudaVideoCodec_MPEG1; 246 | case AV_CODEC_ID_MPEG2VIDEO: 247 | return cudaVideoCodec_MPEG2; 248 | case AV_CODEC_ID_MPEG4: 249 | return cudaVideoCodec_MPEG4; 250 | case AV_CODEC_ID_VC1: 251 | return cudaVideoCodec_VC1; 252 | case AV_CODEC_ID_H264: 253 | return cudaVideoCodec_H264; 254 | case AV_CODEC_ID_HEVC: 255 | return cudaVideoCodec_HEVC; 256 | case AV_CODEC_ID_VP8: 257 | return cudaVideoCodec_VP8; 258 | case AV_CODEC_ID_VP9: 259 | return cudaVideoCodec_VP9; 260 | case AV_CODEC_ID_MJPEG: 261 | return cudaVideoCodec_JPEG; 262 | default: 263 | return cudaVideoCodec_NumCodecs; 264 | } 265 | } 266 | -------------------------------------------------------------------------------- /src/TC/inc/NppCommon.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 NVIDIA Corporation 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * Unless required by applicable law or agreed to in writing, software 8 | * distributed under the License is distributed on an "AS IS" BASIS, 9 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | * See the License for the specific language governing permissions and 11 | * limitations under the License. 12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | void SetupNppContext(CUcontext context, CUstream stream, 22 | NppStreamContext &nppCtx); -------------------------------------------------------------------------------- /src/TC/inc/NvCodecCLIOptions.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019-2020 NVIDIA Corporation 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * Unless required by applicable law or agreed to in writing, software 8 | * distributed under the License is distributed on an "AS IS" BASIS, 9 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | * See the License for the specific language governing permissions and 11 | * limitations under the License. 
12 | */ 13 | 14 | #pragma once 15 | #include "TC_CORE.hpp" 16 | 17 | #include "nvEncodeAPI.h" 18 | #include "tc_export.h" // generated by cmake 19 | #include 20 | #include 21 | #include 22 | 23 | #define CHECK_API_VERSION(major, minor) \ 24 | ((major < NVENCAPI_MAJOR_VERSION) || \ 25 | (major == NVENCAPI_MAJOR_VERSION) && (minor <= NVENCAPI_MINOR_VERSION)) 26 | 27 | extern "C" { 28 | struct AVDictionary; 29 | } 30 | 31 | namespace VPF 32 | { 33 | TC_EXPORT std::map GetNvencInitParams(); 34 | 35 | class TC_EXPORT NvEncoderClInterface 36 | { 37 | public: 38 | explicit NvEncoderClInterface(const std::map&); 39 | ~NvEncoderClInterface() = default; 40 | 41 | // Will setup the parameters from CLI arguments; 42 | void SetupInitParams(NV_ENC_INITIALIZE_PARAMS& params, bool is_reconfigure, 43 | NV_ENCODE_API_FUNCTION_LIST api_func, void* encoder, 44 | std::map& capabilities, 45 | bool print_settings = true) const; 46 | 47 | private: 48 | void SetupEncConfig(NV_ENC_CONFIG& config, struct ParentParams& params, 49 | bool is_reconfigure, bool print_settings) const; 50 | 51 | void SetupRateControl(NV_ENC_RC_PARAMS& params, 52 | struct ParentParams& parent_params, bool is_reconfigure, 53 | bool print_settings) const; 54 | 55 | void SetupH264Config(NV_ENC_CONFIG_H264& config, struct ParentParams& params, 56 | bool is_reconfigure, bool print_settings) const; 57 | 58 | void SetupHEVCConfig(NV_ENC_CONFIG_HEVC& config, struct ParentParams& params, 59 | bool is_reconfigure, bool print_settings) const; 60 | 61 | // H.264 and H.265 has exactly same VUI parameters config; 62 | void SetupVuiConfig(NV_ENC_CONFIG_H264_VUI_PARAMETERS& params, 63 | struct ParentParams& parent_params, bool is_reconfigure, 64 | bool print_settings) const; 65 | 66 | std::map options; 67 | }; 68 | 69 | class TC_EXPORT NvDecoderClInterface 70 | { 71 | public: 72 | explicit NvDecoderClInterface(const std::map&); 73 | ~NvDecoderClInterface(); 74 | 75 | AVDictionary* GetOptions(); 76 | 77 | uint32_t 
GetNumSideDataEntries(); 78 | 79 | private: 80 | struct NvDecoderClInterface_Impl* pImpl = nullptr; 81 | }; 82 | } // namespace VPF 83 | -------------------------------------------------------------------------------- /src/TC/inc/NvCodecUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * Unless required by applicable law or agreed to in writing, software 8 | * distributed under the License is distributed on an "AS IS" BASIS, 9 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | * See the License for the specific language governing permissions and 11 | * limitations under the License. 12 | */ 13 | 14 | #pragma once 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #ifndef _WIN32 25 | #define _stricmp strcasecmp 26 | #endif 27 | 28 | void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth, 29 | int nDstHeight, unsigned char *dpSrcNv12, int nSrcPitch, 30 | int nSrcWidth, int nSrcHeight, 31 | unsigned char *dpDstNv12UV = nullptr, cudaStream_t S = 0); -------------------------------------------------------------------------------- /src/TC/inc/NvDecoder.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * Unless required by applicable law or agreed to in writing, software 8 | * distributed under the License is distributed on an "AS IS" BASIS, 9 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | * See the License for the specific language governing permissions and 11 | * limitations under the License. 12 | */ 13 | 14 | #pragma once 15 | 16 | #include "CodecsSupport.hpp" 17 | #include "tc_core_export.h" 18 | 19 | #include "CodecsSupport.hpp" 20 | #include "CuvidFunctions.h" 21 | #include "nvcuvid.h" 22 | #include 23 | #include 24 | 25 | struct DecodedFrameContext { 26 | CUdeviceptr mem; 27 | uint64_t pts; 28 | uint64_t bsl; 29 | PacketData out_pdata; 30 | 31 | // Set up this flag to feed decoder with empty input without setting up EOS 32 | // flag; 33 | bool no_eos; 34 | 35 | DecodedFrameContext(CUdeviceptr new_ptr, uint64_t new_pts, uint64_t new_poc) 36 | : mem(new_ptr), pts(new_pts), no_eos(false) 37 | { 38 | } 39 | 40 | DecodedFrameContext(CUdeviceptr new_ptr, uint64_t new_pts, uint64_t new_poc, 41 | bool new_no_eos) 42 | : mem(new_ptr), pts(new_pts), no_eos(new_no_eos) 43 | { 44 | } 45 | 46 | DecodedFrameContext() : mem(0U), pts(0U), no_eos(false) {} 47 | }; 48 | 49 | unsigned long GetNumDecodeSurfaces(cudaVideoCodec eCodec, unsigned int nWidth, 50 | unsigned int nHeight); 51 | 52 | class decoder_error : public std::runtime_error 53 | { 54 | public: 55 | decoder_error(const char* str) : std::runtime_error(str) {} 56 | }; 57 | 58 | class cuvid_parser_error : public std::runtime_error 59 | { 60 | public: 61 | cuvid_parser_error(const char* str) : std::runtime_error(str) {} 62 | }; 63 | 64 | namespace VPF 65 | { 66 | class Buffer; 67 | }; 68 | 69 | class TC_CORE_EXPORT NvDecoder 70 | { 71 | public: 72 | NvDecoder() = delete; 73 | NvDecoder(const NvDecoder& other) = delete; 74 | NvDecoder& operator=(const NvDecoder& other) = delete; 75 | 
76 | NvDecoder(CUstream cuStream, CUcontext cuContext, cudaVideoCodec eCodec, 77 | bool bLowLatency = false, int maxWidth = 0, int maxHeight = 0); 78 | 79 | ~NvDecoder(); 80 | 81 | int GetWidth(); 82 | 83 | int GetHeight(); 84 | 85 | int GetChromaHeight(); 86 | 87 | int GetFrameSize(); 88 | 89 | int GetDeviceFramePitch(); 90 | 91 | int GetBitDepth(); 92 | 93 | bool DecodeLockSurface(VPF::Buffer const* encFrame, 94 | struct PacketData const& pdata, 95 | DecodedFrameContext& decCtx); 96 | 97 | void UnlockSurface(CUdeviceptr& lockedSurface); 98 | 99 | void Init(CUVIDEOFORMAT* format) { HandleVideoSequence(format); } 100 | 101 | cudaVideoCodec GetCodec() const; 102 | cudaVideoChromaFormat GetChromaFormat() const; 103 | inline const CuvidFunctions& _api() { return m_api; }; 104 | 105 | private: 106 | /* All the functions with Handle* prefix doesn't 107 | * throw as they are called from different thread; 108 | */ 109 | static int CUDAAPI 110 | HandleVideoSequenceProc(void* pUserData, CUVIDEOFORMAT* pVideoFormat) noexcept 111 | { 112 | return ((NvDecoder*)pUserData)->HandleVideoSequence(pVideoFormat); 113 | } 114 | 115 | static int CUDAAPI 116 | HandlePictureDecodeProc(void* pUserData, CUVIDPICPARAMS* pPicParams) noexcept 117 | { 118 | return ((NvDecoder*)pUserData)->HandlePictureDecode(pPicParams); 119 | } 120 | 121 | static int CUDAAPI HandlePictureDisplayProc( 122 | void* pUserData, CUVIDPARSERDISPINFO* pDispInfo) noexcept 123 | { 124 | return ((NvDecoder*)pUserData)->HandlePictureDisplay(pDispInfo); 125 | } 126 | 127 | int HandleVideoSequence(CUVIDEOFORMAT* pVideoFormat) noexcept; 128 | 129 | int HandlePictureDecode(CUVIDPICPARAMS* pPicParams) noexcept; 130 | 131 | int HandlePictureDisplay(CUVIDPARSERDISPINFO* pDispInfo) noexcept; 132 | 133 | int ReconfigureDecoder(CUVIDEOFORMAT* pVideoFormat); 134 | 135 | struct NvDecoderImpl* p_impl; 136 | CuvidFunctions m_api{}; 137 | }; 138 | -------------------------------------------------------------------------------- 
/src/TC/inc/NvEncoderCuda.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * Unless required by applicable law or agreed to in writing, software 8 | * distributed under the License is distributed on an "AS IS" BASIS, 9 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | * See the License for the specific language governing permissions and 11 | * limitations under the License. 12 | */ 13 | 14 | #pragma once 15 | 16 | #include "NvEncoder.h" 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #define CHECK_CUDA_CALL(call) \ 23 | do { \ 24 | CUresult err__ = call; \ 25 | if (err__ != CUDA_SUCCESS) { \ 26 | const char *szErrName = NULL; \ 27 | cuGetErrorName(err__, &szErrName); \ 28 | std::ostringstream errorLog; \ 29 | errorLog << "CUDA driver API error " << szErrName; \ 30 | throw NVENCException::makeNVENCException( \ 31 | errorLog.str(), NV_ENC_ERR_GENERIC, __FUNCTION__, __FILE__, \ 32 | __LINE__); \ 33 | } \ 34 | } while (0) 35 | 36 | /** 37 | * @brief Encoder for CUDA device memory. 
38 | */ 39 | class NvEncoderCuda final : public NvEncoder { 40 | public: 41 | NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight, 42 | NV_ENC_BUFFER_FORMAT eBufferFormat, 43 | uint32_t nExtraOutputDelay = 3, 44 | bool bMotionEstimationOnly = false, 45 | bool bOPInVideoMemory = false); 46 | 47 | ~NvEncoderCuda() override; 48 | 49 | static void CopyToDeviceFrame( 50 | CUcontext device, CUstream stream, void *pSrcFrame, uint32_t nSrcPitch, 51 | CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height, 52 | CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT pixelFormat, 53 | const uint32_t dstChromaOffsets[], uint32_t numChromaPlanes); 54 | 55 | NV_ENCODE_API_FUNCTION_LIST GetApi() const; 56 | 57 | void* GetEncoder() const; 58 | 59 | private: 60 | /** 61 | * @brief This function is used to release the input buffers allocated for 62 | * encoding. This function is an override of virtual function 63 | * NvEncoder::ReleaseInputBuffers(). 64 | */ 65 | void ReleaseInputBuffers() override; 66 | 67 | /** 68 | * @brief This function is used to allocate input buffers for encoding. 69 | * This function is an override of virtual function 70 | * NvEncoder::AllocateInputBuffers(). 71 | */ 72 | void AllocateInputBuffers(int32_t numInputBuffers) override; 73 | 74 | /** 75 | * @brief This is a private function to release CUDA device memory used for 76 | * encoding. 
77 | */ 78 | void ReleaseCudaResources(); 79 | 80 | CUcontext m_cuContext; 81 | 82 | size_t m_cudaPitch = 0; 83 | }; 84 | -------------------------------------------------------------------------------- /src/TC/inc/Version.hpp.in: -------------------------------------------------------------------------------- 1 | #define TC_VERSION_MAJOR @TC_VERSION_MAJOR@ 2 | #define TC_VERSION_MINOR @TC_VERSION_MINOR@ -------------------------------------------------------------------------------- /src/TC/inc/cuvid_function_pointer.h: -------------------------------------------------------------------------------- 1 | #include "nvcuvid.h" 2 | #include 3 | 4 | #ifdef _WIN32 5 | #include 6 | #define TC_LIB HMODULE 7 | #else 8 | #define TC_LIB int 9 | #endif 10 | 11 | typedef struct CuvidFunctions { 12 | TC_LIB lib; 13 | CUresult (*cuvidGetDecoderCaps)(CUVIDDECODECAPS* pdc); 14 | CUresult (*cuvidCreateDecoder)(CUvideodecoder* phDecoder, 15 | CUVIDDECODECREATEINFO* pdci); 16 | CUresult (*cuvidDestroyDecoder)(CUvideodecoder hDecoder); 17 | CUresult (*cuvidDecodePicture)(CUvideodecoder hDecoder, 18 | CUVIDPICPARAMS* pPicParams); 19 | CUresult (*cuvidGetDecodeStatus)(CUvideodecoder hDecoder, int nPicIdx, 20 | CUVIDGETDECODESTATUS* pDecodeStatus); 21 | CUresult (*cuvidReconfigureDecoder)( 22 | CUvideodecoder hDecoder, CUVIDRECONFIGUREDECODERINFO* pDecReconfigParams); 23 | CUresult (*cuvidMapVideoFrame)(CUvideodecoder hDecoder, int nPicIdx, 24 | unsigned int* pDevPtr, unsigned int* pPitch, 25 | CUVIDPROCPARAMS* pVPP); 26 | CUresult (*cuvidUnmapVideoFrame)(CUvideodecoder hDecoder, 27 | unsigned int DevPtr); 28 | CUresult (*cuvidMapVideoFrame64)(CUvideodecoder hDecoder, int nPicIdx, 29 | unsigned int* pDevPtr, unsigned int* pPitch, 30 | CUVIDPROCPARAMS* pVPP); 31 | CUresult (*cuvidUnmapVideoFrame64)(CUvideodecoder hDecoder, 32 | unsigned long long DevPtr); 33 | CUresult (*cuvidCtxLockCreate)(CUvideoctxlock* pLock, CUcontext ctx); 34 | CUresult (*cuvidCtxLockDestroy)(CUvideoctxlock lck); 35 
| CUresult (*cuvidCtxLock)(CUvideoctxlock lck, unsigned int reserved_flags); 36 | CUresult (*cuvidCtxUnlock)(CUvideoctxlock lck, unsigned int reserved_flags); 37 | } CuvidFunctions; 38 | 39 | #define CUVID_LOAD_STRINGIFY(s) _CUVID_LOAD_STRINGIFY(s) 40 | #define _CUVID_LOAD_STRINGIFY(s) #s 41 | 42 | #define CUVID_LOAD_LIBRARY(api, symbol) \ 43 | (api).(symbol) = tc_dlsym((api).(lib), (symbol)); \ 44 | if (!(api).(function)) { \ 45 | err = "Could not load function \"" CUVID_LOAD_STRINGIFY(symbol) "\""; \ 46 | goto err; \ 47 | } 48 | #define CUVID_UNLOAD_LIBRARY(api, symbol) (api).(symbol) = NULL; 49 | 50 | static const char* unloadCuvidSymbols(CuvidFuncitons* cuvidApi, 51 | const char* file) 52 | { 53 | const char* err = NULL; 54 | if (!cuvidApi) { 55 | return NULL; 56 | } 57 | 58 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidGetDecoderCaps); 59 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidCreateDecoder); 60 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidDestroyDecoder); 61 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidDecodePicture); 62 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidDecodeStatus); 63 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidReconfigureEncoder); 64 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidMapVideoFrame); 65 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidMapVideoFrame64); 66 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidUnmapVideoFrame); 67 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidUnmapVideoFrame64); 68 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidCtxLockCreate); 69 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidCtxLockDestroy); 70 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidCtxLock); 71 | CUVID_UNLOAD_LIBRARY(*cuvidApi, cuvidCtxLockUnlock); 72 | if (tc_dlclose(cuvidApi->lib) != 0) { 73 | return "Failed to close library handle"; 74 | }; 75 | return NULL; 76 | } 77 | 78 | static bool loadCuvidSymbols(CuvidFuncitons* cuvidApi, const char* file) 79 | { 80 | const char* err = NULL; 81 | cuvidApi->lib = tc_dlopen(path); 82 | if (!lib) { 83 | return "Failed to open dynamic library"; 84 | } 85 | CUVID_LOAD_LIBRARY(*cuvidApi, 
cuvidGetDecoderCaps); 86 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidCreateDecoder); 87 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidDestroyDecoder); 88 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidDecodePicture); 89 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidDecodeStatus); 90 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidReconfigureEncoder); 91 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidMapVideoFrame); 92 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidMapVideoFrame64); 93 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidUnmapVideoFrame); 94 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidUnmapVideoFrame64); 95 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidCtxLockCreate); 96 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidCtxLockDestroy); 97 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidCtxLock); 98 | CUVID_LOAD_LIBRARY(*cuvidApi, cuvidCtxLockUnlock); 99 | 100 | return NULL; 101 | 102 | err: 103 | unloadCuvidSymbols(cuvidApi); 104 | return err; 105 | } 106 | -------------------------------------------------------------------------------- /src/TC/inc/tc_dlopen.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef _WIN32 4 | #include 5 | #define TC_LIB HMODULE 6 | #else 7 | #define TC_LIB void* 8 | #endif 9 | 10 | TC_LIB tc_dlopen(const char* path); 11 | void* tc_dlsym(TC_LIB lib, const char* symbol); 12 | char* tc_dlerror(void); 13 | int tc_dlclose(TC_LIB lib); 14 | -------------------------------------------------------------------------------- /src/TC/src/NppCommon.cpp: -------------------------------------------------------------------------------- 1 | #include "NppCommon.hpp" 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace std; 7 | 8 | static mutex gNppMutex; 9 | 10 | void SetupNppContext(CUcontext context, CUstream stream, 11 | NppStreamContext &nppCtx) { 12 | memset(&nppCtx, 0, sizeof(nppCtx)); 13 | 14 | lock_guard lock(gNppMutex); 15 | cuCtxPushCurrent(context); 16 | 17 | CUdevice device; 18 | auto res = cuCtxGetDevice(&device); 19 | if (CUDA_SUCCESS != res) { 20 | cerr << "Failed to get 
CUDA device. Error code: " << res << endl; 21 | } 22 | 23 | int multiProcessorCount = 0; 24 | res = cuDeviceGetAttribute(&multiProcessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device); 25 | if (CUDA_SUCCESS != res) { 26 | cerr << "Failed to get CUDA device. Error code: " << res << endl; 27 | } 28 | 29 | int maxThreadsPerBlock = 0; 30 | res = cuDeviceGetAttribute(&maxThreadsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device); 31 | if (CUDA_SUCCESS != res) { 32 | cerr << "Failed to get CUDA device. Error code: " << res << endl; 33 | } 34 | 35 | int sharedMemPerBlock = 0; 36 | res = cuDeviceGetAttribute(&sharedMemPerBlock, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, device); 37 | if (CUDA_SUCCESS != res) { 38 | cerr << "Failed to get CUDA device. Error code: " << res << endl; 39 | } 40 | 41 | int major = 0; 42 | res = cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device); 43 | if (CUDA_SUCCESS != res) { 44 | cerr << "Failed to get CUDA device. Error code: " << res << endl; 45 | } 46 | 47 | int minor = 0; 48 | res = cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device); 49 | if (CUDA_SUCCESS != res) { 50 | cerr << "Failed to get CUDA device. 
Error code: " << res << endl; 51 | } 52 | 53 | nppCtx.hStream = stream; 54 | nppCtx.nCudaDeviceId = (int)device; 55 | nppCtx.nMultiProcessorCount = multiProcessorCount; 56 | nppCtx.nMaxThreadsPerBlock = maxThreadsPerBlock; 57 | nppCtx.nSharedMemPerBlock = sharedMemPerBlock; 58 | nppCtx.nCudaDevAttrComputeCapabilityMajor = major; 59 | nppCtx.nCudaDevAttrComputeCapabilityMinor = minor; 60 | } -------------------------------------------------------------------------------- /src/TC/src/NvEncoderCuda.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * Unless required by applicable law or agreed to in writing, software 8 | * distributed under the License is distributed on an "AS IS" BASIS, 9 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | * See the License for the specific language governing permissions and 11 | * limitations under the License. 
12 | */ 13 | 14 | #include "MemoryInterfaces.hpp" 15 | #include "NvEncoderCuda.h" 16 | #include 17 | using namespace std; 18 | 19 | NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, 20 | uint32_t nHeight, 21 | NV_ENC_BUFFER_FORMAT eBufferFormat, 22 | uint32_t nExtraOutputDelay, 23 | bool bMotionEstimationOnly, 24 | bool bOutputInVideoMemory) 25 | : 26 | 27 | NvEncoder(NV_ENC_DEVICE_TYPE_CUDA, cuContext, nWidth, nHeight, 28 | eBufferFormat, nExtraOutputDelay, bMotionEstimationOnly, 29 | bOutputInVideoMemory), 30 | 31 | m_cuContext(cuContext) { 32 | if (!m_hEncoder) { 33 | NVENC_THROW_ERROR("Encoder Initialization failed", 34 | NV_ENC_ERR_INVALID_DEVICE); 35 | } 36 | 37 | if (!m_cuContext) { 38 | NVENC_THROW_ERROR("Invalid Cuda Context", NV_ENC_ERR_INVALID_DEVICE); 39 | } 40 | } 41 | 42 | NvEncoderCuda::~NvEncoderCuda() { ReleaseCudaResources(); } 43 | 44 | void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) { 45 | if (!IsHWEncoderInitialized()) { 46 | NVENC_THROW_ERROR("Encoder initialization failed", 47 | NV_ENC_ERR_ENCODER_NOT_INITIALIZED); 48 | } 49 | 50 | // for MEOnly mode we need to allocate separate set of buffers for reference 51 | // frame 52 | int numCount = m_bMotionEstimationOnly ? 
2 : 1; 53 | 54 | for (int count = 0; count < numCount; count++) { 55 | CudaCtxPush lock(m_cuContext); 56 | vector inputFrames; 57 | 58 | for (int i = 0; i < numInputBuffers; i++) { 59 | CUdeviceptr pDeviceFrame; 60 | uint32_t chromaHeight = 61 | GetNumChromaPlanes(GetPixelFormat()) * 62 | GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight()); 63 | if (GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 || 64 | GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV){ 65 | chromaHeight = GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight()); 66 | } 67 | 68 | CHECK_CUDA_CALL(cuMemAllocPitch( 69 | &pDeviceFrame, &m_cudaPitch, 70 | GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()), 71 | GetMaxEncodeHeight() + chromaHeight, 16)); 72 | inputFrames.push_back((void *)pDeviceFrame); 73 | } 74 | 75 | RegisterInputResources(inputFrames, 76 | NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR, 77 | GetMaxEncodeWidth(), GetMaxEncodeHeight(), 78 | (int)m_cudaPitch, GetPixelFormat(), count == 1); 79 | } 80 | } 81 | 82 | void NvEncoderCuda::ReleaseInputBuffers() { ReleaseCudaResources(); } 83 | 84 | void NvEncoderCuda::ReleaseCudaResources() { 85 | if (!m_hEncoder) { 86 | return; 87 | } 88 | 89 | if (!m_cuContext) { 90 | return; 91 | } 92 | 93 | UnregisterInputResources(); 94 | 95 | CudaCtxPush lock(m_cuContext); 96 | 97 | for (auto inputFrame : m_vInputFrames) { 98 | if (inputFrame.inputPtr) { 99 | cuMemFree(reinterpret_cast(inputFrame.inputPtr)); 100 | } 101 | } 102 | m_vInputFrames.clear(); 103 | 104 | for (auto referenceFrame : m_vReferenceFrames) { 105 | if (referenceFrame.inputPtr) { 106 | cuMemFree(reinterpret_cast(referenceFrame.inputPtr)); 107 | } 108 | } 109 | m_vReferenceFrames.clear(); 110 | 111 | m_cuContext = nullptr; 112 | } 113 | 114 | void NvEncoderCuda::CopyToDeviceFrame( 115 | CUcontext ctx, CUstream stream, void *pSrcFrame, uint32_t nSrcPitch, 116 | CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height, 117 | CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT 
pixelFormat, 118 | const uint32_t dstChromaOffsets[], uint32_t numChromaPlanes) { 119 | if (srcMemoryType != CU_MEMORYTYPE_HOST && 120 | srcMemoryType != CU_MEMORYTYPE_DEVICE) { 121 | NVENC_THROW_ERROR("Invalid source memory type for copy", 122 | NV_ENC_ERR_INVALID_PARAM); 123 | } 124 | 125 | CudaCtxPush lock(ctx); 126 | 127 | uint32_t srcPitch = 128 | nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width); 129 | CUDA_MEMCPY2D m = {0}; 130 | m.srcMemoryType = srcMemoryType; 131 | if (srcMemoryType == CU_MEMORYTYPE_HOST) { 132 | m.srcHost = pSrcFrame; 133 | } else { 134 | m.srcDevice = (CUdeviceptr)pSrcFrame; 135 | } 136 | m.srcPitch = srcPitch; 137 | m.dstMemoryType = CU_MEMORYTYPE_DEVICE; 138 | m.dstDevice = pDstFrame; 139 | m.dstPitch = dstPitch; 140 | m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width); 141 | m.Height = height; 142 | 143 | CHECK_CUDA_CALL(cuMemcpy2DAsync(&m, stream)); 144 | 145 | vector srcChromaOffsets; 146 | NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, height, 147 | srcChromaOffsets); 148 | uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, height); 149 | uint32_t destChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, dstPitch); 150 | uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch); 151 | uint32_t chromaWidthInBytes = 152 | NvEncoder::GetChromaWidthInBytes(pixelFormat, width); 153 | 154 | for (uint32_t i = 0; i < numChromaPlanes; ++i) { 155 | if (chromaHeight) { 156 | if (srcMemoryType == CU_MEMORYTYPE_HOST) { 157 | m.srcHost = ((uint8_t *)pSrcFrame + srcChromaOffsets[i]); 158 | } else { 159 | m.srcDevice = (CUdeviceptr)((uint8_t *)pSrcFrame + srcChromaOffsets[i]); 160 | } 161 | m.srcPitch = srcChromaPitch; 162 | 163 | m.dstDevice = (CUdeviceptr)((uint8_t *)pDstFrame + dstChromaOffsets[i]); 164 | m.dstPitch = destChromaPitch; 165 | m.WidthInBytes = chromaWidthInBytes; 166 | m.Height = chromaHeight; 167 | CHECK_CUDA_CALL(cuMemcpy2DAsync(&m, stream)); 168 | } 169 
| } 170 | } 171 | 172 | NV_ENCODE_API_FUNCTION_LIST NvEncoderCuda::GetApi() const 173 | { 174 | return m_nvenc; 175 | } 176 | 177 | void * NvEncoderCuda::GetEncoder() const 178 | { 179 | return m_hEncoder; 180 | } 181 | -------------------------------------------------------------------------------- /src/TC/src/Resize.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 NVIDIA Corporation 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * Unless required by applicable law or agreed to in writing, software 8 | * distributed under the License is distributed on an "AS IS" BASIS, 9 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | * See the License for the specific language governing permissions and 11 | * limitations under the License. 
12 | */ 13 | 14 | #include 15 | #include "NvCodecUtils.h" 16 | 17 | template 18 | static __global__ void 19 | Resize( 20 | cudaTextureObject_t texY, 21 | cudaTextureObject_t texUv, 22 | uint8_t *pDst, 23 | uint8_t *pDstUV, 24 | int nPitch, 25 | int nWidth, 26 | int nHeight, 27 | float fxScale, 28 | float fyScale) 29 | { 30 | int ix = blockIdx.x * blockDim.x + threadIdx.x, 31 | iy = blockIdx.y * blockDim.y + threadIdx.y; 32 | 33 | if (ix >= nWidth / 2 || iy >= nHeight / 2) { 34 | return; 35 | } 36 | 37 | int x = ix * 2, y = iy * 2; 38 | typedef decltype(YuvUnitx2::x) YuvUnit; 39 | const int MAX = 1 << (sizeof(YuvUnit) * 8); 40 | *(YuvUnitx2 *) (pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2{ 41 | (YuvUnit) (tex2D(texY, x / fxScale, y / fyScale) * MAX), 42 | (YuvUnit) (tex2D(texY, (x + 1) / fxScale, y / fyScale) * MAX) 43 | }; 44 | y++; 45 | *(YuvUnitx2 *) (pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2{ 46 | (YuvUnit) (tex2D(texY, x / fxScale, y / fyScale) * MAX), 47 | (YuvUnit) (tex2D(texY, (x + 1) / fxScale, y / fyScale) * MAX) 48 | }; 49 | float2 uv = tex2D(texUv, ix / fxScale, (nHeight + iy) / fyScale + 0.5f); 50 | *(YuvUnitx2 *) (pDstUV + iy * nPitch + ix * 2 * sizeof(YuvUnit)) = 51 | YuvUnitx2{(YuvUnit) (uv.x * MAX), (YuvUnit) (uv.y * MAX)}; 52 | } 53 | 54 | template 55 | static void 56 | Resize( 57 | unsigned char *dpDst, 58 | unsigned char *dpDstUV, 59 | int nDstPitch, 60 | int nDstWidth, 61 | int nDstHeight, 62 | unsigned char *dpSrc, 63 | int nSrcPitch, 64 | int nSrcWidth, 65 | int nSrcHeight, 66 | cudaStream_t S) 67 | { 68 | cudaResourceDesc resDesc = {}; 69 | resDesc.resType = cudaResourceTypePitch2D; 70 | resDesc.res.pitch2D.devPtr = dpSrc; 71 | resDesc.res.pitch2D.desc = cudaCreateChannelDesc(); 72 | resDesc.res.pitch2D.width = nSrcWidth; 73 | resDesc.res.pitch2D.height = nSrcHeight; 74 | resDesc.res.pitch2D.pitchInBytes = nSrcPitch; 75 | 76 | cudaTextureDesc texDesc = {}; 77 | texDesc.filterMode = cudaFilterModeLinear; 78 | 
texDesc.readMode = cudaReadModeNormalizedFloat; 79 | 80 | cudaTextureObject_t texY = 0; 81 | cudaCreateTextureObject(&texY, &resDesc, &texDesc, NULL); 82 | 83 | resDesc.res.pitch2D.desc = cudaCreateChannelDesc(); 84 | resDesc.res.pitch2D.width = nSrcWidth / 2; 85 | resDesc.res.pitch2D.height = nSrcHeight * 3 / 2; 86 | 87 | cudaTextureObject_t texUv = 0; 88 | cudaCreateTextureObject(&texUv, &resDesc, &texDesc, NULL); 89 | 90 | dim3 Dg = dim3((nDstWidth + 31) / 32, (nDstHeight + 31) / 32); 91 | dim3 Db = dim3(16, 16); 92 | Resize << < Dg, Db, 0, S >> > ( 93 | texY, texUv, dpDst, dpDstUV, nDstPitch, nDstWidth, nDstHeight, 94 | 1.0f * nDstWidth / nSrcWidth, 1.0f * nDstHeight / nSrcHeight); 95 | 96 | cudaDestroyTextureObject(texY); 97 | cudaDestroyTextureObject(texUv); 98 | } 99 | 100 | void 101 | ResizeNv12( 102 | unsigned char *dpDstNv12, 103 | int nDstPitch, 104 | int nDstWidth, 105 | int nDstHeight, 106 | unsigned char *dpSrcNv12, 107 | int nSrcPitch, 108 | int nSrcWidth, 109 | int nSrcHeight, 110 | unsigned char *dpDstNv12UV, 111 | cudaStream_t S) 112 | { 113 | unsigned char *dpDstUV = dpDstNv12UV ? 
dpDstNv12UV : dpDstNv12 + (nDstPitch * nDstHeight); 114 | return Resize(dpDstNv12, 115 | dpDstUV, 116 | nDstPitch, 117 | nDstWidth, 118 | nDstHeight, 119 | dpSrcNv12, 120 | nSrcPitch, 121 | nSrcWidth, 122 | nSrcHeight, 123 | S); 124 | } -------------------------------------------------------------------------------- /src/TC/src/tc_dlopen_unix.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | TC_LIB tc_dlopen(const char* path) { return dlopen(path, RTLD_LAZY); } 6 | void* tc_dlsym(TC_LIB lib, const char* symbol) { return dlsym(lib, symbol); } 7 | char* tc_dlerror() { return dlerror(); } 8 | int tc_dlclose(TC_LIB lib) { return dlclose(lib); } 9 | -------------------------------------------------------------------------------- /src/TC/src/tc_dlopen_windows.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | TC_LIB tc_dlopen(const char* path) { return LoadLibraryA (path); } 5 | void* tc_dlsym(TC_LIB lib, const char* symbol) { return GetProcAddress(lib, symbol); } 6 | char* tc_dlerror() { return (char*)"Failed"; } 7 | int tc_dlclose(TC_LIB lib) { return FreeLibrary(lib); } 8 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2021 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | cmake_minimum_required(VERSION 3.10) 18 | 19 | set(ENABLE_TESTS TRUE CACHE BOOL "Enable unit tests") 20 | 21 | set (TEST_SUITES 22 | ${CMAKE_CURRENT_SOURCE_DIR}/test_PySurface.py 23 | ${CMAKE_CURRENT_SOURCE_DIR}/test_PyNvDecoder.py 24 | ${CMAKE_CURRENT_SOURCE_DIR}/test_PyNvEncoder.py 25 | ${CMAKE_CURRENT_SOURCE_DIR}/test_PyFfmpegDemuxer.py 26 | PARENT_SCOPE 27 | ) 28 | 29 | set (TEST_ASSETS 30 | ${CMAKE_CURRENT_SOURCE_DIR}/test_res_change.h264 31 | ${CMAKE_CURRENT_SOURCE_DIR}/test.mp4 32 | PARENT_SCOPE 33 | ) 34 | 35 | set (TESTS_DIR ${CMAKE_CURRENT_SOURCE_DIR} PARENT_SCOPE) -------------------------------------------------------------------------------- /tests/test.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/VideoProcessingFramework/529fb192f22ec305b8b237ed9e6014e171339c81/tests/test.mp4 -------------------------------------------------------------------------------- /tests/test_PyFfmpegDemuxer.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2021 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # Starting from Python 3.8 DLL search policy has changed. 18 | # We need to add path to CUDA DLLs explicitly. 
class TestDemuxer(unittest.TestCase):
    """Check ``nvc.PyFFmpegDemuxer`` against the known properties of ``gt_file``.

    Expected values come from the module-level ``gt_*`` constants.
    """

    def __init__(self, methodName="runTest"):
        """Open a demuxer on the reference file for this test instance.

        Args:
            methodName: Name of the test method this instance will run. The
                default mirrors ``unittest.TestCase`` so the class can also be
                instantiated without arguments.
        """
        super().__init__(methodName=methodName)
        # unittest builds one TestCase instance per test method, so every
        # test gets a demuxer of its own.
        self.nvDmx = nvc.PyFFmpegDemuxer(gt_file, {})

    def test_width(self):
        self.assertEqual(gt_width, self.nvDmx.Width())

    def test_height(self):
        self.assertEqual(gt_height, self.nvDmx.Height())

    def test_color_space(self):
        self.assertEqual(gt_color_space, self.nvDmx.ColorSpace())

    def test_color_range(self):
        self.assertEqual(gt_color_range, self.nvDmx.ColorRange())

    def test_format(self):
        self.assertEqual(gt_pix_fmt, self.nvDmx.Format())

    def test_framerate(self):
        self.assertEqual(gt_framerate, self.nvDmx.Framerate())

    def test_avgframerate(self):
        self.assertEqual(gt_framerate, self.nvDmx.AvgFramerate())

    def test_isvfr(self):
        self.assertEqual(gt_is_vfr, self.nvDmx.IsVFR())

    def test_timebase(self):
        # NOTE(review): the tolerance is larger than the expected value
        # itself, so this only rejects grossly wrong timebases. Tighten it
        # once the exact timebase of the asset is confirmed.
        epsilon = 1e-4
        gt_timebase = 8.1380e-5
        self.assertAlmostEqual(gt_timebase, self.nvDmx.Timebase(), delta=epsilon)

    def test_demux_all_packets(self):
        # Packets must come out in non-decreasing DTS order and their count
        # must match the known number of frames.
        num_packets = 0
        last_dts = 0
        while True:
            pdata = nvc.PacketData()
            packet = np.ndarray(shape=(0), dtype=np.uint8)
            if not self.nvDmx.DemuxSinglePacket(packet):
                break
            self.nvDmx.LastPacketData(pdata)
            # There is nothing to compare the very first packet against.
            if num_packets:
                self.assertGreaterEqual(pdata.dts, last_dts)
            last_dts = pdata.dts
            num_packets += 1
        self.assertEqual(gt_num_frames, num_packets)

    def test_seek_framenum(self):
        # Frame-number seek is only checked on constant frame rate input.
        # The guard has to leave the test: previously it fell through and ran
        # the seek assertions on VFR input anyway.
        if self.nvDmx.IsVFR():
            self.skipTest("Seek on VFR sequence, skipping this test")

        seek_frame = random.randint(0, gt_num_frames - 1)
        for mode in (nvc.SeekMode.EXACT_FRAME, nvc.SeekMode.PREV_KEY_FRAME):
            packet = np.ndarray(shape=(0), dtype=np.uint8)
            sk = nvc.SeekContext(
                seek_frame=seek_frame,
                mode=mode,
            )
            self.assertTrue(self.nvDmx.Seek(sk, packet))
            pdata = nvc.PacketData()
            self.nvDmx.LastPacketData(pdata)
            if nvc.SeekMode.EXACT_FRAME == mode:
                # Exact seek must land on the requested frame.
                self.assertEqual(pdata.dts, pdata.duration * seek_frame)
            elif nvc.SeekMode.PREV_KEY_FRAME == mode:
                # Key frame seek may land earlier, never later.
                self.assertLessEqual(pdata.dts, pdata.duration * seek_frame)

    def test_seek_timestamp(self):
        if self.nvDmx.IsVFR():
            self.skipTest("Seek on VFR sequence, skipping this test")

        timestamp = random.random() * gt_len_seconds
        packet = np.ndarray(shape=(0), dtype=np.uint8)
        sk = nvc.SeekContext(
            seek_ts=timestamp,
            mode=nvc.SeekMode.PREV_KEY_FRAME,
        )
        self.assertTrue(self.nvDmx.Seek(sk, packet))
        pdata = nvc.PacketData()
        self.nvDmx.LastPacketData(pdata)
        # DTS is expressed in timebase units; convert before comparing.
        self.assertLessEqual(pdata.dts * self.nvDmx.Timebase(), timestamp)

    def test_demux_single_packet(self):
        packet = np.ndarray(shape=(0), dtype=np.uint8)
        while self.nvDmx.DemuxSinglePacket(packet):
            self.assertNotEqual(0, packet.size)

    def test_sei(self):
        # The stream as a whole is expected to carry some SEI payload.
        total_sei_size = 0
        while True:
            packet = np.ndarray(shape=(0), dtype=np.uint8)
            sei = np.ndarray(shape=(0), dtype=np.uint8)
            if not self.nvDmx.DemuxSinglePacket(packet, sei):
                break
            total_sei_size += sei.size
        self.assertNotEqual(0, total_sei_size)

    def test_lastpacketdata(self):
        # Querying packet data before any packet was demuxed must not raise.
        # Only Exception is caught so KeyboardInterrupt / SystemExit still
        # propagate instead of being reported as a test failure.
        try:
            pdata = nvc.PacketData()
            self.nvDmx.LastPacketData(pdata)
        except Exception:
            self.fail("Test case raised exception unexpectedly!")
class TestEncoderBasic(unittest.TestCase):
    """Encoder tests that feed ``nvc.PyNvEncoder`` with decoded surfaces."""

    def __init__(self, methodName):
        super().__init__(methodName=methodName)

    def test_encode_all_surfaces(self):
        # Every surface handed to the encoder has to come back as a packet,
        # either right away or while flushing.
        device = 0
        resolution = f"{gt_width}x{gt_height}"
        packet = np.ndarray(shape=(0), dtype=np.uint8)

        decoder = nvc.PyNvDecoder(gt_file, device)
        settings = {
            "preset": "P4",
            "tuning_info": "high_quality",
            "codec": "h264",
            "profile": "high",
            "s": resolution,
            "bitrate": "1M",
        }
        encoder = nvc.PyNvEncoder(settings, device)

        sent = 0
        received = 0

        while True:
            surface = decoder.DecodeSingleSurface()
            if not surface or surface.Empty():
                break
            sent += 1

            encoder.EncodeSingleSurface(surface, packet)
            if packet.size:
                received += 1

        # Drain the encoder; stop at the first failed or empty flush.
        while encoder.FlushSinglePacket(packet) and packet.size:
            received += 1

        self.assertEqual(sent, received)

    def test_reconfigure(self):
        # Encode a stream whose resolution changes, reconfiguring the encoder
        # on the fly, and decode the result again to check frame sizes.
        device = 0
        resolution = f"{gt_width}x{gt_height}"
        packet = np.ndarray(shape=(0), dtype=np.uint8)

        source = nvc.PyNvDecoder(gt_file_res_change, device)
        verifier = nvc.PyNvDecoder(
            gt_width, gt_height, nvc.PixelFormat.NV12, nvc.CudaVideoCodec.H264, device
        )
        settings = {
            "preset": "P4",
            "tuning_info": "high_quality",
            "codec": "h264",
            "profile": "high",
            "s": resolution,
            "bitrate": "1M",
        }
        encoder = nvc.PyNvEncoder(settings, device)

        reconstructed = 0
        while True:
            surface = source.DecodeSingleSurface()
            if not surface or surface.Empty():
                break

            width = surface.Width()
            height = surface.Height()
            if (width, height) != (gt_width, gt_height):
                # Flush the encoder ahead of the reconfiguration. Packets
                # drained here are counted but not decoded, which is fine:
                # the verifying decoder reconfigures on resolution change.
                while encoder.FlushSinglePacket(packet):
                    reconstructed += 1

                # Switch the encoder over to the new frame size.
                resolution = f"{width}x{height}"
                reconfigured = encoder.Reconfigure(
                    {"s": resolution}, force_idr=True, reset_encoder=True
                )
                self.assertTrue(reconfigured)
                self.assertEqual(encoder.Width(), width)
                self.assertEqual(encoder.Height(), height)

            encoder.EncodeSingleSurface(surface, packet)

            if not packet.size:
                continue

            decoded_back = verifier.DecodeSurfaceFromPacket(packet)
            if not decoded_back or decoded_back.Empty():
                continue

            reconstructed += 1
            if reconstructed < gt_res_change:
                expected_width, expected_height = gt_width, gt_height
            else:
                expected_width, expected_height = width, height
            self.assertEqual(decoded_back.Width(), expected_width)
            self.assertEqual(decoded_back.Height(), expected_height)
class TestSurfacePycuda(unittest.TestCase):
    """Surface interoperability tests: PyCUDA copies, torch tensors, cloning."""

    def __init__(self, methodName):
        super().__init__(methodName=methodName)
        self.gpu_id = 0
        cuda.init()
        self.cuda_ctx = cuda.Device(self.gpu_id).retain_primary_context()
        # The context is current only while the stream is being created.
        self.cuda_ctx.push()
        self.cuda_str = cuda.Stream()
        self.cuda_ctx.pop()

        ctx_handle = self.cuda_ctx.handle
        stream_handle = self.cuda_str.handle
        self.nvDec = nvc.PyNvDecoder(gt_file, ctx_handle, stream_handle)
        self.nvDwn = nvc.PySurfaceDownloader(
            self.nvDec.Width(),
            self.nvDec.Height(),
            self.nvDec.Format(),
            ctx_handle,
            stream_handle,
        )

    def test_pycuda_memcpy_Surface_Surface(self):
        # Copy each decoded surface into a newly made one with a 2D memcpy
        # and compare the two after downloading them to the host.
        while True:
            decoded = self.nvDec.DecodeSingleSurface()
            if decoded.Empty():
                break
            src_plane = decoded.PlanePtr()

            duplicate = nvc.Surface.Make(
                self.nvDec.Format(),
                self.nvDec.Width(),
                self.nvDec.Height(),
                self.gpu_id,
            )
            self.assertFalse(duplicate.Empty())
            dst_plane = duplicate.PlanePtr()

            plane_width = src_plane.Width()
            copy_2d = cuda.Memcpy2D()
            copy_2d.width_in_bytes = plane_width * src_plane.ElemSize()
            copy_2d.src_pitch = src_plane.Pitch()
            copy_2d.dst_pitch = dst_plane.Pitch()
            copy_2d.width = plane_width
            copy_2d.height = src_plane.Height()
            copy_2d.set_src_device(src_plane.GpuMem())
            copy_2d.set_dst_device(dst_plane.GpuMem())
            copy_2d(self.cuda_str)

            host_src = np.ndarray(shape=(0), dtype=np.uint8)
            downloaded = self.nvDwn.DownloadSingleSurface(decoded, host_src)
            if not downloaded:
                self.fail("Failed to download decoded surface")

            host_dst = np.ndarray(shape=(0), dtype=np.uint8)
            downloaded = self.nvDwn.DownloadSingleSurface(duplicate, host_dst)
            if not downloaded:
                self.fail("Failed to download decoded surface")

            if not np.array_equal(host_src, host_dst):
                self.fail("Video frames are not equal")

    def test_pycuda_memcpy_Surface_Tensor(self):
        # Copy each decoded plane into a torch tensor on the same GPU and
        # compare it with the surface downloaded to the host.
        while True:
            decoded = self.nvDec.DecodeSingleSurface()
            if decoded.Empty():
                break
            src_plane = decoded.PlanePtr()
            plane_height = src_plane.Height()
            plane_width = src_plane.Width()

            target = torch.zeros(
                plane_height,
                plane_width,
                1,
                dtype=torch.uint8,
                device=torch.device(f"cuda:{self.gpu_id}"),
            )
            target_ptr = target.data_ptr()

            copy_2d = cuda.Memcpy2D()
            copy_2d.width_in_bytes = plane_width * src_plane.ElemSize()
            copy_2d.src_pitch = src_plane.Pitch()
            # The tensor rows are written with a pitch of the decoder width.
            copy_2d.dst_pitch = self.nvDec.Width()
            copy_2d.width = plane_width
            copy_2d.height = plane_height
            copy_2d.set_src_device(src_plane.GpuMem())
            copy_2d.set_dst_device(target_ptr)
            copy_2d(self.cuda_str)

            host_src = np.ndarray(shape=(0), dtype=np.uint8)
            downloaded = self.nvDwn.DownloadSingleSurface(decoded, host_src)
            if not downloaded:
                self.fail("Failed to download decoded surface")

            host_dst = target.to("cpu").numpy()
            host_dst = host_dst.reshape(plane_height * plane_width)

            if not np.array_equal(host_src, host_dst):
                self.fail("Video frames are not equal")

    def test_list_append(self):
        kept = []
        decoder = nvc.PyNvDecoder(gt_file, 0)

        # Decode every surface and keep a copy of each one in the list.
        while True:
            surface = decoder.DecodeSingleSurface()
            if not surface or surface.Empty():
                break
            # Surfaces returned by the decoder belong to its internal memory
            # pool, so they have to be cloned before being stored.
            kept.append(surface.Clone(self.gpu_id))

        # All surfaces must have been kept.
        self.assertEqual(len(kept), gt_num_frames)

        # Decode the file once more and compare frame by frame with the
        # stored clones to make sure no corruption happened.
        decoder = nvc.PyNvDecoder(gt_file, 0)
        downloader = nvc.PySurfaceDownloader(
            decoder.Width(), decoder.Height(), decoder.Format(), self.gpu_id
        )

        for stored in kept:
            fresh_frame = np.ndarray(shape=(0), dtype=np.uint8)
            stored_frame = np.ndarray(shape=(0), dtype=np.uint8)

            downloader.DownloadSingleSurface(stored, stored_frame)
            decoder.DecodeSingleFrame(fresh_frame)

            self.assertTrue(np.array_equal(fresh_frame, stored_frame))
@unittest.skip('Skipping because still causing segfault due to built-in demuxer being NULL')
def test_issue_457():
    # Reproduction for issue 457: ask a PyFfmpegDecoder opened on the
    # resolution-change stream for motion vectors. It stays skipped for the
    # reason given in the decorator.
    tests_dir = dirname(__file__)
    stream_path = join(tests_dir, "test_res_change.h264")
    decoder = nvc.PyFfmpegDecoder(stream_path, {}, 1)
    decoder.GetMotionVectors()