├── CNAME ├── README.md ├── imgs ├── profile.png ├── cpu_spec-2.webp ├── gpu_spec-2.webp ├── favicon-32x32.png ├── OpenCV_logo_black.webp ├── opencv_cuda_intel.webp ├── profile_1-e1674474909304.webp ├── vs_community_c++_workload.png ├── cropped-cuda_thread_blocks.webp └── cmake_visual_studio_install.webp ├── nbs ├── imgs │ ├── nvprof_1.PNG │ ├── nvprof_2.PNG │ ├── nvprof_3.PNG │ ├── nvprof_4.PNG │ ├── nvprof_5.PNG │ ├── nvprof_6.PNG │ ├── nvprof_7.PNG │ ├── quicksync.PNG │ ├── proc_mon_filter.png │ ├── proc_mon_failed_search_nvidia.png │ └── proc_mon_failed_search_opencv.png ├── misc │ └── ProcmonConfiguration.pmc ├── cudacodec_videoReader_memory_usage.ipynb ├── resize.ipynb ├── opencv4-sparse-optical-flow.ipynb ├── cuda_optimization_test.ipynb ├── cudacodec.ipynb ├── opencv_cvs_dll_load_failed.ipynb ├── opencv410x-video-read.ipynb ├── opencv450-video-read.ipynb └── opencv450-video-read-CUDA_10_0_VideoCodecSDK_11_0_10.ipynb ├── 404.qmd ├── qmd ├── about.qmd └── opencv_cuda_performance.qmd ├── index.qmd ├── .gitignore ├── _quarto.yml └── LICENSE /CNAME: -------------------------------------------------------------------------------- 1 | www.jamesbowley.co.uk -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # opencv-testing 2 | Notebooks for experimenting with OpenCV 3 | -------------------------------------------------------------------------------- /imgs/profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/profile.png -------------------------------------------------------------------------------- /imgs/cpu_spec-2.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/cpu_spec-2.webp 
-------------------------------------------------------------------------------- /imgs/gpu_spec-2.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/gpu_spec-2.webp -------------------------------------------------------------------------------- /nbs/imgs/nvprof_1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_1.PNG -------------------------------------------------------------------------------- /nbs/imgs/nvprof_2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_2.PNG -------------------------------------------------------------------------------- /nbs/imgs/nvprof_3.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_3.PNG -------------------------------------------------------------------------------- /nbs/imgs/nvprof_4.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_4.PNG -------------------------------------------------------------------------------- /nbs/imgs/nvprof_5.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_5.PNG -------------------------------------------------------------------------------- /nbs/imgs/nvprof_6.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_6.PNG 
-------------------------------------------------------------------------------- /nbs/imgs/nvprof_7.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_7.PNG -------------------------------------------------------------------------------- /imgs/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/favicon-32x32.png -------------------------------------------------------------------------------- /nbs/imgs/quicksync.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/quicksync.PNG -------------------------------------------------------------------------------- /imgs/OpenCV_logo_black.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/OpenCV_logo_black.webp -------------------------------------------------------------------------------- /imgs/opencv_cuda_intel.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/opencv_cuda_intel.webp -------------------------------------------------------------------------------- /nbs/imgs/proc_mon_filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/proc_mon_filter.png -------------------------------------------------------------------------------- /imgs/profile_1-e1674474909304.webp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/profile_1-e1674474909304.webp -------------------------------------------------------------------------------- /imgs/vs_community_c++_workload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/vs_community_c++_workload.png -------------------------------------------------------------------------------- /nbs/misc/ProcmonConfiguration.pmc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/misc/ProcmonConfiguration.pmc -------------------------------------------------------------------------------- /imgs/cropped-cuda_thread_blocks.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/cropped-cuda_thread_blocks.webp -------------------------------------------------------------------------------- /imgs/cmake_visual_studio_install.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/cmake_visual_studio_install.webp -------------------------------------------------------------------------------- /nbs/imgs/proc_mon_failed_search_nvidia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/proc_mon_failed_search_nvidia.png -------------------------------------------------------------------------------- /nbs/imgs/proc_mon_failed_search_opencv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/proc_mon_failed_search_opencv.png 
-------------------------------------------------------------------------------- /404.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Page Not Found 3 | --- 4 | 5 | The page you requested cannot be found (perhaps it was moved or renamed). 6 | 7 | You may want to try searching to find the page's new location. -------------------------------------------------------------------------------- /qmd/about.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "About" 3 | about: 4 | template: jolla 5 | image: ..\imgs\profile.png 6 | links: 7 | - icon: github 8 | text: Github 9 | href: https://github.com/cudawarped/opencv-experiments/ 10 | --- 11 | 12 | Location for storing useful guides and notebooks 13 | -------------------------------------------------------------------------------- /index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "OpenCV Guides" 3 | listing: 4 | #image-height: 300px 5 | #image-placeholder: "../imgs/OpenCV_logo_black.webp" 6 | sort: "date desc" 7 | contents: 8 | - nbs/ImportError_dll_load_failed_while_importing_cv2.ipynb 9 | - nbs/opencv_cuda_streams_performance_python.ipynb 10 | - qmd/opencv_cuda_python_windows.qmd 11 | - qmd/opencv_cuda_performance.qmd 12 | #- nbs\opencv_delay_jit_or_context_creation.ipynb 13 | --- -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 
29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | /.quarto/ 107 | -------------------------------------------------------------------------------- /_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | resources: 4 | - CNAME 5 | render: 6 | - nbs/ImportError_dll_load_failed_while_importing_cv2.ipynb 7 | #- nbs/opencv4-cuda-streams.ipynb 8 | - nbs/opencv_cuda_streams_performance_python.ipynb 9 | #- nbs\opencv_delay_jit_or_context_creation.ipynb 10 | - "*.html" 11 | - "*.qmd" 12 | 13 | 14 | website: 15 | title: cudawarped 16 | repo-url: https://github.com/cudawarped/opencv-experiments/ 17 | repo-actions: [issue] 18 | favicon: imgs\favicon-32x32.png 19 | 
back-to-top-navigation: true 20 | google-analytics: "G-69X21EFRB2" 21 | open-graph: true 22 | navbar: 23 | tools: 24 | - icon: github 25 | href: https://github.com/cudawarped 26 | background: light 27 | logo: imgs\profile_1-e1674474909304.webp 28 | search: true 29 | left: 30 | - text: "About" 31 | file: qmd\about.qmd 32 | #- text: "Home" 33 | # file: index.qmd 34 | - text: "OpenCV" 35 | file: index.qmd 36 | - text: "Downloads" 37 | #href: qmd/downloads.qmd 38 | menu: 39 | - text: "OpenCV" 40 | file: qmd/downloads.qmd 41 | - text: OpenCV Python CUDA wheels 42 | href: https://github.com/cudawarped/opencv-python-cuda-wheels/releases 43 | - text: OpenCV C++ CUDA builds 44 | href: https://github.com/cudawarped/opencv_contrib/releases 45 | # - https://github.com/cudawarped/opencv-python-cuda-wheels/releases 46 | #- text: "How-To" 47 | # file: howto.qmd 48 | 49 | sidebar: 50 | - title: OpenCV 51 | style: "floating" 52 | #type: "dark" 53 | #background: light 54 | #contents: auto 55 | contents: 56 | - index.qmd 57 | - text: "Build OpenCV (including Python) with CUDA on Windows" 58 | file: qmd\opencv_cuda_python_windows.qmd 59 | - text: "Accelerate with CUDA streams in Python" 60 | file: nbs\opencv_cuda_streams_performance_python.ipynb 61 | - text: "ImportError: DLL load failed..." 
62 | file: nbs\ImportError_dll_load_failed_while_importing_cv2.ipynb 63 | #- text: "OpenCV CUDA initialization delay" 64 | # file: nbs\opencv_delay_jit_or_context_creation.ipynb 65 | - text: "CUDA Performance Comparison" 66 | file: qmd\opencv_cuda_performance.qmd 67 | # - text: "Import dll load failed while importing cv2" 68 | # file: nbs\ImportError_dll_load_failed_while_importing_cv2.ipynb 69 | 70 | #- tutorial1.qmd 71 | #- tutorial2.qmd 72 | 73 | - title: "How-To" 74 | contents: 75 | - howto.qmd 76 | # navigation items 77 | 78 | - title: "Fundamentals" 79 | contents: 80 | - fundamentals.qmd 81 | # navigation items 82 | 83 | - title: "Reference" 84 | contents: 85 | - reference.qmd 86 | # navigation items 87 | 88 | format: 89 | html: 90 | smooth-scroll: true 91 | theme: 92 | light: cosmo 93 | dark: cyborg 94 | page-layout: full 95 | grid: 96 | sidebar-width: 330px 97 | body-width: 1000px 98 | #css: style.css 99 | #- custom.scss 100 | #navbar: navbar-light 101 | #backgroundcolor: red 102 | #css: custom.scss 103 | toc: false 104 | link-external-newwindow: true 105 | link-external-icon: false 106 | linkcolor: "#76b900" 107 | -------------------------------------------------------------------------------- /nbs/cudacodec_videoReader_memory_usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "53117f25", 6 | "metadata": {}, 7 | "source": [ 8 | "# `cudacodec.VideoReader` memory usage example" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "0adbfc27", 14 | "metadata": {}, 15 | "source": [ 16 | "The memory required by `cudacodec.VideoReader` is mainly influenced by the number of decode surfaces required as demonstrated in this example. 
\n", 17 | "\n", 18 | "The minimum number of decode surfaces is determined by the video source and can be increased to increase decoding performance.\n", 19 | "\n", 20 | "Note: A [CUDA context](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#context) which requires several hundred MB of device memory needs to be created before any CUDA functions can be called. Unfortunately the call to `cv.cuda.DeviceInfo()` will create the cuda context so there is no way to measure how much memory this allocates using the OpenCV API.\n", 21 | "\n", 22 | "This example uses python wheel from https://github.com/cudawarped/opencv-python-cuda-wheels/releases/tag/4.7.0.20221229" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "id": "154b8b6a", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import cv2 as cv\n", 33 | "import os\n", 34 | "import time\n", 35 | "b_to_mb = 2**20\n", 36 | "vid_root = os.environ['OPENCV_TEST_DATA_PATH'] + \"/cv/video/\"" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "12f4db2a", 42 | "metadata": {}, 43 | "source": [ 44 | "## Create `cudacodec.VideoReader` using the default number of decode surfaces" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 2, 50 | "id": "7819f27c", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# unfortunately this creates a cuda context therefore mb_free_start is the free memory after context creation\n", 55 | "device_info = cv.cuda.DeviceInfo()\n", 56 | "mb_free_start = device_info.freeMemory()/b_to_mb" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 3, 62 | "id": "9f5291b7", 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "videoReader = cv.cudacodec.createVideoReader(vid_root + \"1920x1080.avi\")" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "id": "81d66b56", 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | 
"mb_free_after_creation = device_info.freeMemory()/b_to_mb" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 5, 82 | "id": "4080e268", 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# Get number of decode surfaces currently a frame needs to be processed before the format info is valid\n", 87 | "videoReader.grab()\n", 88 | "format_info = videoReader.format()" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 6, 94 | "id": "97570090", 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "Total Memory: 8191.50MB\n", 102 | "Free Memory after context creation: 6999.00MB\n", 103 | "Free Memory after creating video reader: 6973.41MB\n", 104 | "25.59MB of internal memory when using 4 (1920x1088) decode surfaces\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "mb_used = mb_free_start - mb_free_after_creation\n", 110 | "print(f'Total Memory: {device_info.totalMemory()/b_to_mb:.2f}MB')\n", 111 | "print(f'Free Memory after context creation: {mb_free_start:.2f}MB')\n", 112 | "print(f'Free Memory after creating video reader: {mb_free_after_creation:.2f}MB')\n", 113 | "print(f'{mb_used:.2f}MB of internal memory when using {format_info.ulNumDecodeSurfaces} ({format_info.ulWidth}x{format_info.ulHeight}) decode surfaces')" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "id": "485293dd", 119 | "metadata": {}, 120 | "source": [ 121 | "## Create `cudacodec.VideoReader` using twice as many decode surfaces" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 7, 127 | "id": "f887489f", 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "params = cv.cudacodec.VideoReaderInitParams()\n", 132 | "params.minNumDecodeSurfaces = format_info.ulNumDecodeSurfaces*2" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 8, 138 | "id": "42733d60", 139 | "metadata": {}, 140 | "outputs": [], 
141 | "source": [ 142 | "videoReader = cv.cudacodec.createVideoReader(vid_root + \"1920x1080.avi\",params=params)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 9, 148 | "id": "dc9904f1", 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "mb_used_double_sufaces = mb_free_start - device_info.freeMemory()/b_to_mb" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 10, 158 | "id": "55bc7811", 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "videoReader.grab()\n", 163 | "format_info = videoReader.format()\n", 164 | "assert format_info.ulNumDecodeSurfaces == params.minNumDecodeSurfaces " 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 11, 170 | "id": "c43fad5f", 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "Memory increase from doubling the number of decode surfaces: 62.52%\n" 178 | ] 179 | } 180 | ], 181 | "source": [ 182 | "print(f'Memory increase from doubling the number of decode surfaces: {100*(mb_used_double_sufaces - mb_used)/mb_used:.2f}%')" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "id": "26ca941d", 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [] 192 | } 193 | ], 194 | "metadata": { 195 | "kernelspec": { 196 | "display_name": "Python 3 (ipykernel)", 197 | "language": "python", 198 | "name": "python3" 199 | }, 200 | "language_info": { 201 | "codemirror_mode": { 202 | "name": "ipython", 203 | "version": 3 204 | }, 205 | "file_extension": ".py", 206 | "mimetype": "text/x-python", 207 | "name": "python", 208 | "nbconvert_exporter": "python", 209 | "pygments_lexer": "ipython3", 210 | "version": "3.9.13" 211 | }, 212 | "toc": { 213 | "base_numbering": 1, 214 | "nav_menu": {}, 215 | "number_sections": true, 216 | "sideBar": true, 217 | "skip_h1_title": false, 218 | "title_cell": "Table of Contents", 219 | 
"title_sidebar": "Contents", 220 | "toc_cell": false, 221 | "toc_position": {}, 222 | "toc_section_display": true, 223 | "toc_window_display": false 224 | }, 225 | "varInspector": { 226 | "cols": { 227 | "lenName": 16, 228 | "lenType": 16, 229 | "lenVar": 40 230 | }, 231 | "kernels_config": { 232 | "python": { 233 | "delete_cmd_postfix": "", 234 | "delete_cmd_prefix": "del ", 235 | "library": "var_list.py", 236 | "varRefreshCmd": "print(var_dic_list())" 237 | }, 238 | "r": { 239 | "delete_cmd_postfix": ") ", 240 | "delete_cmd_prefix": "rm(", 241 | "library": "var_list.r", 242 | "varRefreshCmd": "cat(var_dic_list()) " 243 | } 244 | }, 245 | "types_to_exclude": [ 246 | "module", 247 | "function", 248 | "builtin_function_or_method", 249 | "instance", 250 | "_Feature" 251 | ], 252 | "window_display": false 253 | } 254 | }, 255 | "nbformat": 4, 256 | "nbformat_minor": 5 257 | } 258 | -------------------------------------------------------------------------------- /nbs/resize.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 94, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import cv2\n", 10 | "import numpy as np\n", 11 | "import time" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 110, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/plain": [ 22 | "'4.5.0'" 23 | ] 24 | }, 25 | "execution_count": 110, 26 | "metadata": {}, 27 | "output_type": "execute_result" 28 | } 29 | ], 30 | "source": [ 31 | "cv2.__version__" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "# OpenCV Resize" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 95, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "img = np.random.random((2048, 2048,3)).astype(np.uint8)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | 
"source": [ 54 | "## Without pre-alloc" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 96, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "resize_width = 1024\n", 64 | "resize_height = 1024 \n", 65 | "loop_cnt = 100000" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "### CPU" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 97, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "cpu time: 952.11 us\n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "time_start = time.time() \n", 90 | "for _ in range(loop_cnt): \n", 91 | " resize_img = cv2.resize(img, (resize_height, resize_width), interpolation=cv2.INTER_LINEAR) \n", 92 | "print('cpu time: {:.2f} us'.format((time.time() - time_start) * 1e6 / loop_cnt))" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "### GPU" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 98, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "device_img = cv2.cuda_GpuMat()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "#### Upload/Dload for single GPU operation" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 99, 121 | "metadata": {}, 122 | "outputs": [ 123 | { 124 | "name": "stdout", 125 | "output_type": "stream", 126 | "text": [ 127 | "gpu time: 3926.30 us\n" 128 | ] 129 | } 130 | ], 131 | "source": [ 132 | "time_start = time.time() \n", 133 | "for _ in range(loop_cnt): \n", 134 | " device_img.upload(img) \n", 135 | " resize_device_img = cv2.cuda.resize(device_img, (resize_height, resize_width),interpolation=cv2.INTER_LINEAR)\n", 136 | " resize_img = resize_device_img.download()\n", 137 | "print('gpu time: {:.2f} us'.format((time.time() - time_start) * 1e6 / loop_cnt)) " 138 | ] 139 | }, 140 | { 
141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "#### Normal operation" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 100, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "device_img = cv2.cuda_GpuMat(img)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 101, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "name": "stdout", 163 | "output_type": "stream", 164 | "text": [ 165 | "gpu time: 460.10 us\n" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "time_start = time.time() \n", 171 | "for _ in range(loop_cnt): \n", 172 | " resize_device_img = cv2.cuda.resize(device_img, (resize_height, resize_width),interpolation=cv2.INTER_LINEAR)\n", 173 | "print('gpu time: {:.2f} us'.format((time.time() - time_start) * 1e6 / loop_cnt)) " 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "## With pre-alloc" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "### CPU" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 102, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "resize_img = np.zeros((resize_height, resize_width,3),dtype=np.uint8)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 103, 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "cpu time: 390.59 us\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "time_start = time.time() \n", 214 | "for _ in range(loop_cnt): \n", 215 | " cv2.resize(img, (resize_height, resize_width), resize_img, interpolation=cv2.INTER_LINEAR)\n", 216 | "print('cpu time: {:.2f} us'.format((time.time() - time_start) * 1e6 / loop_cnt)) " 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "### GPU" 224 | ] 225 | }, 226 | { 227 | 
"cell_type": "code", 228 | "execution_count": 111, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "device_img = cv2.cuda_GpuMat(img)\n", 233 | "resize_device_img = cv2.cuda_GpuMat(resize_img)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 112, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "gpu time: 87.67 us\n" 246 | ] 247 | } 248 | ], 249 | "source": [ 250 | "time_start = time.time() \n", 251 | "for _ in range(loop_cnt): \n", 252 | " cv2.cuda.resize(device_img, (resize_height, resize_width),resize_device_img,interpolation=cv2.INTER_LINEAR)\n", 253 | "print('gpu time: {:.2f} us'.format((time.time() - time_start) * 1e6 / loop_cnt)) " 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "#### Check timer" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 106, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "89.9 µs ± 1.53 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" 273 | ] 274 | } 275 | ], 276 | "source": [ 277 | "%timeit cv2.cuda.resize(device_img, (resize_height, resize_width),dst=resize_device_img,interpolation=cv2.INTER_LINEAR)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "#### Streams - unrealistic without pause, check for GPU saturation" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 107, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "stream = cv2.cuda_Stream()" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 108, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "name": "stdout", 303 | "output_type": "stream", 304 | "text": [ 305 | "44.3 µs ± 377 ns per loop (mean ± std. dev. 
of 7 runs, 10000 loops each)\n" 306 | ] 307 | } 308 | ], 309 | "source": [ 310 | "%timeit cv2.cuda.resize(device_img, (resize_height, resize_width),dst=resize_device_img,interpolation=cv2.INTER_LINEAR, stream=stream)" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [] 319 | } 320 | ], 321 | "metadata": { 322 | "kernelspec": { 323 | "display_name": "Python 3", 324 | "language": "python", 325 | "name": "python3" 326 | }, 327 | "language_info": { 328 | "codemirror_mode": { 329 | "name": "ipython", 330 | "version": 3 331 | }, 332 | "file_extension": ".py", 333 | "mimetype": "text/x-python", 334 | "name": "python", 335 | "nbconvert_exporter": "python", 336 | "pygments_lexer": "ipython3", 337 | "version": "3.8.1" 338 | } 339 | }, 340 | "nbformat": 4, 341 | "nbformat_minor": 4 342 | } 343 | -------------------------------------------------------------------------------- /qmd/opencv_cuda_performance.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "OpenCV CUDA Performance Comparison (Nvidia vs Intel)" 3 | toc: true 4 | date: 02/28/18 5 | aliases: 6 | - ../opencv-3-4-gpu-cuda-performance-comparison-nvidia-vs-intel 7 | - ../opencv-3-4-gpu-cuda-perform 8 | --- 9 | 10 | # Introduction 11 | 12 | In this post I am going to use OpenCV's performance tests to compare the CUDA and CPU implementations. The idea is to get an indication of which OpenCV and/or Computer Vision algorithms, in general, benefit the most from GPU acceleration, and therefore, under what circumstances it might be a good idea to invest in a GPU. 13 | 14 | # Test setup 15 | - **Software**: OpenCV 3.4 compiled on Visual Studio 2017 with CUDA 9.1, Intel MKL with TBB, and TBB.
To generate the CPU results I simply ran the CUDA performance tests with CUDA disabled, so that the fallback CPU functions were called, by changing the following 16 | 17 | ```{.cpp} 18 | #define PERF_RUN_CUDA() ::perf::GpuPerf::targetDevice() 19 | ``` 20 | 21 | to 22 | 23 | ```{.cpp} 24 | #define PERF_RUN_CUDA() false 25 | ``` 26 | 27 | in [modules\\ts\\include\\opencv2\\ts\\ts_perf.hpp](https://github.com/opencv/opencv/blob/6d4f66472e14b29b8e1623859cfebfdc67f677c3/modules/ts/include/opencv2/ts/ts_perf.hpp#L228). 28 | 29 | The performance tests cover 104 of the OpenCV functions, with each function being tested for a number of different configurations (function arguments). The total number of different CUDA performance configurations/tests which run successfully is 6031, of which only 5300 configurations are supported by both the GPU and CPU. 30 | - **Hardware**: Four different hardware configurations were tested, consisting of 3 laptops and 1 desktop; the CPU/GPU combinations are listed below: 31 | 32 | 1) CPU: i5-4210U, GPU: 730m (laptop) 33 | 2) CPU: i5-5200U, GPU: 840m (laptop) 34 | 3) CPU: i7-6700HQ, GPU: GTX 980m (laptop) 35 | 4) CPU: i5-6500, GPU: GTX 1060 (desktop) 36 | 37 | 38 | ## GPU specifications 39 | The GPUs tested comprise three different micro-architectures, ranging from a low-end laptop (730m) to a mid-range desktop (GTX 1060) GPU. The full specifications are shown below, where I have also included the maximum theoretical speedup, if the OpenCV functions were bandwidth or compute limited. This value is just included to give an indication of what should be possible if architectural improvements, SM count etc. don't have any impact on performance. In "general" most algorithms will be bandwidth limited, implying that the average speedup of the OpenCV functions could be somewhere between these two values.
If you are not familiar with this concept, then I would recommend watching Memory Bandwidth Bootcamp: Best Practices, Memory Bandwidth Bootcamp: Beyond Best Practices and Memory Bandwidth Bootcamp: Collaborative Access Patterns by Tony Scudiero for a good overview. 40 | 41 | 42 | ::: {.column-screen} 43 | [![](../imgs/gpu_spec-2.webp)](https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubhtml?gid=1232846262&single=true){fig-align="center"} 44 | ::: 45 | 46 | ## CPU specifications 47 | The CPUs tested also comprise three different micro-architectures, ranging from a low end laptop dual core (i5-4120U) to a mid range desktop quad core (i5-6500) CPU. The full specifications are shown below, where I have again included the maximum theoretical speedup depending on whether the OpenCV functions are limited by the CPU bandwidth or clock speed (I could not find any Intel published GFLOPS information). 48 | 49 | ::: {.column-screen} 50 | [![](../imgs/cpu_spec-2.webp)](https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubhtml?gid=921370251&single=true){fig-align="center"} 51 | ::: 52 | 53 | # Benchmark results 54 | The results for all tests are available [here](https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubhtml?gid=0&single=true), where you can check if a specific configuration benefits from an improvement in performance when moved to the GPU. 55 | 56 | To get an overall picture of the performance increase which can be achieved from using the CUDA functions over the standard CPU ones, the speedup of each CPU/GPU over the least powerful CPU (i5-4210U) is compared. The figure below shows the speedup averaged over all 5300 tests (All Configs).
Because the average speedup is influenced by the number of different configurations tested per OpenCV function, two additional measures are also shown (which only consider one configuration per function) in the figure below: 57 | 61 | 62 | 63 | 64 | The results demonstrate that the configuration (function arguments) makes a massive difference to the CPU/GPU performance. That said, even the slowest configurations on the slowest GPUs are in the same ballpark, performance wise, as the fastest configurations on the most powerful CPUs in the test. This, combined with a higher average performance for all GPUs tested, implies that you should nearly always see an improvement when moving to the GPU, if you have several OpenCV functions in your pipeline (as long as you don't keep moving your data to and from the GPU), even if you are using a low-end, two-generation-old laptop GPU (730m). 65 | 66 | Now let's examine some individual OpenCV functions. Because each function has many configurations, for each function the average execution time over all configurations tested is used to calculate the speedup over the i5-4120U. This provides a guide to the expected performance of a function irrespective of the specific configuration. The next figure shows the top 20 functions where the GPU speedup was largest. It is worth noting that the speedup of the GTX 1060 over all of the CPUs is so large that it has to be shown on a log scale. 67 | 68 | 69 | 70 | 71 | Next, the bottom 20 functions where the GPU speedup was smallest. 72 | 73 | 74 | The above figure demonstrates that, although the CUDA implementations are on average much quicker, some functions are significantly quicker on the CPU. Generally this is due to the function using the Intel Integrated Performance Primitives for Image processing and Computer Vision (IPP-ICV) and/or SIMD instructions.
That said, the above results also show that some of these slower functions do benefit from the parallelism of the GPU, but a more powerful GPU is required to leverage this. 75 | 76 | Finally, let's examine which OpenCV functions took the longest. This is important if you are using one of these functions, as you may consider calling its CUDA counterpart, even if it is the only OpenCV function you need. The figure below contains the execution time for the 20 functions which took the longest on the i5-4120U; again this has to be shown on a log scale because the GPU execution time is much smaller than the CPU execution time. 77 | 78 | 79 | 80 | 81 | Given the possible performance increases shown in the results, if you were performing mean shift filtering with OpenCV on a laptop with only a low end i5-4120U, the execution time of nearly 7 seconds may encourage you to upgrade your hardware. From the above it is clear that it is much better to invest in a tiny GPU (730m), which will reduce your processing time by a factor of 10 to a more tolerable 0.6 seconds, or a mid range GPU (GTX 1060), reducing your processing time by a factor of 100 to 0.07 seconds, rather than a mid range i7 which will give you less than a 30% reduction. 82 | 83 | To conclude, I would just reiterate that the benefit you will get from moving your processing to the GPU with OpenCV will depend on the function you call and the configuration that you use, in addition to your processing pipeline. That said, from what I have observed, on average the CUDA functions are much quicker than their CPU counterparts. Please let me know if there are any mistakes in my results and/or analysis.
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /nbs/opencv4-sparse-optical-flow.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## CUDA python implementation of [py_lucas_kanade](https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_video/py_lucas_kanade/py_lucas_kanade.html) example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 8, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/html": [ 18 | "" 19 | ], 20 | "text/plain": [ 21 | "" 22 | ] 23 | }, 24 | "metadata": {}, 25 | "output_type": "display_data" 26 | } 27 | ], 28 | "source": [ 29 | "import numpy as np\n", 30 | "import os\n", 31 | "import cv2\n", 32 | "import time\n", 33 | "from IPython.core.display import display, HTML\n", 34 | "display(HTML(\"\"))\n", 39 | "\n", 40 | "# params for ShiTomasi corner detection\n", 41 | "feature_params = dict( maxCorners = 100,\n", 42 | " qualityLevel = 0.3,\n", 43 | " minDistance = 7,\n", 44 | " 
blockSize = 7 )\n", 45 | "\n", 46 | "# Parameters for lucas kanade optical flow\n", 47 | "lk_params = dict( winSize = (15,15),\n", 48 | " maxLevel = 2,\n", 49 | " criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))\n", 50 | "\n", 51 | "vidPath = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/768x576.avi'" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## Original CPU implementation" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 10, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "ename": "error", 68 | "evalue": "OpenCV(4.1.1) E:\\Dev\\Repos\\OpenCV\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n", 69 | "output_type": "error", 70 | "traceback": [ 71 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 72 | "\u001b[1;31merror\u001b[0m Traceback (most recent call last)", 73 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[1;32mwhile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[0mret\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mframe\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcap\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m \u001b[0mframe_gray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcv2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcvtColor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcv2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mCOLOR_BGR2GRAY\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 17\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 18\u001b[0m 
\u001b[1;31m# calculate optical flow\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 74 | "\u001b[1;31merror\u001b[0m: OpenCV(4.1.1) E:\\Dev\\Repos\\OpenCV\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "cap = cv2.VideoCapture(vidPath)\n", 80 | "\n", 81 | "# Create some random colors\n", 82 | "color = np.random.randint(0,255,(100,3))\n", 83 | "\n", 84 | "# Take first frame and find corners in it\n", 85 | "ret, old_frame = cap.read()\n", 86 | "old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)\n", 87 | "p0 = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)\n", 88 | "\n", 89 | "# Create a mask image for drawing purposes\n", 90 | "mask = np.zeros_like(old_frame)\n", 91 | "etime = 0\n", 92 | "while(1):\n", 93 | " ret,frame = cap.read()\n", 94 | " frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n", 95 | "\n", 96 | " # calculate optical flow\n", 97 | " t = time.perf_counter()\n", 98 | " p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)\n", 99 | " etime += (time.perf_counter() - t)\n", 100 | " \n", 101 | " # Select good points\n", 102 | " good_new = p1[st==1]\n", 103 | " good_old = p0[st==1]\n", 104 | "\n", 105 | " # draw the tracks\n", 106 | " for i,(new,old) in enumerate(zip(good_new,good_old)):\n", 107 | " a,b = new.ravel()\n", 108 | " c,d = old.ravel()\n", 109 | " mask = cv2.line(mask, (a,b),(c,d), color[i].tolist(), 2)\n", 110 | " frame = cv2.circle(frame,(a,b),5,color[i].tolist(),-1)\n", 111 | " img = cv2.add(frame,mask)\n", 112 | "\n", 113 | " cv2.imshow('frame',img)\n", 114 | " k = cv2.waitKey(30) & 0xff\n", 115 | " if k == 27:\n", 116 | " break\n", 117 | "\n", 118 | " # Now update the previous frame and previous points\n", 119 | " old_gray = frame_gray.copy()\n", 120 | " p0 = good_new.reshape(-1,1,2)\n", 121 | "\n", 122 | "cv2.destroyAllWindows()\n", 123 | 
"cap.release()" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 11, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "0.16676009999997632" 135 | ] 136 | }, 137 | "execution_count": 11, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "etime" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "## Naive CUDA implementation without pre-alloc, streams or other optimizations" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 15, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "ename": "error", 160 | "evalue": "OpenCV(4.1.1) E:\\Dev\\Repos\\OpenCV\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n", 161 | "output_type": "error", 162 | "traceback": [ 163 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 164 | "\u001b[1;31merror\u001b[0m Traceback (most recent call last)", 165 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[1;32mwhile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 19\u001b[0m \u001b[0mret\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mframe\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcap\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 20\u001b[1;33m \u001b[0mframe_gray_device\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupload\u001b[0m\u001b[1;33m(\u001b[0m \u001b[0mcv2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcvtColor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mcv2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mCOLOR_BGR2GRAY\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 21\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 22\u001b[0m \u001b[1;31m# calculate optical flow\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 166 | "\u001b[1;31merror\u001b[0m: OpenCV(4.1.1) E:\\Dev\\Repos\\OpenCV\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | "cap = cv2.VideoCapture(vidPath)\n", 172 | "\n", 173 | "# Take first frame and find corners in it\n", 174 | "ret, old_frame = cap.read()\n", 175 | "old_gray_device = cv2.cuda_GpuMat(cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY))\n", 176 | "detector_device = cv2.cuda.createGoodFeaturesToTrackDetector(cv2.CV_8UC1, feature_params['maxCorners'], \\\n", 177 | " feature_params['qualityLevel'], feature_params['minDistance'], \\\n", 178 | " feature_params['blockSize'])\n", 179 | "p0_device = detector_device.detect(old_gray_device)\n", 180 | "\n", 181 | "optFlow = cv2.cuda_SparsePyrLKOpticalFlow.create()\n", 182 | "\n", 183 | "# Create a mask image for drawing purposes\n", 184 | "mask = np.zeros_like(old_frame)\n", 185 | "frame_gray_device = cv2.cuda_GpuMat()\n", 186 | "p0 = p0_device.download()\n", 187 | "etime = 0\n", 188 | "while(1):\n", 189 | " ret,frame = cap.read()\n", 190 | " frame_gray_device.upload( cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)) \n", 191 | "\n", 192 | " # calculate optical flow\n", 193 | " t = time.perf_counter()\n", 194 | " p1_device, st_device, err = optFlow.calc(old_gray_device,frame_gray_device,p0_device,None)\n", 195 | " etime += (time.perf_counter() - t)\n", 196 | " \n", 197 | " # dload points\n", 198 | " p1 = p1_device.download()\n", 199 | " st = st_device.download()\n", 200 | " \n", 201 | " # Select good points\n", 202 | " good_new = 
p1[st==1]\n", 203 | " good_old = p0[st==1]\n", 204 | "\n", 205 | " # draw the tracks\n", 206 | " for i,(new,old) in enumerate(zip(good_new,good_old)):\n", 207 | " a,b = new.ravel()\n", 208 | " c,d = old.ravel()\n", 209 | " mask = cv2.line(mask, (a,b),(c,d), color[i].tolist(), 2)\n", 210 | " frame = cv2.circle(frame,(a,b),5,color[i].tolist(),-1)\n", 211 | " img = cv2.add(frame,mask)\n", 212 | "\n", 213 | " cv2.imshow('frame',img)\n", 214 | " k = cv2.waitKey(30) & 0xff\n", 215 | " if k == 27:\n", 216 | " break\n", 217 | "\n", 218 | " # Now update the previous frame and previous points\n", 219 | " frame_gray_device.copyTo(old_gray_device)\n", 220 | " p0 = np.expand_dims(good_new,axis=0)\n", 221 | " p0_device.upload(p0)\n", 222 | "\n", 223 | "cv2.destroyAllWindows()\n", 224 | "cap.release()" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 17, 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "data": { 234 | "text/plain": [ 235 | "0.10651610000002165" 236 | ] 237 | }, 238 | "execution_count": 17, 239 | "metadata": {}, 240 | "output_type": "execute_result" 241 | } 242 | ], 243 | "source": [ 244 | "etime" 245 | ] 246 | } 247 | ], 248 | "metadata": { 249 | "kernelspec": { 250 | "display_name": "Python 3", 251 | "language": "python", 252 | "name": "python3" 253 | }, 254 | "language_info": { 255 | "codemirror_mode": { 256 | "name": "ipython", 257 | "version": 3 258 | }, 259 | "file_extension": ".py", 260 | "mimetype": "text/x-python", 261 | "name": "python", 262 | "nbconvert_exporter": "python", 263 | "pygments_lexer": "ipython3", 264 | "version": "3.7.3" 265 | } 266 | }, 267 | "nbformat": 4, 268 | "nbformat_minor": 2 269 | } 270 | -------------------------------------------------------------------------------- /nbs/cuda_optimization_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 
| "source": [ 9 | "import cv2\n", 10 | "import numpy as np\n", 11 | "import time\n", 12 | "import GPUtil\n", 13 | "import platform\n", 14 | "import cpuinfo" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "4.5.2\n", 27 | "Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz\n", 28 | "GeForce RTX 2080\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "gpus = GPUtil.getGPUs()\n", 34 | "print(cv2.__version__)\n", 35 | "print(cpuinfo.get_cpu_info()['brand_raw'])\n", 36 | "print(gpus[0].name)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "# OpenCV without optimization" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "im = np.random.random((4000, 4000,3)).astype(np.uint8)\n", 53 | "loop_cnt = 1000" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "## CPU" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "cpu time: 15007.12 us\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "start_t = time.time()\n", 78 | "for _ in range(loop_cnt): \n", 79 | " gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)\n", 80 | " retval,thr = cv2.threshold(gray,128,255,cv2.THRESH_BINARY)\n", 81 | " morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n", 82 | " morph = cv2.dilate(thr,morph_kernel)\n", 83 | " morph = cv2.resize(morph,(640,480)) \n", 84 | "print('cpu time: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "### Pre-alloc return arrays and remove constant ops" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | 
"execution_count": 5, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "name": "stdout", 101 | "output_type": "stream", 102 | "text": [ 103 | "cpu time: 12000.67 us\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "# use ones from above except morph\n", 109 | "morph_sm = np.empty((480,640),np.uint8)\n", 110 | "morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n", 111 | "start_t = time.time()\n", 112 | "for _ in range(loop_cnt): \n", 113 | " cv2.cvtColor(im, cv2.COLOR_BGR2GRAY, gray)\n", 114 | " cv2.threshold(gray,128,255,cv2.THRESH_BINARY,thr) \n", 115 | " cv2.dilate(thr,morph_kernel,morph)\n", 116 | " cv2.resize(morph,(640,480),morph_sm)\n", 117 | "cpu_time = (time.time() - start_t) * 1e6 / loop_cnt\n", 118 | "print('cpu time: {:.2f} us'.format(cpu_time))" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "## GPU" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 7, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "name": "stdout", 135 | "output_type": "stream", 136 | "text": [ 137 | "gpu time: 16019.12 us\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "start_t = time.time()\n", 143 | "for _ in range(loop_cnt):\n", 144 | " gpu_frame = cv2.cuda_GpuMat()\n", 145 | " gpu_frame.upload(im)\n", 146 | " gpu_gray = cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2GRAY)\n", 147 | " retval,gpu_thr = cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY)\n", 148 | " morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n", 149 | " morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n", 150 | " gpu_morph = morph_filter.apply(gpu_thr)\n", 151 | " gpu_morph = cv2.cuda.resize(gpu_morph,(640,480))\n", 152 | " res = gpu_morph.download()\n", 153 | "print('gpu time: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt))" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "# 
OpenCV with optimization" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "## Demonstrate warm up" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 6, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "gpu time warm up: 116695.36 us\n", 180 | "gpu time when warm: 16002.82 us\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "start_t = time.time()\n", 186 | "loop_cnt_warm = 5\n", 187 | "for _ in range(loop_cnt_warm):\n", 188 | " gpu_frame = cv2.cuda_GpuMat()\n", 189 | " gpu_frame.upload(im)\n", 190 | " gpu_gray = cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2GRAY)\n", 191 | " retval,gpu_thr = cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY)\n", 192 | " morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n", 193 | " morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n", 194 | " gpu_morph = morph_filter.apply(gpu_thr)\n", 195 | " gpu_morph = cv2.cuda.resize(gpu_morph,(640,480))\n", 196 | " res = gpu_morph.download()\n", 197 | "print('gpu time warm up: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt_warm))\n", 198 | "start_t = time.time()\n", 199 | "for _ in range(loop_cnt):\n", 200 | " gpu_frame = cv2.cuda_GpuMat()\n", 201 | " gpu_frame.upload(im)\n", 202 | " gpu_gray = cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2GRAY)\n", 203 | " retval,gpu_thr = cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY)\n", 204 | " morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n", 205 | " morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n", 206 | " gpu_morph = morph_filter.apply(gpu_thr)\n", 207 | " gpu_morph = cv2.cuda.resize(gpu_morph,(640,480))\n", 208 | " res = gpu_morph.download()\n", 209 | "print('gpu time when warm: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt))" 210 | ] 211 | }, 212 | { 
213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "### Exclude CPU ops and upload/download" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 8, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "name": "stdout", 226 | "output_type": "stream", 227 | "text": [ 228 | "gpu time: 7100.30 us\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n", 234 | "gpu_frame = cv2.cuda_GpuMat(im)\n", 235 | "start_t = time.time()\n", 236 | "for _ in range(loop_cnt):\n", 237 | " gpu_gray = cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2GRAY)\n", 238 | " retval,gpu_thr = cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY)\n", 239 | " morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n", 240 | " gpu_morph = morph_filter.apply(gpu_thr)\n", 241 | " gpu_morph = cv2.cuda.resize(gpu_morph,(640,480)) \n", 242 | "print('gpu time: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt))\n", 243 | "res = gpu_morph.download()" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "### Pre-allocate" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 9, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "name": "stdout", 260 | "output_type": "stream", 261 | "text": [ 262 | "gpu time: 4493.87 us\n" 263 | ] 264 | } 265 | ], 266 | "source": [ 267 | "morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n", 268 | "gpu_frame = cv2.cuda_GpuMat(im)\n", 269 | "gpu_gray = cv2.cuda_GpuMat(im.shape[:-1][::-1],cv2.CV_8UC1)\n", 270 | "gpu_thr = cv2.cuda_GpuMat(gpu_gray.size(),gpu_gray.type())\n", 271 | "gpu_morph = cv2.cuda_GpuMat(gpu_gray.size(),gpu_gray.type())\n", 272 | "gpu_morph_sm = cv2.cuda_GpuMat((640,480),gpu_gray.type())\n", 273 | "start_t = time.time()\n", 274 | "for _ in range(loop_cnt):\n", 275 | " cv2.cuda.cvtColor(gpu_frame, 
cv2.COLOR_BGR2GRAY,gpu_gray)\n", 276 | " cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY,gpu_thr)\n", 277 | " morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n", 278 | " morph_filter.apply(gpu_thr,gpu_morph)\n", 279 | " cv2.cuda.resize(gpu_morph,(640,480),gpu_morph_sm) \n", 280 | "print('gpu time: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt))\n", 281 | "res = gpu_morph.download()" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "### Remove morph_filter creation as this would also be pre-computed in practice" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 10, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "name": "stdout", 298 | "output_type": "stream", 299 | "text": [ 300 | "gpu time: 3620.95 us\n" 301 | ] 302 | } 303 | ], 304 | "source": [ 305 | "morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n", 306 | "gpu_frame = cv2.cuda_GpuMat(im)\n", 307 | "gpu_gray = cv2.cuda_GpuMat(im.shape[:-1][::-1],cv2.CV_8UC1)\n", 308 | "gpu_thr = cv2.cuda_GpuMat(gpu_gray.size(),gpu_gray.type())\n", 309 | "gpu_morph = cv2.cuda_GpuMat(gpu_gray.size(),gpu_gray.type())\n", 310 | "gpu_morph_sm = cv2.cuda_GpuMat((640,480),gpu_gray.type())\n", 311 | "morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n", 312 | "start_t = time.time()\n", 313 | "for _ in range(loop_cnt):\n", 314 | " cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2GRAY,gpu_gray)\n", 315 | " cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY,gpu_thr) \n", 316 | " morph_filter.apply(gpu_thr,gpu_morph)\n", 317 | " cv2.cuda.resize(gpu_morph,(640,480),gpu_morph_sm) \n", 318 | "print('gpu time: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt))\n", 319 | "res = gpu_morph.download()" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "metadata": {}, 325 | "source": [ 326 | "### Stream - worst case scenario stall 
on each loop iteration" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 11, 332 | "metadata": {}, 333 | "outputs": [ 334 | { 335 | "name": "stdout", 336 | "output_type": "stream", 337 | "text": [ 338 | "gpu time: 3443.38 us\n" 339 | ] 340 | } 341 | ], 342 | "source": [ 343 | "stream = cv2.cuda_Stream()\n", 344 | "morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n", 345 | "gpu_frame = cv2.cuda_GpuMat(im)\n", 346 | "gpu_gray = cv2.cuda_GpuMat(im.shape[:-1][::-1],cv2.CV_8UC1)\n", 347 | "gpu_thr = cv2.cuda_GpuMat(gpu_gray.size(),gpu_gray.type())\n", 348 | "gpu_morph = cv2.cuda_GpuMat(gpu_gray.size(),gpu_gray.type())\n", 349 | "gpu_morph_sm = cv2.cuda_GpuMat((640,480),gpu_gray.type())\n", 350 | "morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n", 351 | "start_t = time.time()\n", 352 | "for _ in range(loop_cnt):\n", 353 | " cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2GRAY,gpu_gray,stream = stream)\n", 354 | " cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY,gpu_thr,stream=stream) \n", 355 | " morph_filter.apply(gpu_thr,gpu_morph,stream=stream)\n", 356 | " cv2.cuda.resize(gpu_morph,(640,480),gpu_morph_sm,stream=stream)\n", 357 | " stream.waitForCompletion()\n", 358 | "gpu_time = (time.time() - start_t) * 1e6 / loop_cnt\n", 359 | "print('gpu time: {:.2f} us'.format(gpu_time))\n", 360 | "res = gpu_morph.download()" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 26, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [ 369 | "# No real improvement from streams likely hard sync inside one of the routines" 370 | ] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": {}, 375 | "source": [ 376 | "# Speed up" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 12, 382 | "metadata": {}, 383 | "outputs": [ 384 | { 385 | "data": { 386 | "text/plain": [ 387 | "3.485140688715614" 388 | ] 389 | }, 390 | "execution_count": 
12, 391 | "metadata": {}, 392 | "output_type": "execute_result" 393 | } 394 | ], 395 | "source": [ 396 | "cpu_time/gpu_time" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": {}, 403 | "outputs": [], 404 | "source": [] 405 | } 406 | ], 407 | "metadata": { 408 | "kernelspec": { 409 | "display_name": "Python 3", 410 | "language": "python", 411 | "name": "python3" 412 | }, 413 | "language_info": { 414 | "codemirror_mode": { 415 | "name": "ipython", 416 | "version": 3 417 | }, 418 | "file_extension": ".py", 419 | "mimetype": "text/x-python", 420 | "name": "python", 421 | "nbconvert_exporter": "python", 422 | "pygments_lexer": "ipython3", 423 | "version": "3.7.7" 424 | }, 425 | "toc": { 426 | "base_numbering": 1, 427 | "nav_menu": {}, 428 | "number_sections": true, 429 | "sideBar": true, 430 | "skip_h1_title": false, 431 | "title_cell": "Table of Contents", 432 | "title_sidebar": "Contents", 433 | "toc_cell": false, 434 | "toc_position": {}, 435 | "toc_section_display": true, 436 | "toc_window_display": false 437 | } 438 | }, 439 | "nbformat": 4, 440 | "nbformat_minor": 4 441 | } 442 | -------------------------------------------------------------------------------- /nbs/cudacodec.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1545a406", 6 | "metadata": {}, 7 | "source": [ 8 | "# cv.cudacodec" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "d3f8a4b0", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import cv2 as cv\n", 19 | "import os\n", 20 | "import time\n", 21 | "import numpy as np" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "ac450ca8", 27 | "metadata": {}, 28 | "source": [ 29 | "## Transcoding Example" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "33f348a4", 35 | "metadata": {}, 36 | "source": [ 37 | "Quick example 
comparing cv.cudacodec transcoding with cv on ubuntu using python wheel from https://github.com/cudawarped/opencv-python-cuda-wheels/releases/tag/4.6.0.20221102.\n", 38 | "\n", 39 | "GPU: Mobile RTX 3070 Ti (5th gen decoder & 7th gen encoder)\n", 40 | "\n", 41 | "CPU: i7-12700H\n", 42 | "\n", 43 | "The benchmark results are not comparable because CPU hardware decoding was not available for the chosen codecs." 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "id": "21825e40", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "vid_path_in_4k = '/home/b/media/jellyfish-120-mbps-4k-uhd-h264.mkv'\n", 54 | "vid_path_in_out_1080p = '/home/b/media/jelly_1080p.hevc'\n", 55 | "vid_path_out_1080p = '/home/b/media/jelly.h264'\n", 56 | "vid_path_out_1080p_mp4 = '/home/b/media/jelly.mp4'" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 3, 62 | "id": "43ae6af1", 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "\n", 70 | "General configuration for OpenCV 4.6.0-dev =====================================\n", 71 | " Version control: 4.6.0-504-gee9137f176\n", 72 | "\n", 73 | " Extra modules:\n", 74 | " Location (extra): /home/b/repos/opencv/opencv-python/opencv_contrib/modules\n", 75 | " Version control (extra): 4.6.0-106-g9d84eaed\n", 76 | "\n", 77 | " Platform:\n", 78 | " Timestamp: 2022-11-02T16:24:13Z\n", 79 | " Host: Linux 5.10.16.3-microsoft-standard-WSL2 x86_64\n", 80 | " CMake: 3.24.1\n", 81 | " CMake generator: Ninja\n", 82 | " CMake build tool: /usr/bin/ninja\n", 83 | " Configuration: Release\n", 84 | "\n", 85 | " CPU/HW features:\n", 86 | " Baseline: SSE SSE2 SSE3\n", 87 | " requested: SSE3\n", 88 | " Dispatched code generation: SSE4_1 SSE4_2 FP16 AVX AVX2 AVX512_SKX\n", 89 | " requested: SSE4_1 SSE4_2 AVX FP16 AVX2 AVX512_SKX\n", 90 | " SSE4_1 (16 files): + SSSE3 SSE4_1\n", 91 | " SSE4_2 (1 files): + SSSE3 SSE4_1 POPCNT SSE4_2\n", 92 | " 
FP16 (0 files): + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 AVX\n", 93 | " AVX (4 files): + SSSE3 SSE4_1 POPCNT SSE4_2 AVX\n", 94 | " AVX2 (32 files): + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 FMA3 AVX AVX2\n", 95 | " AVX512_SKX (5 files): + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 FMA3 AVX AVX2 AVX_512F AVX512_COMMON AVX512_SKX\n", 96 | "\n", 97 | " C/C++:\n", 98 | " Built as dynamic libs?: NO\n", 99 | " C++ standard: 11\n", 100 | " C++ Compiler: /usr/bin/c++ (ver 9.4.0)\n", 101 | " C++ flags (Release): -fsigned-char -W -Wall -Wreturn-type -Wnon-virtual-dtor -Waddress -Wsequence-point -Wformat -Wformat-security -Wmissing-declarations -Wundef -Winit-self -Wpointer-arith -Wshadow -Wsign-promo -Wuninitialized -Wsuggest-override -Wno-delete-non-virtual-dtor -Wno-comment -Wimplicit-fallthrough=3 -Wno-strict-overflow -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -ffunction-sections -fdata-sections -msse -msse2 -msse3 -fvisibility=hidden -fvisibility-inlines-hidden -O3 -DNDEBUG -DNDEBUG\n", 102 | " C++ flags (Debug): -fsigned-char -W -Wall -Wreturn-type -Wnon-virtual-dtor -Waddress -Wsequence-point -Wformat -Wformat-security -Wmissing-declarations -Wundef -Winit-self -Wpointer-arith -Wshadow -Wsign-promo -Wuninitialized -Wsuggest-override -Wno-delete-non-virtual-dtor -Wno-comment -Wimplicit-fallthrough=3 -Wno-strict-overflow -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -ffunction-sections -fdata-sections -msse -msse2 -msse3 -fvisibility=hidden -fvisibility-inlines-hidden -g -O0 -DDEBUG -D_DEBUG\n", 103 | " C Compiler: /usr/bin/cc\n", 104 | " C flags (Release): -fsigned-char -W -Wall -Wreturn-type -Waddress -Wsequence-point -Wformat -Wformat-security -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wundef -Winit-self -Wpointer-arith -Wshadow -Wuninitialized -Wno-comment -Wimplicit-fallthrough=3 -Wno-strict-overflow -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -ffunction-sections -fdata-sections -msse -msse2 
-msse3 -fvisibility=hidden -O3 -DNDEBUG -DNDEBUG\n", 105 | " C flags (Debug): -fsigned-char -W -Wall -Wreturn-type -Waddress -Wsequence-point -Wformat -Wformat-security -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wundef -Winit-self -Wpointer-arith -Wshadow -Wuninitialized -Wno-comment -Wimplicit-fallthrough=3 -Wno-strict-overflow -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -ffunction-sections -fdata-sections -msse -msse2 -msse3 -fvisibility=hidden -g -O0 -DDEBUG -D_DEBUG\n", 106 | " Linker flags (Release): -Wl,--exclude-libs,libippicv.a -Wl,--exclude-libs,libippiw.a -Wl,--gc-sections -Wl,--as-needed -Wl,--no-undefined \n", 107 | " Linker flags (Debug): -Wl,--exclude-libs,libippicv.a -Wl,--exclude-libs,libippiw.a -Wl,--gc-sections -Wl,--as-needed -Wl,--no-undefined \n", 108 | " ccache: NO\n", 109 | " Precompiled headers: NO\n", 110 | " Extra dependencies: openjp2 /usr/lib/wsl/lib/libcuda.so /home/b/stubs/nvcuvid/libnvcuvid.so /home/b/stubs/nvcuvid/libnvidia-encode.so Iconv::Iconv m pthread cudart_static dl rt nppc nppial nppicc nppidei nppif nppig nppim nppist nppisu nppitc npps cublas cudnn cufft -L/usr/local/cuda/lib64 -L/usr/lib/x86_64-linux-gnu\n", 111 | " 3rdparty dependencies: libprotobuf ade ittnotify libjpeg-turbo libwebp libpng libtiff IlmImf zlib quirc ippiw ippicv\n", 112 | "\n", 113 | " OpenCV modules:\n", 114 | " To be built: aruco barcode bgsegm bioinspired calib3d ccalib core cudaarithm cudabgsegm cudacodec cudafeatures2d cudafilters cudaimgproc cudalegacy cudaobjdetect cudaoptflow cudastereo cudawarping cudev datasets dnn dnn_objdetect dnn_superres dpm face features2d flann fuzzy gapi hfs highgui img_hash imgcodecs imgproc intensity_transform line_descriptor mcc ml objdetect optflow phase_unwrapping photo plot python3 quality rapid reg rgbd saliency shape stereo stitching structured_light superres surface_matching text tracking video videoio videostab wechat_qrcode xfeatures2d ximgproc xobjdetect 
xphoto\n", 115 | " Disabled: world\n", 116 | " Disabled by dependency: -\n", 117 | " Unavailable: alphamat cvv freetype hdf java julia matlab ovis python2 sfm ts viz\n", 118 | " Applications: -\n", 119 | " Documentation: NO\n", 120 | " Non-free algorithms: NO\n", 121 | "\n", 122 | " GUI: NONE\n", 123 | " GTK+: NO\n", 124 | " VTK support: NO\n", 125 | "\n", 126 | " Media I/O: \n", 127 | " ZLib: zlib (ver 1.2.12)\n", 128 | " JPEG: libjpeg-turbo (ver 2.1.3-62)\n", 129 | " WEBP: build (ver encoder: 0x020f)\n", 130 | " PNG: build (ver 1.6.37)\n", 131 | " TIFF: build (ver 42 - 4.2.0)\n", 132 | " JPEG 2000: OpenJPEG (ver 2.5.0)\n", 133 | " OpenEXR: build (ver 2.3.0)\n", 134 | " HDR: YES\n", 135 | " SUNRASTER: YES\n", 136 | " PXM: YES\n", 137 | " PFM: YES\n", 138 | "\n", 139 | " Video I/O:\n", 140 | " DC1394: NO\n", 141 | " FFMPEG: YES\n", 142 | " avcodec: YES (58.54.100)\n", 143 | " avformat: YES (58.29.100)\n", 144 | " avutil: YES (56.31.100)\n", 145 | " swscale: YES (5.5.100)\n", 146 | " avresample: YES (4.0.0)\n", 147 | " GStreamer: NO\n", 148 | " v4l/v4l2: YES (linux/videodev2.h)\n", 149 | "\n", 150 | " Parallel framework: pthreads\n", 151 | "\n", 152 | " Trace: YES (with Intel ITT)\n", 153 | "\n", 154 | " Other third-party libraries:\n", 155 | " Intel IPP: 2020.0.0 Gold [2020.0.0]\n", 156 | " at: /home/b/repos/opencv/opencv-python/_skbuild/linux-x86_64-3.9/cmake-build/3rdparty/ippicv/ippicv_lnx/icv\n", 157 | " Intel IPP IW: sources (2020.0.0)\n", 158 | " at: /home/b/repos/opencv/opencv-python/_skbuild/linux-x86_64-3.9/cmake-build/3rdparty/ippicv/ippicv_lnx/iw\n", 159 | " VA: NO\n", 160 | " Lapack: NO\n", 161 | " Eigen: NO\n", 162 | " Custom HAL: NO\n", 163 | " Protobuf: build (3.19.1)\n", 164 | "\n", 165 | " NVIDIA CUDA: YES (ver 11.7, CUFFT CUBLAS NVCUVID NVCUVENC FAST_MATH)\n", 166 | " NVIDIA GPU arch: 35 37 50 52 60 61 70 75 80 86\n", 167 | " NVIDIA PTX archs: 86\n", 168 | "\n", 169 | " cuDNN: YES (ver 8.4.1)\n", 170 | "\n", 171 | " OpenCL: YES (no extra 
features)\n", 172 | " Include path: /home/b/repos/opencv/opencv-python/opencv/3rdparty/include/opencl/1.2\n", 173 | " Link libraries: Dynamic load\n", 174 | "\n", 175 | " Python 3:\n", 176 | " Interpreter: /home/b/mambaforge/bin/python (ver 3.9.13)\n", 177 | " Libraries: /home/b/mambaforge/lib/libpython3.9.so (ver 3.9.13)\n", 178 | " numpy: /home/b/mambaforge/lib/python3.9/site-packages/numpy/core/include (ver 1.23.3)\n", 179 | " install path: python/cv2/python-3\n", 180 | "\n", 181 | " Python (for build): /home/b/mambaforge/bin/python\n", 182 | "\n", 183 | " Java: \n", 184 | " ant: NO\n", 185 | " JNI: NO\n", 186 | " Java wrappers: NO\n", 187 | " Java tests: NO\n", 188 | "\n", 189 | " Install to: /home/b/repos/opencv/opencv-python/_skbuild/linux-x86_64-3.9/cmake-install\n", 190 | "-----------------------------------------------------------------\n", 191 | "\n", 192 | "\n" 193 | ] 194 | } 195 | ], 196 | "source": [ 197 | "print(cv.getBuildInformation())" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "id": "9a66cd2c", 203 | "metadata": {}, 204 | "source": [ 205 | "### GPU" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 4, 211 | "id": "b00f632a", 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "def transcode(vid_path_in, vid_path_out, codec = cv.cudacodec.H264, params = cv.cudacodec_VideoReaderInitParams()):\n", 216 | " reader = cv.cudacodec.createVideoReader(vid_path_in,params=params)\n", 217 | " reader.set(cv.cudacodec.COLOR_FORMAT_BGR)\n", 218 | " format = reader.format()\n", 219 | " if params.targetSz != (0,0):\n", 220 | " w,h = params.targetSz\n", 221 | " else:\n", 222 | " w,h = (format.width,format.height)\n", 223 | " \n", 224 | " frame = cv.cuda.GpuMat(h,w,cv.CV_8UC3)\n", 225 | " writer = cv.cudacodec.createVideoWriter(vid_path_out,[w,h],codec)\n", 226 | " n_frames = 0\n", 227 | " start = time.time()\n", 228 | " ret, _ = reader.nextFrame(frame)\n", 229 | " while(ret):\n", 230 | " n_frames += 1\n", 
231 | " writer.write(frame)\n", 232 | " ret, _ = reader.nextFrame(frame)\n", 233 | " writer.release()\n", 234 | " end = time.time()\n", 235 | " return n_frames/(end - start), n_frames;" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "id": "2eab816e", 241 | "metadata": {}, 242 | "source": [ 243 | "First convert 4K(h264) to 1080p(hevc) for benchmarking" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 5, 249 | "id": "c2e2a129", 250 | "metadata": {}, 251 | "outputs": [ 252 | { 253 | "name": "stdout", 254 | "output_type": "stream", 255 | "text": [ 256 | "Transcoded 900 frames from 4k(h264) to 1080p(hevc) at fps= 130.99\n" 257 | ] 258 | } 259 | ], 260 | "source": [ 261 | "params = cv.cudacodec_VideoReaderInitParams()\n", 262 | "params.targetSz = (1920,1080)\n", 263 | "fps, n_frames = transcode(vid_path_in_4k,vid_path_in_out_1080p,cv.cudacodec.HEVC,params)\n", 264 | "print(f'Transcoded {n_frames} frames from 4k(h264) to 1080p(hevc) at fps= {fps:.2f}')" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "id": "7e6dc675", 270 | "metadata": {}, 271 | "source": [ 272 | "Benchmark 1080p transcoding - timings will be slightly optimistic because decoding begins as soon as the VideoWriter is created" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 6, 278 | "id": "511fd755", 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "name": "stdout", 283 | "output_type": "stream", 284 | "text": [ 285 | "Transcoded 900 frames from 1080p(hevc) to 1080p(h264) at fps = 544.81\n" 286 | ] 287 | } 288 | ], 289 | "source": [ 290 | "params = cv.cudacodec_VideoReaderInitParams()\n", 291 | "params.targetSz = (1920,1080)\n", 292 | "fps, n_frames = transcode(vid_path_in_out_1080p,vid_path_out_1080p,cv.cudacodec.H264,params)\n", 293 | "print(f'Transcoded {n_frames} frames from 1080p(hevc) to 1080p(h264) at fps = {fps:.2f}')" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "id": "1f7c9033", 299 | 
"metadata": {}, 300 | "source": [ 301 | "### CPU" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "id": "d54dbc48", 307 | "metadata": {}, 308 | "source": [ 309 | "Hardware acceleration does not appear to be available for this codec" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 7, 315 | "id": "75263318", 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "def transcode_cpu(vid_path_in, vid_path_out): \n", 320 | " cap = cv.VideoCapture(vid_path_in,cv.CAP_FFMPEG,(cv.CAP_PROP_HW_ACCELERATION, cv.VIDEO_ACCELERATION_ANY))\n", 321 | " #fourcc = cv.VideoWriter_fourcc(*\"mp4v\")\n", 322 | " fourcc = cv.VideoWriter_fourcc(*\"avc1\")\n", 323 | " fps = cap.get(cv.CAP_PROP_FPS)\n", 324 | " width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))\n", 325 | " height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))\n", 326 | " writer = cv.VideoWriter(vid_path_out, fourcc, fps, (width,height),\n", 327 | " (cv.VIDEOWRITER_PROP_HW_ACCELERATION, cv.VIDEO_ACCELERATION_ANY))\n", 328 | " frame = np.zeros((height,width,3),dtype='uint8')\n", 329 | " n_frames = 0\n", 330 | " start = time.time()\n", 331 | " ret, _ = cap.read(frame)\n", 332 | " while(ret):\n", 333 | " n_frames += 1\n", 334 | " writer.write(frame)\n", 335 | " ret, _ = cap.read(frame)\n", 336 | " writer.release()\n", 337 | " end = time.time()\n", 338 | " return n_frames/(end - start), n_frames;" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "id": "584b13a1", 344 | "metadata": {}, 345 | "source": [ 346 | "Benchmarking 1080p transcoding without hardware acceleration" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 8, 352 | "id": "6ab764d4", 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | "name": "stdout", 357 | "output_type": "stream", 358 | "text": [ 359 | "Transcoded 900 frames from 1080p(hevc) to 1080p(h264) at fps = 65.46\n" 360 | ] 361 | } 362 | ], 363 | "source": [ 364 | "fps, n_frames = 
transcode_cpu(vid_path_in_out_1080p,vid_path_out_1080p_mp4)\n", 365 | "print(f'Transcoded {n_frames} frames from 1080p(hevc) to 1080p(h264) at fps = {fps:.2f}')" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": null, 371 | "id": "57534197", 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [] 375 | } 376 | ], 377 | "metadata": { 378 | "kernelspec": { 379 | "display_name": "Python 3 (ipykernel)", 380 | "language": "python", 381 | "name": "python3" 382 | }, 383 | "language_info": { 384 | "codemirror_mode": { 385 | "name": "ipython", 386 | "version": 3 387 | }, 388 | "file_extension": ".py", 389 | "mimetype": "text/x-python", 390 | "name": "python", 391 | "nbconvert_exporter": "python", 392 | "pygments_lexer": "ipython3", 393 | "version": "3.9.13" 394 | } 395 | }, 396 | "nbformat": 4, 397 | "nbformat_minor": 5 398 | } 399 | -------------------------------------------------------------------------------- /nbs/opencv_cvs_dll_load_failed.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6973dcb4-2e8a-409c-a147-291c8520fb18", 6 | "metadata": {}, 7 | "source": [ 8 | "# \"ImportError: DLL load failed while importing cv2: The specified module could not be found.\"" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "0f79e94d-bc84-4f09-a4a9-90be2e41388e", 14 | "metadata": {}, 15 | "source": [ 16 | "If you're on Windows using python >= 3.8, having built OpenCV >= 4.6 from source, and you're seeing the above error when calling `import cv2`, this short guide should help solve your problem.\n", 17 | "\n", 18 | "The guide assumes that you have either installed the python bindings during the build process ([not recommended](https://github.com/opencv/opencv/issues/13202#issuecomment-439730899)) or manually copied `cv2.cp3x-win_amd64.pyd` to your distribution's `site-packages` directory (e.g. 
`C:\\Users\\<username>\\mambaforge\\Lib\\site-packages`).\n", 19 | "\n", 20 | "So what's the issue? Although the message is quite explicit regarding the cause, it doesn't really help with finding a solution. In a nutshell, python has found `cv2.cp3x-win_amd64.pyd`, tried and then failed to load it because it can't find a dependent shared library. Now the advice I have seen online regarding this is to dig out the trusty Dependency Walker, load the `cv2.cp3x-win_amd64.pyd` and see which dependencies the system can't find.\n", 21 | "\n", 22 | "Now this is solid advice if we had a C++ application and/or we were using python < 3.8 (which uses the system/user path for dll resolution), however we are not, so even if Dependency Walker can't detect any problems we may still be facing the above error.\n", 23 | "\n", 24 | "The good news is there is an easy fix if you know where the missing DLLs are and only slightly more involved if you don't as long as you have access to the missing DLLs on your system." 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "id": "f8ded592", 30 | "metadata": {}, 31 | "source": [ 32 | "## Fix when path to missing DLLs is known" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "eb494db2", 38 | "metadata": {}, 39 | "source": [ 40 | "To demonstrate the fix, I have built the OpenCV shared library and corresponding python bindings and manually copied them to the \n", 41 | "`site-packages` directory inside my python distribution (`C:\\Users\\b\\mambaforge\\Lib\\site-packages`). \n", 42 | "\n", 43 | "As I have built a shared library, the python bindings are dependent on `opencv_world460.dll`, and because I haven't told python where it is, I get the error shown below when trying to import them." 
44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 1, 49 | "id": "53efd81a", 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "ename": "ImportError", 54 | "evalue": "DLL load failed while importing cv2: The specified module could not be found.", 55 | "output_type": "error", 56 | "traceback": [ 57 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 58 | "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)", 59 | "Cell \u001b[1;32mIn [1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcv2\u001b[39;00m\n", 60 | "\u001b[1;31mImportError\u001b[0m: DLL load failed while importing cv2: The specified module could not be found." 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "import cv2" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "id": "cfa1c021", 71 | "metadata": {}, 72 | "source": [ 73 | "Given that I know the path to OpenCV's shared libraries is required and I haven't told python about it, the first thing to try is to add it to python's DLL search path and see if that solves the problem." 
74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 2, 79 | "id": "ba4db59c", 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "ename": "ImportError", 84 | "evalue": "DLL load failed while importing cv2: The specified module could not be found.", 85 | "output_type": "error", 86 | "traceback": [ 87 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 88 | "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)", 89 | "Cell \u001b[1;32mIn [2], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m;\n\u001b[0;32m 2\u001b[0m os\u001b[38;5;241m.\u001b[39madd_dll_directory(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mD:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mbuild\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mopencv\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mcuda_11_8_cc_all_sym\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mbin\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m----> 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcv2\u001b[39;00m\n", 90 | "\u001b[1;31mImportError\u001b[0m: DLL load failed while importing cv2: The specified module could not be found." 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "import os\n", 96 | "os.add_dll_directory(\"D:\\\\build\\\\opencv\\\\cuda_11_8_cc_all_sym\\\\bin\")\n", 97 | "import cv2" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "id": "e1b17eca", 103 | "metadata": {}, 104 | "source": [ 105 | "Ahh, the same error. What's going on?\n", 106 | "\n", 107 | "In this case I also built OpenCV against the CUDA SDK so there is a good chance it's missing DLLs from there as well. I can try to fix the issue by simply adding the location of the CUDA SDK binaries to the python DLL search path as shown below." 
108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 3, 113 | "id": "235eb45b", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "os.add_dll_directory(\"C:\\\\Program Files\\\\NVIDIA GPU Computing Toolkit\\\\CUDA\\\\v11.8\\\\bin\")\n", 118 | "import cv2" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "id": "40b84c80", 124 | "metadata": {}, 125 | "source": [ 126 | "This appears to have solved the issue, but it's a good idea to examine the build information just to double-check I have loaded the right version of OpenCV." 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 4, 132 | "id": "a40cc8c5", 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "name": "stdout", 137 | "output_type": "stream", 138 | "text": [ 139 | "\n", 140 | "General configuration for OpenCV 4.6.0-dev =====================================\n", 141 | " Version control: 4.6.0-508-g21133a2091\n", 142 | "\n", 143 | " Extra modules:\n", 144 | " Location (extra): D:/repos/opencv/contrib/modules\n", 145 | " Version control (extra): 4.6.0.20220920-35-g9d84eaed\n", 146 | "\n", 147 | " Platform:\n", 148 | " Timestamp: 2022-11-06T17:20:19Z\n", 149 | " Host: Windows 10.0.22000 AMD64\n", 150 | " CMake: 3.23.2\n", 151 | " CMake generator: Ninja\n", 152 | " CMake build tool: C:/PROGRA~1/MICROS~2/2022/COMMUN~1/Common7/IDE/COMMON~1/MICROS~1/CMake/Ninja/ninja.exe\n", 153 | " MSVC: 1933\n", 154 | " Configuration: Release\n", 155 | "\n", 156 | " CPU/HW features:\n", 157 | " Baseline: SSE SSE2 SSE3\n", 158 | " requested: SSE3\n", 159 | " Dispatched code generation: SSE4_1 SSE4_2 FP16 AVX AVX2 AVX512_SKX\n", 160 | " requested: SSE4_1 SSE4_2 AVX FP16 AVX2 AVX512_SKX\n", 161 | " SSE4_1 (18 files): + SSSE3 SSE4_1\n", 162 | " SSE4_2 (2 files): + SSSE3 SSE4_1 POPCNT SSE4_2\n", 163 | " FP16 (1 files): + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 AVX\n", 164 | " AVX (5 files): + SSSE3 SSE4_1 POPCNT SSE4_2 AVX\n", 165 | " AVX2 (34 files): + SSSE3 SSE4_1 
POPCNT SSE4_2 FP16 FMA3 AVX AVX2\n", 166 | " AVX512_SKX (8 files): + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 FMA3 AVX AVX2 AVX_512F AVX512_COMMON AVX512_SKX\n", 167 | "\n", 168 | " C/C++:\n", 169 | " Built as dynamic libs?: YES\n", 170 | " C++ standard: 11\n", 171 | " C++ Compiler: C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.33.31629/bin/Hostx64/x64/cl.exe (ver 19.33.31629.0)\n", 172 | " C++ flags (Release): /DWIN32 /D_WINDOWS /W4 /GR /D _CRT_SECURE_NO_DEPRECATE /D _CRT_NONSTDC_NO_DEPRECATE /D _SCL_SECURE_NO_WARNINGS /Gy /bigobj /Oi /fp:precise /FS /EHa /wd4127 /wd4251 /wd4324 /wd4275 /wd4512 /wd4589 /wd4819 /MD /O2 /Ob2 /DNDEBUG /Zi\n", 173 | " C++ flags (Debug): /DWIN32 /D_WINDOWS /W4 /GR /D _CRT_SECURE_NO_DEPRECATE /D _CRT_NONSTDC_NO_DEPRECATE /D _SCL_SECURE_NO_WARNINGS /Gy /bigobj /Oi /fp:precise /FS /EHa /wd4127 /wd4251 /wd4324 /wd4275 /wd4512 /wd4589 /wd4819 /MDd /Zi /Ob0 /Od /RTC1 \n", 174 | " C Compiler: C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.33.31629/bin/Hostx64/x64/cl.exe\n", 175 | " C flags (Release): /DWIN32 /D_WINDOWS /W3 /D _CRT_SECURE_NO_DEPRECATE /D _CRT_NONSTDC_NO_DEPRECATE /D _SCL_SECURE_NO_WARNINGS /Gy /bigobj /Oi /fp:precise /FS /MD /O2 /Ob2 /DNDEBUG /Zi\n", 176 | " C flags (Debug): /DWIN32 /D_WINDOWS /W3 /D _CRT_SECURE_NO_DEPRECATE /D _CRT_NONSTDC_NO_DEPRECATE /D _SCL_SECURE_NO_WARNINGS /Gy /bigobj /Oi /fp:precise /FS /MDd /Zi /Ob0 /Od /RTC1 \n", 177 | " Linker flags (Release): /machine:x64 /INCREMENTAL:NO /debug\n", 178 | " Linker flags (Debug): /machine:x64 /debug /INCREMENTAL \n", 179 | " ccache: NO\n", 180 | " Precompiled headers: NO\n", 181 | " Extra dependencies: cudart_static.lib nppc.lib nppial.lib nppicc.lib nppidei.lib nppif.lib nppig.lib nppim.lib nppist.lib nppisu.lib nppitc.lib npps.lib cublas.lib cudnn.lib cufft.lib -LIBPATH:\"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8/lib/x64\"\n", 182 | " 3rdparty dependencies:\n", 183 | "\n", 184 | " OpenCV modules:\n", 185 | 
" To be built: aruco barcode bgsegm bioinspired calib3d ccalib core cudaarithm cudabgsegm cudacodec cudafeatures2d cudafilters cudaimgproc cudalegacy cudaobjdetect cudaoptflow cudastereo cudawarping cudev datasets dnn dnn_objdetect dnn_superres dpm face features2d flann fuzzy gapi hfs highgui img_hash imgcodecs imgproc intensity_transform line_descriptor mcc ml objdetect optflow phase_unwrapping photo plot python3 quality rapid reg rgbd saliency shape stereo stitching structured_light superres surface_matching text tracking ts video videoio videostab wechat_qrcode world xfeatures2d ximgproc xobjdetect xphoto\n", 186 | " Disabled: -\n", 187 | " Disabled by dependency: -\n", 188 | " Unavailable: alphamat cvv freetype hdf java julia matlab ovis python2 python2 sfm viz\n", 189 | " Applications: tests perf_tests examples apps\n", 190 | " Documentation: NO\n", 191 | " Non-free algorithms: NO\n", 192 | "\n", 193 | " Windows RT support: NO\n", 194 | "\n", 195 | " GUI: \n", 196 | " Win32 UI: YES\n", 197 | " OpenGL support: YES (opengl32 glu32)\n", 198 | " VTK support: NO\n", 199 | "\n", 200 | " Media I/O: \n", 201 | " ZLib: build (ver 1.2.12)\n", 202 | " JPEG: build-libjpeg-turbo (ver 2.1.3-62)\n", 203 | " SIMD Support Request: YES\n", 204 | " SIMD Support: NO\n", 205 | " WEBP: build (ver encoder: 0x020f)\n", 206 | " PNG: build (ver 1.6.37)\n", 207 | " TIFF: build (ver 42 - 4.2.0)\n", 208 | " JPEG 2000: build (ver 2.4.0)\n", 209 | " OpenEXR: build (ver 2.3.0)\n", 210 | " HDR: YES\n", 211 | " SUNRASTER: YES\n", 212 | " PXM: YES\n", 213 | " PFM: YES\n", 214 | "\n", 215 | " Video I/O:\n", 216 | " DC1394: NO\n", 217 | " FFMPEG: YES (prebuilt binaries)\n", 218 | " avcodec: YES (58.134.100)\n", 219 | " avformat: YES (58.76.100)\n", 220 | " avutil: YES (56.70.100)\n", 221 | " swscale: YES (5.9.100)\n", 222 | " avresample: YES (4.0.0)\n", 223 | " GStreamer: NO\n", 224 | " DirectShow: YES\n", 225 | " Media Foundation: YES\n", 226 | " DXVA: YES\n", 227 | "\n", 228 | " Parallel 
framework: Concurrency\n", 229 | "\n", 230 | " Trace: YES (with Intel ITT)\n", 231 | "\n", 232 | " Other third-party libraries:\n", 233 | " Intel IPP: 2020.0.0 Gold [2020.0.0]\n", 234 | " at: D:/build/opencv/cuda_11_8_cc_all_sym/3rdparty/ippicv/ippicv_win/icv\n", 235 | " Intel IPP IW: sources (2020.0.0)\n", 236 | " at: D:/build/opencv/cuda_11_8_cc_all_sym/3rdparty/ippicv/ippicv_win/iw\n", 237 | " Lapack: NO\n", 238 | " Eigen: NO\n", 239 | " Custom HAL: NO\n", 240 | " Protobuf: build (3.19.1)\n", 241 | "\n", 242 | " NVIDIA CUDA: YES (ver 11.8, CUFFT CUBLAS NVCUVID NVCUVENC FAST_MATH)\n", 243 | " NVIDIA GPU arch: 35 37 50 52 60 61 70 75 80 86\n", 244 | " NVIDIA PTX archs: 86\n", 245 | "\n", 246 | " cuDNN: YES (ver 8.6.0)\n", 247 | "\n", 248 | " OpenCL: YES (NVD3D11)\n", 249 | " Include path: D:/repos/opencv/opencv/3rdparty/include/opencl/1.2\n", 250 | " Link libraries: Dynamic load\n", 251 | "\n", 252 | " Python 3:\n", 253 | " Interpreter: C:/Users/b/mambaforge//python.exe (ver 3.9.13)\n", 254 | " Libraries: C:/Users/b/mambaforge//libs/python39.lib (ver 3.9.13)\n", 255 | " numpy: C:/Users/b/mambaforge//lib/site-packages/numpy/core/include (ver 1.23.3)\n", 256 | " install path: C:/Users/b/mambaforge//Lib/site-packages//cv2/python-3.9\n", 257 | "\n", 258 | " Python (for build): C:/Users/b/mambaforge//python.exe\n", 259 | "\n", 260 | " Java: \n", 261 | " ant: NO\n", 262 | " JNI: NO\n", 263 | " Java wrappers: NO\n", 264 | " Java tests: NO\n", 265 | "\n", 266 | " Install to: D:/build/opencv/cuda_11_8_cc_all_sym/install\n", 267 | "-----------------------------------------------------------------\n", 268 | "\n", 269 | "\n" 270 | ] 271 | } 272 | ], 273 | "source": [ 274 | "print(cv2.getBuildInformation())" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "id": "d2a1fa50", 280 | "metadata": {}, 281 | "source": [ 282 | "That was easy but what can I do if I'm not as good at guessing what's missing. 
Next I will use the same example again to demonstrate how to find out which DLL's python is searching for." 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "id": "a19c2557", 288 | "metadata": {}, 289 | "source": [ 290 | "## Fix when path to missing dll's is not known" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "id": "2e87bb2c-2894-4b7b-b6cd-cb1d831d71e4", 296 | "metadata": {}, 297 | "source": [ 298 | "To find which DLL's are missing we can use [process monitor](https://learn.microsoft.com/en-us/sysinternals/downloads/procmon) which will enable us to see the names of the DLL's which python is trying to load.\n", 299 | "\n", 300 | "When you first run process monitor you will be presented with the option to filter the output. Since we only want to view files which are accessed by the python.exe process, on the dropdown select \"Process Name\" and in the text box type python.exe and click **Add** then select \"Operation\" from the first dropdown and \"CreateFile\" from the second and press **Add**. Your filter should now resemble the below." 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "id": "f50e1a35-ed8e-4e1a-b0e9-5082141f1e74", 306 | "metadata": {}, 307 | "source": [ 308 | "![](imgs/proc_mon_filter.png)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "id": "d48f1533-76da-4985-8c1d-beaa17ce7d81", 314 | "metadata": {}, 315 | "source": [ 316 | "Before continuing it is advisable to close any other python processes as the output from these will pollute the main window.\n", 317 | "\n", 318 | "Now start python and before typing `import cv2`, press the clear button (red trash can) in process monitor to clear any output generated during python's initialization."
319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 1, 324 | "id": "11dae139", 325 | "metadata": {}, 326 | "outputs": [ 327 | { 328 | "ename": "ImportError", 329 | "evalue": "DLL load failed while importing cv2: The specified module could not be found.", 330 | "output_type": "error", 331 | "traceback": [ 332 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 333 | "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)", 334 | "Cell \u001b[1;32mIn [1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcv2\u001b[39;00m\n", 335 | "\u001b[1;31mImportError\u001b[0m: DLL load failed while importing cv2: The specified module could not be found." 336 | ] 337 | } 338 | ], 339 | "source": [ 340 | "import cv2" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "id": "844f62eb-7f59-4fa7-8f99-4e878fc0a667", 346 | "metadata": {}, 347 | "source": [ 348 | "![title](imgs/proc_mon_failed_search_opencv.png)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "id": "5519ca3f-a004-4099-8e42-e9ed90c0b0b8", 354 | "metadata": {}, 355 | "source": [ 356 | "Because I have reset the python DLL search path on running `import cv2` I get the above output in process monitor which shows that we successfully found `cv2.cp3x-win_amd64.pyd` (otherwise we would see the \"ModuleNotFoundError: No module named 'cv2'\" error) however it also shows several attempts have been made to locate `opencv_img_hash_460.dll` and `opencv_world460.dll` without success.\n", 357 | "\n", 358 | "As before we add the directory containing these to the python DLL search path." 
359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 2, 364 | "id": "db489147-2ea4-438e-bfeb-8faafabbdc92", 365 | "metadata": {}, 366 | "outputs": [ 367 | { 368 | "ename": "ImportError", 369 | "evalue": "DLL load failed while importing cv2: The specified module could not be found.", 370 | "output_type": "error", 371 | "traceback": [ 372 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 373 | "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)", 374 | "Cell \u001b[1;32mIn [2], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m\n\u001b[0;32m 2\u001b[0m os\u001b[38;5;241m.\u001b[39madd_dll_directory(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mD:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mbuild\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mopencv\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mcuda_11_8_cc_all_sym\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mbin\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m----> 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcv2\u001b[39;00m\n", 375 | "\u001b[1;31mImportError\u001b[0m: DLL load failed while importing cv2: The specified module could not be found." 
376 | ] 377 | } 378 | ], 379 | "source": [ 380 | "import os\n", 381 | "os.add_dll_directory(\"D:\\\\build\\\\opencv\\\\cuda_11_8_cc_all_sym\\\\bin\")\n", 382 | "import cv2" 383 | ] 384 | }, 385 | { 386 | "cell_type": "markdown", 387 | "id": "f081282d-4a50-4d7e-af6b-fee7cced09df", 388 | "metadata": {}, 389 | "source": [ 390 | "![title](imgs/proc_mon_failed_search_nvidia.png)" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "id": "90e9fdc7-f159-4c00-b80a-2329b39dbdb7", 396 | "metadata": {}, 397 | "source": [ 398 | "Now process monitor shows that `opencv_img_hash_460.dll` and `opencv_world460.dll` were located successfully after a few attempts however we are missing `nppc64_11.dll` which is part of the CUDA SDK. As before if we add the CUDA SDK binary directory to the python DLL search path the call to `import cv2` will be successful. If however we were still seeing the same error we could simply repeat the process, that is examine the output in process monitor and add the directories containing the missing DLL's to the python DLL search path." 
399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": null, 404 | "id": "c7388249", 405 | "metadata": {}, 406 | "outputs": [], 407 | "source": [] 408 | } 409 | ], 410 | "metadata": { 411 | "kernelspec": { 412 | "display_name": "Python 3 (ipykernel)", 413 | "language": "python", 414 | "name": "python3" 415 | }, 416 | "language_info": { 417 | "codemirror_mode": { 418 | "name": "ipython", 419 | "version": 3 420 | }, 421 | "file_extension": ".py", 422 | "mimetype": "text/x-python", 423 | "name": "python", 424 | "nbconvert_exporter": "python", 425 | "pygments_lexer": "ipython3", 426 | "version": "3.9.13" 427 | }, 428 | "toc": { 429 | "base_numbering": 1, 430 | "nav_menu": {}, 431 | "number_sections": true, 432 | "sideBar": true, 433 | "skip_h1_title": false, 434 | "title_cell": "Table of Contents", 435 | "title_sidebar": "Contents", 436 | "toc_cell": false, 437 | "toc_position": {}, 438 | "toc_section_display": true, 439 | "toc_window_display": false 440 | }, 441 | "varInspector": { 442 | "cols": { 443 | "lenName": 16, 444 | "lenType": 16, 445 | "lenVar": 40 446 | }, 447 | "kernels_config": { 448 | "python": { 449 | "delete_cmd_postfix": "", 450 | "delete_cmd_prefix": "del ", 451 | "library": "var_list.py", 452 | "varRefreshCmd": "print(var_dic_list())" 453 | }, 454 | "r": { 455 | "delete_cmd_postfix": ") ", 456 | "delete_cmd_prefix": "rm(", 457 | "library": "var_list.r", 458 | "varRefreshCmd": "cat(var_dic_list()) " 459 | } 460 | }, 461 | "types_to_exclude": [ 462 | "module", 463 | "function", 464 | "builtin_function_or_method", 465 | "instance", 466 | "_Feature" 467 | ], 468 | "window_display": false 469 | } 470 | }, 471 | "nbformat": 4, 472 | "nbformat_minor": 5 473 | } 474 | -------------------------------------------------------------------------------- /nbs/opencv410x-video-read.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 
| "source": [ 7 | "# Comparison of [Nvidia Video Codec SDK ](https://developer.nvidia.com/nvidia-video-codec-sdk) with CPU and iGPU ([Quick Sync](https://en.wikipedia.org/wiki/Intel_Quick_Sync_Video)) decoding using OpenCV 4.1.x with python" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "To run the notebook download modified binary [here](https://mega.nz/#!SAwCWY7D!Av4-wPjAkm6rlANWfJbp1R8HlahueT56bhJSAhvSN18).\n", 15 | "\n", 16 | "Notes: \n", 17 | "1. Will not work correctly with OpenCV 4.1.0 because:\n", 18 | " - The python bindings do not work correctly, manually modified pyopencv_generated_types.h to enable cv.cudacodec.createVideoReadernextFrame() to work.\n", 19 | " - HENC not enabled for Nvidia decoder.\n", 20 | " - Quick Sync can load software decoder if more than one device is present (multiple GPU's, Nvidia Optimus etc.)\n", 21 | "2. cv.cudacodec.createVideoReadernextFrame() returns before the end of the video file\n", 22 | "3. CPU decoding supports far more codecs than the GPU, additionally GPU codec support depends on the GPU generation, see NVDEC_VideoDecoder_API_ProgGuide.pdf in the [NVidia Video Codec SDK documentation](https://developer.nvidia.com/nvidia-video-codec-sdk) for details."
23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Init" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "#export\n", 39 | "import os\n", 40 | "import time\n", 41 | "import numpy as np\n", 42 | "from functools import partial\n", 43 | "import matplotlib.pyplot as plt\n", 44 | "import cv2 as cv\n", 45 | "import pandas as pd" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 15, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "#export\n", 55 | "# globals\n", 56 | "#vid_path = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/768x576.avi'\n", 57 | "#vid_path = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/1920x1080.avi'\n", 58 | "#vid_path = 'rtsp://192.168.1.2/mediafile.264'\n", 59 | "\n", 60 | "# test files from http://jell.yfish.us/\n", 61 | "vid_path = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-h264.mkv'\n", 62 | "#vid_path = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-hevc-10bit.mkv'\n", 63 | "\n", 64 | "check_res = False" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 3, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "def CheckFrames(f1,f2,epsilon = -1,rows = -1,cols = -1,channels = -1):\n", 74 | " assert len(f1) > 0 and len(f1) == len(f2), f'f1 length {len(f1)}, f2 length {len(f2)}' \n", 75 | " epsilon = 0 if epsilon == -1 else epsilon\n", 76 | " rows = f1[0].shape[0] if rows == -1 else rows\n", 77 | " cols = f1[0].shape[1] if cols == -1 else cols\n", 78 | " channels = f1[0].shape[2] if channels == -1 and len(f1[0].shape) == 3 else channels \n", 79 | " for i in range(0,len(f1)):\n", 80 | " assert np.sum(f1[i][:rows,:cols,:channels] != f2[i][:rows,:cols,:channels]) <= epsilon, f'frame {i} different'" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 4, 86 | "metadata": {}, 87 | "outputs": 
[], 88 | "source": [ 89 | "%matplotlib inline" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "#export\n", 99 | "def ProcVid0(cap):\n", 100 | " n_frames, start, end = 0,0,0\n", 101 | " if (cap.IsOpen()== False): # replace with catch\n", 102 | " print(\"Error opening video stream or file\")\n", 103 | " return\n", 104 | " frames_available = True\n", 105 | " start = time.time() \n", 106 | " while(cap.IsOpen()):\n", 107 | " ret,_ = cap.GetFrame()\n", 108 | " if(ret):\n", 109 | " n_frames += 1 \n", 110 | " end = time.time()\n", 111 | " return (end - start)*1000/n_frames, n_frames;" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 6, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "#export\n", 121 | "# host mem not implemented, manually pin memory\n", 122 | "class PinnedMem(object):\n", 123 | " def __init__(self, size, dtype=np.uint8):\n", 124 | " self.array = np.empty(size,dtype)\n", 125 | " cv.cuda.registerPageLocked(self.array)\n", 126 | " self.pinned = True\n", 127 | " def __del__(self):\n", 128 | " cv.cuda.unregisterPageLocked(self.array)\n", 129 | " self.pinned = False\n", 130 | " def __repr__(self):\n", 131 | " return f'pinned = {self.pinned}'" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 26, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "#export\n", 141 | "class VidCap:\n", 142 | " def __init__(self,vid_path,max_frames = -1,store_res = False):\n", 143 | " self.vid_path = vid_path\n", 144 | " self.store_res = store_res\n", 145 | " self.res = []\n", 146 | " self.frame_num = 0\n", 147 | " self.open = False\n", 148 | " cap = cv.VideoCapture(vid_path)\n", 149 | " assert cap.isOpened(), f\"{vid_path}: cannot be opened!\"\n", 150 | " self.num_frames = cap.get(cv.CAP_PROP_FRAME_COUNT)\n", 151 | " self.max_frames = self.num_frames if max_frames == -1 else max_frames\n", 152 | " ret, 
frame = cap.read()\n", 153 | " cap.release()\n", 154 | " self.rows,self.cols,self.channels = frame.shape\n", 155 | " \n", 156 | " def UpdateState(self,ret): \n", 157 | " if (not ret or self.frame_num+1 == self.max_frames): \n", 158 | " self.open = False \n", 159 | " if(ret or self.frame_num+1 == self.max_frames):\n", 160 | " self.frame_num += 1\n", 161 | " \n", 162 | " def IsOpen(self): return self.open\n", 163 | " \n", 164 | "class CudaCap(VidCap):\n", 165 | " def __init__(self,vid_path,max_frames=-1, store_res=False):\n", 166 | " VidCap.__init__(self, vid_path, max_frames, store_res)\n", 167 | " # cudacodec always returns 4 channels - check grey video\n", 168 | " self.channels = 4\n", 169 | " # cudacodec seems to need rows/16\n", 170 | " self.rows = (np.ceil(self.rows/16)*16).astype(int)\n", 171 | " self.cap = cv.cudacodec.createVideoReader(self.vid_path)\n", 172 | " self.open = True\n", 173 | " self.frame_device = cv.cuda_GpuMat(self.rows,self.cols,cv.CV_8UC4)\n", 174 | " self.frame_host = PinnedMem((self.rows,self.cols,self.channels))\n", 175 | " \n", 176 | " def GetFrame(self):\n", 177 | " if(self.store_res):\n", 178 | " ret,_ = self.GetHostFrame()\n", 179 | " if(ret):\n", 180 | " self.res.append(np.copy(self.frame_host.array))\n", 181 | " return ret,self.frame_device\n", 182 | " else:\n", 183 | " return self.GetDeviceFrame()\n", 184 | " \n", 185 | " def GetDeviceFrame(self):\n", 186 | " ret,_ = self.cap.nextFrame(self.frame_device)\n", 187 | " self.UpdateState(ret)\n", 188 | " return ret,self.frame_device\n", 189 | " \n", 190 | " def GetHostFrame(self):\n", 191 | " ret,_ = self.GetDeviceFrame()\n", 192 | " if(ret):\n", 193 | " self.frame_device.download(self.frame_host.array)\n", 194 | " return ret,self.frame_host.array\n", 195 | " \n", 196 | "class CudaCapNpa(CudaCap):\n", 197 | " def __init__(self,vid_path,max_frames=-1,store_res=False):\n", 198 | " CudaCap.__init__(self, vid_path, max_frames, store_res)\n", 199 | " \n", 200 | " def GetDeviceFrame(self): 
\n", 201 | " ret,self.frame_device = self.cap.nextFrame()\n", 202 | " self.UpdateState(ret)\n", 203 | " return ret,self.frame_device\n", 204 | " \n", 205 | "class CpuCap(VidCap):\n", 206 | " def __init__(self,vid_path,max_frames=-1,store_res=False,backend=cv.CAP_ANY):\n", 207 | " VidCap.__init__(self, vid_path, max_frames, store_res) \n", 208 | "\n", 209 | " self.cap = cv.VideoCapture(self.vid_path,backend)\n", 210 | " assert self.cap.isOpened(), f\"{vid_path}: cannot be opened for backend: {backend}!\"\n", 211 | " #if self.cap.isOpened():\n", 212 | " self.open = True\n", 213 | " self.frame = np.empty((self.rows,self.cols,self.channels),np.uint8)\n", 214 | " \n", 215 | " def GetFrame(self):\n", 216 | " ret,_ = self.cap.read(self.frame)\n", 217 | " self.UpdateState(ret)\n", 218 | " if (ret):\n", 219 | " if(self.store_res):\n", 220 | " self.res.append(np.copy(self.frame))\n", 221 | " return ret,self.frame\n", 222 | " \n", 223 | " def __del__(self):\n", 224 | " self.cap.release()\n", 225 | " \n", 226 | "class CpuCapNpa(CpuCap):\n", 227 | " def __init__(self,vid_path,max_frames=-1,store_res=False,backend=cv.CAP_ANY):\n", 228 | " CpuCap.__init__(self, vid_path, max_frames, store_res,backend) \n", 229 | " \n", 230 | " def GetFrame(self):\n", 231 | " ret,self.frame = self.cap.read()\n", 232 | " self.UpdateState(ret)\n", 233 | " if (ret):\n", 234 | " if(self.store_res):\n", 235 | " self.res.append(np.copy(self.frame))\n", 236 | " return ret,self.frame\n", 237 | " " 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "## CPU" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 8, 257 | "metadata": {}, 258 | "outputs": [ 259 | { 260 | "name": "stdout", 261 | "output_type": "stream", 262 | "text": [ 263 | "CPU 0 (no pre alloc): 900 frames, 39.39 ms/frame\n" 264 | ] 265 | } 266 | ], 267 | "source": [ 
268 | "#export\n", 269 | "cpu_cap_npa = CpuCapNpa(vid_path,-1,check_res)\n", 270 | "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n", 271 | "print(f'CPU 0 (no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 9, 277 | "metadata": {}, 278 | "outputs": [ 279 | { 280 | "name": "stdout", 281 | "output_type": "stream", 282 | "text": [ 283 | "CPU 1: 900 frames, 23.99 ms/frame\n" 284 | ] 285 | } 286 | ], 287 | "source": [ 288 | "#export\n", 289 | "cpu_cap = CpuCap(vid_path,-1,check_res)\n", 290 | "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n", 291 | "print(f'CPU 1: {n_frames} frames, {cpu_time_1:.2f} ms/frame')" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 10, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "if(check_res):\n", 301 | " CheckFrames(cpu_cap.res,cpu_cap_npa.res)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "## CPU - Quicksync" 316 | ] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "metadata": {}, 321 | "source": [ 322 | "If this implementation is slower than the default, software decoding is probably taking place.\n", 323 | "\n", 324 | "To confirm hardware decoding in Windows 10, check the Video Decode window in the GPU pane of the Task Manager for activity as below. \n", 325 | "\n", 326 | "To fix, check drivers, OpenCV version etc.
" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "![title](imgs/quicksync.PNG)" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 35, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [ 342 | "#export\n", 343 | "vid_path_h264 = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-h264.h264'" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 31, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "name": "stdout", 353 | "output_type": "stream", 354 | "text": [ 355 | "CPU Quick Sync (no pre alloc): 900 frames, 14.73 ms/frame\n" 356 | ] 357 | } 358 | ], 359 | "source": [ 360 | "#export\n", 361 | "cpu_cap_mfx_npa = CpuCapNpa(vid_path_h264,-1,check_res,cv.CAP_INTEL_MFX)\n", 362 | "cpu_time_mfx_0,n_frames = ProcVid0(cpu_cap_mfx_npa)\n", 363 | "print(f'CPU Quick Sync (no pre alloc): {n_frames} frames, {cpu_time_mfx_0:.2f} ms/frame')" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 33, 369 | "metadata": {}, 370 | "outputs": [ 371 | { 372 | "name": "stdout", 373 | "output_type": "stream", 374 | "text": [ 375 | "CPU Quick Sync (no pre alloc): 900 frames, 9.27 ms/frame\n" 376 | ] 377 | } 378 | ], 379 | "source": [ 380 | "#export\n", 381 | "cpu_cap_mfx = CpuCap(vid_path_h264,-1,check_res,cv.CAP_INTEL_MFX)\n", 382 | "cpu_time_mfx_1,n_frames = ProcVid0(cpu_cap_mfx)\n", 383 | "print(f'CPU Quick Sync (no pre alloc): {n_frames} frames, {cpu_time_mfx_1:.2f} ms/frame')" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "## GPU" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "Not all GPU's have a hardware decoder, e.g. 
anything with GM108, see\n", 405 | "[(1)](https://devtalk.nvidia.com/default/topic/1024934/video-codec-and-optical-flow-sdk/cuvidcreatedecoder-returns-error-cuda_error_no_device/) and \n", 406 | "[(2)](https://developer.nvidia.com/video-encode-decode-gpu-support-matrix) for discussions." 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 16, 412 | "metadata": {}, 413 | "outputs": [ 414 | { 415 | "name": "stdout", 416 | "output_type": "stream", 417 | "text": [ 418 | "GPU 0 (no pre alloc): 899 frames, 14.34 ms/frame\n" 419 | ] 420 | } 421 | ], 422 | "source": [ 423 | "#export\n", 424 | "gpu_cap_npa = CudaCapNpa(vid_path,-1,check_res)\n", 425 | "gpu_time_0,n_frames = ProcVid0(gpu_cap_npa)\n", 426 | "print(f'GPU 0 (no pre alloc): {n_frames} frames, {gpu_time_0:.2f} ms/frame')" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": 17, 432 | "metadata": {}, 433 | "outputs": [ 434 | { 435 | "name": "stdout", 436 | "output_type": "stream", 437 | "text": [ 438 | "GPU 1: 899 frames, 11.74 ms/frame\n" 439 | ] 440 | } 441 | ], 442 | "source": [ 443 | "#export\n", 444 | "gpu_cap = CudaCap(vid_path,-1,check_res)\n", 445 | "gpu_time_1,n_frames = ProcVid0(gpu_cap)\n", 446 | "print(f'GPU 1: {n_frames} frames, {gpu_time_1:.2f} ms/frame')" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 18, 452 | "metadata": {}, 453 | "outputs": [], 454 | "source": [ 455 | "if(check_res):\n", 456 | " n_frames = min(len(gpu_cap.res),len(gpu_cap_npa.res))\n", 457 | " CheckFrames(gpu_cap.res[:n_frames],gpu_cap_npa.res[:n_frames])" 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "## Results" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": 3, 477 | "metadata": {}, 478 | "outputs": [ 479 | { 480 | "data": { 481 | "text/html": [ 482 | "\n", 484 | 
" \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | "
CPU/GPU Frame Proc Time (ms)
GTX 1060 (pre-alloc)7.85
HD Graphics 530 (pre-alloc)9.27
GTX 980M (pre-alloc)11.74
HD Graphics 4400 (pre-alloc)13.97
GTX 980M14.34
HD Graphics 53014.73
HD Graphics 5500 HDD (pre-alloc)18.03
i5-6500 (pre-alloc)22.01
HD Graphics 5500 HDD23.74
HD Graphics 440023.88
i7-6700HQ (pre-alloc)23.99
i7-6700HQ39.39
GT 730M (pre-alloc)40.64
GT 730M40.8
i5-4210U (pre-alloc)47.72
i5-4210U50.65
i5-5200U HDD (pre-alloc)51.06
i5-5200U HDD58.64
" 557 | ], 558 | "text/plain": [ 559 | "" 560 | ] 561 | }, 562 | "execution_count": 3, 563 | "metadata": {}, 564 | "output_type": "execute_result" 565 | } 566 | ], 567 | "source": [ 568 | "results = [['i7-6700HQ', 39.39],['i7-6700HQ (pre-alloc)', 23.99],['i5-6500 (pre-alloc)',22.01],['i5-5200U HDD',58.64],\n", 569 | " ['i5-5200U HDD (pre-alloc)',51.06],['HD Graphics 530',14.73],['HD Graphics 530 (pre-alloc)',9.27],\n", 570 | " ['HD Graphics 5500 HDD',23.74],['HD Graphics 5500 HDD (pre-alloc)',18.03],['GTX 980M',14.34],\n", 571 | " ['GTX 980M (pre-alloc)',11.74],['GTX 1060 (pre-alloc)',7.85],['i5-4210U',50.65],['i5-4210U (pre-alloc)',47.72],\n", 572 | " ['GT 730M',40.80],['GT 730M (pre-alloc)',40.64],['HD Graphics 4400',23.88],['HD Graphics 4400 (pre-alloc)',13.97]]\n", 573 | "df = pd.DataFrame(results,columns=['CPU/GPU','Frame Proc Time (ms)']).sort_values('Frame Proc Time (ms)').round(2)\n", 574 | "df.style.hide_index()" 575 | ] 576 | }, 577 | { 578 | "cell_type": "markdown", 579 | "metadata": {}, 580 | "source": [ 581 | "# Export" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": 75, 587 | "metadata": {}, 588 | "outputs": [ 589 | { 590 | "name": "stdout", 591 | "output_type": "stream", 592 | "text": [ 593 | "Converted opencv410x-video-read.ipynb to exp\\nb_opencv410x-video-read.py\n" 594 | ] 595 | } 596 | ], 597 | "source": [ 598 | "# taken from https://github.com/fastai/fastai_docs/blob/master/dev_nb/notebook2script.py\n", 599 | "!python notebook2script.py opencv410x-video-read.ipynb" 600 | ] 601 | }, 602 | { 603 | "cell_type": "code", 604 | "execution_count": 76, 605 | "metadata": {}, 606 | "outputs": [ 607 | { 608 | "name": "stdout", 609 | "output_type": "stream", 610 | "text": [ 611 | "CPU 0 (no pre alloc): 900 frames, 37.96 ms/frame\n", 612 | "CPU 1: 900 frames, 21.88 ms/frame\n", 613 | "CPU Quick Sync (no pre alloc): 900 frames, 15.31 ms/frame\n", 614 | "CPU Quick Sync (no pre alloc): 900 frames, 9.97 ms/frame\n", 615 | "GPU 0 (no 
pre alloc): 899 frames, 13.84 ms/frame\n", 616 | "GPU 1: 899 frames, 11.72 ms/frame\n", 617 | "[ INFO:0] global D:\\SSDBackup\\Dev\\Repos\\opencv_fork_1\\modules\\videoio\\src\\videoio_registry.cpp (187) cv::`anonymous-namespace'::VideoBackendRegistry::VideoBackendRegistry VIDEOIO: Enabled backends(7, sorted by priority): FFMPEG(1000); GSTREAMER(990); INTEL_MFX(980); MSMF(970); DSHOW(960); CV_IMAGES(950); CV_MJPEG(940)\n" 618 | ] 619 | } 620 | ], 621 | "source": [ 622 | "! python exp/nb_opencv410x-video-read.py" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": {}, 629 | "outputs": [], 630 | "source": [] 631 | } 632 | ], 633 | "metadata": { 634 | "kernelspec": { 635 | "display_name": "Python 3", 636 | "language": "python", 637 | "name": "python3" 638 | }, 639 | "language_info": { 640 | "codemirror_mode": { 641 | "name": "ipython", 642 | "version": 3 643 | }, 644 | "file_extension": ".py", 645 | "mimetype": "text/x-python", 646 | "name": "python", 647 | "nbconvert_exporter": "python", 648 | "pygments_lexer": "ipython3", 649 | "version": "3.7.3" 650 | } 651 | }, 652 | "nbformat": 4, 653 | "nbformat_minor": 2 654 | } 655 | -------------------------------------------------------------------------------- /nbs/opencv450-video-read.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Comparison of [Nvidia Video Codec SDK ](https://developer.nvidia.com/nvidia-video-codec-sdk) with CPU and iGPU ([Quick Sync](https://en.wikipedia.org/wiki/Intel_Quick_Sync_Video)) decoding using OpenCV 4.5.0 with python - CUDA 10.2, FFmpeg 4.1.3" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "To run the h264_cuvid test in the notebook you will need to compile OpenCV against ffmpeg libs built with --enable-cuda --enable-cuvid.\n", 15 | "\n", 16 | "Notes: \n", 17 |
"1. Whilst using VideoCapture with h264_cuvid decoding is only as fast as CPU decoding, it does offload the decoding leaving more CPU resources available. Additionally this option currently supports far more codecs than cv.cudacodec.VideoReader.\n", 18 | "2. GPU codec support depends on the GPU generation, see NVDEC_VideoDecoder_API_ProgGuide.pdf in the [NVidia Video Codec SDK documentation](https://developer.nvidia.com/nvidia-video-codec-sdk) for details.\n", 19 | "3. Unfortunately the current QuickSync implementation does not support container formats or RTSP streaming." 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Init" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "#export\n", 36 | "import os\n", 37 | "import time\n", 38 | "import numpy as np\n", 39 | "from functools import partial\n", 40 | "import matplotlib.pyplot as plt\n", 41 | "import cv2 as cv\n", 42 | "import pandas as pd" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 57, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "#export\n", 52 | "# globals\n", 53 | "#vid_path = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/768x576.avi'\n", 54 | "#vid_path = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/1920x1080.avi'\n", 55 | "#vid_path = 'rtsp://127.0.0.1/mediafile.264'\n", 56 | "#vid_path=\"rtsp://127.0.0.1/jellyfish-120-mbps-4k-uhd-h264.264\";\n", 57 | "#vid_path=\"rtsp://127.0.0.1/big_buck_bunny.264\";\n", 58 | "# test files from http://jell.yfish.us/\n", 59 | "vid_path = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-h264.mkv'\n", 60 | "#vid_path = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-hevc-10bit.mkv'\n", 61 | "\n", 62 | "check_res = False" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 3, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | 
"def CheckFrames(f1,f2,epsilon = -1,rows = -1,cols = -1,channels = -1):\n", 72 | " assert len(f1) > 0 and len(f1) == len(f2), f'f1 length {len(f1)}, f2 length {len(f2)}' \n", 73 | " epsilon = 0 if epsilon == -1 else epsilon\n", 74 | " rows = f1[0].shape[0] if rows == -1 else rows\n", 75 | " cols = f1[0].shape[1] if cols == -1 else cols\n", 76 | " channels = f1[0].shape[2] if channels == -1 and len(f1[0].shape) == 3 else channels \n", 77 | " for i in range(0,len(f1)):\n", 78 | " assert np.sum(f1[i][:rows,:cols,:channels] != f2[i][:rows,:cols,:channels]) <= epsilon, f'frame {i} different'" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 4, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "%matplotlib inline" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 5, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "#export\n", 97 | "def ProcVid0(cap):\n", 98 | " n_frames, start, end = 0,0,0\n", 99 | " if (cap.IsOpen()== False): # replace with catch\n", 100 | " print(\"Error opening video stream or file\")\n", 101 | " return\n", 102 | " frames_available = True\n", 103 | " start = time.time() \n", 104 | " while(cap.IsOpen()):\n", 105 | " ret,_ = cap.GetFrame()\n", 106 | " if(ret):\n", 107 | " n_frames += 1 \n", 108 | " end = time.time()\n", 109 | " return (end - start)*1000/n_frames, n_frames;" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 6, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "#export\n", 119 | "# host mem not implemented, manually pin memory\n", 120 | "class PinnedMem(object):\n", 121 | " def __init__(self, size, dtype=np.uint8):\n", 122 | " self.array = np.empty(size,dtype)\n", 123 | " cv.cuda.registerPageLocked(self.array)\n", 124 | " self.pinned = True\n", 125 | " def __del__(self):\n", 126 | " cv.cuda.unregisterPageLocked(self.array)\n", 127 | " self.pinned = False\n", 128 | " def __repr__(self):\n", 129 | " return f'pinned = 
{self.pinned}'" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 7, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "#export\n", 139 | "class VidCap:\n", 140 | " def __init__(self,vid_path,max_frames = -1,store_res = False,file_to_write=None):\n", 141 | " self.vid_path = vid_path\n", 142 | " self.store_res = store_res\n", 143 | " self.res = []\n", 144 | " self.frame_num = 0\n", 145 | " self.open = False\n", 146 | " cap = cv.VideoCapture(vid_path)\n", 147 | " assert cap.isOpened(), f\"{vid_path}: cannot be opened!\"\n", 148 | " self.num_frames = cap.get(cv.CAP_PROP_FRAME_COUNT)\n", 149 | " self.max_frames = self.num_frames if max_frames == -1 else max_frames\n", 150 | " ret, frame = cap.read()\n", 151 | " cap.release()\n", 152 | " self.rows,self.cols,self.channels = frame.shape\n", 153 | " self.write_video = False\n", 154 | " if(file_to_write):\n", 155 | " self.write_video = True\n", 156 | " #fourcc = cv.VideoWriter_fourcc(*'H264')\n", 157 | " fourcc = cv.VideoWriter_fourcc('M', '4', 'S', '2')\n", 158 | " self.out = cv.VideoWriter(file_to_write,cv.CAP_FFMPEG,fourcc,25,(self.cols,self.rows))\n", 159 | " \n", 160 | " def UpdateState(self,ret): \n", 161 | " if (not ret or self.frame_num+1 == self.max_frames): \n", 162 | " self.open = False \n", 163 | " if(ret or self.frame_num+1 == self.max_frames):\n", 164 | " self.frame_num += 1\n", 165 | " \n", 166 | " def IsOpen(self): return self.open\n", 167 | " \n", 168 | " def __del__(self):\n", 169 | " if(self.write_video):\n", 170 | " self.out.release()\n", 171 | " \n", 172 | " #def WriteFrame(self,frame):\n", 173 | " # if(self.file_to_write):\n", 174 | " # self.out.write(frame)\n", 175 | " \n", 176 | "class CudaCap(VidCap):\n", 177 | " def __init__(self,vid_path,max_frames=-1, store_res=False, file_to_write=None):\n", 178 | " VidCap.__init__(self, vid_path, max_frames, store_res, file_to_write)\n", 179 | " # cudacodec always returns 4 channels - check grey video\n", 180 | " 
self.channels = 4\n", 181 | " # cudacodec seems to need rows/16\n", 182 | " self.rows = (np.ceil(self.rows/16)*16).astype(int)\n", 183 | " self.cap = cv.cudacodec.createVideoReader(self.vid_path)\n", 184 | " self.open = True\n", 185 | " self.frame_device = cv.cuda_GpuMat(self.rows,self.cols,cv.CV_8UC4)\n", 186 | " self.frame_host = PinnedMem((self.rows,self.cols,self.channels))\n", 187 | " self.stream = cv.cuda_Stream()\n", 188 | " \n", 189 | " def GetFrame(self):\n", 190 | " if(self.store_res or self.write_video):\n", 191 | " ret,_ = self.GetHostFrame()\n", 192 | " if(self.write_video):\n", 193 | " self.out.write(self.frame_host.array[:,:,:3])\n", 194 | " if(self.store_res and ret):\n", 195 | " self.res.append(np.copy(self.frame_host.array))\n", 196 | " return ret,self.frame_device\n", 197 | " else:\n", 198 | " return self.GetDeviceFrame()\n", 199 | " \n", 200 | " def GetDeviceFrame(self):\n", 201 | " ret,_ = self.cap.nextFrame(self.frame_device,self.stream)\n", 202 | " self.UpdateState(ret)\n", 203 | " return ret,self.frame_device\n", 204 | " \n", 205 | " def GetHostFrame(self):\n", 206 | " ret,_ = self.GetDeviceFrame()\n", 207 | " if(ret):\n", 208 | " self.frame_device.download(self.frame_host.array)\n", 209 | " return ret,self.frame_host.array\n", 210 | " \n", 211 | " \n", 212 | "class CudaCapNpa(CudaCap):\n", 213 | " def __init__(self,vid_path,max_frames=-1,store_res=False, file_to_write=None):\n", 214 | " CudaCap.__init__(self, vid_path, max_frames, store_res, file_to_write)\n", 215 | " \n", 216 | " def GetDeviceFrame(self): \n", 217 | " ret,self.frame_device = self.cap.nextFrame()\n", 218 | " self.UpdateState(ret)\n", 219 | " return ret,self.frame_device\n", 220 | " \n", 221 | "class CpuCap(VidCap):\n", 222 | " def __init__(self,vid_path,max_frames=-1,store_res=False,backend=cv.CAP_ANY):\n", 223 | " VidCap.__init__(self, vid_path, max_frames, store_res) \n", 224 | "\n", 225 | " self.cap = cv.VideoCapture(self.vid_path,backend)\n", 226 | " assert 
self.cap.isOpened(), f\"{vid_path}: cannot be opened for backend: {backend}!\"\n", 227 | " #if self.cap.isOpened():\n", 228 | " self.open = True\n", 229 | " self.frame = np.empty((self.rows,self.cols,self.channels),np.uint8)\n", 230 | " \n", 231 | " def GetFrame(self):\n", 232 | " ret,_ = self.cap.read(self.frame)\n", 233 | " self.UpdateState(ret)\n", 234 | " if (ret):\n", 235 | " if(self.store_res):\n", 236 | " self.res.append(np.copy(self.frame))\n", 237 | " return ret,self.frame\n", 238 | " \n", 239 | " def __del__(self):\n", 240 | " self.cap.release()\n", 241 | " \n", 242 | "class CpuCapNpa(CpuCap):\n", 243 | " def __init__(self,vid_path,max_frames=-1,store_res=False,backend=cv.CAP_ANY):\n", 244 | " CpuCap.__init__(self, vid_path, max_frames, store_res,backend) \n", 245 | " \n", 246 | " def GetFrame(self):\n", 247 | " ret,self.frame = self.cap.read()\n", 248 | " self.UpdateState(ret)\n", 249 | " if (ret):\n", 250 | " if(self.store_res):\n", 251 | " self.res.append(np.copy(self.frame))\n", 252 | " return ret,self.frame\n", 253 | " " 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "## CPU" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 71, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "os.environ[\"OPENCV_FFMPEG_CAPTURE_OPTIONS\"] = \"\"" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 72, 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "name": "stdout", 286 | "output_type": "stream", 287 | "text": [ 288 | "CPU 0 (no pre alloc): 900 frames, 19.07 ms/frame\n" 289 | ] 290 | } 291 | ], 292 | "source": [ 293 | "#export\n", 294 | "cpu_cap_npa = CpuCapNpa(vid_path,-1,check_res)\n", 295 | "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n", 296 | "print(f'CPU 0 (no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')" 297 | ] 
298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 73, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "name": "stdout", 306 | "output_type": "stream", 307 | "text": [ 308 | "CPU 1: 900 frames, 12.09 ms/frame\n" 309 | ] 310 | } 311 | ], 312 | "source": [ 313 | "#export\n", 314 | "cpu_cap = CpuCap(vid_path,-1,check_res)\n", 315 | "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n", 316 | "print(f'CPU 1: {n_frames} frames, {cpu_time_1:.2f} ms/frame')" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 74, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "if(check_res):\n", 326 | " CheckFrames(cpu_cap.res,cpu_cap_npa.res)" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "## CPU - Quicksync" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "If this implementation is slower than the default, software decoding is probably taking place.\n", 348 | "\n", 349 | "To confirm hardware decoding in Windows 10, check the Video Decode window in the GPU pane of the Task Manager for activity, as shown below. \n", 350 | "\n", 351 | "To fix this, check the drivers, the OpenCV version, etc. 
" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "![title](imgs/quicksync.PNG)" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 14, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "#export\n", 368 | "vid_path_h264 = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-h264.h264'" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 15, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | "CPU Quick Sync (no pre alloc): 900 frames, 29.84 ms/frame\n" 381 | ] 382 | } 383 | ], 384 | "source": [ 385 | "#export\n", 386 | "cpu_cap_mfx_npa = CpuCapNpa(vid_path_h264,-1,check_res,cv.CAP_INTEL_MFX)\n", 387 | "cpu_time_mfx_0,n_frames = ProcVid0(cpu_cap_mfx_npa)\n", 388 | "print(f'CPU Quick Sync (no pre alloc): {n_frames} frames, {cpu_time_mfx_0:.2f} ms/frame')" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 16, 394 | "metadata": {}, 395 | "outputs": [ 396 | { 397 | "name": "stdout", 398 | "output_type": "stream", 399 | "text": [ 400 | "CPU Quick Sync (no pre alloc): 900 frames, 28.68 ms/frame\n" 401 | ] 402 | } 403 | ], 404 | "source": [ 405 | "#export\n", 406 | "cpu_cap_mfx = CpuCap(vid_path_h264,-1,check_res,cv.CAP_INTEL_MFX)\n", 407 | "cpu_time_mfx_1,n_frames = ProcVid0(cpu_cap_mfx)\n", 408 | "print(f'CPU Quick Sync (no pre alloc): {n_frames} frames, {cpu_time_mfx_1:.2f} ms/frame')" 409 | ] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "metadata": {}, 414 | "source": [ 415 | "" 416 | ] 417 | }, 418 | { 419 | "cell_type": "markdown", 420 | "metadata": {}, 421 | "source": [ 422 | "## GPU" 423 | ] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": {}, 428 | "source": [ 429 | "Not all GPU's have a hardware decoder, e.g. 
anything with GM108, see\n", 430 | "[(1)](https://devtalk.nvidia.com/default/topic/1024934/video-codec-and-optical-flow-sdk/cuvidcreatedecoder-returns-error-cuda_error_no_device/) and \n", 431 | "[(2)](https://developer.nvidia.com/video-encode-decode-gpu-support-matrix) for discussions." 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "### cv.VideoCapture - h264_cuvid" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": 78, 444 | "metadata": {}, 445 | "outputs": [], 446 | "source": [ 447 | "os.environ[\"OPENCV_FFMPEG_CAPTURE_OPTIONS\"] = \"video_codec;h264_cuvid|video_codec;hevc_cuvid\"" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 79, 453 | "metadata": {}, 454 | "outputs": [ 455 | { 456 | "name": "stdout", 457 | "output_type": "stream", 458 | "text": [ 459 | "CPU 0 with h264_cuvid (no pre alloc): 900 frames, 17.78 ms/frame\n" 460 | ] 461 | } 462 | ], 463 | "source": [ 464 | "#export\n", 465 | "cpu_cap_npa = CpuCapNpa(vid_path,-1,check_res)\n", 466 | "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n", 467 | "print(f'CPU 0 with h264_cuvid (no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 80, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "name": "stdout", 477 | "output_type": "stream", 478 | "text": [ 479 | "CPU 1 with h264_cuvid: 900 frames, 12.89 ms/frame\n" 480 | ] 481 | } 482 | ], 483 | "source": [ 484 | "#export\n", 485 | "cpu_cap = CpuCap(vid_path,-1,check_res)\n", 486 | "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n", 487 | "print(f'CPU 1 with h264_cuvid: {n_frames} frames, {cpu_time_1:.2f} ms/frame')" 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "### cv.cudacodec.VideoReader" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 70, 500 | "metadata": {}, 501 | "outputs": [], 502 | 
"source": [ 503 | "# Check the check_res frames.\n", 504 | "check_res = False\n", 505 | "file_to_write=\"F:\\\\Dev\\\\Media\\\\jelly_out.mp4\"" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 67, 511 | "metadata": {}, 512 | "outputs": [ 513 | { 514 | "name": "stdout", 515 | "output_type": "stream", 516 | "text": [ 517 | "GPU 0 (no pre alloc): 900 frames, 6.03 ms/frame\n" 518 | ] 519 | } 520 | ], 521 | "source": [ 522 | "#export\n", 523 | "gpu_cap_npa = CudaCapNpa(vid_path,-1,check_res)\n", 524 | "gpu_time_0,n_frames = ProcVid0(gpu_cap_npa)\n", 525 | "print(f'GPU 0 (no pre alloc): {n_frames} frames, {gpu_time_0:.2f} ms/frame')" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 68, 531 | "metadata": {}, 532 | "outputs": [ 533 | { 534 | "name": "stdout", 535 | "output_type": "stream", 536 | "text": [ 537 | "GPU 0 (no pre alloc): 900 frames, 5.78 ms/frame\n" 538 | ] 539 | } 540 | ], 541 | "source": [ 542 | "gpu_cap_npa = CudaCapNpa(vid_path,-1,check_res)\n", 543 | "gpu_time_0,n_frames = ProcVid0(gpu_cap_npa)\n", 544 | "print(f'GPU 0 (no pre alloc): {n_frames} frames, {gpu_time_0:.2f} ms/frame')" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": 69, 550 | "metadata": {}, 551 | "outputs": [], 552 | "source": [ 553 | "if(check_res):\n", 554 | " n_frames = min(len(gpu_cap.res),len(gpu_cap_npa.res))\n", 555 | " CheckFrames(gpu_cap.res[:n_frames],gpu_cap_npa.res[:n_frames])" 556 | ] 557 | }, 558 | { 559 | "cell_type": "markdown", 560 | "metadata": {}, 561 | "source": [ 562 | "" 563 | ] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": {}, 568 | "source": [ 569 | "## Results" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 83, 575 | "metadata": {}, 576 | "outputs": [ 577 | { 578 | "data": { 579 | "text/html": [ 580 | "\n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 
592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | "
CPU/GPU Frame Proc Time (ms)
RTX 2080 (pre-alloc)5.780000
RTX 20806.030000
GTX 1060 (pre-alloc)7.850000
HD Graphics 530 (pre-alloc)9.270000
GTX 980M (pre-alloc)11.740000
i7-8700 (pre-alloc)12.090000
RTX 2080 h264_cuvid (pre-alloc)12.890000
HD Graphics 4400 (pre-alloc)13.970000
GTX 980M14.340000
HD Graphics 53014.730000
RTX 2080 h264_cuvid17.780000
HD Graphics 5500 HDD (pre-alloc)18.030000
i7-870019.070000
i5-6500 (pre-alloc)22.010000
HD Graphics 5500 HDD23.740000
HD Graphics 440023.880000
i7-6700HQ (pre-alloc)23.990000
i7-6700HQ39.390000
GT 730M (pre-alloc)40.640000
GT 730M40.800000
i5-4210U (pre-alloc)47.720000
i5-4210U50.650000
i5-5200U HDD (pre-alloc)51.060000
i5-5200U HDD58.640000
" 679 | ], 680 | "text/plain": [ 681 | "" 682 | ] 683 | }, 684 | "execution_count": 83, 685 | "metadata": {}, 686 | "output_type": "execute_result" 687 | } 688 | ], 689 | "source": [ 690 | "results = [['i7-6700HQ', 39.39],['i7-6700HQ (pre-alloc)', 23.99],['i5-6500 (pre-alloc)',22.01],['i5-5200U HDD',58.64],\n", 691 | " ['i5-5200U HDD (pre-alloc)',51.06],['HD Graphics 530',14.73],['HD Graphics 530 (pre-alloc)',9.27],\n", 692 | " ['HD Graphics 5500 HDD',23.74],['HD Graphics 5500 HDD (pre-alloc)',18.03],['GTX 980M',14.34],\n", 693 | " ['GTX 980M (pre-alloc)',11.74],['GTX 1060 (pre-alloc)',7.85],['i5-4210U',50.65],['i5-4210U (pre-alloc)',47.72],\n", 694 | " ['GT 730M',40.80],['GT 730M (pre-alloc)',40.64],['HD Graphics 4400',23.88],['HD Graphics 4400 (pre-alloc)',13.97],\n", 695 | " ['i7-8700',19.07],['i7-8700 (pre-alloc)',12.09],['RTX 2080 h264_cuvid',17.78],\n", 696 | " ['RTX 2080 h264_cuvid (pre-alloc)',12.89],['RTX 2080',6.03],['RTX 2080 (pre-alloc)',5.78]]\n", 697 | "df = pd.DataFrame(results,columns=['CPU/GPU','Frame Proc Time (ms)']).sort_values('Frame Proc Time (ms)').round(2)\n", 698 | "df.style.hide_index()" 699 | ] 700 | }, 701 | { 702 | "cell_type": "markdown", 703 | "metadata": {}, 704 | "source": [ 705 | "# Export" 706 | ] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": 84, 711 | "metadata": {}, 712 | "outputs": [ 713 | { 714 | "name": "stdout", 715 | "output_type": "stream", 716 | "text": [ 717 | "Converted opencv410x-video-read.ipynb to exp\\nb_opencv410x-video-read.py\n" 718 | ] 719 | } 720 | ], 721 | "source": [ 722 | "# taken from https://github.com/fastai/fastai_docs/blob/master/dev_nb/notebook2script.py\n", 723 | "!python notebook2script.py opencv410x-video-read.ipynb" 724 | ] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": null, 729 | "metadata": {}, 730 | "outputs": [], 731 | "source": [ 732 | "! 
python exp/nb_opencv410x-video-read.py" 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": null, 738 | "metadata": {}, 739 | "outputs": [], 740 | "source": [] 741 | } 742 | ], 743 | "metadata": { 744 | "kernelspec": { 745 | "display_name": "Python 3", 746 | "language": "python", 747 | "name": "python3" 748 | }, 749 | "language_info": { 750 | "codemirror_mode": { 751 | "name": "ipython", 752 | "version": 3 753 | }, 754 | "file_extension": ".py", 755 | "mimetype": "text/x-python", 756 | "name": "python", 757 | "nbconvert_exporter": "python", 758 | "pygments_lexer": "ipython3", 759 | "version": "3.7.7" 760 | }, 761 | "toc": { 762 | "base_numbering": 1, 763 | "nav_menu": {}, 764 | "number_sections": true, 765 | "sideBar": true, 766 | "skip_h1_title": false, 767 | "title_cell": "Table of Contents", 768 | "title_sidebar": "Contents", 769 | "toc_cell": false, 770 | "toc_position": {}, 771 | "toc_section_display": true, 772 | "toc_window_display": false 773 | } 774 | }, 775 | "nbformat": 4, 776 | "nbformat_minor": 2 777 | } 778 | -------------------------------------------------------------------------------- /nbs/opencv450-video-read-CUDA_10_0_VideoCodecSDK_11_0_10.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Comparison of [Nvidia Video Codec SDK](https://developer.nvidia.com/nvidia-video-codec-sdk) with CPU and iGPU ([Quick Sync](https://en.wikipedia.org/wiki/Intel_Quick_Sync_Video)) decoding using OpenCV 4.5.0 with python - CUDA 10.0, Cuda Video Codec SDK 11.0.10 and Ffmpeg 4.3.1 master 99888-g5c7823ff1c-win64-lgpl - GPU Driver 457.30 " 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "To run the h264_cuvid and hevc_cuvid test in the notebook you will need to compile OpenCV against Ffmpeg libs built with --enable-cuda --enable-cuvid.\n", 15 | "\n", 16 | 
"Notes: \n", 17 | "1. Whilst using VideoCapture with h264_cuvid decoding is only as fast as CPU decoding, it does offload the decoding leaving more CPU resources available. Additionally this option currently supports far more codecs than cv.cudacodec.VideoReader.\n", 18 | "2. GPU codec support depends on the GPU generation, see NVDEC_VideoDecoder_API_ProgGuide.pdf in the [NVidia Video Codec SDK documentation](https://developer.nvidia.com/nvidia-video-codec-sdk) for details.\n", 19 | "3. Unfortunately the current QuickSync implementation does not support container formats or RTSP streaming." 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Init" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "#export\n", 36 | "import os\n", 37 | "import time\n", 38 | "import numpy as np\n", 39 | "from functools import partial\n", 40 | "import matplotlib.pyplot as plt\n", 41 | "import cv2 as cv\n", 42 | "import pandas as pd\n", 43 | "import psutil" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "#export\n", 53 | "# globals\n", 54 | "#vid_path = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/768x576.avi'\n", 55 | "#vid_path = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/1920x1080.avi'\n", 56 | "#vid_path = 'rtsp://127.0.0.1/mediafile.264'\n", 57 | "#vid_path=\"rtsp://127.0.0.1/jellyfish-120-mbps-4k-uhd-h264.264\";\n", 58 | "#vid_path=\"rtsp://127.0.0.1/big_buck_bunny.264\";\n", 59 | "# test files from http://jell.yfish.us/\n", 60 | "vid_path_264 = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-h264.mkv'\n", 61 | "vid_path_265 = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-hevc-10bit.mkv'\n", 62 | "\n", 63 | "check_res = False" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "metadata": 
{}, 70 | "outputs": [], 71 | "source": [ 72 | "def CheckFrames(f1,f2,epsilon = -1,rows = -1,cols = -1,channels = -1):\n", 73 | " assert len(f1) > 0 and len(f1) == len(f2), f'f1 length {len(f1)}, f2 length {len(f2)}' \n", 74 | " epsilon = 0 if epsilon == -1 else epsilon\n", 75 | " rows = f1[0].shape[0] if rows == -1 else rows\n", 76 | " cols = f1[0].shape[1] if cols == -1 else cols\n", 77 | " channels = f1[0].shape[2] if channels == -1 and len(f1[0].shape) == 3 else channels \n", 78 | " for i in range(0,len(f1)):\n", 79 | " assert np.sum(f1[i][:rows,:cols,:channels] != f2[i][:rows,:cols,:channels]) <= epsilon, f'frame {i} different'" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 4, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "%matplotlib inline" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 5, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "#export\n", 98 | "def ProcVid0(cap, measure_cpu = True):\n", 99 | " n_frames, start, end, max_cpu, av_cpu, n_cpu_snapshots = 0,0,0,0,0,0\n", 100 | " if(measure_cpu):\n", 101 | " p = psutil.Process()\n", 102 | " cpu_count = psutil.cpu_count()\n", 103 | " if (cap.IsOpen()== False): # replace with catch\n", 104 | " print(\"Error opening video stream or file\")\n", 105 | " return\n", 106 | " frames_available = True\n", 107 | " start = time.time() \n", 108 | " while(cap.IsOpen()):\n", 109 | " ret,_ = cap.GetFrame()\n", 110 | " if (measure_cpu):\n", 111 | " cpu_all_pc = p.cpu_percent()\n", 112 | " if(cpu_all_pc > 0):\n", 113 | " n_cpu_snapshots +=1\n", 114 | " cpu_pc = cpu_all_pc/cpu_count\n", 115 | " max_cpu = max(cpu_pc,max_cpu)\n", 116 | " av_cpu += cpu_pc \n", 117 | " if(ret):\n", 118 | " n_frames += 1 \n", 119 | " end = time.time()\n", 120 | " if(measure_cpu): \n", 121 | " print(f'CPU utilization - max: {max_cpu:.2f}%, average {av_cpu/(n_cpu_snapshots):.2f}%')\n", 122 | " return (end - start)*1000/n_frames, n_frames;" 123 | ] 124 | }, 125 | { 
126 | "cell_type": "code", 127 | "execution_count": 6, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "#export\n", 132 | "# host mem not implemented, manually pin memory\n", 133 | "class PinnedMem(object):\n", 134 | " def __init__(self, size, dtype=np.uint8):\n", 135 | " self.array = np.empty(size,dtype)\n", 136 | " cv.cuda.registerPageLocked(self.array)\n", 137 | " self.pinned = True\n", 138 | " def __del__(self):\n", 139 | " cv.cuda.unregisterPageLocked(self.array)\n", 140 | " self.pinned = False\n", 141 | " def __repr__(self):\n", 142 | " return f'pinned = {self.pinned}'" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 7, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "#export\n", 152 | "class VidCap:\n", 153 | " def __init__(self,vid_path,max_frames = -1,store_res = False,file_to_write=None):\n", 154 | " self.vid_path = vid_path\n", 155 | " self.store_res = store_res\n", 156 | " self.res = []\n", 157 | " self.frame_num = 0\n", 158 | " self.open = False\n", 159 | " cap = cv.VideoCapture(vid_path)\n", 160 | " assert cap.isOpened(), f\"{vid_path}: cannot be opened!\"\n", 161 | " self.num_frames = cap.get(cv.CAP_PROP_FRAME_COUNT)\n", 162 | " self.max_frames = self.num_frames if max_frames == -1 else max_frames\n", 163 | " ret, frame = cap.read()\n", 164 | " cap.release()\n", 165 | " self.rows,self.cols,self.channels = frame.shape\n", 166 | " self.write_video = False\n", 167 | " if(file_to_write):\n", 168 | " self.write_video = True\n", 169 | " #fourcc = cv.VideoWriter_fourcc(*'H264')\n", 170 | " fourcc = cv.VideoWriter_fourcc('M', '4', 'S', '2')\n", 171 | " self.out = cv.VideoWriter(file_to_write,cv.CAP_FFMPEG,fourcc,25,(self.cols,self.rows))\n", 172 | " \n", 173 | " def UpdateState(self,ret): \n", 174 | " if (not ret or self.frame_num+1 == self.max_frames): \n", 175 | " self.open = False \n", 176 | " if(ret or self.frame_num+1 == self.max_frames):\n", 177 | " self.frame_num += 1\n", 178 | " \n", 
179 | " def IsOpen(self): return self.open\n", 180 | " \n", 181 | " def __del__(self):\n", 182 | " if(self.write_video):\n", 183 | " self.out.release()\n", 184 | " \n", 185 | " #def WriteFrame(self,frame):\n", 186 | " # if(self.file_to_write):\n", 187 | " # self.out.write(frame)\n", 188 | " \n", 189 | "class CudaCap(VidCap):\n", 190 | " def __init__(self,vid_path,max_frames=-1, store_res=False, file_to_write=None):\n", 191 | " VidCap.__init__(self, vid_path, max_frames, store_res, file_to_write)\n", 192 | " # cudacodec always returns 4 channels - check grey video\n", 193 | " self.channels = 4\n", 194 | " # cudacodec seems to need rows/16\n", 195 | " self.rows = (np.ceil(self.rows/16)*16).astype(int)\n", 196 | " self.cap = cv.cudacodec.createVideoReader(self.vid_path)\n", 197 | " self.open = True\n", 198 | " self.frame_device = cv.cuda_GpuMat(self.rows,self.cols,cv.CV_8UC4)\n", 199 | " self.frame_host = PinnedMem((self.rows,self.cols,self.channels))\n", 200 | " self.stream = cv.cuda_Stream()\n", 201 | " \n", 202 | " def GetFrame(self):\n", 203 | " if(self.store_res or self.write_video):\n", 204 | " ret,_ = self.GetHostFrame()\n", 205 | " if(self.write_video):\n", 206 | " self.out.write(self.frame_host.array[:,:,:3])\n", 207 | " if(self.store_res and ret):\n", 208 | " self.res.append(np.copy(self.frame_host.array))\n", 209 | " return ret,self.frame_device\n", 210 | " else:\n", 211 | " return self.GetDeviceFrame()\n", 212 | " \n", 213 | " def GetDeviceFrame(self):\n", 214 | " ret,_ = self.cap.nextFrame(self.frame_device,self.stream)\n", 215 | " self.UpdateState(ret)\n", 216 | " return ret,self.frame_device\n", 217 | " \n", 218 | " def GetHostFrame(self):\n", 219 | " ret,_ = self.GetDeviceFrame()\n", 220 | " if(ret):\n", 221 | " self.frame_device.download(self.frame_host.array)\n", 222 | " return ret,self.frame_host.array\n", 223 | " \n", 224 | " \n", 225 | "class CudaCapNpa(CudaCap):\n", 226 | " def __init__(self,vid_path,max_frames=-1,store_res=False, 
file_to_write=None):\n", 227 | " CudaCap.__init__(self, vid_path, max_frames, store_res, file_to_write)\n", 228 | " \n", 229 | " def GetDeviceFrame(self): \n", 230 | " ret,self.frame_device = self.cap.nextFrame()\n", 231 | " self.UpdateState(ret)\n", 232 | " return ret,self.frame_device\n", 233 | " \n", 234 | "class CpuCap(VidCap):\n", 235 | " def __init__(self,vid_path,max_frames=-1,store_res=False,backend=cv.CAP_ANY):\n", 236 | " VidCap.__init__(self, vid_path, max_frames, store_res) \n", 237 | "\n", 238 | " self.cap = cv.VideoCapture(self.vid_path,backend)\n", 239 | " assert self.cap.isOpened(), f\"{vid_path}: cannot be opened for backend: {backend}!\"\n", 240 | " #if self.cap.isOpened():\n", 241 | " self.open = True\n", 242 | " self.frame = np.empty((self.rows,self.cols,self.channels),np.uint8)\n", 243 | " \n", 244 | " def GetFrame(self):\n", 245 | " ret,_ = self.cap.read(self.frame)\n", 246 | " self.UpdateState(ret)\n", 247 | " if (ret):\n", 248 | " if(self.store_res):\n", 249 | " self.res.append(np.copy(self.frame))\n", 250 | " return ret,self.frame\n", 251 | " \n", 252 | " def __del__(self):\n", 253 | " self.cap.release()\n", 254 | " \n", 255 | "class CpuCapNpa(CpuCap):\n", 256 | " def __init__(self,vid_path,max_frames=-1,store_res=False,backend=cv.CAP_ANY):\n", 257 | " CpuCap.__init__(self, vid_path, max_frames, store_res,backend) \n", 258 | " \n", 259 | " def GetFrame(self):\n", 260 | " ret,self.frame = self.cap.read()\n", 261 | " self.UpdateState(ret)\n", 262 | " if (ret):\n", 263 | " if(self.store_res):\n", 264 | " self.res.append(np.copy(self.frame))\n", 265 | " return ret,self.frame\n", 266 | " " 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "## CPU" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "### h264" 288 | ] 289 | }, 290 | { 291 | 
"cell_type": "code", 292 | "execution_count": 12, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "os.environ[\"OPENCV_FFMPEG_CAPTURE_OPTIONS\"] = \"\"" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 13, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "name": "stdout", 306 | "output_type": "stream", 307 | "text": [ 308 | "CPU utilization - max: 60.77%, average 39.18%\n", 309 | "CPU 0 (no pre alloc): 900 frames, 15.41 ms/frame\n" 310 | ] 311 | } 312 | ], 313 | "source": [ 314 | "#export\n", 315 | "cpu_cap_npa = CpuCapNpa(vid_path_264,-1,check_res)\n", 316 | "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n", 317 | "print(f'CPU 0 (no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 14, 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "name": "stdout", 327 | "output_type": "stream", 328 | "text": [ 329 | "CPU utilization - max: 104.17%, average 76.64%\n", 330 | "CPU 1: 900 frames, 8.63 ms/frame\n" 331 | ] 332 | } 333 | ], 334 | "source": [ 335 | "#export\n", 336 | "cpu_cap = CpuCap(vid_path_264,-1,check_res)\n", 337 | "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n", 338 | "print(f'CPU 1: {n_frames} frames, {cpu_time_1:.2f} ms/frame')" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 15, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "if(check_res):\n", 348 | " CheckFrames(cpu_cap.res,cpu_cap_npa.res)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": {}, 354 | "source": [ 355 | "### h265" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 16, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "os.environ[\"OPENCV_FFMPEG_CAPTURE_OPTIONS\"] = \"\"" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 17, 370 | "metadata": {}, 371 | "outputs": [ 372 | { 373 | "name": "stdout", 374 | 
"output_type": "stream", 375 | "text": [ 376 | "CPU utilization - max: 65.11%, average 36.68%\n", 377 | "CPU 0 (no pre alloc): 900 frames, 34.34 ms/frame\n" 378 | ] 379 | } 380 | ], 381 | "source": [ 382 | "#export\n", 383 | "cpu_cap_npa = CpuCapNpa(vid_path_265,-1,check_res)\n", 384 | "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n", 385 | "print(f'CPU 0 (no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 18, 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "name": "stdout", 395 | "output_type": "stream", 396 | "text": [ 397 | "CPU utilization - max: 75.61%, average 45.85%\n", 398 | "CPU 1: 900 frames, 27.27 ms/frame\n" 399 | ] 400 | } 401 | ], 402 | "source": [ 403 | "#export\n", 404 | "cpu_cap = CpuCap(vid_path_265,-1,check_res)\n", 405 | "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n", 406 | "print(f'CPU 1: {n_frames} frames, {cpu_time_1:.2f} ms/frame')" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 19, 412 | "metadata": {}, 413 | "outputs": [], 414 | "source": [ 415 | "if(check_res):\n", 416 | " CheckFrames(cpu_cap.res,cpu_cap_npa.res)" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "## CPU - Quicksync" 431 | ] 432 | }, 433 | { 434 | "cell_type": "markdown", 435 | "metadata": {}, 436 | "source": [ 437 | "If this implementation is slower than the default, software decoding is probably taking place.\n", 438 | "\n", 439 | "To confirm hardware decoding in Windows 10, check the Video Decode window in the GPU pane of the Task Manager for activity, as shown below. \n", 440 | "\n", 441 | "To fix this, check the drivers, OpenCV version etc. 
" 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "metadata": {}, 447 | "source": [ 448 | "![title](imgs/quicksync.PNG)" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 15, 454 | "metadata": {}, 455 | "outputs": [ 456 | { 457 | "name": "stdout", 458 | "output_type": "stream", 459 | "text": [ 460 | "CPU Quick Sync (no pre alloc): 900 frames, 29.84 ms/frame\n" 461 | ] 462 | } 463 | ], 464 | "source": [ 465 | "#export\n", 466 | "cpu_cap_mfx_npa = CpuCapNpa(vid_path_264,-1,check_res,cv.CAP_INTEL_MFX)\n", 467 | "cpu_time_mfx_0,n_frames = ProcVid0(cpu_cap_mfx_npa)\n", 468 | "print(f'CPU Quick Sync (no pre alloc): {n_frames} frames, {cpu_time_mfx_0:.2f} ms/frame')" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": 16, 474 | "metadata": {}, 475 | "outputs": [ 476 | { 477 | "name": "stdout", 478 | "output_type": "stream", 479 | "text": [ 480 | "CPU Quick Sync (no pre alloc): 900 frames, 28.68 ms/frame\n" 481 | ] 482 | } 483 | ], 484 | "source": [ 485 | "#export\n", 486 | "cpu_cap_mfx = CpuCap(vid_path_264,-1,check_res,cv.CAP_INTEL_MFX)\n", 487 | "cpu_time_mfx_1,n_frames = ProcVid0(cpu_cap_mfx)\n", 488 | "print(f'CPU Quick Sync (no pre alloc): {n_frames} frames, {cpu_time_mfx_1:.2f} ms/frame')" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": {}, 494 | "source": [ 495 | "" 496 | ] 497 | }, 498 | { 499 | "cell_type": "markdown", 500 | "metadata": {}, 501 | "source": [ 502 | "## GPU" 503 | ] 504 | }, 505 | { 506 | "cell_type": "markdown", 507 | "metadata": {}, 508 | "source": [ 509 | "Not all GPUs have a hardware decoder, e.g. anything with GM108; see\n", 510 | "[(1)](https://devtalk.nvidia.com/default/topic/1024934/video-codec-and-optical-flow-sdk/cuvidcreatedecoder-returns-error-cuda_error_no_device/) and \n", 511 | "[(2)](https://developer.nvidia.com/video-encode-decode-gpu-support-matrix) for discussions."
512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "metadata": {}, 517 | "source": [ 518 | "### cv.VideoCapture - h264_cuvid" 519 | ] 520 | }, 521 | { 522 | "cell_type": "markdown", 523 | "metadata": {}, 524 | "source": [ 525 | "#### h264" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 20, 531 | "metadata": {}, 532 | "outputs": [], 533 | "source": [ 534 | "os.environ[\"OPENCV_FFMPEG_CAPTURE_OPTIONS\"] = \"video_codec;h264_cuvid\"" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": 21, 540 | "metadata": {}, 541 | "outputs": [ 542 | { 543 | "name": "stdout", 544 | "output_type": "stream", 545 | "text": [ 546 | "CPU utilization - max: 8.49%, average 8.23%\n", 547 | "CPU 0 with h264_cuvid (no pre alloc): 900 frames, 32.84 ms/frame\n" 548 | ] 549 | } 550 | ], 551 | "source": [ 552 | "#export\n", 553 | "cpu_cap_npa = CpuCapNpa(vid_path_264,-1,check_res)\n", 554 | "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n", 555 | "print(f'CPU 0 with h264_cuvid (no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": 22, 561 | "metadata": {}, 562 | "outputs": [ 563 | { 564 | "name": "stdout", 565 | "output_type": "stream", 566 | "text": [ 567 | "CPU utilization - max: 17.36%, average 8.30%\n", 568 | "CPU 1 with h264_cuvid: 900 frames, 25.20 ms/frame\n" 569 | ] 570 | } 571 | ], 572 | "source": [ 573 | "#export\n", 574 | "cpu_cap = CpuCap(vid_path_264,-1,check_res)\n", 575 | "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n", 576 | "print(f'CPU 1 with h264_cuvid: {n_frames} frames, {cpu_time_1:.2f} ms/frame')" 577 | ] 578 | }, 579 | { 580 | "cell_type": "markdown", 581 | "metadata": {}, 582 | "source": [ 583 | "#### h265" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": 23, 589 | "metadata": {}, 590 | "outputs": [], 591 | "source": [ 592 | "os.environ[\"OPENCV_FFMPEG_CAPTURE_OPTIONS\"] = \"video_codec;hevc_cuvid\"" 
593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": 24, 598 | "metadata": {}, 599 | "outputs": [ 600 | { 601 | "name": "stdout", 602 | "output_type": "stream", 603 | "text": [ 604 | "CPU utilization - max: 12.60%, average 8.24%\n", 605 | "CPU 0 with hevc_cuvid(no pre alloc): 900 frames, 38.32 ms/frame\n" 606 | ] 607 | } 608 | ], 609 | "source": [ 610 | "#export\n", 611 | "cpu_cap_npa = CpuCapNpa(vid_path_265,-1,check_res)\n", 612 | "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n", 613 | "print(f'CPU 0 with hevc_cuvid(no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": 25, 619 | "metadata": {}, 620 | "outputs": [ 621 | { 622 | "name": "stdout", 623 | "output_type": "stream", 624 | "text": [ 625 | "CPU utilization - max: 16.28%, average 8.24%\n", 626 | "CPU 1 with hevc_cuvid: 900 frames, 30.20 ms/frame\n" 627 | ] 628 | } 629 | ], 630 | "source": [ 631 | "#export\n", 632 | "cpu_cap = CpuCap(vid_path_265,-1,check_res)\n", 633 | "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n", 634 | "print(f'CPU 1 with hevc_cuvid: {n_frames} frames, {cpu_time_1:.2f} ms/frame')" 635 | ] 636 | }, 637 | { 638 | "cell_type": "markdown", 639 | "metadata": {}, 640 | "source": [ 641 | "### cv.cudacodec.VideoReader" 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": 26, 647 | "metadata": {}, 648 | "outputs": [ 649 | { 650 | "name": "stdout", 651 | "output_type": "stream", 652 | "text": [ 653 | "CPU utilization - max: 8.68%, average 8.38%\n", 654 | "GPU 0 (no pre alloc): 900 frames, 5.89 ms/frame\n" 655 | ] 656 | } 657 | ], 658 | "source": [ 659 | "#export\n", 660 | "gpu_cap_npa = CudaCapNpa(vid_path_264,-1,check_res)\n", 661 | "gpu_time_0,n_frames = ProcVid0(gpu_cap_npa)\n", 662 | "print(f'GPU 0 (no pre alloc): {n_frames} frames, {gpu_time_0:.2f} ms/frame')" 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": 27, 668 | "metadata": {}, 669 
| "outputs": [ 670 | { 671 | "name": "stdout", 672 | "output_type": "stream", 673 | "text": [ 674 | "CPU utilization - max: 8.68%, average 8.40%\n", 675 | "GPU 0 (no pre alloc): 900 frames, 5.81 ms/frame\n" 676 | ] 677 | } 678 | ], 679 | "source": [ 680 | "gpu_cap_npa = CudaCapNpa(vid_path_264,-1,check_res)\n", 681 | "gpu_time_0,n_frames = ProcVid0(gpu_cap_npa)\n", 682 | "print(f'GPU 0 (no pre alloc): {n_frames} frames, {gpu_time_0:.2f} ms/frame')" 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": 28, 688 | "metadata": {}, 689 | "outputs": [], 690 | "source": [ 691 | "if(check_res):\n", 692 | " n_frames = min(len(gpu_cap.res),len(gpu_cap_npa.res))\n", 693 | " CheckFrames(gpu_cap.res[:n_frames],gpu_cap_npa.res[:n_frames])" 694 | ] 695 | }, 696 | { 697 | "cell_type": "markdown", 698 | "metadata": {}, 699 | "source": [ 700 | "" 701 | ] 702 | }, 703 | { 704 | "cell_type": "markdown", 705 | "metadata": {}, 706 | "source": [ 707 | "## Results" 708 | ] 709 | }, 710 | { 711 | "cell_type": "markdown", 712 | "metadata": {}, 713 | "source": [ 714 | "### h264" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": 29, 720 | "metadata": {}, 721 | "outputs": [ 722 | { 723 | "data": { 724 | "text/html": [ 725 | "\n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 
783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | "
CPU/GPU Frame Proc Time (ms)
RTX 2080 Mobile (pre-alloc)5.780000
RTX 2080 Mobile6.030000
GTX 1060 (pre-alloc)7.850000
HD Graphics 530 (pre-alloc)9.270000
GTX 980M (pre-alloc)11.740000
i7-8700 (pre-alloc)12.090000
HD Graphics 4400 (pre-alloc)13.970000
GTX 980M14.340000
HD Graphics 53014.730000
HD Graphics 5500 HDD (pre-alloc)18.030000
i7-870019.070000
i5-6500 (pre-alloc)22.010000
HD Graphics 5500 HDD23.740000
HD Graphics 440023.880000
i7-6700HQ (pre-alloc)23.990000
RTX 2080 Mobile h264_cuvid (pre-alloc)25.200000
RTX 2080 Mobile h264_cuvid32.840000
i7-6700HQ39.390000
GT 730M (pre-alloc)40.640000
GT 730M40.800000
i5-4210U (pre-alloc)47.720000
i5-4210U50.650000
i5-5200U HDD (pre-alloc)51.060000
i5-5200U HDD58.640000
" 824 | ], 825 | "text/plain": [ 826 | "" 827 | ] 828 | }, 829 | "execution_count": 29, 830 | "metadata": {}, 831 | "output_type": "execute_result" 832 | } 833 | ], 834 | "source": [ 835 | "results = [['i7-6700HQ', 39.39],['i7-6700HQ (pre-alloc)', 23.99],['i5-6500 (pre-alloc)',22.01],['i5-5200U HDD',58.64],\n", 836 | " ['i5-5200U HDD (pre-alloc)',51.06],['HD Graphics 530',14.73],['HD Graphics 530 (pre-alloc)',9.27],\n", 837 | " ['HD Graphics 5500 HDD',23.74],['HD Graphics 5500 HDD (pre-alloc)',18.03],['GTX 980M',14.34],\n", 838 | " ['GTX 980M (pre-alloc)',11.74],['GTX 1060 (pre-alloc)',7.85],['i5-4210U',50.65],['i5-4210U (pre-alloc)',47.72],\n", 839 | " ['GT 730M',40.80],['GT 730M (pre-alloc)',40.64],['HD Graphics 4400',23.88],['HD Graphics 4400 (pre-alloc)',13.97],\n", 840 | " ['i7-8700',19.07],['i7-8700 (pre-alloc)',12.09],['RTX 2080 Mobile h264_cuvid',32.84],\n", 841 | " ['RTX 2080 Mobile h264_cuvid (pre-alloc)',25.20],['RTX 2080 Mobile',6.03],['RTX 2080 Mobile (pre-alloc)',5.78]]\n", 842 | "df = pd.DataFrame(results,columns=['CPU/GPU','Frame Proc Time (ms)']).sort_values('Frame Proc Time (ms)').round(2)\n", 843 | "df.style.hide_index()" 844 | ] 845 | }, 846 | { 847 | "cell_type": "markdown", 848 | "metadata": {}, 849 | "source": [ 850 | "### h265" 851 | ] 852 | }, 853 | { 854 | "cell_type": "markdown", 855 | "metadata": {}, 856 | "source": [ 857 | "# Export" 858 | ] 859 | }, 860 | { 861 | "cell_type": "code", 862 | "execution_count": 84, 863 | "metadata": {}, 864 | "outputs": [ 865 | { 866 | "name": "stdout", 867 | "output_type": "stream", 868 | "text": [ 869 | "Converted opencv410x-video-read.ipynb to exp\\nb_opencv410x-video-read.py\n" 870 | ] 871 | } 872 | ], 873 | "source": [ 874 | "# taken from https://github.com/fastai/fastai_docs/blob/master/dev_nb/notebook2script.py\n", 875 | "!python notebook2script.py opencv410x-video-read.ipynb" 876 | ] 877 | }, 878 | { 879 | "cell_type": "code", 880 | "execution_count": null, 881 | "metadata": {}, 882 | "outputs": 
[], 883 | "source": [ 884 | "! python exp/nb_opencv410x-video-read.py" 885 | ] 886 | }, 887 | { 888 | "cell_type": "code", 889 | "execution_count": null, 890 | "metadata": {}, 891 | "outputs": [], 892 | "source": [] 893 | } 894 | ], 895 | "metadata": { 896 | "kernelspec": { 897 | "display_name": "Python 3", 898 | "language": "python", 899 | "name": "python3" 900 | }, 901 | "language_info": { 902 | "codemirror_mode": { 903 | "name": "ipython", 904 | "version": 3 905 | }, 906 | "file_extension": ".py", 907 | "mimetype": "text/x-python", 908 | "name": "python", 909 | "nbconvert_exporter": "python", 910 | "pygments_lexer": "ipython3", 911 | "version": "3.6.9" 912 | }, 913 | "toc": { 914 | "base_numbering": 1, 915 | "nav_menu": {}, 916 | "number_sections": true, 917 | "sideBar": true, 918 | "skip_h1_title": false, 919 | "title_cell": "Table of Contents", 920 | "title_sidebar": "Contents", 921 | "toc_cell": false, 922 | "toc_position": {}, 923 | "toc_section_display": true, 924 | "toc_window_display": false 925 | } 926 | }, 927 | "nbformat": 4, 928 | "nbformat_minor": 2 929 | } 930 | --------------------------------------------------------------------------------