├── CNAME
├── README.md
├── imgs
    ├── profile.png
    ├── cpu_spec-2.webp
    ├── gpu_spec-2.webp
    ├── favicon-32x32.png
    ├── OpenCV_logo_black.webp
    ├── opencv_cuda_intel.webp
    ├── profile_1-e1674474909304.webp
    ├── vs_community_c++_workload.png
    ├── cropped-cuda_thread_blocks.webp
    └── cmake_visual_studio_install.webp
├── nbs
    ├── imgs
    │   ├── nvprof_1.PNG
    │   ├── nvprof_2.PNG
    │   ├── nvprof_3.PNG
    │   ├── nvprof_4.PNG
    │   ├── nvprof_5.PNG
    │   ├── nvprof_6.PNG
    │   ├── nvprof_7.PNG
    │   ├── quicksync.PNG
    │   ├── proc_mon_filter.png
    │   ├── proc_mon_failed_search_nvidia.png
    │   └── proc_mon_failed_search_opencv.png
    ├── misc
    │   └── ProcmonConfiguration.pmc
    ├── cudacodec_videoReader_memory_usage.ipynb
    ├── resize.ipynb
    ├── opencv4-sparse-optical-flow.ipynb
    ├── cuda_optimization_test.ipynb
    ├── cudacodec.ipynb
    ├── opencv_cvs_dll_load_failed.ipynb
    ├── opencv410x-video-read.ipynb
    ├── opencv450-video-read.ipynb
    └── opencv450-video-read-CUDA_10_0_VideoCodecSDK_11_0_10.ipynb
├── 404.qmd
├── qmd
    ├── about.qmd
    └── opencv_cuda_performance.qmd
├── index.qmd
├── .gitignore
├── _quarto.yml
└── LICENSE


/CNAME:
--------------------------------------------------------------------------------
1 | www.jamesbowley.co.uk


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # opencv-testing
2 | Notebooks for experimenting with OpenCV
3 | 


--------------------------------------------------------------------------------
/imgs/profile.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/profile.png


--------------------------------------------------------------------------------
/imgs/cpu_spec-2.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/cpu_spec-2.webp


--------------------------------------------------------------------------------
/imgs/gpu_spec-2.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/gpu_spec-2.webp


--------------------------------------------------------------------------------
/nbs/imgs/nvprof_1.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_1.PNG


--------------------------------------------------------------------------------
/nbs/imgs/nvprof_2.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_2.PNG


--------------------------------------------------------------------------------
/nbs/imgs/nvprof_3.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_3.PNG


--------------------------------------------------------------------------------
/nbs/imgs/nvprof_4.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_4.PNG


--------------------------------------------------------------------------------
/nbs/imgs/nvprof_5.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_5.PNG


--------------------------------------------------------------------------------
/nbs/imgs/nvprof_6.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_6.PNG


--------------------------------------------------------------------------------
/nbs/imgs/nvprof_7.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/nvprof_7.PNG


--------------------------------------------------------------------------------
/imgs/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/favicon-32x32.png


--------------------------------------------------------------------------------
/nbs/imgs/quicksync.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/quicksync.PNG


--------------------------------------------------------------------------------
/imgs/OpenCV_logo_black.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/OpenCV_logo_black.webp


--------------------------------------------------------------------------------
/imgs/opencv_cuda_intel.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/opencv_cuda_intel.webp


--------------------------------------------------------------------------------
/nbs/imgs/proc_mon_filter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/proc_mon_filter.png


--------------------------------------------------------------------------------
/imgs/profile_1-e1674474909304.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/profile_1-e1674474909304.webp


--------------------------------------------------------------------------------
/imgs/vs_community_c++_workload.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/vs_community_c++_workload.png


--------------------------------------------------------------------------------
/nbs/misc/ProcmonConfiguration.pmc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/misc/ProcmonConfiguration.pmc


--------------------------------------------------------------------------------
/imgs/cropped-cuda_thread_blocks.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/cropped-cuda_thread_blocks.webp


--------------------------------------------------------------------------------
/imgs/cmake_visual_studio_install.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/imgs/cmake_visual_studio_install.webp


--------------------------------------------------------------------------------
/nbs/imgs/proc_mon_failed_search_nvidia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/proc_mon_failed_search_nvidia.png


--------------------------------------------------------------------------------
/nbs/imgs/proc_mon_failed_search_opencv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cudawarped/opencv-experiments/HEAD/nbs/imgs/proc_mon_failed_search_opencv.png


--------------------------------------------------------------------------------
/404.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Page Not Found
3 | ---
4 | 
5 | The page you requested cannot be found (perhaps it was moved or renamed).
6 | 
7 | You may want to try searching to find the page's new location.


--------------------------------------------------------------------------------
/qmd/about.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "About"
 3 | about:
 4 |   template: jolla
 5 |   image: ..\imgs\profile.png
 6 |   links:
 7 |     - icon: github
 8 |       text: Github
 9 |       href: https://github.com/cudawarped/opencv-experiments/
10 | ---
11 | 
12 | Location for storing useful guides and notebooks
13 | 


--------------------------------------------------------------------------------
/index.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "OpenCV Guides"
 3 | listing:
 4 |   #image-height: 300px
 5 |   #image-placeholder: "../imgs/OpenCV_logo_black.webp"
 6 |   sort: "date desc"
 7 |   contents:
 8 |     - nbs/ImportError_dll_load_failed_while_importing_cv2.ipynb
 9 |     - nbs/opencv_cuda_streams_performance_python.ipynb
10 |     - qmd/opencv_cuda_python_windows.qmd
11 |     - qmd/opencv_cuda_performance.qmd
12 |     #- nbs\opencv_delay_jit_or_context_creation.ipynb
13 | ---


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | /.quarto/
107 | 


--------------------------------------------------------------------------------
/_quarto.yml:
--------------------------------------------------------------------------------
  1 | project:
  2 |   type: website
  3 |   resources:
  4 |       - CNAME
  5 |   render:
  6 |     - nbs/ImportError_dll_load_failed_while_importing_cv2.ipynb
  7 |     #- nbs/opencv4-cuda-streams.ipynb
  8 |     - nbs/opencv_cuda_streams_performance_python.ipynb
  9 |     #- nbs\opencv_delay_jit_or_context_creation.ipynb
 10 |     - "*.html"
 11 |     - "*.qmd"
 12 | 
 13 | 
 14 | website:
 15 |   title: cudawarped
 16 |   repo-url: https://github.com/cudawarped/opencv-experiments/
 17 |   repo-actions: [issue]
 18 |   favicon: imgs\favicon-32x32.png
 19 |   back-to-top-navigation: true
 20 |   google-analytics: "G-69X21EFRB2"
 21 |   open-graph: true
 22 |   navbar:
 23 |     tools:
 24 |     - icon: github
 25 |       href: https://github.com/cudawarped
 26 |     background: light
 27 |     logo: imgs\profile_1-e1674474909304.webp
 28 |     search: true
 29 |     left:
 30 |       - text: "About"
 31 |         file: qmd\about.qmd
 32 |       #- text: "Home"
 33 |       #  file: index.qmd
 34 |       - text: "OpenCV"
 35 |         file: index.qmd
 36 |       - text: "Downloads"
 37 |         #href: qmd/downloads.qmd
 38 |         menu:
 39 |           - text: "OpenCV"
 40 |             file: qmd/downloads.qmd
 41 |           - text: OpenCV Python CUDA wheels
 42 |             href: https://github.com/cudawarped/opencv-python-cuda-wheels/releases
 43 |           - text: OpenCV C++ CUDA builds 
 44 |             href: https://github.com/cudawarped/opencv_contrib/releases
 45 |       #    - https://github.com/cudawarped/opencv-python-cuda-wheels/releases
 46 |       #- text: "How-To"
 47 |       #  file: howto.qmd
 48 | 
 49 |   sidebar:
 50 |     - title: OpenCV
 51 |       style: "floating"
 52 |       #type: "dark"
 53 |       #background: light
 54 |       #contents: auto
 55 |       contents:
 56 |         - index.qmd
 57 |         - text: "Build OpenCV (including Python) with CUDA on Windows"
 58 |           file: qmd\opencv_cuda_python_windows.qmd
 59 |         - text: "Accelerate with CUDA streams in Python"
 60 |           file: nbs\opencv_cuda_streams_performance_python.ipynb
 61 |         - text: "ImportError: DLL load failed..."
 62 |           file: nbs\ImportError_dll_load_failed_while_importing_cv2.ipynb
 63 |         #- text: "OpenCV CUDA initialization delay"
 64 |         #  file: nbs\opencv_delay_jit_or_context_creation.ipynb
 65 |         - text: "CUDA Performance Comparison"
 66 |           file: qmd\opencv_cuda_performance.qmd
 67 |       #  - text: "Import dll load failed while importing cv2"
 68 |       #    file: nbs\ImportError_dll_load_failed_while_importing_cv2.ipynb
 69 |         
 70 |         #- tutorial1.qmd
 71 |         #- tutorial2.qmd
 72 | 
 73 |     - title: "How-To"
 74 |       contents:
 75 |         - howto.qmd
 76 |         # navigation items
 77 | 
 78 |     - title: "Fundamentals"
 79 |       contents:
 80 |         - fundamentals.qmd
 81 |         # navigation items
 82 | 
 83 |     - title: "Reference"
 84 |       contents:
 85 |         - reference.qmd
 86 |         # navigation items
 87 | 
 88 | format:
 89 |   html:
 90 |     smooth-scroll: true
 91 |     theme: 
 92 |       light: cosmo
 93 |       dark: cyborg
 94 |     page-layout: full
 95 |     grid:
 96 |       sidebar-width: 330px
 97 |       body-width: 1000px
 98 |     #css: style.css
 99 |      #- custom.scss
100 |     #navbar: navbar-light
101 |     #backgroundcolor: red
102 |     #css: custom.scss
103 |     toc: false
104 |     link-external-newwindow: true
105 |     link-external-icon: false
106 |     linkcolor: "#76b900"
107 | 


--------------------------------------------------------------------------------
/nbs/cudacodec_videoReader_memory_usage.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "53117f25",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# `cudacodec.VideoReader` memory usage example"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "0adbfc27",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "The memory required by `cudacodec.VideoReader` is mainly influenced by the number of decode surfaces required as demonstrated in this example.  \n",
 17 |     "\n",
 18 |     "The minimum number of decode surfaces is determined by the video source and can be increased to increase decoding performance.\n",
 19 |     "\n",
 20 |     "Note: A [CUDA context](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#context), which requires several hundred MB of device memory, needs to be created before any CUDA functions can be called.  Unfortunately, the call to `cv.cuda.DeviceInfo()` creates the CUDA context, so there is no way to measure how much memory this allocates using the OpenCV API.\n",
 21 |     "\n",
 22 |     "This example uses the Python wheel from https://github.com/cudawarped/opencv-python-cuda-wheels/releases/tag/4.7.0.20221229"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 1,
 28 |    "id": "154b8b6a",
 29 |    "metadata": {},
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import cv2 as cv\n",
 33 |     "import os\n",
 34 |     "import time\n",
 35 |     "b_to_mb = 2**20\n",
 36 |     "vid_root = os.environ['OPENCV_TEST_DATA_PATH'] + \"/cv/video/\""
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "id": "12f4db2a",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "## Create `cudacodec.VideoReader` using the default number of decode surfaces"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 2,
 50 |    "id": "7819f27c",
 51 |    "metadata": {},
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "# unfortunately this creates a cuda context therefore mb_free_start is the free memory after context creation\n",
 55 |     "device_info = cv.cuda.DeviceInfo()\n",
 56 |     "mb_free_start = device_info.freeMemory()/b_to_mb"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 3,
 62 |    "id": "9f5291b7",
 63 |    "metadata": {},
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "videoReader = cv.cudacodec.createVideoReader(vid_root + \"1920x1080.avi\")"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 4,
 72 |    "id": "81d66b56",
 73 |    "metadata": {},
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "mb_free_after_creation = device_info.freeMemory()/b_to_mb"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 5,
 82 |    "id": "4080e268",
 83 |    "metadata": {},
 84 |    "outputs": [],
 85 |    "source": [
 86 |     "# Get the number of decode surfaces; currently a frame needs to be processed before the format info is valid\n",
 87 |     "videoReader.grab()\n",
 88 |     "format_info = videoReader.format()"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 6,
 94 |    "id": "97570090",
 95 |    "metadata": {},
 96 |    "outputs": [
 97 |     {
 98 |      "name": "stdout",
 99 |      "output_type": "stream",
100 |      "text": [
101 |       "Total Memory:                            8191.50MB\n",
102 |       "Free Memory after context creation:      6999.00MB\n",
103 |       "Free Memory after creating video reader: 6973.41MB\n",
104 |       "25.59MB of internal memory when using 4 (1920x1088) decode surfaces\n"
105 |      ]
106 |     }
107 |    ],
108 |    "source": [
109 |     "mb_used = mb_free_start - mb_free_after_creation\n",
110 |     "print(f'Total Memory:                            {device_info.totalMemory()/b_to_mb:.2f}MB')\n",
111 |     "print(f'Free Memory after context creation:      {mb_free_start:.2f}MB')\n",
112 |     "print(f'Free Memory after creating video reader: {mb_free_after_creation:.2f}MB')\n",
113 |     "print(f'{mb_used:.2f}MB of internal memory when using {format_info.ulNumDecodeSurfaces} ({format_info.ulWidth}x{format_info.ulHeight}) decode surfaces')"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "markdown",
118 |    "id": "485293dd",
119 |    "metadata": {},
120 |    "source": [
121 |     "## Create `cudacodec.VideoReader` using twice as many decode surfaces"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 7,
127 |    "id": "f887489f",
128 |    "metadata": {},
129 |    "outputs": [],
130 |    "source": [
131 |     "params = cv.cudacodec.VideoReaderInitParams()\n",
132 |     "params.minNumDecodeSurfaces = format_info.ulNumDecodeSurfaces*2"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 8,
138 |    "id": "42733d60",
139 |    "metadata": {},
140 |    "outputs": [],
141 |    "source": [
142 |     "videoReader = cv.cudacodec.createVideoReader(vid_root + \"1920x1080.avi\",params=params)"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 9,
148 |    "id": "dc9904f1",
149 |    "metadata": {},
150 |    "outputs": [],
151 |    "source": [
152 |     "mb_used_double_sufaces = mb_free_start - device_info.freeMemory()/b_to_mb"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 10,
158 |    "id": "55bc7811",
159 |    "metadata": {},
160 |    "outputs": [],
161 |    "source": [
162 |     "videoReader.grab()\n",
163 |     "format_info = videoReader.format()\n",
164 |     "assert format_info.ulNumDecodeSurfaces == params.minNumDecodeSurfaces "
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 11,
170 |    "id": "c43fad5f",
171 |    "metadata": {},
172 |    "outputs": [
173 |     {
174 |      "name": "stdout",
175 |      "output_type": "stream",
176 |      "text": [
177 |       "Memory increase from doubling the number of decode surfaces: 62.52%\n"
178 |      ]
179 |     }
180 |    ],
181 |    "source": [
182 |     "print(f'Memory increase from doubling the number of decode surfaces: {100*(mb_used_double_sufaces - mb_used)/mb_used:.2f}%')"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": null,
188 |    "id": "26ca941d",
189 |    "metadata": {},
190 |    "outputs": [],
191 |    "source": []
192 |   }
193 |  ],
194 |  "metadata": {
195 |   "kernelspec": {
196 |    "display_name": "Python 3 (ipykernel)",
197 |    "language": "python",
198 |    "name": "python3"
199 |   },
200 |   "language_info": {
201 |    "codemirror_mode": {
202 |     "name": "ipython",
203 |     "version": 3
204 |    },
205 |    "file_extension": ".py",
206 |    "mimetype": "text/x-python",
207 |    "name": "python",
208 |    "nbconvert_exporter": "python",
209 |    "pygments_lexer": "ipython3",
210 |    "version": "3.9.13"
211 |   },
212 |   "toc": {
213 |    "base_numbering": 1,
214 |    "nav_menu": {},
215 |    "number_sections": true,
216 |    "sideBar": true,
217 |    "skip_h1_title": false,
218 |    "title_cell": "Table of Contents",
219 |    "title_sidebar": "Contents",
220 |    "toc_cell": false,
221 |    "toc_position": {},
222 |    "toc_section_display": true,
223 |    "toc_window_display": false
224 |   },
225 |   "varInspector": {
226 |    "cols": {
227 |     "lenName": 16,
228 |     "lenType": 16,
229 |     "lenVar": 40
230 |    },
231 |    "kernels_config": {
232 |     "python": {
233 |      "delete_cmd_postfix": "",
234 |      "delete_cmd_prefix": "del ",
235 |      "library": "var_list.py",
236 |      "varRefreshCmd": "print(var_dic_list())"
237 |     },
238 |     "r": {
239 |      "delete_cmd_postfix": ") ",
240 |      "delete_cmd_prefix": "rm(",
241 |      "library": "var_list.r",
242 |      "varRefreshCmd": "cat(var_dic_list()) "
243 |     }
244 |    },
245 |    "types_to_exclude": [
246 |     "module",
247 |     "function",
248 |     "builtin_function_or_method",
249 |     "instance",
250 |     "_Feature"
251 |    ],
252 |    "window_display": false
253 |   }
254 |  },
255 |  "nbformat": 4,
256 |  "nbformat_minor": 5
257 | }
258 | 


--------------------------------------------------------------------------------
/nbs/resize.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 94,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import cv2\n",
 10 |     "import numpy as np\n",
 11 |     "import time"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 110,
 17 |    "metadata": {},
 18 |    "outputs": [
 19 |     {
 20 |      "data": {
 21 |       "text/plain": [
 22 |        "'4.5.0'"
 23 |       ]
 24 |      },
 25 |      "execution_count": 110,
 26 |      "metadata": {},
 27 |      "output_type": "execute_result"
 28 |     }
 29 |    ],
 30 |    "source": [
 31 |     "cv2.__version__"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "markdown",
 36 |    "metadata": {},
 37 |    "source": [
 38 |     "# OpenCV Resize"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 95,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "img = np.random.random((2048, 2048,3)).astype(np.uint8)"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "markdown",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "## Without pre-alloc"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 96,
 60 |    "metadata": {},
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "resize_width = 1024\n",
 64 |     "resize_height = 1024 \n",
 65 |     "loop_cnt = 100000"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "metadata": {},
 71 |    "source": [
 72 |     "### CPU"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": 97,
 78 |    "metadata": {},
 79 |    "outputs": [
 80 |     {
 81 |      "name": "stdout",
 82 |      "output_type": "stream",
 83 |      "text": [
 84 |       "cpu time: 952.11 us\n"
 85 |      ]
 86 |     }
 87 |    ],
 88 |    "source": [
 89 |     "time_start = time.time() \n",
 90 |     "for _ in range(loop_cnt): \n",
 91 |     "    resize_img = cv2.resize(img, (resize_height, resize_width), interpolation=cv2.INTER_LINEAR) \n",
 92 |     "print('cpu time: {:.2f} us'.format((time.time() - time_start) * 1e6 / loop_cnt))"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "metadata": {},
 98 |    "source": [
 99 |     "### GPU"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": 98,
105 |    "metadata": {},
106 |    "outputs": [],
107 |    "source": [
108 |     "device_img = cv2.cuda_GpuMat()"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "metadata": {},
114 |    "source": [
115 |     "#### Upload/Dload for single GPU operation"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 99,
121 |    "metadata": {},
122 |    "outputs": [
123 |     {
124 |      "name": "stdout",
125 |      "output_type": "stream",
126 |      "text": [
127 |       "gpu time: 3926.30 us\n"
128 |      ]
129 |     }
130 |    ],
131 |    "source": [
132 |     "time_start = time.time() \n",
133 |     "for _ in range(loop_cnt): \n",
134 |     "    device_img.upload(img) \n",
135 |     "    resize_device_img = cv2.cuda.resize(device_img, (resize_height, resize_width),interpolation=cv2.INTER_LINEAR)\n",
136 |     "    resize_img = resize_device_img.download()\n",
137 |     "print('gpu time: {:.2f} us'.format((time.time() - time_start) * 1e6 / loop_cnt)) "
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {},
143 |    "source": [
144 |     "#### Normal operation"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 100,
150 |    "metadata": {},
151 |    "outputs": [],
152 |    "source": [
153 |     "device_img = cv2.cuda_GpuMat(img)"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": 101,
159 |    "metadata": {},
160 |    "outputs": [
161 |     {
162 |      "name": "stdout",
163 |      "output_type": "stream",
164 |      "text": [
165 |       "gpu time: 460.10 us\n"
166 |      ]
167 |     }
168 |    ],
169 |    "source": [
170 |     "time_start = time.time() \n",
171 |     "for _ in range(loop_cnt): \n",
172 |     "    resize_device_img = cv2.cuda.resize(device_img, (resize_height, resize_width),interpolation=cv2.INTER_LINEAR)\n",
173 |     "print('gpu time: {:.2f} us'.format((time.time() - time_start) * 1e6 / loop_cnt)) "
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "markdown",
178 |    "metadata": {},
179 |    "source": [
180 |     "## With pre-alloc"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "markdown",
185 |    "metadata": {},
186 |    "source": [
187 |     "### CPU"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": 102,
193 |    "metadata": {},
194 |    "outputs": [],
195 |    "source": [
196 |     "resize_img = np.zeros((resize_height, resize_width,3),dtype=np.uint8)"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": 103,
202 |    "metadata": {},
203 |    "outputs": [
204 |     {
205 |      "name": "stdout",
206 |      "output_type": "stream",
207 |      "text": [
208 |       "cpu time: 390.59 us\n"
209 |      ]
210 |     }
211 |    ],
212 |    "source": [
213 |     "time_start = time.time() \n",
214 |     "for _ in range(loop_cnt): \n",
215 |     "    cv2.resize(img, (resize_height, resize_width), resize_img, interpolation=cv2.INTER_LINEAR)\n",
216 |     "print('cpu time: {:.2f} us'.format((time.time() - time_start) * 1e6 / loop_cnt)) "
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "markdown",
221 |    "metadata": {},
222 |    "source": [
223 |     "### GPU"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": 111,
229 |    "metadata": {},
230 |    "outputs": [],
231 |    "source": [
232 |     "device_img = cv2.cuda_GpuMat(img)\n",
233 |     "resize_device_img = cv2.cuda_GpuMat(resize_img)"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "code",
238 |    "execution_count": 112,
239 |    "metadata": {},
240 |    "outputs": [
241 |     {
242 |      "name": "stdout",
243 |      "output_type": "stream",
244 |      "text": [
245 |       "gpu time: 87.67 us\n"
246 |      ]
247 |     }
248 |    ],
249 |    "source": [
250 |     "time_start = time.time() \n",
251 |     "for _ in range(loop_cnt):     \n",
252 |     "    cv2.cuda.resize(device_img, (resize_height, resize_width),resize_device_img,interpolation=cv2.INTER_LINEAR)\n",
253 |     "print('gpu time: {:.2f} us'.format((time.time() - time_start) * 1e6 / loop_cnt)) "
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "markdown",
258 |    "metadata": {},
259 |    "source": [
260 |     "#### Check timer"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "code",
265 |    "execution_count": 106,
266 |    "metadata": {},
267 |    "outputs": [
268 |     {
269 |      "name": "stdout",
270 |      "output_type": "stream",
271 |      "text": [
272 |       "89.9 µs ± 1.53 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
273 |      ]
274 |     }
275 |    ],
276 |    "source": [
277 |     "%timeit cv2.cuda.resize(device_img, (resize_height, resize_width),dst=resize_device_img,interpolation=cv2.INTER_LINEAR)"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "markdown",
282 |    "metadata": {},
283 |    "source": [
284 |     "#### Streams - unrealistic without pause, check for GPU saturation"
285 |    ]
286 |   },
287 |   {
288 |    "cell_type": "code",
289 |    "execution_count": 107,
290 |    "metadata": {},
291 |    "outputs": [],
292 |    "source": [
293 |     "stream = cv.cuda_Stream()"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "code",
298 |    "execution_count": 108,
299 |    "metadata": {},
300 |    "outputs": [
301 |     {
302 |      "name": "stdout",
303 |      "output_type": "stream",
304 |      "text": [
305 |       "44.3 µs ± 377 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
306 |      ]
307 |     }
308 |    ],
309 |    "source": [
310 |     "%timeit cv2.cuda.resize(device_img, (resize_height, resize_width),dst=resize_device_img,interpolation=cv2.INTER_LINEAR, stream=stream)"
311 |    ]
312 |   },
313 |   {
314 |    "cell_type": "code",
315 |    "execution_count": null,
316 |    "metadata": {},
317 |    "outputs": [],
318 |    "source": []
319 |   }
320 |  ],
321 |  "metadata": {
322 |   "kernelspec": {
323 |    "display_name": "Python 3",
324 |    "language": "python",
325 |    "name": "python3"
326 |   },
327 |   "language_info": {
328 |    "codemirror_mode": {
329 |     "name": "ipython",
330 |     "version": 3
331 |    },
332 |    "file_extension": ".py",
333 |    "mimetype": "text/x-python",
334 |    "name": "python",
335 |    "nbconvert_exporter": "python",
336 |    "pygments_lexer": "ipython3",
337 |    "version": "3.8.1"
338 |   }
339 |  },
340 |  "nbformat": 4,
341 |  "nbformat_minor": 4
342 | }
343 | 


--------------------------------------------------------------------------------
/qmd/opencv_cuda_performance.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "OpenCV CUDA Performance Comparison (Nvidia vs Intel)"
 3 | toc: true
 4 | date: 02/28/18
 5 | aliases:
 6 |   - ../opencv-3-4-gpu-cuda-performance-comparison-nvidia-vs-intel
 7 |   - ../opencv-3-4-gpu-cuda-perform
 8 | ---
 9 | 
10 | # Introduction
11 | 
12 | In this post I am going to use OpenCV's performance tests to compare the CUDA and CPU implementations. The idea is to get an indication of which OpenCV and/or Computer Vision algorithms, in general, benefit the most from GPU acceleration, and therefore under what circumstances it might be a good idea to invest in a GPU.
13 | 
14 | # Test setup
15 | - **Software**: OpenCV 3.4 compiled on Visual Studio 2017 with CUDA 9.1, Intel MKL with TBB, and TBB. To generate the CPU results I simply ran the CUDA performance tests with CUDA disabled, so that the fall back CPU functions were called, by changing the following
16 | 
17 | 	```{.cpp}
18 | 	#define PERF_RUN_CUDA()  ::perf::GpuPerf::targetDevice()
19 | 	```
20 | 
21 | 	to
22 | 
23 | 	```{.cpp}
24 | 	#define PERF_RUN_CUDA()  false
25 | 	```
26 | 
27 | 	in [modules\\ts\\include\\opencv2\\ts\\ts_perf.hpp](https://github.com/opencv/opencv/blob/6d4f66472e14b29b8e1623859cfebfdc67f677c3/modules/ts/include/opencv2/ts/ts_perf.hpp#L228).
28 | 
29 | 	The performance tests cover 104 of the OpenCV functions, with each function being tested for a number of different configurations (function arguments).  The total number of different CUDA performance configurations/tests which run successfully is 6031, of which only 5300 configurations are supported by both the GPU and CPU.
30 | - **Hardware**: Four different hardware configurations were tested, consisting of 3 laptops and 1 desktop, the CPU/GPU combinations are listed below:
31 | 
32 |   1) CPU: <a href="https://ark.intel.com/products/81016/Intel-Core-i5-4210U-Processor-3M-Cache-up-to-2_70-GHz" rel="noopener" target="_blank">i5-4210U</a>, GPU: <a href="https://www.geforce.co.uk/hardware/notebook-gpus/geforce-gt-730m" rel="noopener" target="_blank">730m</a> (laptop)
33 | 	2) CPU: <a href="https://ark.intel.com/products/85212/Intel-Core-i5-5200U-Processor-3M-Cache-up-to-2_70-GHz" rel="noopener" target="_blank">i5-5200U</a>, GPU: <a href="https://www.geforce.com/hardware/notebook-gpus/geforce-840m" rel="noopener" target="_blank">840m</a> (laptop)
34 | 	3) CPU: <a href="https://ark.intel.com/products/88967/Intel-Core-i7-6700HQ-Processor-6M-Cache-up-to-3_50-GHz" rel="noopener" target="_blank">i7-6700HQ</a>, GPU: <a href="https://www.geforce.co.uk/hardware/notebook-gpus/geforce-gtx-980m" rel="noopener" target="_blank">GTX 980m</a> (laptop)
35 | 	4) CPU: <a href="https://ark.intel.com/products/88184/Intel-Core-i5-6500-Processor-6M-Cache-up-to-3_60-GHz" rel="noopener" target="_blank">i5-6500</a>, GPU: <a href="https://www.geforce.co.uk/hardware/10series/geforce-gtx-1060/" rel="noopener" target="_blank">GTX 1060</a> (desktop)
36 | 
37 | 
38 | ## GPU specifications
39 | The GPUs tested comprise three different micro-architectures, ranging from a low end laptop (730m) to a mid range desktop (GTX 1060) GPU.  The full specifications are shown below, where I have also included the maximum theoretical speedup, if the OpenCV function were bandwidth or compute limited.  This value is just included to give an indication of what should be possible if architectural improvements, SM count etc. don't have any impact on performance.  In "general" most algorithms will be bandwidth limited implying that the average speedup of the OpenCV functions could be somewhere between these two values. If you are not familiar with this concept then I would recommend watching <a href="http://on-demand.gputechconf.com/gtc/2015/video/S5353.html" rel="noopener" target="_blank">Memory Bandwidth Bootcamp: Best Practices</a>, <a href="http://on-demand.gputechconf.com/gtc/2015/video/S5376.html" rel="noopener" target="_blank">Memory Bandwidth Bootcamp: Beyond Best Practices</a> and <a href="http://on-demand.gputechconf.com/gtc/2016/video/S6181.html" rel="noopener" target="_blank">Memory Bandwidth Bootcamp: Collaborative Access Patterns</a> by <a href="https://www.linkedin.com/in/tscudiero" rel="noopener" target="_blank">Tony Scudiero</a> for a good overview.
40 | 
41 | 
42 | ::: {.column-screen}
43 | [![](../imgs/gpu_spec-2.webp)](https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubhtml?gid=1232846262&amp;single=true){fig-align="center"}
44 | :::
45 | 
46 | ## CPU specifications
47 | The CPUs tested also comprise three different micro-architectures, ranging from a low end laptop dual core (i5-4210U) to a mid range desktop quad core (i5-6500) CPU.  The full specifications are shown below, where I have again included the maximum theoretical speedup depending on whether the OpenCV functions are limited by the CPU bandwidth or clock speed (I could not find any Intel published GFLOPS information).
48 | 
49 | ::: {.column-screen}
50 | [![](../imgs/cpu_spec-2.webp)](https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubhtml?gid=921370251&amp;single=true){fig-align="center"}
51 | :::
52 | 
53 | # Benchmark results
54 | The results for all tests are available  [here](https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubhtml?gid=0&amp;single=true), where you can check if a specific configuration benefits from an improvement in performance when moved to the GPU.
55 | 
56 | To get an overall picture of the performance increase which can be achieved from using the CUDA functions over the standard CPU ones, the speedup of each CPU/GPU over the least powerful CPU (i5-4210U) is compared.  The below figure shows the speedup averaged over all 5300 tests (All Configs).  Because the average speedup is influenced by the number of different configurations tested per OpenCV function, two additional measures are also shown (which only consider one configuration per function) on the below figure:
57 | <ul>
58 |  	<li>GPU Min - the average speedup, taken over all OpenCV functions for the configuration where the GPU speedup was smallest.</li>
59 |  	<li>GPU Max - the average speedup, taken over all OpenCV functions for the configuration where the GPU speedup was greatest.</li>
60 | </ul>
61 | <a id="average_perf_increase"></a>
62 | <a href="https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubchart?oid=396876054&amp;format=interactive" rel="noopener" target="_blank"><img src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubchart?oid=396876054&amp;format=image" alt=""></a>
63 | 
64 | The results demonstrate that the configuration (function arguments) makes a massive difference to the CPU/GPU performance.  That said, even the slowest configurations on the slowest GPUs are in the same ball park, performance wise, as the fastest configurations on the most powerful CPUs in the test.  This, combined with a higher average performance for all GPUs tested, implies that you should nearly always see an improvement when moving to the GPU, if you have several OpenCV functions in your pipeline (as long as you don't keep moving your data to and from the GPU), even if you are using a low end, two-generation-old laptop GPU (730m).
65 | 
66 | Now let's examine some individual OpenCV functions.  Because each function has many configurations, for each function the average execution time over all configurations tested is used to calculate the speedup over the i5-4210U.  This provides a guide to the expected performance of a function irrespective of the specific configuration.  The next figure shows the top 20 functions where the GPU speedup was largest.  It is worth noting that the speedup of the GTX 1060 over all of the CPUs is so large that it has to be shown on a log scale.
67 | <a id="top_20"></a>
68 | <a href="https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubchart?oid=1721545249&amp;format=interactive" rel="noopener" target="_blank"><img src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubchart?oid=1721545249&amp;format=image" alt="" class="alignnone size-full wp-image-547"></a>
69 | 
70 | <a id="bottom_20"></a>
71 | Next, the bottom 20 functions where the GPU speedup was smallest.
72 | <a href="https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubchart?oid=289329247&amp;format=interactive" rel="noopener" target="_blank"><img src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubchart?oid=289329247&amp;format=image" alt="" class="alignnone size-full wp-image-547"></a>
73 | 
74 | The above figure demonstrates that, although the CUDA implementations are on average much quicker, some functions are significantly quicker on the CPU.  Generally this is due to the function using the Intel Integrated Performance Primitives for Image processing and Computer Vision (<a href="https://software.intel.com/en-us/articles/intel-integrated-performance-primitives-intel-ipp-open-source-computer-vision-library-opencv-faq" rel="noopener" target="_blank">IPP-ICV</a>) and/or <a href="https://en.wikipedia.org/wiki/SIMD" rel="noopener" target="_blank">SIMD</a> instructions.  That said, the above results also show that some of these slower functions do benefit from the parallelism of the GPU, but a more powerful GPU is required to leverage this.
75 | 
76 | Finally, let's examine which OpenCV functions took the longest.  This is important if you are using one of these functions, as you may consider calling its CUDA counterpart, even if it is the only OpenCV function you need.  The below figure contains the execution time for the 20 functions which took the longest on the i5-4210U; again this has to be shown on a log scale because the GPU execution time is much smaller than the CPU execution time.
77 | 
78 | <a id="slowest"></a>
79 | <a href="https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubchart?oid=675587392&amp;format=interactive" rel="noopener" target="_blank"><img src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTpHmM6T6sCkJUrr6UEVX2V9MRPUs-oD_z979UiGEixOmLaF1qZez_H3EWC96XYXuf4_i2WlhM0g-rz/pubchart?oid=675587392&amp;format=image" alt="" class="alignnone size-full wp-image-547"></a>
80 | 
81 | Given the possible performance increases shown in the results, if you were performing mean shift filtering with OpenCV, on a laptop with only a low end i5-4210U, the execution time of nearly 7 seconds may encourage you to upgrade your hardware.  From the above it is clear that it is much better to invest in a tiny GPU (730m) which will reduce your processing time by a factor of 10 to a more tolerable 0.6 seconds, or a mid range GPU (GTX 1060), reducing your processing time by a factor of 100 to 0.07 seconds, rather than a mid range i7 which will give you less than a 30% reduction.
82 | 
83 | To conclude I would just reiterate that the benefit you will get from moving your processing to the GPU with OpenCV will depend on the function you call and configuration that you use, in addition to your processing pipeline.  That said, from what I have observed, on average the CUDA functions are much much quicker than their CPU counterparts.  Please let me know if there are any mistakes in my results and/or analysis.


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/nbs/opencv4-sparse-optical-flow.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## CUDA python implementation of [py_lucas_kanade](https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_video/py_lucas_kanade/py_lucas_kanade.html) example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 8,
 13 |    "metadata": {},
 14 |    "outputs": [
 15 |     {
 16 |      "data": {
 17 |       "text/html": [
 18 |        "<style>#notebook { padding-top:0px !important; } .container { width:100% !important; } .end_space { min-height:0px !important; } </style>"
 19 |       ],
 20 |       "text/plain": [
 21 |        "<IPython.core.display.HTML object>"
 22 |       ]
 23 |      },
 24 |      "metadata": {},
 25 |      "output_type": "display_data"
 26 |     }
 27 |    ],
 28 |    "source": [
 29 |     "import numpy as np\n",
 30 |     "import os\n",
 31 |     "import cv2\n",
 32 |     "import time\n",
 33 |     "from IPython.core.display import display, HTML\n",
 34 |     "display(HTML(\"<style>\"\n",
 35 |     "    + \"#notebook { padding-top:0px !important; } \" \n",
 36 |     "    + \".container { width:100% !important; } \"\n",
 37 |     "    + \".end_space { min-height:0px !important; } \"\n",
 38 |     "    + \"</style>\"))\n",
 39 |     "\n",
 40 |     "# params for ShiTomasi corner detection\n",
 41 |     "feature_params = dict( maxCorners = 100,\n",
 42 |     "                       qualityLevel = 0.3,\n",
 43 |     "                       minDistance = 7,\n",
 44 |     "                       blockSize = 7 )\n",
 45 |     "\n",
 46 |     "# Parameters for lucas kanade optical flow\n",
 47 |     "lk_params = dict( winSize  = (15,15),\n",
 48 |     "                  maxLevel = 2,\n",
 49 |     "                  criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))\n",
 50 |     "\n",
 51 |     "vidPath = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/768x576.avi'"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "metadata": {},
 57 |    "source": [
 58 |     "## Original CPU implementation"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 10,
 64 |    "metadata": {},
 65 |    "outputs": [
 66 |     {
 67 |      "ename": "error",
 68 |      "evalue": "OpenCV(4.1.1) E:\\Dev\\Repos\\OpenCV\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
 69 |      "output_type": "error",
 70 |      "traceback": [
 71 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
 72 |       "\u001b[1;31merror\u001b[0m                                     Traceback (most recent call last)",
 73 |       "\u001b[1;32m<ipython-input-10-33b49addccc6>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     14\u001b[0m \u001b[1;32mwhile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     15\u001b[0m     \u001b[0mret\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mframe\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcap\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m     \u001b[0mframe_gray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcv2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcvtColor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcv2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mCOLOR_BGR2GRAY\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     17\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     18\u001b[0m     \u001b[1;31m# calculate optical flow\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
 74 |       "\u001b[1;31merror\u001b[0m: OpenCV(4.1.1) E:\\Dev\\Repos\\OpenCV\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n"
 75 |      ]
 76 |     }
 77 |    ],
 78 |    "source": [
 79 |     "cap = cv2.VideoCapture(vidPath)\n",
 80 |     "\n",
 81 |     "# Create some random colors\n",
 82 |     "color = np.random.randint(0,255,(100,3))\n",
 83 |     "\n",
 84 |     "# Take first frame and find corners in it\n",
 85 |     "ret, old_frame = cap.read()\n",
 86 |     "old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)\n",
 87 |     "p0 = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)\n",
 88 |     "\n",
 89 |     "# Create a mask image for drawing purposes\n",
 90 |     "mask = np.zeros_like(old_frame)\n",
 91 |     "etime = 0\n",
 92 |     "while(1):\n",
 93 |     "    ret,frame = cap.read()\n",
 94 |     "    frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n",
 95 |     "\n",
 96 |     "    # calculate optical flow\n",
 97 |     "    t = time.perf_counter()\n",
 98 |     "    p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)\n",
 99 |     "    etime += (time.perf_counter() - t)\n",
100 |     "    \n",
101 |     "    # Select good points\n",
102 |     "    good_new = p1[st==1]\n",
103 |     "    good_old = p0[st==1]\n",
104 |     "\n",
105 |     "    # draw the tracks\n",
106 |     "    for i,(new,old) in enumerate(zip(good_new,good_old)):\n",
107 |     "        a,b = new.ravel()\n",
108 |     "        c,d = old.ravel()\n",
109 |     "        mask = cv2.line(mask, (a,b),(c,d), color[i].tolist(), 2)\n",
110 |     "        frame = cv2.circle(frame,(a,b),5,color[i].tolist(),-1)\n",
111 |     "    img = cv2.add(frame,mask)\n",
112 |     "\n",
113 |     "    cv2.imshow('frame',img)\n",
114 |     "    k = cv2.waitKey(30) & 0xff\n",
115 |     "    if k == 27:\n",
116 |     "        break\n",
117 |     "\n",
118 |     "    # Now update the previous frame and previous points\n",
119 |     "    old_gray = frame_gray.copy()\n",
120 |     "    p0 = good_new.reshape(-1,1,2)\n",
121 |     "\n",
122 |     "cv2.destroyAllWindows()\n",
123 |     "cap.release()"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": 11,
129 |    "metadata": {},
130 |    "outputs": [
131 |     {
132 |      "data": {
133 |       "text/plain": [
134 |        "0.16676009999997632"
135 |       ]
136 |      },
137 |      "execution_count": 11,
138 |      "metadata": {},
139 |      "output_type": "execute_result"
140 |     }
141 |    ],
142 |    "source": [
143 |     "etime"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "markdown",
148 |    "metadata": {},
149 |    "source": [
150 |     "## Naive CUDA implementation without pre-alloc, streams or other optimizations"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 15,
156 |    "metadata": {},
157 |    "outputs": [
158 |     {
159 |      "ename": "error",
160 |      "evalue": "OpenCV(4.1.1) E:\\Dev\\Repos\\OpenCV\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
161 |      "output_type": "error",
162 |      "traceback": [
163 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
164 |       "\u001b[1;31merror\u001b[0m                                     Traceback (most recent call last)",
165 |       "\u001b[1;32m<ipython-input-15-f2b109fe3ac3>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     18\u001b[0m \u001b[1;32mwhile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     19\u001b[0m     \u001b[0mret\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mframe\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcap\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 20\u001b[1;33m     \u001b[0mframe_gray_device\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupload\u001b[0m\u001b[1;33m(\u001b[0m \u001b[0mcv2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcvtColor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcv2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mCOLOR_BGR2GRAY\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     21\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     22\u001b[0m     \u001b[1;31m# calculate optical flow\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
166 |       "\u001b[1;31merror\u001b[0m: OpenCV(4.1.1) E:\\Dev\\Repos\\OpenCV\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n"
167 |      ]
168 |     }
169 |    ],
170 |    "source": [
171 |     "cap = cv2.VideoCapture(vidPath)\n",
172 |     "\n",
173 |     "# Take first frame and find corners in it\n",
174 |     "ret, old_frame = cap.read()\n",
175 |     "old_gray_device = cv2.cuda_GpuMat(cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY))\n",
176 |     "detector_device =  cv2.cuda.createGoodFeaturesToTrackDetector(cv2.CV_8UC1, feature_params['maxCorners'], \\\n",
177 |     "                                           feature_params['qualityLevel'], feature_params['minDistance'], \\\n",
178 |     "                                           feature_params['blockSize'])\n",
179 |     "p0_device = detector_device.detect(old_gray_device)\n",
180 |     "\n",
181 |     "optFlow = cv2.cuda_SparsePyrLKOpticalFlow.create()\n",
182 |     "\n",
183 |     "# Create a mask image for drawing purposes\n",
184 |     "mask = np.zeros_like(old_frame)\n",
185 |     "frame_gray_device = cv2.cuda_GpuMat()\n",
186 |     "p0 = p0_device.download()\n",
187 |     "etime = 0\n",
188 |     "while(1):\n",
189 |     "    ret,frame = cap.read()\n",
190 |     "    frame_gray_device.upload( cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))    \n",
191 |     "\n",
192 |     "    # calculate optical flow\n",
193 |     "    t = time.perf_counter()\n",
194 |     "    p1_device, st_device, err = optFlow.calc(old_gray_device,frame_gray_device,p0_device,None)\n",
195 |     "    etime += (time.perf_counter() - t)\n",
196 |     "    \n",
197 |     "    # download points\n",
198 |     "    p1 = p1_device.download()\n",
199 |     "    st = st_device.download()\n",
200 |     "        \n",
201 |     "    # Select good points\n",
202 |     "    good_new = p1[st==1]\n",
203 |     "    good_old = p0[st==1]\n",
204 |     "\n",
205 |     "    # draw the tracks\n",
206 |     "    for i,(new,old) in enumerate(zip(good_new,good_old)):\n",
207 |     "        a,b = new.ravel()\n",
208 |     "        c,d = old.ravel()\n",
209 |     "        mask = cv2.line(mask, (a,b),(c,d), color[i].tolist(), 2)\n",
210 |     "        frame = cv2.circle(frame,(a,b),5,color[i].tolist(),-1)\n",
211 |     "    img = cv2.add(frame,mask)\n",
212 |     "\n",
213 |     "    cv2.imshow('frame',img)\n",
214 |     "    k = cv2.waitKey(30) & 0xff\n",
215 |     "    if k == 27:\n",
216 |     "        break\n",
217 |     "\n",
218 |     "    # Now update the previous frame and previous points\n",
219 |     "    frame_gray_device.copyTo(old_gray_device)\n",
220 |     "    p0 = np.expand_dims(good_new,axis=0)\n",
221 |     "    p0_device.upload(p0)\n",
222 |     "\n",
223 |     "cv2.destroyAllWindows()\n",
224 |     "cap.release()"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "code",
229 |    "execution_count": 17,
230 |    "metadata": {},
231 |    "outputs": [
232 |     {
233 |      "data": {
234 |       "text/plain": [
235 |        "0.10651610000002165"
236 |       ]
237 |      },
238 |      "execution_count": 17,
239 |      "metadata": {},
240 |      "output_type": "execute_result"
241 |     }
242 |    ],
243 |    "source": [
244 |     "etime"
245 |    ]
246 |   }
247 |  ],
248 |  "metadata": {
249 |   "kernelspec": {
250 |    "display_name": "Python 3",
251 |    "language": "python",
252 |    "name": "python3"
253 |   },
254 |   "language_info": {
255 |    "codemirror_mode": {
256 |     "name": "ipython",
257 |     "version": 3
258 |    },
259 |    "file_extension": ".py",
260 |    "mimetype": "text/x-python",
261 |    "name": "python",
262 |    "nbconvert_exporter": "python",
263 |    "pygments_lexer": "ipython3",
264 |    "version": "3.7.3"
265 |   }
266 |  },
267 |  "nbformat": 4,
268 |  "nbformat_minor": 2
269 | }
270 | 


--------------------------------------------------------------------------------
/nbs/cuda_optimization_test.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import cv2\n",
 10 |     "import numpy as np\n",
 11 |     "import time\n",
 12 |     "import GPUtil\n",
 13 |     "import platform\n",
 14 |     "import cpuinfo"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "metadata": {},
 21 |    "outputs": [
 22 |     {
 23 |      "name": "stdout",
 24 |      "output_type": "stream",
 25 |      "text": [
 26 |       "4.5.2\n",
 27 |       "Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz\n",
 28 |       "GeForce RTX 2080\n"
 29 |      ]
 30 |     }
 31 |    ],
 32 |    "source": [
 33 |     "gpus = GPUtil.getGPUs()\n",
 34 |     "print(cv2.__version__)\n",
 35 |     "print(cpuinfo.get_cpu_info()['brand_raw'])\n",
 36 |     "print(gpus[0].name)"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "# OpenCV without optimization"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 3,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "im = np.random.random((4000, 4000,3)).astype(np.uint8)\n",
 53 |     "loop_cnt = 1000"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "markdown",
 58 |    "metadata": {},
 59 |    "source": [
 60 |     "## CPU"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 4,
 66 |    "metadata": {},
 67 |    "outputs": [
 68 |     {
 69 |      "name": "stdout",
 70 |      "output_type": "stream",
 71 |      "text": [
 72 |       "cpu time: 15007.12 us\n"
 73 |      ]
 74 |     }
 75 |    ],
 76 |    "source": [
 77 |     "start_t = time.time()\n",
 78 |     "for _ in range(loop_cnt):    \n",
 79 |     "    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)\n",
 80 |     "    retval,thr = cv2.threshold(gray,128,255,cv2.THRESH_BINARY)\n",
 81 |     "    morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n",
 82 |     "    morph = cv2.dilate(thr,morph_kernel)\n",
 83 |     "    morph = cv2.resize(morph,(640,480))    \n",
 84 |     "print('cpu time: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt))"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "markdown",
 89 |    "metadata": {},
 90 |    "source": [
 91 |     "### Pre-alloc return arrays and remove constant ops"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": 5,
 97 |    "metadata": {},
 98 |    "outputs": [
 99 |     {
100 |      "name": "stdout",
101 |      "output_type": "stream",
102 |      "text": [
103 |       "cpu time: 12000.67 us\n"
104 |      ]
105 |     }
106 |    ],
107 |    "source": [
108 |     "# use ones from above except morph\n",
109 |     "morph_sm = np.empty((480,640),np.uint8)\n",
110 |     "morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n",
111 |     "start_t = time.time()\n",
112 |     "for _ in range(loop_cnt):    \n",
113 |     "    cv2.cvtColor(im, cv2.COLOR_BGR2GRAY, gray)\n",
114 |     "    cv2.threshold(gray,128,255,cv2.THRESH_BINARY,thr)    \n",
115 |     "    cv2.dilate(thr,morph_kernel,morph)\n",
116 |     "    cv2.resize(morph,(640,480),morph_sm)\n",
117 |     "cpu_time = (time.time() - start_t) * 1e6 / loop_cnt\n",
118 |     "print('cpu time: {:.2f} us'.format(cpu_time))"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "## GPU"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 7,
131 |    "metadata": {},
132 |    "outputs": [
133 |     {
134 |      "name": "stdout",
135 |      "output_type": "stream",
136 |      "text": [
137 |       "gpu time: 16019.12 us\n"
138 |      ]
139 |     }
140 |    ],
141 |    "source": [
142 |     "start_t = time.time()\n",
143 |     "for _ in range(loop_cnt):\n",
144 |     "    gpu_frame = cv2.cuda_GpuMat()\n",
145 |     "    gpu_frame.upload(im)\n",
146 |     "    gpu_gray = cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2GRAY)\n",
147 |     "    retval,gpu_thr = cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY)\n",
148 |     "    morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n",
149 |     "    morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n",
150 |     "    gpu_morph = morph_filter.apply(gpu_thr)\n",
151 |     "    gpu_morph = cv2.cuda.resize(gpu_morph,(640,480))\n",
152 |     "    res = gpu_morph.download()\n",
153 |     "print('gpu time: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt))"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "markdown",
158 |    "metadata": {},
159 |    "source": [
160 |     "# OpenCV with optimization"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "markdown",
165 |    "metadata": {},
166 |    "source": [
167 |     "## Demonstrate warm up"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 6,
173 |    "metadata": {},
174 |    "outputs": [
175 |     {
176 |      "name": "stdout",
177 |      "output_type": "stream",
178 |      "text": [
179 |       "gpu time warm up: 116695.36 us\n",
180 |       "gpu time when warm: 16002.82 us\n"
181 |      ]
182 |     }
183 |    ],
184 |    "source": [
185 |     "start_t = time.time()\n",
186 |     "loop_cnt_warm = 5\n",
187 |     "for _ in range(loop_cnt_warm):\n",
188 |     "    gpu_frame = cv2.cuda_GpuMat()\n",
189 |     "    gpu_frame.upload(im)\n",
190 |     "    gpu_gray = cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2GRAY)\n",
191 |     "    retval,gpu_thr = cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY)\n",
192 |     "    morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n",
193 |     "    morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n",
194 |     "    gpu_morph = morph_filter.apply(gpu_thr)\n",
195 |     "    gpu_morph = cv2.cuda.resize(gpu_morph,(640,480))\n",
196 |     "    res = gpu_morph.download()\n",
197 |     "print('gpu time warm up: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt_warm))\n",
198 |     "start_t = time.time()\n",
199 |     "for _ in range(loop_cnt):\n",
200 |     "    gpu_frame = cv2.cuda_GpuMat()\n",
201 |     "    gpu_frame.upload(im)\n",
202 |     "    gpu_gray = cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2GRAY)\n",
203 |     "    retval,gpu_thr = cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY)\n",
204 |     "    morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n",
205 |     "    morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n",
206 |     "    gpu_morph = morph_filter.apply(gpu_thr)\n",
207 |     "    gpu_morph = cv2.cuda.resize(gpu_morph,(640,480))\n",
208 |     "    res = gpu_morph.download()\n",
209 |     "print('gpu time when warm: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt))"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "markdown",
214 |    "metadata": {},
215 |    "source": [
216 |     "### Exclude CPU ops and upload/download"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": 8,
222 |    "metadata": {},
223 |    "outputs": [
224 |     {
225 |      "name": "stdout",
226 |      "output_type": "stream",
227 |      "text": [
228 |       "gpu time: 7100.30 us\n"
229 |      ]
230 |     }
231 |    ],
232 |    "source": [
233 |     "morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n",
234 |     "gpu_frame = cv2.cuda_GpuMat(im)\n",
235 |     "start_t = time.time()\n",
236 |     "for _ in range(loop_cnt):\n",
237 |     "    gpu_gray = cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2GRAY)\n",
238 |     "    retval,gpu_thr = cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY)\n",
239 |     "    morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n",
240 |     "    gpu_morph = morph_filter.apply(gpu_thr)\n",
241 |     "    gpu_morph = cv2.cuda.resize(gpu_morph,(640,480))    \n",
242 |     "print('gpu time: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt))\n",
243 |     "res = gpu_morph.download()"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "markdown",
248 |    "metadata": {},
249 |    "source": [
250 |     "### Pre-allocate"
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "code",
255 |    "execution_count": 9,
256 |    "metadata": {},
257 |    "outputs": [
258 |     {
259 |      "name": "stdout",
260 |      "output_type": "stream",
261 |      "text": [
262 |       "gpu time: 4493.87 us\n"
263 |      ]
264 |     }
265 |    ],
266 |    "source": [
267 |     "morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n",
268 |     "gpu_frame = cv2.cuda_GpuMat(im)\n",
269 |     "gpu_gray = cv2.cuda_GpuMat(im.shape[:-1][::-1],cv2.CV_8UC1)\n",
270 |     "gpu_thr = cv2.cuda_GpuMat(gpu_gray.size(),gpu_gray.type())\n",
271 |     "gpu_morph = cv2.cuda_GpuMat(gpu_gray.size(),gpu_gray.type())\n",
272 |     "gpu_morph_sm = cv2.cuda_GpuMat((640,480),gpu_gray.type())\n",
273 |     "start_t = time.time()\n",
274 |     "for _ in range(loop_cnt):\n",
275 |     "    cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2GRAY,gpu_gray)\n",
276 |     "    cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY,gpu_thr)\n",
277 |     "    morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n",
278 |     "    morph_filter.apply(gpu_thr,gpu_morph)\n",
279 |     "    cv2.cuda.resize(gpu_morph,(640,480),gpu_morph_sm)    \n",
280 |     "print('gpu time: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt))\n",
281 |     "res = gpu_morph.download()"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "markdown",
286 |    "metadata": {},
287 |    "source": [
288 |     "### Remove morph_filter creation as this would also be pre-computed in practice"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "code",
293 |    "execution_count": 10,
294 |    "metadata": {},
295 |    "outputs": [
296 |     {
297 |      "name": "stdout",
298 |      "output_type": "stream",
299 |      "text": [
300 |       "gpu time: 3620.95 us\n"
301 |      ]
302 |     }
303 |    ],
304 |    "source": [
305 |     "morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n",
306 |     "gpu_frame = cv2.cuda_GpuMat(im)\n",
307 |     "gpu_gray = cv2.cuda_GpuMat(im.shape[:-1][::-1],cv2.CV_8UC1)\n",
308 |     "gpu_thr = cv2.cuda_GpuMat(gpu_gray.size(),gpu_gray.type())\n",
309 |     "gpu_morph = cv2.cuda_GpuMat(gpu_gray.size(),gpu_gray.type())\n",
310 |     "gpu_morph_sm = cv2.cuda_GpuMat((640,480),gpu_gray.type())\n",
311 |     "morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n",
312 |     "start_t = time.time()\n",
313 |     "for _ in range(loop_cnt):\n",
314 |     "    cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2GRAY,gpu_gray)\n",
315 |     "    cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY,gpu_thr)    \n",
316 |     "    morph_filter.apply(gpu_thr,gpu_morph)\n",
317 |     "    cv2.cuda.resize(gpu_morph,(640,480),gpu_morph_sm)    \n",
318 |     "print('gpu time: {:.2f} us'.format((time.time() - start_t) * 1e6 / loop_cnt))\n",
319 |     "res = gpu_morph.download()"
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "markdown",
324 |    "metadata": {},
325 |    "source": [
326 |     "### Stream - worst case scenario: stall on each loop iteration"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "code",
331 |    "execution_count": 11,
332 |    "metadata": {},
333 |    "outputs": [
334 |     {
335 |      "name": "stdout",
336 |      "output_type": "stream",
337 |      "text": [
338 |       "gpu time: 3443.38 us\n"
339 |      ]
340 |     }
341 |    ],
342 |    "source": [
343 |     "stream = cv2.cuda_Stream()\n",
344 |     "morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7))\n",
345 |     "gpu_frame = cv2.cuda_GpuMat(im)\n",
346 |     "gpu_gray = cv2.cuda_GpuMat(im.shape[:-1][::-1],cv2.CV_8UC1)\n",
347 |     "gpu_thr = cv2.cuda_GpuMat(gpu_gray.size(),gpu_gray.type())\n",
348 |     "gpu_morph = cv2.cuda_GpuMat(gpu_gray.size(),gpu_gray.type())\n",
349 |     "gpu_morph_sm = cv2.cuda_GpuMat((640,480),gpu_gray.type())\n",
350 |     "morph_filter = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE,cv2.CV_8U,morph_kernel)\n",
351 |     "start_t = time.time()\n",
352 |     "for _ in range(loop_cnt):\n",
353 |     "    cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2GRAY,gpu_gray,stream = stream)\n",
354 |     "    cv2.cuda.threshold(gpu_gray,128,255,cv2.THRESH_BINARY,gpu_thr,stream=stream)    \n",
355 |     "    morph_filter.apply(gpu_thr,gpu_morph,stream=stream)\n",
356 |     "    cv2.cuda.resize(gpu_morph,(640,480),gpu_morph_sm,stream=stream)\n",
357 |     "    stream.waitForCompletion()\n",
358 |     "gpu_time = (time.time() - start_t) * 1e6 / loop_cnt\n",
359 |     "print('gpu time: {:.2f} us'.format(gpu_time))\n",
360 |     "res = gpu_morph.download()"
361 |    ]
362 |   },
363 |   {
364 |    "cell_type": "code",
365 |    "execution_count": 26,
366 |    "metadata": {},
367 |    "outputs": [],
368 |    "source": [
369 |     "# No real improvement from streams, likely due to a hard sync inside one of the routines"
370 |    ]
371 |   },
372 |   {
373 |    "cell_type": "markdown",
374 |    "metadata": {},
375 |    "source": [
376 |     "# Speed up"
377 |    ]
378 |   },
379 |   {
380 |    "cell_type": "code",
381 |    "execution_count": 12,
382 |    "metadata": {},
383 |    "outputs": [
384 |     {
385 |      "data": {
386 |       "text/plain": [
387 |        "3.485140688715614"
388 |       ]
389 |      },
390 |      "execution_count": 12,
391 |      "metadata": {},
392 |      "output_type": "execute_result"
393 |     }
394 |    ],
395 |    "source": [
396 |     "cpu_time/gpu_time"
397 |    ]
398 |   },
399 |   {
400 |    "cell_type": "code",
401 |    "execution_count": null,
402 |    "metadata": {},
403 |    "outputs": [],
404 |    "source": []
405 |   }
406 |  ],
407 |  "metadata": {
408 |   "kernelspec": {
409 |    "display_name": "Python 3",
410 |    "language": "python",
411 |    "name": "python3"
412 |   },
413 |   "language_info": {
414 |    "codemirror_mode": {
415 |     "name": "ipython",
416 |     "version": 3
417 |    },
418 |    "file_extension": ".py",
419 |    "mimetype": "text/x-python",
420 |    "name": "python",
421 |    "nbconvert_exporter": "python",
422 |    "pygments_lexer": "ipython3",
423 |    "version": "3.7.7"
424 |   },
425 |   "toc": {
426 |    "base_numbering": 1,
427 |    "nav_menu": {},
428 |    "number_sections": true,
429 |    "sideBar": true,
430 |    "skip_h1_title": false,
431 |    "title_cell": "Table of Contents",
432 |    "title_sidebar": "Contents",
433 |    "toc_cell": false,
434 |    "toc_position": {},
435 |    "toc_section_display": true,
436 |    "toc_window_display": false
437 |   }
438 |  },
439 |  "nbformat": 4,
440 |  "nbformat_minor": 4
441 | }
442 | 


--------------------------------------------------------------------------------
/nbs/cudacodec.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "1545a406",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# cv.cudacodec"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 1,
 14 |    "id": "d3f8a4b0",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import cv2 as cv\n",
 19 |     "import os\n",
 20 |     "import time\n",
 21 |     "import numpy as np"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "id": "ac450ca8",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "## Transcoding Example"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "markdown",
 34 |    "id": "33f348a4",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "Quick example comparing cv.cudacodec transcoding with cv on Ubuntu, using the Python wheel from https://github.com/cudawarped/opencv-python-cuda-wheels/releases/tag/4.6.0.20221102.\n",
 38 |     "\n",
 39 |     "GPU: Mobile RTX 3070 Ti (5th gen decoder & 7th gen encoder)\n",
 40 |     "\n",
 41 |     "CPU: i7-12700H\n",
 42 |     "\n",
 43 |     "The benchmark results are not comparable because CPU hardware decoding was not available for the chosen codecs."
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 2,
 49 |    "id": "21825e40",
 50 |    "metadata": {},
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "vid_path_in_4k = '/home/b/media/jellyfish-120-mbps-4k-uhd-h264.mkv'\n",
 54 |     "vid_path_in_out_1080p = '/home/b/media/jelly_1080p.hevc'\n",
 55 |     "vid_path_out_1080p = '/home/b/media/jelly.h264'\n",
 56 |     "vid_path_out_1080p_mp4 = '/home/b/media/jelly.mp4'"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 3,
 62 |    "id": "43ae6af1",
 63 |    "metadata": {},
 64 |    "outputs": [
 65 |     {
 66 |      "name": "stdout",
 67 |      "output_type": "stream",
 68 |      "text": [
 69 |       "\n",
 70 |       "General configuration for OpenCV 4.6.0-dev =====================================\n",
 71 |       "  Version control:               4.6.0-504-gee9137f176\n",
 72 |       "\n",
 73 |       "  Extra modules:\n",
 74 |       "    Location (extra):            /home/b/repos/opencv/opencv-python/opencv_contrib/modules\n",
 75 |       "    Version control (extra):     4.6.0-106-g9d84eaed\n",
 76 |       "\n",
 77 |       "  Platform:\n",
 78 |       "    Timestamp:                   2022-11-02T16:24:13Z\n",
 79 |       "    Host:                        Linux 5.10.16.3-microsoft-standard-WSL2 x86_64\n",
 80 |       "    CMake:                       3.24.1\n",
 81 |       "    CMake generator:             Ninja\n",
 82 |       "    CMake build tool:            /usr/bin/ninja\n",
 83 |       "    Configuration:               Release\n",
 84 |       "\n",
 85 |       "  CPU/HW features:\n",
 86 |       "    Baseline:                    SSE SSE2 SSE3\n",
 87 |       "      requested:                 SSE3\n",
 88 |       "    Dispatched code generation:  SSE4_1 SSE4_2 FP16 AVX AVX2 AVX512_SKX\n",
 89 |       "      requested:                 SSE4_1 SSE4_2 AVX FP16 AVX2 AVX512_SKX\n",
 90 |       "      SSE4_1 (16 files):         + SSSE3 SSE4_1\n",
 91 |       "      SSE4_2 (1 files):          + SSSE3 SSE4_1 POPCNT SSE4_2\n",
 92 |       "      FP16 (0 files):            + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 AVX\n",
 93 |       "      AVX (4 files):             + SSSE3 SSE4_1 POPCNT SSE4_2 AVX\n",
 94 |       "      AVX2 (32 files):           + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 FMA3 AVX AVX2\n",
 95 |       "      AVX512_SKX (5 files):      + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 FMA3 AVX AVX2 AVX_512F AVX512_COMMON AVX512_SKX\n",
 96 |       "\n",
 97 |       "  C/C++:\n",
 98 |       "    Built as dynamic libs?:      NO\n",
 99 |       "    C++ standard:                11\n",
100 |       "    C++ Compiler:                /usr/bin/c++  (ver 9.4.0)\n",
101 |       "    C++ flags (Release):         -fsigned-char -W -Wall -Wreturn-type -Wnon-virtual-dtor -Waddress -Wsequence-point -Wformat -Wformat-security -Wmissing-declarations -Wundef -Winit-self -Wpointer-arith -Wshadow -Wsign-promo -Wuninitialized -Wsuggest-override -Wno-delete-non-virtual-dtor -Wno-comment -Wimplicit-fallthrough=3 -Wno-strict-overflow -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -ffunction-sections -fdata-sections  -msse -msse2 -msse3 -fvisibility=hidden -fvisibility-inlines-hidden -O3 -DNDEBUG  -DNDEBUG\n",
102 |       "    C++ flags (Debug):           -fsigned-char -W -Wall -Wreturn-type -Wnon-virtual-dtor -Waddress -Wsequence-point -Wformat -Wformat-security -Wmissing-declarations -Wundef -Winit-self -Wpointer-arith -Wshadow -Wsign-promo -Wuninitialized -Wsuggest-override -Wno-delete-non-virtual-dtor -Wno-comment -Wimplicit-fallthrough=3 -Wno-strict-overflow -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -ffunction-sections -fdata-sections  -msse -msse2 -msse3 -fvisibility=hidden -fvisibility-inlines-hidden -g  -O0 -DDEBUG -D_DEBUG\n",
103 |       "    C Compiler:                  /usr/bin/cc\n",
104 |       "    C flags (Release):           -fsigned-char -W -Wall -Wreturn-type -Waddress -Wsequence-point -Wformat -Wformat-security -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wundef -Winit-self -Wpointer-arith -Wshadow -Wuninitialized -Wno-comment -Wimplicit-fallthrough=3 -Wno-strict-overflow -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -ffunction-sections -fdata-sections  -msse -msse2 -msse3 -fvisibility=hidden -O3 -DNDEBUG  -DNDEBUG\n",
105 |       "    C flags (Debug):             -fsigned-char -W -Wall -Wreturn-type -Waddress -Wsequence-point -Wformat -Wformat-security -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wundef -Winit-self -Wpointer-arith -Wshadow -Wuninitialized -Wno-comment -Wimplicit-fallthrough=3 -Wno-strict-overflow -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -ffunction-sections -fdata-sections  -msse -msse2 -msse3 -fvisibility=hidden -g  -O0 -DDEBUG -D_DEBUG\n",
106 |       "    Linker flags (Release):      -Wl,--exclude-libs,libippicv.a -Wl,--exclude-libs,libippiw.a   -Wl,--gc-sections -Wl,--as-needed -Wl,--no-undefined  \n",
107 |       "    Linker flags (Debug):        -Wl,--exclude-libs,libippicv.a -Wl,--exclude-libs,libippiw.a   -Wl,--gc-sections -Wl,--as-needed -Wl,--no-undefined  \n",
108 |       "    ccache:                      NO\n",
109 |       "    Precompiled headers:         NO\n",
110 |       "    Extra dependencies:          openjp2 /usr/lib/wsl/lib/libcuda.so /home/b/stubs/nvcuvid/libnvcuvid.so /home/b/stubs/nvcuvid/libnvidia-encode.so Iconv::Iconv m pthread cudart_static dl rt nppc nppial nppicc nppidei nppif nppig nppim nppist nppisu nppitc npps cublas cudnn cufft -L/usr/local/cuda/lib64 -L/usr/lib/x86_64-linux-gnu\n",
111 |       "    3rdparty dependencies:       libprotobuf ade ittnotify libjpeg-turbo libwebp libpng libtiff IlmImf zlib quirc ippiw ippicv\n",
112 |       "\n",
113 |       "  OpenCV modules:\n",
114 |       "    To be built:                 aruco barcode bgsegm bioinspired calib3d ccalib core cudaarithm cudabgsegm cudacodec cudafeatures2d cudafilters cudaimgproc cudalegacy cudaobjdetect cudaoptflow cudastereo cudawarping cudev datasets dnn dnn_objdetect dnn_superres dpm face features2d flann fuzzy gapi hfs highgui img_hash imgcodecs imgproc intensity_transform line_descriptor mcc ml objdetect optflow phase_unwrapping photo plot python3 quality rapid reg rgbd saliency shape stereo stitching structured_light superres surface_matching text tracking video videoio videostab wechat_qrcode xfeatures2d ximgproc xobjdetect xphoto\n",
115 |       "    Disabled:                    world\n",
116 |       "    Disabled by dependency:      -\n",
117 |       "    Unavailable:                 alphamat cvv freetype hdf java julia matlab ovis python2 sfm ts viz\n",
118 |       "    Applications:                -\n",
119 |       "    Documentation:               NO\n",
120 |       "    Non-free algorithms:         NO\n",
121 |       "\n",
122 |       "  GUI:                           NONE\n",
123 |       "    GTK+:                        NO\n",
124 |       "    VTK support:                 NO\n",
125 |       "\n",
126 |       "  Media I/O: \n",
127 |       "    ZLib:                        zlib (ver 1.2.12)\n",
128 |       "    JPEG:                        libjpeg-turbo (ver 2.1.3-62)\n",
129 |       "    WEBP:                        build (ver encoder: 0x020f)\n",
130 |       "    PNG:                         build (ver 1.6.37)\n",
131 |       "    TIFF:                        build (ver 42 - 4.2.0)\n",
132 |       "    JPEG 2000:                   OpenJPEG (ver 2.5.0)\n",
133 |       "    OpenEXR:                     build (ver 2.3.0)\n",
134 |       "    HDR:                         YES\n",
135 |       "    SUNRASTER:                   YES\n",
136 |       "    PXM:                         YES\n",
137 |       "    PFM:                         YES\n",
138 |       "\n",
139 |       "  Video I/O:\n",
140 |       "    DC1394:                      NO\n",
141 |       "    FFMPEG:                      YES\n",
142 |       "      avcodec:                   YES (58.54.100)\n",
143 |       "      avformat:                  YES (58.29.100)\n",
144 |       "      avutil:                    YES (56.31.100)\n",
145 |       "      swscale:                   YES (5.5.100)\n",
146 |       "      avresample:                YES (4.0.0)\n",
147 |       "    GStreamer:                   NO\n",
148 |       "    v4l/v4l2:                    YES (linux/videodev2.h)\n",
149 |       "\n",
150 |       "  Parallel framework:            pthreads\n",
151 |       "\n",
152 |       "  Trace:                         YES (with Intel ITT)\n",
153 |       "\n",
154 |       "  Other third-party libraries:\n",
155 |       "    Intel IPP:                   2020.0.0 Gold [2020.0.0]\n",
156 |       "           at:                   /home/b/repos/opencv/opencv-python/_skbuild/linux-x86_64-3.9/cmake-build/3rdparty/ippicv/ippicv_lnx/icv\n",
157 |       "    Intel IPP IW:                sources (2020.0.0)\n",
158 |       "              at:                /home/b/repos/opencv/opencv-python/_skbuild/linux-x86_64-3.9/cmake-build/3rdparty/ippicv/ippicv_lnx/iw\n",
159 |       "    VA:                          NO\n",
160 |       "    Lapack:                      NO\n",
161 |       "    Eigen:                       NO\n",
162 |       "    Custom HAL:                  NO\n",
163 |       "    Protobuf:                    build (3.19.1)\n",
164 |       "\n",
165 |       "  NVIDIA CUDA:                   YES (ver 11.7, CUFFT CUBLAS NVCUVID NVCUVENC FAST_MATH)\n",
166 |       "    NVIDIA GPU arch:             35 37 50 52 60 61 70 75 80 86\n",
167 |       "    NVIDIA PTX archs:            86\n",
168 |       "\n",
169 |       "  cuDNN:                         YES (ver 8.4.1)\n",
170 |       "\n",
171 |       "  OpenCL:                        YES (no extra features)\n",
172 |       "    Include path:                /home/b/repos/opencv/opencv-python/opencv/3rdparty/include/opencl/1.2\n",
173 |       "    Link libraries:              Dynamic load\n",
174 |       "\n",
175 |       "  Python 3:\n",
176 |       "    Interpreter:                 /home/b/mambaforge/bin/python (ver 3.9.13)\n",
177 |       "    Libraries:                   /home/b/mambaforge/lib/libpython3.9.so (ver 3.9.13)\n",
178 |       "    numpy:                       /home/b/mambaforge/lib/python3.9/site-packages/numpy/core/include (ver 1.23.3)\n",
179 |       "    install path:                python/cv2/python-3\n",
180 |       "\n",
181 |       "  Python (for build):            /home/b/mambaforge/bin/python\n",
182 |       "\n",
183 |       "  Java:                          \n",
184 |       "    ant:                         NO\n",
185 |       "    JNI:                         NO\n",
186 |       "    Java wrappers:               NO\n",
187 |       "    Java tests:                  NO\n",
188 |       "\n",
189 |       "  Install to:                    /home/b/repos/opencv/opencv-python/_skbuild/linux-x86_64-3.9/cmake-install\n",
190 |       "-----------------------------------------------------------------\n",
191 |       "\n",
192 |       "\n"
193 |      ]
194 |     }
195 |    ],
196 |    "source": [
197 |     "print(cv.getBuildInformation())"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "markdown",
202 |    "id": "9a66cd2c",
203 |    "metadata": {},
204 |    "source": [
205 |     "### GPU"
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "code",
210 |    "execution_count": 4,
211 |    "id": "b00f632a",
212 |    "metadata": {},
213 |    "outputs": [],
214 |    "source": [
215 |     "def transcode(vid_path_in, vid_path_out, codec = cv.cudacodec.H264, params = cv.cudacodec_VideoReaderInitParams()):\n",
216 |     "    reader = cv.cudacodec.createVideoReader(vid_path_in,params=params)\n",
217 |     "    reader.set(cv.cudacodec.COLOR_FORMAT_BGR)\n",
218 |     "    format = reader.format()\n",
219 |     "    if params.targetSz != (0,0):\n",
220 |     "        w,h = params.targetSz\n",
221 |     "    else:\n",
222 |     "        w,h = (format.width,format.height)\n",
223 |     "        \n",
224 |     "    frame = cv.cuda.GpuMat(h,w,cv.CV_8UC3)\n",
225 |     "    writer = cv.cudacodec.createVideoWriter(vid_path_out,[w,h],codec)\n",
226 |     "    n_frames = 0\n",
227 |     "    start = time.time()\n",
228 |     "    ret, _ = reader.nextFrame(frame)\n",
229 |     "    while(ret):\n",
230 |     "        n_frames += 1\n",
231 |     "        writer.write(frame)\n",
232 |     "        ret, _ = reader.nextFrame(frame)\n",
233 |     "    writer.release()\n",
234 |     "    end = time.time()\n",
235 |     "    return n_frames/(end - start), n_frames;"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "markdown",
240 |    "id": "2eab816e",
241 |    "metadata": {},
242 |    "source": [
243 |     "First convert 4K(h264) to 1080p(hevc) for benchmarking"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": 5,
249 |    "id": "c2e2a129",
250 |    "metadata": {},
251 |    "outputs": [
252 |     {
253 |      "name": "stdout",
254 |      "output_type": "stream",
255 |      "text": [
256 |       "Transcoded 900 frames from 4k(h264) to 1080p(hevc) at fps= 130.99\n"
257 |      ]
258 |     }
259 |    ],
260 |    "source": [
261 |     "params = cv.cudacodec_VideoReaderInitParams()\n",
262 |     "params.targetSz = (1920,1080)\n",
263 |     "fps, n_frames = transcode(vid_path_in_4k,vid_path_in_out_1080p,cv.cudacodec.HEVC,params)\n",
264 |     "print(f'Transcoded {n_frames} frames from 4k(h264) to 1080p(hevc) at fps= {fps:.2f}')"
265 |    ]
266 |   },
267 |   {
268 |    "cell_type": "markdown",
269 |    "id": "7e6dc675",
270 |    "metadata": {},
271 |    "source": [
272 |     "Benchmark 1080p transcoding - timings will be slightly optimistic because decoding begins as soon as the VideoReader is created, which happens before the timer is started"
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "code",
277 |    "execution_count": 6,
278 |    "id": "511fd755",
279 |    "metadata": {},
280 |    "outputs": [
281 |     {
282 |      "name": "stdout",
283 |      "output_type": "stream",
284 |      "text": [
285 |       "Transcoded 900 frames from 1080p(hevc) to 1080p(h264) at fps = 544.81\n"
286 |      ]
287 |     }
288 |    ],
289 |    "source": [
290 |     "params = cv.cudacodec_VideoReaderInitParams()\n",
291 |     "params.targetSz = (1920,1080)\n",
292 |     "fps, n_frames = transcode(vid_path_in_out_1080p,vid_path_out_1080p,cv.cudacodec.H264,params)\n",
293 |     "print(f'Transcoded {n_frames} frames from 1080p(hevc) to 1080p(h264) at fps = {fps:.2f}')"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "markdown",
298 |    "id": "1f7c9033",
299 |    "metadata": {},
300 |    "source": [
301 |     "### CPU"
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "markdown",
306 |    "id": "d54dbc48",
307 |    "metadata": {},
308 |    "source": [
309 |     "Hardware acceleration does not appear to be available for this codec"
310 |    ]
311 |   },
312 |   {
313 |    "cell_type": "code",
314 |    "execution_count": 7,
315 |    "id": "75263318",
316 |    "metadata": {},
317 |    "outputs": [],
318 |    "source": [
319 |     "def transcode_cpu(vid_path_in, vid_path_out):    \n",
320 |     "    cap = cv.VideoCapture(vid_path_in,cv.CAP_FFMPEG,(cv.CAP_PROP_HW_ACCELERATION, cv.VIDEO_ACCELERATION_ANY))\n",
321 |     "    #fourcc = cv.VideoWriter_fourcc(*\"mp4v\")\n",
322 |     "    fourcc = cv.VideoWriter_fourcc(*\"avc1\")\n",
323 |     "    fps = cap.get(cv.CAP_PROP_FPS)\n",
324 |     "    width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))\n",
325 |     "    height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))\n",
326 |     "    writer = cv.VideoWriter(vid_path_out, fourcc, fps, (width,height),\n",
327 |     "                            (cv.VIDEOWRITER_PROP_HW_ACCELERATION, cv.VIDEO_ACCELERATION_ANY))\n",
328 |     "    frame = np.zeros((height,width,3),dtype='uint8')\n",
329 |     "    n_frames = 0\n",
330 |     "    start = time.time()\n",
331 |     "    ret, _ = cap.read(frame)\n",
332 |     "    while(ret):\n",
333 |     "        n_frames += 1\n",
334 |     "        writer.write(frame)\n",
335 |     "        ret, _ = cap.read(frame)\n",
336 |     "    writer.release()\n",
337 |     "    end = time.time()\n",
338 |     "    return n_frames/(end - start), n_frames;"
339 |    ]
340 |   },
341 |   {
342 |    "cell_type": "markdown",
343 |    "id": "584b13a1",
344 |    "metadata": {},
345 |    "source": [
346 |     "Benchmarking 1080p transcoding without hardware acceleration"
347 |    ]
348 |   },
349 |   {
350 |    "cell_type": "code",
351 |    "execution_count": 8,
352 |    "id": "6ab764d4",
353 |    "metadata": {},
354 |    "outputs": [
355 |     {
356 |      "name": "stdout",
357 |      "output_type": "stream",
358 |      "text": [
359 |       "Transcoded 900 frames from 1080p(hevc) to 1080p(h264) at fps = 65.46\n"
360 |      ]
361 |     }
362 |    ],
363 |    "source": [
364 |     "fps, n_frames = transcode_cpu(vid_path_in_out_1080p,vid_path_out_1080p_mp4)\n",
365 |     "print(f'Transcoded {n_frames} frames from 1080p(hevc) to 1080p(h264) at fps = {fps:.2f}')"
366 |    ]
367 |   },
368 |   {
369 |    "cell_type": "code",
370 |    "execution_count": null,
371 |    "id": "57534197",
372 |    "metadata": {},
373 |    "outputs": [],
374 |    "source": []
375 |   }
376 |  ],
377 |  "metadata": {
378 |   "kernelspec": {
379 |    "display_name": "Python 3 (ipykernel)",
380 |    "language": "python",
381 |    "name": "python3"
382 |   },
383 |   "language_info": {
384 |    "codemirror_mode": {
385 |     "name": "ipython",
386 |     "version": 3
387 |    },
388 |    "file_extension": ".py",
389 |    "mimetype": "text/x-python",
390 |    "name": "python",
391 |    "nbconvert_exporter": "python",
392 |    "pygments_lexer": "ipython3",
393 |    "version": "3.9.13"
394 |   }
395 |  },
396 |  "nbformat": 4,
397 |  "nbformat_minor": 5
398 | }
399 | 


--------------------------------------------------------------------------------
/nbs/opencv_cvs_dll_load_failed.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "6973dcb4-2e8a-409c-a147-291c8520fb18",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# \"ImportError: DLL load failed while importing cv2: The specified module could not be found.\""
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "0f79e94d-bc84-4f09-a4a9-90be2e41388e",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "If you're on Windows using python >= 3.8, have built OpenCV >= 4.6 from source, and are seeing the above error when calling `import cv2`, this short guide should help solve your problem.\n",
 17 |     "\n",
 18 |     "The guide assumes that you have either installed the python bindings during the build process ([not recommended](https://github.com/opencv/opencv/issues/13202#issuecomment-439730899)) or manually copied `cv2.cp3x-win_amd64.pyd` to your distribution's `site-packages` directory (e.g. `C:\\Users\\<USER>\\mambaforge\\Lib\\site-packages`).\n",
 19 |     "\n",
 20 |     "So what's the issue?  Although the message is quite explicit regarding the cause, it doesn't really help with finding a solution. In a nutshell, python has found `cv2.cp3x-win_amd64.pyd`, tried to load it, and failed because it can't find a dependent shared library. The advice I have seen online regarding this is to dig out the trusty Dependency Walker, load `cv2.cp3x-win_amd64.pyd` and see which dependencies the system can't find.\n",
 21 |     "\n",
 22 |     "This would be solid advice if we had a C++ application and/or we were using python < 3.8 (which uses the system/user path for DLL resolution); however we are not, so even if Dependency Walker can't detect any problems we may still be facing the above error.\n",
 23 |     "\n",
 24 |     "The good news is that there is an easy fix if you know where the missing DLL's are, and one which is only slightly more involved if you don't, as long as you have access to the missing DLL's on your system."
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "markdown",
 29 |    "id": "f8ded592",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "## Fix when path to missing DLL's is known"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "id": "eb494db2",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "To demonstrate the fix, I have built the OpenCV shared library and corresponding python bindings and manually copied them to the \n",
 41 |     "`site-packages` directory inside my python distribution (`C:\\Users\\b\\mambaforge\\Lib\\site-packages`).  \n",
 42 |     "\n",
 43 |     "As I have built OpenCV as a shared library, the python bindings are dependent on `opencv_world460.dll`, and because I haven't told python where it is, I get the error shown below when trying to import the bindings."
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 1,
 49 |    "id": "53efd81a",
 50 |    "metadata": {},
 51 |    "outputs": [
 52 |     {
 53 |      "ename": "ImportError",
 54 |      "evalue": "DLL load failed while importing cv2: The specified module could not be found.",
 55 |      "output_type": "error",
 56 |      "traceback": [
 57 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
 58 |       "\u001b[1;31mImportError\u001b[0m                               Traceback (most recent call last)",
 59 |       "Cell \u001b[1;32mIn [1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcv2\u001b[39;00m\n",
 60 |       "\u001b[1;31mImportError\u001b[0m: DLL load failed while importing cv2: The specified module could not be found."
 61 |      ]
 62 |     }
 63 |    ],
 64 |    "source": [
 65 |     "import cv2"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "id": "cfa1c021",
 71 |    "metadata": {},
 72 |    "source": [
 73 |     "Given that I know the path to OpenCV's shared libraries is required and I haven't told python about it, the first thing to try is to add it to python's DLL search path and see if that solves the problem."
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 2,
 79 |    "id": "ba4db59c",
 80 |    "metadata": {},
 81 |    "outputs": [
 82 |     {
 83 |      "ename": "ImportError",
 84 |      "evalue": "DLL load failed while importing cv2: The specified module could not be found.",
 85 |      "output_type": "error",
 86 |      "traceback": [
 87 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
 88 |       "\u001b[1;31mImportError\u001b[0m                               Traceback (most recent call last)",
 89 |       "Cell \u001b[1;32mIn [2], line 3\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m;\n\u001b[0;32m      2\u001b[0m os\u001b[38;5;241m.\u001b[39madd_dll_directory(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mD:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mbuild\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mopencv\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mcuda_11_8_cc_all_sym\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mbin\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m----> 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcv2\u001b[39;00m\n",
 90 |       "\u001b[1;31mImportError\u001b[0m: DLL load failed while importing cv2: The specified module could not be found."
 91 |      ]
 92 |     }
 93 |    ],
 94 |    "source": [
 95 |     "import os\n",
 96 |     "os.add_dll_directory(\"D:\\\\build\\\\opencv\\\\cuda_11_8_cc_all_sym\\\\bin\")\n",
 97 |     "import cv2"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "id": "e1b17eca",
103 |    "metadata": {},
104 |    "source": [
105 |     "Ahh the same error, what's going on?\n",
106 |     "\n",
107 |     "In this case I also built OpenCV against the CUDA SDK, so there is a good chance it's missing DLL's from there as well. I can try to fix the issue by simply adding the location of the CUDA SDK binaries to the python DLL search path as shown below."
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 3,
113 |    "id": "235eb45b",
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "os.add_dll_directory(\"C:\\\\Program Files\\\\NVIDIA GPU Computing Toolkit\\\\CUDA\\\\v11.8\\\\bin\")\n",
118 |     "import cv2"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "id": "40b84c80",
124 |    "metadata": {},
125 |    "source": [
126 |     "This appears to have solved the issue, but it's a good idea to examine the build information just to double check I have loaded the right version of OpenCV."
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": 4,
132 |    "id": "a40cc8c5",
133 |    "metadata": {},
134 |    "outputs": [
135 |     {
136 |      "name": "stdout",
137 |      "output_type": "stream",
138 |      "text": [
139 |       "\n",
140 |       "General configuration for OpenCV 4.6.0-dev =====================================\n",
141 |       "  Version control:               4.6.0-508-g21133a2091\n",
142 |       "\n",
143 |       "  Extra modules:\n",
144 |       "    Location (extra):            D:/repos/opencv/contrib/modules\n",
145 |       "    Version control (extra):     4.6.0.20220920-35-g9d84eaed\n",
146 |       "\n",
147 |       "  Platform:\n",
148 |       "    Timestamp:                   2022-11-06T17:20:19Z\n",
149 |       "    Host:                        Windows 10.0.22000 AMD64\n",
150 |       "    CMake:                       3.23.2\n",
151 |       "    CMake generator:             Ninja\n",
152 |       "    CMake build tool:            C:/PROGRA~1/MICROS~2/2022/COMMUN~1/Common7/IDE/COMMON~1/MICROS~1/CMake/Ninja/ninja.exe\n",
153 |       "    MSVC:                        1933\n",
154 |       "    Configuration:               Release\n",
155 |       "\n",
156 |       "  CPU/HW features:\n",
157 |       "    Baseline:                    SSE SSE2 SSE3\n",
158 |       "      requested:                 SSE3\n",
159 |       "    Dispatched code generation:  SSE4_1 SSE4_2 FP16 AVX AVX2 AVX512_SKX\n",
160 |       "      requested:                 SSE4_1 SSE4_2 AVX FP16 AVX2 AVX512_SKX\n",
161 |       "      SSE4_1 (18 files):         + SSSE3 SSE4_1\n",
162 |       "      SSE4_2 (2 files):          + SSSE3 SSE4_1 POPCNT SSE4_2\n",
163 |       "      FP16 (1 files):            + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 AVX\n",
164 |       "      AVX (5 files):             + SSSE3 SSE4_1 POPCNT SSE4_2 AVX\n",
165 |       "      AVX2 (34 files):           + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 FMA3 AVX AVX2\n",
166 |       "      AVX512_SKX (8 files):      + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 FMA3 AVX AVX2 AVX_512F AVX512_COMMON AVX512_SKX\n",
167 |       "\n",
168 |       "  C/C++:\n",
169 |       "    Built as dynamic libs?:      YES\n",
170 |       "    C++ standard:                11\n",
171 |       "    C++ Compiler:                C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.33.31629/bin/Hostx64/x64/cl.exe  (ver 19.33.31629.0)\n",
172 |       "    C++ flags (Release):         /DWIN32 /D_WINDOWS /W4 /GR  /D _CRT_SECURE_NO_DEPRECATE /D _CRT_NONSTDC_NO_DEPRECATE /D _SCL_SECURE_NO_WARNINGS /Gy /bigobj /Oi  /fp:precise /FS     /EHa /wd4127 /wd4251 /wd4324 /wd4275 /wd4512 /wd4589 /wd4819  /MD /O2 /Ob2 /DNDEBUG  /Zi\n",
173 |       "    C++ flags (Debug):           /DWIN32 /D_WINDOWS /W4 /GR  /D _CRT_SECURE_NO_DEPRECATE /D _CRT_NONSTDC_NO_DEPRECATE /D _SCL_SECURE_NO_WARNINGS /Gy /bigobj /Oi  /fp:precise /FS     /EHa /wd4127 /wd4251 /wd4324 /wd4275 /wd4512 /wd4589 /wd4819  /MDd /Zi /Ob0 /Od /RTC1 \n",
174 |       "    C Compiler:                  C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.33.31629/bin/Hostx64/x64/cl.exe\n",
175 |       "    C flags (Release):           /DWIN32 /D_WINDOWS /W3  /D _CRT_SECURE_NO_DEPRECATE /D _CRT_NONSTDC_NO_DEPRECATE /D _SCL_SECURE_NO_WARNINGS /Gy /bigobj /Oi  /fp:precise /FS       /MD /O2 /Ob2 /DNDEBUG  /Zi\n",
176 |       "    C flags (Debug):             /DWIN32 /D_WINDOWS /W3  /D _CRT_SECURE_NO_DEPRECATE /D _CRT_NONSTDC_NO_DEPRECATE /D _SCL_SECURE_NO_WARNINGS /Gy /bigobj /Oi  /fp:precise /FS     /MDd /Zi /Ob0 /Od /RTC1 \n",
177 |       "    Linker flags (Release):      /machine:x64  /INCREMENTAL:NO  /debug\n",
178 |       "    Linker flags (Debug):        /machine:x64  /debug /INCREMENTAL \n",
179 |       "    ccache:                      NO\n",
180 |       "    Precompiled headers:         NO\n",
181 |       "    Extra dependencies:          cudart_static.lib nppc.lib nppial.lib nppicc.lib nppidei.lib nppif.lib nppig.lib nppim.lib nppist.lib nppisu.lib nppitc.lib npps.lib cublas.lib cudnn.lib cufft.lib -LIBPATH:\"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8/lib/x64\"\n",
182 |       "    3rdparty dependencies:\n",
183 |       "\n",
184 |       "  OpenCV modules:\n",
185 |       "    To be built:                 aruco barcode bgsegm bioinspired calib3d ccalib core cudaarithm cudabgsegm cudacodec cudafeatures2d cudafilters cudaimgproc cudalegacy cudaobjdetect cudaoptflow cudastereo cudawarping cudev datasets dnn dnn_objdetect dnn_superres dpm face features2d flann fuzzy gapi hfs highgui img_hash imgcodecs imgproc intensity_transform line_descriptor mcc ml objdetect optflow phase_unwrapping photo plot python3 quality rapid reg rgbd saliency shape stereo stitching structured_light superres surface_matching text tracking ts video videoio videostab wechat_qrcode world xfeatures2d ximgproc xobjdetect xphoto\n",
186 |       "    Disabled:                    -\n",
187 |       "    Disabled by dependency:      -\n",
188 |       "    Unavailable:                 alphamat cvv freetype hdf java julia matlab ovis python2 python2 sfm viz\n",
189 |       "    Applications:                tests perf_tests examples apps\n",
190 |       "    Documentation:               NO\n",
191 |       "    Non-free algorithms:         NO\n",
192 |       "\n",
193 |       "  Windows RT support:            NO\n",
194 |       "\n",
195 |       "  GUI: \n",
196 |       "    Win32 UI:                    YES\n",
197 |       "    OpenGL support:              YES (opengl32 glu32)\n",
198 |       "    VTK support:                 NO\n",
199 |       "\n",
200 |       "  Media I/O: \n",
201 |       "    ZLib:                        build (ver 1.2.12)\n",
202 |       "    JPEG:                        build-libjpeg-turbo (ver 2.1.3-62)\n",
203 |       "      SIMD Support Request:      YES\n",
204 |       "      SIMD Support:              NO\n",
205 |       "    WEBP:                        build (ver encoder: 0x020f)\n",
206 |       "    PNG:                         build (ver 1.6.37)\n",
207 |       "    TIFF:                        build (ver 42 - 4.2.0)\n",
208 |       "    JPEG 2000:                   build (ver 2.4.0)\n",
209 |       "    OpenEXR:                     build (ver 2.3.0)\n",
210 |       "    HDR:                         YES\n",
211 |       "    SUNRASTER:                   YES\n",
212 |       "    PXM:                         YES\n",
213 |       "    PFM:                         YES\n",
214 |       "\n",
215 |       "  Video I/O:\n",
216 |       "    DC1394:                      NO\n",
217 |       "    FFMPEG:                      YES (prebuilt binaries)\n",
218 |       "      avcodec:                   YES (58.134.100)\n",
219 |       "      avformat:                  YES (58.76.100)\n",
220 |       "      avutil:                    YES (56.70.100)\n",
221 |       "      swscale:                   YES (5.9.100)\n",
222 |       "      avresample:                YES (4.0.0)\n",
223 |       "    GStreamer:                   NO\n",
224 |       "    DirectShow:                  YES\n",
225 |       "    Media Foundation:            YES\n",
226 |       "      DXVA:                      YES\n",
227 |       "\n",
228 |       "  Parallel framework:            Concurrency\n",
229 |       "\n",
230 |       "  Trace:                         YES (with Intel ITT)\n",
231 |       "\n",
232 |       "  Other third-party libraries:\n",
233 |       "    Intel IPP:                   2020.0.0 Gold [2020.0.0]\n",
234 |       "           at:                   D:/build/opencv/cuda_11_8_cc_all_sym/3rdparty/ippicv/ippicv_win/icv\n",
235 |       "    Intel IPP IW:                sources (2020.0.0)\n",
236 |       "              at:                D:/build/opencv/cuda_11_8_cc_all_sym/3rdparty/ippicv/ippicv_win/iw\n",
237 |       "    Lapack:                      NO\n",
238 |       "    Eigen:                       NO\n",
239 |       "    Custom HAL:                  NO\n",
240 |       "    Protobuf:                    build (3.19.1)\n",
241 |       "\n",
242 |       "  NVIDIA CUDA:                   YES (ver 11.8, CUFFT CUBLAS NVCUVID NVCUVENC FAST_MATH)\n",
243 |       "    NVIDIA GPU arch:             35 37 50 52 60 61 70 75 80 86\n",
244 |       "    NVIDIA PTX archs:            86\n",
245 |       "\n",
246 |       "  cuDNN:                         YES (ver 8.6.0)\n",
247 |       "\n",
248 |       "  OpenCL:                        YES (NVD3D11)\n",
249 |       "    Include path:                D:/repos/opencv/opencv/3rdparty/include/opencl/1.2\n",
250 |       "    Link libraries:              Dynamic load\n",
251 |       "\n",
252 |       "  Python 3:\n",
253 |       "    Interpreter:                 C:/Users/b/mambaforge//python.exe (ver 3.9.13)\n",
254 |       "    Libraries:                   C:/Users/b/mambaforge//libs/python39.lib (ver 3.9.13)\n",
255 |       "    numpy:                       C:/Users/b/mambaforge//lib/site-packages/numpy/core/include (ver 1.23.3)\n",
256 |       "    install path:                C:/Users/b/mambaforge//Lib/site-packages//cv2/python-3.9\n",
257 |       "\n",
258 |       "  Python (for build):            C:/Users/b/mambaforge//python.exe\n",
259 |       "\n",
260 |       "  Java:                          \n",
261 |       "    ant:                         NO\n",
262 |       "    JNI:                         NO\n",
263 |       "    Java wrappers:               NO\n",
264 |       "    Java tests:                  NO\n",
265 |       "\n",
266 |       "  Install to:                    D:/build/opencv/cuda_11_8_cc_all_sym/install\n",
267 |       "-----------------------------------------------------------------\n",
268 |       "\n",
269 |       "\n"
270 |      ]
271 |     }
272 |    ],
273 |    "source": [
274 |     "print(cv2.getBuildInformation())"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "markdown",
279 |    "id": "d2a1fa50",
280 |    "metadata": {},
281 |    "source": [
282 |     "That was easy, but what can I do if I'm not as good at guessing what's missing?  Next I will use the same example again to demonstrate how to find out which DLL's python is searching for."
283 |    ]
284 |   },
285 |   {
286 |    "cell_type": "markdown",
287 |    "id": "a19c2557",
288 |    "metadata": {},
289 |    "source": [
290 |     "## Fix when path to missing DLL's is not known"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "markdown",
295 |    "id": "2e87bb2c-2894-4b7b-b6cd-cb1d831d71e4",
296 |    "metadata": {},
297 |    "source": [
298 |     "To find which DLL's are missing we can use [process monitor](https://learn.microsoft.com/en-us/sysinternals/downloads/procmon) which will enable us to see the names of the DLL's which python is trying to load.\n",
299 |     "\n",
300 |     "When you first run process monitor you will be presented with the option to filter the output.  Since we only want to view files which are accessed by the python.exe process, on the dropdown select \"Process Name\" and in the text box type python.exe and click **Add** then select \"Operation\" from the first dropdown and \"CreateFile\" from the second and press **Add**.  Your filter should now resemble the one shown below."
301 |    ]
302 |   },
303 |   {
304 |    "cell_type": "markdown",
305 |    "id": "f50e1a35-ed8e-4e1a-b0e9-5082141f1e74",
306 |    "metadata": {},
307 |    "source": [
308 |     "![](imgs/proc_mon_filter.png)"
309 |    ]
310 |   },
311 |   {
312 |    "cell_type": "markdown",
313 |    "id": "d48f1533-76da-4985-8c1d-beaa17ce7d81",
314 |    "metadata": {},
315 |    "source": [
316 |     "Before continuing it is advisable to close any other python processes as the output from these will pollute the main window.\n",
317 |     "\n",
318 |     "Now start python and before typing `import cv2`, press the clear button (red trash can) in process monitor to clear any output generated during python's initialization."
319 |    ]
320 |   },
321 |   {
322 |    "cell_type": "code",
323 |    "execution_count": 1,
324 |    "id": "11dae139",
325 |    "metadata": {},
326 |    "outputs": [
327 |     {
328 |      "ename": "ImportError",
329 |      "evalue": "DLL load failed while importing cv2: The specified module could not be found.",
330 |      "output_type": "error",
331 |      "traceback": [
332 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
333 |       "\u001b[1;31mImportError\u001b[0m                               Traceback (most recent call last)",
334 |       "Cell \u001b[1;32mIn [1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcv2\u001b[39;00m\n",
335 |       "\u001b[1;31mImportError\u001b[0m: DLL load failed while importing cv2: The specified module could not be found."
336 |      ]
337 |     }
338 |    ],
339 |    "source": [
340 |     "import cv2"
341 |    ]
342 |   },
343 |   {
344 |    "cell_type": "markdown",
345 |    "id": "844f62eb-7f59-4fa7-8f99-4e878fc0a667",
346 |    "metadata": {},
347 |    "source": [
348 |     "![title](imgs/proc_mon_failed_search_opencv.png)"
349 |    ]
350 |   },
351 |   {
352 |    "cell_type": "markdown",
353 |    "id": "5519ca3f-a004-4099-8e42-e9ed90c0b0b8",
354 |    "metadata": {},
355 |    "source": [
356 |     "Because I have reset the python DLL search path, on running `import cv2` I get the above output in process monitor, which shows that we successfully found `cv2.cp3x-win_amd64.pyd` (otherwise we would see the \"ModuleNotFoundError: No module named 'cv2'\" error); however, it also shows that several attempts have been made to locate `opencv_img_hash_460.dll` and `opencv_world460.dll` without success.\n",
357 |     "\n",
358 |     "As before we add the directory containing these to the python DLL search path."
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "code",
363 |    "execution_count": 2,
364 |    "id": "db489147-2ea4-438e-bfeb-8faafabbdc92",
365 |    "metadata": {},
366 |    "outputs": [
367 |     {
368 |      "ename": "ImportError",
369 |      "evalue": "DLL load failed while importing cv2: The specified module could not be found.",
370 |      "output_type": "error",
371 |      "traceback": [
372 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
373 |       "\u001b[1;31mImportError\u001b[0m                               Traceback (most recent call last)",
374 |       "Cell \u001b[1;32mIn [2], line 3\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m\n\u001b[0;32m      2\u001b[0m os\u001b[38;5;241m.\u001b[39madd_dll_directory(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mD:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mbuild\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mopencv\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mcuda_11_8_cc_all_sym\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mbin\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m----> 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcv2\u001b[39;00m\n",
375 |       "\u001b[1;31mImportError\u001b[0m: DLL load failed while importing cv2: The specified module could not be found."
376 |      ]
377 |     }
378 |    ],
379 |    "source": [
380 |     "import os\n",
381 |     "os.add_dll_directory(\"D:\\\\build\\\\opencv\\\\cuda_11_8_cc_all_sym\\\\bin\")\n",
382 |     "import cv2"
383 |    ]
384 |   },
385 |   {
386 |    "cell_type": "markdown",
387 |    "id": "f081282d-4a50-4d7e-af6b-fee7cced09df",
388 |    "metadata": {},
389 |    "source": [
390 |     "![title](imgs/proc_mon_failed_search_nvidia.png)"
391 |    ]
392 |   },
393 |   {
394 |    "cell_type": "markdown",
395 |    "id": "90e9fdc7-f159-4c00-b80a-2329b39dbdb7",
396 |    "metadata": {},
397 |    "source": [
398 |     "Now Process Monitor shows that `opencv_img_hash_460.dll` and `opencv_world460.dll` were located successfully after a few attempts; however, we are missing `nppc64_11.dll`, which is part of the CUDA SDK.  As before, if we add the CUDA SDK binary directory to the Python DLL search path, the call to `import cv2` will be successful.  If, however, we were still seeing the same error, we could simply repeat the process: examine the output in Process Monitor and add the directories containing the missing DLLs to the Python DLL search path."
399 |    ]
400 |   },
401 |   {
402 |    "cell_type": "code",
403 |    "execution_count": null,
404 |    "id": "c7388249",
405 |    "metadata": {},
406 |    "outputs": [],
407 |    "source": []
408 |   }
409 |  ],
410 |  "metadata": {
411 |   "kernelspec": {
412 |    "display_name": "Python 3 (ipykernel)",
413 |    "language": "python",
414 |    "name": "python3"
415 |   },
416 |   "language_info": {
417 |    "codemirror_mode": {
418 |     "name": "ipython",
419 |     "version": 3
420 |    },
421 |    "file_extension": ".py",
422 |    "mimetype": "text/x-python",
423 |    "name": "python",
424 |    "nbconvert_exporter": "python",
425 |    "pygments_lexer": "ipython3",
426 |    "version": "3.9.13"
427 |   },
428 |   "toc": {
429 |    "base_numbering": 1,
430 |    "nav_menu": {},
431 |    "number_sections": true,
432 |    "sideBar": true,
433 |    "skip_h1_title": false,
434 |    "title_cell": "Table of Contents",
435 |    "title_sidebar": "Contents",
436 |    "toc_cell": false,
437 |    "toc_position": {},
438 |    "toc_section_display": true,
439 |    "toc_window_display": false
440 |   },
441 |   "varInspector": {
442 |    "cols": {
443 |     "lenName": 16,
444 |     "lenType": 16,
445 |     "lenVar": 40
446 |    },
447 |    "kernels_config": {
448 |     "python": {
449 |      "delete_cmd_postfix": "",
450 |      "delete_cmd_prefix": "del ",
451 |      "library": "var_list.py",
452 |      "varRefreshCmd": "print(var_dic_list())"
453 |     },
454 |     "r": {
455 |      "delete_cmd_postfix": ") ",
456 |      "delete_cmd_prefix": "rm(",
457 |      "library": "var_list.r",
458 |      "varRefreshCmd": "cat(var_dic_list()) "
459 |     }
460 |    },
461 |    "types_to_exclude": [
462 |     "module",
463 |     "function",
464 |     "builtin_function_or_method",
465 |     "instance",
466 |     "_Feature"
467 |    ],
468 |    "window_display": false
469 |   }
470 |  },
471 |  "nbformat": 4,
472 |  "nbformat_minor": 5
473 | }
474 | 


--------------------------------------------------------------------------------
/nbs/opencv410x-video-read.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Comparison of [Nvidia Video Codec SDK](https://developer.nvidia.com/nvidia-video-codec-sdk) with CPU and iGPU ([Quick Sync](https://en.wikipedia.org/wiki/Intel_Quick_Sync_Video)) decoding using OpenCV 4.1.x with Python"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "To run the notebook, download the modified binary [here](https://mega.nz/#!SAwCWY7D!Av4-wPjAkm6rlANWfJbp1R8HlahueT56bhJSAhvSN18).\n",
 15 |     "\n",
 16 |     "Notes: \n",
 17 |     "1. Will not work correctly with OpenCV 4.1.0 because:\n",
 18 |     "    - The Python bindings do not work correctly; I manually modified pyopencv_generated_types.h to enable cv.cudacodec.createVideoReader().nextFrame() to work.\n",
 19 |     "    - HEVC not enabled for Nvidia decoder.\n",
 20 |     "    - Quick Sync can load the software decoder if more than one device is present (multiple GPUs, Nvidia Optimus, etc.).\n",
 21 |     "2. cv.cudacodec.createVideoReader().nextFrame() stops returning frames before the end of the video file.\n",
 22 |     "3. CPU decoding supports far more codecs than the GPU, additionally GPU codec support depends on the GPU generation, see NVDEC_VideoDecoder_API_ProgGuide.pdf in the [NVidia Video Codec SDK documentation](https://developer.nvidia.com/nvidia-video-codec-sdk) for details."
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "## Init"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 2,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "#export\n",
 39 |     "import os\n",
 40 |     "import time\n",
 41 |     "import numpy as np\n",
 42 |     "from functools import partial\n",
 43 |     "import matplotlib.pyplot as plt\n",
 44 |     "import cv2 as cv\n",
 45 |     "import pandas as pd"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 15,
 51 |    "metadata": {},
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "#export\n",
 55 |     "# globals\n",
 56 |     "#vid_path = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/768x576.avi'\n",
 57 |     "#vid_path = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/1920x1080.avi'\n",
 58 |     "#vid_path = 'rtsp://192.168.1.2/mediafile.264'\n",
 59 |     "\n",
 60 |     "# test files from http://jell.yfish.us/\n",
 61 |     "vid_path = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-h264.mkv'\n",
 62 |     "#vid_path = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-hevc-10bit.mkv'\n",
 63 |     "\n",
 64 |     "check_res = False"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 3,
 70 |    "metadata": {},
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "def CheckFrames(f1,f2,epsilon = -1,rows = -1,cols = -1,channels = -1):\n",
 74 |     "    assert len(f1) > 0 and len(f1) == len(f2), f'f1 length {len(f1)}, f2 length {len(f2)}'    \n",
 75 |     "    epsilon = 0 if epsilon == -1 else epsilon\n",
 76 |     "    rows = f1[0].shape[0] if rows == -1 else rows\n",
 77 |     "    cols = f1[0].shape[1] if cols == -1 else cols\n",
 78 |     "    channels = f1[0].shape[2] if channels == -1 and len(f1[0].shape) == 3  else channels    \n",
 79 |     "    for i in range(0,len(f1)):\n",
 80 |     "        assert np.sum(f1[i][:rows,:cols,:channels] != f2[i][:rows,:cols,:channels]) <= epsilon, f'frame {i} different'"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 4,
 86 |    "metadata": {},
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "%matplotlib inline"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": 5,
 95 |    "metadata": {},
 96 |    "outputs": [],
 97 |    "source": [
 98 |     "#export\n",
 99 |     "def ProcVid0(cap):\n",
100 |     "    n_frames, start, end  = 0,0,0\n",
101 |     "    if (cap.IsOpen()== False): # replace with catch\n",
102 |     "        print(\"Error opening video stream or file\")\n",
103 |     "        return\n",
104 |     "    frames_available = True\n",
105 |     "    start = time.time()    \n",
106 |     "    while(cap.IsOpen()):\n",
107 |     "        ret,_ = cap.GetFrame()\n",
108 |     "        if(ret):\n",
109 |     "            n_frames += 1 \n",
110 |     "    end = time.time()\n",
111 |     "    return (end - start)*1000/n_frames, n_frames;"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 6,
117 |    "metadata": {},
118 |    "outputs": [],
119 |    "source": [
120 |     "#export\n",
121 |     "# host mem not implemented, manually pin memory\n",
122 |     "class PinnedMem(object):\n",
123 |     "    def __init__(self, size, dtype=np.uint8):\n",
124 |     "        self.array = np.empty(size,dtype)\n",
125 |     "        cv.cuda.registerPageLocked(self.array)\n",
126 |     "        self.pinned = True\n",
127 |     "    def __del__(self):\n",
128 |     "        cv.cuda.unregisterPageLocked(self.array)\n",
129 |     "        self.pinned = False\n",
130 |     "    def __repr__(self):\n",
131 |     "        return f'pinned = {self.pinned}'"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 26,
137 |    "metadata": {},
138 |    "outputs": [],
139 |    "source": [
140 |     "#export\n",
141 |     "class VidCap:\n",
142 |     "    def __init__(self,vid_path,max_frames = -1,store_res = False):\n",
143 |     "        self.vid_path = vid_path\n",
144 |     "        self.store_res = store_res\n",
145 |     "        self.res = []\n",
146 |     "        self.frame_num = 0\n",
147 |     "        self.open = False\n",
148 |     "        cap = cv.VideoCapture(vid_path)\n",
149 |     "        assert cap.isOpened(), f\"{vid_path}: cannot be opened!\"\n",
150 |     "        self.num_frames = cap.get(cv.CAP_PROP_FRAME_COUNT)\n",
151 |     "        self.max_frames = self.num_frames if max_frames == -1 else max_frames\n",
152 |     "        ret, frame = cap.read()\n",
153 |     "        cap.release()\n",
154 |     "        self.rows,self.cols,self.channels = frame.shape\n",
155 |     "    \n",
156 |     "    def UpdateState(self,ret): \n",
157 |     "        if (not ret or self.frame_num+1 == self.max_frames): \n",
158 |     "            self.open = False            \n",
159 |     "        if(ret or self.frame_num+1 == self.max_frames):\n",
160 |     "            self.frame_num += 1\n",
161 |     "        \n",
162 |     "    def IsOpen(self): return self.open\n",
163 |     "        \n",
164 |     "class CudaCap(VidCap):\n",
165 |     "    def __init__(self,vid_path,max_frames=-1, store_res=False):\n",
166 |     "        VidCap.__init__(self, vid_path, max_frames, store_res)\n",
167 |     "        # cudacodec always returns 4 channels - check grey video\n",
168 |     "        self.channels = 4\n",
169 |     "        # cudacodec seems to need rows/16\n",
170 |     "        self.rows = (np.ceil(self.rows/16)*16).astype(int)\n",
171 |     "        self.cap = cv.cudacodec.createVideoReader(self.vid_path)\n",
172 |     "        self.open = True\n",
173 |     "        self.frame_device = cv.cuda_GpuMat(self.rows,self.cols,cv.CV_8UC4)\n",
174 |     "        self.frame_host = PinnedMem((self.rows,self.cols,self.channels))\n",
175 |     "            \n",
176 |     "    def GetFrame(self):\n",
177 |     "        if(self.store_res):\n",
178 |     "            ret,_ = self.GetHostFrame()\n",
179 |     "            if(ret):\n",
180 |     "                self.res.append(np.copy(self.frame_host.array))\n",
181 |     "            return ret,self.frame_device\n",
182 |     "        else:\n",
183 |     "            return self.GetDeviceFrame()\n",
184 |     "            \n",
185 |     "    def GetDeviceFrame(self):\n",
186 |     "        ret,_ = self.cap.nextFrame(self.frame_device)\n",
187 |     "        self.UpdateState(ret)\n",
188 |     "        return ret,self.frame_device\n",
189 |     "        \n",
190 |     "    def GetHostFrame(self):\n",
191 |     "        ret,_ = self.GetDeviceFrame()\n",
192 |     "        if(ret):\n",
193 |     "            self.frame_device.download(self.frame_host.array)\n",
194 |     "        return ret,self.frame_host.array\n",
195 |     "    \n",
196 |     "class CudaCapNpa(CudaCap):\n",
197 |     "    def __init__(self,vid_path,max_frames=-1,store_res=False):\n",
198 |     "        CudaCap.__init__(self, vid_path, max_frames, store_res)\n",
199 |     "            \n",
200 |     "    def GetDeviceFrame(self):        \n",
201 |     "        ret,self.frame_device = self.cap.nextFrame()\n",
202 |     "        self.UpdateState(ret)\n",
203 |     "        return ret,self.frame_device\n",
204 |     "    \n",
205 |     "class CpuCap(VidCap):\n",
206 |     "    def __init__(self,vid_path,max_frames=-1,store_res=False,backend=cv.CAP_ANY):\n",
207 |     "        VidCap.__init__(self, vid_path, max_frames, store_res)      \n",
208 |     "\n",
209 |     "        self.cap = cv.VideoCapture(self.vid_path,backend)\n",
210 |     "        assert self.cap.isOpened(), f\"{vid_path}: cannot be opened for backend: {backend}!\"\n",
211 |     "        #if self.cap.isOpened():\n",
212 |     "        self.open = True\n",
213 |     "        self.frame = np.empty((self.rows,self.cols,self.channels),np.uint8)\n",
214 |     "            \n",
215 |     "    def GetFrame(self):\n",
216 |     "        ret,_ = self.cap.read(self.frame)\n",
217 |     "        self.UpdateState(ret)\n",
218 |     "        if (ret):\n",
219 |     "            if(self.store_res):\n",
220 |     "                self.res.append(np.copy(self.frame))\n",
221 |     "        return ret,self.frame\n",
222 |     "                \n",
223 |     "    def __del__(self):\n",
224 |     "        self.cap.release()\n",
225 |     "        \n",
226 |     "class CpuCapNpa(CpuCap):\n",
227 |     "    def __init__(self,vid_path,max_frames=-1,store_res=False,backend=cv.CAP_ANY):\n",
228 |     "        CpuCap.__init__(self, vid_path, max_frames, store_res,backend)      \n",
229 |     "            \n",
230 |     "    def GetFrame(self):\n",
231 |     "        ret,self.frame = self.cap.read()\n",
232 |     "        self.UpdateState(ret)\n",
233 |     "        if (ret):\n",
234 |     "            if(self.store_res):\n",
235 |     "                self.res.append(np.copy(self.frame))\n",
236 |     "        return ret,self.frame\n",
237 |     "    "
238 |    ]
239 |   },
240 |   {
241 |    "cell_type": "markdown",
242 |    "metadata": {},
243 |    "source": [
244 |     "<a id=\"cpu\"></a>"
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "markdown",
249 |    "metadata": {},
250 |    "source": [
251 |     "## CPU"
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "code",
256 |    "execution_count": 8,
257 |    "metadata": {},
258 |    "outputs": [
259 |     {
260 |      "name": "stdout",
261 |      "output_type": "stream",
262 |      "text": [
263 |       "CPU 0 (no pre alloc): 900 frames, 39.39 ms/frame\n"
264 |      ]
265 |     }
266 |    ],
267 |    "source": [
268 |     "#export\n",
269 |     "cpu_cap_npa = CpuCapNpa(vid_path,-1,check_res)\n",
270 |     "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n",
271 |     "print(f'CPU 0 (no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')"
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "code",
276 |    "execution_count": 9,
277 |    "metadata": {},
278 |    "outputs": [
279 |     {
280 |      "name": "stdout",
281 |      "output_type": "stream",
282 |      "text": [
283 |       "CPU 1: 900 frames, 23.99 ms/frame\n"
284 |      ]
285 |     }
286 |    ],
287 |    "source": [
288 |     "#export\n",
289 |     "cpu_cap = CpuCap(vid_path,-1,check_res)\n",
290 |     "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n",
291 |     "print(f'CPU 1: {n_frames} frames, {cpu_time_1:.2f} ms/frame')"
292 |    ]
293 |   },
294 |   {
295 |    "cell_type": "code",
296 |    "execution_count": 10,
297 |    "metadata": {},
298 |    "outputs": [],
299 |    "source": [
300 |     "if(check_res):\n",
301 |     "    CheckFrames(cpu_cap.res,cpu_cap_npa.res)"
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "markdown",
306 |    "metadata": {},
307 |    "source": [
308 |     "<a id=\"cpu_quicksync\"></a>"
309 |    ]
310 |   },
311 |   {
312 |    "cell_type": "markdown",
313 |    "metadata": {},
314 |    "source": [
315 |     "## CPU - Quicksync"
316 |    ]
317 |   },
318 |   {
319 |    "cell_type": "markdown",
320 |    "metadata": {},
321 |    "source": [
322 |     "If this implementation is slower than the default, software decoding is probably taking place.\n",
323 |     "\n",
324 |     "To confirm hardware decoding in Windows 10, check the Video Decode window in the GPU pane of the Task Manager for activity, as shown below.\n",
325 |     "\n",
326 |     "To fix this, check the drivers, OpenCV version, etc."
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "markdown",
331 |    "metadata": {},
332 |    "source": [
333 |     "![title](imgs/quicksync.PNG)"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "code",
338 |    "execution_count": 35,
339 |    "metadata": {},
340 |    "outputs": [],
341 |    "source": [
342 |     "#export\n",
343 |     "vid_path_h264 = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-h264.h264'"
344 |    ]
345 |   },
346 |   {
347 |    "cell_type": "code",
348 |    "execution_count": 31,
349 |    "metadata": {},
350 |    "outputs": [
351 |     {
352 |      "name": "stdout",
353 |      "output_type": "stream",
354 |      "text": [
355 |       "CPU Quick Sync (no pre alloc): 900 frames, 14.73 ms/frame\n"
356 |      ]
357 |     }
358 |    ],
359 |    "source": [
360 |     "#export\n",
361 |     "cpu_cap_mfx_npa = CpuCapNpa(vid_path_h264,-1,check_res,cv.CAP_INTEL_MFX)\n",
362 |     "cpu_time_mfx_0,n_frames = ProcVid0(cpu_cap_mfx_npa)\n",
363 |     "print(f'CPU Quick Sync (no pre alloc): {n_frames} frames, {cpu_time_mfx_0:.2f} ms/frame')"
364 |    ]
365 |   },
366 |   {
367 |    "cell_type": "code",
368 |    "execution_count": 33,
369 |    "metadata": {},
370 |    "outputs": [
371 |     {
372 |      "name": "stdout",
373 |      "output_type": "stream",
374 |      "text": [
375 |       "CPU Quick Sync (no pre alloc): 900 frames, 9.27 ms/frame\n"
376 |      ]
377 |     }
378 |    ],
379 |    "source": [
380 |     "#export\n",
381 |     "cpu_cap_mfx = CpuCap(vid_path_h264,-1,check_res,cv.CAP_INTEL_MFX)\n",
382 |     "cpu_time_mfx_1,n_frames = ProcVid0(cpu_cap_mfx)\n",
383 |     "print(f'CPU Quick Sync (no pre alloc): {n_frames} frames, {cpu_time_mfx_1:.2f} ms/frame')"
384 |    ]
385 |   },
386 |   {
387 |    "cell_type": "markdown",
388 |    "metadata": {},
389 |    "source": [
390 |     "<a id=\"gpu\"></a>"
391 |    ]
392 |   },
393 |   {
394 |    "cell_type": "markdown",
395 |    "metadata": {},
396 |    "source": [
397 |     "## GPU"
398 |    ]
399 |   },
400 |   {
401 |    "cell_type": "markdown",
402 |    "metadata": {},
403 |    "source": [
404 |     "Not all GPUs have a hardware decoder (e.g. anything with GM108); see\n",
405 |     "[(1)](https://devtalk.nvidia.com/default/topic/1024934/video-codec-and-optical-flow-sdk/cuvidcreatedecoder-returns-error-cuda_error_no_device/) and \n",
406 |     "[(2)](https://developer.nvidia.com/video-encode-decode-gpu-support-matrix) for discussions."
407 |    ]
408 |   },
409 |   {
410 |    "cell_type": "code",
411 |    "execution_count": 16,
412 |    "metadata": {},
413 |    "outputs": [
414 |     {
415 |      "name": "stdout",
416 |      "output_type": "stream",
417 |      "text": [
418 |       "GPU 0 (no pre alloc): 899 frames, 14.34 ms/frame\n"
419 |      ]
420 |     }
421 |    ],
422 |    "source": [
423 |     "#export\n",
424 |     "gpu_cap_npa = CudaCapNpa(vid_path,-1,check_res)\n",
425 |     "gpu_time_0,n_frames = ProcVid0(gpu_cap_npa)\n",
426 |     "print(f'GPU 0 (no pre alloc): {n_frames} frames, {gpu_time_0:.2f} ms/frame')"
427 |    ]
428 |   },
429 |   {
430 |    "cell_type": "code",
431 |    "execution_count": 17,
432 |    "metadata": {},
433 |    "outputs": [
434 |     {
435 |      "name": "stdout",
436 |      "output_type": "stream",
437 |      "text": [
438 |       "GPU 1: 899 frames, 11.74 ms/frame\n"
439 |      ]
440 |     }
441 |    ],
442 |    "source": [
443 |     "#export\n",
444 |     "gpu_cap = CudaCap(vid_path,-1,check_res)\n",
445 |     "gpu_time_1,n_frames = ProcVid0(gpu_cap)\n",
446 |     "print(f'GPU 1: {n_frames} frames, {gpu_time_1:.2f} ms/frame')"
447 |    ]
448 |   },
449 |   {
450 |    "cell_type": "code",
451 |    "execution_count": 18,
452 |    "metadata": {},
453 |    "outputs": [],
454 |    "source": [
455 |     "if(check_res):\n",
456 |     "    n_frames = min(len(gpu_cap.res),len(gpu_cap_npa.res))\n",
457 |     "    CheckFrames(gpu_cap.res[:n_frames],gpu_cap_npa.res[:n_frames])"
458 |    ]
459 |   },
460 |   {
461 |    "cell_type": "markdown",
462 |    "metadata": {},
463 |    "source": [
464 |     "<a id='opencv_decoding_performance_comparisson'></a>"
465 |    ]
466 |   },
467 |   {
468 |    "cell_type": "markdown",
469 |    "metadata": {},
470 |    "source": [
471 |     "## Results"
472 |    ]
473 |   },
474 |   {
475 |    "cell_type": "code",
476 |    "execution_count": 3,
477 |    "metadata": {},
478 |    "outputs": [
479 |     {
480 |      "data": {
481 |       "text/html": [
482 |        "<style  type=\"text/css\" >\n",
483 |        "</style><table id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0\" ><thead>    <tr>        <th class=\"col_heading level0 col0\" >CPU/GPU</th>        <th class=\"col_heading level0 col1\" >Frame Proc Time (ms)</th>    </tr></thead><tbody>\n",
484 |        "                <tr>\n",
485 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row0_col0\" class=\"data row0 col0\" >GTX 1060 (pre-alloc)</td>\n",
486 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row0_col1\" class=\"data row0 col1\" >7.85</td>\n",
487 |        "            </tr>\n",
488 |        "            <tr>\n",
489 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row1_col0\" class=\"data row1 col0\" >HD Graphics 530 (pre-alloc)</td>\n",
490 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row1_col1\" class=\"data row1 col1\" >9.27</td>\n",
491 |        "            </tr>\n",
492 |        "            <tr>\n",
493 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row2_col0\" class=\"data row2 col0\" >GTX 980M (pre-alloc)</td>\n",
494 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row2_col1\" class=\"data row2 col1\" >11.74</td>\n",
495 |        "            </tr>\n",
496 |        "            <tr>\n",
497 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row3_col0\" class=\"data row3 col0\" >HD Graphics 4400 (pre-alloc)</td>\n",
498 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row3_col1\" class=\"data row3 col1\" >13.97</td>\n",
499 |        "            </tr>\n",
500 |        "            <tr>\n",
501 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row4_col0\" class=\"data row4 col0\" >GTX 980M</td>\n",
502 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row4_col1\" class=\"data row4 col1\" >14.34</td>\n",
503 |        "            </tr>\n",
504 |        "            <tr>\n",
505 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row5_col0\" class=\"data row5 col0\" >HD Graphics 530</td>\n",
506 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row5_col1\" class=\"data row5 col1\" >14.73</td>\n",
507 |        "            </tr>\n",
508 |        "            <tr>\n",
509 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row6_col0\" class=\"data row6 col0\" >HD Graphics 5500 HDD (pre-alloc)</td>\n",
510 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row6_col1\" class=\"data row6 col1\" >18.03</td>\n",
511 |        "            </tr>\n",
512 |        "            <tr>\n",
513 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row7_col0\" class=\"data row7 col0\" >i5-6500 (pre-alloc)</td>\n",
514 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row7_col1\" class=\"data row7 col1\" >22.01</td>\n",
515 |        "            </tr>\n",
516 |        "            <tr>\n",
517 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row8_col0\" class=\"data row8 col0\" >HD Graphics 5500 HDD</td>\n",
518 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row8_col1\" class=\"data row8 col1\" >23.74</td>\n",
519 |        "            </tr>\n",
520 |        "            <tr>\n",
521 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row9_col0\" class=\"data row9 col0\" >HD Graphics 4400</td>\n",
522 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row9_col1\" class=\"data row9 col1\" >23.88</td>\n",
523 |        "            </tr>\n",
524 |        "            <tr>\n",
525 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row10_col0\" class=\"data row10 col0\" >i7-6700HQ (pre-alloc)</td>\n",
526 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row10_col1\" class=\"data row10 col1\" >23.99</td>\n",
527 |        "            </tr>\n",
528 |        "            <tr>\n",
529 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row11_col0\" class=\"data row11 col0\" >i7-6700HQ</td>\n",
530 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row11_col1\" class=\"data row11 col1\" >39.39</td>\n",
531 |        "            </tr>\n",
532 |        "            <tr>\n",
533 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row12_col0\" class=\"data row12 col0\" >GT 730M (pre-alloc)</td>\n",
534 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row12_col1\" class=\"data row12 col1\" >40.64</td>\n",
535 |        "            </tr>\n",
536 |        "            <tr>\n",
537 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row13_col0\" class=\"data row13 col0\" >GT 730M</td>\n",
538 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row13_col1\" class=\"data row13 col1\" >40.8</td>\n",
539 |        "            </tr>\n",
540 |        "            <tr>\n",
541 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row14_col0\" class=\"data row14 col0\" >i5-4210U (pre-alloc)</td>\n",
542 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row14_col1\" class=\"data row14 col1\" >47.72</td>\n",
543 |        "            </tr>\n",
544 |        "            <tr>\n",
545 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row15_col0\" class=\"data row15 col0\" >i5-4210U</td>\n",
546 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row15_col1\" class=\"data row15 col1\" >50.65</td>\n",
547 |        "            </tr>\n",
548 |        "            <tr>\n",
549 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row16_col0\" class=\"data row16 col0\" >i5-5200U HDD (pre-alloc)</td>\n",
550 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row16_col1\" class=\"data row16 col1\" >51.06</td>\n",
551 |        "            </tr>\n",
552 |        "            <tr>\n",
553 |        "                                <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row17_col0\" class=\"data row17 col0\" >i5-5200U HDD</td>\n",
554 |        "                        <td id=\"T_5fd9155c_97e7_11e9_93ad_6c40085859e0row17_col1\" class=\"data row17 col1\" >58.64</td>\n",
555 |        "            </tr>\n",
556 |        "    </tbody></table>"
557 |       ],
558 |       "text/plain": [
559 |        "<pandas.io.formats.style.Styler at 0x1bb03f20e10>"
560 |       ]
561 |      },
562 |      "execution_count": 3,
563 |      "metadata": {},
564 |      "output_type": "execute_result"
565 |     }
566 |    ],
567 |    "source": [
568 |     "results = [['i7-6700HQ', 39.39],['i7-6700HQ (pre-alloc)', 23.99],['i5-6500 (pre-alloc)',22.01],['i5-5200U HDD',58.64],\n",
569 |     "           ['i5-5200U HDD (pre-alloc)',51.06],['HD Graphics 530',14.73],['HD Graphics 530 (pre-alloc)',9.27],\n",
570 |     "           ['HD Graphics 5500 HDD',23.74],['HD Graphics 5500 HDD (pre-alloc)',18.03],['GTX 980M',14.34],\n",
571 |     "          ['GTX 980M (pre-alloc)',11.74],['GTX 1060 (pre-alloc)',7.85],['i5-4210U',50.65],['i5-4210U (pre-alloc)',47.72],\n",
572 |     "           ['GT 730M',40.80],['GT 730M (pre-alloc)',40.64],['HD Graphics 4400',23.88],['HD Graphics 4400 (pre-alloc)',13.97]]\n",
573 |     "df = pd.DataFrame(results,columns=['CPU/GPU','Frame Proc Time (ms)']).sort_values('Frame Proc Time (ms)').round(2)\n",
574 |     "df.style.hide_index()"
575 |    ]
576 |   },
577 |   {
578 |    "cell_type": "markdown",
579 |    "metadata": {},
580 |    "source": [
581 |     "# Export"
582 |    ]
583 |   },
584 |   {
585 |    "cell_type": "code",
586 |    "execution_count": 75,
587 |    "metadata": {},
588 |    "outputs": [
589 |     {
590 |      "name": "stdout",
591 |      "output_type": "stream",
592 |      "text": [
593 |       "Converted opencv410x-video-read.ipynb to exp\\nb_opencv410x-video-read.py\n"
594 |      ]
595 |     }
596 |    ],
597 |    "source": [
598 |     "# taken from https://github.com/fastai/fastai_docs/blob/master/dev_nb/notebook2script.py\n",
599 |     "!python notebook2script.py opencv410x-video-read.ipynb"
600 |    ]
601 |   },
602 |   {
603 |    "cell_type": "code",
604 |    "execution_count": 76,
605 |    "metadata": {},
606 |    "outputs": [
607 |     {
608 |      "name": "stdout",
609 |      "output_type": "stream",
610 |      "text": [
611 |       "CPU 0 (no pre alloc): 900 frames, 37.96 ms/frame\n",
612 |       "CPU 1: 900 frames, 21.88 ms/frame\n",
613 |       "CPU Quick Sync (no pre alloc): 900 frames, 15.31 ms/frame\n",
614 |       "CPU Quick Sync (no pre alloc): 900 frames, 9.97 ms/frame\n",
615 |       "GPU 0 (no pre alloc): 899 frames, 13.84 ms/frame\n",
616 |       "GPU 1: 899 frames, 11.72 ms/frame\n",
617 |       "[ INFO:0] global D:\\SSDBackup\\Dev\\Repos\\opencv_fork_1\\modules\\videoio\\src\\videoio_registry.cpp (187) cv::`anonymous-namespace'::VideoBackendRegistry::VideoBackendRegistry VIDEOIO: Enabled backends(7, sorted by priority): FFMPEG(1000); GSTREAMER(990); INTEL_MFX(980); MSMF(970); DSHOW(960); CV_IMAGES(950); CV_MJPEG(940)\n"
618 |      ]
619 |     }
620 |    ],
621 |    "source": [
622 |     "! python exp/nb_opencv410x-video-read.py"
623 |    ]
624 |   },
625 |   {
626 |    "cell_type": "code",
627 |    "execution_count": null,
628 |    "metadata": {},
629 |    "outputs": [],
630 |    "source": []
631 |   }
632 |  ],
633 |  "metadata": {
634 |   "kernelspec": {
635 |    "display_name": "Python 3",
636 |    "language": "python",
637 |    "name": "python3"
638 |   },
639 |   "language_info": {
640 |    "codemirror_mode": {
641 |     "name": "ipython",
642 |     "version": 3
643 |    },
644 |    "file_extension": ".py",
645 |    "mimetype": "text/x-python",
646 |    "name": "python",
647 |    "nbconvert_exporter": "python",
648 |    "pygments_lexer": "ipython3",
649 |    "version": "3.7.3"
650 |   }
651 |  },
652 |  "nbformat": 4,
653 |  "nbformat_minor": 2
654 | }
655 | 


--------------------------------------------------------------------------------
/nbs/opencv450-video-read.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Comparison of [Nvidia Video Codec SDK](https://developer.nvidia.com/nvidia-video-codec-sdk) with CPU and iGPU ([Quick Sync](https://en.wikipedia.org/wiki/Intel_Quick_Sync_Video)) decoding using OpenCV 4.5.0 with Python - CUDA 10.2, FFmpeg 4.1.3"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "To run the h264_cuvid test in the notebook you will need to compile OpenCV against ffmpeg libs built with --enable-cuda --enable-cuvid.\n",
 15 |     "\n",
 16 |     "Notes: \n",
 17 |     "1. Whilst using VideoCapture with h264_cuvid decoding is only as fast as CPU decoding, it does offload the decoding, leaving more CPU resources available.  Additionally, this option currently supports far more codecs than cv.cudacodec.VideoReader.\n",
 18 |     "2. GPU codec support depends on the GPU generation, see NVDEC_VideoDecoder_API_ProgGuide.pdf in the [NVidia Video Codec SDK documentation](https://developer.nvidia.com/nvidia-video-codec-sdk) for details.\n",
 19 |     "3. Unfortunately the current QuickSync implementation does not support container formats or RTSP streaming."
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "metadata": {},
 25 |    "source": [
 26 |     "## Init"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 1,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "#export\n",
 36 |     "import os\n",
 37 |     "import time\n",
 38 |     "import numpy as np\n",
 39 |     "from functools import partial\n",
 40 |     "import matplotlib.pyplot as plt\n",
 41 |     "import cv2 as cv\n",
 42 |     "import pandas as pd"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 57,
 48 |    "metadata": {},
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "#export\n",
 52 |     "# globals\n",
 53 |     "#vid_path = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/768x576.avi'\n",
 54 |     "#vid_path = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/1920x1080.avi'\n",
 55 |     "#vid_path = 'rtsp://127.0.0.1/mediafile.264'\n",
 56 |     "#vid_path=\"rtsp://127.0.0.1/jellyfish-120-mbps-4k-uhd-h264.264\";\n",
 57 |     "#vid_path=\"rtsp://127.0.0.1/big_buck_bunny.264\";\n",
 58 |     "# test files from http://jell.yfish.us/\n",
 59 |     "vid_path = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-h264.mkv'\n",
 60 |     "#vid_path = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-hevc-10bit.mkv'\n",
 61 |     "\n",
 62 |     "check_res = False"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 3,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "def CheckFrames(f1,f2,epsilon = -1,rows = -1,cols = -1,channels = -1):\n",
 72 |     "    assert len(f1) > 0 and len(f1) == len(f2), f'f1 length {len(f1)}, f2 length {len(f2)}'    \n",
 73 |     "    epsilon = 0 if epsilon == -1 else epsilon\n",
 74 |     "    rows = f1[0].shape[0] if rows == -1 else rows\n",
 75 |     "    cols = f1[0].shape[1] if cols == -1 else cols\n",
 76 |     "    channels = f1[0].shape[2] if channels == -1 and len(f1[0].shape) == 3  else channels    \n",
 77 |     "    for i in range(0,len(f1)):\n",
 78 |     "        assert np.sum(f1[i][:rows,:cols,:channels] != f2[i][:rows,:cols,:channels]) <= epsilon, f'frame {i} different'"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": 4,
 84 |    "metadata": {},
 85 |    "outputs": [],
 86 |    "source": [
 87 |     "%matplotlib inline"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": 5,
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "#export\n",
 97 |     "def ProcVid0(cap):\n",
 98 |     "    n_frames, start, end  = 0,0,0\n",
 99 |     "    if (cap.IsOpen()== False): # replace with catch\n",
100 |     "        print(\"Error opening video stream or file\")\n",
101 |     "        return\n",
102 |     "    frames_available = True\n",
103 |     "    start = time.time()    \n",
104 |     "    while(cap.IsOpen()):\n",
105 |     "        ret,_ = cap.GetFrame()\n",
106 |     "        if(ret):\n",
107 |     "            n_frames += 1 \n",
108 |     "    end = time.time()\n",
109 |     "    return (end - start)*1000/n_frames, n_frames;"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": 6,
115 |    "metadata": {},
116 |    "outputs": [],
117 |    "source": [
118 |     "#export\n",
119 |     "# host mem not implemented, manually pin memory\n",
120 |     "class PinnedMem(object):\n",
121 |     "    def __init__(self, size, dtype=np.uint8):\n",
122 |     "        self.array = np.empty(size,dtype)\n",
123 |     "        cv.cuda.registerPageLocked(self.array)\n",
124 |     "        self.pinned = True\n",
125 |     "    def __del__(self):\n",
126 |     "        cv.cuda.unregisterPageLocked(self.array)\n",
127 |     "        self.pinned = False\n",
128 |     "    def __repr__(self):\n",
129 |     "        return f'pinned = {self.pinned}'"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 7,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "#export\n",
139 |     "class VidCap:\n",
140 |     "    def __init__(self,vid_path,max_frames = -1,store_res = False,file_to_write=None):\n",
141 |     "        self.vid_path = vid_path\n",
142 |     "        self.store_res = store_res\n",
143 |     "        self.res = []\n",
144 |     "        self.frame_num = 0\n",
145 |     "        self.open = False\n",
146 |     "        cap = cv.VideoCapture(vid_path)\n",
147 |     "        assert cap.isOpened(), f\"{vid_path}: cannot be opened!\"\n",
148 |     "        self.num_frames = cap.get(cv.CAP_PROP_FRAME_COUNT)\n",
149 |     "        self.max_frames = self.num_frames if max_frames == -1 else max_frames\n",
150 |     "        ret, frame = cap.read()\n",
151 |     "        cap.release()\n",
152 |     "        self.rows,self.cols,self.channels = frame.shape\n",
153 |     "        self.write_video = False\n",
154 |     "        if(file_to_write):\n",
155 |     "            self.write_video = True\n",
156 |     "            #fourcc = cv.VideoWriter_fourcc(*'H264')\n",
157 |     "            fourcc = cv.VideoWriter_fourcc('M', '4', 'S', '2')\n",
158 |     "            self.out = cv.VideoWriter(file_to_write,cv.CAP_FFMPEG,fourcc,25,(self.cols,self.rows))\n",
159 |     "    \n",
160 |     "    def UpdateState(self,ret): \n",
161 |     "        if (not ret or self.frame_num+1 == self.max_frames): \n",
162 |     "            self.open = False            \n",
163 |     "        if(ret or self.frame_num+1 == self.max_frames):\n",
164 |     "            self.frame_num += 1\n",
165 |     "        \n",
166 |     "    def IsOpen(self): return self.open\n",
167 |     "    \n",
168 |     "    def __del__(self):\n",
169 |     "        if(self.write_video):\n",
170 |     "            self.out.release()\n",
171 |     "        \n",
172 |     "    #def WriteFrame(self,frame):\n",
173 |     "    #    if(self.file_to_write):\n",
174 |     "    #        self.out.write(frame)\n",
175 |     "        \n",
176 |     "class CudaCap(VidCap):\n",
177 |     "    def __init__(self,vid_path,max_frames=-1, store_res=False, file_to_write=None):\n",
178 |     "        VidCap.__init__(self, vid_path, max_frames, store_res, file_to_write)\n",
179 |     "        # cudacodec always returns 4 channels - check grey video\n",
180 |     "        self.channels = 4\n",
181 |     "        # cudacodec seems to need the row count rounded up to a multiple of 16\n",
182 |     "        self.rows = (np.ceil(self.rows/16)*16).astype(int)\n",
183 |     "        self.cap = cv.cudacodec.createVideoReader(self.vid_path)\n",
184 |     "        self.open = True\n",
185 |     "        self.frame_device = cv.cuda_GpuMat(self.rows,self.cols,cv.CV_8UC4)\n",
186 |     "        self.frame_host = PinnedMem((self.rows,self.cols,self.channels))\n",
187 |     "        self.stream = cv.cuda_Stream()\n",
188 |     "            \n",
189 |     "    def GetFrame(self):\n",
190 |     "        if(self.store_res or self.write_video):\n",
191 |     "            ret,_ = self.GetHostFrame()\n",
192 |     "            if(self.write_video):\n",
193 |     "                self.out.write(self.frame_host.array[:,:,:3])\n",
194 |     "            if(self.store_res and ret):\n",
195 |     "                self.res.append(np.copy(self.frame_host.array))\n",
196 |     "            return ret,self.frame_device\n",
197 |     "        else:\n",
198 |     "            return self.GetDeviceFrame()\n",
199 |     "            \n",
200 |     "    def GetDeviceFrame(self):\n",
201 |     "        ret,_ = self.cap.nextFrame(self.frame_device,self.stream)\n",
202 |     "        self.UpdateState(ret)\n",
203 |     "        return ret,self.frame_device\n",
204 |     "        \n",
205 |     "    def GetHostFrame(self):\n",
206 |     "        ret,_ = self.GetDeviceFrame()\n",
207 |     "        if(ret):\n",
208 |     "            self.frame_device.download(self.frame_host.array)\n",
209 |     "        return ret,self.frame_host.array\n",
210 |     "        \n",
211 |     "    \n",
212 |     "class CudaCapNpa(CudaCap):\n",
213 |     "    def __init__(self,vid_path,max_frames=-1,store_res=False, file_to_write=None):\n",
214 |     "        CudaCap.__init__(self, vid_path, max_frames, store_res, file_to_write)\n",
215 |     "            \n",
216 |     "    def GetDeviceFrame(self):        \n",
217 |     "        ret,self.frame_device = self.cap.nextFrame()\n",
218 |     "        self.UpdateState(ret)\n",
219 |     "        return ret,self.frame_device\n",
220 |     "    \n",
221 |     "class CpuCap(VidCap):\n",
222 |     "    def __init__(self,vid_path,max_frames=-1,store_res=False,backend=cv.CAP_ANY):\n",
223 |     "        VidCap.__init__(self, vid_path, max_frames, store_res)      \n",
224 |     "\n",
225 |     "        self.cap = cv.VideoCapture(self.vid_path,backend)\n",
226 |     "        assert self.cap.isOpened(), f\"{vid_path}: cannot be opened for backend: {backend}!\"\n",
227 |     "        #if self.cap.isOpened():\n",
228 |     "        self.open = True\n",
229 |     "        self.frame = np.empty((self.rows,self.cols,self.channels),np.uint8)\n",
230 |     "            \n",
231 |     "    def GetFrame(self):\n",
232 |     "        ret,_ = self.cap.read(self.frame)\n",
233 |     "        self.UpdateState(ret)\n",
234 |     "        if (ret):\n",
235 |     "            if(self.store_res):\n",
236 |     "                self.res.append(np.copy(self.frame))\n",
237 |     "        return ret,self.frame\n",
238 |     "                \n",
239 |     "    def __del__(self):\n",
240 |     "        self.cap.release()\n",
241 |     "        \n",
242 |     "class CpuCapNpa(CpuCap):\n",
243 |     "    def __init__(self,vid_path,max_frames=-1,store_res=False,backend=cv.CAP_ANY):\n",
244 |     "        CpuCap.__init__(self, vid_path, max_frames, store_res,backend)      \n",
245 |     "            \n",
246 |     "    def GetFrame(self):\n",
247 |     "        ret,self.frame = self.cap.read()\n",
248 |     "        self.UpdateState(ret)\n",
249 |     "        if (ret):\n",
250 |     "            if(self.store_res):\n",
251 |     "                self.res.append(np.copy(self.frame))\n",
252 |     "        return ret,self.frame\n",
253 |     "    "
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "markdown",
258 |    "metadata": {},
259 |    "source": [
260 |     "<a id=\"cpu\"></a>"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "markdown",
265 |    "metadata": {},
266 |    "source": [
267 |     "## CPU"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "code",
272 |    "execution_count": 71,
273 |    "metadata": {},
274 |    "outputs": [],
275 |    "source": [
276 |     "os.environ[\"OPENCV_FFMPEG_CAPTURE_OPTIONS\"] = \"\""
277 |    ]
278 |   },
279 |   {
280 |    "cell_type": "code",
281 |    "execution_count": 72,
282 |    "metadata": {},
283 |    "outputs": [
284 |     {
285 |      "name": "stdout",
286 |      "output_type": "stream",
287 |      "text": [
288 |       "CPU 0 (no pre alloc): 900 frames, 19.07 ms/frame\n"
289 |      ]
290 |     }
291 |    ],
292 |    "source": [
293 |     "#export\n",
294 |     "cpu_cap_npa = CpuCapNpa(vid_path,-1,check_res)\n",
295 |     "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n",
296 |     "print(f'CPU 0 (no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')"
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "code",
301 |    "execution_count": 73,
302 |    "metadata": {},
303 |    "outputs": [
304 |     {
305 |      "name": "stdout",
306 |      "output_type": "stream",
307 |      "text": [
308 |       "CPU 1: 900 frames, 12.09 ms/frame\n"
309 |      ]
310 |     }
311 |    ],
312 |    "source": [
313 |     "#export\n",
314 |     "cpu_cap = CpuCap(vid_path,-1,check_res)\n",
315 |     "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n",
316 |     "print(f'CPU 1: {n_frames} frames, {cpu_time_1:.2f} ms/frame')"
317 |    ]
318 |   },
319 |   {
320 |    "cell_type": "code",
321 |    "execution_count": 74,
322 |    "metadata": {},
323 |    "outputs": [],
324 |    "source": [
325 |     "if(check_res):\n",
326 |     "    CheckFrames(cpu_cap.res,cpu_cap_npa.res)"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "markdown",
331 |    "metadata": {},
332 |    "source": [
333 |     "<a id=\"cpu_quicksync\"></a>"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "markdown",
338 |    "metadata": {},
339 |    "source": [
340 |     "## CPU - Quicksync"
341 |    ]
342 |   },
343 |   {
344 |    "cell_type": "markdown",
345 |    "metadata": {},
346 |    "source": [
347 |     "If this implementation is slower than the default, software decoding is probably taking place.\n",
348 |     "\n",
349 |     "To confirm hardware decoding on Windows 10, check the Video Decode graph in the GPU pane of Task Manager for activity, as shown below.\n",
350 |     "\n",
351 |     "To fix this, check the drivers, OpenCV version, etc."
352 |    ]
353 |   },
354 |   {
355 |    "cell_type": "markdown",
356 |    "metadata": {},
357 |    "source": [
358 |     "![title](imgs/quicksync.PNG)"
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "code",
363 |    "execution_count": 14,
364 |    "metadata": {},
365 |    "outputs": [],
366 |    "source": [
367 |     "#export\n",
368 |     "vid_path_h264 = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-h264.h264'"
369 |    ]
370 |   },
371 |   {
372 |    "cell_type": "code",
373 |    "execution_count": 15,
374 |    "metadata": {},
375 |    "outputs": [
376 |     {
377 |      "name": "stdout",
378 |      "output_type": "stream",
379 |      "text": [
380 |       "CPU Quick Sync (no pre alloc): 900 frames, 29.84 ms/frame\n"
381 |      ]
382 |     }
383 |    ],
384 |    "source": [
385 |     "#export\n",
386 |     "cpu_cap_mfx_npa = CpuCapNpa(vid_path_h264,-1,check_res,cv.CAP_INTEL_MFX)\n",
387 |     "cpu_time_mfx_0,n_frames = ProcVid0(cpu_cap_mfx_npa)\n",
388 |     "print(f'CPU Quick Sync (no pre alloc): {n_frames} frames, {cpu_time_mfx_0:.2f} ms/frame')"
389 |    ]
390 |   },
391 |   {
392 |    "cell_type": "code",
393 |    "execution_count": 16,
394 |    "metadata": {},
395 |    "outputs": [
396 |     {
397 |      "name": "stdout",
398 |      "output_type": "stream",
399 |      "text": [
400 |       "CPU Quick Sync (no pre alloc): 900 frames, 28.68 ms/frame\n"
401 |      ]
402 |     }
403 |    ],
404 |    "source": [
405 |     "#export\n",
406 |     "cpu_cap_mfx = CpuCap(vid_path_h264,-1,check_res,cv.CAP_INTEL_MFX)\n",
407 |     "cpu_time_mfx_1,n_frames = ProcVid0(cpu_cap_mfx)\n",
408 |     "print(f'CPU Quick Sync (no pre alloc): {n_frames} frames, {cpu_time_mfx_1:.2f} ms/frame')"
409 |    ]
410 |   },
411 |   {
412 |    "cell_type": "markdown",
413 |    "metadata": {},
414 |    "source": [
415 |     "<a id=\"gpu\"></a>"
416 |    ]
417 |   },
418 |   {
419 |    "cell_type": "markdown",
420 |    "metadata": {},
421 |    "source": [
422 |     "## GPU"
423 |    ]
424 |   },
425 |   {
426 |    "cell_type": "markdown",
427 |    "metadata": {},
428 |    "source": [
429 |     "Not all GPUs have a hardware decoder (e.g. anything based on GM108); see\n",
430 |     "[(1)](https://devtalk.nvidia.com/default/topic/1024934/video-codec-and-optical-flow-sdk/cuvidcreatedecoder-returns-error-cuda_error_no_device/) and \n",
431 |     "[(2)](https://developer.nvidia.com/video-encode-decode-gpu-support-matrix) for discussions."
432 |    ]
433 |   },
434 |   {
435 |    "cell_type": "markdown",
436 |    "metadata": {},
437 |    "source": [
438 |     "### cv.VideoCapture - h264_cuvid"
439 |    ]
440 |   },
441 |   {
442 |    "cell_type": "code",
443 |    "execution_count": 78,
444 |    "metadata": {},
445 |    "outputs": [],
446 |    "source": [
447 |     "os.environ[\"OPENCV_FFMPEG_CAPTURE_OPTIONS\"] = \"video_codec;h264_cuvid|video_codec;hevc_cuvid\""
448 |    ]
449 |   },
450 |   {
451 |    "cell_type": "code",
452 |    "execution_count": 79,
453 |    "metadata": {},
454 |    "outputs": [
455 |     {
456 |      "name": "stdout",
457 |      "output_type": "stream",
458 |      "text": [
459 |       "CPU 0 with h264_cuvid (no pre alloc): 900 frames, 17.78 ms/frame\n"
460 |      ]
461 |     }
462 |    ],
463 |    "source": [
464 |     "#export\n",
465 |     "cpu_cap_npa = CpuCapNpa(vid_path,-1,check_res)\n",
466 |     "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n",
467 |     "print(f'CPU 0 with h264_cuvid (no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')"
468 |    ]
469 |   },
470 |   {
471 |    "cell_type": "code",
472 |    "execution_count": 80,
473 |    "metadata": {},
474 |    "outputs": [
475 |     {
476 |      "name": "stdout",
477 |      "output_type": "stream",
478 |      "text": [
479 |       "CPU 1 with h264_cuvid: 900 frames, 12.89 ms/frame\n"
480 |      ]
481 |     }
482 |    ],
483 |    "source": [
484 |     "#export\n",
485 |     "cpu_cap = CpuCap(vid_path,-1,check_res)\n",
486 |     "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n",
487 |     "print(f'CPU 1 with h264_cuvid: {n_frames} frames, {cpu_time_1:.2f} ms/frame')"
488 |    ]
489 |   },
490 |   {
491 |    "cell_type": "markdown",
492 |    "metadata": {},
493 |    "source": [
494 |     "### cv.cudacodec.VideoReader"
495 |    ]
496 |   },
497 |   {
498 |    "cell_type": "code",
499 |    "execution_count": 70,
500 |    "metadata": {},
501 |    "outputs": [],
502 |    "source": [
503 |     "# Set check_res to True to store the decoded frames so they can be compared below.\n",
504 |     "check_res = False\n",
505 |     "file_to_write=\"F:\\\\Dev\\\\Media\\\\jelly_out.mp4\""
506 |    ]
507 |   },
508 |   {
509 |    "cell_type": "code",
510 |    "execution_count": 67,
511 |    "metadata": {},
512 |    "outputs": [
513 |     {
514 |      "name": "stdout",
515 |      "output_type": "stream",
516 |      "text": [
517 |       "GPU 0 (no pre alloc): 900 frames, 6.03 ms/frame\n"
518 |      ]
519 |     }
520 |    ],
521 |    "source": [
522 |     "#export\n",
523 |     "gpu_cap_npa = CudaCapNpa(vid_path,-1,check_res)\n",
524 |     "gpu_time_0,n_frames = ProcVid0(gpu_cap_npa)\n",
525 |     "print(f'GPU 0 (no pre alloc): {n_frames} frames, {gpu_time_0:.2f} ms/frame')"
526 |    ]
527 |   },
528 |   {
529 |    "cell_type": "code",
530 |    "execution_count": 68,
531 |    "metadata": {},
532 |    "outputs": [
533 |     {
534 |      "name": "stdout",
535 |      "output_type": "stream",
536 |      "text": [
537 |       "GPU 0 (no pre alloc): 900 frames, 5.78 ms/frame\n"
538 |      ]
539 |     }
540 |    ],
541 |    "source": [
542 |     "gpu_cap_npa = CudaCapNpa(vid_path,-1,check_res)\n",
543 |     "gpu_time_0,n_frames = ProcVid0(gpu_cap_npa)\n",
544 |     "print(f'GPU 0 (no pre alloc): {n_frames} frames, {gpu_time_0:.2f} ms/frame')"
545 |    ]
546 |   },
547 |   {
548 |    "cell_type": "code",
549 |    "execution_count": 69,
550 |    "metadata": {},
551 |    "outputs": [],
552 |    "source": [
553 |     "if(check_res):\n",
554 |     "    n_frames = min(len(gpu_cap.res),len(gpu_cap_npa.res))\n",
555 |     "    CheckFrames(gpu_cap.res[:n_frames],gpu_cap_npa.res[:n_frames])"
556 |    ]
557 |   },
558 |   {
559 |    "cell_type": "markdown",
560 |    "metadata": {},
561 |    "source": [
562 |     "<a id='opencv_decoding_performance_comparisson'></a>"
563 |    ]
564 |   },
565 |   {
566 |    "cell_type": "markdown",
567 |    "metadata": {},
568 |    "source": [
569 |     "## Results"
570 |    ]
571 |   },
572 |   {
573 |    "cell_type": "code",
574 |    "execution_count": 83,
575 |    "metadata": {},
576 |    "outputs": [
577 |     {
578 |      "data": {
579 |       "text/html": [
580 |        "<style  type=\"text/css\" >\n",
581 |        "</style><table id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9\" ><thead>    <tr>        <th class=\"col_heading level0 col0\" >CPU/GPU</th>        <th class=\"col_heading level0 col1\" >Frame Proc Time (ms)</th>    </tr></thead><tbody>\n",
582 |        "                <tr>\n",
583 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row0_col0\" class=\"data row0 col0\" >RTX 2080 (pre-alloc)</td>\n",
584 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row0_col1\" class=\"data row0 col1\" >5.780000</td>\n",
585 |        "            </tr>\n",
586 |        "            <tr>\n",
587 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row1_col0\" class=\"data row1 col0\" >RTX 2080</td>\n",
588 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row1_col1\" class=\"data row1 col1\" >6.030000</td>\n",
589 |        "            </tr>\n",
590 |        "            <tr>\n",
591 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row2_col0\" class=\"data row2 col0\" >GTX 1060 (pre-alloc)</td>\n",
592 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row2_col1\" class=\"data row2 col1\" >7.850000</td>\n",
593 |        "            </tr>\n",
594 |        "            <tr>\n",
595 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row3_col0\" class=\"data row3 col0\" >HD Graphics 530 (pre-alloc)</td>\n",
596 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row3_col1\" class=\"data row3 col1\" >9.270000</td>\n",
597 |        "            </tr>\n",
598 |        "            <tr>\n",
599 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row4_col0\" class=\"data row4 col0\" >GTX 980M (pre-alloc)</td>\n",
600 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row4_col1\" class=\"data row4 col1\" >11.740000</td>\n",
601 |        "            </tr>\n",
602 |        "            <tr>\n",
603 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row5_col0\" class=\"data row5 col0\" >i7-8700 (pre-alloc)</td>\n",
604 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row5_col1\" class=\"data row5 col1\" >12.090000</td>\n",
605 |        "            </tr>\n",
606 |        "            <tr>\n",
607 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row6_col0\" class=\"data row6 col0\" >RTX 2080 h264_cuvid (pre-alloc)</td>\n",
608 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row6_col1\" class=\"data row6 col1\" >12.890000</td>\n",
609 |        "            </tr>\n",
610 |        "            <tr>\n",
611 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row7_col0\" class=\"data row7 col0\" >HD Graphics 4400 (pre-alloc)</td>\n",
612 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row7_col1\" class=\"data row7 col1\" >13.970000</td>\n",
613 |        "            </tr>\n",
614 |        "            <tr>\n",
615 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row8_col0\" class=\"data row8 col0\" >GTX 980M</td>\n",
616 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row8_col1\" class=\"data row8 col1\" >14.340000</td>\n",
617 |        "            </tr>\n",
618 |        "            <tr>\n",
619 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row9_col0\" class=\"data row9 col0\" >HD Graphics 530</td>\n",
620 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row9_col1\" class=\"data row9 col1\" >14.730000</td>\n",
621 |        "            </tr>\n",
622 |        "            <tr>\n",
623 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row10_col0\" class=\"data row10 col0\" >RTX 2080 h264_cuvid</td>\n",
624 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row10_col1\" class=\"data row10 col1\" >17.780000</td>\n",
625 |        "            </tr>\n",
626 |        "            <tr>\n",
627 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row11_col0\" class=\"data row11 col0\" >HD Graphics 5500 HDD (pre-alloc)</td>\n",
628 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row11_col1\" class=\"data row11 col1\" >18.030000</td>\n",
629 |        "            </tr>\n",
630 |        "            <tr>\n",
631 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row12_col0\" class=\"data row12 col0\" >i7-8700</td>\n",
632 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row12_col1\" class=\"data row12 col1\" >19.070000</td>\n",
633 |        "            </tr>\n",
634 |        "            <tr>\n",
635 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row13_col0\" class=\"data row13 col0\" >i5-6500 (pre-alloc)</td>\n",
636 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row13_col1\" class=\"data row13 col1\" >22.010000</td>\n",
637 |        "            </tr>\n",
638 |        "            <tr>\n",
639 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row14_col0\" class=\"data row14 col0\" >HD Graphics 5500 HDD</td>\n",
640 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row14_col1\" class=\"data row14 col1\" >23.740000</td>\n",
641 |        "            </tr>\n",
642 |        "            <tr>\n",
643 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row15_col0\" class=\"data row15 col0\" >HD Graphics 4400</td>\n",
644 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row15_col1\" class=\"data row15 col1\" >23.880000</td>\n",
645 |        "            </tr>\n",
646 |        "            <tr>\n",
647 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row16_col0\" class=\"data row16 col0\" >i7-6700HQ (pre-alloc)</td>\n",
648 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row16_col1\" class=\"data row16 col1\" >23.990000</td>\n",
649 |        "            </tr>\n",
650 |        "            <tr>\n",
651 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row17_col0\" class=\"data row17 col0\" >i7-6700HQ</td>\n",
652 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row17_col1\" class=\"data row17 col1\" >39.390000</td>\n",
653 |        "            </tr>\n",
654 |        "            <tr>\n",
655 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row18_col0\" class=\"data row18 col0\" >GT 730M (pre-alloc)</td>\n",
656 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row18_col1\" class=\"data row18 col1\" >40.640000</td>\n",
657 |        "            </tr>\n",
658 |        "            <tr>\n",
659 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row19_col0\" class=\"data row19 col0\" >GT 730M</td>\n",
660 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row19_col1\" class=\"data row19 col1\" >40.800000</td>\n",
661 |        "            </tr>\n",
662 |        "            <tr>\n",
663 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row20_col0\" class=\"data row20 col0\" >i5-4210U (pre-alloc)</td>\n",
664 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row20_col1\" class=\"data row20 col1\" >47.720000</td>\n",
665 |        "            </tr>\n",
666 |        "            <tr>\n",
667 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row21_col0\" class=\"data row21 col0\" >i5-4210U</td>\n",
668 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row21_col1\" class=\"data row21 col1\" >50.650000</td>\n",
669 |        "            </tr>\n",
670 |        "            <tr>\n",
671 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row22_col0\" class=\"data row22 col0\" >i5-5200U HDD (pre-alloc)</td>\n",
672 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row22_col1\" class=\"data row22 col1\" >51.060000</td>\n",
673 |        "            </tr>\n",
674 |        "            <tr>\n",
675 |        "                                <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row23_col0\" class=\"data row23 col0\" >i5-5200U HDD</td>\n",
676 |        "                        <td id=\"T_3f8c6446_2285_11eb_9633_84fdd1e21bc9row23_col1\" class=\"data row23 col1\" >58.640000</td>\n",
677 |        "            </tr>\n",
678 |        "    </tbody></table>"
679 |       ],
680 |       "text/plain": [
681 |        "<pandas.io.formats.style.Styler at 0x18ea6742088>"
682 |       ]
683 |      },
684 |      "execution_count": 83,
685 |      "metadata": {},
686 |      "output_type": "execute_result"
687 |     }
688 |    ],
689 |    "source": [
690 |     "results = [['i7-6700HQ', 39.39],['i7-6700HQ (pre-alloc)', 23.99],['i5-6500 (pre-alloc)',22.01],['i5-5200U HDD',58.64],\n",
691 |     "           ['i5-5200U HDD (pre-alloc)',51.06],['HD Graphics 530',14.73],['HD Graphics 530 (pre-alloc)',9.27],\n",
692 |     "           ['HD Graphics 5500 HDD',23.74],['HD Graphics 5500 HDD (pre-alloc)',18.03],['GTX 980M',14.34],\n",
693 |     "          ['GTX 980M (pre-alloc)',11.74],['GTX 1060 (pre-alloc)',7.85],['i5-4210U',50.65],['i5-4210U (pre-alloc)',47.72],\n",
694 |     "           ['GT 730M',40.80],['GT 730M (pre-alloc)',40.64],['HD Graphics 4400',23.88],['HD Graphics 4400 (pre-alloc)',13.97],\n",
695 |     "          ['i7-8700',19.07],['i7-8700 (pre-alloc)',12.09],['RTX 2080 h264_cuvid',17.78],\n",
696 |     "           ['RTX 2080 h264_cuvid (pre-alloc)',12.89],['RTX 2080',6.03],['RTX 2080 (pre-alloc)',5.78]]\n",
697 |     "df = pd.DataFrame(results,columns=['CPU/GPU','Frame Proc Time (ms)']).sort_values('Frame Proc Time (ms)').round(2)\n",
698 |     "df.style.hide_index()"
699 |    ]
700 |   },
701 |   {
702 |    "cell_type": "markdown",
703 |    "metadata": {},
704 |    "source": [
705 |     "# Export"
706 |    ]
707 |   },
708 |   {
709 |    "cell_type": "code",
710 |    "execution_count": 84,
711 |    "metadata": {},
712 |    "outputs": [
713 |     {
714 |      "name": "stdout",
715 |      "output_type": "stream",
716 |      "text": [
717 |       "Converted opencv410x-video-read.ipynb to exp\\nb_opencv410x-video-read.py\n"
718 |      ]
719 |     }
720 |    ],
721 |    "source": [
722 |     "# taken from https://github.com/fastai/fastai_docs/blob/master/dev_nb/notebook2script.py\n",
723 |     "!python notebook2script.py opencv410x-video-read.ipynb"
724 |    ]
725 |   },
726 |   {
727 |    "cell_type": "code",
728 |    "execution_count": null,
729 |    "metadata": {},
730 |    "outputs": [],
731 |    "source": [
732 |     "! python exp/nb_opencv410x-video-read.py"
733 |    ]
734 |   },
735 |   {
736 |    "cell_type": "code",
737 |    "execution_count": null,
738 |    "metadata": {},
739 |    "outputs": [],
740 |    "source": []
741 |   }
742 |  ],
743 |  "metadata": {
744 |   "kernelspec": {
745 |    "display_name": "Python 3",
746 |    "language": "python",
747 |    "name": "python3"
748 |   },
749 |   "language_info": {
750 |    "codemirror_mode": {
751 |     "name": "ipython",
752 |     "version": 3
753 |    },
754 |    "file_extension": ".py",
755 |    "mimetype": "text/x-python",
756 |    "name": "python",
757 |    "nbconvert_exporter": "python",
758 |    "pygments_lexer": "ipython3",
759 |    "version": "3.7.7"
760 |   },
761 |   "toc": {
762 |    "base_numbering": 1,
763 |    "nav_menu": {},
764 |    "number_sections": true,
765 |    "sideBar": true,
766 |    "skip_h1_title": false,
767 |    "title_cell": "Table of Contents",
768 |    "title_sidebar": "Contents",
769 |    "toc_cell": false,
770 |    "toc_position": {},
771 |    "toc_section_display": true,
772 |    "toc_window_display": false
773 |   }
774 |  },
775 |  "nbformat": 4,
776 |  "nbformat_minor": 2
777 | }
778 | 


--------------------------------------------------------------------------------
/nbs/opencv450-video-read-CUDA_10_0_VideoCodecSDK_11_0_10.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Comparison of [Nvidia Video Codec SDK](https://developer.nvidia.com/nvidia-video-codec-sdk) with CPU and iGPU ([Quick Sync](https://en.wikipedia.org/wiki/Intel_Quick_Sync_Video)) decoding using OpenCV 4.5.0 with Python - CUDA 10.0, Nvidia Video Codec SDK 11.0.10 and FFmpeg 4.3.1 master 99888-g5c7823ff1c-win64-lgpl - GPU Driver 457.30"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "To run the h264_cuvid and hevc_cuvid tests in this notebook you will need to compile OpenCV against FFmpeg libraries built with `--enable-cuda --enable-cuvid`.\n",
 15 |     "\n",
 16 |     "Notes:\n",
 17 |     "1. Whilst using VideoCapture with h264_cuvid decoding is only as fast as CPU decoding, it does offload the decoding, leaving more CPU resources available. Additionally, this option currently supports far more codecs than cv.cudacodec.VideoReader.\n",
 18 |     "2. GPU codec support depends on the GPU generation; see NVDEC_VideoDecoder_API_ProgGuide.pdf in the [NVIDIA Video Codec SDK documentation](https://developer.nvidia.com/nvidia-video-codec-sdk) for details.\n",
 19 |     "3. Unfortunately, the current Quick Sync implementation does not support container formats or RTSP streaming."
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "metadata": {},
 25 |    "source": [
 26 |     "## Init"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 1,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "#export\n",
 36 |     "import os\n",
 37 |     "import time\n",
 38 |     "import numpy as np\n",
 39 |     "from functools import partial\n",
 40 |     "import matplotlib.pyplot as plt\n",
 41 |     "import cv2 as cv\n",
 42 |     "import pandas as pd\n",
 43 |     "import psutil"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 2,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "#export\n",
 53 |     "# globals\n",
 54 |     "#vid_path = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/768x576.avi'\n",
 55 |     "#vid_path = os.environ['OPENCV_TEST_DATA_PATH'] + '/cv/video/1920x1080.avi'\n",
 56 |     "#vid_path = 'rtsp://127.0.0.1/mediafile.264'\n",
 57 |     "#vid_path=\"rtsp://127.0.0.1/jellyfish-120-mbps-4k-uhd-h264.264\";\n",
 58 |     "#vid_path=\"rtsp://127.0.0.1/big_buck_bunny.264\";\n",
 59 |     "# test files from http://jell.yfish.us/\n",
 60 |     "vid_path_264 = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-h264.mkv'\n",
 61 |     "vid_path_265 = os.environ['USERPROFILE'] + '/Videos/jellyfish-120-mbps-4k-uhd-hevc-10bit.mkv'\n",
 62 |     "\n",
 63 |     "check_res = False"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 3,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "def CheckFrames(f1,f2,epsilon = -1,rows = -1,cols = -1,channels = -1):\n",
 73 |     "    assert len(f1) > 0 and len(f1) == len(f2), f'f1 length {len(f1)}, f2 length {len(f2)}'    \n",
 74 |     "    epsilon = 0 if epsilon == -1 else epsilon\n",
 75 |     "    rows = f1[0].shape[0] if rows == -1 else rows\n",
 76 |     "    cols = f1[0].shape[1] if cols == -1 else cols\n",
 77 |     "    channels = f1[0].shape[2] if channels == -1 and len(f1[0].shape) == 3  else channels    \n",
 78 |     "    for i in range(0,len(f1)):\n",
 79 |     "        assert np.sum(f1[i][:rows,:cols,:channels] != f2[i][:rows,:cols,:channels]) <= epsilon, f'frame {i} different'"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 4,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "%matplotlib inline"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 5,
 94 |    "metadata": {},
 95 |    "outputs": [],
 96 |    "source": [
 97 |     "#export\n",
 98 |     "def ProcVid0(cap, measure_cpu = True):\n",
 99 |     "    n_frames, start, end, max_cpu, av_cpu, n_cpu_snapshots  = 0,0,0,0,0,0\n",
100 |     "    if(measure_cpu):\n",
101 |     "        p = psutil.Process()\n",
102 |     "        cpu_count = psutil.cpu_count()\n",
103 |     "    if (cap.IsOpen()== False): # replace with catch\n",
104 |     "        print(\"Error opening video stream or file\")\n",
105 |     "        return\n",
106 |     "    frames_available = True\n",
107 |     "    start = time.time()    \n",
108 |     "    while(cap.IsOpen()):\n",
109 |     "        ret,_ = cap.GetFrame()\n",
110 |     "        if (measure_cpu):\n",
111 |     "            cpu_all_pc = p.cpu_percent()\n",
112 |     "            if(cpu_all_pc > 0):\n",
113 |     "                n_cpu_snapshots +=1\n",
114 |     "                cpu_pc = cpu_all_pc/cpu_count\n",
115 |     "                max_cpu = max(cpu_pc,max_cpu)\n",
116 |     "                av_cpu += cpu_pc   \n",
117 |     "        if(ret):\n",
118 |     "            n_frames += 1 \n",
119 |     "    end = time.time()\n",
120 |     "    if(measure_cpu): \n",
121 |     "        print(f'CPU utilization - max: {max_cpu:.2f}%, average {av_cpu/(n_cpu_snapshots):.2f}%')\n",
122 |     "    return (end - start)*1000/n_frames, n_frames;"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": 6,
128 |    "metadata": {},
129 |    "outputs": [],
130 |    "source": [
131 |     "#export\n",
132 |     "# host mem not implemented, manually pin memory\n",
133 |     "class PinnedMem(object):\n",
134 |     "    def __init__(self, size, dtype=np.uint8):\n",
135 |     "        self.array = np.empty(size,dtype)\n",
136 |     "        cv.cuda.registerPageLocked(self.array)\n",
137 |     "        self.pinned = True\n",
138 |     "    def __del__(self):\n",
139 |     "        cv.cuda.unregisterPageLocked(self.array)\n",
140 |     "        self.pinned = False\n",
141 |     "    def __repr__(self):\n",
142 |     "        return f'pinned = {self.pinned}'"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 7,
148 |    "metadata": {},
149 |    "outputs": [],
150 |    "source": [
151 |     "#export\n",
152 |     "class VidCap:\n",
153 |     "    def __init__(self,vid_path,max_frames = -1,store_res = False,file_to_write=None):\n",
154 |     "        self.vid_path = vid_path\n",
155 |     "        self.store_res = store_res\n",
156 |     "        self.res = []\n",
157 |     "        self.frame_num = 0\n",
158 |     "        self.open = False\n",
159 |     "        cap = cv.VideoCapture(vid_path)\n",
160 |     "        assert cap.isOpened(), f\"{vid_path}: cannot be opened!\"\n",
161 |     "        self.num_frames = cap.get(cv.CAP_PROP_FRAME_COUNT)\n",
162 |     "        self.max_frames = self.num_frames if max_frames == -1 else max_frames\n",
163 |     "        ret, frame = cap.read()\n",
164 |     "        cap.release()\n",
165 |     "        self.rows,self.cols,self.channels = frame.shape\n",
166 |     "        self.write_video = False\n",
167 |     "        if(file_to_write):\n",
168 |     "            self.write_video = True\n",
169 |     "            #fourcc = cv.VideoWriter_fourcc(*'H264')\n",
170 |     "            fourcc = cv.VideoWriter_fourcc('M', '4', 'S', '2')\n",
171 |     "            self.out = cv.VideoWriter(file_to_write,cv.CAP_FFMPEG,fourcc,25,(self.cols,self.rows))\n",
172 |     "    \n",
173 |     "    def UpdateState(self,ret): \n",
174 |     "        if (not ret or self.frame_num+1 == self.max_frames): \n",
175 |     "            self.open = False            \n",
176 |     "        if(ret or self.frame_num+1 == self.max_frames):\n",
177 |     "            self.frame_num += 1\n",
178 |     "        \n",
179 |     "    def IsOpen(self): return self.open\n",
180 |     "    \n",
181 |     "    def __del__(self):\n",
182 |     "        if(self.write_video):\n",
183 |     "            self.out.release()\n",
184 |     "        \n",
185 |     "    #def WriteFrame(self,frame):\n",
186 |     "    #    if(self.file_to_write):\n",
187 |     "    #        self.out.write(frame)\n",
188 |     "        \n",
189 |     "class CudaCap(VidCap):\n",
190 |     "    def __init__(self,vid_path,max_frames=-1, store_res=False, file_to_write=None):\n",
191 |     "        VidCap.__init__(self, vid_path, max_frames, store_res, file_to_write)\n",
192 |     "        # cudacodec always returns 4 channels - check grey video\n",
193 |     "        self.channels = 4\n",
194 |     "        # cudacodec seems to need rows/16\n",
195 |     "        self.rows = (np.ceil(self.rows/16)*16).astype(int)\n",
196 |     "        self.cap = cv.cudacodec.createVideoReader(self.vid_path)\n",
197 |     "        self.open = True\n",
198 |     "        self.frame_device = cv.cuda_GpuMat(self.rows,self.cols,cv.CV_8UC4)\n",
199 |     "        self.frame_host = PinnedMem((self.rows,self.cols,self.channels))\n",
200 |     "        self.stream = cv.cuda_Stream()\n",
201 |     "            \n",
202 |     "    def GetFrame(self):\n",
203 |     "        if(self.store_res or self.write_video):\n",
204 |     "            ret,_ = self.GetHostFrame()\n",
205 |     "            if(self.write_video):\n",
206 |     "                self.out.write(self.frame_host.array[:,:,:3])\n",
207 |     "            if(self.store_res and ret):\n",
208 |     "                self.res.append(np.copy(self.frame_host.array))\n",
209 |     "            return ret,self.frame_device\n",
210 |     "        else:\n",
211 |     "            return self.GetDeviceFrame()\n",
212 |     "            \n",
213 |     "    def GetDeviceFrame(self):\n",
214 |     "        ret,_ = self.cap.nextFrame(self.frame_device,self.stream)\n",
215 |     "        self.UpdateState(ret)\n",
216 |     "        return ret,self.frame_device\n",
217 |     "        \n",
218 |     "    def GetHostFrame(self):\n",
219 |     "        ret,_ = self.GetDeviceFrame()\n",
220 |     "        if(ret):\n",
221 |     "            self.frame_device.download(self.frame_host.array)\n",
222 |     "        return ret,self.frame_host.array\n",
223 |     "        \n",
224 |     "    \n",
225 |     "class CudaCapNpa(CudaCap):\n",
226 |     "    def __init__(self,vid_path,max_frames=-1,store_res=False, file_to_write=None):\n",
227 |     "        CudaCap.__init__(self, vid_path, max_frames, store_res, file_to_write)\n",
228 |     "            \n",
229 |     "    def GetDeviceFrame(self):        \n",
230 |     "        ret,self.frame_device = self.cap.nextFrame()\n",
231 |     "        self.UpdateState(ret)\n",
232 |     "        return ret,self.frame_device\n",
233 |     "    \n",
234 |     "class CpuCap(VidCap):\n",
235 |     "    def __init__(self,vid_path,max_frames=-1,store_res=False,backend=cv.CAP_ANY):\n",
236 |     "        VidCap.__init__(self, vid_path, max_frames, store_res)      \n",
237 |     "\n",
238 |     "        self.cap = cv.VideoCapture(self.vid_path,backend)\n",
239 |     "        assert self.cap.isOpened(), f\"{vid_path}: cannot be opened for backend: {backend}!\"\n",
240 |     "        #if self.cap.isOpened():\n",
241 |     "        self.open = True\n",
242 |     "        self.frame = np.empty((self.rows,self.cols,self.channels),np.uint8)\n",
243 |     "            \n",
244 |     "    def GetFrame(self):\n",
245 |     "        ret,_ = self.cap.read(self.frame)\n",
246 |     "        self.UpdateState(ret)\n",
247 |     "        if (ret):\n",
248 |     "            if(self.store_res):\n",
249 |     "                self.res.append(np.copy(self.frame))\n",
250 |     "        return ret,self.frame\n",
251 |     "                \n",
252 |     "    def __del__(self):\n",
253 |     "        self.cap.release()\n",
254 |     "        \n",
255 |     "class CpuCapNpa(CpuCap):\n",
256 |     "    def __init__(self,vid_path,max_frames=-1,store_res=False,backend=cv.CAP_ANY):\n",
257 |     "        CpuCap.__init__(self, vid_path, max_frames, store_res,backend)      \n",
258 |     "            \n",
259 |     "    def GetFrame(self):\n",
260 |     "        ret,self.frame = self.cap.read()\n",
261 |     "        self.UpdateState(ret)\n",
262 |     "        if (ret):\n",
263 |     "            if(self.store_res):\n",
264 |     "                self.res.append(np.copy(self.frame))\n",
265 |     "        return ret,self.frame\n",
266 |     "    "
267 |    ]
268 |   },
269 |   {
270 |    "cell_type": "markdown",
271 |    "metadata": {},
272 |    "source": [
273 |     "<a id=\"cpu\"></a>"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "markdown",
278 |    "metadata": {},
279 |    "source": [
280 |     "## CPU"
281 |    ]
282 |   },
283 |   {
284 |    "cell_type": "markdown",
285 |    "metadata": {},
286 |    "source": [
287 |     "### h264"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": 12,
293 |    "metadata": {},
294 |    "outputs": [],
295 |    "source": [
296 |     "os.environ[\"OPENCV_FFMPEG_CAPTURE_OPTIONS\"] = \"\""
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "code",
301 |    "execution_count": 13,
302 |    "metadata": {},
303 |    "outputs": [
304 |     {
305 |      "name": "stdout",
306 |      "output_type": "stream",
307 |      "text": [
308 |       "CPU utilization - max: 60.77%, average 39.18%\n",
309 |       "CPU 0 (no pre alloc): 900 frames, 15.41 ms/frame\n"
310 |      ]
311 |     }
312 |    ],
313 |    "source": [
314 |     "#export\n",
315 |     "cpu_cap_npa = CpuCapNpa(vid_path_264,-1,check_res)\n",
316 |     "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n",
317 |     "print(f'CPU 0 (no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')"
318 |    ]
319 |   },
320 |   {
321 |    "cell_type": "code",
322 |    "execution_count": 14,
323 |    "metadata": {},
324 |    "outputs": [
325 |     {
326 |      "name": "stdout",
327 |      "output_type": "stream",
328 |      "text": [
329 |       "CPU utilization - max: 104.17%, average 76.64%\n",
330 |       "CPU 1: 900 frames, 8.63 ms/frame\n"
331 |      ]
332 |     }
333 |    ],
334 |    "source": [
335 |     "#export\n",
336 |     "cpu_cap = CpuCap(vid_path_264,-1,check_res)\n",
337 |     "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n",
338 |     "print(f'CPU 1: {n_frames} frames, {cpu_time_1:.2f} ms/frame')"
339 |    ]
340 |   },
341 |   {
342 |    "cell_type": "code",
343 |    "execution_count": 15,
344 |    "metadata": {},
345 |    "outputs": [],
346 |    "source": [
347 |     "if(check_res):\n",
348 |     "    CheckFrames(cpu_cap.res,cpu_cap_npa.res)"
349 |    ]
350 |   },
351 |   {
352 |    "cell_type": "markdown",
353 |    "metadata": {},
354 |    "source": [
355 |     "### h265"
356 |    ]
357 |   },
358 |   {
359 |    "cell_type": "code",
360 |    "execution_count": 16,
361 |    "metadata": {},
362 |    "outputs": [],
363 |    "source": [
364 |     "os.environ[\"OPENCV_FFMPEG_CAPTURE_OPTIONS\"] = \"\""
365 |    ]
366 |   },
367 |   {
368 |    "cell_type": "code",
369 |    "execution_count": 17,
370 |    "metadata": {},
371 |    "outputs": [
372 |     {
373 |      "name": "stdout",
374 |      "output_type": "stream",
375 |      "text": [
376 |       "CPU utilization - max: 65.11%, average 36.68%\n",
377 |       "CPU 0 (no pre alloc): 900 frames, 34.34 ms/frame\n"
378 |      ]
379 |     }
380 |    ],
381 |    "source": [
382 |     "#export\n",
383 |     "cpu_cap_npa = CpuCapNpa(vid_path_265,-1,check_res)\n",
384 |     "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n",
385 |     "print(f'CPU 0 (no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')"
386 |    ]
387 |   },
388 |   {
389 |    "cell_type": "code",
390 |    "execution_count": 18,
391 |    "metadata": {},
392 |    "outputs": [
393 |     {
394 |      "name": "stdout",
395 |      "output_type": "stream",
396 |      "text": [
397 |       "CPU utilization - max: 75.61%, average 45.85%\n",
398 |       "CPU 1: 900 frames, 27.27 ms/frame\n"
399 |      ]
400 |     }
401 |    ],
402 |    "source": [
403 |     "#export\n",
404 |     "cpu_cap = CpuCap(vid_path_265,-1,check_res)\n",
405 |     "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n",
406 |     "print(f'CPU 1: {n_frames} frames, {cpu_time_1:.2f} ms/frame')"
407 |    ]
408 |   },
409 |   {
410 |    "cell_type": "code",
411 |    "execution_count": 19,
412 |    "metadata": {},
413 |    "outputs": [],
414 |    "source": [
415 |     "if(check_res):\n",
416 |     "    CheckFrames(cpu_cap.res,cpu_cap_npa.res)"
417 |    ]
418 |   },
419 |   {
420 |    "cell_type": "markdown",
421 |    "metadata": {},
422 |    "source": [
423 |     "<a id=\"cpu_quicksync\"></a>"
424 |    ]
425 |   },
426 |   {
427 |    "cell_type": "markdown",
428 |    "metadata": {},
429 |    "source": [
430 |     "## CPU - Quicksync"
431 |    ]
432 |   },
433 |   {
434 |    "cell_type": "markdown",
435 |    "metadata": {},
436 |    "source": [
437 |     "If this implementation is slower than the default, software decoding is probably taking place.\n",
438 |     "\n",
439 |     "To confirm hardware decoding on Windows 10, check the Video Decode graph in the GPU pane of Task Manager for activity, as shown below.\n",
440 |     "\n",
441 |     "To fix this, check the drivers, OpenCV version, etc."
442 |    ]
443 |   },
444 |   {
445 |    "cell_type": "markdown",
446 |    "metadata": {},
447 |    "source": [
448 |     "![title](imgs/quicksync.PNG)"
449 |    ]
450 |   },
451 |   {
452 |    "cell_type": "code",
453 |    "execution_count": 15,
454 |    "metadata": {},
455 |    "outputs": [
456 |     {
457 |      "name": "stdout",
458 |      "output_type": "stream",
459 |      "text": [
460 |       "CPU Quick Sync (no pre alloc): 900 frames, 29.84 ms/frame\n"
461 |      ]
462 |     }
463 |    ],
464 |    "source": [
465 |     "#export\n",
466 |     "cpu_cap_mfx_npa = CpuCapNpa(vid_path_264,-1,check_res,cv.CAP_INTEL_MFX)\n",
467 |     "cpu_time_mfx_0,n_frames = ProcVid0(cpu_cap_mfx_npa)\n",
468 |     "print(f'CPU Quick Sync (no pre alloc): {n_frames} frames, {cpu_time_mfx_0:.2f} ms/frame')"
469 |    ]
470 |   },
471 |   {
472 |    "cell_type": "code",
473 |    "execution_count": 16,
474 |    "metadata": {},
475 |    "outputs": [
476 |     {
477 |      "name": "stdout",
478 |      "output_type": "stream",
479 |      "text": [
480 |       "CPU Quick Sync (no pre alloc): 900 frames, 28.68 ms/frame\n"
481 |      ]
482 |     }
483 |    ],
484 |    "source": [
485 |     "#export\n",
486 |     "cpu_cap_mfx = CpuCap(vid_path_264,-1,check_res,cv.CAP_INTEL_MFX)\n",
487 |     "cpu_time_mfx_1,n_frames = ProcVid0(cpu_cap_mfx)\n",
488 |     "print(f'CPU Quick Sync (no pre alloc): {n_frames} frames, {cpu_time_mfx_1:.2f} ms/frame')"
489 |    ]
490 |   },
491 |   {
492 |    "cell_type": "markdown",
493 |    "metadata": {},
494 |    "source": [
495 |     "<a id=\"gpu\"></a>"
496 |    ]
497 |   },
498 |   {
499 |    "cell_type": "markdown",
500 |    "metadata": {},
501 |    "source": [
502 |     "## GPU"
503 |    ]
504 |   },
505 |   {
506 |    "cell_type": "markdown",
507 |    "metadata": {},
508 |    "source": [
509 |     "Not all GPUs have a hardware decoder (e.g. anything based on GM108); see\n",
510 |     "[(1)](https://devtalk.nvidia.com/default/topic/1024934/video-codec-and-optical-flow-sdk/cuvidcreatedecoder-returns-error-cuda_error_no_device/) and \n",
511 |     "[(2)](https://developer.nvidia.com/video-encode-decode-gpu-support-matrix) for discussions."
512 |    ]
513 |   },
514 |   {
515 |    "cell_type": "markdown",
516 |    "metadata": {},
517 |    "source": [
518 |     "### cv.VideoCapture - h264_cuvid"
519 |    ]
520 |   },
521 |   {
522 |    "cell_type": "markdown",
523 |    "metadata": {},
524 |    "source": [
525 |     "#### h264"
526 |    ]
527 |   },
528 |   {
529 |    "cell_type": "code",
530 |    "execution_count": 20,
531 |    "metadata": {},
532 |    "outputs": [],
533 |    "source": [
534 |     "os.environ[\"OPENCV_FFMPEG_CAPTURE_OPTIONS\"] = \"video_codec;h264_cuvid\""
535 |    ]
536 |   },
537 |   {
538 |    "cell_type": "code",
539 |    "execution_count": 21,
540 |    "metadata": {},
541 |    "outputs": [
542 |     {
543 |      "name": "stdout",
544 |      "output_type": "stream",
545 |      "text": [
546 |       "CPU utilization - max: 8.49%, average 8.23%\n",
547 |       "CPU 0 with h264_cuvid (no pre alloc): 900 frames, 32.84 ms/frame\n"
548 |      ]
549 |     }
550 |    ],
551 |    "source": [
552 |     "#export\n",
553 |     "cpu_cap_npa = CpuCapNpa(vid_path_264,-1,check_res)\n",
554 |     "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n",
555 |     "print(f'CPU 0 with h264_cuvid (no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')"
556 |    ]
557 |   },
558 |   {
559 |    "cell_type": "code",
560 |    "execution_count": 22,
561 |    "metadata": {},
562 |    "outputs": [
563 |     {
564 |      "name": "stdout",
565 |      "output_type": "stream",
566 |      "text": [
567 |       "CPU utilization - max: 17.36%, average 8.30%\n",
568 |       "CPU 1 with h264_cuvid: 900 frames, 25.20 ms/frame\n"
569 |      ]
570 |     }
571 |    ],
572 |    "source": [
573 |     "#export\n",
574 |     "cpu_cap = CpuCap(vid_path_264,-1,check_res)\n",
575 |     "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n",
576 |     "print(f'CPU 1 with h264_cuvid: {n_frames} frames, {cpu_time_1:.2f} ms/frame')"
577 |    ]
578 |   },
579 |   {
580 |    "cell_type": "markdown",
581 |    "metadata": {},
582 |    "source": [
583 |     "#### h265"
584 |    ]
585 |   },
586 |   {
587 |    "cell_type": "code",
588 |    "execution_count": 23,
589 |    "metadata": {},
590 |    "outputs": [],
591 |    "source": [
592 |     "os.environ[\"OPENCV_FFMPEG_CAPTURE_OPTIONS\"] = \"video_codec;hevc_cuvid\""
593 |    ]
594 |   },
595 |   {
596 |    "cell_type": "code",
597 |    "execution_count": 24,
598 |    "metadata": {},
599 |    "outputs": [
600 |     {
601 |      "name": "stdout",
602 |      "output_type": "stream",
603 |      "text": [
604 |       "CPU utilization - max: 12.60%, average 8.24%\n",
605 |       "CPU 0 with hevc_cuvid(no pre alloc): 900 frames, 38.32 ms/frame\n"
606 |      ]
607 |     }
608 |    ],
609 |    "source": [
610 |     "#export\n",
611 |     "cpu_cap_npa = CpuCapNpa(vid_path_265,-1,check_res)\n",
612 |     "cpu_time_0,n_frames = ProcVid0(cpu_cap_npa)\n",
613 |     "print(f'CPU 0 with hevc_cuvid(no pre alloc): {n_frames} frames, {cpu_time_0:.2f} ms/frame')"
614 |    ]
615 |   },
616 |   {
617 |    "cell_type": "code",
618 |    "execution_count": 25,
619 |    "metadata": {},
620 |    "outputs": [
621 |     {
622 |      "name": "stdout",
623 |      "output_type": "stream",
624 |      "text": [
625 |       "CPU utilization - max: 16.28%, average 8.24%\n",
626 |       "CPU 1 with hevc_cuvid: 900 frames, 30.20 ms/frame\n"
627 |      ]
628 |     }
629 |    ],
630 |    "source": [
631 |     "#export\n",
632 |     "cpu_cap = CpuCap(vid_path_265,-1,check_res)\n",
633 |     "cpu_time_1,n_frames = ProcVid0(cpu_cap)\n",
634 |     "print(f'CPU 1 with hevc_cuvid: {n_frames} frames, {cpu_time_1:.2f} ms/frame')"
635 |    ]
636 |   },
637 |   {
638 |    "cell_type": "markdown",
639 |    "metadata": {},
640 |    "source": [
641 |     "### cv.cudacodec.VideoReader"
642 |    ]
643 |   },
644 |   {
645 |    "cell_type": "code",
646 |    "execution_count": 26,
647 |    "metadata": {},
648 |    "outputs": [
649 |     {
650 |      "name": "stdout",
651 |      "output_type": "stream",
652 |      "text": [
653 |       "CPU utilization - max: 8.68%, average 8.38%\n",
654 |       "GPU 0 (no pre alloc): 900 frames, 5.89 ms/frame\n"
655 |      ]
656 |     }
657 |    ],
658 |    "source": [
659 |     "#export\n",
660 |     "gpu_cap_npa = CudaCapNpa(vid_path_264,-1,check_res)\n",
661 |     "gpu_time_0,n_frames = ProcVid0(gpu_cap_npa)\n",
662 |     "print(f'GPU 0 (no pre alloc): {n_frames} frames, {gpu_time_0:.2f} ms/frame')"
663 |    ]
664 |   },
665 |   {
666 |    "cell_type": "code",
667 |    "execution_count": 27,
668 |    "metadata": {},
669 |    "outputs": [
670 |     {
671 |      "name": "stdout",
672 |      "output_type": "stream",
673 |      "text": [
674 |       "CPU utilization - max: 8.68%, average 8.40%\n",
675 |       "GPU 0 (no pre alloc): 900 frames, 5.81 ms/frame\n"
676 |      ]
677 |     }
678 |    ],
679 |    "source": [
680 |     "gpu_cap_npa = CudaCapNpa(vid_path_264,-1,check_res)\n",
681 |     "gpu_time_0,n_frames = ProcVid0(gpu_cap_npa)\n",
682 |     "print(f'GPU 0 (no pre alloc): {n_frames} frames, {gpu_time_0:.2f} ms/frame')"
683 |    ]
684 |   },
685 |   {
686 |    "cell_type": "code",
687 |    "execution_count": 28,
688 |    "metadata": {},
689 |    "outputs": [],
690 |    "source": [
691 |     "if(check_res):\n",
692 |     "    n_frames = min(len(gpu_cap.res),len(gpu_cap_npa.res))\n",
693 |     "    CheckFrames(gpu_cap.res[:n_frames],gpu_cap_npa.res[:n_frames])"
694 |    ]
695 |   },
696 |   {
697 |    "cell_type": "markdown",
698 |    "metadata": {},
699 |    "source": [
700 |     "<a id='opencv_decoding_performance_comparisson'></a>"
701 |    ]
702 |   },
703 |   {
704 |    "cell_type": "markdown",
705 |    "metadata": {},
706 |    "source": [
707 |     "## Results"
708 |    ]
709 |   },
710 |   {
711 |    "cell_type": "markdown",
712 |    "metadata": {},
713 |    "source": [
714 |     "### h264"
715 |    ]
716 |   },
717 |   {
718 |    "cell_type": "code",
719 |    "execution_count": 29,
720 |    "metadata": {},
721 |    "outputs": [
722 |     {
723 |      "data": {
724 |       "text/html": [
725 |        "<style  type=\"text/css\" >\n",
726 |        "</style><table id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9\" ><thead>    <tr>        <th class=\"col_heading level0 col0\" >CPU/GPU</th>        <th class=\"col_heading level0 col1\" >Frame Proc Time (ms)</th>    </tr></thead><tbody>\n",
727 |        "                <tr>\n",
728 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row0_col0\" class=\"data row0 col0\" >RTX 2080 Mobile (pre-alloc)</td>\n",
729 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row0_col1\" class=\"data row0 col1\" >5.780000</td>\n",
730 |        "            </tr>\n",
731 |        "            <tr>\n",
732 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row1_col0\" class=\"data row1 col0\" >RTX 2080 Mobile</td>\n",
733 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row1_col1\" class=\"data row1 col1\" >6.030000</td>\n",
734 |        "            </tr>\n",
735 |        "            <tr>\n",
736 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row2_col0\" class=\"data row2 col0\" >GTX 1060 (pre-alloc)</td>\n",
737 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row2_col1\" class=\"data row2 col1\" >7.850000</td>\n",
738 |        "            </tr>\n",
739 |        "            <tr>\n",
740 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row3_col0\" class=\"data row3 col0\" >HD Graphics 530 (pre-alloc)</td>\n",
741 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row3_col1\" class=\"data row3 col1\" >9.270000</td>\n",
742 |        "            </tr>\n",
743 |        "            <tr>\n",
744 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row4_col0\" class=\"data row4 col0\" >GTX 980M (pre-alloc)</td>\n",
745 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row4_col1\" class=\"data row4 col1\" >11.740000</td>\n",
746 |        "            </tr>\n",
747 |        "            <tr>\n",
748 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row5_col0\" class=\"data row5 col0\" >i7-8700 (pre-alloc)</td>\n",
749 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row5_col1\" class=\"data row5 col1\" >12.090000</td>\n",
750 |        "            </tr>\n",
751 |        "            <tr>\n",
752 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row6_col0\" class=\"data row6 col0\" >HD Graphics 4400 (pre-alloc)</td>\n",
753 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row6_col1\" class=\"data row6 col1\" >13.970000</td>\n",
754 |        "            </tr>\n",
755 |        "            <tr>\n",
756 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row7_col0\" class=\"data row7 col0\" >GTX 980M</td>\n",
757 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row7_col1\" class=\"data row7 col1\" >14.340000</td>\n",
758 |        "            </tr>\n",
759 |        "            <tr>\n",
760 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row8_col0\" class=\"data row8 col0\" >HD Graphics 530</td>\n",
761 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row8_col1\" class=\"data row8 col1\" >14.730000</td>\n",
762 |        "            </tr>\n",
763 |        "            <tr>\n",
764 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row9_col0\" class=\"data row9 col0\" >HD Graphics 5500 HDD (pre-alloc)</td>\n",
765 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row9_col1\" class=\"data row9 col1\" >18.030000</td>\n",
766 |        "            </tr>\n",
767 |        "            <tr>\n",
768 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row10_col0\" class=\"data row10 col0\" >i7-8700</td>\n",
769 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row10_col1\" class=\"data row10 col1\" >19.070000</td>\n",
770 |        "            </tr>\n",
771 |        "            <tr>\n",
772 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row11_col0\" class=\"data row11 col0\" >i5-6500 (pre-alloc)</td>\n",
773 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row11_col1\" class=\"data row11 col1\" >22.010000</td>\n",
774 |        "            </tr>\n",
775 |        "            <tr>\n",
776 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row12_col0\" class=\"data row12 col0\" >HD Graphics 5500 HDD</td>\n",
777 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row12_col1\" class=\"data row12 col1\" >23.740000</td>\n",
778 |        "            </tr>\n",
779 |        "            <tr>\n",
780 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row13_col0\" class=\"data row13 col0\" >HD Graphics 4400</td>\n",
781 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row13_col1\" class=\"data row13 col1\" >23.880000</td>\n",
782 |        "            </tr>\n",
783 |        "            <tr>\n",
784 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row14_col0\" class=\"data row14 col0\" >i7-6700HQ (pre-alloc)</td>\n",
785 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row14_col1\" class=\"data row14 col1\" >23.990000</td>\n",
786 |        "            </tr>\n",
787 |        "            <tr>\n",
788 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row15_col0\" class=\"data row15 col0\" >RTX 2080 Mobile h264_cuvid (pre-alloc)</td>\n",
789 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row15_col1\" class=\"data row15 col1\" >25.200000</td>\n",
790 |        "            </tr>\n",
791 |        "            <tr>\n",
792 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row16_col0\" class=\"data row16 col0\" >RTX 2080 Mobile h264_cuvid</td>\n",
793 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row16_col1\" class=\"data row16 col1\" >32.840000</td>\n",
794 |        "            </tr>\n",
795 |        "            <tr>\n",
796 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row17_col0\" class=\"data row17 col0\" >i7-6700HQ</td>\n",
797 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row17_col1\" class=\"data row17 col1\" >39.390000</td>\n",
798 |        "            </tr>\n",
799 |        "            <tr>\n",
800 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row18_col0\" class=\"data row18 col0\" >GT 730M (pre-alloc)</td>\n",
801 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row18_col1\" class=\"data row18 col1\" >40.640000</td>\n",
802 |        "            </tr>\n",
803 |        "            <tr>\n",
804 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row19_col0\" class=\"data row19 col0\" >GT 730M</td>\n",
805 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row19_col1\" class=\"data row19 col1\" >40.800000</td>\n",
806 |        "            </tr>\n",
807 |        "            <tr>\n",
808 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row20_col0\" class=\"data row20 col0\" >i5-4210U (pre-alloc)</td>\n",
809 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row20_col1\" class=\"data row20 col1\" >47.720000</td>\n",
810 |        "            </tr>\n",
811 |        "            <tr>\n",
812 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row21_col0\" class=\"data row21 col0\" >i5-4210U</td>\n",
813 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row21_col1\" class=\"data row21 col1\" >50.650000</td>\n",
814 |        "            </tr>\n",
815 |        "            <tr>\n",
816 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row22_col0\" class=\"data row22 col0\" >i5-5200U HDD (pre-alloc)</td>\n",
817 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row22_col1\" class=\"data row22 col1\" >51.060000</td>\n",
818 |        "            </tr>\n",
819 |        "            <tr>\n",
820 |        "                                <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row23_col0\" class=\"data row23 col0\" >i5-5200U HDD</td>\n",
821 |        "                        <td id=\"T_d1827ffe_2509_11eb_be6b_84fdd1e21bc9row23_col1\" class=\"data row23 col1\" >58.640000</td>\n",
822 |        "            </tr>\n",
823 |        "    </tbody></table>"
824 |       ],
825 |       "text/plain": [
826 |        "<pandas.io.formats.style.Styler at 0x24182787240>"
827 |       ]
828 |      },
829 |      "execution_count": 29,
830 |      "metadata": {},
831 |      "output_type": "execute_result"
832 |     }
833 |    ],
834 |    "source": [
835 |     "results = [['i7-6700HQ', 39.39],['i7-6700HQ (pre-alloc)', 23.99],['i5-6500 (pre-alloc)',22.01],['i5-5200U HDD',58.64],\n",
836 |     "           ['i5-5200U HDD (pre-alloc)',51.06],['HD Graphics 530',14.73],['HD Graphics 530 (pre-alloc)',9.27],\n",
837 |     "           ['HD Graphics 5500 HDD',23.74],['HD Graphics 5500 HDD (pre-alloc)',18.03],['GTX 980M',14.34],\n",
838 |     "          ['GTX 980M (pre-alloc)',11.74],['GTX 1060 (pre-alloc)',7.85],['i5-4210U',50.65],['i5-4210U (pre-alloc)',47.72],\n",
839 |     "           ['GT 730M',40.80],['GT 730M (pre-alloc)',40.64],['HD Graphics 4400',23.88],['HD Graphics 4400 (pre-alloc)',13.97],\n",
840 |     "          ['i7-8700',19.07],['i7-8700 (pre-alloc)',12.09],['RTX 2080 Mobile h264_cuvid',32.84],\n",
841 |     "           ['RTX 2080 Mobile h264_cuvid (pre-alloc)',25.20],['RTX 2080 Mobile',6.03],['RTX 2080 Mobile (pre-alloc)',5.78]]\n",
842 |     "df = pd.DataFrame(results,columns=['CPU/GPU','Frame Proc Time (ms)']).sort_values('Frame Proc Time (ms)').round(2)\n",
843 |     "df.style.hide_index()"
844 |    ]
845 |   },
846 |   {
847 |    "cell_type": "markdown",
848 |    "metadata": {},
849 |    "source": [
850 |     "### h265"
851 |    ]
852 |   },
853 |   {
854 |    "cell_type": "markdown",
855 |    "metadata": {},
856 |    "source": [
857 |     "# Export"
858 |    ]
859 |   },
860 |   {
861 |    "cell_type": "code",
862 |    "execution_count": 84,
863 |    "metadata": {},
864 |    "outputs": [
865 |     {
866 |      "name": "stdout",
867 |      "output_type": "stream",
868 |      "text": [
869 |       "Converted opencv410x-video-read.ipynb to exp\\nb_opencv410x-video-read.py\n"
870 |      ]
871 |     }
872 |    ],
873 |    "source": [
874 |     "# notebook2script.py (taken from https://github.com/fastai/fastai_docs/blob/master/dev_nb/notebook2script.py) converts this notebook to a script under exp/\n",
875 |     "!python notebook2script.py opencv410x-video-read.ipynb"
876 |    ]
877 |   },
878 |   {
879 |    "cell_type": "code",
880 |    "execution_count": null,
881 |    "metadata": {},
882 |    "outputs": [],
883 |    "source": [
884 |     "! python exp/nb_opencv410x-video-read.py"
885 |    ]
886 |   },
887 |   {
888 |    "cell_type": "code",
889 |    "execution_count": null,
890 |    "metadata": {},
891 |    "outputs": [],
892 |    "source": []
893 |   }
894 |  ],
895 |  "metadata": {
896 |   "kernelspec": {
897 |    "display_name": "Python 3",
898 |    "language": "python",
899 |    "name": "python3"
900 |   },
901 |   "language_info": {
902 |    "codemirror_mode": {
903 |     "name": "ipython",
904 |     "version": 3
905 |    },
906 |    "file_extension": ".py",
907 |    "mimetype": "text/x-python",
908 |    "name": "python",
909 |    "nbconvert_exporter": "python",
910 |    "pygments_lexer": "ipython3",
911 |    "version": "3.6.9"
912 |   },
913 |   "toc": {
914 |    "base_numbering": 1,
915 |    "nav_menu": {},
916 |    "number_sections": true,
917 |    "sideBar": true,
918 |    "skip_h1_title": false,
919 |    "title_cell": "Table of Contents",
920 |    "title_sidebar": "Contents",
921 |    "toc_cell": false,
922 |    "toc_position": {},
923 |    "toc_section_display": true,
924 |    "toc_window_display": false
925 |   }
926 |  },
927 |  "nbformat": 4,
928 |  "nbformat_minor": 2
929 | }
930 | 


--------------------------------------------------------------------------------