├── samples ├── speed.cfg ├── balanced.cfg └── quality.cfg ├── OpenEncodeVFW ├── pOVEncode.h ├── openencode.def ├── OVEncodeTypes.h ├── OpenCL_Logo.bmp ├── OpenCL_Logo.png ├── stdafx.cpp ├── log.h ├── dllmain.cpp ├── stdafx.h ├── log.cpp ├── colorspace.h ├── targetver.h ├── CL │ └── opencl.h ├── decompress.cpp ├── resource.h ├── bitstream.h ├── clconvert.h ├── OpenEncodeVFW.vcxproj.filters ├── oveDynload.cpp ├── ReadMe.txt ├── drvproc.cpp ├── OpenEncodeVFW.rc ├── OpenEncodeVFW.vcproj ├── OvEncodeTypedef.h ├── OVEncodeDyn.h ├── perf.h ├── perf.cpp ├── bs.h ├── device.cpp ├── OpenEncodeVFW.vcxproj ├── NV12_kernels.cl ├── OpenEncodeVFW.h └── clconvert.cpp ├── OpenEncode32 └── OpenEncodeVFW.dll ├── OpenEncode64 └── OpenEncodeVFW.dll ├── .gitignore ├── OpenEncodeVFW.sln ├── install.bat ├── openencode.inf ├── CMakeLists.txt └── README.md /samples/speed.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackun/openencodevfw/HEAD/samples/speed.cfg -------------------------------------------------------------------------------- /samples/balanced.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackun/openencodevfw/HEAD/samples/balanced.cfg -------------------------------------------------------------------------------- /samples/quality.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackun/openencodevfw/HEAD/samples/quality.cfg -------------------------------------------------------------------------------- /OpenEncodeVFW/pOVEncode.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackun/openencodevfw/HEAD/OpenEncodeVFW/pOVEncode.h -------------------------------------------------------------------------------- /OpenEncodeVFW/openencode.def: 
-------------------------------------------------------------------------------- 1 | LIBRARY "OpenEncodeVFW" 2 | 3 | EXPORTS 4 | DriverProc 5 | Configure 6 | -------------------------------------------------------------------------------- /OpenEncode32/OpenEncodeVFW.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackun/openencodevfw/HEAD/OpenEncode32/OpenEncodeVFW.dll -------------------------------------------------------------------------------- /OpenEncode64/OpenEncodeVFW.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackun/openencodevfw/HEAD/OpenEncode64/OpenEncodeVFW.dll -------------------------------------------------------------------------------- /OpenEncodeVFW/OVEncodeTypes.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackun/openencodevfw/HEAD/OpenEncodeVFW/OVEncodeTypes.h -------------------------------------------------------------------------------- /OpenEncodeVFW/OpenCL_Logo.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackun/openencodevfw/HEAD/OpenEncodeVFW/OpenCL_Logo.bmp -------------------------------------------------------------------------------- /OpenEncodeVFW/OpenCL_Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackun/openencodevfw/HEAD/OpenEncodeVFW/OpenCL_Logo.png -------------------------------------------------------------------------------- /OpenEncodeVFW/stdafx.cpp: -------------------------------------------------------------------------------- 1 | // stdafx.cpp : source file that includes just the standard includes 2 | // OpenEncodeVFW.pch will be the pre-compiled header 3 | // stdafx.obj will contain the pre-compiled type information 4 | 5 | #include "stdafx.h" 6 | 7 
#ifndef _LOGGER
#define _LOGGER

// Minimal wide-character file logger.
// The implementation (log.cpp) writes to "openencode.log" and opens the
// file lazily on the first message that is logged while logging is enabled.
class Logger
{
private:
	FILE* mLog;      // open log file, or NULL while no file is open
	bool mWritelog;  // true while logging is enabled
public:
	// _log: initial enabled/disabled state. No file is opened here.
	Logger(bool _log);
	// Closes the log file if it is still open.
	~Logger();
	// Enables or disables logging; disabling also closes the file.
	void enableLog(bool b);
	// Closes any open file and reopens "openencode.log" for writing.
	// Returns false if logging is disabled or the file cannot be opened.
	bool open();
	// Closes the log file; does nothing if no file is open.
	void close();

	// Formats psz_fmt with the already-started va_list and writes the
	// result to the log, opening the file first if necessary.
	void Log_internal(const wchar_t *psz_fmt, va_list arg);
	// printf-style entry point; returns immediately while logging is disabled.
	void Log(const wchar_t *psz_fmt, ...);
};

#endif
2 | #include "stdafx.h" 3 | 4 | /*BOOL APIENTRY DllMain( HMODULE hModule, 5 | DWORD ul_reason_for_call, 6 | LPVOID lpReserved 7 | ) 8 | { 9 | switch (ul_reason_for_call) 10 | { 11 | case DLL_PROCESS_ATTACH: 12 | case DLL_THREAD_ATTACH: 13 | case DLL_THREAD_DETACH: 14 | case DLL_PROCESS_DETACH: 15 | break; 16 | } 17 | return TRUE; 18 | }*/ 19 | 20 | -------------------------------------------------------------------------------- /OpenEncodeVFW/stdafx.h: -------------------------------------------------------------------------------- 1 | // stdafx.h : include file for standard system include files, 2 | // or project specific include files that are used frequently, but 3 | // are changed infrequently 4 | // 5 | 6 | #pragma once 7 | 8 | #include "targetver.h" 9 | #include "resource.h" 10 | 11 | #include 12 | #include 13 | #define WIN32_LEAN_AND_MEAN 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "bs.h" 29 | 30 | 31 | // TODO: reference additional headers your program requires here 32 | -------------------------------------------------------------------------------- /OpenEncodeVFW/log.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "log.h" 3 | 4 | Logger::Logger(bool _log) : mLog(NULL), mWritelog(_log) 5 | { 6 | } 7 | 8 | bool Logger::open() 9 | { 10 | if(!mWritelog) return false; 11 | close(); 12 | mLog = fopen("openencode.log", "w, ccs=UNICODE"); 13 | return mLog != NULL; 14 | } 15 | 16 | void Logger::close() 17 | { 18 | if(mLog) fclose(mLog); 19 | mLog = NULL; 20 | } 21 | 22 | Logger::~Logger() 23 | { 24 | if(mLog) fclose(mLog); 25 | } 26 | 27 | void Logger::Log_internal(const wchar_t *psz_fmt, va_list args) 28 | { 29 | if(!mLog) open(); 30 | 31 | if(mLog) 32 | { 33 | int bufsize = _vscwprintf(psz_fmt, args) + 1; 34 | std::vector 
msg(bufsize); 35 | _vsnwprintf_s(&msg[0], bufsize, bufsize-1, psz_fmt, args); 36 | fwrite(&msg[0], sizeof(wchar_t), bufsize - 1, mLog); 37 | fflush(mLog); 38 | } 39 | } 40 | 41 | void Logger::Log(const wchar_t *psz_fmt, ...) 42 | { 43 | if(!mWritelog) return; 44 | va_list arg; 45 | va_start(arg, psz_fmt); 46 | Log_internal(psz_fmt, arg); 47 | va_end(arg); 48 | } 49 | 50 | void Logger::enableLog(bool b) 51 | { 52 | mWritelog = b; 53 | if(!b) close(); 54 | } -------------------------------------------------------------------------------- /OpenEncodeVFW/colorspace.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | //Virtualdub doesn't seem to load if fourcc is different then it is in registry after VIDC.* 4 | #define FOURCC_OPEN mmioFOURCC('H','2','6','4') 5 | #define FOURCC_H264 mmioFOURCC('H','2','6','4') 6 | 7 | /* YUV 4:2:0 planar */ 8 | #define FOURCC_I420 mmioFOURCC('I','4','2','0') 9 | #define FOURCC_IYUV mmioFOURCC('I','Y','U','V') 10 | #define FOURCC_YV12 mmioFOURCC('Y','V','1','2') 11 | /* YUV 4:2:2 planar */ 12 | #define FOURCC_YV16 mmioFOURCC('Y','V','1','6') 13 | /* YUV 4:4:4 planar */ 14 | #define FOURCC_YV24 mmioFOURCC('Y','V','2','4') 15 | /* YUV 4:2:0, with one Y plane and one packed U+V */ 16 | #define FOURCC_NV12 mmioFOURCC('N','V','1','2') 17 | /* YUV 4:2:2 packed */ 18 | #define FOURCC_YUYV mmioFOURCC('Y','U','Y','V') 19 | #define FOURCC_YUY2 mmioFOURCC('Y','U','Y','2') 20 | #define FOURCC_UYVY mmioFOURCC('U','Y','V','Y') 21 | #define FOURCC_HDYC mmioFOURCC('H','D','Y','C') 22 | 23 | // possible colorspaces 24 | #define RGB24 24 25 | #define RGB32 32 26 | #define YUY2 16 27 | #define YV12 12 28 | 29 | enum COLORMATRIX 30 | { 31 | BT601_FULL = 0, 32 | BT601_LIMITED, 33 | BT601_FULL_YCbCr, 34 | BT709_FULL, 35 | BT709_LIMITED, 36 | //BT709_ALT1_FULL, 37 | BT709_ALT1_LIMITED, 38 | COLORMATRIX_COUNT 39 | }; -------------------------------------------------------------------------------- 
/OpenEncodeVFW.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.30723.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OpenEncodeVFW", "OpenEncodeVFW\OpenEncodeVFW.vcxproj", "{2EE32DA3-C809-449C-9D24-2ACA848AED59}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release|Win32 = Release|Win32 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {2EE32DA3-C809-449C-9D24-2ACA848AED59}.Debug|Win32.ActiveCfg = Debug|Win32 17 | {2EE32DA3-C809-449C-9D24-2ACA848AED59}.Debug|Win32.Build.0 = Debug|Win32 18 | {2EE32DA3-C809-449C-9D24-2ACA848AED59}.Debug|x64.ActiveCfg = Debug|x64 19 | {2EE32DA3-C809-449C-9D24-2ACA848AED59}.Debug|x64.Build.0 = Debug|x64 20 | {2EE32DA3-C809-449C-9D24-2ACA848AED59}.Release|Win32.ActiveCfg = Release|Win32 21 | {2EE32DA3-C809-449C-9D24-2ACA848AED59}.Release|Win32.Build.0 = Release|Win32 22 | {2EE32DA3-C809-449C-9D24-2ACA848AED59}.Release|x64.ActiveCfg = Release|x64 23 | {2EE32DA3-C809-449C-9D24-2ACA848AED59}.Release|x64.Build.0 = Release|x64 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /OpenEncodeVFW/targetver.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // The following macros define the minimum required platform. The minimum required platform 4 | // is the earliest version of Windows, Internet Explorer etc. that has the necessary features to run 5 | // your application. 
The macros work by enabling all features available on platform versions up to and 6 | // including the version specified. 7 | 8 | // Modify the following defines if you have to target a platform prior to the ones specified below. 9 | // Refer to MSDN for the latest info on corresponding values for different platforms. 10 | #ifndef WINVER // Specifies that the minimum required platform is Windows Vista. 11 | #define WINVER 0x0600 // Change this to the appropriate value to target other versions of Windows. 12 | #endif 13 | 14 | #ifndef _WIN32_WINNT // Specifies that the minimum required platform is Windows Vista. 15 | #define _WIN32_WINNT 0x0600 // Change this to the appropriate value to target other versions of Windows. 16 | #endif 17 | 18 | #ifndef _WIN32_WINDOWS // Specifies that the minimum required platform is Windows 98. 19 | #define _WIN32_WINDOWS 0x0410 // Change this to the appropriate value to target Windows Me or later. 20 | #endif 21 | 22 | #ifndef _WIN32_IE // Specifies that the minimum required platform is Internet Explorer 7.0. 23 | #define _WIN32_IE 0x0700 // Change this to the appropriate value to target other versions of IE. 
@echo off
REM Installs the OpenEncodeVFW codec: copies the INF and the matching DLL(s)
REM into the Windows system directories and runs the INF's DefaultInstall.
REM stolen from lagarith

Set RegQry=HKLM\Hardware\Description\System\CentralProcessor\0
REM set OLDDIR=%CD%
REM Work from the directory this script lives in. Quoted so that paths
REM containing spaces, ampersands or parentheses are handled.
pushd "%~dp0"

REG.exe Query %RegQry% > checkOS.txt

REM "x86" in the CPU description is treated as a 32 bit operating system.
Find /i "x86" < CheckOS.txt > StringCheck.txt

If %ERRORLEVEL% == 0 (
	del StringCheck.txt
	del CheckOS.txt
	Echo "32 Bit Operating system detected, installing 32 bit OpenEncode version"
	copy openencode.inf "%windir%\system32\"
	copy OpenEncode32\OPENENCODEVFW.DLL "%windir%\system32\"

	cd /d "%windir%\system32\"
	rundll32 setupapi.dll,InstallHinfSection DefaultInstall 0 %windir%\system32\OpenEncode.inf
) ELSE (
	del StringCheck.txt
	del CheckOS.txt

	echo ===
	echo === 64 Bit Operating System detected.
	echo ===

	REM With how the INF is currently set up, setupapi seems to look for DLLs in the same folder as the INF.
	REM So just copy the DLLs, i.e. do the installer's work twice, and run the INF from the destination dir.
	REM Copy INF for uninstaller. INF is also copied to %windir%\inf, but first run uninstaller probably removes that.
	copy OpenEncode.inf "%windir%\system32\"
	copy OpenEncode64\OPENENCODEVFW.DLL "%windir%\system32\"

	copy OpenEncode.inf "%windir%\SysWOW64\"
	copy OpenEncode32\OPENENCODEVFW.DLL "%windir%\SysWOW64\"

	rundll32 setupapi.dll,InstallHinfSection DefaultInstall 0 %windir%\System32\OpenEncode.inf

	REM Because Windows-On-Windows, you have to run this from within syswow64 dir
	REM so that windows would know it is 32bit version.
	cd /d "%windir%\SysWOW64\"
	rundll32 setupapi.dll,InstallHinfSection DefaultInstall 0 %windir%\SYSWOW64\OpenEncode.inf
)

popd
pause
22 | ******************************************************************************/ 23 | 24 | /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ 25 | 26 | #ifndef __OPENCL_H 27 | #define __OPENCL_H 28 | 29 | #ifdef __cplusplus 30 | extern "C" { 31 | #endif 32 | 33 | #ifdef __APPLE__ 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | #else 41 | 42 | #include 43 | #include 44 | #include 45 | #include 46 | 47 | #endif 48 | 49 | #ifdef __cplusplus 50 | } 51 | #endif 52 | 53 | #endif /* __OPENCL_H */ 54 | 55 | -------------------------------------------------------------------------------- /openencode.inf: -------------------------------------------------------------------------------- 1 | [version] 2 | signature="$CHICAGO$" 3 | 4 | [DefaultInstall] 5 | CopyFiles=OPENENC.Files.Inf,OPENENC.Files.Dll 6 | AddReg=OPENENC.Reg 7 | UpdateInis=OPENENC.INIs 8 | 9 | [DefaultUnInstall] 10 | DelFiles=OPENENC.Files.Dll,OPENENC.Files.Inf,OPENENC.Files.Ini 11 | DelReg=OPENENC.Reg 12 | UpdateInis=OPENENC.INIs.Del 13 | 14 | [SourceDisksNames] 15 | 1="AMD OpenEncodeVFW video codec","",1 16 | 17 | [SourceDisksFiles] 18 | OPENENCODE.INF=1 19 | OPENENCODEVFW.DLL=1 20 | 21 | ;[SourceDisksFiles.amd64] 22 | ;OPENENCODE.INF=1 23 | ;OPENENCODEVFW.DLL=1,OpenEncode64 24 | 25 | [DestinationDirs] 26 | OPENENC.Files.Inf=11 27 | OPENENC.Files.Dll=11 28 | OPENENC.Files.Ini=25 29 | 30 | ;%windir%\inf 31 | [OPENENC.Files.Inf] 32 | OPENENCODE.INF 33 | 34 | [OPENENC.Files.Dll] 35 | OPENENCODEVFW.DLL 36 | 37 | [OPENENC.Files.Ini] 38 | OPENENCODEVFW.INI 39 | 40 | ; ********************** WARNING ******************** 41 | ; Using VIDC.H264 might override preinstalled codecs 42 | ; *************************************************** 43 | [OPENENC.Reg] 44 | HKLM,SYSTEM\CurrentControlSet\Control\MediaResources\icm\VIDC.H264,Description,,"OpenEncodeVFW codec" 45 | HKLM,SYSTEM\CurrentControlSet\Control\MediaResources\icm\VIDC.H264,Driver,,"OPENENCODEVFW.DLL" 46 | 
HKLM,SYSTEM\CurrentControlSet\Control\MediaResources\icm\VIDC.H264,FriendlyName,,"OpenEncodeVFW codec" 47 | 48 | HKLM,"Software\Microsoft\Windows NT\CurrentVersion\drivers.desc",OPENENCODEVFW.DLL,,"OpenEncodeVFW codec" 49 | HKLM,"Software\Microsoft\Windows NT\CurrentVersion\Drivers32",VIDC.H264,,"OPENENCODEVFW.DLL" 50 | 51 | HKLM,Software\Microsoft\Windows\CurrentVersion\Uninstall\OpenEncode 52 | HKLM,Software\Microsoft\Windows\CurrentVersion\Uninstall\OpenEncode,DisplayName,,"OpenEncodeVFW codec (Remove Only)" 53 | ;http://msdn.microsoft.com/en-us/library/windows/hardware/ff553598%28v=vs.85%29.aspx 54 | ;%11% - %windir%\system32 (gets translated to SysWOW64 if installed from syswow64 folder) 55 | HKLM,Software\Microsoft\Windows\CurrentVersion\Uninstall\OpenEncode,UninstallString,,"rundll32.exe setupapi.dll,InstallHinfSection DefaultUninstall 132 %11%\OPENENCODE.INF" 56 | ;%17% - use *.inf from %windir%\inf folder 57 | ;HKLM,Software\Microsoft\Windows\CurrentVersion\Uninstall\OpenEncode,UninstallString,,"rundll32.exe setupapi.dll,InstallHinfSection DefaultUninstall 132 %17%\OPENENCODE.INF" 58 | 59 | [OPENENC.INIs] 60 | system.ini, drivers32,, "VIDC.H264=OPENENCODEVFW.DLL" 61 | 62 | [OPENENC.INIs.Del] 63 | system.ini, drivers32, "VIDC.H264=OPENENCODEVFW.DLL" -------------------------------------------------------------------------------- /OpenEncodeVFW/decompress.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "OpenEncodeVFW.h" 3 | 4 | // check if the codec can decompress the given format to the desired format 5 | DWORD CodecInst::DecompressQuery(const LPBITMAPINFOHEADER lpbiIn, const LPBITMAPINFOHEADER lpbiOut){ 6 | 7 | if ( lpbiIn->biCompression != FOURCC_H264 ){ 8 | return_badformat(); 9 | } 10 | 11 | return_badformat();//TODO Doesn't support decoding yet 12 | 13 | return (DWORD)ICERR_OK; 14 | } 15 | 16 | // return the default decompress format for the given input format 17 | DWORD 
CodecInst::DecompressGetFormat(const LPBITMAPINFOHEADER lpbiIn, LPBITMAPINFOHEADER lpbiOut){ 18 | 19 | if ( DecompressQuery(lpbiIn, NULL ) != ICERR_OK){ 20 | return_badformat(); 21 | } 22 | 23 | if ( !lpbiOut) 24 | return sizeof(BITMAPINFOHEADER); 25 | 26 | 27 | return_badformat(); 28 | 29 | return (DWORD)ICERR_OK; 30 | } 31 | 32 | DWORD CodecInst::DecompressGetPalette(LPBITMAPINFOHEADER lpbiIn, LPBITMAPINFOHEADER lpbiOut) { 33 | return_badformat() 34 | } 35 | 36 | // initalize the codec for decompression 37 | DWORD CodecInst::DecompressBegin(LPBITMAPINFOHEADER lpbiIn, LPBITMAPINFOHEADER lpbiOut){ 38 | if ( started == 0x1337){ 39 | DecompressEnd(); 40 | } 41 | started = 0; 42 | 43 | if ( int error = DecompressQuery(lpbiIn,lpbiOut) != ICERR_OK ){ 44 | return error; 45 | } 46 | 47 | started = 0x1337; 48 | return ICERR_OK; 49 | } 50 | 51 | // release resources when decompression is done 52 | DWORD CodecInst::DecompressEnd(){ 53 | if ( started == 0x1337 ){ 54 | 55 | } 56 | started=0; 57 | return ICERR_OK; 58 | } 59 | 60 | 61 | // Called to decompress a frame, the actual decompression will be 62 | // handed off to other functions based on the frame type. 63 | DWORD CodecInst::Decompress(ICDECOMPRESS* icinfo, DWORD dwSize) { 64 | // try { 65 | 66 | DWORD return_code=ICERR_OK; 67 | if ( started != 0x1337 ){ 68 | DecompressBegin(icinfo->lpbiInput,icinfo->lpbiOutput); 69 | } 70 | //out = (uint8 *)icinfo->lpOutput; 71 | //in = (uint8 *)icinfo->lpInput; 72 | icinfo->lpbiOutput->biSizeImage = 0;//length; 73 | 74 | mCompressed_size = icinfo->lpbiInput->biSizeImage; 75 | 76 | return_badformat(); //TODO Doesn't support decoding yet 77 | return return_code; 78 | //} catch ( ... 
){ 79 | // MessageBox (HWND_DESKTOP, "Exception caught in decompress main", "Error", MB_OK | MB_ICONEXCLAMATION); 80 | // return ICERR_INTERNAL; 81 | //} 82 | } 83 | 84 | //MessageBox (HWND_DESKTOP, msg, "Error", MB_OK | MB_ICONEXCLAMATION); -------------------------------------------------------------------------------- /OpenEncodeVFW/resource.h: -------------------------------------------------------------------------------- 1 | //{{NO_DEPENDENCIES}} 2 | // Microsoft Visual C++ generated include file. 3 | // Used by OpenEncodeVFW.rc 4 | // 5 | #define IDD_DIALOG1 101 6 | #define IDR_OPENCL_KERNELS 103 7 | #define IDB_BITMAP1 105 8 | #define IDC_OK 1001 9 | #define IDC_CANCEL 1002 10 | #define IDC_RC_MODE 1007 11 | #define IDC_EDIT2 1008 12 | #define IDC_RC_VAL 1008 13 | #define IDC_SLIDER2 1009 14 | #define IDC_RC_VAL_SLIDER 1009 15 | #define IDC_CABAC 1013 16 | #define IDC_RC_LABEL 1014 17 | #define IDC_OPENCL_LOGO 1015 18 | #define IDC_RC_LOW_LABEL 1016 19 | #define IDC_RC_HIGH_LABEL 1017 20 | #define IDC_USE_OPENCL 1018 21 | #define IDC_BUILD_DATE 1019 22 | #define IDC_COMBO1 1020 23 | #define IDC_DEVICE_CB 1020 24 | #define IDC_PROF_BASE 1021 25 | #define IDC_PROF_MAIN 1022 26 | #define IDC_PROFILE 1023 27 | #define IDC_LEVEL_CB 1024 28 | #define IDC_PROF_HIGH 1025 29 | #define IDC_USE_ME_AMD 1026 30 | #define IDC_SKIP_MV16 1027 31 | #define IDC_FRAMERATE 1028 32 | #define IDC_SEARCHRX 1029 33 | #define IDC_SEARCHRY 1030 34 | #define IDC_BLEND 1031 35 | #define IDC_COLORMATRIX 1031 36 | #define IDC_YV12ASNV12 1032 37 | #define IDC_SPEEDY_MATH 1033 38 | #define IDC_CS_RGBA 1034 39 | #define IDC_LOG 1035 40 | #define IDC_USE_OPENCL2 1036 41 | #define IDC_IDR 1037 42 | #define IDC_USE_OPENCL3 1038 43 | #define IDC_QS_SPEED 1039 44 | #define IDC_QS_BALANCED 1040 45 | #define IDC_QS_QUALITY 1041 46 | #define IDC_LOG2 1042 47 | #define IDC_GOP 1043 48 | #define IDC_LOG3 1044 49 | #define IDC_CROPH 1044 50 | #define IDC_HDRINSERTION 1045 51 | 52 | // Next default 
cmake_minimum_required(VERSION 2.8.5)

# getenv_path(VAR): copies environment variable VAR into ENV_<VAR>,
# converting backslashes to forward slashes.
macro(getenv_path VAR)
   set(ENV_${VAR} $ENV{${VAR}})
   # replace won't work if var is blank
   if (ENV_${VAR})
     string( REGEX REPLACE "\\\\" "/" ENV_${VAR} ${ENV_${VAR}} )
   endif ()
endmacro(getenv_path)

SET(TargetName "OpenEncodeVFW")
project(${TargetName})

# Pick directory/library suffixes for 32 vs 64 bit builds.
# EQUAL compares numerically; MATCHES "8" was a regex test.
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
	SET(DIR_POSTFIX "64")
	SET(LIB_POSTFIX "64")
	SET(LIB_DIR_POSTFIX "_64")
else()
	SET(DIR_POSTFIX "32")
	SET(LIB_POSTFIX "")
	SET(LIB_DIR_POSTFIX "")
endif()

set (PLUGIN_VERSION_MAJOR "0")
set (PLUGIN_VERSION_MINOR "0")
set (PLUGIN_VERSION_PATCH "1")
set (PLUGIN_VERSION "${PLUGIN_VERSION_MAJOR}.${PLUGIN_VERSION_MINOR}.${PLUGIN_VERSION_PATCH}")

set(HDRS
	OpenEncodeVFW/bitstream.h
	OpenEncodeVFW/bs.h
	OpenEncodeVFW/clconvert.h
	OpenEncodeVFW/log.h
	OpenEncodeVFW/OpenEncodeVFW.h
	OpenEncodeVFW/OvEncodeTypedef.h
	OpenEncodeVFW/perf.h
	OpenEncodeVFW/resource.h
	OpenEncodeVFW/stdafx.h
	OpenEncodeVFW/targetver.h
)

set(SRCS
	OpenEncodeVFW/bitstream.cpp
	OpenEncodeVFW/clconvert.cpp
	OpenEncodeVFW/compress.cpp
	OpenEncodeVFW/configure.cpp
	OpenEncodeVFW/decompress.cpp
	OpenEncodeVFW/device.cpp
	OpenEncodeVFW/dllmain.cpp
	OpenEncodeVFW/drvproc.cpp
	OpenEncodeVFW/log.cpp
	OpenEncodeVFW/OpenEncodeVFW.cpp
	OpenEncodeVFW/perf.cpp
	OpenEncodeVFW/stdafx.cpp
)

SOURCE_GROUP("Header Files" FILES ${HDRS})
SOURCE_GROUP("Source Files" FILES ${SRCS})

### Find and setup AMD APP SDK
getenv_path(AMDAPPSDKROOT)
SET (BUILD_AMDAPPSDK_INCLUDE "${ENV_AMDAPPSDKROOT}/include" CACHE PATH "AMD APP SDK include dir")
SET (BUILD_AMDAPPSDK_LIB "${ENV_AMDAPPSDKROOT}/lib/x86${LIB_DIR_POSTFIX}" CACHE PATH "AMD APP SDK library dir")
include_directories(${BUILD_AMDAPPSDK_INCLUDE})
link_directories(${BUILD_AMDAPPSDK_LIB})
LIST(APPEND LIBS OpenCL OpenVideo${LIB_POSTFIX} WinMM)

file(GLOB RCS ./OpenEncodeVFW/*.rc)
file(GLOB DEF ./OpenEncodeVFW/*.def)

add_definitions(-D_UNICODE -DUNICODE) #Make VS use unicode instead of multi-byte
add_definitions(-DOPENENCODEVFW_EXPORTS)
add_library(${TargetName} SHARED ${SRCS} ${HDRS} ${RCS} ${DEF})
target_link_libraries(${TargetName} ${LIBS})
#set_target_properties(${TargetName} PROPERTIES DEBUG_POSTFIX _d)
set_target_properties(${TargetName} PROPERTIES DEBUG_POSTFIX "")
set_target_properties(${TargetName} PROPERTIES VERSION ${PLUGIN_VERSION} SOVERSION ${PLUGIN_VERSION_MAJOR})

# post-build copy for win32
# Uses "cmake -E" and $<TARGET_FILE:...> instead of cmd built-ins and the
# Visual Studio-only $(TargetPath) macro, so the step also works with
# non-IDE generators (NMake, Ninja) that match this condition.
IF(WIN32 AND NOT MINGW)
	add_custom_command( TARGET ${TargetName} PRE_BUILD
		COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_SOURCE_DIR}/OpenEncode${DIR_POSTFIX}"
		VERBATIM )
	add_custom_command( TARGET ${TargetName} POST_BUILD
		COMMAND ${CMAKE_COMMAND} -E copy "$<TARGET_FILE:${TargetName}>" "${CMAKE_SOURCE_DIR}/OpenEncode${DIR_POSTFIX}"
		VERBATIM )
ENDIF(WIN32 AND NOT MINGW)

# Left over from another CMakeList
# set the executable output path for UNIX and MinGW instead of post-build copy
IF(MINGW)
	set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/OpenEncode${DIR_POSTFIX})
ENDIF(MINGW)

#-DCMAKE_INSTALL_PREFIX=some/where
#IF(WIN32)
#	install(TARGETS ${TargetName}
#		RUNTIME DESTINATION .
#		CONFIGURATIONS Release RelWithDebInfo MinSizeRel Debug) #All
#ENDIF(WIN32)
// NAL unit types this parser distinguishes; the numeric values match the
// NAL_UNIT_TYPE_* codes 0..8 defined earlier in this header.
enum nal_unit_type_e
{
	NAL_UNKNOWN = 0,
	NAL_SLICE = 1,
	NAL_SLICE_DPA = 2,
	NAL_SLICE_DPB = 3,
	NAL_SLICE_DPC = 4,
	NAL_SLICE_IDR = 5, /* ref_idc != 0 */
	NAL_SEI = 6, /* ref_idc == 0 */
	NAL_SPS = 7,
	NAL_PPS = 8
	/* ref_idc == 0 for 6,9,10,11,12 */
};
// nal_ref_idc priorities; same values as the NAL_REF_IDC_PRIORITY_* macros.
enum nal_priority_e
{
	NAL_PRIORITY_DISPOSABLE = 0,
	NAL_PRIORITY_LOW = 1,
	NAL_PRIORITY_HIGH = 2,
	NAL_PRIORITY_HIGHEST = 3,
};

// One NAL unit: header fields plus a pointer to its payload.
typedef struct
{
	int i_ref_idc; /* nal_priority_e */
	int i_type; /* nal_unit_type_e */

	/* Raw payload: i_payload is presumably the byte count of p_payload. */
	int i_payload;
	uint8 *p_payload;
} nal_t;

// Stream and picture state kept while parsing.
// NOTE(review): field meanings are inferred from their names only; the
// code that fills them is outside this header. Confirm before relying on them.
typedef struct
{
	int i_width;
	int i_height;

	int i_nal_type;
	int i_ref_idc;
	int i_idr_pic_id;
	int i_frame_num;
	int i_poc;

	int b_key;
	int i_log2_max_frame_num;
	int i_poc_type;
	int i_log2_max_poc_lsb;
} h264_t;

// H.264 bitstream parser state plus its entry points.
// All members are public; the methods are declared here only.
class Parser
{
public:

	h264_t h264;

	nal_t nal;
	int i_frame;
	int i_data;
	int b_eof;
	int b_key;
	int b_slice;
	int b_hiprofile;

	// Zeroes the counters and flags, clears the payload pointer and then
	// calls init(). No payload buffer is allocated here (the malloc is
	// commented out).
	Parser(): i_frame(0), i_data(0),
		b_eof(0), b_key(0), b_slice(0), b_hiprofile(0)
	{
		nal.p_payload = NULL;//(uint8*) malloc(MAX_DATA);
		init();
	}

	// Only clears the payload pointer; nothing is freed because nothing
	// is allocated by the constructor.
	~Parser()
	{
		//free(nal.p_payload);
		nal.p_payload = 0;
	}

	// Defined outside this header; presumably resets h264/nal state -- TODO confirm.
	void init();
	// Defined outside this header. NOTE(review): pb_nal_start looks like
	// an out-flag signalling the start of a new NAL unit -- confirm.
	void parse( /*h264_t *h, nal_t *n, */ int *pb_nal_start );

	// Defined outside this header; takes a raw buffer and its length in
	// i_data. The meaning of the return value is not visible here.
	int nal_decode( /*nal_t *nal,*/ void *p_data, int i_data );
};
rgb = false): 55 | g_context(ctx), g_cmd_queue(cmdqueue), deviceID(dev), 56 | iWidth(width), oWidth(width), 57 | iHeight(height), oHeight(height), bpp_bytes(_bpp_bytes), 58 | g_y_kernel(NULL), g_uv_kernel(NULL), 59 | host_ptr(NULL), g_output_size(0), g_outputBuffer(NULL), 60 | g_program(NULL), mLog(lg), mProf(prof), 61 | mOptimize(opt), 62 | profSecs1(0), profSecs2(0), prof2ndPass(false), 63 | hRaw(NULL), mRGB(rgb), needsDestriding(false) 64 | { 65 | localThreads_Max[0] = 1; 66 | localThreads_Max[1] = 1; 67 | 68 | g_inputBuffer[0] = NULL; 69 | g_inputBuffer[1] = NULL; 70 | } 71 | 72 | ~clConvert() 73 | { 74 | Cleanup_OpenCL(); 75 | } 76 | 77 | /*bool init() 78 | { 79 | //if(setupCL() == SUCCESS && ) 80 | if(createKernels() == SUCCESS && encodeInit(false) == SUCCESS) 81 | return true; 82 | return false; 83 | }*/ 84 | 85 | int convert(const uint8* srcPtr, cl_mem dstBuffer, bool profile); 86 | 87 | int decodeInit(); 88 | int encodeInit(cl_mem dstBuffer); 89 | int createKernels(COLORMATRIX matrix); 90 | 91 | private: 92 | //cl_platform_id platform; //OVEncode CL platform ? 
93 | 94 | std::string deviceType;// = "cpu"; 95 | unsigned int num_event_in_wait_list; 96 | int iWidth; //input 97 | int iHeight; 98 | int oWidth; //output 99 | int oHeight; 100 | int oAlignedWidth; 101 | int bmpStride, mapStride; 102 | bool needsDestriding; 103 | unsigned int bpp_bytes; 104 | void *host_ptr; 105 | void *mapPtr; 106 | std::map bufferMap; 107 | int input_size; 108 | int g_output_size; 109 | cl_mem g_inputBuffer[2]; 110 | cl_mem g_outputBuffer; 111 | cl_context g_context; 112 | cl_command_queue g_cmd_queue; 113 | cl_program g_program; 114 | FILE *hRaw; 115 | OVprofile *mProf; 116 | 117 | // Kernels 118 | cl_kernel g_y_kernel; 119 | cl_kernel g_uv_kernel; 120 | size_t localThreads_Max[2];// = {1, 1}; 121 | 122 | bool g_bRunOnGPU;// = false; 123 | cl_device_id deviceID;// = 0; 124 | Logger *mLog; 125 | bool mOptimize; 126 | bool mRGB; 127 | 128 | int setupCL(); 129 | int waitForEventAndRelease(cl_event *event); 130 | void Cleanup_OpenCL(); 131 | 132 | int setKernelArgs(cl_kernel kernel, cl_mem input, cl_mem output); 133 | int setKernelOffset(cl_kernel kernel, int offset); 134 | int runKernel(cl_kernel kernel, 135 | cl_command_queue queue, 136 | size_t globalThreads[2], 137 | size_t localThreads[2], 138 | double *prof, 139 | bool wait); 140 | int profileEvent(cl_event evt, double *prof); 141 | 142 | template 143 | int checkVal( 144 | T input, 145 | T reference, 146 | std::string message, 147 | bool isAPIerror); 148 | }; 149 | 150 | 151 | #endif -------------------------------------------------------------------------------- /OpenEncodeVFW/OpenEncodeVFW.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | 
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | {785b9d67-5138-4b46-b920-f816bc3ae91f} 18 | 19 | 20 | 21 | 22 | Source Files 23 | 24 | 25 | Source Files 26 | 27 | 28 | Source Files 29 | 30 | 31 | Source Files 32 | 33 | 34 | Source Files 35 | 36 | 37 | Source Files 38 | 39 | 40 | Source Files 41 | 42 | 43 | Source Files 44 | 45 | 46 | Source Files 47 | 48 | 49 | Source Files 50 | 51 | 52 | Source Files 53 | 54 | 55 | Source Files 56 | 57 | 58 | 59 | 60 | Source Files 61 | 62 | 63 | 64 | Resource Files 65 | 66 | 67 | Resource Files 68 | 69 | 70 | 71 | 72 | Header Files 73 | 74 | 75 | Header Files 76 | 77 | 78 | Header Files 79 | 80 | 81 | Header Files 82 | 83 | 84 | Header Files 85 | 86 | 87 | Header Files 88 | 89 | 90 | Header Files 91 | 92 | 93 | Header Files 94 | 95 | 96 | Header Files 97 | 98 | 99 | Header Files 100 | 101 | 102 | Header Files 103 | 104 | 105 | Header Files 106 | 107 | 108 | Header Files 109 | 110 | 111 | Header Files\CL 112 | 113 | 114 | Header Files\CL 115 | 116 | 117 | Header Files\CL 118 | 119 | 120 | Header Files 121 | 122 | 123 | 124 | 125 | Resource Files 126 | 127 | 128 | -------------------------------------------------------------------------------- /OpenEncodeVFW/oveDynload.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "OVEncodeDyn.h" 3 | 4 | static HMODULE hMod = NULL, hModCL = NULL; 5 | static int64 refCount = 0; 6 | #define FUNDEF(x) f_##x x = nullptr 7 | #define FUNDEFDECL(x) decltype(&x) f_##x = nullptr 8 | 9 | #define LOADPROC(x) \ 10 | do {\ 11 | if ((##x = (f_##x)GetProcAddress(hMod, #x)) == NULL){\ 12 | goto error; }\ 13 | } while (0) 14 | 15 | #define LOADPROC2(x) \ 16 | do {\ 17 | if ((f_##x = (decltype(&##x))GetProcAddress(hModCL, #x)) == NULL){\ 18 | goto error; }\ 19 | } while (0) 20 | 21 | FUNDEF(OVEncodeGetDeviceInfo); 22 | FUNDEF(OVEncodeGetDeviceCap); 23 | FUNDEF(OVCreateOVEHandleFromOPHandle); 24 | 
FUNDEF(OVReleaseOVEHandle); 25 | FUNDEF(OVEncodeAcquireObject); 26 | FUNDEF(OVEncodeReleaseObject); 27 | FUNDEF(OVCreateOVEEventFromOPEventHandle); 28 | FUNDEF(OVEncodeReleaseOVEEventHandle); 29 | FUNDEF(OVEncodeCreateSession); 30 | FUNDEF(OVEncodeDestroySession); 31 | FUNDEF(OVEncodeGetPictureControlConfig); 32 | FUNDEF(OVEncodeGetRateControlConfig); 33 | FUNDEF(OVEncodeGetMotionEstimationConfig); 34 | FUNDEF(OVEncodeGetRDOControlConfig); 35 | FUNDEF(OVEncodeSendConfig); 36 | FUNDEF(OVEncodeTask); 37 | FUNDEF(OVEncodeQueryTaskDescription); 38 | FUNDEF(OVEncodeReleaseTask); 39 | 40 | FUNDEFDECL(clGetPlatformIDs); 41 | FUNDEFDECL(clGetPlatformInfo); 42 | FUNDEFDECL(clCreateContext); 43 | FUNDEFDECL(clReleaseContext); 44 | FUNDEFDECL(clCreateContextFromType); 45 | FUNDEFDECL(clCreateCommandQueue); 46 | FUNDEFDECL(clReleaseCommandQueue); 47 | FUNDEFDECL(clGetEventInfo); 48 | FUNDEFDECL(clCreateBuffer); 49 | FUNDEFDECL(clCreateKernel); 50 | FUNDEFDECL(clReleaseMemObject); 51 | FUNDEFDECL(clReleaseKernel); 52 | FUNDEFDECL(clReleaseProgram); 53 | FUNDEFDECL(clEnqueueMapBuffer); 54 | FUNDEFDECL(clEnqueueNDRangeKernel); 55 | FUNDEFDECL(clEnqueueUnmapMemObject); 56 | FUNDEFDECL(clReleaseEvent); 57 | FUNDEFDECL(clCreateProgramWithSource); 58 | FUNDEFDECL(clBuildProgram); 59 | FUNDEFDECL(clSetKernelArg); 60 | FUNDEFDECL(clGetDeviceInfo); 61 | FUNDEFDECL(clFlush); 62 | FUNDEFDECL(clFinish); 63 | FUNDEFDECL(clWaitForEvents); 64 | FUNDEFDECL(clGetProgramBuildInfo); 65 | FUNDEFDECL(clGetEventProfilingInfo); 66 | FUNDEFDECL(clEnqueueWriteBuffer); 67 | FUNDEFDECL(clGetDeviceIDs); 68 | FUNDEFDECL(clGetKernelWorkGroupInfo); 69 | 70 | void deinitOVE() 71 | { 72 | InterlockedDecrement64(&refCount); 73 | if (refCount > 0) 74 | return; 75 | 76 | //TODO a bit suspect 77 | if (refCount < 0) 78 | refCount = 0; 79 | 80 | if (hMod) 81 | { 82 | FreeLibrary(hMod); 83 | hMod = NULL; 84 | } 85 | 86 | if (hModCL) 87 | { 88 | FreeLibrary(hModCL); 89 | hModCL = NULL; 90 | } 91 | } 92 | 93 | bool 
initOVE() 94 | { 95 | InterlockedIncrement64(&refCount); 96 | if (hMod && hModCL && f_clGetKernelWorkGroupInfo) 97 | return true; 98 | 99 | #ifdef _WIN64 100 | hMod = LoadLibrary(TEXT("OpenVideo64.dll")); 101 | #else 102 | hMod = LoadLibrary(TEXT("OpenVideo.dll")); 103 | #endif 104 | hModCL = LoadLibrary(TEXT("OpenCL.dll")); 105 | 106 | if (!hMod || !hModCL) 107 | { 108 | deinitOVE(); 109 | return false; 110 | } 111 | 112 | LOADPROC(OVEncodeGetDeviceInfo); 113 | LOADPROC(OVEncodeGetDeviceCap); 114 | LOADPROC(OVCreateOVEHandleFromOPHandle); 115 | LOADPROC(OVReleaseOVEHandle); 116 | LOADPROC(OVEncodeAcquireObject); 117 | LOADPROC(OVEncodeReleaseObject); 118 | LOADPROC(OVCreateOVEEventFromOPEventHandle); 119 | LOADPROC(OVEncodeReleaseOVEEventHandle); 120 | LOADPROC(OVEncodeCreateSession); 121 | LOADPROC(OVEncodeDestroySession); 122 | LOADPROC(OVEncodeGetPictureControlConfig); 123 | LOADPROC(OVEncodeGetRateControlConfig); 124 | LOADPROC(OVEncodeGetMotionEstimationConfig); 125 | LOADPROC(OVEncodeGetRDOControlConfig); 126 | LOADPROC(OVEncodeSendConfig); 127 | LOADPROC(OVEncodeTask); 128 | LOADPROC(OVEncodeQueryTaskDescription); 129 | LOADPROC(OVEncodeReleaseTask); 130 | 131 | LOADPROC2(clGetPlatformIDs); 132 | LOADPROC2(clGetPlatformInfo); 133 | LOADPROC2(clCreateContext); 134 | LOADPROC2(clReleaseContext); 135 | LOADPROC2(clCreateContextFromType); 136 | LOADPROC2(clCreateCommandQueue); 137 | LOADPROC2(clReleaseCommandQueue); 138 | LOADPROC2(clGetEventInfo); 139 | LOADPROC2(clCreateBuffer); 140 | LOADPROC2(clCreateKernel); 141 | LOADPROC2(clReleaseMemObject); 142 | LOADPROC2(clReleaseKernel); 143 | LOADPROC2(clReleaseProgram); 144 | LOADPROC2(clEnqueueMapBuffer); 145 | LOADPROC2(clEnqueueNDRangeKernel); 146 | LOADPROC2(clEnqueueUnmapMemObject); 147 | LOADPROC2(clReleaseEvent); 148 | LOADPROC2(clCreateProgramWithSource); 149 | LOADPROC2(clBuildProgram); 150 | LOADPROC2(clSetKernelArg); 151 | LOADPROC2(clGetDeviceInfo); 152 | LOADPROC2(clFlush); 153 | LOADPROC2(clFinish); 
154 | LOADPROC2(clWaitForEvents); 155 | LOADPROC2(clGetProgramBuildInfo); 156 | LOADPROC2(clGetEventProfilingInfo); 157 | LOADPROC2(clEnqueueWriteBuffer); 158 | LOADPROC2(clGetDeviceIDs); 159 | LOADPROC2(clGetKernelWorkGroupInfo); 160 | return true; 161 | 162 | error: 163 | deinitOVE(); 164 | return false; 165 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # THIS DOESN'T WORK anymore with newer Catalyst drivers (since 15.7) 2 | # OpenEncodeVFW 3 | 4 | VFW encoder for AMD VCE h264 encoder. Usable with Virtualdub, Dxtory etc. 5 | 6 | https://github.com/jackun/openencodevfw/archive/master.zip 7 | 8 | Extra settings are saved to registry under `HKCU\Software\OpenEncodeVFW` 9 | 10 | **As OpenEncode has been deprecated by AMD for a long time already, it appears the support has been finally dropped from Catalyst 15.7** 11 | 12 | **You may need to install [MSVC++ 2013 runtimes](http://www.microsoft.com/en-us/download/details.aspx?id=40784).** 13 | Last [MSVC++2010 commit](https://github.com/jackun/openencodevfw/tree/d6c7c53b61af9447b30d6d6d86be8725801d0fb7). 14 | 15 | **NOTE: You need to install x86 version for 32bit codec even if your Windows is 64 bit.** 16 | 17 | **NOTE: VCE on cards/APUs prior to Tonga only go up to 1080p and solid 1080p60 recording can not be guaranteed (yet) unfortunately.** 18 | 19 | ## Compatible hardware 20 | 21 | AMD's GCN based cards and APUs. 
22 | From [AMD's blog](http://developer.amd.com/community/blog/2014/02/19/introducing-video-coding-engine-vce/): 23 | 24 | | VCE Version | Product Family | Distinguishing Features | 25 | | :---------: |:--------------:| -----------------------| 26 | | VCE 1.0 | Radeon HD 7900 series/Radeon R9 280X dGPU | First release: AVC – I,P and DEM | 27 | | | Radeon HD 7800 series dGPU | | 28 | | | Radeon R9 270X/270 dGPU | | 29 | | | Radeon HD 7700 series/Radeon R7 250X dGPU | | 30 | | | A10 – 58XX (and other variations) APU | | 31 | | | A10 – 68XX APU | | 32 | | | | | 33 | | VCE 2.0 | Radeon R9 390x/390/290x/290 dGPU | SVC (temporal) + B-pictures + DEM improvements | 34 | | | Radeon R7 260X/260 dGPU | | 35 | | | A10 – 7850K APU | | 36 | | | A4-5350, A4-3850, or E1-2650 APU | | 37 | | | A4-1200/A6-1450 APU | | 38 | | | | | 39 | | VCE 3.0 | Radeon R9 Fury/285 dGPU | 4K | 40 | 41 | 42 | ## Installing 43 | 44 | * Unpack the archive somewhere, right click on `install.bat` and click `Run as Administrator`. 45 | 46 | If it complains about missing files, try the more manual version: 47 | 48 | * Unpack the archive somewhere, open command prompt as administrator by typing `cmd` to start menu or "Metro" and press SHIFT+CTRL+Enter or right click on the icon and click `Run as Administrator`. 49 | * Go to unpacked folder by typing into opened command prompt `cd some\where\OpenEncodeVFW-bin`. 50 | * Type `install.bat` and press enter to run the installer. 
51 | 52 | ## Uninstalling 53 | 54 | If the uninstaller fails to do its job, manually remove these registry keys: 55 | 56 | HKLM\SYSTEM\CurrentControlSet\Control\MediaResources\icm\VIDC.H264 57 | HKLM\Software\Microsoft\Windows NT\CurrentVersion\drivers.desc\OPENENCODEVFW.DLL 58 | HKLM\Software\Microsoft\Windows NT\CurrentVersion\Drivers32\VIDC.H264 59 | HKLM\Software\Wow6432Node\Microsoft\Windows NT\CurrentVersion\drivers.desc\OPENENCODEVFW.DLL 60 | HKLM\Software\Wow6432Node\Microsoft\Windows NT\CurrentVersion\Drivers32\VIDC.H264 61 | 62 | and `OPENENCODEVFW.DLL` in %WINDIR%\syswow64 or %WINDIR%\system32 63 | 64 | ## Recommended usage 65 | * 32 bit input format 66 | * width/height multiples of 2 67 | 68 | ## Some setting descriptions 69 | 70 | * `Fixed QP` basically keeps picture quality constant across all frames. 71 | * `CBR` keeps a constant bitrate, so picture quality gets worse if the video has frequent fast motion and the bitrate is too low, and hard drive space is wasted if a frame could have been compressed more. The actual bitrate seems to fluctuate too much, though. 72 | * `VBR` uses a variable bitrate: it tries to stay at the target bitrate, but raises the bitrate a little if needed or lowers it if a frame can be compressed more. 73 | * [CABAC](http://en.wikipedia.org/wiki/Context-adaptive_binary_arithmetic_coding) is a more efficient but more resource-intensive encoding option. 74 | * `Search range` is the motion vector range. It specifies how far the codec looks for moved pixels, so that it can just record that these pixels moved to x,y and save only that. Higher (max 36?) is better but more resource intensive. 75 | * `Profiles` / `levels`: start from http://en.wikipedia.org/wiki/H.264/MPEG-4_AVC#Profiles . Colorspace is limited to Y'UV420. 76 | 77 | Probably not very accurate descriptions :P 78 | 79 | Also: 80 | 81 | * `Send FPS` sets the encoder framerate properties to the video framerate, but not all framerates are supported by the encoder.
Untick it to treat all videos as having 30 fps, but this may make encoding inefficient and increase the bitrate more than necessary. 82 | * `Speedy Math` tries to speed up OpenCL floating point math by making it less accurate, but should be good enough. 83 | * `Switch byte order` : for the rare case when the input bitmap is RGB(A) instead of BGR(A). 84 | * `Header insertion` : adds SPS/PPS to every frame, which may make cutting/splitting video easier. More of a 'debug' feature. 85 | 86 | **Quickset** buttons for speed vs quality: 87 | 88 | * `Speed` : encodes 1080p at 60+ fps (theoretical max 80+) 89 | * `Balanced` : encodes 1080p at 40+ fps 90 | * `Quality` : encodes 1080p at 30+ fps (can probably do 720p@60) 91 | 92 | 93 | Newer AMD cards (Hawaii+) seem to support B-frames, though VCE may not actually generate B-frames with OpenVideo, and AVI kinda sucks with these ([see](http://guru.multimedia.cx/avi-and-b-frames/)). You may need to remux to MKV/MP4 for better audio/video sync. 94 | (Also [maybe](https://trac.ffmpeg.org/ticket/1979#comment:7) `ffmpeg -fflags +genpts`) 95 | -------------------------------------------------------------------------------- /OpenEncodeVFW/ReadMe.txt: -------------------------------------------------------------------------------- 1 | ======================================================================== 2 | OpenEncodeVFW Project Overview 3 | ======================================================================== 4 | 5 | NOTE: I'm trying to use unicode everywhere. VFW will probably hate that. 6 | 7 | Using AMD's OpenEncode sample and VFW parts from Lagarith lossless codec. 8 | 9 | ======================================================================== 10 | AMD PARTS 11 | ======================================================================== 12 | 13 | Copyright (c) 2012 Advanced Micro Devices, Inc. All rights reserved.
14 | 15 | Redistribution and use of this material is permitted under the following 16 | conditions: 17 | 18 | Redistributions must retain the above copyright notice and all terms of this 19 | license. 20 | 21 | In no event shall anyone redistributing or accessing or using this material 22 | commence or participate in any arbitration or legal action relating to this 23 | material against Advanced Micro Devices, Inc. or any copyright holders or 24 | contributors. The foregoing shall survive any expiration or termination of 25 | this license or any agreement or access or use related to this material. 26 | 27 | ANY BREACH OF ANY TERM OF THIS LICENSE SHALL RESULT IN THE IMMEDIATE REVOCATION 28 | OF ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE THIS MATERIAL. 29 | 30 | THIS MATERIAL IS PROVIDED BY ADVANCED MICRO DEVICES, INC. AND ANY COPYRIGHT 31 | HOLDERS AND CONTRIBUTORS "AS IS" IN ITS CURRENT CONDITION AND WITHOUT ANY 32 | REPRESENTATIONS, GUARANTEE, OR WARRANTY OF ANY KIND OR IN ANY WAY RELATED TO 33 | SUPPORT, INDEMNITY, ERROR FREE OR UNINTERRUPTED OPERA TION, OR THAT IT IS FREE 34 | FROM DEFECTS OR VIRUSES. ALL OBLIGATIONS ARE HEREBY DISCLAIMED - WHETHER 35 | EXPRESS, IMPLIED, OR STATUTORY - INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED 36 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, 37 | ACCURACY, COMPLETENESS, OPERABILITY, QUALITY OF SERVICE, OR NON-INFRINGEMENT. 38 | IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR 39 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE, 40 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 41 | OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, REVENUE, DATA, OR PROFITS; OR 42 | BUSINESS INTERRUPTION) HOWEVER CAUSED OR BASED ON ANY THEORY OF LIABILITY 43 | ARISING IN ANY WAY RELATED TO THIS MATERIAL, EVEN IF ADVISED OF THE POSSIBILITY 44 | OF SUCH DAMAGE. THE ENTIRE AND AGGREGATE LIABILITY OF ADVANCED MICRO DEVICES, 45 | INC. 
AND ANY COPYRIGHT HOLDERS AND CONTRIBUTORS SHALL NOT EXCEED TEN DOLLARS 46 | (US $10.00). ANYONE REDISTRIBUTING OR ACCESSING OR USING THIS MATERIAL ACCEPTS 47 | THIS ALLOCATION OF RISK AND AGREES TO RELEASE ADVANCED MICRO DEVICES, INC. AND 48 | ANY COPYRIGHT HOLDERS AND CONTRIBUTORS FROM ANY AND ALL LIABILITIES, 49 | OBLIGATIONS, CLAIMS, OR DEMANDS IN EXCESS OF TEN DOLLARS (US $10.00). THE 50 | FOREGOING ARE ESSENTIAL TERMS OF THIS LICENSE AND, IF ANY OF THESE TERMS ARE 51 | CONSTRUED AS UNENFORCEABLE, FAIL IN ESSENTIAL PURPOSE, OR BECOME VOID OR 52 | DETRIMENTAL TO ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR 53 | CONTRIBUTORS FOR ANY REASON, THEN ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE 54 | THIS MATERIAL SHALL TERMINATE IMMEDIATELY. MOREOVER, THE FOREGOING SHALL 55 | SURVIVE ANY EXPIRATION OR TERMINATION OF THIS LICENSE OR ANY AGREEMENT OR 56 | ACCESS OR USE RELATED TO THIS MATERIAL. 57 | 58 | NOTICE IS HEREBY PROVIDED, AND BY REDISTRIBUTING OR ACCESSING OR USING THIS 59 | MATERIAL SUCH NOTICE IS ACKNOWLEDGED, THAT THIS MATERIAL MAY BE SUBJECT TO 60 | RESTRICTIONS UNDER THE LAWS AND REGULATIONS OF THE UNITED STATES OR OTHER 61 | COUNTRIES, WHICH INCLUDE BUT ARE NOT LIMITED TO, U.S. EXPORT CONTROL LAWS SUCH 62 | AS THE EXPORT ADMINISTRATION REGULATIONS AND NATIONAL SECURITY CONTROLS AS 63 | DEFINED THEREUNDER, AS WELL AS STATE DEPARTMENT CONTROLS UNDER THE U.S. 64 | MUNITIONS LIST. THIS MATERIAL MAY NOT BE USED, RELEASED, TRANSFERRED, IMPORTED, 65 | EXPORTED AND/OR RE-EXPORTED IN ANY MANNER PROHIBITED UNDER ANY APPLICABLE LAWS, 66 | INCLUDING U.S. EXPORT CONTROL LAWS REGARDING SPECIFICALLY DESIGNATED PERSONS, 67 | COUNTRIES AND NATIONALS OF COUNTRIES SUBJECT TO NATIONAL SECURITY CONTROLS. 68 | MOREOVER, THE FOREGOING SHALL SURVIVE ANY EXPIRATION OR TERMINATION OF ANY 69 | LICENSE OR AGREEMENT OR ACCESS OR USE RELATED TO THIS MATERIAL. 70 | 71 | NOTICE REGARDING THE U.S. 
GOVERNMENT AND DOD AGENCIES: This material is 72 | provided with "RESTRICTED RIGHTS" and/or "LIMITED RIGHTS" as applicable to 73 | computer software and technical data, respectively. Use, duplication, 74 | distribution or disclosure by the U.S. Government and/or DOD agencies is 75 | subject to the full extent of restrictions in all applicable regulations, 76 | including those found at FAR52.227 and DFARS252.227 et seq. and any successor 77 | regulations thereof. Use of this material by the U.S. Government and/or DOD 78 | agencies is acknowledgment of the proprietary rights of any copyright holders 79 | and contributors, including those of Advanced Micro Devices, Inc., as well as 80 | the provisions of FAR52.227-14 through 23 regarding privately developed and/or 81 | commercial computer software. 82 | 83 | This license forms the entire agreement regarding the subject matter hereof and 84 | supersedes all proposals and prior discussions and writings between the parties 85 | with respect thereto. This license does not affect any ownership, rights, title, 86 | or interest in, or relating to, this material. No terms of this license can be 87 | modified or waived, and no breach of this license can be excused, unless done 88 | so in a writing signed by all affected parties. Each term of this license is 89 | separately enforceable. If any term of this license is determined to be or 90 | becomes unenforceable or illegal, such term shall be reformed to the minimum 91 | extent necessary in order for this license to remain in effect in accordance 92 | with its terms as modified by such reformation. This license shall be governed 93 | by and construed in accordance with the laws of the State of Texas without 94 | regard to rules on conflicts of law of any state or jurisdiction or the United 95 | Nations Convention on the International Sale of Goods. 
All disputes arising out 96 | of this license shall be subject to the jurisdiction of the federal and state 97 | courts in Austin, Texas, and all defenses are hereby waived concerning personal 98 | jurisdiction and venue of these courts. -------------------------------------------------------------------------------- /OpenEncodeVFW/drvproc.cpp: -------------------------------------------------------------------------------- 1 | // Based off of Lagarith which is based off of Ben Rudiak-Gould's huffyuv source code 2 | 3 | #include "stdafx.h" 4 | #include "OpenEncodeVFW.h" 5 | 6 | /*************************************************************************** 7 | * DriverProc - The entry point for an installable driver. 8 | * 9 | * PARAMETERS 10 | * dwDriverId: For most messages, is the DWORD 11 | * value that the driver returns in response to a message. 12 | * Each time that the driver is opened, through the API, 13 | * the driver receives a message and can return an 14 | * arbitrary, non-zero value. The installable driver interface 15 | * saves this value and returns a unique driver handle to the 16 | * application. Whenever the application sends a message to the 17 | * driver using the driver handle, the interface routes the message 18 | * to this entry point and passes the corresponding . 19 | * This mechanism allows the driver to use the same or different 20 | * identifiers for multiple opens but ensures that driver handles 21 | * are unique at the application interface layer. 22 | * 23 | * The following messages are not related to a particular open 24 | * instance of the driver. For these messages, the dwDriverId 25 | * will always be zero. 26 | * 27 | * DRV_LOAD, DRV_FREE, DRV_ENABLE, DRV_DISABLE, DRV_OPEN 28 | * 29 | * hDriver: This is the handle returned to the application by the 30 | * driver interface. 31 | * 32 | * uiMessage: The requested action to be performed. Message 33 | * values below are used for globally defined messages. 
34 | * Message values from to are used for 35 | * defined driver protocols. Messages above are used 36 | * for driver specific messages. 37 | * 38 | * lParam1: Data for this message. Defined separately for 39 | * each message 40 | * 41 | * lParam2: Data for this message. Defined separately for 42 | * each message 43 | * 44 | * RETURNS 45 | * Defined separately for each message. 46 | * 47 | ***************************************************************************/ 48 | LRESULT OPENENCODEVFW_API PASCAL DriverProc(DWORD_PTR dwDriverID, HDRVR hDriver, UINT uiMessage, LPARAM lParam1, LPARAM lParam2) { 49 | CodecInst* pi = (CodecInst*)dwDriverID; 50 | switch (uiMessage) { 51 | case DRV_LOAD: 52 | return (LRESULT)1L; 53 | 54 | case DRV_FREE: 55 | return (LRESULT)1L; 56 | 57 | case DRV_OPEN: 58 | return (LRESULT)Open((ICOPEN*) lParam2); 59 | 60 | case DRV_CLOSE: 61 | if (pi) Close(pi); 62 | return (LRESULT)1L; 63 | 64 | /********************************************************************* 65 | 66 | state messages 67 | 68 | *********************************************************************/ 69 | 70 | // cwk 71 | case DRV_QUERYCONFIGURE: // configuration from drivers applet 72 | return (LRESULT)1L; 73 | 74 | case DRV_CONFIGURE: 75 | pi->Configure((HWND)lParam1); 76 | return DRV_OK; 77 | 78 | case ICM_CONFIGURE: 79 | // 80 | // return ICERR_OK if you will do a configure box, error otherwise 81 | // 82 | if (lParam1 == -1) 83 | return ICERR_OK; 84 | else 85 | return pi->Configure((HWND)lParam1); 86 | 87 | case ICM_ABOUT: 88 | return ICERR_UNSUPPORTED; 89 | 90 | case ICM_GETSTATE: 91 | return pi->GetState((LPVOID)lParam1, (DWORD)lParam2); 92 | 93 | case ICM_SETSTATE: 94 | return pi->SetState((LPVOID)lParam1, (DWORD)lParam2); 95 | 96 | case ICM_GETINFO: 97 | return pi->GetInfo((ICINFO*)lParam1, (DWORD)lParam2); 98 | 99 | case ICM_GETDEFAULTQUALITY: 100 | if (lParam1) { 101 | *((LPDWORD)lParam1) = 10000; 102 | return ICERR_OK; 103 | } 104 | break; 105 | 106 | 
/********************************************************************* 107 | 108 | compression messages 109 | 110 | *********************************************************************/ 111 | 112 | case ICM_COMPRESS_QUERY: 113 | return pi->CompressQuery((LPBITMAPINFOHEADER)lParam1, (LPBITMAPINFOHEADER)lParam2); 114 | 115 | case ICM_COMPRESS_BEGIN: 116 | return pi->CompressBegin((LPBITMAPINFOHEADER)lParam1, (LPBITMAPINFOHEADER)lParam2); 117 | 118 | case ICM_COMPRESS_GET_FORMAT: 119 | return pi->CompressGetFormat((LPBITMAPINFOHEADER)lParam1, (LPBITMAPINFOHEADER)lParam2); 120 | 121 | case ICM_COMPRESS_GET_SIZE: 122 | return pi->CompressGetSize((LPBITMAPINFOHEADER)lParam1, (LPBITMAPINFOHEADER)lParam2); 123 | 124 | case ICM_COMPRESS: 125 | return pi->Compress((ICCOMPRESS*)lParam1, (DWORD)lParam2); 126 | 127 | case ICM_COMPRESS_END: 128 | return pi->CompressEnd(); 129 | 130 | case ICM_COMPRESS_FRAMES_INFO: 131 | return pi->CompressFramesInfo((ICCOMPRESSFRAMES *)lParam1); 132 | 133 | /********************************************************************* 134 | 135 | decompress messages 136 | 137 | *********************************************************************/ 138 | 139 | /*case ICM_DECOMPRESS_QUERY: 140 | 141 | return pi->DecompressQuery((LPBITMAPINFOHEADER)lParam1, (LPBITMAPINFOHEADER)lParam2); 142 | 143 | case ICM_DECOMPRESS_BEGIN: 144 | return pi->DecompressBegin((LPBITMAPINFOHEADER)lParam1, (LPBITMAPINFOHEADER)lParam2); 145 | 146 | case ICM_DECOMPRESS_GET_FORMAT: 147 | return pi->DecompressGetFormat((LPBITMAPINFOHEADER)lParam1, (LPBITMAPINFOHEADER)lParam2); 148 | 149 | case ICM_DECOMPRESS_GET_PALETTE: 150 | return pi->DecompressGetPalette((LPBITMAPINFOHEADER)lParam1, (LPBITMAPINFOHEADER)lParam2); 151 | 152 | case ICM_DECOMPRESS: 153 | return pi->Decompress((ICDECOMPRESS*)lParam1, (DWORD)lParam2); 154 | 155 | case ICM_DECOMPRESS_END: 156 | return pi->DecompressEnd();*/ 157 | 158 | /********************************************************************* 159 | 160 
| standard driver messages 161 | 162 | *********************************************************************/ 163 | 164 | case DRV_DISABLE: 165 | case DRV_ENABLE: 166 | return (LRESULT)1L; 167 | 168 | case DRV_INSTALL: 169 | case DRV_REMOVE: 170 | return (LRESULT)DRV_OK; 171 | } 172 | 173 | if (uiMessage < DRV_USER) 174 | return DefDriverProc(dwDriverID, hDriver, uiMessage, lParam1, lParam2); 175 | return ICERR_UNSUPPORTED; 176 | 177 | } -------------------------------------------------------------------------------- /OpenEncodeVFW/OpenEncodeVFW.rc: -------------------------------------------------------------------------------- 1 | // Microsoft Visual C++ generated resource script. 2 | // 3 | #include "resource.h" 4 | 5 | #define APSTUDIO_READONLY_SYMBOLS 6 | ///////////////////////////////////////////////////////////////////////////// 7 | // 8 | // Generated from the TEXTINCLUDE 2 resource. 9 | // 10 | #include "afxres.h" 11 | 12 | ///////////////////////////////////////////////////////////////////////////// 13 | #undef APSTUDIO_READONLY_SYMBOLS 14 | 15 | ///////////////////////////////////////////////////////////////////////////// 16 | // English (United States) resources 17 | 18 | #if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) 19 | LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US 20 | #pragma code_page(1252) 21 | 22 | ///////////////////////////////////////////////////////////////////////////// 23 | // 24 | // Dialog 25 | // 26 | 27 | IDD_DIALOG1 DIALOGEX 0, 0, 334, 290 28 | STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU 29 | CAPTION "OpenEncodeVFW Configuration" 30 | FONT 8, "MS Shell Dlg", 400, 0, 0x1 31 | BEGIN 32 | DEFPUSHBUTTON "OK",IDC_OK,222,270,50,14 33 | PUSHBUTTON "Cancel",IDC_CANCEL,276,270,50,14 34 | CONTROL IDB_BITMAP1,IDC_OPENCL_LOGO,"Static",SS_BITMAP,274,219,48,44 35 | LTEXT "",IDC_BUILD_DATE,6,270,186,8 36 | LTEXT "1",IDC_RC_LOW_LABEL,12,60,96,8 37 | RTEXT "20000",IDC_RC_HIGH_LABEL,210,60,111,8 38 | LTEXT 
"Bitrate (kbit/s)",IDC_RC_LABEL,12,24,144,8 39 | COMBOBOX IDC_RC_MODE,6,6,162,78,CBS_DROPDOWNLIST | CBS_SORT | WS_VSCROLL | WS_TABSTOP 40 | CONTROL "",IDC_RC_VAL_SLIDER,"msctls_trackbar32",TBS_BOTH | TBS_NOTICKS | WS_TABSTOP,6,42,318,18 41 | EDITTEXT IDC_RC_VAL,234,24,90,12,ES_AUTOHSCROLL | ES_NUMBER 42 | COMBOBOX IDC_DEVICE_CB,234,6,90,30,CBS_DROPDOWNLIST | CBS_SORT | WS_VSCROLL | WS_TABSTOP 43 | GROUPBOX "Profile",IDC_PROFILE,6,72,72,54 44 | CONTROL "Baseline",IDC_PROF_BASE,"Button",BS_AUTORADIOBUTTON | WS_GROUP | WS_TABSTOP,12,84,54,10 45 | CONTROL "Mainline",IDC_PROF_MAIN,"Button",BS_AUTORADIOBUTTON | WS_TABSTOP,12,96,54,10 46 | CONTROL "High",IDC_PROF_HIGH,"Button",BS_AUTORADIOBUTTON,12,108,54,10 47 | GROUPBOX "Level",IDC_STATIC,6,132,72,30 48 | COMBOBOX IDC_LEVEL_CB,12,144,60,69,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP 49 | GROUPBOX "Force key / GOP",IDC_STATIC,6,162,72,30 50 | EDITTEXT IDC_IDR,12,174,30,12,ES_AUTOHSCROLL | ES_NUMBER 51 | EDITTEXT IDC_GOP,48,174,24,12,ES_AUTOHSCROLL | ES_NUMBER 52 | GROUPBOX "Search Range X,Y ",IDC_STATIC,84,72,84,30 53 | EDITTEXT IDC_SEARCHRX,90,85,30,12,ES_AUTOHSCROLL | ES_NUMBER 54 | EDITTEXT IDC_SEARCHRY,132,85,30,12,ES_AUTOHSCROLL | ES_NUMBER 55 | GROUPBOX "Quick set",IDC_STATIC,174,72,150,30 56 | PUSHBUTTON "Speed",IDC_QS_SPEED,180,84,42,14 57 | PUSHBUTTON "Balanced",IDC_QS_BALANCED,228,84,42,14 58 | PUSHBUTTON "Quality",IDC_QS_QUALITY,276,84,42,14 59 | GROUPBOX "Misc",IDC_STATIC,84,108,240,84 60 | CONTROL "Log (saved to current working dir)",IDC_LOG,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,90,120,125,10 61 | CONTROL "CABAC",IDC_CABAC,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,90,132,39,10 62 | CONTROL "Force 16x16 MV skip",IDC_SKIP_MV16,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,132,132,84,10 63 | CONTROL "Forward ME adv. 
mode decision",IDC_USE_ME_AMD,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,90,144,120,10 64 | CONTROL "Show message box if fatal error",IDC_LOG2,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,90,156,118,10 65 | CONTROL "Crop",IDC_CROPH,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,90,168,31,10 66 | CONTROL "Send frame rate to VCE",IDC_FRAMERATE,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,222,120,91,10 67 | CONTROL "Switch byte order",IDC_CS_RGBA,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,222,133,73,10 68 | CONTROL "Treat YV12 as NV12",IDC_YV12ASNV12,"Button",BS_AUTOCHECKBOX | NOT WS_VISIBLE | WS_DISABLED | WS_TABSTOP,222,144,78,10 69 | CONTROL "Header insertion",IDC_HDRINSERTION,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,222,156,69,10 70 | GROUPBOX "OpenCL",IDC_STATIC,6,198,318,66 71 | CONTROL "Use for NV12 conversion",IDC_USE_OPENCL,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,12,210,95,10 72 | CONTROL "Run on CPU (slow, unsupported)",IDC_USE_OPENCL2,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,18,223,121,10 73 | CONTROL "Profile kernels (check logging too)",IDC_USE_OPENCL3, 74 | "Button",BS_AUTOCHECKBOX | WS_TABSTOP,18,234,123,10 75 | CONTROL "Speedy math",IDC_SPEEDY_MATH,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,18,247,58,10 76 | GROUPBOX "Color matrix",IDC_STATIC,144,204,128,30 77 | COMBOBOX IDC_COLORMATRIX,145,216,124,69,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP 78 | END 79 | 80 | 81 | ///////////////////////////////////////////////////////////////////////////// 82 | // 83 | // DESIGNINFO 84 | // 85 | 86 | #ifdef APSTUDIO_INVOKED 87 | GUIDELINES DESIGNINFO 88 | BEGIN 89 | IDD_DIALOG1, DIALOG 90 | BEGIN 91 | LEFTMARGIN, 7 92 | RIGHTMARGIN, 327 93 | TOPMARGIN, 7 94 | BOTTOMMARGIN, 283 95 | END 96 | END 97 | #endif // APSTUDIO_INVOKED 98 | 99 | 100 | ///////////////////////////////////////////////////////////////////////////// 101 | // 102 | // STRING 103 | // 104 | 105 | IDR_OPENCL_KERNELS STRING "NV12_kernels.cl" 106 | 107 | #ifdef APSTUDIO_INVOKED 108 | 
///////////////////////////////////////////////////////////////////////////// 109 | // 110 | // TEXTINCLUDE 111 | // 112 | 113 | 1 TEXTINCLUDE 114 | BEGIN 115 | "resource.h\0" 116 | END 117 | 118 | 2 TEXTINCLUDE 119 | BEGIN 120 | "#include ""afxres.h""\r\n" 121 | "\0" 122 | END 123 | 124 | 3 TEXTINCLUDE 125 | BEGIN 126 | "\r\n" 127 | "\0" 128 | END 129 | 130 | #endif // APSTUDIO_INVOKED 131 | 132 | 133 | ///////////////////////////////////////////////////////////////////////////// 134 | // 135 | // Bitmap 136 | // 137 | 138 | IDB_BITMAP1 BITMAP "OpenCL_Logo.bmp" 139 | #endif // English (United States) resources 140 | ///////////////////////////////////////////////////////////////////////////// 141 | 142 | 143 | 144 | #ifndef APSTUDIO_INVOKED 145 | ///////////////////////////////////////////////////////////////////////////// 146 | // 147 | // Generated from the TEXTINCLUDE 3 resource. 148 | // 149 | 150 | 151 | ///////////////////////////////////////////////////////////////////////////// 152 | #endif // not APSTUDIO_INVOKED 153 | 154 | -------------------------------------------------------------------------------- /OpenEncodeVFW/OpenEncodeVFW.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 26 | 29 | 32 | 35 | 38 | 41 | 52 | 55 | 58 | 61 | 70 | 73 | 76 | 79 | 82 | 85 | 88 | 92 | 93 | 101 | 104 | 107 | 110 | 113 | 116 | 127 | 130 | 133 | 136 | 147 | 150 | 153 | 156 | 159 | 162 | 165 | 169 | 170 | 171 | 172 | 173 | 174 | 179 | 182 | 183 | 186 | 187 | 190 | 193 | 198 | 199 | 202 | 207 | 208 | 209 | 212 | 213 | 216 | 217 | 220 | 221 | 224 | 227 | 231 | 232 | 235 | 239 | 240 | 241 | 242 | 247 | 250 | 251 | 254 | 255 | 258 | 259 | 262 | 263 | 264 | 269 | 272 | 273 | 274 | 277 | 278 | 279 | 280 | 281 | 282 | -------------------------------------------------------------------------------- /OpenEncodeVFW/OvEncodeTypedef.h: 
-------------------------------------------------------------------------------- 1 | 2 | /* ============================================================ 3 | 4 | Copyright (c) 2012 Advanced Micro Devices, Inc. All rights reserved. 5 | 6 | Redistribution and use of this material is permitted under the following 7 | conditions: 8 | 9 | Redistributions must retain the above copyright notice and all terms of this 10 | license. 11 | 12 | In no event shall anyone redistributing or accessing or using this material 13 | commence or participate in any arbitration or legal action relating to this 14 | material against Advanced Micro Devices, Inc. or any copyright holders or 15 | contributors. The foregoing shall survive any expiration or termination of 16 | this license or any agreement or access or use related to this material. 17 | 18 | ANY BREACH OF ANY TERM OF THIS LICENSE SHALL RESULT IN THE IMMEDIATE REVOCATION 19 | OF ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE THIS MATERIAL. 20 | 21 | THIS MATERIAL IS PROVIDED BY ADVANCED MICRO DEVICES, INC. AND ANY COPYRIGHT 22 | HOLDERS AND CONTRIBUTORS "AS IS" IN ITS CURRENT CONDITION AND WITHOUT ANY 23 | REPRESENTATIONS, GUARANTEE, OR WARRANTY OF ANY KIND OR IN ANY WAY RELATED TO 24 | SUPPORT, INDEMNITY, ERROR FREE OR UNINTERRUPTED OPERA TION, OR THAT IT IS FREE 25 | FROM DEFECTS OR VIRUSES. ALL OBLIGATIONS ARE HEREBY DISCLAIMED - WHETHER 26 | EXPRESS, IMPLIED, OR STATUTORY - INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED 27 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, 28 | ACCURACY, COMPLETENESS, OPERABILITY, QUALITY OF SERVICE, OR NON-INFRINGEMENT. 29 | IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. 
OR ANY COPYRIGHT HOLDERS OR 30 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE, 31 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 32 | OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, REVENUE, DATA, OR PROFITS; OR 33 | BUSINESS INTERRUPTION) HOWEVER CAUSED OR BASED ON ANY THEORY OF LIABILITY 34 | ARISING IN ANY WAY RELATED TO THIS MATERIAL, EVEN IF ADVISED OF THE POSSIBILITY 35 | OF SUCH DAMAGE. THE ENTIRE AND AGGREGATE LIABILITY OF ADVANCED MICRO DEVICES, 36 | INC. AND ANY COPYRIGHT HOLDERS AND CONTRIBUTORS SHALL NOT EXCEED TEN DOLLARS 37 | (US $10.00). ANYONE REDISTRIBUTING OR ACCESSING OR USING THIS MATERIAL ACCEPTS 38 | THIS ALLOCATION OF RISK AND AGREES TO RELEASE ADVANCED MICRO DEVICES, INC. AND 39 | ANY COPYRIGHT HOLDERS AND CONTRIBUTORS FROM ANY AND ALL LIABILITIES, 40 | OBLIGATIONS, CLAIMS, OR DEMANDS IN EXCESS OF TEN DOLLARS (US $10.00). THE 41 | FOREGOING ARE ESSENTIAL TERMS OF THIS LICENSE AND, IF ANY OF THESE TERMS ARE 42 | CONSTRUED AS UNENFORCEABLE, FAIL IN ESSENTIAL PURPOSE, OR BECOME VOID OR 43 | DETRIMENTAL TO ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR 44 | CONTRIBUTORS FOR ANY REASON, THEN ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE 45 | THIS MATERIAL SHALL TERMINATE IMMEDIATELY. MOREOVER, THE FOREGOING SHALL 46 | SURVIVE ANY EXPIRATION OR TERMINATION OF THIS LICENSE OR ANY AGREEMENT OR 47 | ACCESS OR USE RELATED TO THIS MATERIAL. 48 | 49 | NOTICE IS HEREBY PROVIDED, AND BY REDISTRIBUTING OR ACCESSING OR USING THIS 50 | MATERIAL SUCH NOTICE IS ACKNOWLEDGED, THAT THIS MATERIAL MAY BE SUBJECT TO 51 | RESTRICTIONS UNDER THE LAWS AND REGULATIONS OF THE UNITED STATES OR OTHER 52 | COUNTRIES, WHICH INCLUDE BUT ARE NOT LIMITED TO, U.S. EXPORT CONTROL LAWS SUCH 53 | AS THE EXPORT ADMINISTRATION REGULATIONS AND NATIONAL SECURITY CONTROLS AS 54 | DEFINED THEREUNDER, AS WELL AS STATE DEPARTMENT CONTROLS UNDER THE U.S. 55 | MUNITIONS LIST. 
THIS MATERIAL MAY NOT BE USED, RELEASED, TRANSFERRED, IMPORTED, 56 | EXPORTED AND/OR RE-EXPORTED IN ANY MANNER PROHIBITED UNDER ANY APPLICABLE LAWS, 57 | INCLUDING U.S. EXPORT CONTROL LAWS REGARDING SPECIFICALLY DESIGNATED PERSONS, 58 | COUNTRIES AND NATIONALS OF COUNTRIES SUBJECT TO NATIONAL SECURITY CONTROLS. 59 | MOREOVER, THE FOREGOING SHALL SURVIVE ANY EXPIRATION OR TERMINATION OF ANY 60 | LICENSE OR AGREEMENT OR ACCESS OR USE RELATED TO THIS MATERIAL. 61 | 62 | NOTICE REGARDING THE U.S. GOVERNMENT AND DOD AGENCIES: This material is 63 | provided with "RESTRICTED RIGHTS" and/or "LIMITED RIGHTS" as applicable to 64 | computer software and technical data, respectively. Use, duplication, 65 | distribution or disclosure by the U.S. Government and/or DOD agencies is 66 | subject to the full extent of restrictions in all applicable regulations, 67 | including those found at FAR52.227 and DFARS252.227 et seq. and any successor 68 | regulations thereof. Use of this material by the U.S. Government and/or DOD 69 | agencies is acknowledgment of the proprietary rights of any copyright holders 70 | and contributors, including those of Advanced Micro Devices, Inc., as well as 71 | the provisions of FAR52.227-14 through 23 regarding privately developed and/or 72 | commercial computer software. 73 | 74 | This license forms the entire agreement regarding the subject matter hereof and 75 | supersedes all proposals and prior discussions and writings between the parties 76 | with respect thereto. This license does not affect any ownership, rights, title, 77 | or interest in, or relating to, this material. No terms of this license can be 78 | modified or waived, and no breach of this license can be excused, unless done 79 | so in a writing signed by all affected parties. Each term of this license is 80 | separately enforceable. 
If any term of this license is determined to be or 81 | becomes unenforceable or illegal, such term shall be reformed to the minimum 82 | extent necessary in order for this license to remain in effect in accordance 83 | with its terms as modified by such reformation. This license shall be governed 84 | by and construed in accordance with the laws of the State of Texas without 85 | regard to rules on conflicts of law of any state or jurisdiction or the United 86 | Nations Convention on the International Sale of Goods. All disputes arising out 87 | of this license shall be subject to the jurisdiction of the federal and state 88 | courts in Austin, Texas, and all defenses are hereby waived concerning personal 89 | jurisdiction and venue of these courts. 90 | 91 | ============================================================ */ 92 | 93 | /** 94 | ******************************************************************************** 95 | * @file 96 | * 97 | * @brief Contains typedefs for data types 98 | * 99 | ******************************************************************************** 100 | */ 101 | #ifndef _OVENCODE_TYPEDEF_H_ 102 | #define _OVENCODE_TYPEDEF_H_ 103 | 104 | /******************************************************************************/ 105 | /* Define sized-based typedefs up to 32-bits. */ 106 | /******************************************************************************/ 107 | typedef char int8; 108 | typedef signed char sint8; 109 | typedef unsigned char uint8; 110 | 111 | typedef signed short int16; 112 | typedef unsigned short uint16; 113 | 114 | typedef signed int int32; 115 | typedef unsigned int uint32; 116 | 117 | typedef float float32; 118 | typedef double float64; 119 | 120 | 121 | /******************************************************************************/ 122 | /* Define 64-bit typedefs, depending on the compiler and operating system. 
*/ 123 | /******************************************************************************/ 124 | #ifdef __GNUC__ 125 | typedef long long int64; 126 | typedef unsigned long long uint64; 127 | 128 | #else /* not __GNUC__ */ 129 | #ifdef _WIN32 130 | typedef __int64 int64; 131 | typedef unsigned __int64 uint64; 132 | 133 | #else /* not _WIN32 */ 134 | #error Unsupported compiler and/or operating system 135 | #endif /* end ifdef _WIN32 */ 136 | 137 | #endif /* end ifdef __GNUC__ */ 138 | 139 | /******************************************************************************/ 140 | /* Define other generic typedefs. */ 141 | /******************************************************************************/ 142 | typedef unsigned long ulong; 143 | 144 | /******************************************************************************/ 145 | /* End of _OVENCODE_TYPEDEF_H_ */ 146 | /******************************************************************************/ 147 | #endif -------------------------------------------------------------------------------- /OpenEncodeVFW/OVEncodeDyn.h: -------------------------------------------------------------------------------- 1 | /* ============================================================ 2 | 3 | New sdk seems to not include openvideo headers and libs 4 | so use GetProc...yaddayadda. 5 | Based on OVEncode.h from APP SDK 2.8. 6 | Copyright (c) 2011 Advanced Micro Devices, Inc.
7 | 8 | ============================================================ */ 9 | 10 | #ifndef __OVENCODEDYN_H__ 11 | #define __OVENCODEDYN_H__ 12 | 13 | #ifndef OPENVIDEOAPI 14 | #define OPENVIDEOAPI __stdcall 15 | #endif // OPENVIDEOAPI 16 | 17 | #include "OVEncodeTypes.h" 18 | 19 | typedef OVresult(OPENVIDEOAPI *f_OVEncodeGetDeviceInfo) ( 20 | unsigned int *num_device, 21 | ovencode_device_info *device_info); 22 | extern f_OVEncodeGetDeviceInfo OVEncodeGetDeviceInfo; 23 | 24 | /* 25 | * This function is used by application to query the encoder capability that includes 26 | * codec information and format that the device can support. 27 | */ 28 | typedef OVresult (OPENVIDEOAPI *f_OVEncodeGetDeviceCap) ( 29 | OPContextHandle platform_context, 30 | unsigned int device_id, 31 | unsigned int encode_cap_list_size, 32 | unsigned int *num_encode_cap, 33 | OVE_ENCODE_CAPS *encode_cap_list); 34 | extern f_OVEncodeGetDeviceCap OVEncodeGetDeviceCap; 35 | 36 | /* 37 | * This function is used by the application to create the encode handle from the 38 | * platform memory handle. The encode handle can be used in the OVEncodePicture 39 | * function as the output encode buffer. The application can create multiple 40 | * output buffers to queue up the decode job. 41 | */ 42 | typedef ove_handle (OPENVIDEOAPI *f_OVCreateOVEHandleFromOPHandle) ( 43 | OPMemHandle platform_memhandle); 44 | extern f_OVCreateOVEHandleFromOPHandle OVCreateOVEHandleFromOPHandle; 45 | 46 | /* 47 | * This function is used by the application to release the encode handle. 48 | * After release, the handle is invalid and should not be used for encode picture. 49 | */ 50 | typedef OVresult (OPENVIDEOAPI *f_OVReleaseOVEHandle)(ove_handle encode_handle); 51 | extern f_OVReleaseOVEHandle OVReleaseOVEHandle; 52 | 53 | /* 54 | * This function is used by the application to acquire the memory objects that 55 | * have been created from OpenCL. 
These objects need to be acquired before they 56 | * can be used by the decode function. 57 | */ 58 | 59 | typedef OVresult (OPENVIDEOAPI *f_OVEncodeAcquireObject) ( 60 | ove_session session, 61 | unsigned int num_handle, 62 | ove_handle *encode_handle, 63 | unsigned int num_event_in_wait_list, 64 | OPEventHandle *event_wait_list, 65 | OPEventHandle *event); 66 | extern f_OVEncodeAcquireObject OVEncodeAcquireObject; 67 | 68 | /* 69 | * This function is used by the application to release the memory objects that 70 | * have been created from OpenCL. The objects need to be released before they 71 | * can be used by OpenCL. 72 | */ 73 | typedef OVresult (OPENVIDEOAPI *f_OVEncodeReleaseObject) ( 74 | ove_session session, 75 | unsigned int num_handle, 76 | ove_handle *encode_handle, 77 | unsigned int num_event_in_wait_list, 78 | OPEventHandle *event_wait_list, 79 | OPEventHandle *event); 80 | extern f_OVEncodeReleaseObject OVEncodeReleaseObject; 81 | 82 | typedef ove_event (OPENVIDEOAPI *f_OVCreateOVEEventFromOPEventHandle) ( 83 | OPEventHandle platform_eventhandle); 84 | extern f_OVCreateOVEEventFromOPEventHandle OVCreateOVEEventFromOPEventHandle; 85 | 86 | /* 87 | * This function is used by the application to release the encode event handle. 88 | * After release, the event handle is invalid and should not be used. 89 | */ 90 | typedef OVresult (OPENVIDEOAPI *f_OVEncodeReleaseOVEEventHandle) ( 91 | ove_event ove_ev); 92 | extern f_OVEncodeReleaseOVEEventHandle OVEncodeReleaseOVEEventHandle; 93 | 94 | 95 | /* 96 | * This function is used by the application to create the encode session for 97 | * each encoding stream. After the session creation, the encoder is ready to 98 | * accept the encode picture job from the application. For multiple streams 99 | * encoding, the application can create multiple sessions within the same 100 | * platform context and the application is responsible to manage the input and 101 | * output buffers for each corresponding session. 
102 | */ 103 | typedef ove_session (OPENVIDEOAPI *f_OVEncodeCreateSession) ( 104 | OPContextHandle platform_context, 105 | unsigned int device_id, 106 | OVE_ENCODE_MODE encode_mode, 107 | OVE_PROFILE_LEVEL encode_profile, 108 | OVE_PICTURE_FORMAT encode_format, 109 | unsigned int encode_width, 110 | unsigned int encode_height, 111 | OVE_ENCODE_TASK_PRIORITY encode_task_priority); 112 | extern f_OVEncodeCreateSession OVEncodeCreateSession; 113 | 114 | /* 115 | * This function is used by the application to destroy the encode session. Destroying a 116 | * session will release all associated hardware resources. No further encoding work 117 | * can be performed with the session after it is destroyed. 118 | */ 119 | typedef OVresult (OPENVIDEOAPI *f_OVEncodeDestroySession) ( 120 | ove_session session); 121 | extern f_OVEncodeDestroySession OVEncodeDestroySession; 122 | 123 | // Retrieve one configuration data structure 124 | typedef OVresult (OPENVIDEOAPI *f_OVEncodeGetPictureControlConfig) ( 125 | ove_session session, 126 | OVE_CONFIG_PICTURE_CONTROL *pPictureControlConfig); 127 | extern f_OVEncodeGetPictureControlConfig OVEncodeGetPictureControlConfig; 128 | 129 | // Get current rate control configuration 130 | typedef OVresult (OPENVIDEOAPI *f_OVEncodeGetRateControlConfig) ( 131 | ove_session session, 132 | OVE_CONFIG_RATE_CONTROL *pRateControlConfig); 133 | extern f_OVEncodeGetRateControlConfig OVEncodeGetRateControlConfig; 134 | 135 | // Get current motion estimation configuration 136 | typedef OVresult (OPENVIDEOAPI *f_OVEncodeGetMotionEstimationConfig) ( 137 | ove_session session, 138 | OVE_CONFIG_MOTION_ESTIMATION *pMotionEstimationConfig); 139 | extern f_OVEncodeGetMotionEstimationConfig OVEncodeGetMotionEstimationConfig; 140 | 141 | // Get current RDO configuration 142 | typedef OVresult (OPENVIDEOAPI *f_OVEncodeGetRDOControlConfig) ( 143 | ove_session session, 144 | OVE_CONFIG_RDO *pRDOConfig); 145 | extern f_OVEncodeGetRDOControlConfig
OVEncodeGetRDOControlConfig; 146 | 147 | typedef OVresult (OPENVIDEOAPI *f_OVEncodeSendConfig) ( 148 | ove_session session, 149 | unsigned int num_of_config_buffers, 150 | OVE_CONFIG *pConfigBuffers); 151 | extern f_OVEncodeSendConfig OVEncodeSendConfig; 152 | 153 | // Fully encode a single picture 154 | typedef OVresult (OPENVIDEOAPI *f_OVEncodeTask) ( 155 | ove_session session, 156 | unsigned int number_of_encode_task_input_buffers, 157 | OVE_INPUT_DESCRIPTION *encode_task_input_buffers_list, 158 | void *picture_parameter, 159 | unsigned int *pTaskID, 160 | unsigned int num_event_in_wait_list, 161 | ove_event *event_wait_list, 162 | ove_event *event); 163 | extern f_OVEncodeTask OVEncodeTask; 164 | 165 | // Query outputs 166 | typedef OVresult (OPENVIDEOAPI *f_OVEncodeQueryTaskDescription) ( 167 | ove_session session, 168 | unsigned int num_of_task_description_request, 169 | unsigned int *num_of_task_description_return, 170 | OVE_OUTPUT_DESCRIPTION *task_description_list); 171 | extern f_OVEncodeQueryTaskDescription OVEncodeQueryTaskDescription; 172 | 173 | // Reclaim the resource of the output ring up to the specified task. 
174 | typedef OVresult (OPENVIDEOAPI *f_OVEncodeReleaseTask) ( 175 | ove_session session, 176 | unsigned int taskID); 177 | extern f_OVEncodeReleaseTask OVEncodeReleaseTask; 178 | 179 | #include 180 | #define FUNDEF(x) extern decltype(&x) f_##x 181 | FUNDEF(clGetPlatformIDs); 182 | FUNDEF(clGetPlatformInfo); 183 | FUNDEF(clCreateContext); 184 | FUNDEF(clReleaseContext); 185 | FUNDEF(clCreateContextFromType); 186 | FUNDEF(clCreateCommandQueue); 187 | FUNDEF(clReleaseCommandQueue); 188 | FUNDEF(clGetEventInfo); 189 | FUNDEF(clCreateBuffer); 190 | FUNDEF(clCreateKernel); 191 | FUNDEF(clReleaseMemObject); 192 | FUNDEF(clReleaseKernel); 193 | FUNDEF(clReleaseProgram); 194 | FUNDEF(clEnqueueMapBuffer); 195 | FUNDEF(clEnqueueNDRangeKernel); 196 | FUNDEF(clEnqueueUnmapMemObject); 197 | FUNDEF(clReleaseEvent); 198 | FUNDEF(clCreateProgramWithSource); 199 | FUNDEF(clBuildProgram); 200 | FUNDEF(clSetKernelArg); 201 | FUNDEF(clGetDeviceInfo); 202 | FUNDEF(clFlush); 203 | FUNDEF(clFinish); 204 | FUNDEF(clWaitForEvents); 205 | 206 | FUNDEF(clGetProgramBuildInfo); 207 | FUNDEF(clGetEventProfilingInfo); 208 | FUNDEF(clEnqueueWriteBuffer); 209 | FUNDEF(clGetKernelWorkGroupInfo); 210 | FUNDEF(clGetDeviceIDs); 211 | #undef FUNDEF 212 | 213 | void deinitOVE(); 214 | bool initOVE(); 215 | 216 | #endif // __OVENCODEDYN_H__ -------------------------------------------------------------------------------- /OpenEncodeVFW/perf.h: -------------------------------------------------------------------------------- 1 | 2 | /* ============================================================ 3 | 4 | Copyright (c) 2012 Advanced Micro Devices, Inc. All rights reserved. 5 | 6 | Redistribution and use of this material is permitted under the following 7 | conditions: 8 | 9 | Redistributions must retain the above copyright notice and all terms of this 10 | license. 
11 | 12 | In no event shall anyone redistributing or accessing or using this material 13 | commence or participate in any arbitration or legal action relating to this 14 | material against Advanced Micro Devices, Inc. or any copyright holders or 15 | contributors. The foregoing shall survive any expiration or termination of 16 | this license or any agreement or access or use related to this material. 17 | 18 | ANY BREACH OF ANY TERM OF THIS LICENSE SHALL RESULT IN THE IMMEDIATE REVOCATION 19 | OF ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE THIS MATERIAL. 20 | 21 | THIS MATERIAL IS PROVIDED BY ADVANCED MICRO DEVICES, INC. AND ANY COPYRIGHT 22 | HOLDERS AND CONTRIBUTORS "AS IS" IN ITS CURRENT CONDITION AND WITHOUT ANY 23 | REPRESENTATIONS, GUARANTEE, OR WARRANTY OF ANY KIND OR IN ANY WAY RELATED TO 24 | SUPPORT, INDEMNITY, ERROR FREE OR UNINTERRUPTED OPERA TION, OR THAT IT IS FREE 25 | FROM DEFECTS OR VIRUSES. ALL OBLIGATIONS ARE HEREBY DISCLAIMED - WHETHER 26 | EXPRESS, IMPLIED, OR STATUTORY - INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED 27 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, 28 | ACCURACY, COMPLETENESS, OPERABILITY, QUALITY OF SERVICE, OR NON-INFRINGEMENT. 29 | IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR 30 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE, 31 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 32 | OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, REVENUE, DATA, OR PROFITS; OR 33 | BUSINESS INTERRUPTION) HOWEVER CAUSED OR BASED ON ANY THEORY OF LIABILITY 34 | ARISING IN ANY WAY RELATED TO THIS MATERIAL, EVEN IF ADVISED OF THE POSSIBILITY 35 | OF SUCH DAMAGE. THE ENTIRE AND AGGREGATE LIABILITY OF ADVANCED MICRO DEVICES, 36 | INC. AND ANY COPYRIGHT HOLDERS AND CONTRIBUTORS SHALL NOT EXCEED TEN DOLLARS 37 | (US $10.00). 
ANYONE REDISTRIBUTING OR ACCESSING OR USING THIS MATERIAL ACCEPTS 38 | THIS ALLOCATION OF RISK AND AGREES TO RELEASE ADVANCED MICRO DEVICES, INC. AND 39 | ANY COPYRIGHT HOLDERS AND CONTRIBUTORS FROM ANY AND ALL LIABILITIES, 40 | OBLIGATIONS, CLAIMS, OR DEMANDS IN EXCESS OF TEN DOLLARS (US $10.00). THE 41 | FOREGOING ARE ESSENTIAL TERMS OF THIS LICENSE AND, IF ANY OF THESE TERMS ARE 42 | CONSTRUED AS UNENFORCEABLE, FAIL IN ESSENTIAL PURPOSE, OR BECOME VOID OR 43 | DETRIMENTAL TO ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR 44 | CONTRIBUTORS FOR ANY REASON, THEN ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE 45 | THIS MATERIAL SHALL TERMINATE IMMEDIATELY. MOREOVER, THE FOREGOING SHALL 46 | SURVIVE ANY EXPIRATION OR TERMINATION OF THIS LICENSE OR ANY AGREEMENT OR 47 | ACCESS OR USE RELATED TO THIS MATERIAL. 48 | 49 | NOTICE IS HEREBY PROVIDED, AND BY REDISTRIBUTING OR ACCESSING OR USING THIS 50 | MATERIAL SUCH NOTICE IS ACKNOWLEDGED, THAT THIS MATERIAL MAY BE SUBJECT TO 51 | RESTRICTIONS UNDER THE LAWS AND REGULATIONS OF THE UNITED STATES OR OTHER 52 | COUNTRIES, WHICH INCLUDE BUT ARE NOT LIMITED TO, U.S. EXPORT CONTROL LAWS SUCH 53 | AS THE EXPORT ADMINISTRATION REGULATIONS AND NATIONAL SECURITY CONTROLS AS 54 | DEFINED THEREUNDER, AS WELL AS STATE DEPARTMENT CONTROLS UNDER THE U.S. 55 | MUNITIONS LIST. THIS MATERIAL MAY NOT BE USED, RELEASED, TRANSFERRED, IMPORTED, 56 | EXPORTED AND/OR RE-EXPORTED IN ANY MANNER PROHIBITED UNDER ANY APPLICABLE LAWS, 57 | INCLUDING U.S. EXPORT CONTROL LAWS REGARDING SPECIFICALLY DESIGNATED PERSONS, 58 | COUNTRIES AND NATIONALS OF COUNTRIES SUBJECT TO NATIONAL SECURITY CONTROLS. 59 | MOREOVER, THE FOREGOING SHALL SURVIVE ANY EXPIRATION OR TERMINATION OF ANY 60 | LICENSE OR AGREEMENT OR ACCESS OR USE RELATED TO THIS MATERIAL. 61 | 62 | NOTICE REGARDING THE U.S. 
GOVERNMENT AND DOD AGENCIES: This material is 63 | provided with "RESTRICTED RIGHTS" and/or "LIMITED RIGHTS" as applicable to 64 | computer software and technical data, respectively. Use, duplication, 65 | distribution or disclosure by the U.S. Government and/or DOD agencies is 66 | subject to the full extent of restrictions in all applicable regulations, 67 | including those found at FAR52.227 and DFARS252.227 et seq. and any successor 68 | regulations thereof. Use of this material by the U.S. Government and/or DOD 69 | agencies is acknowledgment of the proprietary rights of any copyright holders 70 | and contributors, including those of Advanced Micro Devices, Inc., as well as 71 | the provisions of FAR52.227-14 through 23 regarding privately developed and/or 72 | commercial computer software. 73 | 74 | This license forms the entire agreement regarding the subject matter hereof and 75 | supersedes all proposals and prior discussions and writings between the parties 76 | with respect thereto. This license does not affect any ownership, rights, title, 77 | or interest in, or relating to, this material. No terms of this license can be 78 | modified or waived, and no breach of this license can be excused, unless done 79 | so in a writing signed by all affected parties. Each term of this license is 80 | separately enforceable. If any term of this license is determined to be or 81 | becomes unenforceable or illegal, such term shall be reformed to the minimum 82 | extent necessary in order for this license to remain in effect in accordance 83 | with its terms as modified by such reformation. This license shall be governed 84 | by and construed in accordance with the laws of the State of Texas without 85 | regard to rules on conflicts of law of any state or jurisdiction or the United 86 | Nations Convention on the International Sale of Goods. 
All disputes arising out 87 | of this license shall be subject to the jurisdiction of the federal and state 88 | courts in Austin, Texas, and all defenses are hereby waived concerning personal 89 | jurisdiction and venue of these courts. 90 | 91 | ============================================================ */ 92 | 93 | /** 94 | ******************************************************************************** 95 | * @file 96 | * 97 | * @brief Contains declaration for performance measurement functions 98 | * 99 | ******************************************************************************** 100 | */ 101 | #ifndef _OVENCODEPERF_H_ 102 | #define _OVENCODEPERF_H_ 103 | /******************************************************************************* 104 | * INCLUDE FILES * 105 | *******************************************************************************/ 106 | #include 107 | #include "OVEncodeDyn.h" 108 | #include "OvEncodeTypedef.h" 109 | #include 110 | #include "cl\cl.h" 111 | #include "log.h" 112 | /*******************************************************************************/ 113 | /* Max number of timers which can be used */ 114 | /*******************************************************************************/ 115 | #define MAX_TIMING 64 116 | /*******************************************************************************/ 117 | /* This is used to skip the initial number of frames for profiling */ 118 | /*******************************************************************************/ 119 | #define SKIP_TIMING 1 120 | 121 | typedef struct profile 122 | { 123 | int64 sTime[MAX_TIMING]; /**< Records the start time of the profiler */ 124 | int64 accSum[MAX_TIMING]; /**< Accumulates the timer values */ 125 | int32 callCount[MAX_TIMING]; /**< Number of times timer is called */ 126 | }OVprofile; 127 | 128 | /** 129 | ******************************************************************************* 130 | * @fn captureTimeStop 131 | * @brief calculates difference 
between start and end timers 132 | * 133 | * @param[in/out] profileCnt : Pointer to the structure containing profile counters 134 | * @param[in] type : Timer type 135 | * 136 | * @return void : nothing is returned. 137 | ******************************************************************************* 138 | */ 139 | void captureTimeStop(OVprofile *profileCnt,int32 type); 140 | /** 141 | ******************************************************************************* 142 | * @fn captureTimeStart 143 | * @brief Records start of the timer 144 | * 145 | * @param[in/out] profileCnt : Pointer to the structure containing profile counters 146 | * @param[in] type : Timer type 147 | * 148 | * @return void : nothing is returned. 149 | ******************************************************************************* 150 | */ 151 | void captureTimeStart(OVprofile *profileCnt,int32 type); 152 | /** 153 | ******************************************************************************* 154 | * @fn displayFps 155 | * @brief Calculates Frames per sec 156 | * 157 | * @param[in/out] profileCnt : Pointer to the structure containing profile counters 158 | * 159 | * @return void : nothing is returned. 160 | ******************************************************************************* 161 | */ 162 | void displayFps(Logger *, OVprofile *profileCnt,cl_device_id clDeviceID); 163 | /** 164 | ******************************************************************************* 165 | * @fn initProfileCnt 166 | * @brief Initialize the timers 167 | * 168 | * @param[in/out] profileCnt : Pointer to the structure containing profile counters 169 | * 170 | * @return void : nothing is returned.
171 | ******************************************************************************* 172 | */ 173 | void initProfileCnt(OVprofile *profileCnt); 174 | 175 | /** 176 | ******************************************************************************* 177 | * @fn getGpuFrequency 178 | * @brief Retrieves the GPU clock frequency 179 | * 180 | * @param[in] clDeviceID : Device ID for gpu 181 | * @param[out] gpuFreq : Receives the GPU frequency written by the function 182 | * 183 | * @return cl_int : OpenCL status code (presumably CL_SUCCESS on success; confirm against perf.cpp). 184 | ******************************************************************************* 185 | */ 186 | cl_int getGpuFrequency(cl_device_id clDeviceID, uint32 *gpuFreq); 187 | #ifndef _M_X64 188 | inline int64 myRdtsc() 189 | { 190 | _asm _emit 0x0f; 191 | _asm _emit 0x31; 192 | } 193 | #else /*_M_X64 */ 194 | #define myRdtsc() __rdtsc() 195 | #endif /*_M_X64 */ 196 | 197 | #endif -------------------------------------------------------------------------------- /OpenEncodeVFW/perf.cpp: -------------------------------------------------------------------------------- 1 | /* ============================================================ 2 | 3 | Copyright (c) 2012 Advanced Micro Devices, Inc. All rights reserved. 4 | 5 | Redistribution and use of this material is permitted under the following 6 | conditions: 7 | 8 | Redistributions must retain the above copyright notice and all terms of this 9 | license. 10 | 11 | In no event shall anyone redistributing or accessing or using this material 12 | commence or participate in any arbitration or legal action relating to this 13 | material against Advanced Micro Devices, Inc. or any copyright holders or 14 | contributors. The foregoing shall survive any expiration or termination of 15 | this license or any agreement or access or use related to this material. 16 | 17 | ANY BREACH OF ANY TERM OF THIS LICENSE SHALL RESULT IN THE IMMEDIATE REVOCATION 18 | OF ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE THIS MATERIAL.
19 | 20 | THIS MATERIAL IS PROVIDED BY ADVANCED MICRO DEVICES, INC. AND ANY COPYRIGHT 21 | HOLDERS AND CONTRIBUTORS "AS IS" IN ITS CURRENT CONDITION AND WITHOUT ANY 22 | REPRESENTATIONS, GUARANTEE, OR WARRANTY OF ANY KIND OR IN ANY WAY RELATED TO 23 | SUPPORT, INDEMNITY, ERROR FREE OR UNINTERRUPTED OPERA TION, OR THAT IT IS FREE 24 | FROM DEFECTS OR VIRUSES. ALL OBLIGATIONS ARE HEREBY DISCLAIMED - WHETHER 25 | EXPRESS, IMPLIED, OR STATUTORY - INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED 26 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, 27 | ACCURACY, COMPLETENESS, OPERABILITY, QUALITY OF SERVICE, OR NON-INFRINGEMENT. 28 | IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR 29 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE, 30 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 31 | OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, REVENUE, DATA, OR PROFITS; OR 32 | BUSINESS INTERRUPTION) HOWEVER CAUSED OR BASED ON ANY THEORY OF LIABILITY 33 | ARISING IN ANY WAY RELATED TO THIS MATERIAL, EVEN IF ADVISED OF THE POSSIBILITY 34 | OF SUCH DAMAGE. THE ENTIRE AND AGGREGATE LIABILITY OF ADVANCED MICRO DEVICES, 35 | INC. AND ANY COPYRIGHT HOLDERS AND CONTRIBUTORS SHALL NOT EXCEED TEN DOLLARS 36 | (US $10.00). ANYONE REDISTRIBUTING OR ACCESSING OR USING THIS MATERIAL ACCEPTS 37 | THIS ALLOCATION OF RISK AND AGREES TO RELEASE ADVANCED MICRO DEVICES, INC. AND 38 | ANY COPYRIGHT HOLDERS AND CONTRIBUTORS FROM ANY AND ALL LIABILITIES, 39 | OBLIGATIONS, CLAIMS, OR DEMANDS IN EXCESS OF TEN DOLLARS (US $10.00). THE 40 | FOREGOING ARE ESSENTIAL TERMS OF THIS LICENSE AND, IF ANY OF THESE TERMS ARE 41 | CONSTRUED AS UNENFORCEABLE, FAIL IN ESSENTIAL PURPOSE, OR BECOME VOID OR 42 | DETRIMENTAL TO ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR 43 | CONTRIBUTORS FOR ANY REASON, THEN ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE 44 | THIS MATERIAL SHALL TERMINATE IMMEDIATELY. 
MOREOVER, THE FOREGOING SHALL 45 | SURVIVE ANY EXPIRATION OR TERMINATION OF THIS LICENSE OR ANY AGREEMENT OR 46 | ACCESS OR USE RELATED TO THIS MATERIAL. 47 | 48 | NOTICE IS HEREBY PROVIDED, AND BY REDISTRIBUTING OR ACCESSING OR USING THIS 49 | MATERIAL SUCH NOTICE IS ACKNOWLEDGED, THAT THIS MATERIAL MAY BE SUBJECT TO 50 | RESTRICTIONS UNDER THE LAWS AND REGULATIONS OF THE UNITED STATES OR OTHER 51 | COUNTRIES, WHICH INCLUDE BUT ARE NOT LIMITED TO, U.S. EXPORT CONTROL LAWS SUCH 52 | AS THE EXPORT ADMINISTRATION REGULATIONS AND NATIONAL SECURITY CONTROLS AS 53 | DEFINED THEREUNDER, AS WELL AS STATE DEPARTMENT CONTROLS UNDER THE U.S. 54 | MUNITIONS LIST. THIS MATERIAL MAY NOT BE USED, RELEASED, TRANSFERRED, IMPORTED, 55 | EXPORTED AND/OR RE-EXPORTED IN ANY MANNER PROHIBITED UNDER ANY APPLICABLE LAWS, 56 | INCLUDING U.S. EXPORT CONTROL LAWS REGARDING SPECIFICALLY DESIGNATED PERSONS, 57 | COUNTRIES AND NATIONALS OF COUNTRIES SUBJECT TO NATIONAL SECURITY CONTROLS. 58 | MOREOVER, THE FOREGOING SHALL SURVIVE ANY EXPIRATION OR TERMINATION OF ANY 59 | LICENSE OR AGREEMENT OR ACCESS OR USE RELATED TO THIS MATERIAL. 60 | 61 | NOTICE REGARDING THE U.S. GOVERNMENT AND DOD AGENCIES: This material is 62 | provided with "RESTRICTED RIGHTS" and/or "LIMITED RIGHTS" as applicable to 63 | computer software and technical data, respectively. Use, duplication, 64 | distribution or disclosure by the U.S. Government and/or DOD agencies is 65 | subject to the full extent of restrictions in all applicable regulations, 66 | including those found at FAR52.227 and DFARS252.227 et seq. and any successor 67 | regulations thereof. Use of this material by the U.S. Government and/or DOD 68 | agencies is acknowledgment of the proprietary rights of any copyright holders 69 | and contributors, including those of Advanced Micro Devices, Inc., as well as 70 | the provisions of FAR52.227-14 through 23 regarding privately developed and/or 71 | commercial computer software. 
72 | 73 | This license forms the entire agreement regarding the subject matter hereof and 74 | supersedes all proposals and prior discussions and writings between the parties 75 | with respect thereto. This license does not affect any ownership, rights, title, 76 | or interest in, or relating to, this material. No terms of this license can be 77 | modified or waived, and no breach of this license can be excused, unless done 78 | so in a writing signed by all affected parties. Each term of this license is 79 | separately enforceable. If any term of this license is determined to be or 80 | becomes unenforceable or illegal, such term shall be reformed to the minimum 81 | extent necessary in order for this license to remain in effect in accordance 82 | with its terms as modified by such reformation. This license shall be governed 83 | by and construed in accordance with the laws of the State of Texas without 84 | regard to rules on conflicts of law of any state or jurisdiction or the United 85 | Nations Convention on the International Sale of Goods. All disputes arising out 86 | of this license shall be subject to the jurisdiction of the federal and state 87 | courts in Austin, Texas, and all defenses are hereby waived concerning personal 88 | jurisdiction and venue of these courts. 
89 | 90 | ============================================================ */ 91 | 92 | /** 93 | ******************************************************************************** 94 | * @file 95 | * 96 | * @brief This file contains functions for measuring the performance 97 | * 98 | ******************************************************************************** 99 | */ 100 | 101 | #include "stdafx.h" 102 | #include "OpenEncodeVFW.h" 103 | //#define NOPROF 104 | 105 | /** 106 | ******************************************************************************* 107 | * @fn initProfileCnt 108 | * @brief Initialize the timers 109 | * 110 | * @param[in/out] profileCnt : Pointer to the structure containing profile counters 111 | * 112 | * @return bool : true if successful; otherwise false. 113 | ******************************************************************************* 114 | */ 115 | void initProfileCnt(OVprofile *profileCnt) 116 | { 117 | int32 i; 118 | for(i=0;icallCount[i] = 0; 121 | profileCnt->sTime[i] = 0; 122 | profileCnt->accSum[i] = 0; 123 | } 124 | } 125 | /** 126 | ******************************************************************************* 127 | * @fn displayFps 128 | * @brief Calculates Frames per sec 129 | * 130 | * @param[in/out] profileCnt : Pointer to the structure containing profile counters 131 | * 132 | * @return bool : true if successful; otherwise false. 
133 | ******************************************************************************* 134 | */ 135 | void displayFps(Logger *mLog, OVprofile *profileCnt,cl_device_id clDeviceID ) 136 | { 137 | #ifndef NOPROF 138 | //static int32 dumped = 0; if(dumped) return; dumped = 1; 139 | uint32 gpuFreq; 140 | float32 means[MAX_TIMING]; 141 | float32 perfs[MAX_TIMING]; 142 | if(mLog) 143 | { 144 | LARGE_INTEGER li, l2; 145 | QueryPerformanceFrequency(&li); 146 | getGpuFrequency(clDeviceID,&gpuFreq); 147 | float32 freq = (float32)li.QuadPart; 148 | QueryPerformanceCounter(&li); 149 | int64 stime = myRdtsc(); 150 | Sleep(1000); 151 | QueryPerformanceCounter(&l2); 152 | int64 etime = myRdtsc(); 153 | freq *= ((float32)(etime-stime))/((float32)(l2.QuadPart-li.QuadPart)); 154 | mLog->Log(L"\nVCE Performance\n"); 155 | mLog->Log(L"Processor Frequency: %5.2f MHz (%6.2f)\n", freq/1000000, freq); 156 | mLog->Log(L"GPU Frequency : %6.2f MHz\n", (float32)gpuFreq); 157 | for(int32 i = 0; i < MAX_TIMING; i++) 158 | { 159 | if(profileCnt->callCount[i]) { 160 | float32 count = (float32)(profileCnt->callCount[i] - SKIP_TIMING); 161 | float32 mean = ((float32) profileCnt->accSum[i])/count; 162 | if (mean != 0){ 163 | means[i] = mean; 164 | perfs[i] = freq/mean; 165 | } 166 | } 167 | } 168 | //Log(L"VCE Frame Rate (encode+query) : %5.2f [FPS]\n", perf1); 169 | mLog->Log(L"VCE Frame Rate (encode) : %5.2f / %5.2f FPS\n", means[0], perfs[0]); 170 | if(means[3]) 171 | mLog->Log(L"VCE Frame Rate (copy back) : %5.2f / %5.2f FPS\n", means[3], perfs[3]); 172 | if(means[2]) 173 | mLog->Log(L"CL colourspace conversion : %5.2f / %5.2f FPS\n", means[2], perfs[2]); 174 | mLog->Log(L"Memory write : %5.2f / %5.2f [FPS]\n", means[5], perfs[5]); 175 | mLog->Log(L"Whole compression : %5.2f / %5.2f [FPS]\n", means[4], perfs[4]); 176 | } 177 | #endif 178 | } 179 | 180 | /** 181 | ******************************************************************************* 182 | * @fn captureTimeStop 183 | * @brief calculates 
difference between start and end timers 184 | * 185 | * @param[in/out] profileCnt : Pointer to the structure containing profile counters 186 | * @param[in] type : Timer type 187 | * 188 | * @return bool : true if successful; otherwise false. 189 | ******************************************************************************* 190 | */ 191 | void captureTimeStop(OVprofile *profileCnt, int32 type) 192 | { 193 | #ifndef NOPROF 194 | if(profileCnt->callCount[type]++ >= SKIP_TIMING) 195 | { 196 | int64 time = myRdtsc() - profileCnt->sTime[type]; 197 | profileCnt->accSum[type] += time; 198 | } 199 | #endif 200 | } 201 | /** 202 | ******************************************************************************* 203 | * @fn captureTimeStart 204 | * @brief Records start of the timer 205 | * 206 | * @param[in/out] profileCnt : Pointer to the structure containing profile counters 207 | * @param[in] type : Timer type 208 | * 209 | * @return bool : true if successful; otherwise false. 210 | ******************************************************************************* 211 | */ 212 | void captureTimeStart(OVprofile *profileCnt, int32 type) 213 | { 214 | #ifndef NOPROF 215 | profileCnt->sTime[type] = myRdtsc(); 216 | #endif 217 | } 218 | 219 | /** 220 | ******************************************************************************* 221 | * @fn getGpuFrequency 222 | * @brief Returns the GPU clock frequency 223 | * 224 | * @param[in] clDeviceID : Device ID for gpu 225 | * @param[in] gpuFreq : Gpu frequency to be written by the function 226 | * 227 | * @return bool : true if successful; otherwise false. 
228 | ******************************************************************************* 229 | */ 230 | cl_int getGpuFrequency(cl_device_id clDeviceID, uint32 *gpuFreq) 231 | { 232 | cl_int status; 233 | size_t size; 234 | status = f_clGetDeviceInfo(clDeviceID, 235 | CL_DEVICE_MAX_CLOCK_FREQUENCY, 236 | sizeof(uint32), 237 | gpuFreq, &size 238 | ); 239 | return(status); 240 | } -------------------------------------------------------------------------------- /OpenEncodeVFW/bs.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * bs.h : 3 | ***************************************************************************** 4 | * Copyright (C) 2003 Laurent Aimar 5 | * $Id: bs.h,v 1.1 2006/12/10 14:10:30 crypto1 Exp $ 6 | * 7 | * Authors: Laurent Aimar 8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * You should have received a copy of the GNU General Public License 20 | * along with this program; if not, write to the Free Software 21 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
22 | *****************************************************************************/ 23 | #include "OvEncodeTypedef.h" 24 | 25 | #ifdef _BS_H 26 | #warning FIXME Multiple inclusion of bs.h 27 | #else 28 | #define _BS_H 29 | 30 | typedef struct bs_s 31 | { 32 | uint8 *p_start; 33 | uint8 *p; 34 | uint8 *p_end; 35 | 36 | int i_left; /* i_count number of available bits */ 37 | } bs_t; 38 | 39 | static inline void bs_init( bs_t *s, void *p_data, int i_data ) 40 | { 41 | s->p_start = (uint8*)p_data; 42 | s->p = (uint8*)p_data; 43 | s->p_end = s->p + i_data; 44 | s->i_left = 8; 45 | } 46 | static inline int bs_pos_byte(bs_t* s) 47 | { 48 | return (s->p - s->p_start); 49 | } 50 | static inline int bs_pos( bs_t *s ) 51 | { 52 | return (int)( 8 * ( s->p - s->p_start ) + 8 - s->i_left ); 53 | } 54 | static inline int bs_eof( bs_t *s ) 55 | { 56 | return( s->p >= s->p_end ? 1: 0 ); 57 | } 58 | static inline uint32 bs_read( bs_t *s, int i_count ) 59 | { 60 | static uint32 i_mask[33] ={0x00, 61 | 0x01, 0x03, 0x07, 0x0f, 62 | 0x1f, 0x3f, 0x7f, 0xff, 63 | 0x1ff, 0x3ff, 0x7ff, 0xfff, 64 | 0x1fff, 0x3fff, 0x7fff, 0xffff, 65 | 0x1ffff, 0x3ffff, 0x7ffff, 0xfffff, 66 | 0x1fffff, 0x3fffff, 0x7fffff, 0xffffff, 67 | 0x1ffffff, 0x3ffffff, 0x7ffffff, 0xfffffff, 68 | 0x1fffffff,0x3fffffff,0x7fffffff,0xffffffff}; 69 | int i_shr; 70 | uint32 i_result = 0; 71 | 72 | while( i_count > 0 ) 73 | { 74 | if( s->p >= s->p_end ) 75 | { 76 | break; 77 | } 78 | 79 | if( ( i_shr = s->i_left - i_count ) >= 0 ) 80 | { 81 | /* more in the buffer than requested */ 82 | i_result |= ( *s->p >> i_shr )&i_mask[i_count]; 83 | s->i_left -= i_count; 84 | if( s->i_left == 0 ) 85 | { 86 | s->p++; 87 | s->i_left = 8; 88 | } 89 | return( i_result ); 90 | } 91 | else 92 | { 93 | /* less in the buffer than requested */ 94 | i_result |= (*s->p&i_mask[s->i_left]) << -i_shr; 95 | i_count -= s->i_left; 96 | s->p++; 97 | s->i_left = 8; 98 | } 99 | } 100 | 101 | return( i_result ); 102 | } 103 | 104 | #if 0 105 | /* Only > i386 
*/ 106 | static uint32_t bswap32( uint32_t x ) 107 | { 108 | asm( "bswap %0": "=r" (x):"0" (x)); 109 | return x; 110 | } 111 | /* work only for i_count <= 32 - 7 */ 112 | static inline uint32_t bs_read( bs_t *s, int i_count ) 113 | { 114 | if( s->p < s->p_end && i_count > 0 ) 115 | { 116 | #if 0 117 | uint32_t i_cache = ((s->p[0] << 24)+(s->p[1] << 16)+(s->p[2] << 8)+s->p[3]) << (8-s->i_left); 118 | #else 119 | uint32_t i_cache = bswap32( *((uint32_t*)s->p) ) << (8-s->i_left); 120 | #endif 121 | uint32_t i_ret = i_cache >> ( 32 - i_count); 122 | 123 | s->i_left -= i_count; 124 | #if 0 125 | if( s->i_left <= 0 ) 126 | { 127 | int i_skip = (8-s->i_left) >> 3; 128 | 129 | s->p += i_skip; 130 | 131 | s->i_left += i_skip << 3; 132 | } 133 | #else 134 | while( s->i_left <= 0 ) 135 | { 136 | s->p++; 137 | s->i_left += 8; 138 | } 139 | #endif 140 | return i_ret; 141 | } 142 | return 0; 143 | } 144 | 145 | #endif 146 | static inline uint32 bs_read1( bs_t *s ) 147 | { 148 | 149 | if( s->p < s->p_end ) 150 | { 151 | unsigned int i_result; 152 | 153 | s->i_left--; 154 | i_result = ( *s->p >> s->i_left )&0x01; 155 | if( s->i_left == 0 ) 156 | { 157 | s->p++; 158 | s->i_left = 8; 159 | } 160 | return i_result; 161 | } 162 | 163 | return 0; 164 | } 165 | static inline uint32 bs_show( bs_t *s, int i_count ) 166 | { 167 | #if 0 168 | bs_t s_tmp = *s; 169 | return bs_read( &s_tmp, i_count ); 170 | #else 171 | if( s->p < s->p_end && i_count > 0 ) 172 | { 173 | uint32 i_cache = ((s->p[0] << 24)+(s->p[1] << 16)+(s->p[2] << 8)+s->p[3]) << (8-s->i_left); 174 | return( i_cache >> ( 32 - i_count) ); 175 | } 176 | return 0; 177 | #endif 178 | } 179 | 180 | /* TODO optimize */ 181 | static inline void bs_skip( bs_t *s, int i_count ) 182 | { 183 | s->i_left -= i_count; 184 | 185 | while( s->i_left <= 0 ) 186 | { 187 | s->p++; 188 | s->i_left += 8; 189 | } 190 | } 191 | 192 | 193 | static inline int bs_read_ue( bs_t *s ) 194 | { 195 | int i = 0; 196 | 197 | while( bs_read1( s ) == 0 && s->p < 
s->p_end && i < 32 ) 198 | { 199 | i++; 200 | } 201 | return( ( 1 << i) - 1 + bs_read( s, i ) ); 202 | } 203 | static inline int bs_read_se( bs_t *s ) 204 | { 205 | int val = bs_read_ue( s ); 206 | 207 | return val&0x01 ? (val+1)/2 : -(val/2); 208 | } 209 | 210 | static inline int bs_read_te( bs_t *s, int x ) 211 | { 212 | if( x == 1 ) 213 | { 214 | return 1 - bs_read1( s ); 215 | } 216 | else if( x > 1 ) 217 | { 218 | return bs_read_ue( s ); 219 | } 220 | return 0; 221 | } 222 | 223 | /* TODO optimize (write x bits at once) */ 224 | static inline void bs_write( bs_t *s, int i_count, uint32 i_bits ) 225 | { 226 | while( i_count > 0 ) 227 | { 228 | if( s->p >= s->p_end ) 229 | { 230 | break; 231 | } 232 | 233 | i_count--; 234 | 235 | if( ( i_bits >> i_count )&0x01 ) 236 | { 237 | *s->p |= 1 << ( s->i_left - 1 ); 238 | } 239 | else 240 | { 241 | *s->p &= ~( 1 << ( s->i_left - 1 ) ); 242 | } 243 | s->i_left--; 244 | if( s->i_left == 0 ) 245 | { 246 | s->p++; 247 | s->i_left = 8; 248 | } 249 | } 250 | } 251 | 252 | static inline void bs_write1( bs_t *s, uint32 i_bits ) 253 | { 254 | if( s->p < s->p_end ) 255 | { 256 | s->i_left--; 257 | 258 | if( i_bits&0x01 ) 259 | { 260 | *s->p |= 1 << s->i_left; 261 | } 262 | else 263 | { 264 | *s->p &= ~( 1 << s->i_left ); 265 | } 266 | if( s->i_left == 0 ) 267 | { 268 | s->p++; 269 | s->i_left = 8; 270 | } 271 | } 272 | } 273 | 274 | static inline bool bs_byte_aligned(bs_t* b) 275 | { 276 | return (b->i_left == 8); 277 | } 278 | 279 | static inline void bs_align( bs_t *s ) 280 | { 281 | if( s->i_left != 8 ) 282 | { 283 | s->i_left = 8; 284 | s->p++; 285 | } 286 | } 287 | static inline void bs_align_0( bs_t *s ) 288 | { 289 | if( s->i_left != 8 ) 290 | { 291 | bs_write( s, s->i_left, 0 ); 292 | } 293 | } 294 | static inline void bs_align_1( bs_t *s ) 295 | { 296 | if( s->i_left != 8 ) 297 | { 298 | bs_write( s, s->i_left, ~0 ); 299 | } 300 | } 301 | 302 | 303 | 304 | /* golomb functions */ 305 | 306 | static inline void 
bs_write_ue( bs_t *s, unsigned int val ) 307 | { 308 | int i_size = 0; 309 | static const int i_size0_255[256] = 310 | { 311 | 1,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 312 | 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 313 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 314 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 315 | 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 316 | 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 317 | 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 318 | 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 319 | }; 320 | 321 | if( val == 0 ) 322 | { 323 | bs_write( s, 1, 1 ); 324 | } 325 | else 326 | { 327 | unsigned int tmp = ++val; 328 | 329 | if( tmp >= 0x00010000 ) 330 | { 331 | i_size += 16; 332 | tmp >>= 16; 333 | } 334 | if( tmp >= 0x100 ) 335 | { 336 | i_size += 8; 337 | tmp >>= 8; 338 | } 339 | i_size += i_size0_255[tmp]; 340 | 341 | bs_write( s, 2 * i_size - 1, val ); 342 | } 343 | } 344 | 345 | static inline void bs_write_se( bs_t *s, int val ) 346 | { 347 | bs_write_ue( s, val <= 0 ? 
-val * 2 : val * 2 - 1); 348 | } 349 | 350 | static inline void bs_write_te( bs_t *s, int x, int val ) 351 | { 352 | if( x == 1 ) 353 | { 354 | bs_write( s, 1, ~val ); 355 | } 356 | else if( x > 1 ) 357 | { 358 | bs_write_ue( s, val ); 359 | } 360 | } 361 | 362 | static inline void bs_rbsp_trailing( bs_t *s ) 363 | { 364 | bs_write( s, 1, 1 ); 365 | if( s->i_left != 8 ) 366 | { 367 | bs_write( s, s->i_left, 0x00 ); 368 | } 369 | } 370 | 371 | static inline int bs_size_ue( unsigned int val ) 372 | { 373 | static const int i_size0_254[255] = 374 | { 375 | 1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 376 | 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 377 | 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, 378 | 11,11,11,11,11,11,11,11,11,13,13,13,13,13,13,13,13,13,13,13,13,13,13, 379 | 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13, 380 | 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13, 381 | 13,13,13,13,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, 382 | 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, 383 | 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, 384 | 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, 385 | 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, 386 | 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15 387 | }; 388 | 389 | if( val < 255 ) 390 | { 391 | return i_size0_254[val]; 392 | } 393 | else 394 | { 395 | int i_size = 0; 396 | 397 | val++; 398 | 399 | if( val >= 0x10000 ) 400 | { 401 | i_size += 32; 402 | val = (val >> 16) - 1; 403 | } 404 | if( val >= 0x100 ) 405 | { 406 | i_size += 16; 407 | val = (val >> 8) - 1; 408 | } 409 | return i_size0_254[val] + i_size; 410 | } 411 | } 412 | 413 | static inline int bs_size_se( int val ) 414 | { 415 | return bs_size_ue( val <= 0 ? 
-val * 2 : val * 2 - 1); 416 | } 417 | 418 | static inline int bs_size_te( int x, int val ) 419 | { 420 | if( x == 1 ) 421 | { 422 | return 1; 423 | } 424 | else if( x > 1 ) 425 | { 426 | return bs_size_ue( val ); 427 | } 428 | return 0; 429 | } 430 | 431 | 432 | 433 | #endif 434 | 435 | -------------------------------------------------------------------------------- /OpenEncodeVFW/device.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "OpenEncodeVFW.h" 3 | 4 | #define Log(...) LogMsg(false, __VA_ARGS__) 5 | 6 | /** 7 | ******************************************************************************* 8 | * @fn getDevice 9 | * @brief returns the platform and devices found 10 | * 11 | * @param[in/out] deviceHandle : Handle for the device information 12 | * 13 | * @return bool : true if successful; otherwise false. 14 | ******************************************************************************* 15 | */ 16 | bool CodecInst::getDevice(OVDeviceHandle *deviceHandle) 17 | { 18 | bool status; 19 | 20 | /**************************************************************************/ 21 | /* Get the Platform */ 22 | /**************************************************************************/ 23 | deviceHandle->platform = NULL; 24 | status = getPlatform(deviceHandle->platform); 25 | if(status == false) 26 | { 27 | return false; 28 | } 29 | 30 | /// STEP 1: Check for GPU 31 | cl_device_type dType = CL_DEVICE_TYPE_GPU; 32 | status = gpuCheck(deviceHandle->platform,&dType); 33 | if(status == false) 34 | { 35 | return false; 36 | } 37 | 38 | /// STEP 2: Get the number of devices 39 | deviceHandle->numDevices = 0; 40 | deviceHandle->deviceInfo = NULL; 41 | 42 | /**************************************************************************/ 43 | /* Memory for deviceInfo gets allocated inside the getDeviceInfo */ 44 | /* function depending on numDevices. 
This needs to be freed after the */ 45 | /* usage */ 46 | /**************************************************************************/ 47 | status = getDeviceInfo(&deviceHandle->deviceInfo,&deviceHandle->numDevices); 48 | if(status == false) 49 | { 50 | return false; 51 | } 52 | return true; 53 | } 54 | 55 | /** 56 | ******************************************************************************* 57 | * @fn getDeviceCap 58 | * @brief This function returns the device capabilities. 59 | * 60 | * @param[in] oveContext : Encoder context 61 | * @param[in] oveDeviceID : Device ID 62 | * @param[out] encodeCaps : pointer to encoder capabilities structure 63 | * 64 | * @return bool : true if successful; otherwise false. 65 | ******************************************************************************* 66 | */ 67 | bool CodecInst::getDeviceCap(OPContextHandle oveContext,uint32 oveDeviceID, 68 | OVE_ENCODE_CAPS *encodeCaps) 69 | { 70 | uint32 numCaps=1; 71 | bool status; 72 | 73 | /**************************************************************************/ 74 | /* initialize the encode capabilities variable */ 75 | /**************************************************************************/ 76 | encodeCaps->EncodeModes = OVE_AVC_FULL; 77 | encodeCaps->encode_cap_size = sizeof(OVE_ENCODE_CAPS); 78 | encodeCaps->caps.encode_cap_full->max_picture_size_in_MB = 0; 79 | encodeCaps->caps.encode_cap_full->min_picture_size_in_MB = 0; 80 | encodeCaps->caps.encode_cap_full->num_picture_formats = 0; 81 | encodeCaps->caps.encode_cap_full->num_Profile_level = 0; 82 | encodeCaps->caps.encode_cap_full->max_bit_rate = 0; 83 | encodeCaps->caps.encode_cap_full->min_bit_rate = 0; 84 | encodeCaps->caps.encode_cap_full->supported_task_priority = OVE_ENCODE_TASK_PRIORITY_LEVEL1; 85 | 86 | for(int32 j=0; jcaps.encode_cap_full->supported_picture_formats[j] = OVE_PICTURE_FORMAT_NONE; 88 | 89 | for(int32 j=0; jcaps.encode_cap_full->supported_profile_level[j].profile = 0; 92 | 
encodeCaps->caps.encode_cap_full->supported_profile_level[j].level = 0; 93 | } 94 | 95 | /**************************************************************************/ 96 | /* Get the device capabilities */ 97 | /**************************************************************************/ 98 | status = OVEncodeGetDeviceCap(oveContext, 99 | oveDeviceID, 100 | encodeCaps->encode_cap_size, 101 | &numCaps, 102 | encodeCaps); 103 | return(status); 104 | } 105 | 106 | /** 107 | ******************************************************************************* 108 | * @fn getDeviceInfo 109 | * @brief returns device information 110 | * 111 | * @param[out] deviceInfo : Device info 112 | * @param[out] numDevices : Number of devices present 113 | * 114 | * @return bool : true if successful; otherwise false. 115 | ******************************************************************************* 116 | */ 117 | bool CodecInst::getDeviceInfo(ovencode_device_info **deviceInfo, 118 | uint32 *numDevices) 119 | { 120 | bool status; 121 | status = OVEncodeGetDeviceInfo(numDevices, 0); 122 | if(!status) 123 | { 124 | Log(L"OVEncodeGetDeviceInfo failed!\n"); 125 | return false; 126 | } 127 | else 128 | { 129 | if(*numDevices == 0) 130 | { 131 | Log(L"No suitable devices found!\n"); 132 | return false; 133 | } 134 | } 135 | /**************************************************************************/ 136 | /* Get information about each device found */ 137 | /**************************************************************************/ 138 | *deviceInfo = new ovencode_device_info[*numDevices]; 139 | memset(*deviceInfo,0,sizeof(ovencode_device_info)* (*numDevices)); 140 | status = OVEncodeGetDeviceInfo(numDevices, *deviceInfo); 141 | if(!status) 142 | { 143 | Log(L"OVEncodeGetDeviceInfo failed!\n"); 144 | return false; 145 | } 146 | return true; 147 | } 148 | 149 | /** 150 | ******************************************************************************* 151 | * @fn gpuCheck 152 | * @brief Checks for 
GPU present or not 153 | * 154 | * @param[in] platform : Platform id 155 | * @param[out] dType : Device type returned GPU/CPU 156 | * 157 | * @return bool : true if successful; otherwise false. 158 | ******************************************************************************* 159 | */ 160 | bool CodecInst::gpuCheck(cl_platform_id platform,cl_device_type* dType) 161 | { 162 | cl_int err; 163 | cl_context_properties cps[3] = 164 | { 165 | CL_CONTEXT_PLATFORM, 166 | (cl_context_properties)platform, 167 | 0 168 | }; 169 | 170 | cl_context context = f_clCreateContextFromType(cps, 171 | (*dType), 172 | NULL, 173 | NULL, 174 | &err); 175 | f_clReleaseContext(context); 176 | 177 | if(err == CL_DEVICE_NOT_FOUND) 178 | { 179 | Log(L"GPU not found. Fallback to CPU\n"); 180 | *dType = CL_DEVICE_TYPE_CPU; 181 | return false; 182 | } 183 | 184 | return true; 185 | } 186 | /** 187 | ******************************************************************************* 188 | * @fn getPlatform 189 | * @brief Get platform to run 190 | * 191 | * @param[in] platform : Platform id 192 | * 193 | * @return bool : true if successful; otherwise false. 194 | ******************************************************************************* 195 | */ 196 | bool CodecInst::getPlatform(cl_platform_id &platform) 197 | { 198 | cl_uint numPlatforms; 199 | cl_int err = f_clGetPlatformIDs(0, NULL, &numPlatforms); 200 | if (CL_SUCCESS != err) 201 | { 202 | Log(L"clGetPlatformIDs() failed %d\n", err); 203 | return false; 204 | } 205 | /**************************************************************************/ 206 | /* If there are platforms, make sure they are AMD. 
*/ 207 | /**************************************************************************/ 208 | if (0 < numPlatforms) 209 | { 210 | cl_platform_id* platforms = new cl_platform_id[numPlatforms]; 211 | err = f_clGetPlatformIDs(numPlatforms, platforms, NULL); 212 | if (CL_SUCCESS != err) 213 | { 214 | Log(L"clGetPlatformIDs() failed %d\n", err); 215 | delete [] platforms; 216 | return false; 217 | } 218 | /**********************************************************************/ 219 | /* Loop through all the platforms looking for an AMD system. */ 220 | /**********************************************************************/ 221 | for (uint32 i = 0; i < numPlatforms; ++i) 222 | { 223 | int8 pbuf[100]; 224 | err = f_clGetPlatformInfo(platforms[i], 225 | CL_PLATFORM_VENDOR, 226 | sizeof(pbuf), 227 | pbuf, 228 | NULL); 229 | /******************************************************************/ 230 | /* Stop at the first platform that is an AMD system. */ 231 | /******************************************************************/ 232 | if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) 233 | { 234 | platform = platforms[i]; 235 | break; 236 | } 237 | } 238 | delete [] platforms; 239 | } 240 | 241 | if (NULL == platform) 242 | { 243 | Log(L"Couldn't find AMD platform, cannot proceed.\n"); 244 | return false; 245 | } 246 | 247 | return true; 248 | } 249 | 250 | /** 251 | Utility function for configuration dialog. 252 | */ 253 | DeviceMap CodecInst::getDeviceList() 254 | { 255 | DeviceMap devs; 256 | OVDeviceHandle hDev; 257 | memset(&hDev, 0, sizeof(hDev)); 258 | 259 | wchar_t tmp[1024]; 260 | 261 | if(getDevice(&hDev)) 262 | { 263 | for(uint32 i=0; i < hDev.numDevices; i++) 264 | { 265 | uint32 deviceId = hDev.deviceInfo[i].device_id; 266 | cl_device_id clDevId = reinterpret_cast(deviceId); 267 | #ifdef _M_X64 268 | // May ${DEITY} have mercy on us all. 
269 | intptr_t ptr = intptr_t((intptr_t*)&clDevId); 270 | clDevId = (cl_device_id)((intptr_t)clDevId | (ptr & 0xFFFFFFFF00000000)); 271 | #endif 272 | 273 | // print device name 274 | size_t valueSize = 0; 275 | f_clGetDeviceInfo(clDevId, CL_DEVICE_NAME, 0, NULL, &valueSize); 276 | char* value = (char*) malloc(valueSize); 277 | f_clGetDeviceInfo(clDevId, CL_DEVICE_NAME, valueSize, value, NULL); 278 | cl_int iVal; 279 | //f_clGetDeviceInfo(clDevId, CL_DEVICE_ENDIAN_LITTLE, sizeof(iVal), &iVal, NULL); 280 | f_clGetDeviceInfo(clDevId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(iVal), &iVal, NULL); 281 | 282 | //swprintf(tmp, 1023, L"%S (%S)", value, iVal == CL_TRUE ? "LE" : "BE"); 283 | swprintf(tmp, 1023, L"%S (%d CU)", value, iVal); 284 | wstring wstr = tmp; 285 | devs.insert(pair(clDevId, wstr)); 286 | free(value); 287 | } 288 | } 289 | 290 | if(hDev.deviceInfo) delete [] hDev.deviceInfo; 291 | 292 | return devs; 293 | } 294 | 295 | bool CodecInst::createCPUContext(cl_platform_id platform) 296 | { 297 | cl_int err; 298 | cl_context_properties cps[3] = 299 | { 300 | CL_CONTEXT_PLATFORM, 301 | (cl_context_properties)platform, 302 | 0 303 | }; 304 | 305 | mCpuCtx = f_clCreateContextFromType(cps, 306 | CL_DEVICE_TYPE_CPU, 307 | NULL, 308 | NULL, 309 | &err); 310 | 311 | if(err != CL_SUCCESS) { 312 | Log(L"Could not create CPU CL context. Error: %d.\n", err); 313 | return false; 314 | } 315 | 316 | cl_uint count = 1; 317 | err = f_clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &mCpuDev, &count); 318 | if(err != CL_SUCCESS || count == 0){ 319 | Log(L"Could not get CPU device id. Error: %d.\n", err); 320 | return false; 321 | } 322 | 323 | mCpuCmdQueue = f_clCreateCommandQueue(mCpuCtx, mCpuDev, 0, &err); 324 | if(err != CL_SUCCESS) { 325 | Log(L"\nCreate command queue #0 failed! 
Error : %d\n", err); 326 | return false; 327 | } 328 | //clReleaseContext(context); 329 | return true; 330 | } -------------------------------------------------------------------------------- /OpenEncodeVFW/OpenEncodeVFW.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {2EE32DA3-C809-449C-9D24-2ACA848AED59} 23 | OpenEncodeVFW 24 | Win32Proj 25 | 26 | 27 | 28 | DynamicLibrary 29 | Unicode 30 | true 31 | v120 32 | 33 | 34 | DynamicLibrary 35 | Unicode 36 | true 37 | v120 38 | 39 | 40 | DynamicLibrary 41 | Unicode 42 | v120 43 | 44 | 45 | DynamicLibrary 46 | Unicode 47 | v120 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | <_ProjectFileVersion>10.0.30319.1 67 | $(SolutionDir)$(Configuration)\ 68 | $(SolutionDir)$(Configuration)\ 69 | $(Configuration)\ 70 | $(Configuration)\ 71 | true 72 | true 73 | $(SolutionDir)$(Configuration)\ 74 | $(SolutionDir)$(Configuration)\ 75 | $(Configuration)\ 76 | $(Configuration)\ 77 | false 78 | false 79 | $(ProjectDir)\openencodeinc;$(AMDAPPSDKROOT)include;$(IncludePath) 80 | $(ProjectDir)\openencodeinc;$(AMDAPPSDKROOT)include;$(IncludePath) 81 | $(ProjectDir)\openencodeinc;$(AMDAPPSDKROOT)include;$(IncludePath) 82 | $(ProjectDir)\openencodeinc;$(AMDAPPSDKROOT)include;$(IncludePath) 83 | 84 | 85 | 86 | Disabled 87 | WIN32;_DEBUG;_WINDOWS;_USRDLL;OPENENCODEVFW_EXPORTS;%(PreprocessorDefinitions) 88 | true 89 | EnableFastChecks 90 | MultiThreadedDebugDLL 91 | Use 92 | Level3 93 | EditAndContinue 94 | StreamingSIMDExtensions2 95 | 96 | 97 | .;%(AdditionalIncludeDirectories) 98 | 99 | 100 | WinMM.lib;psapi.lib;Shlwapi.lib;User32.lib;Gdi32.lib;%(AdditionalDependencies) 101 | openencode.def 102 | true 103 | Windows 104 | MachineX86 105 | %(AdditionalLibraryDirectories) 106 | 107 | 108 | mkdir ..\OpenEncode32 
109 | copy $(TargetPath) ..\OpenEncode32 110 | 111 | 112 | 113 | 114 | Disabled 115 | WIN32;_DEBUG;_WINDOWS;_USRDLL;OPENENCODEVFW_EXPORTS;%(PreprocessorDefinitions) 116 | EnableFastChecks 117 | MultiThreadedDebugDLL 118 | Use 119 | Level3 120 | ProgramDatabase 121 | StreamingSIMDExtensions2 122 | .;%(AdditionalIncludeDirectories) 123 | 124 | 125 | WinMM.lib;psapi.lib;Shlwapi.lib;User32.lib;Gdi32.lib;%(AdditionalDependencies) 126 | openencode.def 127 | true 128 | Windows 129 | %(AdditionalLibraryDirectories) 130 | 131 | 132 | mkdir ..\OpenEncode64 133 | copy $(TargetPath) ..\OpenEncode64 134 | 135 | 136 | 137 | 138 | MaxSpeed 139 | true 140 | WIN32;NDEBUG;_WINDOWS;_USRDLL;OPENENCODEVFW_EXPORTS;%(PreprocessorDefinitions) 141 | MultiThreadedDLL 142 | true 143 | Use 144 | Level3 145 | ProgramDatabase 146 | StreamingSIMDExtensions2 147 | Speed 148 | true 149 | true 150 | AnySuitable 151 | true 152 | Async 153 | .;%(AdditionalIncludeDirectories) 154 | 155 | 156 | WinMM.lib;psapi.lib;Shlwapi.lib;User32.lib;Gdi32.lib;%(AdditionalDependencies) 157 | openencode.def 158 | true 159 | Windows 160 | true 161 | true 162 | MachineX86 163 | %(AdditionalLibraryDirectories) 164 | 165 | 166 | mkdir ..\OpenEncode32 167 | copy $(TargetPath) ..\OpenEncode32 168 | 169 | 170 | 171 | 172 | MaxSpeed 173 | true 174 | WIN32;NDEBUG;_WINDOWS;_USRDLL;OPENENCODEVFW_EXPORTS;%(PreprocessorDefinitions) 175 | MultiThreadedDLL 176 | true 177 | Use 178 | Level3 179 | ProgramDatabase 180 | StreamingSIMDExtensions2 181 | .;%(AdditionalIncludeDirectories) 182 | 183 | 184 | WinMM.lib;psapi.lib;Shlwapi.lib;User32.lib;Gdi32.lib;%(AdditionalDependencies) 185 | openencode.def 186 | true 187 | Windows 188 | true 189 | true 190 | %(AdditionalLibraryDirectories) 191 | $(OutDir)$(TargetName)$(TargetExt) 192 | 193 | 194 | mkdir ..\OpenEncode64 195 | copy $(TargetPath) ..\OpenEncode64 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | Create 212 | Create 213 | Create 214 | 
// The kernels below write the image bottom-up when FLIP is defined
// (BMP is usually stored upside-down).
#define FLIP
//TODO RGB_LIMITED stuff is iffy maybe

// Colour matrices
// ---------------
// Each #ifdef section below provides one RGB -> YUV matrix as three float4
// rows. The kernels build every pixel as (c0, c1, c2, 255.f), so .xyz of a
// row weights the three colour channels and .w * 255 is a constant offset:
//   0.062745f <- 16.f / 255.f
//   0.501961f <- 128.f / 255.f
// Ycoeff/Ucoeff/Vcoeff expect channels in R,G,B order; the *B variants hold
// the same weights with x and z swapped for B,G,R input.
// Sections that define RGB_LIMITED make the kernels first rescale the colour
// channels with 16 + c * 219 / 255.
// NOTE(review): the section is presumably selected with a -D build option by
// the host code; confirm against the clBuildProgram call.

#ifdef BT601_FULL
// Y + 16.f
//#define NO_OFFSET
//#define RGB_LIMITED
//RGB in full range [0...255]
//http://www.mplayerhq.hu/DOCS/tech/colorspaces.txt
#define Ycoeff ((float4)(0.257f, 0.504f, 0.098f, 0.062745f))
#define Ucoeff ((float4)(-0.148f, -0.291f, 0.439f, 0.501961f))
#define Vcoeff ((float4)(0.439f, -0.368f, -0.071f, 0.501961f))

//BGR
#define YcoeffB ((float4)(0.098f, 0.504f, 0.257f, 0.062745f))
#define UcoeffB ((float4)(0.439f, -0.291f, -0.148f, 0.501961f))
#define VcoeffB ((float4)(-0.071f, -0.368f, 0.439f, 0.501961f))
#endif

#ifdef BT601_LIMITED
#define RGB_LIMITED
#define Ycoeff ((float4)(0.299f, 0.587f, 0.114f, 0.f))
#define Ucoeff ((float4)(-0.14713f, -0.28886f, 0.436f, 0.501961f))
#define Vcoeff ((float4)(0.615f, -0.51499f, -0.10001f, 0.501961f))

//BGR
#define YcoeffB ((float4)(0.114f, 0.587f, 0.299f, 0.f))
#define UcoeffB ((float4)(0.436f, -0.28886f, -0.14713f, 0.501961f))
#define VcoeffB ((float4)(-0.10001f, -0.51499f, 0.615f, 0.501961f))
#endif

#ifdef BT601_FULL_YCbCr
//RGB 0..255
//YCbCr 0..255
#define Ycoeff ((float4)(0.299f, 0.587f, 0.114f, 0.f))
#define Ucoeff ((float4)(-0.169f, -0.331f, 0.5f, 0.501961f))
#define Vcoeff ((float4)(0.5f, -0.419f, -0.081f, 0.501961f))

//BGR
#define YcoeffB ((float4)(0.114f, 0.587f, 0.299f, 0.f))
#define UcoeffB ((float4)(0.5f, -0.331f, -0.169f, 0.501961f))
#define VcoeffB ((float4)(-0.081f, -0.419f, 0.5f, 0.501961f))
#endif

#ifdef BT709_FULL2
// Y + 16.f
//RGB in full range [0...255]
#define Ycoeff ((float4)(0.1826f, 0.6142f, 0.0620f, 0.062745f))
#define Ucoeff ((float4)(-0.1006f, -0.3386f, 0.4392f, 0.501961f))
#define Vcoeff ((float4)(0.4392f, -0.3989f, -0.0403f, 0.501961f))

//BGR
#define YcoeffB ((float4)(0.0620f, 0.6142f, 0.1826f, 0.062745f))
#define UcoeffB ((float4)(0.4392f, -0.3386f, -0.1006f, 0.501961f))
#define VcoeffB ((float4)(-0.0403f, -0.3989f, 0.4392f, 0.501961f))
#endif

//#ifdef BT709_ALT2_FULL
#ifdef BT709_FULL
// Y + 16.f, from OBS
//RGB in full range [0...255]
#define Ycoeff ((float4)(0.182586f, 0.614231f, 0.062007f, 0.062745f))
#define Ucoeff ((float4)(-0.100644f, -0.338572f, 0.439216f, 0.501961f))
#define Vcoeff ((float4)(0.439216f, -0.398942f, -0.040274f, 0.501961f))

//BGR
#define YcoeffB ((float4)(0.062007f, 0.614231f, 0.182586f, 0.062745f))
#define UcoeffB ((float4)(0.439216f, -0.338572f, -0.100644f, 0.501961f))
#define VcoeffB ((float4)(-0.040274f, -0.398942f, 0.439216f, 0.501961f))
#endif

#ifdef BT709_LIMITED
#define RGB_LIMITED
#define Ycoeff ((float4)(0.2126f, 0.7152f, 0.0722f, 0.f))
#define Ucoeff ((float4)(-0.09991f, -0.33609f, 0.436f, 0.501961f))
#define Vcoeff ((float4)(0.615f, -0.55861f, -0.05639f, 0.501961f))

//BGR
#define YcoeffB ((float4)(0.0722f, 0.7152f, 0.2126f, 0.f))
#define UcoeffB ((float4)(0.436f, -0.33609f, -0.09991f, 0.501961f))
#define VcoeffB ((float4)(-0.05639f, -0.55861f, 0.615f, 0.501961f))
#endif

#ifdef BT709_ALT1_LIMITED
//RGB limited to [16...235]
#define RGB_LIMITED
#define Ycoeff ((float4)(0.2126f, 0.7152f, 0.0722f, 0.f))
#define Ucoeff ((float4)(-0.1146f, -0.3854f, 0.5000f, 0.501961f))
#define Vcoeff ((float4)(0.5000f, -0.4542f, -0.0468f, 0.501961f))

//BGR
#define YcoeffB ((float4)(0.0722f, 0.7152f, 0.2126f, 0.f))
#define UcoeffB ((float4)(0.5000f, -0.3854f, -0.1146f, 0.501961f))
#define VcoeffB ((float4)(-0.0468f, -0.4542f, 0.5000f, 0.501961f))
#endif

// RGBA (4 bytes per pixel) -> NV12, luma plane.
// One work-item per source pixel: the global size is (width, height).
// input        : source pixels, 'width' uchar4 per row, no row padding
// output       : NV12 surface; luma rows are 'alignedWidth' bytes apart
// alignedWidth : row pitch of the output surface in bytes
// offset       : start offset into 'output'; only used when FLIP is not defined
__kernel void RGBAtoNV12_Y(__global uchar4 *input,
						__global uchar *output,
						int alignedWidth,
						int offset)
{
	const int col = get_global_id(0);
	const int row = get_global_id(1);
	const int srcWidth = get_global_size(0);
	const int srcHeight = get_global_size(1);

	// w is pinned to 255 so that the .w of the matrix row becomes an offset.
	float4 px = (float4)(convert_float3(input[col + srcWidth * row].xyz), 255.f);

#ifdef RGB_LIMITED
	px.xyz = 16.f + px.xyz * 219.f / 255.f;
#endif

	const uchar luma = convert_uchar_sat_rte(dot(Ycoeff, px));

#ifdef FLIP
	// Source row 0 lands on the last output row.
	const int dst = col + (srcHeight - row - 1) * alignedWidth;
#else
	const int dst = offset + col + row * alignedWidth;
#endif
	output[dst] = luma;
}

// RGBA (4 bytes per pixel) -> NV12, interleaved chroma plane.
// Run over half width/height: one work-item per 2x2 block of source pixels,
// so the global size is (width / 2, height / 2).
// The U,V pair is written after 'alignedWidth * height' bytes of luma.
// Parameters are the same as for RGBAtoNV12_Y.
__kernel void RGBAtoNV12_UV(__global uchar4 *input,
						__global uchar *output,
						int alignedWidth,
						int offset)
{
	const int col = get_global_id(0);
	const int row = get_global_id(1);

	const uint srcWidth = get_global_size(0) * 2;
	const uint chromaRows = get_global_size(1);
	const uint srcHeight = get_global_size(1) * 2;

#ifdef FLIP
	const uint dst = alignedWidth * srcHeight + // skip the luma plane
			(chromaRows - row - 1) * alignedWidth + col * 2;
#else
	const uint dst = offset + alignedWidth * srcHeight + row * alignedWidth + col * 2;
#endif

	// Indices of the left pixel in the upper and lower row of the 2x2 block.
	const uint top = col * 2 + srcWidth * row * 2;
	const uint bottom = top + srcWidth;

	float4 p00 = (float4)(convert_float3(input[top].xyz), 255.f);
	float4 p01 = (float4)(convert_float3(input[top + 1].xyz), 255.f);
	float4 p10 = (float4)(convert_float3(input[bottom].xyz), 255.f);
	float4 p11 = (float4)(convert_float3(input[bottom + 1].xyz), 255.f);

#ifdef RGB_LIMITED
	p00.xyz = 16.f + p00.xyz * 219.f / 255.f;
	p01.xyz = 16.f + p01.xyz * 219.f / 255.f;
	p10.xyz = 16.f + p10.xyz * 219.f / 255.f;
	p11.xyz = 16.f + p11.xyz * 219.f / 255.f;
#endif

	const float2 uv00 = (float2)(dot(p00, Ucoeff), dot(p00, Vcoeff));
	const float2 uv01 = (float2)(dot(p01, Ucoeff), dot(p01, Vcoeff));
	const float2 uv10 = (float2)(dot(p10, Ucoeff), dot(p10, Vcoeff));
	const float2 uv11 = (float2)(dot(p11, Ucoeff), dot(p11, Vcoeff));

	// Chroma of the block is the mean of its four pixels.
	const uchar2 uv = convert_uchar2_sat_rte((uv00 + uv01 + uv10 + uv11) / 4);

	output[dst] = uv.x;
	output[dst + 1] = uv.y;
}

// BGRA (4 bytes per pixel) -> NV12, luma plane.
// Same as RGBAtoNV12_Y but uses the channel-swapped matrix row YcoeffB.
__kernel void BGRAtoNV12_Y(const __global uchar4 *input,
						__global uchar *output,
						int alignedWidth,
						int offset)
{
	const int col = get_global_id(0);
	const int row = get_global_id(1);
	const int srcWidth = get_global_size(0);
	const int srcHeight = get_global_size(1);

	float4 px = (float4)(convert_float3(input[col + srcWidth * row].xyz), 255.f);

#ifdef RGB_LIMITED
	px.xyz = 16.f + px.xyz * 219.f / 255.f;
#endif

	const uchar luma = convert_uchar_sat_rte(dot(YcoeffB, px));

#ifdef FLIP
	const int dst = col + (srcHeight - row - 1) * alignedWidth;
#else
	const int dst = offset + col + row * alignedWidth;
#endif
	output[dst] = luma;
}

// BGRA (4 bytes per pixel) -> NV12, interleaved chroma plane.
// Same as RGBAtoNV12_UV but uses the channel-swapped rows UcoeffB / VcoeffB.
// Run over half width/height.
__kernel void BGRAtoNV12_UV(const __global uchar4 *input,
						__global uchar *output,
						int alignedWidth,
						int offset)
{
	const int col = get_global_id(0);
	const int row = get_global_id(1);

	const uint srcWidth = get_global_size(0) * 2;
	const uint chromaRows = get_global_size(1);
	const uint srcHeight = get_global_size(1) * 2;

	const uint top = col * 2 + srcWidth * row * 2;
	const uint bottom = top + srcWidth;

#ifdef FLIP
	const uint dst = alignedWidth * srcHeight + // skip the luma plane
			(chromaRows - row - 1) * alignedWidth + col * 2;
#else
	const uint dst = offset + alignedWidth * srcHeight + row * alignedWidth + col * 2;
#endif

	// Original author's note: dot() showed no measurable difference to plain
	// mul/add, at least on the GPU.
	float4 p00 = (float4)(convert_float3(input[top].xyz), 255.f);
	float4 p01 = (float4)(convert_float3(input[top + 1].xyz), 255.f);
	float4 p10 = (float4)(convert_float3(input[bottom].xyz), 255.f);
	float4 p11 = (float4)(convert_float3(input[bottom + 1].xyz), 255.f);

#ifdef RGB_LIMITED
	p00.xyz = 16.f + p00.xyz * 219.f / 255.f;
	p01.xyz = 16.f + p01.xyz * 219.f / 255.f;
	p10.xyz = 16.f + p10.xyz * 219.f / 255.f;
	p11.xyz = 16.f + p11.xyz * 219.f / 255.f;
#endif

	const float2 uv00 = (float2)(dot(p00, UcoeffB), dot(p00, VcoeffB));
	const float2 uv01 = (float2)(dot(p01, UcoeffB), dot(p01, VcoeffB));
	const float2 uv10 = (float2)(dot(p10, UcoeffB), dot(p10, VcoeffB));
	const float2 uv11 = (float2)(dot(p11, UcoeffB), dot(p11, VcoeffB));

	const uchar2 uv = convert_uchar2_sat_rte((uv00 + uv01 + uv10 + uv11) / 4);

	output[dst] = uv.x;
	output[dst + 1] = uv.y;
}

// The kernels that follow convert packed 3-byte RGB / BGR input to NV12.
// RGB (3 bytes per pixel, packed) -> NV12, luma plane.
// One work-item per source pixel: the global size is (width, height).
// input        : source bytes, 'width' * 3 bytes per row, no row padding
// output       : NV12 surface; luma rows are 'alignedWidth' bytes apart
// alignedWidth : row pitch of the output surface in bytes
// offset       : start offset into 'output'; only used when FLIP is not defined
__kernel void RGBtoNV12_Y(__global uchar *input,
						__global uchar *output,
						int alignedWidth,
						int offset)
{
	const int col = get_global_id(0);
	const int row = get_global_id(1);
	const uint srcWidth = get_global_size(0);
	const uint srcHeight = get_global_size(1);

	// vload3 addresses in units of 3 bytes; the read is unaligned and
	// probably slow compared with the uchar4 kernels.
	float4 px = (float4)(convert_float3(vload3(col + srcWidth * row, input)), 255.0f);

#ifdef RGB_LIMITED
	px.xyz = 16.f + px.xyz * 219.f / 255.f;
#endif

	const uchar luma = convert_uchar_sat_rte(dot(Ycoeff, px));

#ifdef FLIP
	// Source row 0 lands on the last output row.
	const uint dst = col + (srcHeight - row - 1) * alignedWidth;
#else
	const int dst = offset + col + row * alignedWidth;
#endif
	output[dst] = luma;
}

// RGB (3 bytes per pixel, packed) -> NV12, interleaved chroma plane.
// Run over half width/height: one work-item per 2x2 block of source pixels.
// The U,V pair is written after 'alignedWidth * height' bytes of luma.
__kernel void RGBtoNV12_UV(__global uchar *input,
						__global uchar *output,
						int alignedWidth,
						int offset)
{
	const int col = get_global_id(0);
	const int row = get_global_id(1);

	const uint srcWidth = get_global_size(0) * 2;
	const uint chromaRows = get_global_size(1);
	const uint srcHeight = get_global_size(1) * 2;

#ifdef FLIP
	const uint dst = alignedWidth * srcHeight + // skip the luma plane
			(chromaRows - row - 1) * alignedWidth + col * 2;
#else
	const uint dst = offset + alignedWidth * srcHeight + row * alignedWidth + col * 2;
#endif

	// Pixel indices of the left pixel in the upper and lower row of the block.
	const uint top = col * 2 + srcWidth * row * 2;
	const uint bottom = top + srcWidth;

	float4 p00 = (float4)(convert_float3(vload3(top, input)), 255.0f);
	float4 p01 = (float4)(convert_float3(vload3(top + 1, input)), 255.0f);
	float4 p10 = (float4)(convert_float3(vload3(bottom, input)), 255.0f);
	float4 p11 = (float4)(convert_float3(vload3(bottom + 1, input)), 255.0f);

#ifdef RGB_LIMITED
	p00.xyz = 16.f + p00.xyz * 219.f / 255.f;
	p01.xyz = 16.f + p01.xyz * 219.f / 255.f;
	p10.xyz = 16.f + p10.xyz * 219.f / 255.f;
	p11.xyz = 16.f + p11.xyz * 219.f / 255.f;
#endif

	const float2 uv00 = (float2)(dot(p00, Ucoeff), dot(p00, Vcoeff));
	const float2 uv01 = (float2)(dot(p01, Ucoeff), dot(p01, Vcoeff));
	const float2 uv10 = (float2)(dot(p10, Ucoeff), dot(p10, Vcoeff));
	const float2 uv11 = (float2)(dot(p11, Ucoeff), dot(p11, Vcoeff));

	// Chroma of the block is the mean of its four pixels.
	const uchar2 uv = convert_uchar2_sat_rte((uv00 + uv01 + uv10 + uv11) / 4);

	output[dst] = uv.x;
	output[dst + 1] = uv.y;
}

// BGR (3 bytes per pixel, packed) -> NV12, luma plane.
// Same as RGBtoNV12_Y but uses the channel-swapped matrix row YcoeffB.
__kernel void BGRtoNV12_Y(__global uchar *input,
						__global uchar *output,
						int alignedWidth,
						int offset)
{
	const int col = get_global_id(0);
	const int row = get_global_id(1);
	const uint srcWidth = get_global_size(0);
	const uint srcHeight = get_global_size(1);

	// Unaligned 3-byte read, probably slow.
	float4 px = (float4)(convert_float3(vload3(col + srcWidth * row, input)), 255.0f);

#ifdef RGB_LIMITED
	px.xyz = 16.f + px.xyz * 219.f / 255.f;
#endif

	const uchar luma = convert_uchar_sat_rte(dot(YcoeffB, px));

#ifdef FLIP
	const uint dst = col + (srcHeight - row - 1) * alignedWidth;
#else
	const int dst = offset + col + row * alignedWidth;
#endif
	output[dst] = luma;
}

// BGR (3 bytes per pixel, packed) -> NV12, interleaved chroma plane.
// Same as RGBtoNV12_UV but uses the channel-swapped rows UcoeffB / VcoeffB.
// Run over half width/height.
__kernel void BGRtoNV12_UV(__global uchar *input,
						__global uchar *output,
						int alignedWidth,
						int offset)
{
	const int col = get_global_id(0);
	const int row = get_global_id(1);

	const uint srcWidth = get_global_size(0) * 2;
	const uint chromaRows = get_global_size(1);
	const uint srcHeight = get_global_size(1) * 2;

#ifdef FLIP
	const uint dst = alignedWidth * srcHeight + // skip the luma plane
			(chromaRows - row - 1) * alignedWidth + col * 2;
#else
	const uint dst = offset + alignedWidth * srcHeight + row * alignedWidth + col * 2;
#endif

	const uint top = col * 2 + srcWidth * row * 2;
	const uint bottom = top + srcWidth;

	float4 p00 = (float4)(convert_float3(vload3(top, input)), 255.0f);
	float4 p01 = (float4)(convert_float3(vload3(top + 1, input)), 255.0f);
	float4 p10 = (float4)(convert_float3(vload3(bottom, input)), 255.0f);
	float4 p11 = (float4)(convert_float3(vload3(bottom + 1, input)), 255.0f);

#ifdef RGB_LIMITED
	p00.xyz = 16.f + p00.xyz * 219.f / 255.f;
	p01.xyz = 16.f + p01.xyz * 219.f / 255.f;
	p10.xyz = 16.f + p10.xyz * 219.f / 255.f;
	p11.xyz = 16.f + p11.xyz * 219.f / 255.f;
#endif

	const float2 uv00 = (float2)(dot(p00, UcoeffB), dot(p00, VcoeffB));
	const float2 uv01 = (float2)(dot(p01, UcoeffB), dot(p01, VcoeffB));
	const float2 uv10 = (float2)(dot(p10, UcoeffB), dot(p10, VcoeffB));
	const float2 uv11 = (float2)(dot(p11, UcoeffB), dot(p11, VcoeffB));

	const uchar2 uv = convert_uchar2_sat_rte((uv00 + uv01 + uv10 + uv11) / 4);

	output[dst] = uv.x;
	output[dst + 1] = uv.y;
}

//The AMD OpenCL frontend adds gibberish at the end of the source, so this file ends with a comment to ... comment it out. Mind editors that append a newline (\n).
390 | // -------------------------------------------------------------------------------- /OpenEncodeVFW/OpenEncodeVFW.h: -------------------------------------------------------------------------------- 1 | #ifndef _MAIN_HEADER 2 | #define _MAIN_HEADER 3 | 4 | #include "OVEncodeDyn.h" 5 | #include "OVEncodeTypes.h" 6 | #include "CL\cl.h" 7 | #include "OvEncodeTypedef.h" 8 | #include "perf.h" 9 | #include "bitstream.h" 10 | #include "colorspace.h" 11 | #include "clconvert.h" 12 | 13 | using namespace std; 14 | 15 | #ifdef OPENENCODEVFW_EXPORTS 16 | #define OPENENCODEVFW_API __declspec(dllexport) 17 | #else 18 | #define OPENENCODEVFW_API __declspec(dllimport) 19 | #endif 20 | 21 | // y must be 2^n 22 | #define align_round(x,y) ((((unsigned int)(x))+(y-1))&(~(y-1))) 23 | 24 | #define return_badformat() return (DWORD)ICERR_BADFORMAT; 25 | 26 | #define MIN(a, b) (((a)<(b)) ? (a) : (b)) 27 | #define MAX(a, b) (((a)>(b)) ? (a) : (b)) 28 | #define CLIP(v, min, max) (((v)<(min)) ? (min) : ((v)>(max)) ? 
(max) : (v)) 29 | 30 | /* Registry */ 31 | #define OVE_REG_KEY HKEY_CURRENT_USER 32 | #define OVE_REG_PARENT L"Software" 33 | #define OVE_REG_CHILD L"OpenEncodeVFW" 34 | #define OVE_REG_CLASS L"config" 35 | 36 | extern CRITICAL_SECTION ove_CS; 37 | 38 | typedef struct _OVConfigCtrl 39 | { 40 | uint32 height; 41 | uint32 width; 42 | OVE_ENCODE_MODE encodeMode; 43 | 44 | OVE_PROFILE_LEVEL profileLevel; /**< Profile Level */ 45 | 46 | OVE_PICTURE_FORMAT pictFormat; /**< Profile format */ 47 | OVE_ENCODE_TASK_PRIORITY priority; /**< priority settings */ 48 | 49 | OVE_CONFIG_PICTURE_CONTROL pictControl; /**< Picture control */ 50 | OVE_CONFIG_RATE_CONTROL rateControl; /**< Rate contorl config */ 51 | OVE_CONFIG_MOTION_ESTIMATION meControl; /**< Motion Estimation settings */ 52 | OVE_CONFIG_RDO rdoControl; /**< Rate distorsion optimization control*/ 53 | } OvConfigCtrl, far * pConfig; 54 | 55 | /******************************************************************************/ 56 | /* Input surface used for encoder */ 57 | /******************************************************************************/ 58 | #define MAX_INPUT_SURFACE 1 //VFW can't do multiple encodes at once anyway? 
/* Result of device enumeration: the device info array, its length and the platform */
typedef struct OVDeviceHandle
{
	ovencode_device_info *deviceInfo; /**< Pointer to device info */
	uint32 numDevices; /**< Number of devices available */
	cl_platform_id platform; /**< Platform */
}OVDeviceHandle;

/******************************************************************************/
/* Encoder Handle for sharing context between create process and destroy */
/******************************************************************************/
typedef struct OVEncodeHandle
{
	ove_session session; /**< Pointer to encoder session */
	OPMemHandle inputSurfaces[MAX_INPUT_SURFACE]; /**< input buffer */
	cl_command_queue clCmdQueue; /**< command queue */
}OVEncodeHandle;

/* Container returned by CodecInst::getDeviceList() */
typedef std::map DeviceMap;

/* Return the maximum number of bytes a single compressed frame can occupy */
LRESULT x264vfw_compress_get_size(LPBITMAPINFOHEADER lpbiOut);
bool isH264iFrame(int8 *frame);

/* CPU colour-space conversion helpers (defined elsewhere in the project) */
void ConvertRGB24toYV12_SSE2(const uint8 *src, uint8 *ydest, uint8 *udest, uint8 *vdest, unsigned int w, unsigned int h);
void ConvertRGB32toYV12_SSE2(const uint8 *src, uint8 *ydest, uint8 *udest, uint8 *vdest, unsigned int w, unsigned int h);
void ff_rgb24toyv12_c(const uint8 *src, uint8 *ydst, uint8 *udst,
	uint8 *vdst, int width, int height, int lumStride,
	int chromStride, int srcStride, int32 *rgb2yuv);
void BGRtoNV12(const uint8 * rgb,
	uint8 * yuv,
	unsigned rgbIncrement,
	uint8 flip, uint8 isBGR,
	int srcFrameWidth, int srcFrameHeight, uint32 alignedWidth);

/* One instance of the VFW codec; holds all per-instance encoder state */
class CodecInst {
public:
	Logger *mLog;
	bool mMsgBox;
	bool mWarnedBuggy;
	FILE* mRaw;
	int started; //if the codec has been properly initialized yet

	unsigned int mLength;
	unsigned int mWidth;
	unsigned int mHeight;
	unsigned int mFormat; //input format for compressing, output format for decompression. Also the bitdepth.
	unsigned int mCompression;
	uint32 mAlignedSurfaceWidth;
	uint32 mAlignedSurfaceHeight;
	int32 mHostPtrSize;
	int32 mIDRFrames;
	bool mProfKernels;

	unsigned int mCompressed_size;
	clConvert *mCLConvert;
	bool mUseCLConv; // Use openCL on GPU for rgb-to-nv12 or just cpu
	bool mUseCPU; // Use openCL on CPU for RGB to NV12 conversion
	bool mDialogUpdated; // Used with configuration dialog to avoid loop-de-loops

	/* ICM_COMPRESS_FRAMES_INFO params */
	int frame_total;
	uint32 fps_num;
	uint32 fps_den;
	uint32 mFrameNum; //may overflow, don't care (maybe VFW does)

	cl_device_id clDeviceID;

	/* CPU-side OpenCL objects, filled in by createCPUContext() */
	cl_context mCpuCtx;
	cl_device_id mCpuDev;
	cl_command_queue mCpuCmdQueue;

	/**************************************************************************/
	/* Create profile counters */
	/**************************************************************************/
	//OVprofile perfCounter;
	OVDeviceHandle mDeviceHandle;
	OPContextHandle mOveContext;
	/**************************************************************************/
	/* Create encoder handle */
	/**************************************************************************/
	OVEncodeHandle mEncodeHandle;

	OVprofile mProfile;

	bool status;

	/**************************************************************************/
	/* Currently the OpenEncode support is only for vista and w7 */
	/**************************************************************************/
	bool isVistaOrNewer;
	OvConfigCtrl mConfigCtrl;
	map mConfigTable;
	DeviceMap mDevList;

	//H264 ES parser
	Parser *mParser;
	bool mHasIDR;

	CodecInst();
	~CodecInst();

	/* Driver state / configuration entry points */
	DWORD GetState(LPVOID pv, DWORD dwSize);
	DWORD SetState(LPVOID pv, DWORD dwSize);
	DWORD Configure(HWND hwnd);
	DWORD GetInfo(ICINFO* icinfo, DWORD dwSize);

	/* Compression entry points */
	DWORD CompressQuery(LPBITMAPINFOHEADER lpbiIn, LPBITMAPINFOHEADER lpbiOut);
	DWORD CompressGetFormat(LPBITMAPINFOHEADER lpbiIn, LPBITMAPINFOHEADER lpbiOut);
	DWORD CompressBegin(LPBITMAPINFOHEADER lpbiIn, LPBITMAPINFOHEADER lpbiOut);
	DWORD CompressGetSize(LPBITMAPINFOHEADER lpbiIn, LPBITMAPINFOHEADER lpbiOut);
	DWORD Compress(ICCOMPRESS* icinfo, DWORD dwSize);
	DWORD CompressEnd();
	DWORD CompressFramesInfo(ICCOMPRESSFRAMES *);

	/* Decompression entry points */
	DWORD DecompressQuery(LPBITMAPINFOHEADER lpbiIn, LPBITMAPINFOHEADER lpbiOut);
	DWORD DecompressGetFormat(LPBITMAPINFOHEADER lpbiIn, LPBITMAPINFOHEADER lpbiOut);
	DWORD DecompressBegin(LPBITMAPINFOHEADER lpbiIn, LPBITMAPINFOHEADER lpbiOut);
	DWORD Decompress(ICDECOMPRESS* icinfo, DWORD dwSize);
	DWORD DecompressGetPalette(LPBITMAPINFOHEADER lpbiIn, LPBITMAPINFOHEADER lpbiOut);
	DWORD DecompressEnd();

	BOOL QueryConfigure();

	/* printf-style logging with a wide-character format string */
	void LogMsg(bool msgBox, const wchar_t *psz_fmt, ...);

	bool readRegistry();
	bool saveRegistry();
	void quickSet(int qs);

	/**
	*******************************************************************************
	*  @fn     getDeviceList
	*  @brief  Enumerates devices; each entry is a device id paired with a
	*          display string of the form "<device name> (<n> CU)"
	*
	*  @return DeviceMap : the devices found
	*******************************************************************************
	*/
	DeviceMap getDeviceList();

	/**
	*******************************************************************************
	*  @fn     createCPUContext
	*  @brief  Creates the CPU OpenCL context, device id and command queue
	*          (mCpuCtx, mCpuDev, mCpuCmdQueue)
	*
	*  @param[in] platform : Platform id
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/
	bool createCPUContext(cl_platform_id platform);

	/**
	*******************************************************************************
	*  @fn     prepareConfigMap
	*  @brief  configuration mapping table, used for mapping values from user
	*          configuration to config control structure
	*
	*  @param[in] quickset : defaults to false
	*                        NOTE(review): effect not visible in this header;
	*                        confirm against the implementation.
	*
	*  @return void
	*******************************************************************************
	*/
	void prepareConfigMap(bool quickset = false);

	/**
	*******************************************************************************
	*  @fn     readConfigFile
	*  @brief  Reading in user-specified configuration file
	*
	*  @param[in] fileName : user specified configuration file name
	*  @param[in/out] pConfig : Pointer to the configuration structure
	*  @param[in/out] pConfigTable : Pointer to the configuration map table
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/
	bool readConfigFile(int8 *fileName, OvConfigCtrl *pConfig,
		std::map* pConfigTable);
	/**
	*******************************************************************************
	*  @fn     encodeSetParam
	*  @brief  Setting up configuration parameters
	*
	*  @param[in/out] pConfig : Pointer to the configuration structure
	*  @param[in] pConfigTable : Pointer to the configuration map table
	*
	*  @return void
	*******************************************************************************
	*/
	void encodeSetParam(OvConfigCtrl *pConfig, std::map* pConfigTable);

	/**
	*******************************************************************************
	*  @fn     setEncodeConfig
	*  @brief  This function sets the encoder configuration by using user
	*          supplied configuration information from .cfg file
	*
	*  @param[in] session : Encoder session for which encoder configuration to be
	*                       set
	*  @param[in] pConfig : pointer to the user configuration from .cfg file
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/
	bool setEncodeConfig(ove_session session, OvConfigCtrl *pConfig);

	/**
	*******************************************************************************
	*  @fn     getDevice
	*  @brief  returns the platform and devices found
	*
	*  @param[in/out] deviceHandle : Handle for the device information
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/
	bool getDevice(OVDeviceHandle *deviceHandle);

	/**
	*******************************************************************************
	*  @fn     getDeviceCap
	*  @brief  This function returns the device capabilities.
	*
	*  @param[in] oveContext : Encoder context
	*  @param[in] oveDeviceID : Device ID
	*  @param[out] encodeCaps : pointer to encoder capabilities structure
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/
	bool getDeviceCap(OPContextHandle oveContext,uint32 oveDeviceID,
		OVE_ENCODE_CAPS *encodeCaps);

	/**
	*******************************************************************************
	*  @fn     getDeviceInfo
	*  @brief  returns device information
	*
	*  @param[out] deviceInfo : Device info
	*  @param[out] numDevices : Number of devices present
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/
	bool getDeviceInfo(ovencode_device_info **deviceInfo, uint32 *numDevices);

	/**
	*******************************************************************************
	*  @fn     gpuCheck
	*  @brief  Checks for GPU present or not
	*
	*  @param[in] platform : Platform id
	*  @param[out] dType : Device type returned GPU/CPU
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/
	bool gpuCheck(cl_platform_id platform,cl_device_type* dType);

	/**
	*******************************************************************************
	*  @fn     getPlatform
	*  @brief  Get platform to run
	*
	*  @param[in/out] platform : Platform id, passed by reference
	*                            NOTE(review): presumably receives the selected
	*                            platform; confirm against the implementation.
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/
	bool getPlatform(cl_platform_id &platform);

	/**
	*******************************************************************************
	*  @fn     yuvToNV12
	*  @brief  Converts a YUV picture to NV12 format
	*          (yv12ToNV12 and nv12ToNV12Aligned take the same parameters)
	*
	*  @param[in] inData : pointer to the input picture data
	*  @param[in] uiHeight : video frame height
	*  @param[in] uiWidth : video frame width
	*  @param[in] alignedSurfaceWidth : aligned frame width
	*  @param[out] pBitstreamData : input surface buffer pointer
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/

	bool yuvToNV12(const uint8 *inData, uint32 uiHeight, uint32 uiWidth,
		uint32 alignedSurfaceWidth, int8 *pBitstreamData);
	bool yv12ToNV12(const uint8 *inData, uint32 uiHeight, uint32 uiWidth,
		uint32 alignedSurfaceWidth, int8 *pBitstreamData);

	bool nv12ToNV12Aligned(const uint8 *inData, uint32 uiHeight, uint32 uiWidth,
		uint32 alignedSurfaceWidth, int8 *pBitstreamData);
	/**
	*******************************************************************************
	*  @fn     encodeCreate
	*  @brief  Creates encoder context
	*
	*  @param[in/out] oveContext : Handle to the encoder context
	*  @param[in] deviceId : Device on which encoder context to be created
	*  @param[in] deviceHandle : Handle for the device information
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/
	bool encodeCreate(OPContextHandle *oveContext,uint32 deviceId,
		OVDeviceHandle *deviceHandle);

	/**
	*******************************************************************************
	*  @fn     encodeOpen
	*  @brief  Creates encoder session, buffers and initializes
	*          configuration for the encoder session
	*
	*  @param[in/out] encodeHandle : Handle to the encoder instance
	*  @param[in] oveContext : Handle to the encoder context
	*  @param[in] deviceId : Device on which encoder context to be created
	*  @param[in] pConfig : pointer to the encoder configuration
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/
	bool encodeOpen(OVEncodeHandle *encodeHandle,OPContextHandle oveContext,
		uint32 deviceId,OvConfigCtrl *pConfig);

	/**
	*******************************************************************************
	*  @fn     encodeProcess
	*  @brief  Encodes input picture data and outputs encoded H.264 data
	*
	*  @param[in] encodeHandle : Handle for the encoder
	*  @param[in] inData : input picture data to be encoded
	*  @param[out] outData : buffer receiving the encoded H.264 data
	*  @param[in] buf_size : NOTE(review): presumably the size of outData in
	*                        bytes; confirm against the implementation.
	*  @param[in] pConfig : pointer to custom configuration settings
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/
	bool encodeProcess(OVEncodeHandle *encodeHandle, const uint8 *inData, uint8 *outData, DWORD buf_size,
		OvConfigCtrl *pConfig/*, OVprofile *profileCnt*/);

	/**
	*******************************************************************************
	*  @fn     encodeClose
	*  @brief  This function destroys the resources used by the encoder session
	*
	*  @param[in] encodeHandle : Handle for the encoder context
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/
	bool encodeClose(OVEncodeHandle *encodeHandle);

	/**
	*******************************************************************************
	*  @fn     encodeDestroy
	*  @brief  Destroy encoder context
	*
	*  @param[in] oveContext : Handle for the encoder context
	*
	*  @return bool : true if successful; otherwise false.
	*******************************************************************************
	*/
	bool encodeDestroy(OPContextHandle oveContext);

	/**
	*******************************************************************************
	*  @fn     waitForEvent
	*  @brief  This function waits for the event completion
	*
	*  @param[in] inMapEvt : Event for which it has to wait for completion
	*
	*  @return void
	*******************************************************************************
	*/
	void waitForEvent(cl_event inMapEvt);

};

/* Create / destroy a codec instance */
CodecInst* Open(ICOPEN* icinfo);
DWORD Close(CodecInst* pinst);

#endif
"CL_IMAGE_FORMAT_MISMATCH"; 40 | case CL_IMAGE_FORMAT_NOT_SUPPORTED: 41 | return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; 42 | case CL_BUILD_PROGRAM_FAILURE: 43 | return "CL_BUILD_PROGRAM_FAILURE"; 44 | case CL_MAP_FAILURE: 45 | return "CL_MAP_FAILURE"; 46 | case CL_MISALIGNED_SUB_BUFFER_OFFSET: 47 | return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; 48 | case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: 49 | return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; 50 | case CL_INVALID_VALUE: 51 | return "CL_INVALID_VALUE"; 52 | case CL_INVALID_DEVICE_TYPE: 53 | return "CL_INVALID_DEVICE_TYPE"; 54 | case CL_INVALID_PLATFORM: 55 | return "CL_INVALID_PLATFORM"; 56 | case CL_INVALID_DEVICE: 57 | return "CL_INVALID_DEVICE"; 58 | case CL_INVALID_CONTEXT: 59 | return "CL_INVALID_CONTEXT"; 60 | case CL_INVALID_QUEUE_PROPERTIES: 61 | return "CL_INVALID_QUEUE_PROPERTIES"; 62 | case CL_INVALID_COMMAND_QUEUE: 63 | return "CL_INVALID_COMMAND_QUEUE"; 64 | case CL_INVALID_HOST_PTR: 65 | return "CL_INVALID_HOST_PTR"; 66 | case CL_INVALID_MEM_OBJECT: 67 | return "CL_INVALID_MEM_OBJECT"; 68 | case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: 69 | return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; 70 | case CL_INVALID_IMAGE_SIZE: 71 | return "CL_INVALID_IMAGE_SIZE"; 72 | case CL_INVALID_SAMPLER: 73 | return "CL_INVALID_SAMPLER"; 74 | case CL_INVALID_BINARY: 75 | return "CL_INVALID_BINARY"; 76 | case CL_INVALID_BUILD_OPTIONS: 77 | return "CL_INVALID_BUILD_OPTIONS"; 78 | case CL_INVALID_PROGRAM: 79 | return "CL_INVALID_PROGRAM"; 80 | case CL_INVALID_PROGRAM_EXECUTABLE: 81 | return "CL_INVALID_PROGRAM_EXECUTABLE"; 82 | case CL_INVALID_KERNEL_NAME: 83 | return "CL_INVALID_KERNEL_NAME"; 84 | case CL_INVALID_KERNEL_DEFINITION: 85 | return "CL_INVALID_KERNEL_DEFINITION"; 86 | case CL_INVALID_KERNEL: 87 | return "CL_INVALID_KERNEL"; 88 | case CL_INVALID_ARG_INDEX: 89 | return "CL_INVALID_ARG_INDEX"; 90 | case CL_INVALID_ARG_VALUE: 91 | return "CL_INVALID_ARG_VALUE"; 92 | case CL_INVALID_ARG_SIZE: 93 | return 
"CL_INVALID_ARG_SIZE"; 94 | case CL_INVALID_KERNEL_ARGS: 95 | return "CL_INVALID_KERNEL_ARGS"; 96 | case CL_INVALID_WORK_DIMENSION: 97 | return "CL_INVALID_WORK_DIMENSION"; 98 | case CL_INVALID_WORK_GROUP_SIZE: 99 | return "CL_INVALID_WORK_GROUP_SIZE"; 100 | case CL_INVALID_WORK_ITEM_SIZE: 101 | return "CL_INVALID_WORK_ITEM_SIZE"; 102 | case CL_INVALID_GLOBAL_OFFSET: 103 | return "CL_INVALID_GLOBAL_OFFSET"; 104 | case CL_INVALID_EVENT_WAIT_LIST: 105 | return "CL_INVALID_EVENT_WAIT_LIST"; 106 | case CL_INVALID_EVENT: 107 | return "CL_INVALID_EVENT"; 108 | case CL_INVALID_OPERATION: 109 | return "CL_INVALID_OPERATION"; 110 | case CL_INVALID_GL_OBJECT: 111 | return "CL_INVALID_GL_OBJECT"; 112 | case CL_INVALID_BUFFER_SIZE: 113 | return "CL_INVALID_BUFFER_SIZE"; 114 | case CL_INVALID_MIP_LEVEL: 115 | return "CL_INVALID_MIP_LEVEL"; 116 | case CL_INVALID_GLOBAL_WORK_SIZE: 117 | return "CL_INVALID_GLOBAL_WORK_SIZE"; 118 | default: 119 | return "unknown error code"; 120 | } 121 | 122 | return "unknown error code"; 123 | } 124 | 125 | template 126 | int clConvert::checkVal( 127 | T input, 128 | T reference, 129 | std::string message, 130 | bool isAPIerror) 131 | { 132 | if (input == reference) 133 | { 134 | return SUCCESS; 135 | } 136 | else 137 | { 138 | if (isAPIerror) 139 | { 140 | //std::cout<<"Error: "<< message << " Error code : "; 141 | //std::cout << getOpenCLErrorCodeStr(input) << std::endl; 142 | // TODO unicode vs ansi 143 | mLog->Log(L"Error: %S Error code: %S\n", message.c_str(), getOpenCLErrorCodeStr(input)); 144 | } 145 | else 146 | //error(message); 147 | mLog->Log(L"%S", message); 148 | return FAILURE; 149 | } 150 | } 151 | 152 | int clConvert::waitForEventAndRelease(cl_event *event) 153 | { 154 | cl_int status = CL_SUCCESS; 155 | cl_int eventStatus = CL_QUEUED; 156 | while (eventStatus != CL_COMPLETE) 157 | { 158 | status = f_clGetEventInfo( 159 | *event, 160 | CL_EVENT_COMMAND_EXECUTION_STATUS, 161 | sizeof(cl_int), 162 | &eventStatus, 163 | NULL); 164 | 
CHECK_OPENCL_ERROR(status, "clGetEventEventInfo Failed with Error Code:"); 165 | } 166 | 167 | status = f_clReleaseEvent(*event); 168 | CHECK_OPENCL_ERROR(status, "clReleaseEvent Failed with Error Code:"); 169 | 170 | return SUCCESS; 171 | } 172 | 173 | void clConvert::Cleanup_OpenCL() 174 | { 175 | if (g_inputBuffer[0]) { f_clReleaseMemObject(g_inputBuffer[0]); g_inputBuffer[0] = NULL; } 176 | if (g_inputBuffer[1]) { f_clReleaseMemObject(g_inputBuffer[1]); g_inputBuffer[1] = NULL; } 177 | 178 | if (g_outputBuffer) { f_clReleaseMemObject(g_outputBuffer); g_outputBuffer = NULL; } 179 | if (g_y_kernel) { f_clReleaseKernel(g_y_kernel); g_y_kernel = NULL; } 180 | if (g_uv_kernel) { f_clReleaseKernel(g_uv_kernel); g_uv_kernel = NULL; } 181 | if (g_program) { f_clReleaseProgram(g_program); g_program = NULL; } 182 | //if( g_cmd_queue ) {f_clReleaseCommandQueue( g_cmd_queue ); g_cmd_queue = NULL;} 183 | //if( g_context ) {f_clReleaseContext( g_context ); g_context = NULL;} 184 | if (host_ptr) { free(host_ptr); host_ptr = NULL; } 185 | if (hRaw) { fclose(hRaw); hRaw = NULL; } 186 | } 187 | 188 | //Unused 189 | int clConvert::setupCL() 190 | { 191 | g_cmd_queue = f_clCreateCommandQueue(g_context, deviceID, 0, NULL); 192 | if (g_cmd_queue == (cl_command_queue)0) 193 | { 194 | Cleanup_OpenCL(); 195 | return FAILURE; 196 | } 197 | return SUCCESS; 198 | } 199 | 200 | 201 | extern HMODULE hmoduleVFW; 202 | int clConvert::createKernels(COLORMATRIX matrix) 203 | { 204 | cl_int status; 205 | // create a CL program using the kernel source, load it from resource 206 | char* source; 207 | HRSRC hResource = FindResourceExA(hmoduleVFW, "STRING", 208 | MAKEINTRESOURCEA(IDR_OPENCL_KERNELS), 209 | MAKELANGID(LANG_NEUTRAL, 210 | SUBLANG_DEFAULT)); 211 | 212 | if (hResource != NULL) 213 | { 214 | source = (char*)LoadResource(hmoduleVFW, hResource); 215 | } 216 | size_t sourceSize[] = { strlen(source) }; 217 | g_program = f_clCreateProgramWithSource(g_context, 218 | 1, 219 | (const 
char**)&source, 220 | sourceSize, 221 | &status); 222 | //free(source); 223 | CHECK_OPENCL_ERROR(status, "clCreateProgramWithSource failed."); 224 | 225 | std::string flagsStr(""); //"-save-temps" 226 | if (mOptimize) 227 | flagsStr.append("-cl-single-precision-constant -cl-mad-enable -cl-fast-relaxed-math -cl-unsafe-math-optimizations "); 228 | 229 | switch (matrix) 230 | { 231 | case BT601_LIMITED: 232 | flagsStr.append("-DBT601_LIMITED "); 233 | break; 234 | case BT601_FULL: 235 | flagsStr.append("-DBT601_FULL "); 236 | break; 237 | case BT601_FULL_YCbCr: 238 | flagsStr.append("-DBT601_FULL_YCbCr "); 239 | break; 240 | case BT709_LIMITED: 241 | flagsStr.append("-DBT709_LIMITED "); 242 | break; 243 | case BT709_FULL: 244 | flagsStr.append("-DBT709_FULL "); 245 | break; 246 | case BT709_ALT1_LIMITED: 247 | flagsStr.append("-DBT709_ALT1_LIMITED "); 248 | break; 249 | /*case BT709_ALT1_FULL: 250 | flagsStr.append("-DBT709_ALT1_FULL "); 251 | break;*/ 252 | default: 253 | flagsStr.append("-DBT601_LIMITED "); 254 | break; 255 | } 256 | 257 | if (flagsStr.size() != 0) 258 | mLog->Log(L"Build Options are : %S\n", flagsStr.c_str()); 259 | 260 | 261 | /* create a cl program executable for all the devices specified */ 262 | status = f_clBuildProgram(g_program, 263 | 1, 264 | &deviceID, 265 | flagsStr.c_str(), 266 | NULL, 267 | NULL); 268 | if (status != CL_SUCCESS) 269 | { 270 | if (status == CL_BUILD_PROGRAM_FAILURE) 271 | { 272 | cl_int logStatus; 273 | char * buildLog = NULL; 274 | size_t buildLogSize = 0; 275 | logStatus = f_clGetProgramBuildInfo(g_program, 276 | deviceID, 277 | CL_PROGRAM_BUILD_LOG, 278 | buildLogSize, 279 | buildLog, 280 | &buildLogSize); 281 | CHECK_OPENCL_ERROR(logStatus, "clGetProgramBuildInfo failed."); 282 | 283 | buildLog = (char*)malloc(buildLogSize); 284 | if (buildLog == NULL) 285 | { 286 | mLog->Log(L"Failed to allocate host memory.(buildLog)\n"); 287 | return FAILURE; 288 | } 289 | memset(buildLog, 0, buildLogSize); 290 | 291 | logStatus = 
f_clGetProgramBuildInfo(g_program, 292 | deviceID, 293 | CL_PROGRAM_BUILD_LOG, 294 | buildLogSize, 295 | buildLog, 296 | NULL); 297 | CHECK_OPENCL_ERROR(logStatus, "clGetProgramBuildInfo failed."); 298 | mLog->Log( 299 | L"\n\t\t\tBUILD LOG\n" 300 | L" ************************************************\n" 301 | L" %S" 302 | L" ************************************************\n", 303 | buildLog); 304 | free(buildLog); 305 | } 306 | 307 | CHECK_OPENCL_ERROR(status, "clBuildProgram() failed."); 308 | } 309 | 310 | size_t temp = 0; 311 | 312 | /* get a kernel object handle for a kernel with the given name */ 313 | /*remove_pitch_kernel = clCreateKernel(program, "removePitch", &status); 314 | CHECK_OPENCL_ERROR(status, "clCreateKernel(removePitch) failed!"); 315 | 316 | status = clGetKernelWorkGroupInfo( 317 | remove_pitch_kernel, 318 | deviceID, 319 | CL_KERNEL_WORK_GROUP_SIZE, 320 | sizeof(temp), 321 | &temp, 322 | 0); 323 | CHECK_OPENCL_ERROR(status, "clGetKernelWorkGroupInfo failed"); 324 | 325 | while(localThreads_remove_pitch_kernel[0] * 326 | localThreads_remove_pitch_kernel[1] < temp) 327 | { 328 | if(2 * localThreads_remove_pitch_kernel[0] * 329 | localThreads_remove_pitch_kernel[1] <= temp) 330 | localThreads_remove_pitch_kernel[0] *= 2; 331 | 332 | if(2 * localThreads_remove_pitch_kernel[0] * 333 | localThreads_remove_pitch_kernel[1] <= temp) 334 | localThreads_remove_pitch_kernel[1] *= 2; 335 | }*/ 336 | 337 | //TODO Enable if you need NV12-to-RGB(A) 338 | //g_nv12_to_rgb_kernel = clCreateKernel(g_program, "NV12toRGB", &status); 339 | //CHECK_OPENCL_ERROR(status, "clCreateKernel(NV12toRGB) failed!"); 340 | 341 | //g_nv12_to_rgba_kernel = clCreateKernel(g_program, "NV12toRGBA", &status); 342 | //CHECK_OPENCL_ERROR(status, "clCreateKernel(NV12toRGBA) failed!"); 343 | 344 | /*status = clGetKernelWorkGroupInfo( 345 | g_nv12_to_rgba_kernel, 346 | deviceID, 347 | CL_KERNEL_WORK_GROUP_SIZE, 348 | sizeof(temp), 349 | &temp, 350 | 0); 351 | CHECK_OPENCL_ERROR(status, 
"clGetKernelWorkGroupInfo failed"); 352 | 353 | std::cout << "clGetKernelWorkGroupInfo: " << temp << std::endl; 354 | 355 | while(localThreads_nv12_to_rgba_kernel[0] * 356 | localThreads_nv12_to_rgba_kernel[1] < temp) 357 | { 358 | if(2 * localThreads_nv12_to_rgba_kernel[0] * 359 | localThreads_nv12_to_rgba_kernel[1] <= temp) 360 | localThreads_nv12_to_rgba_kernel[0] *= 2; 361 | 362 | if(2 * localThreads_nv12_to_rgba_kernel[0] * 363 | localThreads_nv12_to_rgba_kernel[1] <= temp) 364 | localThreads_nv12_to_rgba_kernel[1] *= 2; 365 | }*/ 366 | 367 | char* kernels[][2] = { 368 | { "BGRAtoNV12_Y", "BGRAtoNV12_UV" }, 369 | { "BGRtoNV12_Y", "BGRtoNV12_UV" }, 370 | 371 | { "RGBAtoNV12_Y", "RGBAtoNV12_UV" }, 372 | { "RGBtoNV12_Y", "RGBtoNV12_UV" }, 373 | }; 374 | 375 | int i = mRGB ? 2 : 0; 376 | if (bpp_bytes == 3) 377 | i++; 378 | 379 | g_y_kernel = f_clCreateKernel(g_program, kernels[i][0], &status); 380 | CHECK_OPENCL_ERROR(status, "clCreateKernel(Y) failed!"); 381 | 382 | g_uv_kernel = f_clCreateKernel(g_program, kernels[i][1], &status); 383 | CHECK_OPENCL_ERROR(status, "clCreateKernel(UV) failed!"); 384 | 385 | status = f_clGetKernelWorkGroupInfo( 386 | g_y_kernel, 387 | deviceID, 388 | CL_KERNEL_WORK_GROUP_SIZE, 389 | sizeof(temp), 390 | &temp, 391 | 0); 392 | CHECK_OPENCL_ERROR(status, "clGetKernelWorkGroupInfo failed"); 393 | 394 | mLog->Log(L"clGetKernelWorkGroupInfo: %d\n", temp); 395 | 396 | //TODO Should limit to half as work_dim is 2? 
May give invalid WG size on cpu atleast 397 | while (localThreads_Max[0] * 398 | localThreads_Max[1] < temp) 399 | { 400 | if (2 * localThreads_Max[0] * 401 | localThreads_Max[1] <= temp) 402 | localThreads_Max[0] *= 2; 403 | 404 | if (2 * localThreads_Max[0] * 405 | localThreads_Max[1] <= temp) 406 | localThreads_Max[1] *= 2; 407 | } 408 | 409 | return SUCCESS; 410 | } 411 | 412 | int clConvert::setKernelArgs(cl_kernel kernel, cl_mem input, cl_mem output) 413 | { 414 | cl_int status = 0; 415 | 416 | // Set up kernel arguments 417 | status = f_clSetKernelArg( 418 | kernel, 419 | 0, 420 | sizeof(cl_mem), 421 | &input); 422 | CHECK_OPENCL_ERROR(status, "clSetKernelArg(input) failed!\n"); 423 | 424 | status = f_clSetKernelArg( 425 | kernel, 426 | 1, 427 | sizeof(cl_mem), 428 | &output); 429 | CHECK_OPENCL_ERROR(status, "clSetKernelArg(output) failed!"); 430 | 431 | status = f_clSetKernelArg( 432 | kernel, 433 | 2, 434 | sizeof(int), 435 | &oAlignedWidth); 436 | CHECK_OPENCL_ERROR(status, "clSetKernelArg(alignedWidth) failed!"); 437 | return SUCCESS; 438 | } 439 | 440 | int clConvert::setKernelOffset(cl_kernel kernel, int offset) 441 | { 442 | cl_int status = 0; 443 | 444 | // Set up kernel arguments 445 | status = f_clSetKernelArg( 446 | kernel, 447 | 3, 448 | sizeof(int), 449 | &offset); 450 | CHECK_OPENCL_ERROR(status, "clSetKernelArg(offset) failed!\n"); 451 | return SUCCESS; 452 | } 453 | 454 | int clConvert::runKernel(cl_kernel kernel, 455 | cl_command_queue queue, 456 | size_t globalThreads[2], 457 | size_t localThreads[2], 458 | double *prof, 459 | bool wait) 460 | { 461 | cl_int status = 0; 462 | 463 | cl_event ndrEvt; 464 | status = f_clEnqueueNDRangeKernel( 465 | queue, 466 | kernel, 467 | 2, 468 | 0, 469 | globalThreads, 470 | localThreads, 471 | 0, 472 | 0, 473 | wait ? 
&ndrEvt : NULL); 474 | CHECK_OPENCL_ERROR(status, "clEnqueueNDRangeKernel failed!"); 475 | 476 | if (wait) { 477 | status = f_clFlush(queue); 478 | CHECK_OPENCL_ERROR(status, "clFlush failed"); 479 | } 480 | 481 | // Wait for event and release event 482 | //status = waitForEventAndRelease(&ndrEvt); 483 | //CHECK_OPENCL_ERROR(status, "waitForEventAndRelease(ndrEvt) failed."); 484 | 485 | //set 'wait' to true for profiling. Also pass profiling option when creating command queues. 486 | if (wait) { 487 | status = f_clWaitForEvents(1, &ndrEvt); 488 | CHECK_OPENCL_ERROR(status, "clWaitForEvents failed."); 489 | profileEvent(ndrEvt, prof); 490 | status = f_clReleaseEvent(ndrEvt); 491 | CHECK_OPENCL_ERROR(status, "clRelease Event Failed"); 492 | } 493 | return SUCCESS; 494 | } 495 | 496 | int clConvert::profileEvent(cl_event evt, double *prof) 497 | { 498 | // Calculate performance 499 | cl_ulong startTime; 500 | cl_ulong endTime; 501 | cl_int status; 502 | 503 | if (!prof) 504 | return SUCCESS; 505 | 506 | // Get kernel profiling info 507 | status = f_clGetEventProfilingInfo(evt, 508 | CL_PROFILING_COMMAND_START, 509 | sizeof(cl_ulong), 510 | &startTime, 511 | 0); 512 | CHECK_OPENCL_ERROR(status, "clGetEventProfilingInfo failed.(startTime)"); 513 | 514 | status = f_clGetEventProfilingInfo(evt, 515 | CL_PROFILING_COMMAND_END, 516 | sizeof(cl_ulong), 517 | &endTime, 518 | 0); 519 | CHECK_OPENCL_ERROR(status, "clGetEventProfilingInfo failed.(endTime)"); 520 | 521 | // Cumulate time for each iteration 522 | *prof += 1e-9 * (endTime - startTime); 523 | return SUCCESS; 524 | } 525 | 526 | int clConvert::decodeInit() 527 | { 528 | return FAILURE; 529 | } 530 | 531 | int clConvert::encodeInit(cl_mem dstBuffer) 532 | { 533 | cl_int statusCL = CL_SUCCESS; 534 | profSecs1 = 0; 535 | profSecs2 = 0; 536 | prof2ndPass = false; 537 | 538 | //TODO Odd framebuffer sizes. DIBs are DWORD aligned? 
539 | input_size = iWidth * iHeight * bpp_bytes + ((iWidth * bpp_bytes) % 4) * iHeight; 540 | //int align = 4 * bpp_bytes - 1;//or always to 16 bytes (float4)? 541 | int align = 256 - 1;//or align to 256 bytes for faster memory access? 542 | int input_size_aligned = (input_size + align) & ~align; 543 | 544 | //VCE encoder needs aligned input, adjust pitch here 545 | oAlignedWidth = ((iWidth + (256 - 1)) & ~(256 - 1)); 546 | g_output_size = oAlignedWidth * oHeight * 3 / 2; 547 | 548 | bmpStride = iWidth * bpp_bytes; 549 | needsDestriding = (bmpStride % 4 != 0); 550 | bmpStride += bmpStride % 4; 551 | mapStride = iWidth * bpp_bytes; 552 | 553 | // Create buffer to store the source frame 554 | g_inputBuffer[0] = f_clCreateBuffer( 555 | g_context, 556 | //1080p RGBA ~5GB/s *WriteBuffer, ~6.4GB/s map/memcpy, PCI-e x16 2.0 557 | //640x272 RGBA >~3GB/s map/memcpy, <~3GB/s WriteBuffer 558 | CL_MEM_READ_ONLY | CL_MEM_USE_PERSISTENT_MEM_AMD, 559 | //CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, //~6GB/s map/memcpy 560 | input_size_aligned, 561 | NULL, 562 | &statusCL); 563 | CHECK_OPENCL_ERROR(statusCL, "clCreateBuffer(g_inputBuffer[0]) failed!"); 564 | /*cl_image_format fmt; 565 | fmt.image_channel_order = CL_BGRA; 566 | fmt.image_channel_data_type = CL_UNSIGNED_INT8; 567 | cl_image_desc desc; 568 | memset(&desc, 0, sizeof(desc)); 569 | desc.image_type = CL_MEM_OBJECT_IMAGE2D; 570 | desc.image_height = iHeight; 571 | desc.image_width = iWidth; 572 | desc.image_array_size = 1; 573 | 574 | g_inputBuffer[0] = clCreateImage(g_context, CL_MEM_READ_ONLY,// | CL_MEM_COPY_HOST_PTR, 575 | &fmt, &desc, NULL, &statusCL);*/ 576 | 577 | g_inputBuffer[1] = NULL; 578 | 579 | //overhead test 580 | setKernelArgs(g_y_kernel, g_inputBuffer[0], dstBuffer); 581 | setKernelArgs(g_uv_kernel, g_inputBuffer[0], dstBuffer); 582 | setKernelOffset(g_y_kernel, 0); 583 | setKernelOffset(g_uv_kernel, 0); 584 | 585 | if (hRaw) { fclose(hRaw); hRaw = NULL; } 586 | char tmp[1024]; 587 | sprintf_s(tmp, 
"raw_%dx%d.nv12", oAlignedWidth, oHeight); 588 | //hRaw = fopen(tmp, "wb+"); 589 | return SUCCESS; 590 | } 591 | 592 | //RGB(A) to NV12 593 | int clConvert::convert(const uint8* srcPtr, cl_mem dstBuffer, bool profile) 594 | { 595 | cl_int status = CL_SUCCESS; 596 | size_t offset[] = { 0, 0 }; 597 | size_t globalThreads[] = { iWidth, iHeight }; 598 | size_t localThreads[] = { localThreads_Max[0] * 599 | localThreads_Max[1], 1 }; 600 | 601 | cl_event inMapEvt; 602 | cl_event unmapEvent; 603 | captureTimeStart(mProf, 5); 604 | #if 1 605 | mapPtr = f_clEnqueueMapBuffer(g_cmd_queue, 606 | g_inputBuffer[0], 607 | //g_pinnedBuffer, 608 | CL_TRUE, 609 | //CL_FALSE, 610 | CL_MAP_WRITE_INVALIDATE_REGION, 611 | 0, 612 | input_size, 613 | 0, 614 | NULL, 615 | NULL,//&inMapEvt, 616 | &status); 617 | CHECK_OPENCL_ERROR(status, "clEnqueueMapBuffer() failed"); 618 | //sync at unmapping 619 | //status = clFlush(g_cmd_queue); 620 | //waitForEventAndRelease(&inMapEvt); 621 | 622 | //copy to mapped buffer or clEnqueueWriteBuffer instead 623 | if (bpp_bytes == 4 || !needsDestriding) 624 | memcpy(mapPtr, srcPtr, input_size); 625 | else if (bpp_bytes == 3) 626 | { 627 | uint8* tmpMap = (uint8*)mapPtr; 628 | for (int y = 0; y < iHeight; y++, tmpMap+=mapStride, srcPtr+=bmpStride) 629 | memcpy(tmpMap, srcPtr, mapStride); 630 | } 631 | 632 | status = f_clEnqueueUnmapMemObject(g_cmd_queue, 633 | g_inputBuffer[0], 634 | //g_pinnedBuffer, 635 | mapPtr, 636 | 0, 637 | NULL, 638 | &unmapEvent); 639 | status = f_clFlush(g_cmd_queue); 640 | waitForEventAndRelease(&unmapEvent); 641 | 642 | #else 643 | size_t origin[] = {0,0,0}; 644 | size_t region[] = {iWidth,iHeight,1}; 645 | status = f_clEnqueueWriteImage(g_cmd_queue[0], 646 | g_inputBuffer[0], 647 | CL_TRUE, 648 | origin, 649 | region, 650 | iWidth*bpp_bytes, 651 | 0, 652 | srcPtr, 653 | 0, 654 | NULL, 655 | NULL);//&inMapEvt); 656 | CHECK_OPENCL_ERROR(status, "clEnqueueWriteBuffer() failed"); 657 | #endif 658 | captureTimeStop(mProf, 5); 659 | 
660 | /*setKernelArgs(g_y_kernel, g_inputBuffer[0], dstBuffer); 661 | setKernelArgs(g_uv_kernel, g_inputBuffer[0], dstBuffer); 662 | setKernelOffset(g_y_kernel, 0); 663 | setKernelOffset(g_uv_kernel, 0);*/ 664 | 665 | if (runKernel(g_y_kernel, g_cmd_queue, globalThreads, NULL/*localThreads*/, &profSecs1, profile)) 666 | { 667 | mLog->Log(L"kernelY failed!\n"); 668 | return FAILURE; 669 | } 670 | 671 | //encoder should be feeding divideable by 2 frames anyway 672 | globalThreads[0] = (iWidth >> 1); 673 | //globalThreads[0] -= globalThreads[0] % 2; 674 | globalThreads[1] = (iHeight >> 1); 675 | //globalThreads[1] -= globalThreads[1] % 2; 676 | //mLog->Log(L"GID: %dx%d\n", globalThreads[0],globalThreads[1]); 677 | if (runKernel(g_uv_kernel, g_cmd_queue, globalThreads, NULL/*localThreads*/, &profSecs2, profile)) 678 | { 679 | mLog->Log(L"kernelUV failed!\n"); 680 | return FAILURE; 681 | } 682 | 683 | f_clFinish(g_cmd_queue); 684 | 685 | //average from second sample 686 | if (prof2ndPass) { 687 | profSecs1 /= 2; 688 | profSecs2 /= 2; 689 | } 690 | else 691 | prof2ndPass = true; 692 | 693 | if (hRaw) { 694 | mapPtr = f_clEnqueueMapBuffer(g_cmd_queue, 695 | dstBuffer, 696 | CL_TRUE, 697 | CL_MAP_READ, 698 | 0, 699 | g_output_size, 700 | 0, 701 | NULL, 702 | &inMapEvt, 703 | &status); 704 | CHECK_OPENCL_ERROR(status, "clEnqueueMapBuffer() failed"); 705 | status = f_clFlush(g_cmd_queue); 706 | waitForEventAndRelease(&inMapEvt); 707 | fwrite(mapPtr, 1, g_output_size, hRaw); 708 | status = f_clEnqueueUnmapMemObject(g_cmd_queue, 709 | dstBuffer, 710 | mapPtr, 711 | 0, 712 | NULL, 713 | &unmapEvent); 714 | status = f_clFlush(g_cmd_queue); 715 | waitForEventAndRelease(&unmapEvent); 716 | fclose(hRaw); hRaw = NULL; 717 | 718 | hRaw = fopen("rgb.raw", "wb"); 719 | fwrite(srcPtr, 1, input_size, hRaw); 720 | 721 | fclose(hRaw); hRaw = NULL; 722 | } 723 | return SUCCESS; 724 | } 725 | --------------------------------------------------------------------------------