├── .gitignore ├── README.md ├── Util ├── ReadMe.txt ├── Timer.h ├── Util.h ├── Bitmap.h ├── Timer.cpp ├── helper_cuda_gl.h ├── NvFBCLibrary.h ├── NvIFRLibrary.h ├── Util.vcxproj ├── helper_string.h ├── Util.vcproj ├── drvapi_error_string.h ├── Bitmap.cpp ├── helper_cuda_drvapi.h └── helper_cuda.h └── NvFBCH264 ├── NvFBCH264.sln ├── main.cpp ├── NvFBCH264.vcproj └── NvFBCH264.vcxproj /.gitignore: -------------------------------------------------------------------------------- 1 | *.user 2 | .vs/ 3 | *.sdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NvFBCCapture 2 | A simple screen capture utility for modern NVIDIA video adapters 3 | 4 | # Building 5 | To build the project, you need to have [NVIDIA GRID API](https://developer.nvidia.com/grid-app-game-streaming) and Boost installed. 6 | Once you have all the dependencies installed, open the project with Visual Studio and change the libraries/headers paths. 7 | Then it should be buildable from Visual Studio. 8 | -------------------------------------------------------------------------------- /Util/ReadMe.txt: -------------------------------------------------------------------------------- 1 | Utilities used by the NvFBC samples. 2 | 3 | Util.h 4 | Defines a macro and method which checks all NvFBC calls for errors. 5 | 6 | Bitmap.h 7 | Declares the methods implemented in Bitmap.cpp. 8 | 9 | Bitmap.cpp 10 | Defines a method to save a 24-bit per pixel bitmap and defines 11 | methods which convert various buffer formats into bitmap compatible 12 | formats. 13 | 14 | Timer.h 15 | Declares a simple timer class. 16 | 17 | Timer.cpp 18 | Defines the timer class using QueryPerformanceCounter. 
19 | -------------------------------------------------------------------------------- /Util/Timer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 NVIDIA CORPORATION. All Rights Reserved. 3 | * 4 | * NVIDIA CORPORATION and its licensors retain all intellectual property 5 | * and proprietary rights in and to this software, related documentation 6 | * and any modifications thereto. Any use, reproduction, disclosure or 7 | * distribution of this software and related documentation without an express 8 | * license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | 16 | // Simple timer class, measures time in milliseconds 17 | class Timer 18 | { 19 | public: 20 | // Constructs the timer and starts timing. 21 | Timer(); 22 | ~Timer(); 23 | 24 | // Reset the starting point to now. 25 | void reset(); 26 | 27 | // Get the elapsed milliseconds since the starting point. 28 | double now(); 29 | 30 | protected: 31 | LONGLONG m_llStartTick; 32 | }; -------------------------------------------------------------------------------- /Util/Util.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 NVIDIA CORPORATION. All Rights Reserved. 3 | * 4 | * NVIDIA CORPORATION and its licensors retain all intellectual property 5 | * and proprietary rights in and to this software, related documentation 6 | * and any modifications thereto. Any use, reproduction, disclosure or 7 | * distribution of this software and related documentation without an express 8 | * license agreement from NVIDIA CORPORATION is strictly prohibited. 
9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | #include "NvFBC/NvFBC.h" 17 | 18 | // Simple macro which checks for NvFBC errors 19 | #define NVFBC_SAFE_CALL(result) nvfbcSafeCall(result, __FILE__, __LINE__) 20 | 21 | inline void nvfbcSafeCall(NVFBCRESULT result, const char *file, const int line) 22 | { 23 | if(result != NVFBC_SUCCESS) 24 | { 25 | fprintf(stderr, "NvFBC call failed %s:%d\n", file, line); 26 | exit(-1); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /Util/Bitmap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 NVIDIA CORPORATION. All Rights Reserved. 3 | * 4 | * NVIDIA CORPORATION and its licensors retain all intellectual property 5 | * and proprietary rights in and to this software, related documentation 6 | * and any modifications thereto. Any use, reproduction, disclosure or 7 | * distribution of this software and related documentation without an express 8 | * license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | 16 | // Saves the RGB buffer as a bitmap 17 | bool SaveRGB(const char *fileName, BYTE *data, int width, int height); 18 | 19 | // Saves the BGR buffer as a bitmap 20 | bool SaveBGR(const char *fileName, BYTE *data, int width, int height); 21 | 22 | // Saves the ARGB buffer as a bitmap 23 | bool SaveARGB(const char *fileName, BYTE *data, int width, int height); 24 | 25 | // Saves the RGBPlanar buffer as three bitmaps, one bitmap for each channel 26 | bool SaveRGBPlanar(const char *fileName, BYTE *data, int width, int height); 27 | 28 | // Saves the Y'UV420p buffer as three bitmaps, one bitmap for Y', one for U and one for V 29 | bool SaveYUV(const char *fileName, BYTE *data, int width, int height); 30 | 31 | // Saves the provided buffer as a bitmap, this method assumes the data is formated as a bitmap. 
32 | bool SaveBitmap(const char *fileName, BYTE *data, int width, int height); -------------------------------------------------------------------------------- /NvFBCH264/NvFBCH264.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 11.00 3 | # Visual Studio 2010 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NvFBCH264", "NvFBCH264.vcxproj", "{C0C74593-8F68-4202-995C-9B6A654EDD72}" 5 | EndProject 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Util", "..\Util\Util.vcxproj", "{1204D7DC-7E0B-4710-87D7-5BBC67FAAC63}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release|Win32 = Release|Win32 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {C0C74593-8F68-4202-995C-9B6A654EDD72}.Debug|Win32.ActiveCfg = Debug|Win32 17 | {C0C74593-8F68-4202-995C-9B6A654EDD72}.Debug|Win32.Build.0 = Debug|Win32 18 | {C0C74593-8F68-4202-995C-9B6A654EDD72}.Debug|x64.ActiveCfg = Debug|x64 19 | {C0C74593-8F68-4202-995C-9B6A654EDD72}.Debug|x64.Build.0 = Debug|x64 20 | {C0C74593-8F68-4202-995C-9B6A654EDD72}.Release|Win32.ActiveCfg = Release|Win32 21 | {C0C74593-8F68-4202-995C-9B6A654EDD72}.Release|Win32.Build.0 = Release|Win32 22 | {C0C74593-8F68-4202-995C-9B6A654EDD72}.Release|x64.ActiveCfg = Release|x64 23 | {C0C74593-8F68-4202-995C-9B6A654EDD72}.Release|x64.Build.0 = Release|x64 24 | {1204D7DC-7E0B-4710-87D7-5BBC67FAAC63}.Debug|Win32.ActiveCfg = Debug|Win32 25 | {1204D7DC-7E0B-4710-87D7-5BBC67FAAC63}.Debug|Win32.Build.0 = Debug|Win32 26 | {1204D7DC-7E0B-4710-87D7-5BBC67FAAC63}.Debug|x64.ActiveCfg = Debug|x64 27 | {1204D7DC-7E0B-4710-87D7-5BBC67FAAC63}.Debug|x64.Build.0 = Debug|x64 28 | {1204D7DC-7E0B-4710-87D7-5BBC67FAAC63}.Release|Win32.ActiveCfg = Release|Win32 29 | 
{1204D7DC-7E0B-4710-87D7-5BBC67FAAC63}.Release|Win32.Build.0 = Release|Win32 30 | {1204D7DC-7E0B-4710-87D7-5BBC67FAAC63}.Release|x64.ActiveCfg = Release|x64 31 | {1204D7DC-7E0B-4710-87D7-5BBC67FAAC63}.Release|x64.Build.0 = Release|x64 32 | EndGlobalSection 33 | GlobalSection(SolutionProperties) = preSolution 34 | HideSolutionNode = FALSE 35 | EndGlobalSection 36 | EndGlobal 37 | -------------------------------------------------------------------------------- /Util/Timer.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * \copyright 3 | * Copyright 1993-2014 NVIDIA Corporation. All rights reserved. 4 | * 5 | * NOTICE TO LICENSEE: 6 | * 7 | * This source code and/or documentation ("Licensed Deliverables") are 8 | * subject to NVIDIA intellectual property rights under U.S. and 9 | * international Copyright laws. 10 | * 11 | * These Licensed Deliverables contained herein is PROPRIETARY and 12 | * CONFIDENTIAL to NVIDIA and is being provided under the terms and 13 | * conditions of a form of NVIDIA software license agreement by and 14 | * between NVIDIA and Licensee ("License Agreement") or electronically 15 | * accepted by Licensee. Notwithstanding any terms or conditions to 16 | * the contrary in the License Agreement, reproduction or disclosure 17 | * of the Licensed Deliverables to any third party without the express 18 | * written consent of NVIDIA is prohibited. 19 | * 20 | * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 21 | * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 22 | * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 23 | * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 24 | * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 25 | * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 26 | * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
27 | * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 28 | * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 29 | * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 30 | * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 31 | * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 32 | * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 33 | * OF THESE LICENSED DELIVERABLES. 34 | * 35 | * U.S. Government End Users. These Licensed Deliverables are a 36 | * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 37 | * 1995), consisting of "commercial computer software" and "commercial 38 | * computer software documentation" as such terms are used in 48 39 | * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 40 | * only as a commercial end item. Consistent with 48 C.F.R.12.212 and 41 | * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 42 | * U.S. Government End Users acquire the Licensed Deliverables with 43 | * only those rights set forth herein. 44 | * 45 | * Any use of the Licensed Deliverables in individual and commercial 46 | * software must include, in the user documentation and internal 47 | * comments to the code, the above Disclaimer and U.S. Government End 48 | * Users Notice. 
49 | */ 50 | 51 | 52 | #include "Timer.h" 53 | 54 | LONGLONG g_llPerfFrequency; 55 | bool g_timeInitialized=false; 56 | 57 | #define QPC(Int64) QueryPerformanceCounter((LARGE_INTEGER*)&Int64) 58 | #define QPF(Int64) QueryPerformanceFrequency((LARGE_INTEGER*)&Int64) 59 | 60 | Timer::Timer() 61 | : m_llStartTick(0) 62 | { 63 | if(!g_timeInitialized) 64 | { 65 | QPF(g_llPerfFrequency); 66 | g_timeInitialized = true; 67 | } 68 | 69 | QPC(m_llStartTick); 70 | } 71 | 72 | Timer::~Timer() 73 | { 74 | } 75 | 76 | void Timer::reset() 77 | { 78 | QPC(m_llStartTick); 79 | } 80 | 81 | double Timer::now() 82 | { 83 | LONGLONG now; 84 | QPC(now); 85 | return (((double)(now - m_llStartTick)/(double)g_llPerfFrequency) * 1000.0); 86 | } -------------------------------------------------------------------------------- /Util/helper_cuda_gl.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 1993-2014 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
9 | * 10 | */ 11 | 12 | #ifndef HELPER_CUDA_GL_H 13 | #define HELPER_CUDA_GL_H 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | // includes, graphics 20 | #if defined (__APPLE__) || defined(MACOSX) 21 | #include 22 | #include 23 | #else 24 | #include 25 | #include 26 | #endif 27 | 28 | #ifndef EXIT_WAIVED 29 | #define EXIT_WAIVED 2 30 | #endif 31 | 32 | #ifdef __DRIVER_TYPES_H__ 33 | #ifndef DEVICE_RESET 34 | #define DEVICE_RESET cudaDeviceReset() 35 | #endif 36 | #else 37 | #ifndef DEVICE_RESET 38 | #define DEVICE_RESET 39 | #endif 40 | #endif 41 | 42 | #ifdef __CUDA_GL_INTEROP_H__ 43 | //////////////////////////////////////////////////////////////////////////////// 44 | // These are CUDA OpenGL Helper functions 45 | 46 | inline int gpuGLDeviceInit(int ARGC, const char **ARGV) 47 | { 48 | int deviceCount; 49 | checkCudaErrors(cudaGetDeviceCount(&deviceCount)); 50 | 51 | if (deviceCount == 0) 52 | { 53 | fprintf(stderr, "CUDA error: no devices supporting CUDA.\n"); 54 | exit(EXIT_FAILURE); 55 | } 56 | 57 | int dev = 0; 58 | dev = getCmdLineArgumentInt(ARGC, ARGV, "device="); 59 | 60 | if (dev < 0) 61 | { 62 | dev = 0; 63 | } 64 | 65 | if (dev > deviceCount-1) 66 | { 67 | fprintf(stderr, "\n"); 68 | fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount); 69 | fprintf(stderr, ">> gpuGLDeviceInit (-device=%d) is not a valid GPU device. 
<<\n", dev); 70 | fprintf(stderr, "\n"); 71 | return -dev; 72 | } 73 | 74 | cudaDeviceProp deviceProp; 75 | checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev)); 76 | 77 | if (deviceProp.computeMode == cudaComputeModeProhibited) 78 | { 79 | fprintf(stderr, "Error: device is running in , no threads can use ::cudaSetDevice().\n"); 80 | return -1; 81 | } 82 | 83 | if (deviceProp.major < 1) 84 | { 85 | fprintf(stderr, "Error: device does not support CUDA.\n"); 86 | exit(EXIT_FAILURE); 87 | } 88 | 89 | if (checkCmdLineFlag(ARGC, ARGV, "quiet") == false) 90 | { 91 | fprintf(stderr, "Using device %d: %s\n", dev, deviceProp.name); 92 | } 93 | 94 | checkCudaErrors(cudaGLSetGLDevice(dev)); 95 | return dev; 96 | } 97 | 98 | // This function will pick the best CUDA device available with OpenGL interop 99 | inline int findCudaGLDevice(int argc, const char **argv) 100 | { 101 | int devID = 0; 102 | 103 | // If the command-line has a device number specified, use it 104 | if (checkCmdLineFlag(argc, (const char **)argv, "device")) 105 | { 106 | devID = gpuGLDeviceInit(argc, (const char **)argv); 107 | 108 | if (devID < 0) 109 | { 110 | printf("no CUDA capable devices found, exiting...\n"); 111 | DEVICE_RESET 112 | exit(EXIT_SUCCESS); 113 | } 114 | } 115 | else 116 | { 117 | // Otherwise pick the device with highest Gflops/s 118 | devID = gpuGetMaxGflopsDeviceId(); 119 | cudaGLSetGLDevice(devID); 120 | } 121 | 122 | return devID; 123 | } 124 | 125 | //////////////////////////////////////////////////////////////////////////// 126 | //! Check for OpenGL error 127 | //! @return bool if no GL error has been encountered, otherwise 0 128 | //! @param file __FILE__ macro 129 | //! @param line __LINE__ macro 130 | //! @note The GL error is listed on stderr 131 | //! 
@note This function should be used via the CHECK_ERROR_GL() macro 132 | //////////////////////////////////////////////////////////////////////////// 133 | inline bool 134 | sdkCheckErrorGL(const char *file, const int line) 135 | { 136 | bool ret_val = true; 137 | 138 | // check for error 139 | GLenum gl_error = glGetError(); 140 | 141 | if (gl_error != GL_NO_ERROR) 142 | { 143 | #ifdef _WIN32 144 | char tmpStr[512]; 145 | // NOTE: "%s(%i) : " allows Visual Studio to directly jump to the file at the right line 146 | // when the user double clicks on the error line in the Output pane. Like any compile error. 147 | sprintf_s(tmpStr, 255, "\n%s(%i) : GL Error : %s\n\n", file, line, gluErrorString(gl_error)); 148 | fprintf(stderr, "%s", tmpStr); 149 | #endif 150 | fprintf(stderr, "GL Error in file '%s' in line %d :\n", file, line); 151 | fprintf(stderr, "%s\n", gluErrorString(gl_error)); 152 | ret_val = false; 153 | } 154 | 155 | return ret_val; 156 | } 157 | 158 | #define SDK_CHECK_ERROR_GL() \ 159 | if( false == sdkCheckErrorGL( __FILE__, __LINE__)) { \ 160 | DEVICE_RESET \ 161 | exit(EXIT_FAILURE); \ 162 | } 163 | #endif 164 | 165 | #endif 166 | -------------------------------------------------------------------------------- /Util/NvFBCLibrary.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define WINDOWS_LEAN_AND_MEAN 4 | #include 5 | 6 | #include "NvFBC/nvFBC.h" 7 | #include 8 | 9 | #define NVFBC64_LIBRARY_NAME "NvFBC64.dll" 10 | #define NVFBC_LIBRARY_NAME "NvFBC.dll" 11 | 12 | // Wraps loading and using NvFBC 13 | class NvFBCLibrary 14 | { 15 | NvFBCLibrary(const NvFBCLibrary &); 16 | NvFBCLibrary &operator=(const NvFBCLibrary &); 17 | 18 | public: 19 | NvFBCLibrary() 20 | : m_handle(NULL) 21 | , pfn_get_status(NULL) 22 | , pfn_create(NULL) 23 | {} 24 | 25 | ~NvFBCLibrary() 26 | { 27 | if(NULL != m_handle) 28 | close(); 29 | } 30 | 31 | // Attempts to load NvFBC from system directory. 
32 | // on 32-bit OS: looks for NvFBC.dll in system32 33 | // for 32-bit app on 64-bit OS: looks for NvFBC.dll in syswow64 34 | // for 64-bit app on 64-bit OS: looks for NvFBC64.dll in system32 35 | bool load(std::string fileName = std::string()) 36 | { 37 | if(NULL != m_handle) 38 | return true; 39 | 40 | if(!fileName.empty()) 41 | m_handle = ::LoadLibraryA(fileName.c_str()); 42 | 43 | if(NULL == m_handle) 44 | { 45 | m_handle = ::LoadLibraryA(getDefaultPath().c_str()); 46 | } 47 | 48 | if(NULL == m_handle) 49 | { 50 | fprintf(stderr, "Unable to load NvFBC.\n"); 51 | return false; 52 | } 53 | 54 | // Load the three functions exported by NvFBC 55 | pfn_create = (NvFBC_CreateFunctionExType)::GetProcAddress(m_handle, "NvFBC_CreateEx"); 56 | pfn_set_global_flags = (NvFBC_SetGlobalFlagsType)::GetProcAddress(m_handle, "NvFBC_SetGlobalFlags"); 57 | pfn_get_status = (NvFBC_GetStatusExFunctionType)::GetProcAddress(m_handle, "NvFBC_GetStatusEx"); 58 | 59 | if((NULL == pfn_create) || (NULL == pfn_set_global_flags) || (NULL == pfn_get_status)) 60 | { 61 | fprintf(stderr, "Unable to load the NvFBC function pointers.\n"); 62 | close(); 63 | 64 | return false; 65 | } 66 | 67 | return true; 68 | } 69 | 70 | // Close the NvFBC dll 71 | void close() 72 | { 73 | if(NULL != m_handle) 74 | FreeLibrary(m_handle); 75 | 76 | m_handle = NULL; 77 | pfn_create = NULL; 78 | pfn_get_status = NULL; 79 | } 80 | 81 | // Get the status for the provided adapter, if no adapter is 82 | // provided the default adapter is used. 
83 | NVFBCRESULT getStatus(NvFBCStatusEx *status) 84 | { 85 | return pfn_get_status((void*)status); 86 | } 87 | 88 | // Sets the global flags for the provided adapter, if 89 | // no adapter is provided the default adapter is used 90 | void setGlobalFlags(DWORD flags, int adapter = 0) 91 | { 92 | setTargetAdapter(adapter); 93 | pfn_set_global_flags(flags); 94 | } 95 | 96 | // Creates an instance of the provided NvFBC type if possible 97 | NVFBCRESULT createEx(NvFBCCreateParams *pParams) 98 | { 99 | return pfn_create((void *)pParams); 100 | } 101 | // Creates an instance of the provided NvFBC type if possible. 102 | void *create(DWORD type, DWORD *maxWidth, DWORD *maxHeight, int adapter = 0, void *devicePtr = NULL) 103 | { 104 | if(NULL == m_handle) 105 | return NULL; 106 | 107 | NVFBCRESULT res = NVFBC_SUCCESS; 108 | NvFBCStatusEx status = {0}; 109 | status.dwVersion = NVFBC_STATUS_VER; 110 | status.dwAdapterIdx = adapter; 111 | res = getStatus(&status); 112 | 113 | if (res != NVFBC_SUCCESS) 114 | { 115 | fprintf(stderr, "NvFBC not supported on this device + driver.\r\n"); 116 | return NULL; 117 | } 118 | 119 | // Check to see if the device and driver are supported 120 | if(!status.bIsCapturePossible) 121 | { 122 | fprintf(stderr, "Unsupported device or driver.\r\n"); 123 | return NULL; 124 | } 125 | 126 | // Check to see if an instance can be created 127 | if(!status.bCanCreateNow) 128 | { 129 | fprintf(stderr, "Unable to create an instance of NvFBC.\r\n"); 130 | return NULL; 131 | } 132 | 133 | NvFBCCreateParams createParams; 134 | memset(&createParams, 0, sizeof(createParams)); 135 | createParams.dwVersion = NVFBC_CREATE_PARAMS_VER; 136 | createParams.dwInterfaceType = type; 137 | createParams.pDevice = devicePtr; 138 | createParams.dwAdapterIdx = adapter; 139 | 140 | res = pfn_create(&createParams); 141 | 142 | *maxWidth = createParams.dwMaxDisplayWidth; 143 | *maxHeight = createParams.dwMaxDisplayHeight; 144 | 145 | return createParams.pNvFBC; 146 | } 147 | 148 
| protected: 149 | // Get the default NvFBC library path 150 | typedef BOOL (WINAPI *pfnIsWow64Process) (HANDLE, PBOOL); 151 | pfnIsWow64Process fnIsWow64Process; 152 | 153 | BOOL IsWow64() 154 | { 155 | BOOL bIsWow64 = FALSE; 156 | 157 | fnIsWow64Process = (pfnIsWow64Process) GetProcAddress( 158 | GetModuleHandle(TEXT("kernel32.dll")),"IsWow64Process"); 159 | 160 | if (NULL != fnIsWow64Process) 161 | { 162 | if (!fnIsWow64Process(GetCurrentProcess(),&bIsWow64)) 163 | { 164 | bIsWow64 = false; 165 | } 166 | } 167 | return bIsWow64; 168 | } 169 | 170 | std::string getDefaultPath() 171 | { 172 | std::string defaultPath; 173 | 174 | size_t pathSize; 175 | char *libPath; 176 | 177 | if(0 != _dupenv_s(&libPath, &pathSize, "SystemRoot")) 178 | { 179 | fprintf(stderr, "Unable to get the SystemRoot environment variable\n"); 180 | return defaultPath; 181 | } 182 | 183 | if(0 == pathSize) 184 | { 185 | fprintf(stderr, "The SystemRoot environment variable is not set\n"); 186 | return defaultPath; 187 | } 188 | #ifdef _WIN64 189 | defaultPath = std::string(libPath) + "\\System32\\" + NVFBC64_LIBRARY_NAME; 190 | #else 191 | if (IsWow64()) 192 | { 193 | defaultPath = std::string(libPath) + "\\Syswow64\\" + NVFBC_LIBRARY_NAME; 194 | } 195 | else 196 | { 197 | defaultPath = std::string(libPath) + "\\System32\\" + NVFBC_LIBRARY_NAME; 198 | } 199 | #endif 200 | return defaultPath; 201 | } 202 | 203 | void setTargetAdapter(int adapter = 0) 204 | { 205 | char targetAdapter[10] = {0}; 206 | _snprintf_s(targetAdapter, 10, 9, "%d", adapter); 207 | SetEnvironmentVariableA("NVFBC_TARGET_ADAPTER", targetAdapter); 208 | } 209 | 210 | 211 | protected: 212 | HMODULE m_handle; 213 | NvFBC_GetStatusExFunctionType pfn_get_status; 214 | NvFBC_SetGlobalFlagsType pfn_set_global_flags; 215 | NvFBC_CreateFunctionExType pfn_create; 216 | }; 217 | -------------------------------------------------------------------------------- /NvFBCH264/main.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | const NvU32 FPS = 30; 14 | 15 | using namespace std; 16 | 17 | enum class profiles 18 | { 19 | BASE = 66, 20 | MAIN = 77, 21 | HIGH = 100 22 | }; 23 | 24 | istream& operator>>(std::istream& in, profiles& profile) 25 | { 26 | string token; 27 | in >> token; 28 | if (token == "BASE") 29 | profile = profiles::BASE; 30 | else if (token == "MAIN") 31 | profile = profiles::MAIN; 32 | else if (token == "HIGH") 33 | profile = profiles::HIGH; 34 | else 35 | in.setstate(ios_base::failbit); 36 | return in; 37 | } 38 | 39 | ostream& operator<<(std::ostream& out, profiles const& profile) 40 | { 41 | switch (profile) { 42 | case profiles::BASE: 43 | out << "BASE"; 44 | break; 45 | case profiles::MAIN: 46 | out << "MAIN"; 47 | break; 48 | case profiles::HIGH: 49 | out << "HIGH"; 50 | break; 51 | } 52 | return out; 53 | } 54 | 55 | // Command line arguments 56 | struct cmdargs 57 | { 58 | NvU32 frame_cnt; 59 | NvU32 bitrate; 60 | profiles profile; 61 | string filename; 62 | bool is_lossless; 63 | bool bYUV444; 64 | }; 65 | 66 | int main(int argc, char *argv[]) 67 | { 68 | cmdargs args; 69 | namespace po = boost::program_options; 70 | po::options_description desc("Usage"); 71 | desc.add_options() 72 | ("frames,f", po::value(&args.frame_cnt)->required()->default_value(FPS), "Number of frames to capture") 73 | ("bitrate,b", po::value(&args.bitrate)->default_value(8'000'000), "The desired average bitrate") 74 | ("profile,p", po::value(&args.profile)->default_value(profiles::MAIN), "The encoding profile (BASE/MAIN/HIGH)") 75 | ("output,o", po::value(&args.filename)->default_value("stream.h264"), "The filename for the output stream") 76 | ("lossless,l", po::bool_switch(&args.is_lossless), "If set, the frames are encoded lossless") 77 | ("yuv444", 
po::bool_switch(&args.bYUV444), "If set, YUV444 encoding is enabled, hence no color resampling performed") 78 | ; 79 | 80 | po::variables_map vm; 81 | try { 82 | po::store(po::command_line_parser(argc, argv).options(desc).run(), vm); 83 | po::notify(vm); 84 | } catch (boost::program_options::error const& e) { 85 | cerr << e.what() << endl; 86 | cout << desc; 87 | return EXIT_FAILURE; 88 | } 89 | 90 | DWORD max_width, max_height; 91 | 92 | NvFBCLibrary nvfbc; 93 | 94 | NvFBCFrameGrabInfo grab_info = {0}; 95 | NvFBC_H264HWEncoder_FrameInfo frame_info = {0}; 96 | NVFBC_H264_GRAB_FRAME_PARAMS fbch264GrabFrameParams = {0}; 97 | NVFBCRESULT res; 98 | 99 | if (!nvfbc.load()) { 100 | cerr << "Cannot load NvFBC library" << endl; 101 | return EXIT_FAILURE; 102 | } 103 | 104 | // Create the encoder instance 105 | unique_ptr encoder { 106 | static_cast(nvfbc.create(NVFBC_TO_H264_HW_ENCODER, &max_width, &max_height)), 107 | bind(&NvFBCToH264HWEncoder::NvFBCH264Release, placeholders::_1) }; 108 | 109 | if (!encoder) { 110 | cerr << "Cannot create the H.264 encoder\n"; 111 | return EXIT_FAILURE; 112 | } 113 | 114 | ofstream output_file { args.filename, ios::binary }; 115 | 116 | if (!output_file) { 117 | cerr << "Cannot open " << args.filename << " for writing\n"; 118 | return EXIT_FAILURE; 119 | } 120 | 121 | vector output_buffer(max_width * max_height); 122 | 123 | NvFBC_H264HWEncoder_Config encode_config = {0}; 124 | encode_config.dwVersion = NVFBC_H264HWENC_CONFIG_VER; 125 | encode_config.dwProfile = static_cast(args.profile); 126 | encode_config.dwFrameRateNum = FPS; 127 | encode_config.dwFrameRateDen = 1; // fps == fps / 1 128 | encode_config.bOutBandSPSPPS = FALSE; // Use inband SPSPPS, if you need to grab headers on demand use outband SPSPPS 129 | encode_config.bRecordTimeStamps = TRUE; // Do record timestamps 130 | encode_config.stereoFormat = NVFBC_H264_STEREO_NONE; 131 | 132 | if (args.bYUV444) 133 | encode_config.bEnableYUV444Encoding = TRUE; 134 | 135 | if 
(args.is_lossless) { 136 | encode_config.ePresetConfig = NVFBC_H264_PRESET_LOSSLESS_HP; 137 | encode_config.eRateControl = NVFBC_H264_ENC_PARAMS_RC_CONSTQP; 138 | } else { 139 | encode_config.dwAvgBitRate = args.bitrate; 140 | encode_config.dwPeakBitRate = args.bitrate * 2; // Set the peak bitrate twice of the average 141 | encode_config.dwGOPLength = 100; // The keyframe frequency 142 | encode_config.eRateControl = NVFBC_H264_ENC_PARAMS_RC_VBR; // Variable bitrate 143 | encode_config.ePresetConfig= NVFBC_H264_PRESET_LOW_LATENCY_HQ; 144 | encode_config.dwQP = 26; // Quantization parameter, between 0 and 51 145 | } 146 | 147 | NVFBC_H264_SETUP_PARAMS fbch264SetupParams = {0}; 148 | fbch264SetupParams.dwVersion = NVFBC_H264_SETUP_PARAMS_VER; 149 | fbch264SetupParams.bWithHWCursor = TRUE; 150 | fbch264SetupParams.pEncodeConfig = &encode_config; 151 | 152 | res = encoder->NvFBCH264SetUp(&fbch264SetupParams); 153 | if (res != NVFBC_SUCCESS) { 154 | cerr << "Cannot setup H264 encoder\n"; 155 | return EXIT_FAILURE; 156 | } 157 | 158 | for (unsigned i = 0; i < args.frame_cnt; ++i) { 159 | memset(&grab_info, 0, sizeof(grab_info)); 160 | memset(&frame_info, 0, sizeof(frame_info)); 161 | memset(&fbch264GrabFrameParams, 0, sizeof(fbch264GrabFrameParams)); 162 | fbch264GrabFrameParams.dwVersion = NVFBC_H264_GRAB_FRAME_PARAMS_VER; 163 | fbch264GrabFrameParams.dwFlags = NVFBC_TOH264_NOWAIT; 164 | fbch264GrabFrameParams.pNvFBCFrameGrabInfo = &grab_info; 165 | fbch264GrabFrameParams.pFrameInfo = &frame_info; 166 | fbch264GrabFrameParams.pBitStreamBuffer = output_buffer.data(); 167 | 168 | res = encoder->NvFBCH264GrabFrame(&fbch264GrabFrameParams); // blocks until a new frame available 169 | if (res == NVFBC_SUCCESS) { 170 | if (frame_info.dwByteSize == 0) { 171 | cerr << "Got zero-sized frame\n"; 172 | return EXIT_FAILURE; 173 | } 174 | 175 | output_file.write(reinterpret_cast(output_buffer.data()), frame_info.dwByteSize); 176 | 177 | cerr << "Wrote frame " << i << " to " << 
args.filename << endl; 178 | } else { 179 | if (res == NVFBC_ERROR_INVALIDATED_SESSION) { 180 | // Invalidated session: need to re-create the encoder... 181 | encoder.reset(static_cast(nvfbc.create(NVFBC_TO_H264_HW_ENCODER, &max_width, &max_height))); 182 | res = encoder->NvFBCH264SetUp(&fbch264SetupParams); 183 | // ...and then try again 184 | if (res == NVFBC_SUCCESS) { 185 | output_buffer.resize(max_width * max_height); 186 | res = encoder->NvFBCH264GrabFrame(&fbch264GrabFrameParams); 187 | } 188 | } 189 | if (res != NVFBC_SUCCESS) { 190 | cerr << "Cannot grab the frame\n"; 191 | return EXIT_FAILURE; 192 | } 193 | } 194 | } 195 | return EXIT_SUCCESS; 196 | } -------------------------------------------------------------------------------- /Util/NvIFRLibrary.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #define NVIFR_LIBRARY_NAME "NvIFR.dll" 8 | #define NVIFR64_LIBRARY_NAME "NvIFR64.dll" 9 | 10 | typedef BOOL (WINAPI *pfnIsWow64Process) (HANDLE, PBOOL); 11 | 12 | class NvIFRLibrary 13 | { 14 | protected: 15 | HMODULE m_handle; 16 | NvIFRLibrary(const NvIFRLibrary &); 17 | NvIFRLibrary &operator=(const NvIFRLibrary &); 18 | pfnIsWow64Process fnIsWow64Process; 19 | 20 | NvIFR_CreateFunctionExType NvIFR_CreateEx_fn; 21 | #if DIRECT3D_VERSION == 0x0900 22 | NvIFR_CreateSharedSurfaceEXTFunctionType NvIFR_CreateSharedSurfaceEXT_fn; 23 | NvIFR_DestroySharedSurfaceEXTFunctionType NvIFR_DestroySharedSurfaceEXT_fn; 24 | NvIFR_CopyToSharedSurfaceEXTFunctionType NvIFR_CopyToSharedSurfaceEXT_fn; 25 | NvIFR_CopyFromSharedSurfaceEXTFunctionType NvIFR_CopyFromSharedSurfaceEXT_fn; 26 | #endif 27 | 28 | BOOL IsWow64() 29 | { 30 | BOOL bIsWow64 = FALSE; 31 | 32 | fnIsWow64Process = (pfnIsWow64Process) GetProcAddress( 33 | GetModuleHandle(TEXT("kernel32.dll")),"IsWow64Process"); 34 | 35 | if (NULL != fnIsWow64Process) 36 | { 37 | if 
(!fnIsWow64Process(GetCurrentProcess(),&bIsWow64)) 38 | { 39 | bIsWow64 = false; 40 | } 41 | } 42 | return bIsWow64; 43 | } 44 | 45 | std::string getNvIFRDefaultPath() 46 | { 47 | std::string defaultPath; 48 | 49 | size_t pathSize; 50 | char *libPath; 51 | 52 | if(0 != _dupenv_s(&libPath, &pathSize, "SystemRoot")) 53 | { 54 | fprintf(stderr, "Unable to get the SystemRoot environment variable\n"); 55 | return defaultPath; 56 | } 57 | 58 | if(0 == pathSize) 59 | { 60 | fprintf(stderr, "The SystemRoot environment variable is not set\n"); 61 | return defaultPath; 62 | } 63 | #ifdef _WIN64 64 | defaultPath = std::string(libPath) + "\\System32\\" + NVIFR64_LIBRARY_NAME; 65 | #else 66 | if (IsWow64()) 67 | { 68 | defaultPath = std::string(libPath) + "\\Syswow64\\" + NVIFR_LIBRARY_NAME; 69 | } 70 | else 71 | { 72 | defaultPath = std::string(libPath) + "\\System32\\" + NVIFR_LIBRARY_NAME; 73 | } 74 | #endif 75 | return defaultPath; 76 | } 77 | 78 | public: 79 | NvIFRLibrary() 80 | : m_handle(NULL) 81 | , fnIsWow64Process(NULL) 82 | , NvIFR_CreateEx_fn(NULL) 83 | #if DIRECT3D_VERSION == 0x0900 84 | , NvIFR_CreateSharedSurfaceEXT_fn(NULL) 85 | , NvIFR_DestroySharedSurfaceEXT_fn(NULL) 86 | , NvIFR_CopyToSharedSurfaceEXT_fn(NULL) 87 | , NvIFR_CopyFromSharedSurfaceEXT_fn(NULL) 88 | #endif 89 | { 90 | } 91 | 92 | ~NvIFRLibrary() 93 | { 94 | //if(NULL != m_handle) 95 | // FreeLibrary(m_handle); 96 | } 97 | 98 | HMODULE load() 99 | { 100 | if (NULL != m_handle) 101 | { 102 | return m_handle; 103 | } 104 | m_handle = LoadLibrary(getNvIFRDefaultPath().c_str()); 105 | if (!m_handle) 106 | { 107 | fprintf(stderr, "Unable to load NvIFR.\n"); 108 | } 109 | else 110 | { 111 | NvIFR_CreateEx_fn = (NvIFR_CreateFunctionExType) GetProcAddress(m_handle, "NvIFR_CreateEx"); 112 | #if DIRECT3D_VERSION == 0x0900 113 | NvIFR_CreateSharedSurfaceEXT_fn = (NvIFR_CreateSharedSurfaceEXTFunctionType) GetProcAddress( m_handle, "NvIFR_CreateSharedSurfaceEXT"); 114 | NvIFR_DestroySharedSurfaceEXT_fn = 
(NvIFR_DestroySharedSurfaceEXTFunctionType) GetProcAddress( m_handle, "NvIFR_DestroySharedSurfaceEXT"); 115 | NvIFR_CopyToSharedSurfaceEXT_fn = (NvIFR_CopyToSharedSurfaceEXTFunctionType) GetProcAddress( m_handle, "NvIFR_CopyToSharedSurfaceEXT"); 116 | NvIFR_CopyFromSharedSurfaceEXT_fn = (NvIFR_CopyFromSharedSurfaceEXTFunctionType) GetProcAddress( m_handle, "NvIFR_CopyFromSharedSurfaceEXT"); 117 | #endif 118 | 119 | #if DIRECT3D_VERSION == 0x0900 120 | if (NvIFR_CreateEx_fn == NULL || 121 | NvIFR_CreateSharedSurfaceEXT_fn == NULL || 122 | NvIFR_DestroySharedSurfaceEXT_fn == NULL || 123 | NvIFR_CopyToSharedSurfaceEXT_fn == NULL || 124 | NvIFR_CopyFromSharedSurfaceEXT_fn == NULL) 125 | #else 126 | if (NvIFR_CreateEx_fn == NULL) 127 | #endif 128 | { 129 | fprintf(stderr, "Unable to load NvIFR Entrypoints.\n"); 130 | FreeLibrary(m_handle); 131 | m_handle = NULL; 132 | } 133 | } 134 | return m_handle; 135 | } 136 | 137 | void *create(void *pDev, NvU32 dwInterfaceType) 138 | { 139 | //! Get the proc address for the NvIFR_Create function 140 | //! Create the NvIFR object 141 | NVIFR_CREATE_PARAMS params = {0}; 142 | if (NvIFR_CreateEx_fn) 143 | { 144 | params.dwVersion = NVIFR_CREATE_PARAMS_VER; 145 | params.dwInterfaceType = dwInterfaceType; 146 | params.pDevice = pDev; 147 | NVIFRRESULT res = NvIFR_CreateEx_fn(¶ms); 148 | if (res != NVIFR_SUCCESS) 149 | { 150 | fprintf(stderr, "NvIFR_CreateEx failed with error %d.\n", res); 151 | } 152 | } 153 | else 154 | { 155 | fprintf(stderr, "Invalid call. NvIFR Library not initialized.\n"); 156 | } 157 | return params.pNvIFR; 158 | } 159 | 160 | NVIFRRESULT createEx(NVIFR_CREATE_PARAMS *pParams) 161 | { 162 | //! Get the proc address for the NvIFR_Create function 163 | //! Create the NvIFRToSys object 164 | NVIFRRESULT res = NVIFR_ERROR_INVALID_CALL; 165 | if (NvIFR_CreateEx_fn) 166 | { 167 | res = NvIFR_CreateEx_fn(pParams); 168 | } 169 | else 170 | { 171 | fprintf(stderr, "Invalid call. 
NvIFR Library not initialized.\n"); 172 | } 173 | return res; 174 | } 175 | #if DIRECT3D_VERSION == 0x0900 176 | BOOL CreateSharedSurface (IDirect3DDevice9 * pDevice, NvU32 dwWidth, NvU32 dwHeight, IFRSharedSurfaceHandle * phIFRSharedSurface) 177 | { 178 | if (NvIFR_CreateSharedSurfaceEXT_fn) 179 | { 180 | return NvIFR_CreateSharedSurfaceEXT_fn(pDevice, dwWidth, dwHeight, phIFRSharedSurface); 181 | } 182 | return false; 183 | } 184 | 185 | BOOL DestroySharedSurface (IDirect3DDevice9 * pDevice, IFRSharedSurfaceHandle hIFRSharedSurface) 186 | { 187 | if (NvIFR_DestroySharedSurfaceEXT_fn) 188 | { 189 | return NvIFR_DestroySharedSurfaceEXT_fn(pDevice, hIFRSharedSurface); 190 | } 191 | return false; 192 | } 193 | 194 | BOOL CopyToSharedSurface (IDirect3DDevice9 * pDevice, IFRSharedSurfaceHandle hIFRSharedSurface, IDirect3DSurface9 * pSurface) 195 | { 196 | if (NvIFR_CopyToSharedSurfaceEXT_fn) 197 | { 198 | return NvIFR_CopyToSharedSurfaceEXT_fn(pDevice, hIFRSharedSurface, pSurface); 199 | } 200 | return false; 201 | } 202 | 203 | BOOL CopyFromSharedSurface (IDirect3DDevice9 * pDevice, IFRSharedSurfaceHandle hIFRSharedSurface, IDirect3DSurface9 * pSurface) 204 | { 205 | if (NvIFR_CopyFromSharedSurfaceEXT_fn) 206 | { 207 | return NvIFR_CopyFromSharedSurfaceEXT_fn(pDevice, hIFRSharedSurface, pSurface); 208 | } 209 | return false; 210 | } 211 | #endif 212 | }; 213 | -------------------------------------------------------------------------------- /Util/Util.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {1204D7DC-7E0B-4710-87D7-5BBC67FAAC63} 23 | Util 24 | Win32Proj 25 | 26 | 27 | 28 | StaticLibrary 29 | MultiByte 30 | true 31 | v140 32 | 33 | 34 | StaticLibrary 35 | MultiByte 36 | v140 37 | 38 | 39 | StaticLibrary 40 | MultiByte 41 | true 42 | v140 43 | 44 | 45 | StaticLibrary 46 | 
MultiByte 47 | v140 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | <_ProjectFileVersion>10.0.40219.1 67 | $(ProjectDir)\..\$(Configuration)\$(Platform)\ 68 | $(Configuration)\$(Platform)\ 69 | $(ProjectDir)\..\$(Configuration)\$(Platform)\ 70 | $(Configuration)\$(Platform)\ 71 | $(ProjectDir)\..\$(Configuration)\$(Platform)\ 72 | $(Configuration)\$(Platform)\ 73 | $(ProjectDir)\..\$(Configuration)\$(Platform)\ 74 | $(Configuration)\$(Platform)\ 75 | 76 | 77 | 78 | Disabled 79 | WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 80 | true 81 | EnableFastChecks 82 | MultiThreadedDebugDLL 83 | 84 | 85 | Level3 86 | EditAndContinue 87 | 88 | 89 | 90 | 91 | X64 92 | 93 | 94 | Disabled 95 | WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 96 | true 97 | EnableFastChecks 98 | MultiThreadedDebugDLL 99 | 100 | 101 | Level3 102 | ProgramDatabase 103 | 104 | 105 | 106 | 107 | WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 108 | MultiThreadedDLL 109 | 110 | 111 | Level3 112 | ProgramDatabase 113 | 114 | 115 | 116 | 117 | X64 118 | 119 | 120 | WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 121 | MultiThreadedDLL 122 | 123 | 124 | Level3 125 | ProgramDatabase 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /Util/helper_string.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 1993-2014 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. 
// Any use, reproduction, disclosure, or distribution of
// this software and related documentation outside the terms of the EULA
// is strictly prohibited.
//
// */

// These are helper functions for the SDK samples (string parsing, timers, etc)
#ifndef STRING_HELPER_H
#define STRING_HELPER_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fstream>
#include <string>

#if defined(WIN32) || defined(_WIN32) || defined(WIN64)
#ifndef _CRT_SECURE_NO_DEPRECATE
#define _CRT_SECURE_NO_DEPRECATE
#endif
#ifndef STRCASECMP
#define STRCASECMP  _stricmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP _strnicmp
#endif
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
#endif

#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result != 0)
#endif
#ifndef SSCANF
#define SSCANF sscanf_s
#endif
#ifndef SPRINTF
#define SPRINTF sprintf_s
#endif
#else // Linux Includes
#include <string.h>
#include <strings.h>

#ifndef STRCASECMP
#define STRCASECMP  strcasecmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP strncasecmp
#endif
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
#endif

#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result == NULL)
#endif
#ifndef SSCANF
#define SSCANF sscanf
#endif
#ifndef SPRINTF
#define SPRINTF sprintf
#endif
#endif

#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif

// Command-line parsing helpers.
//
// All helpers below skip argv[0], strip leading '-' characters from each
// argument with stringRemoveDelimiter(), and compare names case-insensitively.

// Returns the index of the first character of `string` that follows its
// leading run of `delimiter` characters.
//
// Returns 0 when that index is >= strlen(string) - 1, i.e. when at most one
// character follows the delimiters. Such arguments ("-", "-a", "--a") are
// therefore compared with their delimiters still attached; this long-standing
// behaviour is kept unchanged because callers may rely on it.
inline int stringRemoveDelimiter(char delimiter, const char *string)
{
    int string_start = 0;

    while (string[string_start] == delimiter)
    {
        string_start++;
    }

    if (string_start >= (int)strlen(string)-1)
    {
        return 0;
    }

    return string_start;
}

// Locates the extension of `filename`, i.e. the text after its last '.'.
//
// On success *extension points into `filename` just past the last '.', and
// the index of that position is returned (a trailing '.' yields an empty
// extension). When `filename` is NULL, empty, contains no '.', or its only
// '.' is the very first character, *extension is set to NULL and 0 is
// returned.
//
// The previous backwards scan read filename[-1] for an empty string and
// reported "no extension" when the '.' sat at index 1 (e.g. "a.txt");
// strrchr() avoids both problems.
inline int getFileExtension(char *filename, char **extension)
{
    const char *last_dot = (filename != NULL) ? strrchr(filename, '.') : NULL;

    // A leading dot was never treated as an extension separator; keep that.
    if (last_dot == NULL || last_dot == filename)
    {
        *extension = NULL;
        return 0;
    }

    int extension_start = (int)(last_dot - filename) + 1;
    *extension = &filename[extension_start];

    return extension_start;
}


// Returns true when some argument, after delimiter stripping and with any
// "=value" suffix ignored, equals `string_ref` (case-insensitive, exact
// length match).
inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
{
    const int ref_length = (int)strlen(string_ref);

    for (int i=1; i < argc; i++)
    {
        int string_start = stringRemoveDelimiter('-', argv[i]);
        const char *string_argv = &argv[i][string_start];

        // Only the part in front of '=' is the option name.
        const char *equal_pos = strchr(string_argv, '=');
        int argv_length = (equal_pos == NULL) ? (int)strlen(string_argv)
                                              : (int)(equal_pos - string_argv);

        if (ref_length == argv_length && !STRNCASECMP(string_argv, string_ref, ref_length))
        {
            return true;
        }
    }

    return false;
}

// Looks for the first argument whose name starts with `string_ref` and, when
// text follows the name, stores atoi() of that text (an optional '=' is
// skipped) converted to T in *value.
//
// Returns true when a matching argument exists, even if it carried no value;
// in that case *value is left untouched. Matching is a prefix match, so
// "-n10" and "-n=10" both satisfy string_ref "n".
template <class T>
inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value)
{
    const int length = (int)strlen(string_ref);

    for (int i=1; i < argc; i++)
    {
        int string_start = stringRemoveDelimiter('-', argv[i]);
        const char *string_argv = &argv[i][string_start];

        if (!STRNCASECMP(string_argv, string_ref, length))
        {
            if (length+1 <= (int)strlen(string_argv))
            {
                int auto_inc = (string_argv[length] == '=') ? 1 : 0;
                *value = (T)atoi(&string_argv[length + auto_inc]);
            }

            // First match wins.
            return true;
        }
    }

    return false;
}

// Returns the integer value of the last argument whose name starts with
// `string_ref` (an optional '=' before the digits is skipped). Returns 0 when
// no argument matches or when the matching argument carries no value.
inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
{
    const int length = (int)strlen(string_ref);
    int value = 0;

    for (int i=1; i < argc; i++)
    {
        int string_start = stringRemoveDelimiter('-', argv[i]);
        const char *string_argv = &argv[i][string_start];

        if (!STRNCASECMP(string_argv, string_ref, length))
        {
            if (length+1 <= (int)strlen(string_argv))
            {
                int auto_inc = (string_argv[length] == '=') ? 1 : 0;
                value = atoi(&string_argv[length + auto_inc]);
            }
            else
            {
                value = 0;
            }
            // Keep scanning: a later occurrence overrides an earlier one.
        }
    }

    return value;
}

// Floating-point counterpart of getCmdLineArgumentInt(): returns atof() of
// the text following the last matching argument name, or 0 when no argument
// matches or the match carries no value.
inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref)
{
    const int length = (int)strlen(string_ref);
    float value = 0.f;

    for (int i=1; i < argc; i++)
    {
        int string_start = stringRemoveDelimiter('-', argv[i]);
        const char *string_argv = &argv[i][string_start];

        if (!STRNCASECMP(string_argv, string_ref, length))
        {
            if (length+1 <= (int)strlen(string_argv))
            {
                int auto_inc = (string_argv[length] == '=') ? 1 : 0;
                value = (float)atof(&string_argv[length + auto_inc]);
            }
            else
            {
                value = 0.f;
            }
            // Keep scanning: a later occurrence overrides an earlier one.
        }
    }

    return value;
}

// Finds the last argument whose name starts with `string_ref` and points
// *string_retval at the text that follows the name and one separator
// character (normally '='). The pointer aliases argv; nothing is copied.
//
// When the argument consists of the name only (no "=value"), *string_retval
// points at the argument's terminating NUL, i.e. an empty string. Previously
// it pointed one byte past the terminator.
//
// Returns true when a matching argument exists; otherwise *string_retval is
// set to NULL and false is returned.
inline bool getCmdLineArgumentString(const int argc, const char **argv,
                                     const char *string_ref, char **string_retval)
{
    bool bFound = false;
    const int length = (int)strlen(string_ref);

    for (int i=1; i < argc; i++)
    {
        int string_start = stringRemoveDelimiter('-', argv[i]);
        char *string_argv = (char *)&argv[i][string_start];

        if (!STRNCASECMP(string_argv, string_ref, length))
        {
            // Never step over the terminator when no value was supplied.
            int skip = (string_argv[length] == '\0') ? 0 : 1;
            *string_retval = &string_argv[length + skip];
            bFound = true;
        }
    }

    if (!bFound)
    {
        *string_retval = NULL;
    }

    return bFound;
}

#endif
// --------------------------------------------------------------------------------
/NvFBCH264/NvFBCH264.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 18 | 19 | 20 | 21 | 22 | 29 | 32 | 35 | 38 | 41 | 44 | 56 | 59 | 62 | 65 | 72 | 75 | 78 | 81 | 84 | 87 | 90 | 94 | 95 | 102 | 105 | 108 | 111 | 114 | 118 | 130 | 133 | 136 | 139 | 146 | 149 | 152 | 155 | 158 | 161 | 164 | 168 | 169 | 177 | 180 | 183 | 186 | 189 | 192 | 204 | 207 | 210 | 213 | 222 | 225 | 228 | 231 | 234 | 237 | 240 | 244 | 245 | 253 | 256 | 259 | 262 | 265 | 269 | 281 | 284 | 287 | 290 | 299 | 302 | 305 | 308 | 311 | 314 | 317 | 321 | 322 | 323 | 324 | 325 | 326 | 329 | 330 | 331 | 332 | 333 | 334 | -------------------------------------------------------------------------------- /Util/Util.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 18 | 19 | 20 | 21 | 22 | 29 | 32 | 35 | 38 | 41 | 44 | 56 | 59 | 62 | 65 | 68 | 71 | 74 | 77 | 80 | 83 | 84 | 91 | 94 | 97 | 100 | 103 | 107 | 119 | 122 | 125 | 128 | 131 | 134 | 137 | 140 | 143 | 146 | 147 | 155 | 158 | 161 | 164 | 167 | 170 | 179 | 182 | 185 | 188 | 191 | 194 | 197 | 200 | 203 | 206 | 207 | 215 | 218 | 221 | 224 | 227 | 231 | 240 | 243 | 246 | 249 | 252 | 255 | 258 | 261 | 264 | 267 | 268 | 269 | 270 | 271 | 272 | 277 | 280 | 281 | 284 | 285 | 286 | 291 | 294 | 295 | 298 | 299 | 302 | 303 | 306 | 307 | 310 | 311 | 314 | 315 | 318 | 319 | 322 | 323 | 326 | 327 | 330 | 331 | 334 | 335 | 338 | 339 | 342 | 343 | 346 | 347 | 348 | 353 | 354 | 357 | 358 | 359 | 360 | 361 | 362 | -------------------------------------------------------------------------------- /NvFBCH264/NvFBCH264.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {C0C74593-8F68-4202-995C-9B6A654EDD72} 23 | NvFBCH264 24 | Win32Proj 25 | 8.1 26 | 27 | 28 | 29 | 
Application 30 | MultiByte 31 | true 32 | v140 33 | 34 | 35 | Application 36 | MultiByte 37 | v140 38 | 39 | 40 | Application 41 | MultiByte 42 | true 43 | v140 44 | 45 | 46 | Application 47 | MultiByte 48 | v140 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | <_ProjectFileVersion>10.0.40219.1 68 | $(ProjectDir)\..\$(Configuration)\$(Platform)\ 69 | $(Configuration)\$(Platform)\ 70 | true 71 | $(ProjectDir)\..\$(Configuration)\$(Platform)\ 72 | $(Configuration)\$(Platform)\ 73 | true 74 | $(ProjectDir)\..\$(Configuration)\$(Platform)\ 75 | $(Configuration)\$(Platform)\ 76 | false 77 | $(ProjectDir)\..\$(Configuration)\$(Platform)\ 78 | $(Configuration)\$(Platform)\ 79 | false 80 | 81 | 82 | C:\Program Files\Boost\1.60.0;C:\Users\ignat\Desktop\grid-sdk-2.3.7-windows\inc;$(IncludePath) 83 | C:\Program Files\Boost\1.60.0\lib64-msvc-14.0;C:\Users\ignat\Desktop\grid-sdk-2.3.7-windows\lib;$(LibraryPath) 84 | 85 | 86 | 87 | Disabled 88 | ../Util;../../inc 89 | _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 90 | true 91 | EnableFastChecks 92 | MultiThreadedDebugDLL 93 | 94 | 95 | Level3 96 | EditAndContinue 97 | 98 | 99 | true 100 | Console 101 | MachineX86 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | X64 111 | 112 | 113 | Disabled 114 | ../Util;../../inc 115 | _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 116 | true 117 | EnableFastChecks 118 | MultiThreadedDebugDLL 119 | 120 | 121 | Level3 122 | ProgramDatabase 123 | 124 | 125 | true 126 | Console 127 | MachineX64 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | MaxSpeed 137 | true 138 | ../Util;../../inc 139 | _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 140 | MultiThreadedDLL 141 | true 142 | 143 | 144 | Level3 145 | ProgramDatabase 146 | 147 | 148 | true 149 | Console 150 | true 151 | true 152 | MachineX86 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | X64 162 | 163 | 164 | MaxSpeed 165 | true 166 | ../Util;../../inc 167 | 
_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 168 | MultiThreadedDLL 169 | true 170 | 171 | 172 | Level3 173 | ProgramDatabase 174 | 175 | 176 | true 177 | Console 178 | true 179 | true 180 | MachineX64 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | {1204d7dc-7e0b-4710-87d7-5bbc67faac63} 193 | false 194 | 195 | 196 | 197 | 198 | 199 | -------------------------------------------------------------------------------- /Util/drvapi_error_string.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2014 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 9 | * 10 | */ 11 | 12 | #ifndef _DRVAPI_ERROR_STRING_H_ 13 | #define _DRVAPI_ERROR_STRING_H_ 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | #ifdef __cuda_cuda_h__ // check to see if CUDA_H is included above 20 | 21 | // Error Code string definitions here 22 | typedef struct 23 | { 24 | char const *error_string; 25 | int error_id; 26 | } s_CudaErrorStr; 27 | 28 | /** 29 | * Error codes 30 | */ 31 | static s_CudaErrorStr sCudaDrvErrorString[] = 32 | { 33 | /** 34 | * The API call returned with no errors. In the case of query calls, this 35 | * can also mean that the operation being queried is complete (see 36 | * ::cuEventQuery() and ::cuStreamQuery()). 37 | */ 38 | { "CUDA_SUCCESS", 0 }, 39 | 40 | /** 41 | * This indicates that one or more of the parameters passed to the API call 42 | * is not within an acceptable range of values. 
43 | */ 44 | { "CUDA_ERROR_INVALID_VALUE", 1 }, 45 | 46 | /** 47 | * The API call failed because it was unable to allocate enough memory to 48 | * perform the requested operation. 49 | */ 50 | { "CUDA_ERROR_OUT_OF_MEMORY", 2 }, 51 | 52 | /** 53 | * This indicates that the CUDA driver has not been initialized with 54 | * ::cuInit() or that initialization has failed. 55 | */ 56 | { "CUDA_ERROR_NOT_INITIALIZED", 3 }, 57 | 58 | /** 59 | * This indicates that the CUDA driver is in the process of shutting down. 60 | */ 61 | { "CUDA_ERROR_DEINITIALIZED", 4 }, 62 | 63 | /** 64 | * This indicates profiling APIs are called while application is running 65 | * in visual profiler mode. 66 | */ 67 | { "CUDA_ERROR_PROFILER_DISABLED", 5 }, 68 | /** 69 | * This indicates profiling has not been initialized for this context. 70 | * Call cuProfilerInitialize() to resolve this. 71 | */ 72 | { "CUDA_ERROR_PROFILER_NOT_INITIALIZED", 6 }, 73 | /** 74 | * This indicates profiler has already been started and probably 75 | * cuProfilerStart() is incorrectly called. 76 | */ 77 | { "CUDA_ERROR_PROFILER_ALREADY_STARTED", 7 }, 78 | /** 79 | * This indicates profiler has already been stopped and probably 80 | * cuProfilerStop() is incorrectly called. 81 | */ 82 | { "CUDA_ERROR_PROFILER_ALREADY_STOPPED", 8 }, 83 | /** 84 | * This indicates that no CUDA-capable devices were detected by the installed 85 | * CUDA driver. 86 | */ 87 | { "CUDA_ERROR_NO_DEVICE (no CUDA-capable devices were detected)", 100 }, 88 | 89 | /** 90 | * This indicates that the device ordinal supplied by the user does not 91 | * correspond to a valid CUDA device. 92 | */ 93 | { "CUDA_ERROR_INVALID_DEVICE (device specified is not a valid CUDA device)", 101 }, 94 | 95 | 96 | /** 97 | * This indicates that the device kernel image is invalid. This can also 98 | * indicate an invalid CUDA module. 
99 | */ 100 | { "CUDA_ERROR_INVALID_IMAGE", 200 }, 101 | 102 | /** 103 | * This most frequently indicates that there is no context bound to the 104 | * current thread. This can also be returned if the context passed to an 105 | * API call is not a valid handle (such as a context that has had 106 | * ::cuCtxDestroy() invoked on it). This can also be returned if a user 107 | * mixes different API versions (i.e. 3010 context with 3020 API calls). 108 | * See ::cuCtxGetApiVersion() for more details. 109 | */ 110 | { "CUDA_ERROR_INVALID_CONTEXT", 201 }, 111 | 112 | /** 113 | * This indicated that the context being supplied as a parameter to the 114 | * API call was already the active context. 115 | * \deprecated 116 | * This error return is deprecated as of CUDA 3.2. It is no longer an 117 | * error to attempt to push the active context via ::cuCtxPushCurrent(). 118 | */ 119 | { "CUDA_ERROR_CONTEXT_ALREADY_CURRENT", 202 }, 120 | 121 | /** 122 | * This indicates that a map or register operation has failed. 123 | */ 124 | { "CUDA_ERROR_MAP_FAILED", 205 }, 125 | 126 | /** 127 | * This indicates that an unmap or unregister operation has failed. 128 | */ 129 | { "CUDA_ERROR_UNMAP_FAILED", 206 }, 130 | 131 | /** 132 | * This indicates that the specified array is currently mapped and thus 133 | * cannot be destroyed. 134 | */ 135 | { "CUDA_ERROR_ARRAY_IS_MAPPED", 207 }, 136 | 137 | /** 138 | * This indicates that the resource is already mapped. 139 | */ 140 | { "CUDA_ERROR_ALREADY_MAPPED", 208 }, 141 | 142 | /** 143 | * This indicates that there is no kernel image available that is suitable 144 | * for the device. This can occur when a user specifies code generation 145 | * options for a particular CUDA source file that do not include the 146 | * corresponding device configuration. 147 | */ 148 | { "CUDA_ERROR_NO_BINARY_FOR_GPU", 209 }, 149 | 150 | /** 151 | * This indicates that a resource has already been acquired. 
152 | */ 153 | { "CUDA_ERROR_ALREADY_ACQUIRED", 210 }, 154 | 155 | /** 156 | * This indicates that a resource is not mapped. 157 | */ 158 | { "CUDA_ERROR_NOT_MAPPED", 211 }, 159 | 160 | /** 161 | * This indicates that a mapped resource is not available for access as an 162 | * array. 163 | */ 164 | { "CUDA_ERROR_NOT_MAPPED_AS_ARRAY", 212 }, 165 | 166 | /** 167 | * This indicates that a mapped resource is not available for access as a 168 | * pointer. 169 | */ 170 | { "CUDA_ERROR_NOT_MAPPED_AS_POINTER", 213 }, 171 | 172 | /** 173 | * This indicates that an uncorrectable ECC error was detected during 174 | * execution. 175 | */ 176 | { "CUDA_ERROR_ECC_UNCORRECTABLE", 214 }, 177 | 178 | /** 179 | * This indicates that the ::CUlimit passed to the API call is not 180 | * supported by the active device. 181 | */ 182 | { "CUDA_ERROR_UNSUPPORTED_LIMIT", 215 }, 183 | 184 | /** 185 | * This indicates that the ::CUcontext passed to the API call can 186 | * only be bound to a single CPU thread at a time but is already 187 | * bound to a CPU thread. 188 | */ 189 | { "CUDA_ERROR_CONTEXT_ALREADY_IN_USE", 216 }, 190 | 191 | /** 192 | * This indicates that the device kernel source is invalid. 193 | */ 194 | { "CUDA_ERROR_INVALID_SOURCE", 300 }, 195 | 196 | /** 197 | * This indicates that the file specified was not found. 198 | */ 199 | { "CUDA_ERROR_FILE_NOT_FOUND", 301 }, 200 | 201 | /** 202 | * This indicates that a link to a shared object failed to resolve. 203 | */ 204 | { "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND", 302 }, 205 | 206 | /** 207 | * This indicates that initialization of a shared object failed. 208 | */ 209 | { "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED", 303 }, 210 | 211 | /** 212 | * This indicates that an OS call failed. 213 | */ 214 | { "CUDA_ERROR_OPERATING_SYSTEM", 304 }, 215 | 216 | 217 | /** 218 | * This indicates that a resource handle passed to the API call was not 219 | * valid. Resource handles are opaque types like ::CUstream and ::CUevent. 
220 | */ 221 | { "CUDA_ERROR_INVALID_HANDLE", 400 }, 222 | 223 | 224 | /** 225 | * This indicates that a named symbol was not found. Examples of symbols 226 | * are global/constant variable names, texture names }, and surface names. 227 | */ 228 | { "CUDA_ERROR_NOT_FOUND", 500 }, 229 | 230 | 231 | /** 232 | * This indicates that asynchronous operations issued previously have not 233 | * completed yet. This result is not actually an error, but must be indicated 234 | * differently than ::CUDA_SUCCESS (which indicates completion). Calls that 235 | * may return this value include ::cuEventQuery() and ::cuStreamQuery(). 236 | */ 237 | { "CUDA_ERROR_NOT_READY", 600 }, 238 | 239 | 240 | /** 241 | * An exception occurred on the device while executing a kernel. Common 242 | * causes include dereferencing an invalid device pointer and accessing 243 | * out of bounds shared memory. The context cannot be used }, so it must 244 | * be destroyed (and a new one should be created). All existing device 245 | * memory allocations from this context are invalid and must be 246 | * reconstructed if the program is to continue using CUDA. 247 | */ 248 | { "CUDA_ERROR_LAUNCH_FAILED", 700 }, 249 | 250 | /** 251 | * This indicates that a launch did not occur because it did not have 252 | * appropriate resources. This error usually indicates that the user has 253 | * attempted to pass too many arguments to the device kernel, or the 254 | * kernel launch specifies too many threads for the kernel's register 255 | * count. Passing arguments of the wrong size (i.e. a 64-bit pointer 256 | * when a 32-bit int is expected) is equivalent to passing too many 257 | * arguments and can also result in this error. 258 | */ 259 | { "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES", 701 }, 260 | 261 | /** 262 | * This indicates that the device kernel took too long to execute. 
This can 263 | * only occur if timeouts are enabled - see the device attribute 264 | * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. The 265 | * context cannot be used (and must be destroyed similar to 266 | * ::CUDA_ERROR_LAUNCH_FAILED). All existing device memory allocations from 267 | * this context are invalid and must be reconstructed if the program is to 268 | * continue using CUDA. 269 | */ 270 | { "CUDA_ERROR_LAUNCH_TIMEOUT", 702 }, 271 | 272 | /** 273 | * This error indicates a kernel launch that uses an incompatible texturing 274 | * mode. 275 | */ 276 | { "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING", 703 }, 277 | 278 | /** 279 | * This error indicates that a call to ::cuCtxEnablePeerAccess() is 280 | * trying to re-enable peer access to a context which has already 281 | * had peer access to it enabled. 282 | */ 283 | { "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED", 704 }, 284 | 285 | /** 286 | * This error indicates that ::cuCtxDisablePeerAccess() is 287 | * trying to disable peer access which has not been enabled yet 288 | * via ::cuCtxEnablePeerAccess(). 289 | */ 290 | { "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED", 705 }, 291 | 292 | /** 293 | * This error indicates that the primary context for the specified device 294 | * has already been initialized. 295 | */ 296 | { "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE", 708 }, 297 | 298 | /** 299 | * This error indicates that the context current to the calling thread 300 | * has been destroyed using ::cuCtxDestroy }, or is a primary context which 301 | * has not yet been initialized. 302 | */ 303 | { "CUDA_ERROR_CONTEXT_IS_DESTROYED", 709 }, 304 | 305 | /** 306 | * A device-side assert triggered during kernel execution. The context 307 | * cannot be used anymore, and must be destroyed. All existing device 308 | * memory allocations from this context are invalid and must be 309 | * reconstructed if the program is to continue using CUDA. 
310 | */ 311 | { "CUDA_ERROR_ASSERT", 710 }, 312 | 313 | /** 314 | * This indicates that an unknown internal error has occurred. 315 | */ 316 | { "CUDA_ERROR_UNKNOWN", 999 }, 317 | { NULL, -1 } 318 | }; 319 | 320 | // This is just a linear search through the array, since the error_id's are not 321 | // always ocurring consecutively 322 | inline const char *getCudaDrvErrorString(CUresult error_id) 323 | { 324 | int index = 0; 325 | 326 | while (sCudaDrvErrorString[index].error_id != error_id && 327 | sCudaDrvErrorString[index].error_id != -1) 328 | { 329 | index++; 330 | } 331 | 332 | if (sCudaDrvErrorString[index].error_id == error_id) 333 | return (const char *)sCudaDrvErrorString[index].error_string; 334 | else 335 | return (const char *)"CUDA_ERROR not found!"; 336 | } 337 | 338 | #endif // __cuda_cuda_h__ 339 | 340 | 341 | #endif 342 | -------------------------------------------------------------------------------- /Util/Bitmap.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * \copyright 3 | * Copyright 1993-2014 NVIDIA Corporation. All rights reserved. 4 | * 5 | * NOTICE TO LICENSEE: 6 | * 7 | * This source code and/or documentation ("Licensed Deliverables") are 8 | * subject to NVIDIA intellectual property rights under U.S. and 9 | * international Copyright laws. 10 | * 11 | * These Licensed Deliverables contained herein is PROPRIETARY and 12 | * CONFIDENTIAL to NVIDIA and is being provided under the terms and 13 | * conditions of a form of NVIDIA software license agreement by and 14 | * between NVIDIA and Licensee ("License Agreement") or electronically 15 | * accepted by Licensee. Notwithstanding any terms or conditions to 16 | * the contrary in the License Agreement, reproduction or disclosure 17 | * of the Licensed Deliverables to any third party without the express 18 | * written consent of NVIDIA is prohibited. 
19 | * 20 | * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 21 | * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 22 | * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 23 | * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 24 | * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 25 | * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 26 | * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 27 | * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 28 | * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 29 | * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 30 | * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 31 | * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 32 | * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 33 | * OF THESE LICENSED DELIVERABLES. 34 | * 35 | * U.S. Government End Users. These Licensed Deliverables are a 36 | * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 37 | * 1995), consisting of "commercial computer software" and "commercial 38 | * computer software documentation" as such terms are used in 48 39 | * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 40 | * only as a commercial end item. Consistent with 48 C.F.R.12.212 and 41 | * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 42 | * U.S. Government End Users acquire the Licensed Deliverables with 43 | * only those rights set forth herein. 44 | * 45 | * Any use of the Licensed Deliverables in individual and commercial 46 | * software must include, in the user documentation and internal 47 | * comments to the code, the above Disclaimer and U.S. Government End 48 | * Users Notice. 
49 | */ 50 | 51 | 52 | #pragma warning(disable : 4995 4996) 53 | 54 | #include "Bitmap.h" 55 | 56 | #include 57 | #include 58 | 59 | // Macros to help with bitmap padding 60 | #define BITMAP_SIZE(width, height) ((((width) + 3) & ~3) * (height)) 61 | #define BITMAP_INDEX(x, y, width) (((y) * (((width) + 3) & ~3)) + (x)) 62 | 63 | // Describes the structure of a 24-bpp Bitmap pixel 64 | struct BitmapPixel 65 | { 66 | unsigned char blue; 67 | unsigned char green; 68 | unsigned char red; 69 | }; 70 | 71 | // Describes the structure of a RGB pixel 72 | struct RGBPixel 73 | { 74 | unsigned char red; 75 | unsigned char green; 76 | unsigned char blue; 77 | }; 78 | 79 | // Describes the structure of a ARGB pixel 80 | struct ARGBPixel 81 | { 82 | unsigned char blue; 83 | unsigned char green; 84 | unsigned char red; 85 | unsigned char alpha; 86 | }; 87 | 88 | bool SaveBitmap(const char *fileName, BYTE *data, int width, int height) 89 | { 90 | BITMAPFILEHEADER fileHeader; 91 | BITMAPINFOHEADER infoHeader; 92 | FILE *outputFile; 93 | bool bRet = false; 94 | 95 | if (data) 96 | { 97 | if(outputFile = fopen(fileName, "wb")) 98 | { 99 | width = (width + 3) & (~3); 100 | int size = width * height * 3; // 24 bits per pixel 101 | 102 | fileHeader.bfType = 0x4D42; 103 | fileHeader.bfSize = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER) + size; 104 | fileHeader.bfOffBits = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER); 105 | 106 | infoHeader.biSize = sizeof(BITMAPINFOHEADER); 107 | infoHeader.biWidth = width; 108 | infoHeader.biHeight = height; 109 | infoHeader.biPlanes = 1; 110 | infoHeader.biBitCount = 24; 111 | infoHeader.biCompression = BI_RGB; 112 | infoHeader.biSizeImage = BITMAP_SIZE(width, height); 113 | infoHeader.biXPelsPerMeter = 0; 114 | infoHeader.biYPelsPerMeter = 0; 115 | infoHeader.biClrUsed = 0; 116 | infoHeader.biClrImportant = 0; 117 | 118 | fwrite((unsigned char *)&fileHeader, 1, sizeof(BITMAPFILEHEADER), outputFile); 119 | fwrite((unsigned char 
*)&infoHeader, 1, sizeof(BITMAPINFOHEADER), outputFile); 120 | fwrite(data, 1, size, outputFile); 121 | 122 | bRet = true; 123 | fclose(outputFile); 124 | } 125 | } 126 | 127 | return bRet; 128 | } 129 | 130 | bool SaveRGB(const char *fileName, BYTE *data, int width, int height) 131 | { 132 | bool result = false; 133 | 134 | RGBPixel *input = (RGBPixel *)data; 135 | BitmapPixel *output = new BitmapPixel[BITMAP_SIZE(width, height)]; 136 | 137 | // Pad bytes need to be set to zero, it's easier to just set the entire chunk of memory 138 | memset(output, 0, BITMAP_SIZE(width, height) * sizeof(BitmapPixel)); 139 | 140 | for(int row = 0; row < height; ++row) 141 | { 142 | for(int col = 0; col < width; ++col) 143 | { 144 | // In a bitmap (0,0) is at the bottom left, in the frame buffer it is the top left. 145 | int outputIdx = BITMAP_INDEX(col, row, width); 146 | int inputIdx = ((height - row - 1) * width) + col; 147 | 148 | output[outputIdx].red = input[inputIdx].red; 149 | output[outputIdx].green = input[inputIdx].green; 150 | output[outputIdx].blue = input[inputIdx].blue; 151 | } 152 | } 153 | 154 | result = SaveBitmap(fileName, (BYTE *)output, width, height); 155 | 156 | delete [] output; 157 | 158 | return result; 159 | } 160 | 161 | bool SaveBGR(const char *fileName, BYTE *data, int width, int height) 162 | { 163 | bool result = false; 164 | 165 | if (!data) 166 | return false; 167 | RGBPixel *input = (RGBPixel *)data; 168 | BitmapPixel *output = new BitmapPixel[BITMAP_SIZE(width, height)]; 169 | 170 | // Pad bytes need to be set to zero, it's easier to just set the entire chunk of memory 171 | memset(output, 0, BITMAP_SIZE(width, height) * sizeof(BitmapPixel)); 172 | 173 | for(int row = 0; row < height; ++row) 174 | { 175 | for(int col = 0; col < width; ++col) 176 | { 177 | // In a bitmap (0,0) is at the bottom left, in the frame buffer it is the top left. 
178 | int outputIdx = BITMAP_INDEX(col, row, width); 179 | int inputIdx = ((height - row - 1) * width) + col; 180 | 181 | output[outputIdx].red = input[inputIdx].blue; 182 | output[outputIdx].green = input[inputIdx].green; 183 | output[outputIdx].blue = input[inputIdx].red; 184 | } 185 | } 186 | 187 | result = SaveBitmap(fileName, (BYTE *)output, width, height); 188 | 189 | delete [] output; 190 | 191 | return result; 192 | } 193 | 194 | bool SaveRGBPlanar(const char *fileName, BYTE *data, int width, int height) 195 | { 196 | if (!data) 197 | return false; 198 | 199 | const char *nameExt[] = {"red", "green", "blue"}; 200 | BitmapPixel *output = new BitmapPixel[BITMAP_SIZE(width, height)]; 201 | memset(output, 0, BITMAP_SIZE(width, height) * sizeof(BitmapPixel)); 202 | 203 | for(int color = 0; color < 3; ++color) 204 | { 205 | for(int row = 0; row < height; ++row) 206 | { 207 | for(int col = 0; col < width; ++col) 208 | { 209 | int outputIdx = BITMAP_INDEX(col, row, width); 210 | int inputIdx = ((height - row - 1) * width) + col; 211 | 212 | output[outputIdx].blue = 0; 213 | output[outputIdx].green = 0; 214 | output[outputIdx].red = 0; 215 | 216 | switch(color) 217 | { 218 | case 0: 219 | output[outputIdx].red = data[inputIdx]; 220 | break; 221 | 222 | case 1: 223 | output[outputIdx].green = data[inputIdx + (width * height)]; 224 | break; 225 | 226 | case 2: 227 | output[outputIdx].blue = data[inputIdx + 2 * (width * height)]; 228 | break; 229 | 230 | default: 231 | break; 232 | } 233 | } 234 | } 235 | 236 | std::string outputFile = fileName; 237 | size_t find = outputFile.find_last_of("."); 238 | 239 | outputFile.insert(find, "-"); 240 | outputFile.insert(find+1, nameExt[color]); 241 | 242 | if(!SaveBitmap(outputFile.c_str(), (BYTE *)output, width, height)) 243 | { 244 | delete [] output; 245 | return false; 246 | } 247 | } 248 | 249 | delete [] output; 250 | 251 | return true; 252 | } 253 | 254 | bool SaveARGB(const char *fileName, BYTE *data, int width, int 
height) 255 | { 256 | bool result = false; 257 | if (!data) 258 | return result; 259 | 260 | ARGBPixel *input = (ARGBPixel *)data; 261 | BitmapPixel *output = new BitmapPixel[BITMAP_SIZE(width, height)]; 262 | memset(output, 0, BITMAP_SIZE(width, height) * sizeof(BitmapPixel)); 263 | 264 | for(int row = 0; row < height; ++row) 265 | { 266 | for(int col = 0; col < width; ++col) 267 | { 268 | int outputIdx = BITMAP_INDEX(col, row, width); 269 | int inputIdx = ((height - row - 1) * width) + col; 270 | 271 | output[outputIdx].red = input[inputIdx].red; 272 | output[outputIdx].green = input[inputIdx].green; 273 | output[outputIdx].blue = input[inputIdx].blue; 274 | } 275 | } 276 | 277 | result = SaveBitmap(fileName, (BYTE *)output, width, height); 278 | 279 | delete [] output; 280 | 281 | return result; 282 | } 283 | 284 | bool SaveYUV(const char *fileName, BYTE *data, int width, int height) 285 | { 286 | if (!data) 287 | return false; 288 | 289 | int hWidth = width >> 1; 290 | int hHeight = height >> 1; 291 | size_t find = -1; 292 | std::string outputFile; 293 | 294 | BitmapPixel *luma = new BitmapPixel[BITMAP_SIZE(width, height)]; 295 | BitmapPixel *chrom = new BitmapPixel[BITMAP_SIZE(width, height)]; 296 | 297 | memset(luma, 0, BITMAP_SIZE(width, height) * sizeof(BitmapPixel)); 298 | memset(chrom, 0, BITMAP_SIZE(hWidth, hHeight) * sizeof(BitmapPixel)); 299 | 300 | for(int row = 0; row < height; ++row) 301 | { 302 | for(int col = 0; col < width; ++col) 303 | { 304 | int outputIdx = BITMAP_INDEX(col, row, width); 305 | int inputIdx = ((height - row - 1) * width) + col; 306 | 307 | luma[outputIdx].red = data[inputIdx]; 308 | luma[outputIdx].green = data[inputIdx]; 309 | luma[outputIdx].blue = data[inputIdx]; 310 | } 311 | } 312 | 313 | data += width * height; 314 | 315 | outputFile = fileName; 316 | find = outputFile.find_last_of("."); 317 | 318 | outputFile.insert(find, "-"); 319 | outputFile.insert(find+1, "y"); 320 | 321 | if(!SaveBitmap(outputFile.c_str(), (BYTE 
*)luma, width, height)) 322 | { 323 | delete [] luma; 324 | delete [] chrom; 325 | return false; 326 | } 327 | 328 | for(int row = 0; row < hHeight; ++row) 329 | { 330 | for(int col = 0; col < hWidth; ++col) 331 | { 332 | int outputIdx = BITMAP_INDEX(col, row, hWidth); 333 | int inputIdx = ((hHeight - row - 1) * hWidth) + col; 334 | 335 | chrom[outputIdx].red = data[inputIdx]; 336 | chrom[outputIdx].green = 255 - data[inputIdx]; 337 | chrom[outputIdx].blue = 0; 338 | } 339 | } 340 | 341 | data += hWidth * hHeight; 342 | 343 | outputFile = fileName; 344 | find = outputFile.find_last_of("."); 345 | 346 | outputFile.insert(find, "-"); 347 | outputFile.insert(find+1, "u"); 348 | 349 | if(!SaveBitmap(outputFile.c_str(), (BYTE *)chrom, hWidth, hHeight)) 350 | { 351 | delete [] luma; 352 | delete [] chrom; 353 | return false; 354 | } 355 | 356 | for(int row = 0; row < hHeight; ++row) 357 | { 358 | for(int col = 0; col < hWidth; ++col) 359 | { 360 | int outputIdx = BITMAP_INDEX(col, row, hWidth); 361 | int inputIdx = ((hHeight - row - 1) * hWidth) + col; 362 | 363 | chrom[outputIdx].red = 0; 364 | chrom[outputIdx].green = 255 - data[inputIdx]; 365 | chrom[outputIdx].blue = data[inputIdx]; 366 | } 367 | } 368 | 369 | data += hWidth * hHeight; 370 | 371 | outputFile = fileName; 372 | find = outputFile.find_last_of("."); 373 | 374 | outputFile.insert(find, "-"); 375 | outputFile.insert(find+1, "v"); 376 | 377 | if(!SaveBitmap(outputFile.c_str(), (BYTE *)chrom, hWidth, hHeight)) 378 | { 379 | delete [] luma; 380 | delete [] chrom; 381 | return false; 382 | } 383 | 384 | delete [] luma; 385 | delete [] chrom; 386 | return true; 387 | } 388 | -------------------------------------------------------------------------------- /Util/helper_cuda_drvapi.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 1993-2014 NVIDIA Corporation. All rights reserved. 
3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 9 | * 10 | */ 11 | 12 | // Helper functions for CUDA Driver API error handling (make sure that CUDA_H is included in your projects) 13 | #ifndef HELPER_CUDA_DRVAPI_H 14 | #define HELPER_CUDA_DRVAPI_H 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #ifndef MAX 24 | #define MAX(a,b) (a > b ? a : b) 25 | #endif 26 | 27 | #ifndef EXIT_WAIVED 28 | #define EXIT_WAIVED 2 29 | #endif 30 | 31 | //////////////////////////////////////////////////////////////////////////////// 32 | // These are CUDA Helper functions 33 | 34 | // add a level of protection to the CUDA SDK samples, let's force samples to explicitly include CUDA.H 35 | #ifdef __cuda_cuda_h__ 36 | // This will output the proper CUDA error strings in the event that a CUDA host call returns an error 37 | #ifndef checkCudaErrors 38 | #define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) 39 | 40 | // These are the inline versions for all of the SDK helper functions 41 | inline void __checkCudaErrors(CUresult err, const char *file, const int line) 42 | { 43 | if (CUDA_SUCCESS != err) 44 | { 45 | fprintf(stderr, "checkCudaErrors() Driver API error = %04d \"%s\" from file <%s>, line %i.\n", 46 | err, getCudaDrvErrorString(err), file, line); 47 | exit(EXIT_FAILURE); 48 | } 49 | } 50 | #endif 51 | 52 | #ifdef getLastCudaDrvErrorMsg 53 | #undef getLastCudaDrvErrorMsg 54 | #endif 55 | 56 | #define getLastCudaDrvErrorMsg(msg) __getLastCudaDrvErrorMsg (msg, __FILE__, __LINE__) 57 | 58 | inline void __getLastCudaDrvErrorMsg(const char *msg, const char *file, const int line) 59 | { 60 | CUresult err = cuCtxSynchronize(); 61 | 62 | if 
(CUDA_SUCCESS != err) 63 | { 64 | fprintf(stderr, "getLastCudaDrvErrorMsg -> %s", msg); 65 | fprintf(stderr, "getLastCudaDrvErrorMsg -> cuCtxSynchronize API error = %04d \"%s\" in file <%s>, line %i.\n", 66 | err, getCudaDrvErrorString(err), file, line); 67 | exit(EXIT_FAILURE); 68 | } 69 | } 70 | 71 | // This function wraps the CUDA Driver API into a template function 72 | template 73 | inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device) 74 | { 75 | CUresult error_result = cuDeviceGetAttribute(attribute, device_attribute, device); 76 | 77 | if (error_result != CUDA_SUCCESS) 78 | { 79 | printf("cuDeviceGetAttribute returned %d\n-> %s\n", (int)error_result, getCudaDrvErrorString(error_result)); 80 | exit(EXIT_SUCCESS); 81 | } 82 | } 83 | #endif 84 | 85 | // Beginning of GPU Architecture definitions 86 | inline int _ConvertSMVer2CoresDRV(int major, int minor) 87 | { 88 | // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM 89 | typedef struct 90 | { 91 | int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version 92 | int Cores; 93 | } sSMtoCores; 94 | 95 | sSMtoCores nGpuArchCoresPerSM[] = 96 | { 97 | { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class 98 | { 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class 99 | { 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class 100 | { 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class 101 | { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class 102 | { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class 103 | { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class 104 | { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class 105 | { -1, -1 } 106 | }; 107 | 108 | int index = 0; 109 | 110 | while (nGpuArchCoresPerSM[index].SM != -1) 111 | { 112 | if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) 113 | { 114 | return nGpuArchCoresPerSM[index].Cores; 115 | } 116 | 117 | index++; 118 | } 119 | 120 | // If we don't 
find the values, we default use the previous one to run properly 121 | printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[7].Cores); 122 | return nGpuArchCoresPerSM[7].Cores; 123 | } 124 | // end of GPU Architecture definitions 125 | 126 | #ifdef __cuda_cuda_h__ 127 | // General GPU Device CUDA Initialization 128 | inline int gpuDeviceInitDRV(int ARGC, const char **ARGV) 129 | { 130 | int cuDevice = 0; 131 | int deviceCount = 0; 132 | CUresult err = cuInit(0); 133 | 134 | if (CUDA_SUCCESS == err) 135 | { 136 | checkCudaErrors(cuDeviceGetCount(&deviceCount)); 137 | } 138 | 139 | if (deviceCount == 0) 140 | { 141 | fprintf(stderr, "cudaDeviceInit error: no devices supporting CUDA\n"); 142 | exit(EXIT_FAILURE); 143 | } 144 | 145 | int dev = 0; 146 | dev = getCmdLineArgumentInt(ARGC, (const char **) ARGV, "device="); 147 | 148 | if (dev < 0) 149 | { 150 | dev = 0; 151 | } 152 | 153 | if (dev > deviceCount-1) 154 | { 155 | fprintf(stderr, "\n"); 156 | fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount); 157 | fprintf(stderr, ">> cudaDeviceInit (-device=%d) is not a valid GPU device. 
<<\n", dev); 158 | fprintf(stderr, "\n"); 159 | return -dev; 160 | } 161 | 162 | checkCudaErrors(cuDeviceGet(&cuDevice, dev)); 163 | char name[100]; 164 | cuDeviceGetName(name, 100, cuDevice); 165 | 166 | int computeMode; 167 | getCudaAttribute(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev); 168 | 169 | if (computeMode == CU_COMPUTEMODE_PROHIBITED) 170 | { 171 | fprintf(stderr, "Error: device is running in , no threads can use this CUDA Device.\n"); 172 | return -1; 173 | } 174 | 175 | if (checkCmdLineFlag(ARGC, (const char **) ARGV, "quiet") == false) 176 | { 177 | printf("gpuDeviceInitDRV() Using CUDA Device [%d]: %s\n", dev, name); 178 | } 179 | 180 | return dev; 181 | } 182 | 183 | // This function returns the best GPU based on performance 184 | inline int gpuGetMaxGflopsDeviceIdDRV() 185 | { 186 | CUdevice current_device = 0, max_perf_device = 0; 187 | int device_count = 0, sm_per_multiproc = 0; 188 | int max_compute_perf = 0, best_SM_arch = 0; 189 | int major = 0, minor = 0 , multiProcessorCount, clockRate; 190 | 191 | cuInit(0); 192 | checkCudaErrors(cuDeviceGetCount(&device_count)); 193 | 194 | if (device_count == 0) 195 | { 196 | fprintf(stderr, "gpuGetMaxGflopsDeviceIdDRV error: no devices supporting CUDA\n"); 197 | exit(EXIT_FAILURE); 198 | } 199 | 200 | // Find the best major SM Architecture GPU device 201 | while (current_device < device_count) 202 | { 203 | checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device)); 204 | 205 | if (major > 0 && major < 9999) 206 | { 207 | best_SM_arch = MAX(best_SM_arch, major); 208 | } 209 | 210 | current_device++; 211 | } 212 | 213 | // Find the best CUDA capable GPU device 214 | current_device = 0; 215 | 216 | while (current_device < device_count) 217 | { 218 | checkCudaErrors(cuDeviceGetAttribute(&multiProcessorCount, 219 | CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, 220 | current_device)); 221 | checkCudaErrors(cuDeviceGetAttribute(&clockRate, 222 | CU_DEVICE_ATTRIBUTE_CLOCK_RATE, 223 | 
current_device)); 224 | checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device)); 225 | 226 | int computeMode; 227 | getCudaAttribute(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device); 228 | 229 | if (computeMode != CU_COMPUTEMODE_PROHIBITED) 230 | { 231 | if (major == 9999 && minor == 9999) 232 | { 233 | sm_per_multiproc = 1; 234 | } 235 | else 236 | { 237 | sm_per_multiproc = _ConvertSMVer2CoresDRV(major, minor); 238 | } 239 | 240 | int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate; 241 | 242 | if (compute_perf > max_compute_perf) 243 | { 244 | // If we find GPU with SM major > 2, search only these 245 | if (best_SM_arch > 2) 246 | { 247 | // If our device==dest_SM_arch, choose this, or else pass 248 | if (major == best_SM_arch) 249 | { 250 | max_compute_perf = compute_perf; 251 | max_perf_device = current_device; 252 | } 253 | } 254 | else 255 | { 256 | max_compute_perf = compute_perf; 257 | max_perf_device = current_device; 258 | } 259 | } 260 | } 261 | 262 | ++current_device; 263 | } 264 | 265 | return max_perf_device; 266 | } 267 | 268 | // This function returns the best Graphics GPU based on performance 269 | inline int gpuGetMaxGflopsGLDeviceIdDRV() 270 | { 271 | CUdevice current_device = 0, max_perf_device = 0; 272 | int device_count = 0, sm_per_multiproc = 0; 273 | int max_compute_perf = 0, best_SM_arch = 0; 274 | int major = 0, minor = 0, multiProcessorCount, clockRate; 275 | int bTCC = 0; 276 | char deviceName[256]; 277 | 278 | cuInit(0); 279 | checkCudaErrors(cuDeviceGetCount(&device_count)); 280 | 281 | if (device_count == 0) 282 | { 283 | fprintf(stderr, "gpuGetMaxGflopsGLDeviceIdDRV error: no devices supporting CUDA\n"); 284 | exit(EXIT_FAILURE); 285 | } 286 | 287 | // Find the best major SM Architecture GPU device that are graphics devices 288 | while (current_device < device_count) 289 | { 290 | checkCudaErrors(cuDeviceGetName(deviceName, 256, current_device)); 291 | 
checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device)); 292 | 293 | #if CUDA_VERSION >= 3020 294 | checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device)); 295 | #else 296 | 297 | // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1 298 | if (deviceName[0] == 'T') 299 | { 300 | bTCC = 1; 301 | } 302 | 303 | #endif 304 | 305 | int computeMode; 306 | getCudaAttribute(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device); 307 | 308 | if (computeMode != CU_COMPUTEMODE_PROHIBITED) 309 | { 310 | if (!bTCC) 311 | { 312 | if (major > 0 && major < 9999) 313 | { 314 | best_SM_arch = MAX(best_SM_arch, major); 315 | } 316 | } 317 | } 318 | 319 | current_device++; 320 | } 321 | 322 | // Find the best CUDA capable GPU device 323 | current_device = 0; 324 | 325 | while (current_device < device_count) 326 | { 327 | checkCudaErrors(cuDeviceGetAttribute(&multiProcessorCount, 328 | CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, 329 | current_device)); 330 | checkCudaErrors(cuDeviceGetAttribute(&clockRate, 331 | CU_DEVICE_ATTRIBUTE_CLOCK_RATE, 332 | current_device)); 333 | checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device)); 334 | 335 | #if CUDA_VERSION >= 3020 336 | checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device)); 337 | #else 338 | 339 | // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1 340 | if (deviceName[0] == 'T') 341 | { 342 | bTCC = 1; 343 | } 344 | 345 | #endif 346 | 347 | int computeMode; 348 | getCudaAttribute(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device); 349 | 350 | if (computeMode != CU_COMPUTEMODE_PROHIBITED) 351 | { 352 | if (major == 9999 && minor == 9999) 353 | { 354 | sm_per_multiproc = 1; 355 | } 356 | else 357 | { 358 | sm_per_multiproc = _ConvertSMVer2CoresDRV(major, minor); 359 | } 360 | 361 | // If this is a Tesla based GPU and SM 2.0, and TCC is disabled, this is a contendor 362 | if 
(!bTCC) // Is this GPU running the TCC driver? If so we pass on this 363 | { 364 | int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate; 365 | 366 | if (compute_perf > max_compute_perf) 367 | { 368 | // If we find GPU with SM major > 2, search only these 369 | if (best_SM_arch > 2) 370 | { 371 | // If our device = dest_SM_arch, then we pick this one 372 | if (major == best_SM_arch) 373 | { 374 | max_compute_perf = compute_perf; 375 | max_perf_device = current_device; 376 | } 377 | } 378 | else 379 | { 380 | max_compute_perf = compute_perf; 381 | max_perf_device = current_device; 382 | } 383 | } 384 | } 385 | } 386 | 387 | ++current_device; 388 | } 389 | 390 | return max_perf_device; 391 | } 392 | 393 | // General initialization call to pick the best CUDA Device 394 | inline CUdevice findCudaDeviceDRV(int argc, const char **argv) 395 | { 396 | CUdevice cuDevice; 397 | int devID = 0; 398 | 399 | // If the command-line has a device number specified, use it 400 | if (checkCmdLineFlag(argc, (const char **)argv, "device")) 401 | { 402 | devID = gpuDeviceInitDRV(argc, argv); 403 | 404 | if (devID < 0) 405 | { 406 | printf("exiting...\n"); 407 | exit(EXIT_SUCCESS); 408 | } 409 | } 410 | else 411 | { 412 | // Otherwise pick the device with highest Gflops/s 413 | char name[100]; 414 | devID = gpuGetMaxGflopsDeviceIdDRV(); 415 | checkCudaErrors(cuDeviceGet(&cuDevice, devID)); 416 | cuDeviceGetName(name, 100, cuDevice); 417 | printf("> Using CUDA Device [%d]: %s\n", devID, name); 418 | } 419 | 420 | cuDeviceGet(&cuDevice, devID); 421 | 422 | return cuDevice; 423 | } 424 | 425 | // This function will pick the best CUDA device available with OpenGL interop 426 | inline CUdevice findCudaGLDeviceDRV(int argc, const char **argv) 427 | { 428 | CUdevice cuDevice; 429 | int devID = 0; 430 | 431 | // If the command-line has a device number specified, use it 432 | if (checkCmdLineFlag(argc, (const char **)argv, "device")) 433 | { 434 | devID = gpuDeviceInitDRV(argc, 
(const char **)argv); 435 | 436 | if (devID < 0) 437 | { 438 | printf("no CUDA capable devices found, exiting...\n"); 439 | exit(EXIT_SUCCESS); 440 | } 441 | } 442 | else 443 | { 444 | char name[100]; 445 | // Otherwise pick the device with highest Gflops/s 446 | devID = gpuGetMaxGflopsGLDeviceIdDRV(); 447 | checkCudaErrors(cuDeviceGet(&cuDevice, devID)); 448 | cuDeviceGetName(name, 100, cuDevice); 449 | printf("> Using CUDA/GL Device [%d]: %s\n", devID, name); 450 | } 451 | 452 | return devID; 453 | } 454 | 455 | // General check for CUDA GPU SM Capabilities 456 | inline bool checkCudaCapabilitiesDRV(int major_version, int minor_version, int devID) 457 | { 458 | CUdevice cuDevice; 459 | char name[256]; 460 | int major = 0, minor = 0; 461 | 462 | checkCudaErrors(cuDeviceGet(&cuDevice, devID)); 463 | checkCudaErrors(cuDeviceGetName(name, 100, cuDevice)); 464 | checkCudaErrors(cuDeviceComputeCapability(&major, &minor, devID)); 465 | 466 | if ((major > major_version) || 467 | (major == major_version && minor >= minor_version)) 468 | { 469 | printf("> Device %d: <%16s >, Compute SM %d.%d detected\n", devID, name, major, minor); 470 | return true; 471 | } 472 | else 473 | { 474 | printf("No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version); 475 | return false; 476 | } 477 | } 478 | #endif 479 | 480 | // end of CUDA Helper Functions 481 | 482 | #endif 483 | -------------------------------------------------------------------------------- /Util/helper_cuda.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 1993-2014 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. 
Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 9 | * 10 | */ 11 | 12 | //////////////////////////////////////////////////////////////////////////////// 13 | // These are CUDA Helper functions for initialization and error checking 14 | 15 | #ifndef HELPER_CUDA_H 16 | #define HELPER_CUDA_H 17 | 18 | #pragma once 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | //#include 25 | 26 | #ifndef EXIT_WAIVED 27 | #define EXIT_WAIVED 2 28 | #endif 29 | 30 | // Note, it is required that your SDK sample to include the proper header files, please 31 | // refer the CUDA examples for examples of the needed CUDA headers, which may change depending 32 | // on which CUDA functions are used. 33 | 34 | // CUDA Runtime error messages 35 | #ifdef __DRIVER_TYPES_H__ 36 | static const char *_cudaGetErrorEnum(cudaError_t error) 37 | { 38 | switch (error) 39 | { 40 | case cudaSuccess: 41 | return "cudaSuccess"; 42 | 43 | case cudaErrorMissingConfiguration: 44 | return "cudaErrorMissingConfiguration"; 45 | 46 | case cudaErrorMemoryAllocation: 47 | return "cudaErrorMemoryAllocation"; 48 | 49 | case cudaErrorInitializationError: 50 | return "cudaErrorInitializationError"; 51 | 52 | case cudaErrorLaunchFailure: 53 | return "cudaErrorLaunchFailure"; 54 | 55 | case cudaErrorPriorLaunchFailure: 56 | return "cudaErrorPriorLaunchFailure"; 57 | 58 | case cudaErrorLaunchTimeout: 59 | return "cudaErrorLaunchTimeout"; 60 | 61 | case cudaErrorLaunchOutOfResources: 62 | return "cudaErrorLaunchOutOfResources"; 63 | 64 | case cudaErrorInvalidDeviceFunction: 65 | return "cudaErrorInvalidDeviceFunction"; 66 | 67 | case cudaErrorInvalidConfiguration: 68 | return "cudaErrorInvalidConfiguration"; 69 | 70 | case cudaErrorInvalidDevice: 71 | return "cudaErrorInvalidDevice"; 72 | 73 | case cudaErrorInvalidValue: 74 | return "cudaErrorInvalidValue"; 75 | 76 | case cudaErrorInvalidPitchValue: 77 | 
return "cudaErrorInvalidPitchValue"; 78 | 79 | case cudaErrorInvalidSymbol: 80 | return "cudaErrorInvalidSymbol"; 81 | 82 | case cudaErrorMapBufferObjectFailed: 83 | return "cudaErrorMapBufferObjectFailed"; 84 | 85 | case cudaErrorUnmapBufferObjectFailed: 86 | return "cudaErrorUnmapBufferObjectFailed"; 87 | 88 | case cudaErrorInvalidHostPointer: 89 | return "cudaErrorInvalidHostPointer"; 90 | 91 | case cudaErrorInvalidDevicePointer: 92 | return "cudaErrorInvalidDevicePointer"; 93 | 94 | case cudaErrorInvalidTexture: 95 | return "cudaErrorInvalidTexture"; 96 | 97 | case cudaErrorInvalidTextureBinding: 98 | return "cudaErrorInvalidTextureBinding"; 99 | 100 | case cudaErrorInvalidChannelDescriptor: 101 | return "cudaErrorInvalidChannelDescriptor"; 102 | 103 | case cudaErrorInvalidMemcpyDirection: 104 | return "cudaErrorInvalidMemcpyDirection"; 105 | 106 | case cudaErrorAddressOfConstant: 107 | return "cudaErrorAddressOfConstant"; 108 | 109 | case cudaErrorTextureFetchFailed: 110 | return "cudaErrorTextureFetchFailed"; 111 | 112 | case cudaErrorTextureNotBound: 113 | return "cudaErrorTextureNotBound"; 114 | 115 | case cudaErrorSynchronizationError: 116 | return "cudaErrorSynchronizationError"; 117 | 118 | case cudaErrorInvalidFilterSetting: 119 | return "cudaErrorInvalidFilterSetting"; 120 | 121 | case cudaErrorInvalidNormSetting: 122 | return "cudaErrorInvalidNormSetting"; 123 | 124 | case cudaErrorMixedDeviceExecution: 125 | return "cudaErrorMixedDeviceExecution"; 126 | 127 | case cudaErrorCudartUnloading: 128 | return "cudaErrorCudartUnloading"; 129 | 130 | case cudaErrorUnknown: 131 | return "cudaErrorUnknown"; 132 | 133 | case cudaErrorNotYetImplemented: 134 | return "cudaErrorNotYetImplemented"; 135 | 136 | case cudaErrorMemoryValueTooLarge: 137 | return "cudaErrorMemoryValueTooLarge"; 138 | 139 | case cudaErrorInvalidResourceHandle: 140 | return "cudaErrorInvalidResourceHandle"; 141 | 142 | case cudaErrorNotReady: 143 | return "cudaErrorNotReady"; 144 | 145 | 
case cudaErrorInsufficientDriver: 146 | return "cudaErrorInsufficientDriver"; 147 | 148 | case cudaErrorSetOnActiveProcess: 149 | return "cudaErrorSetOnActiveProcess"; 150 | 151 | case cudaErrorInvalidSurface: 152 | return "cudaErrorInvalidSurface"; 153 | 154 | case cudaErrorNoDevice: 155 | return "cudaErrorNoDevice"; 156 | 157 | case cudaErrorECCUncorrectable: 158 | return "cudaErrorECCUncorrectable"; 159 | 160 | case cudaErrorSharedObjectSymbolNotFound: 161 | return "cudaErrorSharedObjectSymbolNotFound"; 162 | 163 | case cudaErrorSharedObjectInitFailed: 164 | return "cudaErrorSharedObjectInitFailed"; 165 | 166 | case cudaErrorUnsupportedLimit: 167 | return "cudaErrorUnsupportedLimit"; 168 | 169 | case cudaErrorDuplicateVariableName: 170 | return "cudaErrorDuplicateVariableName"; 171 | 172 | case cudaErrorDuplicateTextureName: 173 | return "cudaErrorDuplicateTextureName"; 174 | 175 | case cudaErrorDuplicateSurfaceName: 176 | return "cudaErrorDuplicateSurfaceName"; 177 | 178 | case cudaErrorDevicesUnavailable: 179 | return "cudaErrorDevicesUnavailable"; 180 | 181 | case cudaErrorInvalidKernelImage: 182 | return "cudaErrorInvalidKernelImage"; 183 | 184 | case cudaErrorNoKernelImageForDevice: 185 | return "cudaErrorNoKernelImageForDevice"; 186 | 187 | case cudaErrorIncompatibleDriverContext: 188 | return "cudaErrorIncompatibleDriverContext"; 189 | 190 | case cudaErrorPeerAccessAlreadyEnabled: 191 | return "cudaErrorPeerAccessAlreadyEnabled"; 192 | 193 | case cudaErrorPeerAccessNotEnabled: 194 | return "cudaErrorPeerAccessNotEnabled"; 195 | 196 | case cudaErrorDeviceAlreadyInUse: 197 | return "cudaErrorDeviceAlreadyInUse"; 198 | 199 | case cudaErrorProfilerDisabled: 200 | return "cudaErrorProfilerDisabled"; 201 | 202 | case cudaErrorProfilerNotInitialized: 203 | return "cudaErrorProfilerNotInitialized"; 204 | 205 | case cudaErrorProfilerAlreadyStarted: 206 | return "cudaErrorProfilerAlreadyStarted"; 207 | 208 | case cudaErrorProfilerAlreadyStopped: 209 | return 
"cudaErrorProfilerAlreadyStopped"; 210 | 211 | #if __CUDA_API_VERSION >= 0x4000 212 | case cudaErrorAssert: 213 | return "cudaErrorAssert"; 214 | 215 | case cudaErrorTooManyPeers: 216 | return "cudaErrorTooManyPeers"; 217 | 218 | case cudaErrorHostMemoryAlreadyRegistered: 219 | return "cudaErrorHostMemoryAlreadyRegistered"; 220 | 221 | case cudaErrorHostMemoryNotRegistered: 222 | return "cudaErrorHostMemoryNotRegistered"; 223 | #endif 224 | 225 | case cudaErrorStartupFailure: 226 | return "cudaErrorStartupFailure"; 227 | 228 | case cudaErrorApiFailureBase: 229 | return "cudaErrorApiFailureBase"; 230 | } 231 | 232 | return ""; 233 | } 234 | #endif 235 | 236 | #ifdef __cuda_cuda_h__ 237 | // CUDA Driver API errors 238 | static const char *_cudaGetErrorEnum(CUresult error) 239 | { 240 | switch (error) 241 | { 242 | case CUDA_SUCCESS: 243 | return "CUDA_SUCCESS"; 244 | 245 | case CUDA_ERROR_INVALID_VALUE: 246 | return "CUDA_ERROR_INVALID_VALUE"; 247 | 248 | case CUDA_ERROR_OUT_OF_MEMORY: 249 | return "CUDA_ERROR_OUT_OF_MEMORY"; 250 | 251 | case CUDA_ERROR_NOT_INITIALIZED: 252 | return "CUDA_ERROR_NOT_INITIALIZED"; 253 | 254 | case CUDA_ERROR_DEINITIALIZED: 255 | return "CUDA_ERROR_DEINITIALIZED"; 256 | 257 | case CUDA_ERROR_PROFILER_DISABLED: 258 | return "CUDA_ERROR_PROFILER_DISABLED"; 259 | 260 | case CUDA_ERROR_PROFILER_NOT_INITIALIZED: 261 | return "CUDA_ERROR_PROFILER_NOT_INITIALIZED"; 262 | 263 | case CUDA_ERROR_PROFILER_ALREADY_STARTED: 264 | return "CUDA_ERROR_PROFILER_ALREADY_STARTED"; 265 | 266 | case CUDA_ERROR_PROFILER_ALREADY_STOPPED: 267 | return "CUDA_ERROR_PROFILER_ALREADY_STOPPED"; 268 | 269 | case CUDA_ERROR_NO_DEVICE: 270 | return "CUDA_ERROR_NO_DEVICE"; 271 | 272 | case CUDA_ERROR_INVALID_DEVICE: 273 | return "CUDA_ERROR_INVALID_DEVICE"; 274 | 275 | case CUDA_ERROR_INVALID_IMAGE: 276 | return "CUDA_ERROR_INVALID_IMAGE"; 277 | 278 | case CUDA_ERROR_INVALID_CONTEXT: 279 | return "CUDA_ERROR_INVALID_CONTEXT"; 280 | 281 | case 
CUDA_ERROR_CONTEXT_ALREADY_CURRENT: 282 | return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT"; 283 | 284 | case CUDA_ERROR_MAP_FAILED: 285 | return "CUDA_ERROR_MAP_FAILED"; 286 | 287 | case CUDA_ERROR_UNMAP_FAILED: 288 | return "CUDA_ERROR_UNMAP_FAILED"; 289 | 290 | case CUDA_ERROR_ARRAY_IS_MAPPED: 291 | return "CUDA_ERROR_ARRAY_IS_MAPPED"; 292 | 293 | case CUDA_ERROR_ALREADY_MAPPED: 294 | return "CUDA_ERROR_ALREADY_MAPPED"; 295 | 296 | case CUDA_ERROR_NO_BINARY_FOR_GPU: 297 | return "CUDA_ERROR_NO_BINARY_FOR_GPU"; 298 | 299 | case CUDA_ERROR_ALREADY_ACQUIRED: 300 | return "CUDA_ERROR_ALREADY_ACQUIRED"; 301 | 302 | case CUDA_ERROR_NOT_MAPPED: 303 | return "CUDA_ERROR_NOT_MAPPED"; 304 | 305 | case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: 306 | return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY"; 307 | 308 | case CUDA_ERROR_NOT_MAPPED_AS_POINTER: 309 | return "CUDA_ERROR_NOT_MAPPED_AS_POINTER"; 310 | 311 | case CUDA_ERROR_ECC_UNCORRECTABLE: 312 | return "CUDA_ERROR_ECC_UNCORRECTABLE"; 313 | 314 | case CUDA_ERROR_UNSUPPORTED_LIMIT: 315 | return "CUDA_ERROR_UNSUPPORTED_LIMIT"; 316 | 317 | case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: 318 | return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE"; 319 | 320 | case CUDA_ERROR_INVALID_SOURCE: 321 | return "CUDA_ERROR_INVALID_SOURCE"; 322 | 323 | case CUDA_ERROR_FILE_NOT_FOUND: 324 | return "CUDA_ERROR_FILE_NOT_FOUND"; 325 | 326 | case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: 327 | return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"; 328 | 329 | case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: 330 | return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"; 331 | 332 | case CUDA_ERROR_OPERATING_SYSTEM: 333 | return "CUDA_ERROR_OPERATING_SYSTEM"; 334 | 335 | case CUDA_ERROR_INVALID_HANDLE: 336 | return "CUDA_ERROR_INVALID_HANDLE"; 337 | 338 | case CUDA_ERROR_NOT_FOUND: 339 | return "CUDA_ERROR_NOT_FOUND"; 340 | 341 | case CUDA_ERROR_NOT_READY: 342 | return "CUDA_ERROR_NOT_READY"; 343 | 344 | case CUDA_ERROR_LAUNCH_FAILED: 345 | return "CUDA_ERROR_LAUNCH_FAILED"; 346 | 347 | case 
CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: 348 | return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"; 349 | 350 | case CUDA_ERROR_LAUNCH_TIMEOUT: 351 | return "CUDA_ERROR_LAUNCH_TIMEOUT"; 352 | 353 | case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: 354 | return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING"; 355 | 356 | case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: 357 | return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"; 358 | 359 | case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: 360 | return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"; 361 | 362 | case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: 363 | return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"; 364 | 365 | case CUDA_ERROR_CONTEXT_IS_DESTROYED: 366 | return "CUDA_ERROR_CONTEXT_IS_DESTROYED"; 367 | 368 | #if __CUDA_API_VERSION >= 0x4000 369 | case CUDA_ERROR_ASSERT: 370 | return "CUDA_ERROR_ASSERT"; 371 | 372 | case CUDA_ERROR_TOO_MANY_PEERS: 373 | return "CUDA_ERROR_TOO_MANY_PEERS"; 374 | 375 | case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: 376 | return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"; 377 | 378 | case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: 379 | return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"; 380 | #endif 381 | 382 | case CUDA_ERROR_UNKNOWN: 383 | return "CUDA_ERROR_UNKNOWN"; 384 | } 385 | 386 | return ""; 387 | } 388 | #endif 389 | 390 | #ifdef CUBLAS_API_H_ 391 | // cuBLAS API errors 392 | static const char *_cudaGetErrorEnum(cublasStatus_t error) 393 | { 394 | switch (error) 395 | { 396 | case CUBLAS_STATUS_SUCCESS: 397 | return "CUBLAS_STATUS_SUCCESS"; 398 | 399 | case CUBLAS_STATUS_NOT_INITIALIZED: 400 | return "CUBLAS_STATUS_NOT_INITIALIZED"; 401 | 402 | case CUBLAS_STATUS_ALLOC_FAILED: 403 | return "CUBLAS_STATUS_ALLOC_FAILED"; 404 | 405 | case CUBLAS_STATUS_INVALID_VALUE: 406 | return "CUBLAS_STATUS_INVALID_VALUE"; 407 | 408 | case CUBLAS_STATUS_ARCH_MISMATCH: 409 | return "CUBLAS_STATUS_ARCH_MISMATCH"; 410 | 411 | case CUBLAS_STATUS_MAPPING_ERROR: 412 | return "CUBLAS_STATUS_MAPPING_ERROR"; 413 | 414 | case CUBLAS_STATUS_EXECUTION_FAILED: 
415 | return "CUBLAS_STATUS_EXECUTION_FAILED"; 416 | 417 | case CUBLAS_STATUS_INTERNAL_ERROR: 418 | return "CUBLAS_STATUS_INTERNAL_ERROR"; 419 | } 420 | 421 | return ""; 422 | } 423 | #endif 424 | 425 | #ifdef _CUFFT_H_ 426 | // cuFFT API errors 427 | static const char *_cudaGetErrorEnum(cufftResult error) 428 | { 429 | switch (error) 430 | { 431 | case CUFFT_SUCCESS: 432 | return "CUFFT_SUCCESS"; 433 | 434 | case CUFFT_INVALID_PLAN: 435 | return "CUFFT_INVALID_PLAN"; 436 | 437 | case CUFFT_ALLOC_FAILED: 438 | return "CUFFT_ALLOC_FAILED"; 439 | 440 | case CUFFT_INVALID_TYPE: 441 | return "CUFFT_INVALID_TYPE"; 442 | 443 | case CUFFT_INVALID_VALUE: 444 | return "CUFFT_INVALID_VALUE"; 445 | 446 | case CUFFT_INTERNAL_ERROR: 447 | return "CUFFT_INTERNAL_ERROR"; 448 | 449 | case CUFFT_EXEC_FAILED: 450 | return "CUFFT_EXEC_FAILED"; 451 | 452 | case CUFFT_SETUP_FAILED: 453 | return "CUFFT_SETUP_FAILED"; 454 | 455 | case CUFFT_INVALID_SIZE: 456 | return "CUFFT_INVALID_SIZE"; 457 | 458 | case CUFFT_UNALIGNED_DATA: 459 | return "CUFFT_UNALIGNED_DATA"; 460 | } 461 | 462 | return ""; 463 | } 464 | #endif 465 | 466 | 467 | #ifdef CUSPARSEAPI 468 | // cuSPARSE API errors 469 | static const char *_cudaGetErrorEnum(cusparseStatus_t error) 470 | { 471 | switch (error) 472 | { 473 | case CUSPARSE_STATUS_SUCCESS: 474 | return "CUSPARSE_STATUS_SUCCESS"; 475 | 476 | case CUSPARSE_STATUS_NOT_INITIALIZED: 477 | return "CUSPARSE_STATUS_NOT_INITIALIZED"; 478 | 479 | case CUSPARSE_STATUS_ALLOC_FAILED: 480 | return "CUSPARSE_STATUS_ALLOC_FAILED"; 481 | 482 | case CUSPARSE_STATUS_INVALID_VALUE: 483 | return "CUSPARSE_STATUS_INVALID_VALUE"; 484 | 485 | case CUSPARSE_STATUS_ARCH_MISMATCH: 486 | return "CUSPARSE_STATUS_ARCH_MISMATCH"; 487 | 488 | case CUSPARSE_STATUS_MAPPING_ERROR: 489 | return "CUSPARSE_STATUS_MAPPING_ERROR"; 490 | 491 | case CUSPARSE_STATUS_EXECUTION_FAILED: 492 | return "CUSPARSE_STATUS_EXECUTION_FAILED"; 493 | 494 | case CUSPARSE_STATUS_INTERNAL_ERROR: 495 | return 
"CUSPARSE_STATUS_INTERNAL_ERROR"; 496 | 497 | case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: 498 | return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; 499 | } 500 | 501 | return ""; 502 | } 503 | #endif 504 | 505 | #ifdef CURAND_H_ 506 | // cuRAND API errors 507 | static const char *_cudaGetErrorEnum(curandStatus_t error) 508 | { 509 | switch (error) 510 | { 511 | case CURAND_STATUS_SUCCESS: 512 | return "CURAND_STATUS_SUCCESS"; 513 | 514 | case CURAND_STATUS_VERSION_MISMATCH: 515 | return "CURAND_STATUS_VERSION_MISMATCH"; 516 | 517 | case CURAND_STATUS_NOT_INITIALIZED: 518 | return "CURAND_STATUS_NOT_INITIALIZED"; 519 | 520 | case CURAND_STATUS_ALLOCATION_FAILED: 521 | return "CURAND_STATUS_ALLOCATION_FAILED"; 522 | 523 | case CURAND_STATUS_TYPE_ERROR: 524 | return "CURAND_STATUS_TYPE_ERROR"; 525 | 526 | case CURAND_STATUS_OUT_OF_RANGE: 527 | return "CURAND_STATUS_OUT_OF_RANGE"; 528 | 529 | case CURAND_STATUS_LENGTH_NOT_MULTIPLE: 530 | return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; 531 | 532 | case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: 533 | return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; 534 | 535 | case CURAND_STATUS_LAUNCH_FAILURE: 536 | return "CURAND_STATUS_LAUNCH_FAILURE"; 537 | 538 | case CURAND_STATUS_PREEXISTING_FAILURE: 539 | return "CURAND_STATUS_PREEXISTING_FAILURE"; 540 | 541 | case CURAND_STATUS_INITIALIZATION_FAILED: 542 | return "CURAND_STATUS_INITIALIZATION_FAILED"; 543 | 544 | case CURAND_STATUS_ARCH_MISMATCH: 545 | return "CURAND_STATUS_ARCH_MISMATCH"; 546 | 547 | case CURAND_STATUS_INTERNAL_ERROR: 548 | return "CURAND_STATUS_INTERNAL_ERROR"; 549 | } 550 | 551 | return ""; 552 | } 553 | #endif 554 | 555 | #ifdef NV_NPPIDEFS_H 556 | // NPP API errors 557 | static const char *_cudaGetErrorEnum(NppStatus error) 558 | { 559 | switch (error) 560 | { 561 | case NPP_NOT_SUPPORTED_MODE_ERROR: 562 | return "NPP_NOT_SUPPORTED_MODE_ERROR"; 563 | 564 | case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR: 565 | return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR"; 566 | 567 | 
case NPP_RESIZE_NO_OPERATION_ERROR: 568 | return "NPP_RESIZE_NO_OPERATION_ERROR"; 569 | 570 | case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY: 571 | return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY"; 572 | 573 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 574 | case NPP_BAD_ARG_ERROR: 575 | return "NPP_BAD_ARGUMENT_ERROR"; 576 | 577 | case NPP_COEFF_ERROR: 578 | return "NPP_COEFFICIENT_ERROR"; 579 | 580 | case NPP_RECT_ERROR: 581 | return "NPP_RECTANGLE_ERROR"; 582 | 583 | case NPP_QUAD_ERROR: 584 | return "NPP_QUADRANGLE_ERROR"; 585 | 586 | case NPP_MEM_ALLOC_ERR: 587 | return "NPP_MEMORY_ALLOCATION_ERROR"; 588 | 589 | case NPP_HISTO_NUMBER_OF_LEVELS_ERROR: 590 | return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; 591 | 592 | case NPP_INVALID_INPUT: 593 | return "NPP_INVALID_INPUT"; 594 | 595 | case NPP_POINTER_ERROR: 596 | return "NPP_POINTER_ERROR"; 597 | 598 | case NPP_WARNING: 599 | return "NPP_WARNING"; 600 | 601 | case NPP_ODD_ROI_WARNING: 602 | return "NPP_ODD_ROI_WARNING"; 603 | #else 604 | 605 | // These are for CUDA 5.5 or higher 606 | case NPP_BAD_ARGUMENT_ERROR: 607 | return "NPP_BAD_ARGUMENT_ERROR"; 608 | 609 | case NPP_COEFFICIENT_ERROR: 610 | return "NPP_COEFFICIENT_ERROR"; 611 | 612 | case NPP_RECTANGLE_ERROR: 613 | return "NPP_RECTANGLE_ERROR"; 614 | 615 | case NPP_QUADRANGLE_ERROR: 616 | return "NPP_QUADRANGLE_ERROR"; 617 | 618 | case NPP_MEMORY_ALLOCATION_ERR: 619 | return "NPP_MEMORY_ALLOCATION_ERROR"; 620 | 621 | case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR: 622 | return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; 623 | 624 | case NPP_INVALID_HOST_POINTER_ERROR: 625 | return "NPP_INVALID_HOST_POINTER_ERROR"; 626 | 627 | case NPP_INVALID_DEVICE_POINTER_ERROR: 628 | return "NPP_INVALID_DEVICE_POINTER_ERROR"; 629 | #endif 630 | 631 | case NPP_LUT_NUMBER_OF_LEVELS_ERROR: 632 | return "NPP_LUT_NUMBER_OF_LEVELS_ERROR"; 633 | 634 | case NPP_TEXTURE_BIND_ERROR: 635 | return "NPP_TEXTURE_BIND_ERROR"; 636 | 637 | case 
NPP_WRONG_INTERSECTION_ROI_ERROR: 638 | return "NPP_WRONG_INTERSECTION_ROI_ERROR"; 639 | 640 | case NPP_NOT_EVEN_STEP_ERROR: 641 | return "NPP_NOT_EVEN_STEP_ERROR"; 642 | 643 | case NPP_INTERPOLATION_ERROR: 644 | return "NPP_INTERPOLATION_ERROR"; 645 | 646 | case NPP_RESIZE_FACTOR_ERROR: 647 | return "NPP_RESIZE_FACTOR_ERROR"; 648 | 649 | case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR: 650 | return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR"; 651 | 652 | 653 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 654 | case NPP_MEMFREE_ERR: 655 | return "NPP_MEMFREE_ERR"; 656 | 657 | case NPP_MEMSET_ERR: 658 | return "NPP_MEMSET_ERR"; 659 | 660 | case NPP_MEMCPY_ERR: 661 | return "NPP_MEMCPY_ERROR"; 662 | 663 | case NPP_MIRROR_FLIP_ERR: 664 | return "NPP_MIRROR_FLIP_ERR"; 665 | #else 666 | case NPP_MEMFREE_ERROR: 667 | return "NPP_MEMFREE_ERROR"; 668 | 669 | case NPP_MEMSET_ERROR: 670 | return "NPP_MEMSET_ERROR"; 671 | 672 | case NPP_MEMCPY_ERROR: 673 | return "NPP_MEMCPY_ERROR"; 674 | 675 | case NPP_MIRROR_FLIP_ERROR: 676 | return "NPP_MIRROR_FLIP_ERROR"; 677 | #endif 678 | 679 | case NPP_ALIGNMENT_ERROR: 680 | return "NPP_ALIGNMENT_ERROR"; 681 | 682 | case NPP_STEP_ERROR: 683 | return "NPP_STEP_ERROR"; 684 | 685 | case NPP_SIZE_ERROR: 686 | return "NPP_SIZE_ERROR"; 687 | 688 | case NPP_NULL_POINTER_ERROR: 689 | return "NPP_NULL_POINTER_ERROR"; 690 | 691 | case NPP_CUDA_KERNEL_EXECUTION_ERROR: 692 | return "NPP_CUDA_KERNEL_EXECUTION_ERROR"; 693 | 694 | case NPP_NOT_IMPLEMENTED_ERROR: 695 | return "NPP_NOT_IMPLEMENTED_ERROR"; 696 | 697 | case NPP_ERROR: 698 | return "NPP_ERROR"; 699 | 700 | case NPP_SUCCESS: 701 | return "NPP_SUCCESS"; 702 | 703 | case NPP_WRONG_INTERSECTION_QUAD_WARNING: 704 | return "NPP_WRONG_INTERSECTION_QUAD_WARNING"; 705 | 706 | case NPP_MISALIGNED_DST_ROI_WARNING: 707 | return "NPP_MISALIGNED_DST_ROI_WARNING"; 708 | 709 | case NPP_AFFINE_QUAD_INCORRECT_WARNING: 710 | return "NPP_AFFINE_QUAD_INCORRECT_WARNING"; 711 | 712 | case 
NPP_DOUBLE_SIZE_WARNING: 713 | return "NPP_DOUBLE_SIZE_WARNING"; 714 | 715 | case NPP_WRONG_INTERSECTION_ROI_WARNING: 716 | return "NPP_WRONG_INTERSECTION_ROI_WARNING"; 717 | } 718 | 719 | return ""; 720 | } 721 | #endif 722 | 723 | #ifdef __DRIVER_TYPES_H__ 724 | #ifndef DEVICE_RESET 725 | #define DEVICE_RESET cudaDeviceReset(); 726 | #endif 727 | #else 728 | #ifndef DEVICE_RESET 729 | #define DEVICE_RESET 730 | #endif 731 | #endif 732 | 733 | template< typename T > 734 | void check(T result, char const *const func, const char *const file, int const line) 735 | { 736 | if (result) 737 | { 738 | fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", 739 | file, line, static_cast(result), _cudaGetErrorEnum(result), func); 740 | DEVICE_RESET 741 | // Make sure we call CUDA Device Reset before exiting 742 | exit(EXIT_FAILURE); 743 | } 744 | } 745 | 746 | #ifdef __DRIVER_TYPES_H__ 747 | // This will output the proper CUDA error strings in the event that a CUDA host call returns an error 748 | #define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ ) 749 | 750 | // This will output the proper error string when calling cudaGetLastError 751 | #define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__) 752 | 753 | inline void __getLastCudaError(const char *errorMessage, const char *file, const int line) 754 | { 755 | cudaError_t err = cudaGetLastError(); 756 | 757 | if (cudaSuccess != err) 758 | { 759 | fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n", 760 | file, line, errorMessage, (int)err, cudaGetErrorString(err)); 761 | DEVICE_RESET 762 | exit(EXIT_FAILURE); 763 | } 764 | } 765 | #endif 766 | 767 | #ifndef MAX 768 | #define MAX(a,b) (a > b ? 
a : b) 769 | #endif 770 | 771 | // Beginning of GPU Architecture definitions 772 | inline int _ConvertSMVer2Cores(int major, int minor) 773 | { 774 | // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM 775 | typedef struct 776 | { 777 | int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version 778 | int Cores; 779 | } sSMtoCores; 780 | 781 | sSMtoCores nGpuArchCoresPerSM[] = 782 | { 783 | { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class 784 | { 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class 785 | { 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class 786 | { 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class 787 | { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class 788 | { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class 789 | { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class 790 | { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class 791 | { -1, -1 } 792 | }; 793 | 794 | int index = 0; 795 | 796 | while (nGpuArchCoresPerSM[index].SM != -1) 797 | { 798 | if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) 799 | { 800 | return nGpuArchCoresPerSM[index].Cores; 801 | } 802 | 803 | index++; 804 | } 805 | 806 | // If we don't find the values, we default use the previous one to run properly 807 | printf("MapSMtoCores for SM %d.%d is undefined. 
Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[7].Cores); 808 | return nGpuArchCoresPerSM[7].Cores; 809 | } 810 | // end of GPU Architecture definitions 811 | 812 | #ifdef __CUDA_RUNTIME_H__ 813 | // General GPU Device CUDA Initialization 814 | inline int gpuDeviceInit(int devID) 815 | { 816 | int device_count; 817 | checkCudaErrors(cudaGetDeviceCount(&device_count)); 818 | 819 | if (device_count == 0) 820 | { 821 | fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n"); 822 | exit(EXIT_FAILURE); 823 | } 824 | 825 | if (devID < 0) 826 | { 827 | devID = 0; 828 | } 829 | 830 | if (devID > device_count-1) 831 | { 832 | fprintf(stderr, "\n"); 833 | fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count); 834 | fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID); 835 | fprintf(stderr, "\n"); 836 | return -devID; 837 | } 838 | 839 | cudaDeviceProp deviceProp; 840 | checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); 841 | 842 | if (deviceProp.computeMode == cudaComputeModeProhibited) 843 | { 844 | fprintf(stderr, "Error: device is running in , no threads can use ::cudaSetDevice().\n"); 845 | return -1; 846 | } 847 | 848 | if (deviceProp.major < 1) 849 | { 850 | fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n"); 851 | exit(EXIT_FAILURE); 852 | } 853 | 854 | checkCudaErrors(cudaSetDevice(devID)); 855 | printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name); 856 | 857 | return devID; 858 | } 859 | 860 | // This function returns the best GPU (with maximum GFLOPS) 861 | inline int gpuGetMaxGflopsDeviceId() 862 | { 863 | int current_device = 0, sm_per_multiproc = 0; 864 | int max_compute_perf = 0, max_perf_device = 0; 865 | int device_count = 0, best_SM_arch = 0; 866 | cudaDeviceProp deviceProp; 867 | cudaGetDeviceCount(&device_count); 868 | 869 | checkCudaErrors(cudaGetDeviceCount(&device_count)); 870 | 871 | if (device_count 
== 0) 872 | { 873 | fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n"); 874 | exit(EXIT_FAILURE); 875 | } 876 | 877 | // Find the best major SM Architecture GPU device 878 | while (current_device < device_count) 879 | { 880 | cudaGetDeviceProperties(&deviceProp, current_device); 881 | 882 | // If this GPU is not running on Compute Mode prohibited, then we can add it to the list 883 | if (deviceProp.computeMode != cudaComputeModeProhibited) 884 | { 885 | if (deviceProp.major > 0 && deviceProp.major < 9999) 886 | { 887 | best_SM_arch = MAX(best_SM_arch, deviceProp.major); 888 | } 889 | } 890 | 891 | current_device++; 892 | } 893 | 894 | // Find the best CUDA capable GPU device 895 | current_device = 0; 896 | 897 | while (current_device < device_count) 898 | { 899 | cudaGetDeviceProperties(&deviceProp, current_device); 900 | 901 | // If this GPU is not running on Compute Mode prohibited, then we can add it to the list 902 | if (deviceProp.computeMode != cudaComputeModeProhibited) 903 | { 904 | if (deviceProp.major == 9999 && deviceProp.minor == 9999) 905 | { 906 | sm_per_multiproc = 1; 907 | } 908 | else 909 | { 910 | sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor); 911 | } 912 | 913 | int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate; 914 | 915 | if (compute_perf > max_compute_perf) 916 | { 917 | // If we find GPU with SM major > 2, search only these 918 | if (best_SM_arch > 2) 919 | { 920 | // If our device==dest_SM_arch, choose this, or else pass 921 | if (deviceProp.major == best_SM_arch) 922 | { 923 | max_compute_perf = compute_perf; 924 | max_perf_device = current_device; 925 | } 926 | } 927 | else 928 | { 929 | max_compute_perf = compute_perf; 930 | max_perf_device = current_device; 931 | } 932 | } 933 | } 934 | 935 | ++current_device; 936 | } 937 | 938 | return max_perf_device; 939 | } 940 | 941 | 942 | // Initialization code to find the best CUDA Device 
943 | inline int findCudaDevice(int argc, const char **argv) 944 | { 945 | cudaDeviceProp deviceProp; 946 | int devID = 0; 947 | 948 | // If the command-line has a device number specified, use it 949 | if (checkCmdLineFlag(argc, argv, "device")) 950 | { 951 | devID = getCmdLineArgumentInt(argc, argv, "device="); 952 | 953 | if (devID < 0) 954 | { 955 | printf("Invalid command line parameter\n "); 956 | exit(EXIT_FAILURE); 957 | } 958 | else 959 | { 960 | devID = gpuDeviceInit(devID); 961 | 962 | if (devID < 0) 963 | { 964 | printf("exiting...\n"); 965 | exit(EXIT_FAILURE); 966 | } 967 | } 968 | } 969 | else 970 | { 971 | // Otherwise pick the device with highest Gflops/s 972 | devID = gpuGetMaxGflopsDeviceId(); 973 | checkCudaErrors(cudaSetDevice(devID)); 974 | checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); 975 | printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor); 976 | } 977 | 978 | return devID; 979 | } 980 | 981 | // General check for CUDA GPU SM Capabilities 982 | inline bool checkCudaCapabilities(int major_version, int minor_version) 983 | { 984 | cudaDeviceProp deviceProp; 985 | deviceProp.major = 0; 986 | deviceProp.minor = 0; 987 | int dev; 988 | 989 | checkCudaErrors(cudaGetDevice(&dev)); 990 | checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev)); 991 | 992 | if ((deviceProp.major > major_version) || 993 | (deviceProp.major == major_version && deviceProp.minor >= minor_version)) 994 | { 995 | printf("> Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor); 996 | return true; 997 | } 998 | else 999 | { 1000 | printf("No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version); 1001 | return false; 1002 | } 1003 | } 1004 | #endif 1005 | 1006 | // end of CUDA Helper Functions 1007 | 1008 | 1009 | #endif 1010 | 
--------------------------------------------------------------------------------