├── Makefile
├── cudadma.c
├── gpucc.c
├── gpudma.c
├── gpuinfo.c
├── gpustub.c
├── gputest.c
├── memeat.c
├── nvinfo.c
└── opencl_entry.c


/Makefile:
--------------------------------------------------------------------------------
# PGXS build of the "gputest" module (gputest.o, linked against libcuda),
# plus a set of stand-alone utilities that are built by "make misc".
MODULE_big = gputest
OBJS = gputest.o
EXTRA_CLEAN = gpuinfo gpucc gpudma memeat nvinfo

# Header and Libraries of OpenCL (to be autoconf?)
IPATH_LIST := /usr/include \
              /usr/local/cuda/include \
              /opt/AMDAPP/include
LPATH_LIST := /usr/lib64 \
              /usr/lib \
              /usr/local/cuda/lib64 \
              /usr/local/cuda/lib

# Each probe prints the flag for the FIRST directory of the list that holds
# the wanted file.  The commands are grouped with { ...; } instead of ( ... )
# because "break" executed inside a subshell cannot leave the enclosing loop,
# which would make every matching directory be printed.
CL_IPATH := $(shell for x in $(IPATH_LIST); \
              do test -e "$$x/CL/cl.h" && { echo -I $$x; break; }; done)
CL_LPATH := $(shell for x in $(LPATH_LIST); \
              do test -e "$$x/libOpenCL.so" && { echo -L $$x; break; }; done)
CUDA_IPATH := $(shell for x in $(IPATH_LIST); \
              do test -e "$$x/cuda.h" && { echo -I $$x; break; }; done)
CUDA_LPATH := $(shell for x in $(LPATH_LIST); \
              do test -e "$$x/libcuda.so" && { echo -L $$x; break; }; done)

# The module links against libcuda, so hand it the CUDA paths probed above.
# (The former $(IPATH) / $(LPATH) are not defined anywhere in this Makefile
# and therefore always expanded to nothing.)
PG_CPPFLAGS := $(CUDA_IPATH)
SHLIB_LINK := $(CUDA_LPATH) -lcuda

PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)

misc: $(EXTRA_CLEAN)

# NOTE(review): misc.c is not part of the directory listing above -- confirm
# where opencl_strerror() for gpuinfo is supposed to come from.
gpuinfo: gpuinfo.c misc.c
	$(CC) $(CFLAGS) $^ -o $@ -lOpenCL $(CL_IPATH) $(CL_LPATH)

gpucc: gpucc.c opencl_entry.c
	$(CC) $(CFLAGS) $^ -o $@ -ldl $(CL_IPATH) $(CL_LPATH)

gpudma: gpudma.c opencl_entry.c
	$(CC) $(CFLAGS) $^ -o $@ -ldl $(CL_IPATH) $(CL_LPATH)

gpustub: gpustub.c opencl_entry.c
	$(CC) $(CFLAGS) $^ -o $@ -ldl $(CL_IPATH) $(CL_LPATH)

cudadma: cudadma.c
	$(CC) $(CFLAGS) $^ -o $@ -lcuda $(CUDA_IPATH) $(CUDA_LPATH)

nvinfo: nvinfo.c
	$(CC) $(CFLAGS) $^ -o $@ -lcuda $(CUDA_IPATH) $(CUDA_LPATH)

memeat: memeat.c
	$(CC) $(CFLAGS) $^ -o $@



--------------------------------------------------------------------------------
/cudadma.c:
-------------------------------------------------------------------------------- 1 | /* 2 | * cudadma - test for DMA transfer on CUDA device 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #define lengthof(array) (sizeof (array) / sizeof ((array)[0])) 14 | #define error_exit(fmt,...) \ 15 | do { \ 16 | fprintf(stderr, "%s:%d " fmt "\n", \ 17 | __FUNCTION__, __LINE__, \ 18 | ##__VA_ARGS__); \ 19 | exit(1); \ 20 | } while(0) 21 | 22 | static int is_blocking = 1; 23 | static int num_trial = 100; /* 100 times */ 24 | static size_t buffer_size = 128 << 20; /* 128MB */ 25 | static size_t chunk_size = 0; 26 | 27 | static const char * 28 | cuGetErrorString(CUresult errcode) 29 | { 30 | static char strbuf[256]; 31 | 32 | switch (errcode) 33 | { 34 | case CUDA_SUCCESS: 35 | return "success"; 36 | case CUDA_ERROR_INVALID_VALUE: 37 | return "invalid value"; 38 | case CUDA_ERROR_OUT_OF_MEMORY: 39 | return "out of memory"; 40 | case CUDA_ERROR_NOT_INITIALIZED: 41 | return "not initialized"; 42 | case CUDA_ERROR_DEINITIALIZED: 43 | return "deinitialized"; 44 | case CUDA_ERROR_PROFILER_DISABLED: 45 | return "profiler disabled"; 46 | case CUDA_ERROR_PROFILER_NOT_INITIALIZED: 47 | return "profiler not initialized"; 48 | case CUDA_ERROR_PROFILER_ALREADY_STARTED: 49 | return "profiler already started"; 50 | case CUDA_ERROR_PROFILER_ALREADY_STOPPED: 51 | return "profiler already stopped"; 52 | case CUDA_ERROR_NO_DEVICE: 53 | return "no device"; 54 | case CUDA_ERROR_INVALID_DEVICE: 55 | return "invalid device"; 56 | case CUDA_ERROR_INVALID_IMAGE: 57 | return "invalid image"; 58 | case CUDA_ERROR_INVALID_CONTEXT: 59 | return "invalid context"; 60 | case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: 61 | return "context already current"; 62 | case CUDA_ERROR_MAP_FAILED: 63 | return "map failed"; 64 | case CUDA_ERROR_UNMAP_FAILED: 65 | return "unmap failed"; 66 | case CUDA_ERROR_ARRAY_IS_MAPPED: 67 | return "array is mapped"; 
68 | case CUDA_ERROR_ALREADY_MAPPED: 69 | return "already mapped"; 70 | case CUDA_ERROR_NO_BINARY_FOR_GPU: 71 | return "no binary for gpu"; 72 | case CUDA_ERROR_ALREADY_ACQUIRED: 73 | return "already acquired"; 74 | case CUDA_ERROR_NOT_MAPPED: 75 | return "not mapped"; 76 | case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: 77 | return "not mapped as array"; 78 | case CUDA_ERROR_NOT_MAPPED_AS_POINTER: 79 | return "not mapped as pointer"; 80 | case CUDA_ERROR_ECC_UNCORRECTABLE: 81 | return "ecc uncorrectable"; 82 | case CUDA_ERROR_UNSUPPORTED_LIMIT: 83 | return "unsupported limit"; 84 | case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: 85 | return "context already in use"; 86 | case CUDA_ERROR_INVALID_SOURCE: 87 | return "invalid source"; 88 | case CUDA_ERROR_FILE_NOT_FOUND: 89 | return "file not found"; 90 | case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: 91 | return "shared object symbol not found"; 92 | case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: 93 | return "shared object init failed"; 94 | case CUDA_ERROR_OPERATING_SYSTEM: 95 | return "operating system"; 96 | case CUDA_ERROR_INVALID_HANDLE: 97 | return "invalid handle"; 98 | case CUDA_ERROR_NOT_FOUND: 99 | return "not found"; 100 | case CUDA_ERROR_NOT_READY: 101 | return "not ready"; 102 | case CUDA_ERROR_LAUNCH_FAILED: 103 | return "launch failed"; 104 | case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: 105 | return "launch out of resources"; 106 | case CUDA_ERROR_LAUNCH_TIMEOUT: 107 | return "launch timeout"; 108 | case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: 109 | return "launch incompatible texturing"; 110 | case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: 111 | return "peer access already enabled"; 112 | case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: 113 | return "peer access not enabled"; 114 | case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: 115 | return "primary context active"; 116 | case CUDA_ERROR_CONTEXT_IS_DESTROYED: 117 | return "context is destroyed"; 118 | default: 119 | snprintf(strbuf, sizeof(strbuf), "cuda error = %d", errcode); 120 | break; 
121 | } 122 | return strbuf; 123 | } 124 | 125 | 126 | static void 127 | run_test(const char *namebuf, CUcontext context, CUstream stream) 128 | { 129 | char *hmem; 130 | CUdeviceptr dmem; 131 | int num_chunks = buffer_size / chunk_size; 132 | int i, j, k; 133 | CUresult rc; 134 | struct timeval tv1, tv2; 135 | 136 | if (is_blocking) 137 | { 138 | hmem = malloc(buffer_size); 139 | if (!hmem) 140 | error_exit("failed on malloc : %s", strerror(rc)); 141 | } 142 | else 143 | { 144 | rc = cuMemAllocHost((void **)&hmem, buffer_size); 145 | if (rc != CUDA_SUCCESS) 146 | error_exit("failed on cuMemAllocHost : %s", cuGetErrorString(rc)); 147 | } 148 | rc = cuMemAlloc(&dmem, buffer_size); 149 | if (rc != CUDA_SUCCESS) 150 | error_exit("failed on cuMemAlloc : %s", cuGetErrorString(rc)); 151 | 152 | gettimeofday(&tv1, NULL); 153 | for (i=0, k=0; i < num_trial; i++) 154 | { 155 | for (j=0; j < num_chunks; j++) 156 | { 157 | if (is_blocking) 158 | { 159 | rc = cuMemcpyHtoD(dmem + j * chunk_size, 160 | hmem + j * chunk_size, 161 | chunk_size); 162 | if (rc != CUDA_SUCCESS) 163 | error_exit("failed on cuMemcpyHtoD : %s", 164 | cuGetErrorString(rc)); 165 | } 166 | else 167 | { 168 | rc = cuMemcpyHtoDAsync(dmem + j * chunk_size, 169 | hmem + j * chunk_size, 170 | chunk_size, 171 | stream); 172 | if (rc != CUDA_SUCCESS) 173 | error_exit("failed on cuMemcpyHtoDAsync : %s", 174 | cuGetErrorString(rc)); 175 | } 176 | } 177 | 178 | if (is_blocking) 179 | { 180 | rc = cuMemcpyDtoH(hmem, dmem, buffer_size); 181 | if (rc != CUDA_SUCCESS) 182 | error_exit("failed on cuMemcpyDtoH : %s", 183 | cuGetErrorString(rc)); 184 | } 185 | else 186 | { 187 | rc = cuMemcpyDtoHAsync(hmem, dmem, buffer_size, stream); 188 | if (rc != CUDA_SUCCESS) 189 | error_exit("failed on cuMemcpyDtoHAsync : %s", 190 | cuGetErrorString(rc)); 191 | } 192 | } 193 | /* wait for completion */ 194 | rc = cuCtxSynchronize(); 195 | if (rc != CUDA_SUCCESS) 196 | error_exit("failed on cuCtxSynchronize : %s", 
cuGetErrorString(rc)); 197 | 198 | gettimeofday(&tv2, NULL); 199 | 200 | printf("DMA send/recv test result\n" 201 | "device: %s\n" 202 | "size: %luMB\n" 203 | "chunks: %lu%s x %d\n" 204 | "ntrials: %d\n" 205 | "total_size: %luMB\n" 206 | "time: %.2fs\n" 207 | "speed: %.2fMB/s\n" 208 | "mode: %s\n", 209 | namebuf, 210 | buffer_size >> 20, 211 | chunk_size > (1UL<<20) ? chunk_size >> 20 : chunk_size >> 10, 212 | chunk_size > (1UL<<20) ? "MB" : "KB", 213 | num_chunks, 214 | num_trial, 215 | (buffer_size >> 20) * num_trial, 216 | (double)((tv2.tv_sec * 1000000 + tv2.tv_usec) - 217 | (tv1.tv_sec * 1000000 + tv1.tv_usec)) / 1000000.0, 218 | (double)(((buffer_size >> 20) * num_trial) * 1000000) / 219 | (double)((tv2.tv_sec * 1000000 + tv2.tv_usec) - 220 | (tv1.tv_sec * 1000000 + tv1.tv_usec)), 221 | is_blocking ? "sync" : "async"); 222 | /* release resources */ 223 | cuMemFree(dmem); 224 | cuMemFreeHost(hmem); 225 | } 226 | 227 | static void usage(const char *cmdname) 228 | { 229 | fprintf(stderr, 230 | "usage: %s [ ..]\n" 231 | "\n" 232 | "options:\n" 233 | " -d (default: 0)\n" 234 | " -m (sync|async) (default: sync)\n" 235 | " -n (default: 100)\n" 236 | " -s (default: 128 = 128MB)\n" 237 | " -c (default: buffer size)\n", 238 | cmdname); 239 | exit(1); 240 | } 241 | 242 | int main(int argc, char *argv[]) 243 | { 244 | int device_id = 0; 245 | CUdevice device; 246 | CUcontext context = NULL; 247 | CUstream stream = NULL; 248 | CUresult rc; 249 | int c; 250 | char namebuf[1024]; 251 | 252 | while ((c = getopt(argc, argv, "d:m:n:s:c:")) >= 0) 253 | { 254 | switch (c) 255 | { 256 | case 'd': 257 | device_id = atoi(optarg); 258 | break; 259 | case 'm': 260 | if (strcmp(optarg, "sync") == 0) 261 | is_blocking = 1; 262 | else if (strcmp(optarg, "async") == 0) 263 | is_blocking = 0; 264 | else 265 | usage(basename(argv[0])); 266 | break; 267 | case 'n': 268 | num_trial = atoi(optarg); 269 | break; 270 | case 's': 271 | buffer_size = atoi(optarg) << 20; 272 | break; 273 | case 
'c': 274 | chunk_size = atoi(optarg) << 10; 275 | break; 276 | default: 277 | usage(basename(argv[0])); 278 | break; 279 | } 280 | } 281 | if (optind != argc) 282 | usage(basename(argv[0])); 283 | 284 | if (chunk_size == 0) 285 | chunk_size = buffer_size; 286 | else if (buffer_size % chunk_size != 0 || buffer_size < chunk_size) 287 | { 288 | fprintf(stderr, "chunk_size (-c) must be aligned to buffer_size\n"); 289 | return 1; 290 | } 291 | 292 | /* 293 | * Initialize CUDA device 294 | */ 295 | rc = cuInit(0); 296 | if (rc != CUDA_SUCCESS) 297 | error_exit("failed on cuInit : %s", cuGetErrorString(rc)); 298 | 299 | rc = cuDeviceGet(&device, device_id); 300 | if (rc != CUDA_SUCCESS) 301 | error_exit("failed on cuDeviceGet(%d) : %s", 302 | device_id, cuGetErrorString(rc)); 303 | 304 | /* Get name of cuda device */ 305 | rc = cuDeviceGetName(namebuf, sizeof(namebuf), device); 306 | if (rc != CUDA_SUCCESS) 307 | error_exit("failed on cuDeviceGetName : %s", cuGetErrorString(rc)); 308 | 309 | /* Construct an CUDA context */ 310 | rc = cuCtxCreate(&context, CU_CTX_SCHED_AUTO, device); 311 | if (rc != CUDA_SUCCESS) 312 | error_exit("failed on cuCtxCreate : %s", cuGetErrorString(rc)); 313 | 314 | rc = cuCtxSetCurrent(context); 315 | if (rc != CUDA_SUCCESS) 316 | error_exit("failed on cuCtxSetCurrent : %s", cuGetErrorString(rc)); 317 | 318 | /* do the job */ 319 | run_test(namebuf, context, stream); 320 | 321 | return 0; 322 | } 323 | -------------------------------------------------------------------------------- /gpucc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define lengthof(array) (sizeof (array) / sizeof ((array)[0])) 12 | 13 | extern const char *opencl_strerror(cl_int errcode); 14 | 15 | static int platform_idx = 1; 16 | static int device_idx = 1; 17 | static char *cl_build_opts = "-Werror"; 18 | 19 | 
static int 20 | opencl_compile(cl_context context, 21 | cl_device_id device_id, 22 | const char *filename) 23 | { 24 | cl_program program; 25 | int fdesc; 26 | char *source; 27 | size_t length; 28 | cl_int rc; 29 | struct stat stbuf; 30 | cl_build_status status; 31 | char logbuf[65536]; 32 | size_t loglen; 33 | 34 | fdesc = open(filename, O_RDONLY); 35 | if (fdesc < 0) 36 | { 37 | fprintf(stderr, "failed to open '%s' (%s)\n", 38 | filename, strerror(errno)); 39 | return 1; 40 | } 41 | 42 | if (fstat(fdesc, &stbuf) != 0) 43 | { 44 | fprintf(stderr, "failed to fstat on '%s' (%s)\n", 45 | filename, strerror(errno)); 46 | return 1; 47 | } 48 | length = stbuf.st_size; 49 | 50 | source = malloc(length); 51 | if (!source) 52 | { 53 | fprintf(stderr, "out of memory (%s)\n", 54 | strerror(errno)); 55 | return 1; 56 | } 57 | 58 | if (read(fdesc, source, length) != length) 59 | { 60 | fprintf(stderr, "failed to read whole of source file (%s)\n", 61 | strerror(errno)); 62 | return 1; 63 | } 64 | 65 | /* make a program object */ 66 | program = clCreateProgramWithSource(context, 67 | 1, 68 | (const char **)&source, 69 | &length, 70 | &rc); 71 | if (rc != CL_SUCCESS) 72 | { 73 | fprintf(stderr, "failed on clCreateProgramWithSource (%s)\n", 74 | opencl_strerror(rc)); 75 | return 1; 76 | } 77 | 78 | /* build this program */ 79 | rc = clBuildProgram(program, 80 | 1, 81 | &device_id, 82 | cl_build_opts, 83 | NULL, 84 | NULL); 85 | if (rc != CL_SUCCESS && rc != CL_BUILD_PROGRAM_FAILURE) 86 | { 87 | if (rc == CL_INVALID_BUILD_OPTIONS) 88 | fprintf(stderr, 89 | "failed on clBuildProgram with build options: %s (%s)", 90 | cl_build_opts, opencl_strerror(rc)); 91 | else 92 | fprintf(stderr, "failed on clBuildProgram (%s)\n", 93 | opencl_strerror(rc)); 94 | return 1; 95 | } 96 | 97 | /* Get status and logs */ 98 | rc = clGetProgramBuildInfo(program, 99 | device_id, 100 | CL_PROGRAM_BUILD_STATUS, 101 | sizeof(cl_build_status), 102 | &status, 103 | NULL); 104 | if (rc != CL_SUCCESS) 105 | { 
106 | fprintf(stderr, "failed on clGetProgramBuildInfo (%s)\n", 107 | opencl_strerror(rc)); 108 | return 1; 109 | } 110 | 111 | rc = clGetProgramBuildInfo(program, 112 | device_id, 113 | CL_PROGRAM_BUILD_LOG, 114 | sizeof(logbuf), 115 | &logbuf, 116 | &loglen); 117 | if (rc != CL_SUCCESS) 118 | { 119 | fprintf(stderr, "failed on clGetProgramBuildInfo(%s)\n", 120 | opencl_strerror(rc)); 121 | return 1; 122 | } 123 | 124 | switch (status) 125 | { 126 | case CL_BUILD_NONE: 127 | puts("build none"); 128 | break; 129 | case CL_BUILD_ERROR: 130 | puts("build error"); 131 | break; 132 | case CL_BUILD_SUCCESS: 133 | puts("build success"); 134 | break; 135 | case CL_BUILD_IN_PROGRESS: 136 | puts("build in progress"); 137 | break; 138 | default: 139 | puts("unknown"); 140 | break; 141 | } 142 | write(fileno(stdout), logbuf, loglen); 143 | 144 | clReleaseProgram(program); 145 | free(source); 146 | return 0; 147 | } 148 | 149 | int main(int argc, char *argv[]) 150 | { 151 | cl_platform_id platform_ids[32]; 152 | cl_int platform_num; 153 | cl_device_id device_ids[256]; 154 | cl_int device_num; 155 | cl_context context; 156 | cl_int code, rc, i; 157 | char namebuf[1024]; 158 | 159 | while ((code = getopt(argc, argv, "p:d:o:")) >= 0) 160 | { 161 | switch (code) 162 | { 163 | case 'p': 164 | platform_idx = atoi(optarg); 165 | break; 166 | case 'd': 167 | device_idx = atoi(optarg); 168 | break; 169 | case 'o': 170 | cl_build_opts = optarg; 171 | break; 172 | default: 173 | fprintf(stderr, 174 | "usage: %s " 175 | "[-p ][-d ]" 176 | "[-o \n", 177 | basename(argv[0])); 178 | return 1; 179 | } 180 | } 181 | if (optind >= argc) { 182 | fprintf(stderr, "no source files were given.\n"); 183 | return 1; 184 | } 185 | opencl_entry_init(); 186 | 187 | /* Get platform IDs */ 188 | rc = clGetPlatformIDs(lengthof(platform_ids), 189 | platform_ids, 190 | &platform_num); 191 | if (rc != CL_SUCCESS) 192 | { 193 | fprintf(stderr, "failed on clGetPlatformIDs (%s)", 194 | opencl_strerror(rc)); 195 | 
return 1; 196 | } 197 | if (platform_idx < 1 || platform_idx > platform_num) 198 | { 199 | fprintf(stderr, "opencl platform index %d did not exist.\n"); 200 | return 1; 201 | } 202 | 203 | /* Get device IDs */ 204 | rc = clGetDeviceIDs(platform_ids[platform_idx - 1], 205 | CL_DEVICE_TYPE_ALL, 206 | lengthof(device_ids), 207 | device_ids, 208 | &device_num); 209 | if (rc != CL_SUCCESS) 210 | { 211 | fprintf(stderr, "failed on clGetDeviceIDs (%s)\n", 212 | opencl_strerror(rc)); 213 | return 1; 214 | } 215 | if (device_idx < 1 || device_idx > device_num) 216 | { 217 | fprintf(stderr, "opencl device index %d did not exist.\n"); 218 | return 1; 219 | } 220 | 221 | /* Print name of opencl platform */ 222 | rc = clGetPlatformInfo(platform_ids[platform_idx - 1], 223 | CL_PLATFORM_NAME, 224 | sizeof(namebuf), namebuf, NULL); 225 | if (rc != CL_SUCCESS) 226 | { 227 | fprintf(stderr, "failed on clGetPlatformInfo (%s)\n", 228 | opencl_strerror(rc)); 229 | return 1; 230 | } 231 | printf("platform: %s\n", namebuf); 232 | 233 | /* Print name of opencl device */ 234 | rc = clGetDeviceInfo(device_ids[device_idx - 1], 235 | CL_DEVICE_NAME, 236 | sizeof(namebuf), namebuf, NULL); 237 | if (rc != CL_SUCCESS) 238 | { 239 | fprintf(stderr, "failed on clGetDeviceInfo (%s)\n", 240 | opencl_strerror(rc)); 241 | return 1; 242 | } 243 | printf("device: %s\n", namebuf); 244 | 245 | /* create an opencl context */ 246 | context = clCreateContext(NULL, 247 | 1, 248 | device_ids + (device_idx - 1), 249 | NULL, 250 | NULL, 251 | &rc); 252 | if (rc != CL_SUCCESS) 253 | { 254 | fprintf(stderr, "failed to create an opencl context (%s)\n", 255 | opencl_strerror(rc)); 256 | return 1; 257 | } 258 | 259 | /* do the jobs */ 260 | for (i = optind; i < argc; i++) 261 | { 262 | printf("source: %s ... 
", argv[i]); 263 | if (opencl_compile(context, device_ids[device_idx - 1], argv[i]) != 0) 264 | { 265 | puts("error"); 266 | return 1; 267 | } 268 | putchar('\n'); 269 | } 270 | clReleaseContext(context); 271 | } 272 | -------------------------------------------------------------------------------- /gpudma.c: -------------------------------------------------------------------------------- 1 | /* 2 | * gpudma - test for DMA transfer 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #define lengthof(array) (sizeof (array) / sizeof ((array)[0])) 14 | #define error_exit(fmt,...) \ 15 | do { \ 16 | fprintf(stderr, "%s:%d " fmt "\n", \ 17 | __FUNCTION__, __LINE__, \ 18 | ##__VA_ARGS__); \ 19 | exit(1); \ 20 | } while(0) 21 | 22 | extern const char *opencl_strerror(cl_int errcode); 23 | 24 | static cl_int platform_idx = 1; 25 | static cl_int device_idx = 1; 26 | static cl_bool is_blocking = CL_TRUE; 27 | static cl_int num_trial = 100; /* 100 times */ 28 | static size_t buffer_size = 128 << 20; /* 128MB */ 29 | static size_t chunk_size = 0; 30 | 31 | static void 32 | run_test(const char *namebuf, cl_context context, cl_command_queue cmdq) 33 | { 34 | cl_event *ev; 35 | char *hmem; 36 | cl_mem dmem; 37 | cl_mem pinned = NULL; 38 | cl_int num_chunks; 39 | cl_int rc, i, j, k; 40 | struct timeval tv1, tv2; 41 | 42 | num_chunks = buffer_size / chunk_size; 43 | ev = malloc(sizeof(cl_event) * (num_chunks + 1) * num_trial); 44 | if (!ev) 45 | error_exit("out of memory (%s)", strerror(rc)); 46 | 47 | hmem = malloc(buffer_size); 48 | if (!hmem) 49 | error_exit("out of memory (%s)", strerror(rc)); 50 | 51 | dmem = clCreateBuffer(context, 52 | CL_MEM_READ_WRITE, 53 | buffer_size, 54 | NULL, 55 | &rc); 56 | if (rc != CL_SUCCESS) 57 | error_exit("failed on clCreateBuffer(size=%lu) (%s)", 58 | buffer_size, opencl_strerror(rc)); 59 | 60 | gettimeofday(&tv1, NULL); 61 | 62 | if (!is_blocking) 63 | { 64 | pinned = 
clCreateBuffer(context, 65 | CL_MEM_READ_WRITE | 66 | CL_MEM_USE_HOST_PTR, 67 | buffer_size, 68 | hmem, 69 | &rc); 70 | if (rc != CL_SUCCESS) 71 | error_exit("failed on clCreateBuffer(size=%lu) (%s)", 72 | buffer_size, opencl_strerror(rc)); 73 | } 74 | 75 | for (i=0, k=0; i < num_trial; i++) 76 | { 77 | for (j=0; j < num_chunks; j++) 78 | { 79 | rc = clEnqueueWriteBuffer(cmdq, 80 | dmem, 81 | is_blocking, 82 | j * chunk_size, 83 | chunk_size, 84 | hmem + j * chunk_size, 85 | i > 0 ? 1 : 0, 86 | i > 0 ? &ev[k-1] : NULL, 87 | &ev[k+j]); 88 | if (rc != CL_SUCCESS) 89 | error_exit("failed on clEnqueueWriteBuffer (%s)", 90 | opencl_strerror(rc)); 91 | } 92 | 93 | rc = clEnqueueReadBuffer(cmdq, 94 | dmem, 95 | is_blocking, 96 | 0, 97 | buffer_size, 98 | hmem, 99 | num_chunks, 100 | &ev[k], 101 | &ev[k+num_chunks]); 102 | if (rc != CL_SUCCESS) 103 | error_exit("failed on clEnqueueReadBuffer (%s)", 104 | opencl_strerror(rc)); 105 | k += num_chunks + 1; 106 | } 107 | rc = clFinish(cmdq); 108 | if (rc != CL_SUCCESS) 109 | error_exit("failed on clFinish (%s)", opencl_strerror(rc)); 110 | 111 | gettimeofday(&tv2, NULL); 112 | 113 | printf("DMA send/recv test result\n" 114 | "device: %s\n" 115 | "size: %luMB\n" 116 | "chunks: %lu%s x %d\n" 117 | "ntrials: %d\n" 118 | "total_size: %luMB\n" 119 | "time: %.2fs\n" 120 | "speed: %.2fMB/s\n" 121 | "mode: %s\n", 122 | namebuf, 123 | buffer_size >> 20, 124 | chunk_size > (1UL<<20) ? chunk_size >> 20 : chunk_size >> 10, 125 | chunk_size > (1UL<<20) ? "MB" : "KB", 126 | num_chunks, 127 | num_trial, 128 | (buffer_size >> 20) * num_trial, 129 | (double)((tv2.tv_sec * 1000000 + tv2.tv_usec) - 130 | (tv1.tv_sec * 1000000 + tv1.tv_usec)) / 1000000.0, 131 | (double)(((buffer_size >> 20) * num_trial) * 1000000) / 132 | (double)((tv2.tv_sec * 1000000 + tv2.tv_usec) - 133 | (tv1.tv_sec * 1000000 + tv1.tv_usec)), 134 | is_blocking ? 
"sync" : "async"); 135 | 136 | /* release resources */ 137 | clReleaseMemObject(dmem); 138 | free(hmem); 139 | free(ev); 140 | } 141 | 142 | static void usage(const char *cmdname) 143 | { 144 | fprintf(stderr, 145 | "usage: %s [ ..]\n" 146 | "\n" 147 | "options:\n" 148 | " -p (default: 1)\n" 149 | " -d (default: 1)\n" 150 | " -m (sync|async) (default: sync)\n" 151 | " -n (default: 100)\n" 152 | " -s (default: 128 = 128MB)\n" 153 | " -c (default: buffer size)\n", 154 | cmdname); 155 | exit(1); 156 | } 157 | 158 | int main(int argc, char *argv[]) 159 | { 160 | cl_platform_id platform_ids[32]; 161 | cl_int platform_num; 162 | cl_device_id device_ids[256]; 163 | cl_int device_num; 164 | cl_context context; 165 | cl_command_queue cmdq; 166 | cl_int c, rc; 167 | char namebuf[1024]; 168 | 169 | while ((c = getopt(argc, argv, "p:d:m:n:s:c:")) >= 0) 170 | { 171 | switch (c) 172 | { 173 | case 'p': 174 | platform_idx = atoi(optarg); 175 | break; 176 | case 'd': 177 | device_idx = atoi(optarg); 178 | break; 179 | case 'm': 180 | if (strcmp(optarg, "sync") == 0) 181 | is_blocking = CL_TRUE; 182 | else if (strcmp(optarg, "async") == 0) 183 | is_blocking = CL_FALSE; 184 | else 185 | usage(basename(argv[0])); 186 | break; 187 | case 'n': 188 | num_trial = atoi(optarg); 189 | break; 190 | case 's': 191 | buffer_size = atoi(optarg) << 20; 192 | break; 193 | case 'c': 194 | chunk_size = atoi(optarg) << 10; 195 | break; 196 | default: 197 | usage(basename(argv[0])); 198 | break; 199 | } 200 | } 201 | if (optind != argc) 202 | usage(basename(argv[0])); 203 | 204 | if (chunk_size == 0) 205 | chunk_size = buffer_size; 206 | else if (buffer_size % chunk_size != 0 || buffer_size < chunk_size) 207 | { 208 | fprintf(stderr, "chunk_size (-c) must be aligned to buffer_size\n"); 209 | return 1; 210 | } 211 | 212 | /* 213 | * Initialize OpenCL platform/device 214 | */ 215 | opencl_entry_init(); 216 | 217 | /* Get platform IDs */ 218 | rc = clGetPlatformIDs(lengthof(platform_ids), 219 | 
platform_ids, 220 | &platform_num); 221 | if (rc != CL_SUCCESS) 222 | error_exit("failed on clGetPlatformIDs (%s)", opencl_strerror(rc)); 223 | if (platform_idx < 1 || platform_idx > platform_num) 224 | error_exit("opencl platform index %d did not exist", platform_idx); 225 | 226 | /* Get device IDs */ 227 | rc = clGetDeviceIDs(platform_ids[platform_idx - 1], 228 | CL_DEVICE_TYPE_ALL, 229 | lengthof(device_ids), 230 | device_ids, 231 | &device_num); 232 | if (rc != CL_SUCCESS) 233 | error_exit("failed on clGetDeviceIDs (%s)\n", opencl_strerror(rc)); 234 | if (device_idx < 1 || device_idx > device_num) 235 | error_exit("opencl device index %d did not exist", device_idx); 236 | 237 | /* Get name of opencl device */ 238 | rc = clGetDeviceInfo(device_ids[device_idx - 1], 239 | CL_DEVICE_NAME, 240 | sizeof(namebuf), namebuf, NULL); 241 | if (rc != CL_SUCCESS) 242 | error_exit("failed on clGetDeviceInfo (%s)", opencl_strerror(rc)); 243 | 244 | /* Construct an OpenCL context */ 245 | context = clCreateContext(NULL, 246 | 1, 247 | &device_ids[device_idx - 1], 248 | NULL, 249 | NULL, 250 | &rc); 251 | if (rc != CL_SUCCESS) 252 | error_exit("failed to create an opencl context (%s)", 253 | opencl_strerror(rc)); 254 | 255 | /* Construct an OpenCL command queue */ 256 | cmdq = clCreateCommandQueue(context, 257 | device_ids[device_idx - 1], 258 | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 259 | &rc); 260 | if (rc != CL_SUCCESS) 261 | error_exit("failed to create an opencl command queue (%s)", 262 | opencl_strerror(rc)); 263 | 264 | /* do the job */ 265 | run_test(namebuf, context, cmdq); 266 | 267 | /* cleanup resources */ 268 | clReleaseCommandQueue(cmdq); 269 | clReleaseContext(context); 270 | 271 | return 0; 272 | } 273 | -------------------------------------------------------------------------------- /gpuinfo.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 
#define lengthof(array) (sizeof (array) / sizeof ((array)[0])) 9 | 10 | extern const char *opencl_strerror(cl_int errcode); 11 | 12 | static int only_list = 0; 13 | static int only_platform = -1; 14 | static int only_device = -1; 15 | static struct { 16 | char profile[256]; 17 | char version[256]; 18 | char name[256]; 19 | char vendor[256]; 20 | char extensions[1024]; 21 | } platform_info; 22 | #define PLATFORM_ATTR(param,field) \ 23 | { param, sizeof(platform_info.field), &(platform_info.field) } 24 | 25 | static struct { 26 | cl_uint address_bits; 27 | cl_bool available; 28 | cl_bool compiler_available; 29 | cl_device_fp_config double_fp_config; 30 | cl_bool endian_little; 31 | cl_bool error_correction_support; 32 | cl_device_exec_capabilities execution_capabilities; 33 | char extensions[1024]; 34 | cl_ulong global_mem_cache_size; 35 | cl_device_mem_cache_type global_mem_cache_type; 36 | cl_uint global_mem_cacheline_size; 37 | cl_ulong global_mem_size; 38 | cl_device_fp_config half_fp_config; 39 | cl_bool host_unified_memory; 40 | cl_bool image_support; 41 | size_t image2d_max_height; 42 | size_t image2d_max_width; 43 | size_t image3d_max_depth; 44 | size_t image3d_max_height; 45 | size_t image3d_max_width; 46 | cl_ulong local_mem_size; 47 | cl_device_local_mem_type local_mem_type; 48 | cl_uint max_clock_frequency; 49 | cl_uint max_compute_units; 50 | cl_uint max_constant_args; 51 | cl_ulong max_constant_buffer_size; 52 | cl_ulong max_mem_alloc_size; 53 | size_t max_parameter_size; 54 | cl_uint max_read_image_args; 55 | cl_uint max_samplers; 56 | size_t max_work_group_size; 57 | cl_uint max_work_item_dimensions; 58 | size_t max_work_item_sizes[10]; 59 | cl_uint max_write_image_args; 60 | cl_uint mem_base_addr_align; 61 | cl_uint min_data_type_align_size; 62 | char name[256]; 63 | cl_uint native_vector_width_char; 64 | cl_uint native_vector_width_short; 65 | cl_uint native_vector_width_int; 66 | cl_uint native_vector_width_long; 67 | cl_uint 
native_vector_width_float; 68 | cl_uint native_vector_width_double; 69 | cl_uint native_vector_width_half; 70 | char opencl_c_version[256]; 71 | cl_uint preferred_vector_width_char; 72 | cl_uint preferred_vector_width_short; 73 | cl_uint preferred_vector_width_int; 74 | cl_uint preferred_vector_width_long; 75 | cl_uint preferred_vector_width_float; 76 | cl_uint preferred_vector_width_double; 77 | cl_uint preferred_vector_width_half; 78 | char profile[256]; 79 | size_t profiling_timer_resolution; 80 | cl_command_queue_properties queue_properties; 81 | cl_device_fp_config single_fp_config; 82 | cl_device_type type; 83 | char vendor[256]; 84 | cl_uint vendor_id; 85 | char version[256]; 86 | char driver_version[256]; 87 | } dinfo; 88 | #define DEVICE_ATTR(param,field) \ 89 | { param, sizeof(dinfo.field), &(dinfo.field) } 90 | 91 | static const char *dev_fp_config_str(cl_device_fp_config conf) 92 | { 93 | static char buf[256]; 94 | size_t offset = 0; 95 | 96 | buf[offset] = '\0'; 97 | if (conf & CL_FP_DENORM) 98 | offset += sprintf(buf + offset, 99 | "%sDenorm", offset > 0 ? "," : ""); 100 | if (conf & CL_FP_INF_NAN) 101 | offset += sprintf(buf + offset, "%sINF/NaN", 102 | offset > 0 ? ", " : ""); 103 | if (conf & CL_FP_ROUND_TO_NEAREST) 104 | offset += sprintf(buf + offset, "%sR/nearest", 105 | offset > 0 ? ", " : ""); 106 | if (conf & CL_FP_ROUND_TO_ZERO) 107 | offset += sprintf(buf + offset, "%sR/zero", 108 | offset > 0 ? ", " : ""); 109 | if (conf & CL_FP_ROUND_TO_INF) 110 | offset += sprintf(buf + offset, "%sR/INF", 111 | offset > 0 ? ", " : ""); 112 | if (conf & CL_FP_FMA) 113 | offset += sprintf(buf + offset, "%sFMA", 114 | offset > 0 ? ", " : ""); 115 | return buf; 116 | } 117 | 118 | static const char * 119 | dev_execution_capabilities_str(cl_device_exec_capabilities caps) 120 | { 121 | return ((caps & CL_EXEC_KERNEL) != 0 122 | ? ((caps & CL_EXEC_NATIVE_KERNEL) != 0 123 | ? 
"kernel, native kernel" 124 | : "kernel") 125 | : ((caps & CL_EXEC_NATIVE_KERNEL) != 0 126 | ? "native kernel" 127 | : "none")); 128 | } 129 | 130 | static const char * 131 | dev_mem_cache_type_str(cl_device_mem_cache_type cache_type) 132 | { 133 | switch (cache_type) 134 | { 135 | case CL_NONE: 136 | return "none"; 137 | case CL_READ_ONLY_CACHE: 138 | return "read-only"; 139 | case CL_READ_WRITE_CACHE: 140 | return "read-write"; 141 | default: 142 | return "unknown"; 143 | } 144 | } 145 | 146 | static const char * 147 | dev_local_mem_type_str(cl_device_local_mem_type local_type) 148 | { 149 | switch (local_type) 150 | { 151 | case CL_LOCAL: 152 | return "SRAM"; 153 | case CL_GLOBAL: 154 | return "DRAM"; 155 | default: 156 | return "unknown"; 157 | } 158 | } 159 | 160 | static const char * 161 | dev_type_str(cl_device_type dev_type) 162 | { 163 | switch (dev_type) 164 | { 165 | case CL_DEVICE_TYPE_CPU: 166 | return "CPU"; 167 | case CL_DEVICE_TYPE_GPU: 168 | return "GPU"; 169 | case CL_DEVICE_TYPE_ACCELERATOR: 170 | return "Accelerator"; 171 | case CL_DEVICE_TYPE_DEFAULT: 172 | return "Default"; 173 | default: 174 | return "unknown"; 175 | } 176 | } 177 | 178 | static const char * 179 | dev_queue_properties_str(cl_command_queue_properties cmdq) 180 | { 181 | static char buf[256]; 182 | size_t offset = 0; 183 | 184 | buf[offset] = '\0'; 185 | if (cmdq & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) 186 | offset += sprintf(buf + offset, "%sout-of-order execution", 187 | (offset > 0 ? ", " : "")); 188 | if (cmdq & CL_QUEUE_PROFILING_ENABLE) 189 | offset += sprintf(buf + offset, "%sprofiling", 190 | (offset > 0 ? 
", " : "")); 191 | return buf; 192 | } 193 | 194 | static void dump_device(int index, cl_device_id device_id) 195 | { 196 | static struct { 197 | cl_device_info info; 198 | size_t size; 199 | void *addr; 200 | } catalog[] = { 201 | DEVICE_ATTR(CL_DEVICE_ADDRESS_BITS, address_bits), 202 | DEVICE_ATTR(CL_DEVICE_AVAILABLE, available), 203 | DEVICE_ATTR(CL_DEVICE_COMPILER_AVAILABLE, compiler_available), 204 | DEVICE_ATTR(CL_DEVICE_DOUBLE_FP_CONFIG, double_fp_config), 205 | DEVICE_ATTR(CL_DEVICE_ENDIAN_LITTLE, endian_little), 206 | DEVICE_ATTR(CL_DEVICE_ERROR_CORRECTION_SUPPORT, 207 | error_correction_support), 208 | DEVICE_ATTR(CL_DEVICE_EXECUTION_CAPABILITIES, 209 | execution_capabilities), 210 | DEVICE_ATTR(CL_DEVICE_EXTENSIONS, extensions), 211 | DEVICE_ATTR(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, global_mem_cache_size), 212 | DEVICE_ATTR(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, global_mem_cache_type), 213 | DEVICE_ATTR(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, 214 | global_mem_cacheline_size), 215 | DEVICE_ATTR(CL_DEVICE_GLOBAL_MEM_SIZE, global_mem_size), 216 | DEVICE_ATTR(CL_DEVICE_HALF_FP_CONFIG, half_fp_config), 217 | DEVICE_ATTR(CL_DEVICE_HOST_UNIFIED_MEMORY, host_unified_memory), 218 | DEVICE_ATTR(CL_DEVICE_IMAGE_SUPPORT, image_support), 219 | DEVICE_ATTR(CL_DEVICE_IMAGE2D_MAX_HEIGHT, image2d_max_height), 220 | DEVICE_ATTR(CL_DEVICE_IMAGE2D_MAX_WIDTH, image2d_max_width), 221 | DEVICE_ATTR(CL_DEVICE_IMAGE3D_MAX_DEPTH, image3d_max_depth), 222 | DEVICE_ATTR(CL_DEVICE_IMAGE3D_MAX_HEIGHT, image3d_max_height), 223 | DEVICE_ATTR(CL_DEVICE_IMAGE3D_MAX_WIDTH, image3d_max_width), 224 | DEVICE_ATTR(CL_DEVICE_LOCAL_MEM_SIZE, local_mem_size), 225 | DEVICE_ATTR(CL_DEVICE_LOCAL_MEM_TYPE, local_mem_type), 226 | DEVICE_ATTR(CL_DEVICE_MAX_CLOCK_FREQUENCY, max_clock_frequency), 227 | DEVICE_ATTR(CL_DEVICE_MAX_COMPUTE_UNITS, max_compute_units), 228 | DEVICE_ATTR(CL_DEVICE_MAX_CONSTANT_ARGS, max_constant_args), 229 | DEVICE_ATTR(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, 230 | max_constant_buffer_size), 231 
| DEVICE_ATTR(CL_DEVICE_MAX_MEM_ALLOC_SIZE, max_mem_alloc_size), 232 | DEVICE_ATTR(CL_DEVICE_MAX_PARAMETER_SIZE, max_parameter_size), 233 | DEVICE_ATTR(CL_DEVICE_MAX_READ_IMAGE_ARGS, max_read_image_args), 234 | DEVICE_ATTR(CL_DEVICE_MAX_SAMPLERS, max_samplers), 235 | DEVICE_ATTR(CL_DEVICE_MAX_WORK_GROUP_SIZE, max_work_group_size), 236 | DEVICE_ATTR(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, 237 | max_work_item_dimensions), 238 | DEVICE_ATTR(CL_DEVICE_MAX_WORK_ITEM_SIZES, max_work_item_sizes), 239 | DEVICE_ATTR(CL_DEVICE_MAX_WRITE_IMAGE_ARGS, max_write_image_args), 240 | DEVICE_ATTR(CL_DEVICE_MEM_BASE_ADDR_ALIGN, mem_base_addr_align), 241 | DEVICE_ATTR(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, 242 | min_data_type_align_size), 243 | DEVICE_ATTR(CL_DEVICE_NAME, name), 244 | DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, 245 | native_vector_width_char), 246 | DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, 247 | native_vector_width_short), 248 | DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, 249 | native_vector_width_int), 250 | DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, 251 | native_vector_width_long), 252 | DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, 253 | native_vector_width_float), 254 | DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 255 | native_vector_width_double), 256 | DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, 257 | native_vector_width_half), 258 | DEVICE_ATTR(CL_DEVICE_OPENCL_C_VERSION, opencl_c_version), 259 | DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, 260 | preferred_vector_width_char), 261 | DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, 262 | preferred_vector_width_short), 263 | DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, 264 | preferred_vector_width_int), 265 | DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, 266 | preferred_vector_width_long), 267 | DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, 268 | preferred_vector_width_float), 269 | DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 270 | 
preferred_vector_width_double), 271 | DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, 272 | preferred_vector_width_half), 273 | DEVICE_ATTR(CL_DEVICE_PROFILE, profile), 274 | DEVICE_ATTR(CL_DEVICE_PROFILING_TIMER_RESOLUTION, 275 | profiling_timer_resolution), 276 | DEVICE_ATTR(CL_DEVICE_QUEUE_PROPERTIES, queue_properties), 277 | DEVICE_ATTR(CL_DEVICE_SINGLE_FP_CONFIG, single_fp_config), 278 | DEVICE_ATTR(CL_DEVICE_TYPE, type), 279 | DEVICE_ATTR(CL_DEVICE_VENDOR, vendor), 280 | DEVICE_ATTR(CL_DEVICE_VENDOR_ID, vendor_id), 281 | DEVICE_ATTR(CL_DEVICE_VERSION, version), 282 | DEVICE_ATTR(CL_DRIVER_VERSION, driver_version), 283 | }; 284 | cl_int i, rc; 285 | 286 | for (i=0; i < lengthof(catalog); i++) 287 | { 288 | rc = clGetDeviceInfo(device_id, 289 | catalog[i].info, 290 | catalog[i].size, 291 | catalog[i].addr, 292 | NULL); 293 | if (rc != CL_SUCCESS && 294 | !(rc == CL_INVALID_VALUE && 295 | (catalog[i].info == CL_DEVICE_DOUBLE_FP_CONFIG || 296 | catalog[i].info == CL_DEVICE_HALF_FP_CONFIG))) 297 | { 298 | fprintf(stderr, "failed on clGetDeviceInfo (%s)\n", 299 | opencl_strerror(rc)); 300 | exit(1); 301 | } 302 | } 303 | 304 | if (only_list) 305 | printf(" Device-%02d: %s / %s - %s\n", 306 | index + 1, 307 | dinfo.vendor, 308 | dinfo.name, 309 | dinfo.version); 310 | else 311 | { 312 | printf(" Device-%02d\n", index + 1); 313 | printf(" Device type: %s\n", 314 | dev_type_str(dinfo.type)); 315 | printf(" Vendor: %s (id: %08x)\n", 316 | dinfo.vendor, dinfo.vendor_id); 317 | printf(" Name: %s\n", 318 | dinfo.name); 319 | printf(" Version: %s\n", 320 | dinfo.version); 321 | printf(" Driver version: %s\n", 322 | dinfo.driver_version); 323 | printf(" OpenCL C version: %s\n", 324 | dinfo.opencl_c_version); 325 | printf(" Profile: %s\n", 326 | dinfo.profile); 327 | printf(" Device available: %s\n", 328 | dinfo.available ? 
"yes" : "no"); 329 | printf(" Address bits: %u\n", 330 | dinfo.address_bits); 331 | printf(" Compiler available: %s\n", 332 | dinfo.compiler_available ? "yes" : "no"); 333 | if (strstr(dinfo.extensions, "cl_khr_fp64") != NULL) 334 | printf(" Double FP config: %s\n", 335 | dev_fp_config_str(dinfo.double_fp_config)); 336 | printf(" Endian: %s\n", 337 | dinfo.endian_little ? "little" : "big"); 338 | printf(" Error correction support: %s\n", 339 | dinfo.error_correction_support ? "yes" : "no"); 340 | printf(" Execution capability: %s\n", 341 | dev_execution_capabilities_str(dinfo.execution_capabilities)); 342 | printf(" Extensions: %s\n", 343 | dinfo.extensions); 344 | printf(" Global memory cache size: %lu KB\n", 345 | dinfo.global_mem_cache_size / 1024); 346 | printf(" Global memory cache type: %s\n", 347 | dev_mem_cache_type_str(dinfo.global_mem_cache_type)); 348 | printf(" Global memory cacheline size: %u\n", 349 | dinfo.global_mem_cacheline_size); 350 | printf(" Global memory size: %zu MB\n", 351 | dinfo.global_mem_size / (1024 * 1024)); 352 | if (strstr(dinfo.extensions, "cl_khr_fp16") != NULL) 353 | printf(" Half FP config: %s\n", 354 | dev_fp_config_str(dinfo.half_fp_config)); 355 | printf(" Host unified memory: %s\n", 356 | dinfo.host_unified_memory ? "yes" : "no"); 357 | printf(" Image support: %s\n", 358 | dinfo.image_support ? 
"yes" : "no"); 359 | printf(" Image 2D max size: %lu x %lu\n", 360 | dinfo.image2d_max_width, 361 | dinfo.image2d_max_height); 362 | printf(" Image 3D max size: %lu x %lu x %lu\n", 363 | dinfo.image3d_max_width, 364 | dinfo.image3d_max_height, 365 | dinfo.image3d_max_depth); 366 | printf(" Local memory size: %lu\n", 367 | dinfo.local_mem_size); 368 | printf(" Local memory type: %s\n", 369 | dev_local_mem_type_str(dinfo.local_mem_type)); 370 | printf(" Max clock frequency: %u\n", 371 | dinfo.max_clock_frequency); 372 | printf(" Max compute units: %u\n", 373 | dinfo.max_compute_units); 374 | printf(" Max constant args: %u\n", 375 | dinfo.max_constant_args); 376 | printf(" Max constant buffer size: %zu\n", 377 | dinfo.max_constant_buffer_size); 378 | printf(" Max memory allocation size: %zu MB\n", 379 | dinfo.max_mem_alloc_size / (1024 * 1024)); 380 | printf(" Max parameter size: %zu\n", 381 | (cl_ulong)dinfo.max_parameter_size); 382 | printf(" Max read image args: %u\n", 383 | dinfo.max_read_image_args); 384 | printf(" Max samplers: %u\n", 385 | dinfo.max_samplers); 386 | printf(" Max work-group size: %zu\n", 387 | (cl_ulong)dinfo.max_work_group_size); 388 | printf(" Max work-item sizes: {%u,%u,%u}\n", 389 | (cl_uint) dinfo.max_work_item_sizes[0], 390 | (cl_uint) dinfo.max_work_item_sizes[1], 391 | (cl_uint) dinfo.max_work_item_sizes[2]); 392 | printf(" Max write image args: %u\n", 393 | dinfo.max_write_image_args); 394 | printf(" Memory base address align: %u\n", 395 | dinfo.mem_base_addr_align); 396 | printf(" Min data type align size: %u\n", 397 | dinfo.min_data_type_align_size); 398 | printf(" Native vector width - char: %u\n", 399 | dinfo.native_vector_width_char); 400 | printf(" Native vector width - short: %u\n", 401 | dinfo.native_vector_width_short); 402 | printf(" Native vector width - int: %u\n", 403 | dinfo.native_vector_width_int); 404 | printf(" Native vector width - long: %u\n", 405 | dinfo.native_vector_width_long); 406 | printf(" Native vector width 
- float: %u\n", 407 | dinfo.native_vector_width_float); 408 | if (strstr(dinfo.extensions, "cl_khr_fp64") != NULL) 409 | printf(" Native vector width - double: %u\n", 410 | dinfo.native_vector_width_double); 411 | if (strstr(dinfo.extensions, "cl_khr_fp16") != NULL) 412 | printf(" Native vector width - half: %u\n", 413 | dinfo.native_vector_width_half); 414 | printf(" Preferred vector width - char: %u\n", 415 | dinfo.preferred_vector_width_char); 416 | printf(" Preferred vector width - short: %u\n", 417 | dinfo.preferred_vector_width_short); 418 | printf(" Preferred vector width - int: %u\n", 419 | dinfo.preferred_vector_width_int); 420 | printf(" Preferred vector width - long: %u\n", 421 | dinfo.preferred_vector_width_long); 422 | printf(" Preferred vector width - float: %u\n", 423 | dinfo.preferred_vector_width_float); 424 | if (strstr(dinfo.extensions, "cl_khr_fp64") != NULL) 425 | printf(" Preferred vector width - double: %u\n", 426 | dinfo.preferred_vector_width_double); 427 | if (strstr(dinfo.extensions, "cl_khr_fp16") != NULL) 428 | printf(" Preferred vector width - half: %u\n", 429 | dinfo.preferred_vector_width_half); 430 | printf(" Profiling timer resolution: %lu\n", 431 | dinfo.profiling_timer_resolution); 432 | printf(" Queue properties: %s\n", 433 | dev_queue_properties_str(dinfo.queue_properties)); 434 | printf(" Sindle FP config: %s\n", 435 | dev_fp_config_str(dinfo.single_fp_config)); 436 | 437 | } 438 | } 439 | 440 | static void dump_platform(int index, cl_platform_id platform_id) 441 | { 442 | static struct { 443 | cl_platform_info info; 444 | size_t size; 445 | void *addr; 446 | } catalog[] = { 447 | PLATFORM_ATTR(CL_PLATFORM_PROFILE, profile), 448 | PLATFORM_ATTR(CL_PLATFORM_VERSION, version), 449 | PLATFORM_ATTR(CL_PLATFORM_NAME, name), 450 | PLATFORM_ATTR(CL_PLATFORM_VENDOR, vendor), 451 | PLATFORM_ATTR(CL_PLATFORM_EXTENSIONS, extensions), 452 | }; 453 | cl_device_id device_ids[256]; 454 | cl_uint device_num; 455 | cl_int i, rc; 456 | 457 | 
for (i=0; i < lengthof(catalog); i++) 458 | { 459 | rc = clGetPlatformInfo(platform_id, 460 | catalog[i].info, 461 | catalog[i].size, 462 | catalog[i].addr, 463 | NULL); 464 | if (rc != CL_SUCCESS) 465 | { 466 | fprintf(stderr, "failed on clGetPlatformInfo (%s)\n", 467 | opencl_strerror(rc)); 468 | exit(1); 469 | } 470 | } 471 | 472 | rc = clGetDeviceIDs(platform_id, 473 | CL_DEVICE_TYPE_ALL, 474 | lengthof(device_ids), 475 | device_ids, 476 | &device_num); 477 | if (rc != CL_SUCCESS) 478 | { 479 | fprintf(stderr, "failed on clGetDeviceIDs (%s)\n", 480 | opencl_strerror(rc)); 481 | exit(1); 482 | } 483 | 484 | if (only_list) 485 | printf("Platform-%02d: %s / %s - %s\n", index + 1, 486 | platform_info.vendor, 487 | platform_info.name, 488 | platform_info.version); 489 | else 490 | { 491 | printf("platform-index: %d\n", index + 1); 492 | printf("platform-vendor: %s\n", platform_info.vendor); 493 | printf("platform-name: %s\n", platform_info.name); 494 | printf("platform-version: %s\n", platform_info.version); 495 | printf("platform-profile: %s\n", platform_info.profile); 496 | printf("platform-extensions: %s\n", platform_info.extensions); 497 | } 498 | 499 | for (i=0; i < device_num; i++) 500 | { 501 | if (only_device < 0 || i + 1 == only_device) 502 | dump_device(i, device_ids[i]); 503 | } 504 | putchar('\n'); 505 | } 506 | 507 | int main(int argc, char *argv[]) 508 | { 509 | cl_platform_id platform_ids[32]; 510 | cl_uint platform_num; 511 | cl_int i, c, rc; 512 | 513 | while ((c = getopt(argc, argv, "lp:d:")) != -1) 514 | { 515 | switch (c) 516 | { 517 | case 'l': 518 | only_list = 1; 519 | break; 520 | case 'p': 521 | only_platform = atoi(optarg); 522 | break; 523 | case 'd': 524 | only_device = atoi(optarg); 525 | break; 526 | default: 527 | fprintf(stderr, 528 | "usage: %s [-l] [-p ] [-d ]\n", 529 | basename(argv[0])); 530 | return 1; 531 | } 532 | } 533 | 534 | rc = clGetPlatformIDs(lengthof(platform_ids), 535 | platform_ids, 536 | &platform_num); 537 | if (rc 
!= CL_SUCCESS) 538 | { 539 | fprintf(stderr, "failed on clGetPlatformIDs (%s)", 540 | opencl_strerror(rc)); 541 | return 1; 542 | } 543 | 544 | for (i=0; i < platform_num; i++) 545 | { 546 | if (only_platform < 0 || i + 1 == only_platform) 547 | dump_platform(i, platform_ids[i]); 548 | } 549 | return 0; 550 | } 551 | -------------------------------------------------------------------------------- /gpustub.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define lengthof(array) (sizeof (array) / sizeof ((array)[0])) 8 | 9 | extern const char *opencl_strerror(cl_int errcode); 10 | 11 | static const char *kernel_source = 12 | "__kernel void\n" 13 | "kernel_test(__global uint *arg)\n" 14 | "{\n" 15 | " arg[get_global_id(0)] = (get_global_size(0) -\n" 16 | " get_global_id(0));\n" 17 | "}\n"; 18 | 19 | #define MEM_SIZE 2048 20 | 21 | typedef struct { 22 | cl_kernel kernel; 23 | cl_mem dmem; 24 | cl_event ev[4]; 25 | cl_uint hmem[MEM_SIZE]; 26 | } clstate_t; 27 | 28 | static void 29 | cb_kernel_complete(cl_event event, cl_int status, void *user_data) 30 | { 31 | clstate_t *clstate = user_data; 32 | int i; 33 | 34 | for (i=0; i < MEM_SIZE; i++) 35 | printf(" %u", clstate->hmem[i]); 36 | putchar('\n'); 37 | 38 | clReleaseKernel(clstate->kernel); 39 | clReleaseMemObject(clstate->dmem); 40 | clReleaseEvent(clstate->ev[0]); 41 | clReleaseEvent(clstate->ev[1]); 42 | clReleaseEvent(clstate->ev[2]); 43 | free(clstate); 44 | } 45 | 46 | static int 47 | run_opencl_kernel(cl_context context, cl_device_id device) 48 | { 49 | cl_command_queue cmdq; 50 | cl_program program; 51 | clstate_t *clstate; 52 | size_t gwork_sz = MEM_SIZE; 53 | size_t lwork_sz = MEM_SIZE / 4; 54 | size_t source_len = strlen(kernel_source); 55 | cl_int rc; 56 | 57 | cmdq = clCreateCommandQueue(context, device, 58 | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | 59 | CL_QUEUE_PROFILING_ENABLE, 60 | &rc); 61 | if 
(rc != CL_SUCCESS) 62 | { 63 | fprintf(stderr, "failed on clCreateCommandQueue (%s)\n", 64 | opencl_strerror(rc)); 65 | return 1; 66 | } 67 | 68 | program = clCreateProgramWithSource(context, 69 | 1, 70 | &kernel_source, 71 | &source_len, 72 | &rc); 73 | if (rc != CL_SUCCESS) 74 | { 75 | fprintf(stderr, "failed on clCreateProgramWithSource (%s)\n", 76 | opencl_strerror(rc)); 77 | return 1; 78 | } 79 | 80 | rc = clBuildProgram(program, 81 | 1, 82 | &device, 83 | NULL, 84 | NULL, 85 | NULL); 86 | if (rc != CL_SUCCESS) 87 | { 88 | fprintf(stderr, "failed on clBuildProgram(%s)\n", 89 | opencl_strerror(rc)); 90 | if (rc == CL_BUILD_PROGRAM_FAILURE ) 91 | { 92 | char buffer[65536]; 93 | 94 | rc = clGetProgramBuildInfo(program, 95 | device, 96 | CL_PROGRAM_BUILD_LOG, 97 | sizeof(buffer), 98 | buffer, 99 | NULL); 100 | if (rc == CL_SUCCESS) 101 | fputs(buffer, stderr); 102 | } 103 | return 1; 104 | } 105 | 106 | retry: 107 | clstate = malloc(sizeof(clstate_t)); 108 | if (!clstate) 109 | { 110 | fprintf(stderr, "out of memory"); 111 | return 1; 112 | } 113 | 114 | clstate->kernel = clCreateKernel(program, 115 | "kernel_test", 116 | &rc); 117 | if (rc != CL_SUCCESS) 118 | { 119 | fprintf(stderr, "failed on clCreateKernel (%s)", 120 | opencl_strerror(rc)); 121 | return 1; 122 | } 123 | 124 | clstate->dmem = clCreateBuffer(context, 125 | CL_MEM_READ_WRITE, 126 | sizeof(cl_uint) * MEM_SIZE, 127 | NULL, 128 | &rc); 129 | if (rc != CL_SUCCESS) 130 | { 131 | fprintf(stderr, "failed on clCreateBuffer (%s)", 132 | opencl_strerror(rc)); 133 | return 1; 134 | } 135 | 136 | rc = clSetKernelArg(clstate->kernel, 137 | 0, 138 | sizeof(cl_mem), 139 | &clstate->dmem); 140 | if (rc != CL_SUCCESS) 141 | { 142 | fprintf(stderr, "failed on clSetKernelArg (%s)", 143 | opencl_strerror(rc)); 144 | return 1; 145 | } 146 | 147 | /* OK, enqueue kernel */ 148 | rc = clEnqueueWriteBuffer(cmdq, 149 | clstate->dmem, 150 | CL_FALSE, 151 | 0, 152 | sizeof(cl_uint) * MEM_SIZE, 153 | clstate->hmem, 154 | 0, 
155 | NULL, 156 | &clstate->ev[0]); 157 | if (rc != CL_SUCCESS) 158 | { 159 | fprintf(stderr, "failed on clEnqueueWriteBuffer (%s)", 160 | opencl_strerror(rc)); 161 | return 1; 162 | } 163 | 164 | rc = clEnqueueNDRangeKernel(cmdq, 165 | clstate->kernel, 166 | 1, 167 | NULL, 168 | &gwork_sz, 169 | &lwork_sz, 170 | 1, 171 | &clstate->ev[0], 172 | &clstate->ev[1]); 173 | if (rc != CL_SUCCESS) 174 | { 175 | fprintf(stderr, "failed on clEnqueueNDRangeKernel (%s)", 176 | opencl_strerror(rc)); 177 | return 1; 178 | } 179 | 180 | rc = clEnqueueReadBuffer(cmdq, 181 | clstate->dmem, 182 | CL_FALSE, 183 | 0, 184 | sizeof(cl_uint) * MEM_SIZE, 185 | clstate->hmem, 186 | 1, 187 | &clstate->ev[1], 188 | &clstate->ev[2]); 189 | if (rc != CL_SUCCESS) 190 | { 191 | fprintf(stderr, "failed on clEnqueueReadBuffer (%s)", 192 | opencl_strerror(rc)); 193 | return 1; 194 | } 195 | 196 | rc = clSetEventCallback(clstate->ev[2], 197 | CL_COMPLETE, 198 | cb_kernel_complete, 199 | clstate); 200 | if (rc != CL_SUCCESS) 201 | { 202 | fprintf(stderr, "failed on clSetEventCallback (%s)", 203 | opencl_strerror(rc)); 204 | return 1; 205 | } 206 | sleep(15); 207 | goto retry; 208 | } 209 | 210 | int main(int argc, char *argv[]) 211 | { 212 | cl_platform_id platforms[32]; 213 | cl_device_id devices[32]; 214 | cl_context context; 215 | cl_int num_platforms; 216 | cl_int num_devices; 217 | cl_int pindex = 0; 218 | cl_int dindex = 0; 219 | cl_int i, c, rc; 220 | 221 | while ((c = getopt(argc, argv, "p:d:")) != -1) 222 | { 223 | switch (c) 224 | { 225 | case 'p': 226 | pindex = atoi(optarg); 227 | break; 228 | case 'd': 229 | dindex = atoi(optarg); 230 | break; 231 | default: 232 | fprintf(stderr, "usage: %s [-p ] [-d ]\n", 233 | basename(argv[0])); 234 | return 1; 235 | } 236 | } 237 | opencl_entry_init(); 238 | 239 | rc = clGetPlatformIDs(lengthof(platforms), 240 | platforms, 241 | &num_platforms); 242 | if (rc != CL_SUCCESS) 243 | { 244 | fprintf(stderr, "failed on clGetPlatformIDs (%s)\n", 245 | 
opencl_strerror(rc)); 246 | return 1; 247 | } 248 | if (pindex < 0 || pindex >= num_platforms) 249 | { 250 | fprintf(stderr, "platform (%d) is not valid\n", pindex); 251 | return 1; 252 | } 253 | 254 | rc = clGetDeviceIDs(platforms[pindex], 255 | CL_DEVICE_TYPE_ALL, 256 | lengthof(devices), 257 | devices, 258 | &num_devices); 259 | if (rc != CL_SUCCESS) 260 | { 261 | fprintf(stderr, "failed on clGetDeviceIDs (%s)\n", 262 | opencl_strerror(rc)); 263 | return 1; 264 | } 265 | if (dindex < 0 || dindex >= num_devices) 266 | { 267 | fprintf(stderr, "device (%d) is not valid\n", dindex); 268 | return 1; 269 | } 270 | 271 | context = clCreateContext(NULL, 272 | num_devices, 273 | devices, 274 | NULL, 275 | NULL, 276 | &rc); 277 | if (rc != CL_SUCCESS) 278 | { 279 | fprintf(stderr, "failed to create opencl context (%s)", 280 | opencl_strerror(rc)); 281 | return 1; 282 | } 283 | return run_opencl_kernel(context, devices[dindex]); 284 | } 285 | -------------------------------------------------------------------------------- /gputest.c: -------------------------------------------------------------------------------- 1 | /* 2 | * gputest.c - test module for OpenCL/CUDA functionalities 3 | */ 4 | //#define GPUTEST_CUDA 1 5 | #define GPUTEST_OPENCL 1 6 | 7 | #include "postgres.h" 8 | #include "fmgr.h" 9 | #include "miscadmin.h" 10 | #include "storage/bufmgr.h" 11 | #include "storage/ipc.h" 12 | #include 13 | #ifdef GPUTEST_CUDA 14 | #include 15 | #endif 16 | #ifdef GPUTEST_OPENCL 17 | #include 18 | #endif 19 | 20 | PG_MODULE_MAGIC; 21 | 22 | /* declarations */ 23 | extern Datum gputest_init_opencl(PG_FUNCTION_ARGS); 24 | extern Datum gputest_dmasend_opencl(PG_FUNCTION_ARGS); 25 | extern Datum gputest_cleanup_opencl(PG_FUNCTION_ARGS); 26 | extern void _PG_init(void); 27 | 28 | static shmem_startup_hook_type shmem_startup_hook_next; 29 | 30 | #define TIMEVAL_DIFF(tv2,tv1) \ 31 | (((double)((tv2)->tv_sec * 1000000L + (tv2)->tv_usec) - \ 32 | (double)((tv1)->tv_sec * 1000000L + 
(tv1)->tv_usec)) / 1000000.0) 33 | 34 | #ifdef GPUTEST_CUDA 35 | static bool cuda_initialized = false; 36 | static CUdevice cuda_device; 37 | static CUcontext cuda_context = NULL; 38 | 39 | /* why CUDA 6.5 lacks declaration? */ 40 | extern CUresult cuGetErrorString(CUresult error, const char** pStr); 41 | 42 | static const char * 43 | cuda_strerror(CUresult rc) 44 | { 45 | static char buffer[256]; 46 | const char *result; 47 | 48 | if (cuGetErrorString(rc, &result) != CUDA_SUCCESS) 49 | { 50 | snprintf(buffer, sizeof(buffer), "cuda error (%d)", rc); 51 | return buffer; 52 | } 53 | return result; 54 | } 55 | #endif 56 | #ifdef GPUTEST_OPENCL 57 | static cl_platform_id opencl_platform_id; 58 | static cl_device_id opencl_device_id; 59 | static cl_context opencl_context = NULL; 60 | #endif 61 | 62 | Datum 63 | gputest_init_opencl(PG_FUNCTION_ARGS) 64 | { 65 | #ifdef GPUTEST_CUDA 66 | CUresult rc; 67 | struct timeval tv1, tv2; 68 | 69 | if (!cuda_initialized) 70 | { 71 | rc = cuInit(0); 72 | if (rc != CUDA_SUCCESS) 73 | elog(ERROR, "failed on cuInit: %s", cuda_strerror(rc)); 74 | cuda_initialized = true; 75 | } 76 | 77 | if (!cuda_context) 78 | { 79 | rc = cuDeviceGet(&cuda_device, 0); 80 | if (rc != CUDA_SUCCESS) 81 | elog(ERROR, "failed on cuDeviceGet: %s", cuda_strerror(rc)); 82 | 83 | rc = cuCtxCreate(&cuda_context, 0, cuda_device); 84 | if (rc != CUDA_SUCCESS) 85 | elog(ERROR, "failed on cuCtxCreate: %s", cuda_strerror(rc)); 86 | 87 | rc = cuCtxSetCurrent(cuda_context); 88 | if (rc != CUDA_SUCCESS) 89 | elog(ERROR, "failed on cuCtxSetCurrent: %s", cuda_strerror(rc)); 90 | 91 | gettimeofday(&tv1, NULL); 92 | rc = cuMemHostRegister(BufferBlocks, NBuffers * (Size) BLCKSZ, 93 | CU_MEMHOSTREGISTER_PORTABLE); 94 | if (rc != CUDA_SUCCESS) 95 | elog(ERROR, "cuMemHostRegister: %s", cuda_strerror(rc)); 96 | gettimeofday(&tv2, NULL); 97 | elog(INFO, "cuMemHostRegister takes %.2fsec to map %zuGB", 98 | TIMEVAL_DIFF(&tv2, &tv1), ((Size)NBuffers * (Size) BLCKSZ) >> 30); 99 | } 
100 | else 101 | elog(INFO, "we already have cuda context"); 102 | #endif 103 | #ifdef GPUTEST_OPENCL 104 | cl_int rc; 105 | struct timeval tv1, tv2; 106 | 107 | if (!opencl_context) 108 | { 109 | rc = clGetPlatformIDs(1, &opencl_platform_id, NULL); 110 | if (rc != CL_SUCCESS) 111 | elog(ERROR, "failed on clGetPlatformIDs: %d", rc); 112 | 113 | rc = clGetDeviceIDs(opencl_platform_id, 114 | CL_DEVICE_TYPE_ALL, 115 | 1, 116 | &opencl_device_id, 117 | NULL); 118 | if (rc != CL_SUCCESS) 119 | elog(ERROR, "failed on clGetDeviceIDs: %d", rc); 120 | 121 | opencl_context = clCreateContext(NULL, 122 | 1, 123 | &opencl_device_id, 124 | NULL, 125 | NULL, 126 | &rc); 127 | if (rc != CL_SUCCESS) 128 | elog(ERROR, "failed on clCreateContext: %d", rc); 129 | } 130 | gettimeofday(&tv1, NULL); 131 | (void) clCreateBuffer(opencl_context, 132 | CL_MEM_READ_WRITE | 133 | CL_MEM_USE_HOST_PTR, 134 | NBuffers * (Size) BLCKSZ, 135 | BufferBlocks, 136 | &rc); 137 | if (rc != CL_SUCCESS) 138 | elog(ERROR, "failed on clCreateBuffer: %d", rc); 139 | gettimeofday(&tv2, NULL); 140 | elog(LOG, "clCreateBuffer takes %.2fsec to map %zuGB", 141 | TIMEVAL_DIFF(&tv2, &tv1), ((Size)NBuffers * (Size) BLCKSZ) >> 30); 142 | #endif 143 | PG_RETURN_NULL(); 144 | } 145 | PG_FUNCTION_INFO_V1(gputest_init_opencl); 146 | 147 | Datum 148 | gputest_dmasend_opencl(PG_FUNCTION_ARGS) 149 | { 150 | #ifdef GPUTEST_CUDA 151 | CUdevice device; 152 | CUcontext context; 153 | CUstream stream; 154 | CUdeviceptr daddr; 155 | CUevent start; 156 | CUevent stop; 157 | CUresult rc; 158 | int loop; 159 | float elapsed; 160 | Size unitsz = 100 * 1024 * 1024; //100MB 161 | Size offset; 162 | 163 | if (!cuda_initialized) 164 | { 165 | rc = cuInit(0); 166 | if (rc != CUDA_SUCCESS) 167 | elog(ERROR, "failed on cuInit: %s", cuda_strerror(rc)); 168 | cuda_initialized = true; 169 | } 170 | 171 | rc = cuDeviceGet(&device, 0); 172 | if (rc != CUDA_SUCCESS) 173 | elog(ERROR, "failed on cuDeviceGet: %s", cuda_strerror(rc)); 174 | 175 | rc 
= cuCtxCreate(&context, 0, device); 176 | if (rc != CUDA_SUCCESS) 177 | elog(ERROR, "failed on cuCtxCreate: %s", cuda_strerror(rc)); 178 | 179 | rc = cuCtxSetCurrent(context); 180 | if (rc != CUDA_SUCCESS) 181 | elog(ERROR, "failed on cuCtxSetCurrent: %s", cuda_strerror(rc)); 182 | 183 | rc = cuStreamCreate(&stream, CU_STREAM_DEFAULT); 184 | if (rc != CUDA_SUCCESS) 185 | elog(ERROR, "failed on cuStreamCreate: %s", cuda_strerror(rc)); 186 | 187 | rc = cuMemAlloc(&daddr, unitsz); 188 | if (rc != CUDA_SUCCESS) 189 | elog(ERROR, "failed on cuMemAlloc: %s", cuda_strerror(rc)); 190 | 191 | rc = cuEventCreate(&start, CU_EVENT_DEFAULT); 192 | if (rc != CUDA_SUCCESS) 193 | elog(ERROR, "failed on cuEventCreate: %s", cuda_strerror(rc)); 194 | 195 | rc = cuEventCreate(&stop, CU_EVENT_DEFAULT); 196 | if (rc != CUDA_SUCCESS) 197 | elog(ERROR, "failed on cuEventCreate: %s", cuda_strerror(rc)); 198 | 199 | rc = cuEventRecord(start, stream); 200 | if (rc != CUDA_SUCCESS) 201 | elog(ERROR, "failed on cuEventRecord: %s", cuda_strerror(rc)); 202 | 203 | for (loop = 0; loop < 1; loop++) 204 | { 205 | for (offset = 0; 206 | offset < NBuffers * (Size) BLCKSZ - unitsz; 207 | offset += unitsz) 208 | { 209 | rc = cuMemcpyHtoDAsync(daddr, 210 | BufferBlocks + offset, 211 | unitsz, 212 | stream); 213 | if (rc != CUDA_SUCCESS) 214 | elog(ERROR, "failed on cuMemcpyHtoDAsync: %s", 215 | cuda_strerror(rc)); 216 | } 217 | } 218 | rc = cuEventRecord(stop, stream); 219 | if (rc != CUDA_SUCCESS) 220 | elog(ERROR, "failed on cuEventRecord: %s", cuda_strerror(rc)); 221 | 222 | rc = cuStreamSynchronize(stream); 223 | if (rc != CUDA_SUCCESS) 224 | elog(ERROR, "failed on cuStreamSynchronize: %s", cuda_strerror(rc)); 225 | 226 | rc = cuEventElapsedTime (&elapsed, start, stop); 227 | if (rc != CUDA_SUCCESS) 228 | elog(ERROR, "failed on cuEventElapsedTime: %s", cuda_strerror(rc)); 229 | elapsed /= 1000000.0; /* sec */ 230 | 231 | rc = cuMemFree(daddr); 232 | if (rc != CUDA_SUCCESS) 233 | elog(ERROR, "failed 
on cuMemFree: %s", cuda_strerror(rc)); 234 | 235 | elog(INFO, "%zu GB DMA took %.2f sec (%.2f GB/sec)", 236 | (loop * NBuffers * (Size) BLCKSZ) >> 30, 237 | elapsed, 238 | (double)((loop * NBuffers * (Size) BLCKSZ) >> 30) / elapsed); 239 | 240 | rc = cuCtxDestroy(context); 241 | if (rc != CUDA_SUCCESS) 242 | elog(ERROR, "failed on cuCtxDestroy: %s", cuda_strerror(rc)); 243 | #endif 244 | #ifdef GPUTEST_OPENCL 245 | cl_int rc; 246 | struct timeval tv1, tv2; 247 | 248 | if (!opencl_context) 249 | { 250 | rc = clGetPlatformIDs(1, &opencl_platform_id, NULL); 251 | if (rc != CL_SUCCESS) 252 | elog(ERROR, "failed on clGetPlatformIDs: %d", rc); 253 | 254 | rc = clGetDeviceIDs(opencl_platform_id, 255 | CL_DEVICE_TYPE_ALL, 256 | 1, 257 | &opencl_device_id, 258 | NULL); 259 | if (rc != CL_SUCCESS) 260 | elog(ERROR, "failed on clGetDeviceIDs: %d", rc); 261 | 262 | opencl_context = clCreateContext(NULL, 263 | 1, 264 | &opencl_device_id, 265 | NULL, 266 | NULL, 267 | &rc); 268 | if (rc != CL_SUCCESS) 269 | elog(ERROR, "failed on clCreateContext: %d", rc); 270 | } 271 | gettimeofday(&tv1, NULL); 272 | (void) clCreateBuffer(opencl_context, 273 | CL_MEM_READ_WRITE | 274 | CL_MEM_USE_HOST_PTR, 275 | NBuffers * (Size) BLCKSZ, 276 | BufferBlocks, 277 | &rc); 278 | if (rc != CL_SUCCESS) 279 | elog(ERROR, "failed on clCreateBuffer: %d", rc); 280 | gettimeofday(&tv2, NULL); 281 | elog(LOG, "clCreateBuffer takes %.2fsec to map %zuGB", 282 | TIMEVAL_DIFF(&tv2, &tv1), ((Size)NBuffers * (Size) BLCKSZ) >> 30); 283 | #endif 284 | PG_RETURN_NULL(); 285 | } 286 | PG_FUNCTION_INFO_V1(gputest_dmasend_opencl); 287 | 288 | Datum 289 | gputest_cleanup_opencl(PG_FUNCTION_ARGS) 290 | { 291 | #ifdef GPUTEST_CUDA 292 | CUresult rc; 293 | 294 | if (!cuda_initialized) 295 | { 296 | rc = cuInit(0); 297 | if (rc != CUDA_SUCCESS) 298 | elog(ERROR, "failed on cuInit: %s", cuda_strerror(rc)); 299 | cuda_initialized = true; 300 | } 301 | 302 | if (cuda_context) 303 | { 304 | rc = cuCtxDestroy(cuda_context); 305 
| if (rc != CUDA_SUCCESS) 306 | elog(ERROR, "failed on cuCtxDestroy: %s", cuda_strerror(rc)); 307 | cuda_context = NULL; 308 | } 309 | else 310 | elog(INFO, "no cuda context exists"); 311 | #endif 312 | #ifdef GPUTEST_OPENCL 313 | 314 | 315 | 316 | #endif 317 | PG_RETURN_NULL(); 318 | } 319 | PG_FUNCTION_INFO_V1(gputest_cleanup_opencl); 320 | 321 | static void 322 | gputest_init(void) 323 | { 324 | #ifdef GPUTEST_CUDA 325 | int n_devices; 326 | CUdevice devices[10]; 327 | CUcontext context; 328 | CUresult rc; 329 | struct timeval tv1, tv2; 330 | 331 | if (shmem_startup_hook_next) 332 | (*shmem_startup_hook_next)(); 333 | 334 | elog(LOG, "Loading GPU Tests"); 335 | 336 | rc = cuInit(0); 337 | if (rc != CUDA_SUCCESS) 338 | elog(ERROR, "failed on cuInit: %s", cuda_strerror(rc)); 339 | 340 | rc = cuDeviceGetCount(&n_devices); 341 | if (rc != CUDA_SUCCESS) 342 | elog(ERROR, "failed on cuDeviceGetCount: %s", cuda_strerror(rc)); 343 | if (n_devices < 1) 344 | elog(ERROR, "no cuda device found"); 345 | 346 | rc = cuDeviceGet(devices, 0); 347 | if (rc != CUDA_SUCCESS) 348 | elog(ERROR, "failed on cuDeviceGet: %s", cuda_strerror(rc)); 349 | 350 | rc = cuCtxCreate(&context, 0, devices[0]); 351 | if (rc != CUDA_SUCCESS) 352 | elog(ERROR, "failed on cuCtxCreate: %s", cuda_strerror(rc)); 353 | 354 | rc = cuCtxSetCurrent(context); 355 | if (rc != CUDA_SUCCESS) 356 | elog(ERROR, "failed on cuCtxSetCurrent: %s", cuda_strerror(rc)); 357 | 358 | elog(LOG, "%p %zu", BufferBlocks, NBuffers * (Size) BLCKSZ); 359 | gettimeofday(&tv1, NULL); 360 | rc = cuMemHostRegister(BufferBlocks, NBuffers * (Size) BLCKSZ, 361 | CU_MEMHOSTREGISTER_PORTABLE); 362 | if (rc != CUDA_SUCCESS) 363 | elog(ERROR, "cuMemHostRegister: %s", cuda_strerror(rc)); 364 | gettimeofday(&tv2, NULL); 365 | elog(LOG, "cuMemHostRegister takes %.2fsec to map %zuGB", 366 | TIMEVAL_DIFF(&tv2, &tv1), ((Size)NBuffers * (Size) BLCKSZ) >> 30); 367 | #endif 368 | #ifdef GPUTEST_OPENCL 369 | cl_platform_id platform_id; 370 | 
cl_device_id device_id; 371 | cl_context context; 372 | cl_int rc; 373 | struct timeval tv1, tv2; 374 | 375 | rc = clGetPlatformIDs(1, &platform_id, NULL); 376 | if (rc != CL_SUCCESS) 377 | elog(ERROR, "failed on clGetPlatformIDs: %d", rc); 378 | 379 | rc = clGetDeviceIDs(platform_id, 380 | CL_DEVICE_TYPE_ALL, 381 | 1, 382 | &device_id, 383 | NULL); 384 | if (rc != CL_SUCCESS) 385 | elog(ERROR, "failed on clGetDeviceIDs: %d", rc); 386 | 387 | context = clCreateContext(NULL, 388 | 1, 389 | &device_id, 390 | NULL, 391 | NULL, 392 | &rc); 393 | if (rc != CL_SUCCESS) 394 | elog(ERROR, "failed on clCreateContext: %d", rc); 395 | 396 | gettimeofday(&tv1, NULL); 397 | (void) clCreateBuffer(context, 398 | CL_MEM_READ_WRITE | 399 | CL_MEM_USE_HOST_PTR, 400 | NBuffers * (Size) BLCKSZ, 401 | BufferBlocks, 402 | &rc); 403 | if (rc != CL_SUCCESS) 404 | elog(ERROR, "failed on clCreateBuffer: %d", rc); 405 | gettimeofday(&tv2, NULL); 406 | elog(LOG, "clCreateBuffer takes %.2fsec to map %zuGB", 407 | TIMEVAL_DIFF(&tv2, &tv1), ((Size)NBuffers * (Size) BLCKSZ) >> 30); 408 | #endif 409 | } 410 | 411 | void 412 | _PG_init(void) 413 | { 414 | if (!process_shared_preload_libraries_in_progress) 415 | elog(ERROR, "gputest must be loaded via shared_preload_libraries"); 416 | 417 | shmem_startup_hook_next = shmem_startup_hook; 418 | shmem_startup_hook = gputest_init; 419 | } 420 | -------------------------------------------------------------------------------- /memeat.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | static int usage(int argc, char * const argv[]) 9 | { 10 | printf("usage: %s -s \n", argv[0]); 11 | 12 | exit(0); 13 | } 14 | 15 | int main(int argc, char * const argv[]) 16 | { 17 | size_t size = (1UL << 30); /* 1GB */ 18 | char *buffer; 19 | int c; 20 | 21 | while ((c = getopt(argc, argv, "s:")) != -1) 22 | { 23 | if (c == 's') 24 | { 25 | int i, unit; 26 | 
27 | for (i=0; optarg[i] >= '0' && optarg[i] <= '9'; i++); 28 | 29 | if (strcasecmp(optarg + i, "") == 0) 30 | size = atol(optarg); 31 | else if (strcasecmp(optarg + i, "k") == 0) 32 | size = atol(optarg) << 10; 33 | else if (strcasecmp(optarg + i, "m") == 0) 34 | size = atol(optarg) << 20; 35 | else if (strcasecmp(optarg + i, "g") == 0) 36 | size = atol(optarg) << 30; 37 | else 38 | usage(argc, argv); 39 | } 40 | else 41 | usage(argc, argv); 42 | } 43 | 44 | /* memory allocation */ 45 | buffer = malloc(size); 46 | if (!buffer) 47 | { 48 | printf("failed to allocate %lu bytes : %s\n", size, strerror(errno)); 49 | return 1; 50 | } 51 | memset(buffer, 0, size); 52 | 53 | /* memory pinning */ 54 | if (mlockall(MCL_FUTURE) != 0) 55 | { 56 | printf("failed to lock memory : %s\n", strerror(errno)); 57 | return 1; 58 | } 59 | printf("OK, %s allocated and pinned %lu bytes\n", argv[0], size); 60 | 61 | /* infinite sleep */ 62 | for (;;) 63 | sleep(60); 64 | 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /nvinfo.c: --------------------------------------------------------------------------------
/*
 * nvinfo - print the name, global memory size and device attributes of
 * every CUDA device visible through the driver API.
 *
 * NOTE(review): the header names of the three #include lines were lost in
 * this listing; they are reconstructed from the symbols used below
 * (printf/fprintf, exit, cu*). Confirm against the original file.
 */
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>

/*
 * report_cuda_error
 *
 * Writes "<function>:<line> <msg> (<error name>:<error string>)" to stderr
 * and terminates the process with a non-zero exit status.
 *
 * func_name / lineno : call site, supplied by the ereport() macro
 * errcode            : CUDA driver result code being reported
 * msg                : short description of the failed operation
 */
static void
report_cuda_error(const char *func_name, int lineno,
                  CUresult errcode, const char *msg)
{
    const char *err_name = NULL;
    const char *err_str = NULL;

    /*
     * Either lookup may fail for an unrecognized code and leave the
     * output pointer NULL; never pass NULL to "%s".
     */
    if (cuGetErrorName(errcode, &err_name) != CUDA_SUCCESS || !err_name)
        err_name = "unknown";
    if (cuGetErrorString(errcode, &err_str) != CUDA_SUCCESS || !err_str)
        err_str = "unknown error";

    fprintf(stderr, "%s:%d %s (%s:%s)\n",
            func_name, lineno, msg, err_name, err_str);
    /* this is an error path: report failure to the caller's shell */
    exit(1);
}
#define ereport(errcode,msg) \
    report_cuda_error(__func__, __LINE__, (errcode), (msg))

#define lengthof(array) (sizeof (array) / sizeof ((array)[0]))

/* How the integer returned by cuDeviceGetAttribute() is rendered */
#define ATTR_INT            0
#define ATTR_BYTES          1
#define ATTR_KB             2
#define ATTR_MB             3
#define ATTR_KHZ            4
#define ATTR_COMPUTEMODE    5
#define ATTR_BOOL           6

/*
 * Catalog of the attributes printed for each device, in output order:
 * attribute id, rendering type (ATTR_*), and the label shown to the user.
 */
static const struct {
    CUdevice_attribute  attnum;
    int                 atttype;
    const char         *attname;
} attr_catalog[] = {
    {CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
     ATTR_INT, "Max # of threads per block"},
    {CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,
     ATTR_INT, "Max block dimension X"},
    {CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y,
     ATTR_INT, "Max block dimension Y"},
    {CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z,
     ATTR_INT, "Max block dimension Z"},
    {CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X,
     ATTR_INT, "Max grid dimension X"},
    {CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y,
     ATTR_INT, "Max grid dimension Y"},
    {CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z,
     ATTR_INT, "Max grid dimension Z"},
    {CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
     ATTR_BYTES, "Max shared memory per block in bytes"},
    {CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY,
     ATTR_BYTES, "Total constant memory"},
    {CU_DEVICE_ATTRIBUTE_WARP_SIZE,
     ATTR_INT, "Warp size"},
    {CU_DEVICE_ATTRIBUTE_MAX_PITCH,
     ATTR_INT, "Max pitch"},
    {CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,
     ATTR_INT, "Max registers per block"},
    {CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
     ATTR_KHZ, "Clock rate [kHZ]"},
    {CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT,
     ATTR_INT, "Texture alignment"},
    {CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
     ATTR_INT, "Number of multiprocessors"},
    {CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,
     ATTR_BOOL, "Has kernel execution timeout"},
    {CU_DEVICE_ATTRIBUTE_INTEGRATED,
     ATTR_BOOL, "Host integrated memory"},
    {CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY,
     ATTR_BOOL, "Host memory mapping to device"},
    {CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,
     ATTR_COMPUTEMODE, "Compute mode"},
    {CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT,
     ATTR_INT, "Surface alignment"},
    {CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS,
     ATTR_BOOL, "Concurrent kernels"},
    {CU_DEVICE_ATTRIBUTE_ECC_ENABLED,
     ATTR_BOOL, "ECC memory is supported"},
    {CU_DEVICE_ATTRIBUTE_PCI_BUS_ID,
     ATTR_INT, "PCI Bus ID"},
    {CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID,
     ATTR_INT, "PCI Device ID"},
    {CU_DEVICE_ATTRIBUTE_TCC_DRIVER,
     ATTR_BOOL, "TCC driver model"},
    {CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE,
     ATTR_KHZ, "Peak memory clock rate"},
    {CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH,
     ATTR_INT, "Global memory bus width"},
    {CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE,
     ATTR_INT, "L2 cache size"},
    {CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR,
     ATTR_INT, "Max threads per multiprocessor"},
    {CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT,
     ATTR_INT, "Number of asynchronous engines"},
    {CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING,
     ATTR_BOOL, "Unified address space support"},
    {CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID,
     ATTR_INT, "PCI domain ID"},
    {CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
     ATTR_INT, "Compute Capability Major"},
    {CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
     ATTR_INT, "Compute Capability Minor"},
    {CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED,
     ATTR_BOOL, "Stream priorities supported"},
    {CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED,
     ATTR_BOOL, "L1 cache on global memory"},
    {CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED,
     ATTR_BOOL, "L1 cache on local memory"},
    {CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR,
     ATTR_BYTES, "Max shared memory per multiprocessor"},
    {CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR,
     ATTR_INT, "Max # of 32bit registers per multiprocessor"},
    {CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY,
     ATTR_BOOL, "Can allocate managed memory"},
    {CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD,
     ATTR_BOOL, "Device is on a multi-GPU board"},
    {CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID,
     ATTR_INT, "Unique id of the device if multi-GPU board"},
    /*
     * NOTE(review): same attribute as the first entry, printed under a
     * second label; kept so the program's output is unchanged.
     */
    {CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
     ATTR_INT, "Max threads per block"},
};

/* Maps a CU_COMPUTEMODE_* value to the label printed for it. */
static const char *
compute_mode_label(int mode)
{
    switch (mode)
    {
        case CU_COMPUTEMODE_DEFAULT:
            return "default";
        case CU_COMPUTEMODE_EXCLUSIVE:
            return "exclusive";
        case CU_COMPUTEMODE_PROHIBITED:
            return "prohibited";
        case CU_COMPUTEMODE_EXCLUSIVE_PROCESS:
            return "exclusive process";
        default:
            return "unknown";
    }
}

/* Prints one "<label>: <value>" line, rendered according to atttype. */
static void
print_attribute(const char *attname, int atttype, int value)
{
    switch (atttype)
    {
        case ATTR_KB:
            printf("%s: %dkB\n", attname, value);
            break;
        case ATTR_MB:
            printf("%s: %dMB\n", attname, value);
            break;
        case ATTR_KHZ:
            printf("%s: %dkHZ\n", attname, value);
            break;
        case ATTR_COMPUTEMODE:
            printf("%s: %s\n", attname, compute_mode_label(value));
            break;
        case ATTR_BOOL:
            printf("%s: %s\n", attname, value ? "true" : "false");
            break;
        case ATTR_BYTES:
        case ATTR_INT:
        default:
            /* plain integers and byte counts are printed without a unit */
            printf("%s: %d\n", attname, value);
            break;
    }
}

/*
 * Enumerates the CUDA devices and prints, for each one, its name, its
 * global memory size in MB and every attribute listed in attr_catalog.
 * Any driver API failure is reported through ereport(), which exits.
 */
int
main(void)
{
    CUresult    rc;
    int         count;
    int         i;

    rc = cuInit(0);
    if (rc != CUDA_SUCCESS)
        ereport(rc, "failed on cuInit");

    rc = cuDeviceGetCount(&count);
    if (rc != CUDA_SUCCESS)
        ereport(rc, "failed on cuDeviceGetCount");

    for (i = 0; i < count; i++)
    {
        CUdevice    device;
        char        dev_name[256];
        size_t      dev_memsz;
        size_t      j;

        rc = cuDeviceGet(&device, i);
        if (rc != CUDA_SUCCESS)
            ereport(rc, "failed on cuDeviceGet");

        rc = cuDeviceGetName(dev_name, sizeof(dev_name), device);
        if (rc != CUDA_SUCCESS)
            ereport(rc, "failed on cuDeviceGetName");
        printf("device name: %s\n", dev_name);

        rc = cuDeviceTotalMem(&dev_memsz, device);
        if (rc != CUDA_SUCCESS)
            ereport(rc, "failed on cuDeviceTotalMem");
        printf("global memory size: %zuMB\n", dev_memsz >> 20);

        for (j = 0; j < lengthof(attr_catalog); j++)
        {
            int     dev_prop;

            rc = cuDeviceGetAttribute(&dev_prop,
                                      attr_catalog[j].attnum,
                                      device);
            if (rc != CUDA_SUCCESS)
                ereport(rc, "failed on cuDeviceGetAttribute");

            print_attribute(attr_catalog[j].attname,
                            attr_catalog[j].atttype,
                            dev_prop);
        }
    }
    return 0;
}

--------------------------------------------------------------------------------
/opencl_entry.c:
--------------------------------------------------------------------------------
/*
 * opencl_entry.c
 *
 * Entrypoint of OpenCL interfaces that should be resolved and linked
 * at run-time.
 *
 * --
 * Copyright 2013 (c) PG-Strom Development Team
 * Copyright 2011-2012 (c) KaiGai Kohei
 *
 * This software is an extension of PostgreSQL; You can use, copy,
 * modify or distribute it under the terms of 'LICENSE' included
 * within this package.
 */
/*
 * NOTE(review): the header names of the five #include lines were lost in
 * this listing; they are reconstructed from the symbols used below
 * (assert, dlopen/dlsym/dlerror, fprintf, exit, cl_* types). Confirm
 * against the original file.
 */
#include <assert.h>
#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>

/*
 * Init opencl stubs
 */
static void *opencl_library_handle = NULL;

/*
 * opencl_entry_init
 *
 * Opens libOpenCL.so and remembers the handle for later symbol lookups.
 * Prints a message and exits with status 1 if the library cannot be
 * opened. Calling it again after a successful load is a no-op, so the
 * library is not opened a second time.
 */
void
opencl_entry_init(void)
{
    if (opencl_library_handle)
        return;

    opencl_library_handle = dlopen("libOpenCL.so", RTLD_NOW | RTLD_LOCAL);
    if (!opencl_library_handle)
    {
        const char *reason = dlerror();

        fprintf(stderr, "could not open OpenCL library: %s\n",
                reason ? reason : "unknown error");
        exit(1);
    }
}

/*
 * get_opencl_function
 *
 * Returns the address of func_name inside the loaded OpenCL library.
 * Prints a message and exits with status 1 if the symbol is not found,
 * so the return value is never NULL.
 */
static void *
get_opencl_function(const char *func_name)
{
    void   *func_addr;

    /*
     * Load the library on first use. An assert() alone would vanish
     * under NDEBUG and let a NULL handle reach dlsym().
     */
    if (!opencl_library_handle)
        opencl_entry_init();
    assert(opencl_library_handle != NULL);

    func_addr = dlsym(opencl_library_handle, func_name);
    if (!func_addr)
    {
        /* dlerror() may itself return NULL; never pass that to "%s" */
        const char *reason = dlerror();

        fprintf(stderr, "could not find symbol \"%s\" : %s\n",
                func_name, reason ? reason : "symbol resolved to NULL");
        exit(1);
    }
    return func_addr;
}

/*
 * Every function below has the same shape: it carries the name and
 * signature of an OpenCL API function, looks the real implementation up
 * by name on its first call, caches the pointer in a function-local
 * static, and forwards its arguments unchanged.
 */

/*
 * Query Platform Info
 */
cl_int
clGetPlatformIDs(cl_uint num_entries,
                 cl_platform_id *platforms,
                 cl_uint *num_platforms)
{
    static cl_int (*p_clGetPlatformIDs)(
        cl_uint num_entries,
        cl_platform_id *platforms,
        cl_uint *num_platforms) = NULL;

    if (!p_clGetPlatformIDs)
        p_clGetPlatformIDs = get_opencl_function("clGetPlatformIDs");

    return p_clGetPlatformIDs(num_entries, platforms, num_platforms);
}

cl_int
clGetPlatformInfo(cl_platform_id platform,
                  cl_platform_info param_name,
                  size_t param_value_size,
                  void *param_value,
                  size_t *param_value_size_ret)
{
    static cl_int (*p_clGetPlatformInfo)(
        cl_platform_id platform,
        cl_platform_info param_name,
        size_t param_value_size,
        void *param_value,
        size_t *param_value_size_ret) = NULL;

    if (!p_clGetPlatformInfo)
        p_clGetPlatformInfo = get_opencl_function("clGetPlatformInfo");

    return p_clGetPlatformInfo(platform,
                               param_name,
                               param_value_size,
                               param_value,
                               param_value_size_ret);
}

/*
 * Query Devices
 */
cl_int
clGetDeviceIDs(cl_platform_id platform,
               cl_device_type device_type,
               cl_uint num_entries,
               cl_device_id *devices,
               cl_uint *num_devices)
{
    static cl_int (*p_clGetDeviceIDs)(
        cl_platform_id platform,
        cl_device_type device_type,
        cl_uint num_entries,
        cl_device_id *devices,
        cl_uint *num_devices) = NULL;

    if (!p_clGetDeviceIDs)
        p_clGetDeviceIDs = get_opencl_function("clGetDeviceIDs");

    return p_clGetDeviceIDs(platform,
                            device_type,
                            num_entries,
                            devices,
                            num_devices);
}

cl_int
clGetDeviceInfo(cl_device_id device,
                cl_device_info param_name,
                size_t param_value_size,
                void *param_value,
                size_t *param_value_size_ret)
{
    static cl_int (*p_clGetDeviceInfo)(
        cl_device_id device,
        cl_device_info param_name,
        size_t param_value_size,
        void *param_value,
        size_t *param_value_size_ret) = NULL;

    if (!p_clGetDeviceInfo)
        p_clGetDeviceInfo = get_opencl_function("clGetDeviceInfo");

    return p_clGetDeviceInfo(device,
                             param_name,
                             param_value_size,
                             param_value,
                             param_value_size_ret);
}

/*
 * Contexts
 */
cl_context
clCreateContext(const cl_context_properties *properties,
                cl_uint num_devices,
                const cl_device_id *devices,
                void (CL_CALLBACK *pfn_notify)(
                    const char *errinfo,
                    const void *private_info,
                    size_t cb,
                    void *user_data),
                void *user_data,
                cl_int *errcode_ret)
{
    static cl_context (*p_clCreateContext)(
        const cl_context_properties *properties,
        cl_uint num_devices,
        const cl_device_id *devices,
        void (CL_CALLBACK *pfn_notify)(
            const char *errinfo,
            const void *private_info,
            size_t cb,
            void *user_data),
        void *user_data,
        cl_int *errcode_ret) = NULL;

    if (!p_clCreateContext)
        p_clCreateContext = get_opencl_function("clCreateContext");

    return p_clCreateContext(properties,
                             num_devices,
                             devices,
                             pfn_notify,
                             user_data,
                             errcode_ret);
}

cl_int
clReleaseContext(cl_context context)
{
    static cl_int (*p_clReleaseContext)(cl_context) = NULL;

    if (!p_clReleaseContext)
        p_clReleaseContext = get_opencl_function("clReleaseContext");

    return p_clReleaseContext(context);
}

/*
 * Programs
 */
cl_program
clCreateProgramWithSource(cl_context context,
                          cl_uint count,
                          const char **strings,
                          const size_t *lengths,
                          cl_int *errcode_ret)
{
    static cl_program (*p_clCreateProgramWithSource)(
        cl_context context,
        cl_uint count,
        const char **strings,
        const size_t *lengths,
        cl_int *errcode_ret) = NULL;

    if (!p_clCreateProgramWithSource)
        p_clCreateProgramWithSource
            = get_opencl_function("clCreateProgramWithSource");

    return p_clCreateProgramWithSource(context,
                                       count,
                                       strings,
                                       lengths,
                                       errcode_ret);
}

cl_int
clReleaseProgram(cl_program program)
{
    static cl_int (*p_clReleaseProgram)(cl_program program) = NULL;

    if (!p_clReleaseProgram)
        p_clReleaseProgram = get_opencl_function("clReleaseProgram");

    return p_clReleaseProgram(program);
}

cl_int
clBuildProgram(cl_program program,
               cl_uint num_devices,
               const cl_device_id *device_list,
               const char *options,
               void (CL_CALLBACK *pfn_notify)(
                   cl_program program,
                   void *user_data),
               void *user_data)
{
    static cl_int (*p_clBuildProgram)(
        cl_program program,
        cl_uint num_devices,
        const cl_device_id *device_list,
        const char *options,
        void (CL_CALLBACK *pfn_notify)(
            cl_program program,
            void *user_data),
        void *user_data) = NULL;

    if (!p_clBuildProgram)
        p_clBuildProgram = get_opencl_function("clBuildProgram");

    return p_clBuildProgram(program,
                            num_devices,
                            device_list,
                            options,
                            pfn_notify,
                            user_data);
}

cl_int
clGetProgramBuildInfo(cl_program program,
                      cl_device_id device,
                      cl_program_build_info param_name,
                      size_t param_value_size,
                      void *param_value,
                      size_t *param_value_size_ret)
{
    static cl_int (*p_clGetProgramBuildInfo)(
        cl_program program,
        cl_device_id device,
        cl_program_build_info param_name,
        size_t param_value_size,
        void *param_value,
        size_t *param_value_size_ret) = NULL;

    if (!p_clGetProgramBuildInfo)
        p_clGetProgramBuildInfo
            = get_opencl_function("clGetProgramBuildInfo");

    return p_clGetProgramBuildInfo(program,
                                   device,
                                   param_name,
                                   param_value_size,
                                   param_value,
                                   param_value_size_ret);
}

/*
 * Memory objects
 */
cl_mem
clCreateBuffer(cl_context context,
               cl_mem_flags flags,
               size_t size,
               void *host_ptr,
               cl_int *errcode_ret)
{
    static cl_mem (*p_clCreateBuffer)(
        cl_context context,
        cl_mem_flags flags,
        size_t size,
        void *host_ptr,
        cl_int *errcode_ret) = NULL;

    if (!p_clCreateBuffer)
        p_clCreateBuffer = get_opencl_function("clCreateBuffer");

    return p_clCreateBuffer(context,
                            flags,
                            size,
                            host_ptr,
                            errcode_ret);
}

cl_int
clEnqueueReadBuffer(cl_command_queue command_queue,
                    cl_mem buffer,
                    cl_bool blocking_read,
                    size_t offset,
                    size_t size,
                    void *ptr,
                    cl_uint num_events_in_wait_list,
                    const cl_event *event_wait_list,
                    cl_event *event)
{
    static cl_int (*p_clEnqueueReadBuffer)(
        cl_command_queue command_queue,
        cl_mem buffer,
        cl_bool blocking_read,
        size_t offset,
        size_t size,
        void *ptr,
        cl_uint num_events_in_wait_list,
        const cl_event *event_wait_list,
        cl_event *event) = NULL;

    if (!p_clEnqueueReadBuffer)
        p_clEnqueueReadBuffer = get_opencl_function("clEnqueueReadBuffer");

    return p_clEnqueueReadBuffer(command_queue,
                                 buffer,
                                 blocking_read,
                                 offset,
                                 size,
                                 ptr,
                                 num_events_in_wait_list,
                                 event_wait_list,
                                 event);
}

cl_int
clEnqueueWriteBuffer(cl_command_queue command_queue,
                     cl_mem buffer,
                     cl_bool blocking_write,
                     size_t offset,
                     size_t size,
                     const void *ptr,
                     cl_uint num_events_in_wait_list,
                     const cl_event *event_wait_list,
                     cl_event *event)
{
    static cl_int (*p_clEnqueueWriteBuffer)(
        cl_command_queue command_queue,
        cl_mem buffer,
        cl_bool blocking_write,
        size_t offset,
        size_t size,
        const void *ptr,
        cl_uint num_events_in_wait_list,
        const cl_event *event_wait_list,
        cl_event *event) = NULL;

    if (!p_clEnqueueWriteBuffer)
        p_clEnqueueWriteBuffer
            = get_opencl_function("clEnqueueWriteBuffer");

    return p_clEnqueueWriteBuffer(command_queue,
                                  buffer,
                                  blocking_write,
                                  offset,
                                  size,
                                  ptr,
                                  num_events_in_wait_list,
                                  event_wait_list,
                                  event);
}

cl_int
clReleaseMemObject(cl_mem memobj)
{
    static cl_int (*p_clReleaseMemObject)(cl_mem memobj) = NULL;

    if (!p_clReleaseMemObject)
        p_clReleaseMemObject = get_opencl_function("clReleaseMemObject");

    return p_clReleaseMemObject(memobj);
}

/*
 * Command queues
 */
cl_command_queue
clCreateCommandQueue(cl_context context,
                     cl_device_id device,
                     cl_command_queue_properties properties,
                     cl_int *errcode_ret)
{
    static cl_command_queue (*p_clCreateCommandQueue)(
        cl_context context,
        cl_device_id device,
        cl_command_queue_properties properties,
        cl_int *errcode_ret) = NULL;

    if (!p_clCreateCommandQueue)
        p_clCreateCommandQueue
            = get_opencl_function("clCreateCommandQueue");

    return p_clCreateCommandQueue(context,
                                  device,
                                  properties,
                                  errcode_ret);
}

cl_int
clReleaseCommandQueue(cl_command_queue command_queue)
{
    static cl_int (*p_clReleaseCommandQueue)(
        cl_command_queue command_queue) = NULL;

    if (!p_clReleaseCommandQueue)
        p_clReleaseCommandQueue
            = get_opencl_function("clReleaseCommandQueue");

    return p_clReleaseCommandQueue(command_queue);
}

/*
 * Kernels
 */
cl_kernel
clCreateKernel(cl_program program,
               const char *kernel_name,
               cl_int *errcode_ret)
{
    static cl_kernel (*p_clCreateKernel)(
        cl_program program,
        const char *kernel_name,
        cl_int *errcode_ret) = NULL;

    if (!p_clCreateKernel)
        p_clCreateKernel = get_opencl_function("clCreateKernel");

    return p_clCreateKernel(program, kernel_name, errcode_ret);
}

cl_int
clSetKernelArg(cl_kernel kernel,
               cl_uint arg_index,
               size_t arg_size,
               const void *arg_value)
{
    static cl_int (*p_clSetKernelArg)(
        cl_kernel kernel,
        cl_uint arg_index,
        size_t arg_size,
        const void *arg_value) = NULL;

    if (!p_clSetKernelArg)
        p_clSetKernelArg = get_opencl_function("clSetKernelArg");

    return p_clSetKernelArg(kernel, arg_index, arg_size, arg_value);
}

cl_int
clEnqueueNDRangeKernel(cl_command_queue command_queue,
                       cl_kernel kernel,
                       cl_uint work_dim,
                       const size_t *global_work_offset,
                       const size_t *global_work_size,
                       const size_t *local_work_size,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event)
{
    static cl_int (*p_clEnqueueNDRangeKernel)(
        cl_command_queue command_queue,
        cl_kernel kernel,
        cl_uint work_dim,
        const size_t *global_work_offset,
        const size_t *global_work_size,
        const size_t *local_work_size,
        cl_uint num_events_in_wait_list,
        const cl_event *event_wait_list,
        cl_event *event) = NULL;

    if (!p_clEnqueueNDRangeKernel)
        p_clEnqueueNDRangeKernel
            = get_opencl_function("clEnqueueNDRangeKernel");

    return p_clEnqueueNDRangeKernel(command_queue,
                                    kernel,
                                    work_dim,
                                    global_work_offset,
                                    global_work_size,
                                    local_work_size,
                                    num_events_in_wait_list,
                                    event_wait_list,
                                    event);
}

cl_int
clReleaseKernel(cl_kernel kernel)
{
    static cl_int (*p_clReleaseKernel)(cl_kernel kernel) = NULL;

    if (!p_clReleaseKernel)
        p_clReleaseKernel = get_opencl_function("clReleaseKernel");

    return p_clReleaseKernel(kernel);
}

/*
 * Events and synchronization
 */
cl_int
clSetEventCallback(cl_event event,
                   cl_int command_exec_callback_type,
                   void (CL_CALLBACK *pfn_event_notify)(
                       cl_event event,
                       cl_int event_command_exec_status,
                       void *user_data),
                   void *user_data)
{
    /* the return type must be spelled out: implicit int is not valid C99 */
    static cl_int (*p_clSetEventCallback)(
        cl_event event,
        cl_int command_exec_callback_type,
        void (CL_CALLBACK *pfn_event_notify)(
            cl_event event,
            cl_int event_command_exec_status,
            void *user_data),
        void *user_data) = NULL;

    if (!p_clSetEventCallback)
        p_clSetEventCallback = get_opencl_function("clSetEventCallback");

    return p_clSetEventCallback(event,
                                command_exec_callback_type,
                                pfn_event_notify,
                                user_data);
}

cl_int
clWaitForEvents(cl_uint num_events,
                const cl_event *event_list)
{
    static cl_int (*p_clWaitForEvents)(
        cl_uint num_events,
        const cl_event *event_list) = NULL;

    if (!p_clWaitForEvents)
        p_clWaitForEvents = get_opencl_function("clWaitForEvents");

    return p_clWaitForEvents(num_events, event_list);
}

cl_int
clReleaseEvent(cl_event event)
{
    static cl_int (*p_clReleaseEvent)(cl_event event) = NULL;

    if (!p_clReleaseEvent)
        p_clReleaseEvent = get_opencl_function("clReleaseEvent");

    return p_clReleaseEvent(event);
}

cl_int
clFinish(cl_command_queue command_queue)
{
    static cl_int (*p_clFinish)(cl_command_queue command_queue) = NULL;

    if (!p_clFinish)
        p_clFinish = get_opencl_function("clFinish");

    return p_clFinish(command_queue);
}

/*
 * opencl_strerror
 *
 * Returns a constant, human-readable description of an OpenCL status
 * code, or "unknown error code" for a value not listed here. The result
 * is a string literal and must not be freed or modified.
 */
const char *
opencl_strerror(cl_int errcode)
{
    switch (errcode)
    {
        case CL_SUCCESS:
            return "success";
        case CL_DEVICE_NOT_FOUND:
            return "device not found";
        case CL_DEVICE_NOT_AVAILABLE:
            return "device not available";
        case CL_COMPILER_NOT_AVAILABLE:
            return "compiler not available";
        case CL_MEM_OBJECT_ALLOCATION_FAILURE:
            return "memory object allocation failure";
        case CL_OUT_OF_RESOURCES:
            return "out of resources";
        case CL_OUT_OF_HOST_MEMORY:
            return "out of host memory";
        case CL_PROFILING_INFO_NOT_AVAILABLE:
            return "profiling info not available";
        case CL_MEM_COPY_OVERLAP:
            return "memory copy overlap";
        case CL_IMAGE_FORMAT_MISMATCH:
            return "image format mismatch";
        case CL_IMAGE_FORMAT_NOT_SUPPORTED:
            return "image format not supported";
        case CL_BUILD_PROGRAM_FAILURE:
            return "build program failure";
        case CL_MAP_FAILURE:
            return "map failure";
        case CL_MISALIGNED_SUB_BUFFER_OFFSET:
            return "misaligned sub-buffer offset";
        case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
            return "execution status error for event in wait list";
        case CL_INVALID_VALUE:
            return "invalid value";
        case CL_INVALID_DEVICE_TYPE:
            return "invalid device type";
        case CL_INVALID_PLATFORM:
            return "invalid platform";
        case CL_INVALID_DEVICE:
            return "invalid device";
        case CL_INVALID_CONTEXT:
            return "invalid context";
        case CL_INVALID_QUEUE_PROPERTIES:
            return "invalid queue properties";
        case CL_INVALID_COMMAND_QUEUE:
            return "invalid command queue";
        case CL_INVALID_HOST_PTR:
            return "invalid host pointer";
        case CL_INVALID_MEM_OBJECT:
            return "invalid memory object";
        case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
            return "invalid image format descriptor";
        case CL_INVALID_IMAGE_SIZE:
            return "invalid image size";
        case CL_INVALID_SAMPLER:
            return "invalid sampler";
        case CL_INVALID_BINARY:
            return "invalid binary";
        case CL_INVALID_BUILD_OPTIONS:
            return "invalid build options";
        case CL_INVALID_PROGRAM:
            return "invalid program";
        case CL_INVALID_PROGRAM_EXECUTABLE:
            return "invalid program executable";
        case CL_INVALID_KERNEL_NAME:
            return "invalid kernel name";
        case CL_INVALID_KERNEL_DEFINITION:
            return "invalid kernel definition";
        case CL_INVALID_KERNEL:
            return "invalid kernel";
        case CL_INVALID_ARG_INDEX:
            return "invalid argument index";
        case CL_INVALID_ARG_VALUE:
            return "invalid argument value";
        case CL_INVALID_ARG_SIZE:
            return "invalid argument size";
        case CL_INVALID_KERNEL_ARGS:
            return "invalid kernel arguments";
        case CL_INVALID_WORK_DIMENSION:
            return "invalid work dimension";
        case CL_INVALID_WORK_GROUP_SIZE:
            return "invalid group size";
        case CL_INVALID_WORK_ITEM_SIZE:
            return "invalid item size";
        case CL_INVALID_GLOBAL_OFFSET:
            return "invalid global offset";
        case CL_INVALID_EVENT_WAIT_LIST:
            return "invalid wait list";
        case CL_INVALID_EVENT:
            return "invalid event";
        case CL_INVALID_OPERATION:
            return "invalid operation";
        case CL_INVALID_GL_OBJECT:
            return "invalid GL object";
        case CL_INVALID_BUFFER_SIZE:
            return "invalid buffer size";
        case CL_INVALID_MIP_LEVEL:
            return "invalid MIP level";
        case CL_INVALID_GLOBAL_WORK_SIZE:
            return "invalid global work size";
        case CL_INVALID_PROPERTY:
            return "invalid property";
        default:
            return "unknown error code";
    }
}

--------------------------------------------------------------------------------