├── sgc
├── .rvmrc
├── .gitignore
├── lib
│ ├── rubycuda.rb
│ ├── rubycu.rb
│ ├── cuda
│ │ ├── driver
│ │ │ ├── extconf.rb
│ │ │ ├── rubycu.o
│ │ │ ├── rubycu.bundle
│ │ │ ├── mkmf.log
│ │ │ ├── Makefile
│ │ │ └── rubycu.cpp
│ │ ├── runtime
│ │ │ ├── rubycuda.rb
│ │ │ ├── version.rb
│ │ │ ├── error.rb
│ │ │ ├── thread.rb
│ │ │ ├── cuda.rb
│ │ │ ├── memory.rb
│ │ │ ├── stream.rb
│ │ │ ├── device.rb
│ │ │ ├── event.rb
│ │ │ ├── function.rb
│ │ │ └── ffi-cuda.rb
│ │ └── ruby
│ │ │ └── cu.rb
│ ├── madison
│ │ ├── kernel
│ │ │ ├── kernel.h
│ │ │ ├── libkernel.so
│ │ │ ├── libkernel.10.so
│ │ │ ├── kernel.cu
│ │ │ └── test.cu
│ │ ├── matrix.rb
│ │ └── comparable.rb
│ ├── ffi
│ │ └── prettystruct.rb
│ └── memory
│ │ ├── interface
│ │ ├── ipointer.rb
│ │ └── ibuffer.rb
│ │ ├── pointer.rb
│ │ └── buffer.rb
├── visualize.sh
└── visualize.gp
└── .gitignore
/sgc/.rvmrc:
--------------------------------------------------------------------------------
1 | rvm 1.9.2
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | */libkernel.so
2 |
--------------------------------------------------------------------------------
/sgc/.gitignore:
--------------------------------------------------------------------------------
1 | *.csv
2 | *.png
3 | a.out
4 |
--------------------------------------------------------------------------------
/sgc/lib/rubycuda.rb:
--------------------------------------------------------------------------------
1 | require 'cuda/runtime/rubycuda'
2 |
--------------------------------------------------------------------------------
/sgc/lib/rubycu.rb:
--------------------------------------------------------------------------------
1 | require 'cuda/driver/rubycu'
2 | require 'cuda/ruby/cu'
3 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/driver/extconf.rb:
--------------------------------------------------------------------------------
1 | require 'mkmf'
2 | have_library("cuda")
3 | create_makefile("rubycu")
4 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/driver/rubycu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zog/Sandbox/master/sgc/lib/cuda/driver/rubycu.o
--------------------------------------------------------------------------------
/sgc/lib/madison/kernel/kernel.h:
--------------------------------------------------------------------------------
1 | #define DIMENSIONS 10
2 | #define BLOCK_SIZE 16
3 | #define CLUSTER_SIZE 16
4 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/driver/rubycu.bundle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zog/Sandbox/master/sgc/lib/cuda/driver/rubycu.bundle
--------------------------------------------------------------------------------
/sgc/lib/madison/kernel/libkernel.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zog/Sandbox/master/sgc/lib/madison/kernel/libkernel.so
--------------------------------------------------------------------------------
/sgc/lib/madison/kernel/libkernel.10.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zog/Sandbox/master/sgc/lib/madison/kernel/libkernel.10.so
--------------------------------------------------------------------------------
/sgc/visualize.sh:
--------------------------------------------------------------------------------
1 | #! /bin/sh
2 | # visualize.sh
3 |
4 | cat result.csv|sort -n -k1 -k2 > result_sorted.csv
5 | awk '{ print;if ((NR % 512) == 0) printf("\n");}' result_sorted.csv > result_sorted_final.csv
6 | gnuplot visualize.gp ; open heatmaps.png
--------------------------------------------------------------------------------
/sgc/lib/ffi/prettystruct.rb:
--------------------------------------------------------------------------------
1 | require 'ffi'
2 |
3 |
4 | module FFI
5 |
6 | # This class is obtained from ffi-tk (https://github.com/Tass/ffi-tk).
7 | class PrettyStruct < FFI::Struct
8 | ACCESSOR_CODE = <<-CODE
9 | def {name}; self[{sym}]; end
10 | def {name}=(value) self[{sym}] = value; end
11 | CODE
12 |
13 | def self.layout(*kvs)
14 | kvs.each_slice(2) do |key, value|
15 | eval ACCESSOR_CODE.gsub(/\{(.*?)\}/, '{name}' => key, '{sym}' => ":#{key}")
16 | end
17 |
18 | super
19 | end
20 |
21 | def inspect
22 | kvs = members.zip(values)
23 | kvs.map!{|key, value| "%s=%s" % [key, value.inspect] }
24 | "<%s %s>" % [self.class, kvs.join(' ')]
25 | end
26 | end
27 |
28 | end # module
29 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/runtime/rubycuda.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | require 'cuda/runtime/ffi-cuda'
26 | require 'cuda/runtime/cuda'
27 | require 'cuda/runtime/error'
28 | require 'cuda/runtime/version'
29 | require 'cuda/runtime/device'
30 | require 'cuda/runtime/thread'
31 | require 'cuda/runtime/memory'
32 | require 'cuda/runtime/function'
33 | require 'cuda/runtime/stream'
34 | require 'cuda/runtime/event'
35 |
--------------------------------------------------------------------------------
/sgc/lib/memory/interface/ipointer.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | module SGC
26 | module Memory
27 |
28 | module IMemoryPointer
29 |
30 | def initialize(value = nil); end
31 |
32 | def ptr; raise NotImplementedError; end
33 | def ptr=(value); raise NotImplementedError; end
34 | def offset(index); raise NotImplementedError; end
35 | def ref; raise NotImplementedError; end
36 |
37 | end
38 |
39 | end # module
40 | end # module
41 |
--------------------------------------------------------------------------------
/sgc/visualize.gp:
--------------------------------------------------------------------------------
1 | set terminal png transparent nocrop enhanced font arial 8 size 1000, 1000
2 | set output 'heatmaps.png'
3 | unset key
4 | set view map
5 | set style data linespoints
6 | set xtics border in scale 0,0 mirror norotate offset character 0, 0, 0
7 | set ytics border in scale 0,0 mirror norotate offset character 0, 0, 0
8 | set ztics border in scale 0,0 nomirror norotate offset character 0, 0, 0
9 | set nocbtics
10 | set title "Heat Map generated by 'plot' from a stream of XYZ values\nNB: Rows must be separated by blank lines!"
11 | set rrange [ * : * ] noreverse nowriteback # (currently [8.98847e+307:-8.98847e+307] )
12 | set trange [ * : * ] noreverse nowriteback # (currently [-5.00000:5.00000] )
13 | set urange [ * : * ] noreverse nowriteback # (currently [-5.00000:5.00000] )
14 | set vrange [ * : * ] noreverse nowriteback # (currently [-5.00000:5.00000] )
15 | set xrange [ -0.5 : * ] noreverse nowriteback
16 | set x2range [ * : * ] noreverse nowriteback # (currently [-0.500000:4.50000] )
17 | set yrange [ -0.5 : * ] noreverse nowriteback
18 | set y2range [ * : * ] noreverse nowriteback # (currently [-0.500000:4.50000] )
19 | set zrange [ 0.0 : 1.0 ] noreverse nowriteback # (currently [0.00000:5.00000] )
20 | set cblabel "Score"
21 | set cbrange [ 0.00000 : * ] noreverse nowriteback
22 | set palette rgbformulae -7, 2, -7
23 | plot 'result_sorted_final.csv' using 2:1:3 with image
24 |
--------------------------------------------------------------------------------
/sgc/lib/madison/kernel/kernel.cu:
--------------------------------------------------------------------------------
1 | extern "C" {
2 | #include "kernel.h"
3 |
4 | __global__ void MatPopulate(float *A, int count)
5 | {
6 | int row = blockIdx.x;
7 | int col = threadIdx.x;
8 | A[row * DIMENSIONS + col] = (float)(row * DIMENSIONS + col)/(DIMENSIONS*count);
9 | }
10 |
11 | float score(float *A, float *B){
12 | float score = 0.0;
13 | for(int i=0; i<DIMENSIONS; i++){ /* NOTE(review): dump garbled from here — the extractor ate all text between '<' and '>'; the remainder of kernel.cu, test.cu, and the "/sgc/lib/cuda/runtime/version.rb:" file header are missing. Recover these files from the repository, not from this dump. */
23 | #
24 |
25 | require 'cuda/runtime/ffi-cuda'
26 |
27 |
28 | module SGC
29 | module Cuda
30 |
31 | def driver_version
32 | p = FFI::MemoryPointer.new(:int)
33 | status = API::cudaDriverGetVersion(p)
34 | Pvt::handle_error(status)
35 | p.read_int
36 | end
37 | module_function :driver_version
38 |
39 |
40 | def runtime_version
41 | p = FFI::MemoryPointer.new(:int)
42 | status = API::cudaRuntimeGetVersion(p)
43 | Pvt::handle_error(status)
44 | p.read_int
45 | end
46 | module_function :runtime_version
47 |
48 | end # module
49 | end # module
50 |
--------------------------------------------------------------------------------
/sgc/lib/memory/interface/ibuffer.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | require 'memory/interface/ipointer'
26 |
27 |
28 | module SGC
29 | module Memory
30 |
31 | module IBuffer
32 |
33 | include IMemoryPointer
34 |
35 | def initialize(type, size); end
36 |
37 | def [](index); raise NotImplementedError; end
38 | def []=(index, value); raise NotImplementedError; end
39 | def size; raise NotImplementedError; end
40 | def element_size; raise NotImplementedError; end
41 |
42 | module ClassMethods
43 | def element_size(type); raise NotImplementedError; end
44 | end
45 |
46 | def self.included(base)
47 | base.extend(ClassMethods)
48 | end
49 |
50 | end
51 |
52 | end # module
53 | end # module
54 |
--------------------------------------------------------------------------------
/sgc/lib/memory/pointer.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | require 'ffi'
26 | require 'memory/interface/ipointer'
27 |
28 |
29 | module SGC
30 | module Memory
31 |
32 | class MemoryPointer
33 |
34 | include IMemoryPointer
35 |
36 |
37 | def initialize(v = nil)
38 | @p = FFI::MemoryPointer.new(:pointer)
39 | @p.write_pointer(v)
40 | end
41 |
42 |
43 | def ptr
44 | @p.read_pointer
45 | end
46 |
47 |
48 | def ptr=(v)
49 | @p.write_pointer(v)
50 | v
51 | end
52 |
53 |
54 | def offset(i)
55 | MemoryPointer.new(@p.read_pointer.to_i + i)
56 | end
57 |
58 |
59 | def ref
60 | @p
61 | end
62 |
63 | end
64 |
65 | end # module
66 | end # module
67 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/runtime/error.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | require 'cuda/runtime/ffi-cuda'
26 |
27 |
28 | module SGC
29 | module Cuda
30 |
31 | def get_error_string(e)
32 | API::cudaGetErrorString(e)
33 | end
34 | module_function :get_error_string
35 |
36 |
37 | def get_last_error
38 | API::cudaGetLastError
39 | end
40 | module_function :get_last_error
41 |
42 |
43 | def peek_at_last_error
44 | API::cudaPeekAtLastError
45 | end
46 | module_function :peek_at_last_error
47 |
48 | module Pvt
49 |
50 | CUDA_SUCCESS = API::CudaError[:cudaSuccess]
51 | CUDA_ERROR_NOT_READY = API::CudaError[:cudaErrorNotReady]
52 |
53 | def self.handle_error(status)
54 | status == CUDA_SUCCESS or raise API::cudaGetErrorString(status)
55 | nil
56 | end
57 |
58 | end
59 |
60 | end # module
61 | end # module
62 |
--------------------------------------------------------------------------------
/sgc/lib/madison/matrix.rb:
--------------------------------------------------------------------------------
1 | module Madison
2 | require 'rubycuda'
3 | require 'madison/comparable'
4 |
5 |
6 | class Dimension
7 | # A vectors dimension key => value
8 | attr_accessor :i, :j
9 |
10 | def initialize matrix, i, j
11 | @matrix = matrix
12 | @i = i
13 | @j = j
14 | end
15 |
16 | def value= value
17 | @matrix.values[@i*@matrix.vectors_dimension + @j] = value
18 | end
19 |
20 | def key= value
21 | @matrix.keys[@i*@matrix.vectors_dimension + @j] = value
22 | end
23 |
24 | def value
25 | @matrix.values[@i*@matrix.vectors_dimension + @j]
26 | end
27 |
28 | def key
29 | @matrix.keys[@i*@matrix.vectors_dimension + @j]
30 | end
31 |
32 | def inspect
33 | "# #{value}>"
34 | end
35 | end
36 |
37 | class Matrix
38 | include SGC::Cuda
39 | include Madison::Comparable
40 |
41 | attr_reader :vectors_dimension
42 | attr_reader :count
43 | attr_reader :size
44 | attr_accessor :keys, :values
45 |
46 | def initialize type, vectors_count, vectors_dimension
47 | @last_id = 0
48 | @count = vectors_count
49 | @vectors_dimension = vectors_dimension
50 | @size = vectors_count * vectors_dimension
51 | @type = type
52 | @type_size = Buffer.element_size(type)
53 | @dimensions = Hash.new{|h, k| h[k] = {}}
54 |
55 | # the matrix used to store the vector dimensions values
56 | @values = Buffer.new(type, @size)
57 |
58 | # the matrix used to store the vector dimensions keys
59 | @keys = Buffer.new(:int, @size)
60 | end
61 |
62 | def inspect
63 | "#"
64 | end
65 |
66 | def dimensions i, j
67 | @dimensions[i][j] ||= Dimension.new self, i, j
68 | end
69 |
70 | def << vector
71 | raise "Already full" unless @last_id <= @count
72 | (0...[vector.size, @vectors_dimension].min).each do |k|
73 | dimensions(@last_id, k).value = vector.values[k]
74 | dimensions(@last_id, k).key = vector.keys[k].hash
75 | end
76 | @last_id += 1
77 | self
78 | end
79 | end
80 | end
--------------------------------------------------------------------------------
/sgc/lib/cuda/runtime/thread.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | require 'cuda/runtime/ffi-cuda'
26 | require 'cuda/runtime/cuda'
27 | require 'cuda/runtime/error'
28 |
29 |
30 | module SGC
31 | module Cuda
32 |
33 | class CudaThread
34 |
35 | def self.exit
36 | status = API::cudaThreadExit
37 | Pvt::handle_error(status)
38 | self
39 | end
40 |
41 |
42 | def self.cache_config
43 | p = FFI::MemoryPointer.new(:int)
44 | status = API::cudaThreadGetCacheConfig(p)
45 | Pvt::handle_error(status)
46 | CudaFuncCache[p.read_int]
47 | end
48 |
49 |
50 | def self.cache_config=(config)
51 | status = API::cudaThreadSetCacheConfig(config)
52 | Pvt::handle_error(status)
53 | config
54 | end
55 |
56 |
57 | def self.limit(limit)
58 | p = FFI::MemoryPointer.new(:size_t)
59 | status = API::cudaThreadGetLimit(p, limit)
60 | Pvt::handle_error(status)
61 | p.read_long
62 | end
63 |
64 |
65 | def self.limit=(*limit_value_pair)
66 | limit, value = limit_value_pair.flatten
67 | status = API::cudaThreadSetLimit(limit, value)
68 | Pvt::handle_error(status)
69 | limit_value_pair
70 | end
71 |
72 |
73 | def self.synchronize
74 | status = API::cudaThreadSynchronize
75 | Pvt::handle_error(status)
76 | self
77 | end
78 |
79 | end
80 |
81 | end # module
82 | end # module
83 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/runtime/cuda.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | require 'cuda/runtime/ffi-cuda'
26 | require 'memory/buffer'
27 |
28 |
29 | module SGC
30 | module Cuda
31 |
32 | CudaError_t = CudaError = API::CudaError
33 | CudaDeviceFlags = API::CudaDeviceFlags
34 | CudaEventFlags = API::CudaEventFlags
35 | CudaHostAllocFlags = API::CudaHostAllocFlags
36 | CudaArrayFlags = API::CudaArrayFlags
37 | CudaMemcpyKind = API::CudaMemcpyKind
38 | CudaChannelFormatKind = API::CudaChannelFormatKind
39 | CudaFuncCache = API::CudaFuncCache
40 | CudaLimit = API::CudaLimit
41 | CudaComputeMode = API::CudaComputeMode
42 | CudaSurfaceBoundaryMode = API::CudaSurfaceBoundaryMode
43 | CudaSurfaceFormatMode = API::CudaSurfaceFormatMode
44 | CudaTextureAddressMode = API::CudaTextureAddressMode
45 | CudaTextureFilterMode = API::CudaTextureFilterMode
46 | CudaTextureReadMode = API::CudaTextureReadMode
47 |
48 | Dim3 = API::Dim3
49 | CudaDeviceProp = API::CudaDeviceProp
50 | CudaFuncAttributes = API::CudaFuncAttributes
51 | CudaChannelFormatDesc = API::CudaChannelFormatDesc
52 | CudaPitchedPtr = API::CudaPitchedPtr
53 | CudaPos = API::CudaPos
54 | CudaExtent = API::CudaExtent
55 | CudaMemcpy3DParms = API::CudaMemcpy3DParms
56 | TextureReference = API::TextureReference
57 | SurfaceReference = API::SurfaceReference
58 |
59 | Buffer = SGC::Memory::Buffer
60 |
61 | end # module
62 | end # module
63 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/driver/mkmf.log:
--------------------------------------------------------------------------------
1 | have_library: checking for main() in -lcuda... -------------------- no
2 |
3 | "gcc -o conftest -I/Users/zog/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1/i386-darwin9.8.0 -I/Users/zog/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1/ruby/backward -I/Users/zog/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1 -I. -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -O3 -ggdb -Wextra -Wno-unused-parameter -Wno-parentheses -Wpointer-arith -Wwrite-strings -Wno-missing-field-initializers -Wshorten-64-to-32 -Wno-long-long -fno-common -pipe conftest.c -L. -L/Users/zog/.rvm/rubies/ruby-1.9.2-p136/lib -L. -L/usr/local/lib -lruby.1.9.1-static -lpthread -ldl -lobjc "
4 | checked program was:
5 | /* begin */
6 | 1: #include "ruby.h"
7 | 2:
8 | 3: int main() {return 0;}
9 | /* end */
10 |
11 | "gcc -o conftest -I/Users/zog/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1/i386-darwin9.8.0 -I/Users/zog/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1/ruby/backward -I/Users/zog/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1 -I. -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -O3 -ggdb -Wextra -Wno-unused-parameter -Wno-parentheses -Wpointer-arith -Wwrite-strings -Wno-missing-field-initializers -Wshorten-64-to-32 -Wno-long-long -fno-common -pipe conftest.c -L. -L/Users/zog/.rvm/rubies/ruby-1.9.2-p136/lib -L. -L/usr/local/lib -lruby.1.9.1-static -lcuda -lpthread -ldl -lobjc "
12 | ld: library not found for -lcuda
13 | collect2: ld returned 1 exit status
14 | checked program was:
15 | /* begin */
16 | 1: #include "ruby.h"
17 | 2:
18 | 3: /*top*/
19 | 4: int main() {return 0;}
20 | 5: int t() { void ((*volatile p)()); p = (void ((*)()))main; return 0; }
21 | /* end */
22 |
23 | "gcc -o conftest -I/Users/zog/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1/i386-darwin9.8.0 -I/Users/zog/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1/ruby/backward -I/Users/zog/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1 -I. -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -O3 -ggdb -Wextra -Wno-unused-parameter -Wno-parentheses -Wpointer-arith -Wwrite-strings -Wno-missing-field-initializers -Wshorten-64-to-32 -Wno-long-long -fno-common -pipe conftest.c -L. -L/Users/zog/.rvm/rubies/ruby-1.9.2-p136/lib -L. -L/usr/local/lib -lruby.1.9.1-static -lcuda -lpthread -ldl -lobjc "
24 | ld: library not found for -lcuda
25 | collect2: ld returned 1 exit status
26 | checked program was:
27 | /* begin */
28 | 1: #include "ruby.h"
29 | 2:
30 | 3: /*top*/
31 | 4: int main() {return 0;}
32 | 5: int t() { main(); return 0; }
33 | /* end */
34 |
35 | --------------------
36 |
37 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/runtime/memory.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | require 'cuda/runtime/ffi-cuda'
26 | require 'cuda/runtime/error'
27 | require 'memory/pointer'
28 |
29 |
30 | module SGC
31 | module Cuda
32 |
33 | class CudaDeviceMemory
34 |
35 | def self.malloc(nbytes)
36 | p = SGC::Memory::MemoryPointer.new
37 | status = API::cudaMalloc(p.ref, nbytes)
38 | Pvt::handle_error(status)
39 | p
40 | end
41 |
42 |
43 | def self.free(devptr)
44 | status = API::cudaFree(devptr.ptr)
45 | Pvt::handle_error(status)
46 | nil
47 | end
48 |
49 | end
50 |
51 |
52 | module CudaMemory
53 |
54 | def memcpy(dst_ptr, src_ptr, nbytes, memcpy_kind)
55 | status = API::cudaMemcpy(dst_ptr.ptr, src_ptr.ptr, nbytes, memcpy_kind)
56 | Pvt::handle_error(status)
57 | end
58 | module_function :memcpy
59 |
60 | def memcpy_htoh(dst_ptr, src_ptr, nbytes)
61 | memcpy(dst_ptr, src_ptr, nbytes, :cudaMemcpyHostToHost)
62 | end
63 | module_function :memcpy_htoh
64 |
65 | def memcpy_htod(dst_ptr, src_ptr, nbytes)
66 | memcpy(dst_ptr, src_ptr, nbytes, :cudaMemcpyHostToDevice)
67 | end
68 | module_function :memcpy_htod
69 |
70 | def memcpy_dtoh(dst_ptr, src_ptr, nbytes)
71 | memcpy(dst_ptr, src_ptr, nbytes, :cudaMemcpyDeviceToHost)
72 | end
73 | module_function :memcpy_dtoh
74 |
75 | def memcpy_dtod(dst_ptr, src_ptr, nbytes)
76 | memcpy(dst_ptr, src_ptr, nbytes, :cudaMemcpyDeviceToDevice)
77 | end
78 | module_function :memcpy_dtod
79 |
80 | end
81 |
82 | end # module
83 | end # module
84 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/runtime/stream.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | require 'cuda/runtime/ffi-cuda'
26 | require 'cuda/runtime/error'
27 |
28 |
29 | module SGC
30 | module Cuda
31 |
32 | class CudaStream
33 |
34 | def initialize
35 | @p = FFI::MemoryPointer.new(:pointer)
36 | end
37 |
38 |
39 | def create
40 | status = API::cudaStreamCreate(@p)
41 | Pvt::handle_error(status)
42 | self
43 | end
44 |
45 |
46 | def destroy
47 | status = API::cudaStreamDestroy(@p.read_pointer)
48 | Pvt::handle_error(status)
49 | @p.write_pointer(0)
50 | nil
51 | end
52 |
53 |
54 | def query
55 | status = API::cudaStreamQuery(@p.read_pointer)
56 | if status == Pvt::CUDA_SUCCESS
57 | return true
58 | elsif status == Pvt::CUDA_ERROR_NOT_READY
59 | return false
60 | end
61 | Pvt::hanld_error(status)
62 | self
63 | end
64 |
65 |
66 | def synchronize
67 | status = API::cudaStreamSynchronize(@p.read_pointer)
68 | Pvt::handle_error(status)
69 | self
70 | end
71 |
72 |
73 | def wait_event(event, flags = 0)
74 | status = API::cudaStreamWaitEvent(@p.read_pointer, event, flags)
75 | Pvt::handle_error(status)
76 | self
77 | end
78 |
79 |
80 | def self.wait_event(event, flags = 0)
81 | p = FFI::MemoryPointer.new(:pointer)
82 | p.write_pointer(0)
83 | status = API::cudaStreamWaitEvent(p.read_pointer, event, flags)
84 | Pvt::handle_error(status)
85 | self
86 | end
87 |
88 | def to_ptr
89 | @p.read_pointer
90 | end
91 |
92 | end
93 |
94 | end # module
95 | end # module
96 |
--------------------------------------------------------------------------------
/sgc/lib/memory/buffer.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | require 'ffi'
26 |
27 | require 'memory/interface/ibuffer'
28 | require 'memory/pointer'
29 |
30 |
31 | module SGC
32 | module Memory
33 |
# A fixed-size buffer of homogeneous elements backed by native memory,
# readable and writable from Ruby by element index.
class Buffer

  include IBuffer

  # Per-type FFI accessor names and element byte sizes.
  # Frozen constants replace the previous @@class variables: class
  # variables are shared across the whole inheritance tree and are
  # silently mutable, which makes them a well-known Ruby anti-pattern.
  READERS = { int: :read_int,  long: :read_long,  float: :read_float  }.freeze
  WRITERS = { int: :write_int, long: :write_long, float: :write_float }.freeze
  SIZES   = { int: 4, long: FFI::TypeDefs[:long].size, float: 4 }.freeze

  # Allocate a native buffer of _size_ elements of _type_.
  # @param type [Symbol] one of :int, :long, :float.
  # @param size [Integer] number of elements.
  # @raise [RuntimeError] when _type_ is not a supported element type.
  def initialize(type, size)
    READERS[type] && WRITERS[type] or raise "Invalid buffer element type."

    @reader = READERS[type]
    @writer = WRITERS[type]
    @ptr = FFI::MemoryPointer.new(type, size)
    @size = size
  end

  # @return the element stored at index _i_.
  # @raise [IndexError] when _i_ is out of range.
  def [](i)
    assert_index(i)
    @ptr[i].send(@reader)
  end

  # Store _v_ at index _i_.
  # @return _v_
  # @raise [IndexError] when _i_ is out of range.
  def []=(i, v)
    assert_index(i)
    @ptr[i].send(@writer, v)
    v
  end

  # @return [Integer] the number of elements in this buffer.
  def size
    @size
  end

  # @return [Integer] the size of one element in bytes.
  def element_size
    @ptr.type_size
  end

  # @return [FFI::MemoryPointer] the underlying native pointer.
  def ptr
    @ptr
  end

  # @return [MemoryPointer] a pointer wrapper addressing element _i_.
  # @raise [IndexError] when _i_ is out of range.
  def offset(i)
    assert_index(i)
    MemoryPointer.new(@ptr[i])
  end

  # @return [Integer, nil] byte size of one element of _type_, or nil
  #   when _type_ is not supported.
  def self.element_size(type)
    SIZES[type]
  end

protected

  # Raise IndexError unless 0 <= i < size.
  def assert_index(i)
    i >= 0 && i < @size or raise IndexError, "Invalid index to buffer: index = #{i}. Expect index in 0..#{@size-1}"
  end

end
98 |
99 | end # module
100 | end # module
101 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/runtime/device.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | require 'cuda/runtime/ffi-cuda'
26 | require 'cuda/runtime/error'
27 |
28 |
29 | module SGC
30 | module Cuda
31 |
# Class-level wrappers over the CUDA runtime device-management API.
# Every call routes its status code through Pvt::handle_error.
class CudaDevice

  # @return [Integer] the number of CUDA-capable devices.
  def self.count
    n = FFI::MemoryPointer.new(:int)
    Pvt::handle_error(API::cudaGetDeviceCount(n))
    n.read_int
  end


  # @return [Integer] the id of the device currently in use.
  def self.get
    devid = FFI::MemoryPointer.new(:int)
    Pvt::handle_error(API::cudaGetDevice(devid))
    devid.read_int
  end
  class << self; alias_method :current, :get; end


  # Select device _devid_ for subsequent CUDA calls on this thread.
  # @return self
  def self.set(devid)
    Pvt::handle_error(API::cudaSetDevice(devid))
    self
  end
  class << self; alias_method :current=, :set; end


  # @return [Integer] the id of the device best matching _prop_.
  def self.choose(prop)
    best = FFI::MemoryPointer.new(:int)
    Pvt::handle_error(API::cudaChooseDevice(best, prop.to_ptr))
    best.read_int
  end


  # @return [CudaDeviceProp] the properties of device _devid_
  #   (defaults to the device currently in use).
  def self.properties(devid = self.get)
    prop = API::CudaDeviceProp.new
    Pvt::handle_error(API::cudaGetDeviceProperties(prop.to_ptr, devid))
    prop
  end


  # Set the flags used for device execution.
  # @param flags [Symbol, Integer] a CudaDeviceFlags symbol or raw value.
  def self.flags=(flags)
    flags = CudaDeviceFlags[flags] if flags.is_a?(Symbol)
    Pvt::handle_error(API::cudaSetDeviceFlags(flags))
    flags
  end


  # Restrict CUDA execution to the device ids listed in _devs_.
  def self.valid_devices=(devs)
    ids = FFI::MemoryPointer.new(:int, devs.count)
    ids.write_array_of_int(devs.to_a)
    Pvt::handle_error(API::cudaSetValidDevices(ids, devs.count))
    devs
  end

end
97 |
98 | end # module
99 | end # module
100 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/runtime/event.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | require 'cuda/runtime/ffi-cuda'
26 | require 'cuda/runtime/error'
27 |
28 |
29 | module SGC
30 | module Cuda
31 |
# Thin object wrapper around a native cudaEvent_t handle.
class CudaEvent

  # Allocate the host-side slot that holds the native event handle.
  # No CUDA event exists until #create is called.
  def initialize
    @p = FFI::MemoryPointer.new(:pointer)
  end


  # Create the native CUDA event.
  # @param flags [Symbol, Integer] a CudaEventFlags symbol or raw value.
  # @return self
  def create(flags = CUDA_EVENT_DEFAULT)
    if flags == CUDA_EVENT_DEFAULT
      status = API::cudaEventCreate(@p)
    else
      flags = CudaEventFlags[flags] if flags.is_a?(Symbol)
      status = API::cudaEventCreateWithFlags(@p, flags)
    end
    Pvt::handle_error(status)
    self
  end


  # Destroy the native event and null out the stored handle.
  # @return nil
  def destroy
    status = API::cudaEventDestroy(@p.read_pointer)
    Pvt::handle_error(status)
    @p.write_pointer(0)
    nil
  end


  # Poll the event without blocking.
  # @return [Boolean] true when all captured work has completed,
  #   false when it is still pending.
  def query
    status = API::cudaEventQuery(@p.read_pointer)
    if status == Pvt::CUDA_SUCCESS
      return true
    elsif status == Pvt::CUDA_ERROR_NOT_READ
      # NOTE(review): the CUDA status name is cudaErrorNotReady — confirm
      # Pvt really defines CUDA_ERROR_NOT_READ (not CUDA_ERROR_NOT_READY).
      return false
    end
    Pvt::handle_error(status)
    self
  end


  # Record this event in _stream_ (0 selects the default stream).
  # @return self
  def record(stream = 0)
    if stream == 0
      # Build a NULL handle to address the default stream.
      p = FFI::MemoryPointer.new(:pointer)
      p.write_pointer(0)
      stream = p.read_pointer
    else
      stream = stream.to_ptr
    end
    status = API::cudaEventRecord(@p.read_pointer, stream)
    Pvt::handle_error(status)
    self
  end


  # Block the calling host thread until the event has completed.
  # @return self
  def synchronize
    status = API::cudaEventSynchronize(@p.read_pointer)
    Pvt::handle_error(status)
    self
  end


  # @return [FFI::Pointer] the raw native event handle.
  def to_ptr
    @p.read_pointer
  end


  # Elapsed time between two recorded events.
  # @return [Float] time in milliseconds.
  def self.elapsed_time(event_start, event_end)
    t = FFI::MemoryPointer.new(:float)
    status = API::cudaEventElapsedTime(t, event_start.to_ptr, event_end.to_ptr)
    # Fix: the status was previously discarded, silently hiding failures
    # (e.g. querying events that were never recorded). Report it like
    # every other runtime call in this class.
    Pvt::handle_error(status)
    t.read_float
  end

protected

  CUDA_EVENT_DEFAULT = CudaEventFlags[:cudaEventDefault]

end
108 |
109 | end # module
110 | end # module
111 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/ruby/cu.rb:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2010 Chung Shin Yee
2 | #
3 | # shinyee@speedgocomputing.com
4 | # http://www.speedgocomputing.com
5 | # http://github.com/xman/sgc-ruby-cuda
6 | # http://rubyforge.org/projects/rubycuda
7 | #
8 | # This file is part of SGC-Ruby-CUDA.
9 | #
10 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License as published by
12 | # the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU General Public License
21 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
22 |
23 |
24 | module SGC
25 | module CU
26 |
27 |
# Convenience aliases for the CUDevice methods defined by the C
# extension (rubycu). Each method simply delegates.
class CUDevice

  # Delegates to CUDevice::get_count.
  def self.count
    self.get_count
  end

  # Delegates to CUDevice#get_name.
  def name
    get_name
  end

  # Delegates to CUDevice#get_attribute.
  def attribute(attr)
    get_attribute(attr)
  end

  # Delegates to CUDevice#get_properties.
  def properties
    get_properties
  end

end
51 |
52 |
# Convenience aliases for the CUContext methods defined by the C
# extension (rubycu). Each method simply delegates.
class CUContext

  # Delegates to CUContext::get_device.
  def self.device
    self.get_device
  end

  # Delegates to CUContext::get_limit.
  def self.limit(lim)
    get_limit(lim)
  end

  # Delegates to CUContext::get_cache_config.
  def self.cache_config
    get_cache_config
  end

  # Delegates to CUContext#get_api_version.
  def api_version
    get_api_version
  end

end
76 |
77 |
# Convenience aliases for the CUModule methods defined by the C
# extension (rubycu). Each method simply delegates.
class CUModule

  # Delegates to CUModule#get_function.
  def function(name_str)
    get_function(name_str)
  end

  # Delegates to CUModule#get_global.
  def global(name_str)
    get_global(name_str)
  end

  # Delegates to CUModule#get_texref.
  def texref(name_str)
    get_texref(name_str)
  end

end
96 |
97 |
# Convenience alias for the CUFunction method defined by the C
# extension (rubycu).
class CUFunction

  # Delegates to CUFunction#get_attribute.
  def attribute(attr)
    get_attribute(attr)
  end

end
106 |
107 |
# Convenience aliases for the CUTexRef methods defined by the C
# extension (rubycu). Each method simply delegates.
class CUTexRef

  # Delegates to CUTexRef#get_address.
  def address
    get_address
  end

  # Delegates to CUTexRef#get_address_mode.
  def address_mode(dim)
    get_address_mode(dim)
  end

  # Delegates to CUTexRef#get_filter_mode.
  def filter_mode
    get_filter_mode
  end

  # Delegates to CUTexRef#get_flags.
  def flags
    get_flags
  end

end
131 |
132 |
# Delegates to ::driver_get_version (defined in the C extension);
# presumably returns the installed CUDA driver version — confirm there.
def driver_version
  driver_get_version
end
module_function :driver_version
138 |
139 |
140 | end # module
141 | end # module
142 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/runtime/function.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | require 'cuda/runtime/ffi-cuda'
26 | require 'cuda/runtime/cuda'
27 | require 'cuda/runtime/error'
28 | require 'memory/pointer'
29 | require 'dl'
30 |
31 |
32 | module SGC
33 | module Cuda
34 |
# Wraps a kernel of the CUDA runtime "execution control" API, addressed
# by name. A launch is a three-step dance: CudaFunction.configure (grid),
# CudaFunction.setup (arguments), then #launch.
class CudaFunction

  # [String] the kernel name this object refers to.
  attr_reader :name


  # @param name [String] the kernel name; the CUDA runtime resolves it
  #   against the libraries loaded via load_lib_file.
  def initialize(name)
    @name = name
  end


  # Query this kernel's function attributes (memory sizes, register
  # count, max threads per block, ...).
  # @return [CudaFuncAttributes]
  def attributes
    a = CudaFuncAttributes.new
    status = API::cudaFuncGetAttributes(a.to_ptr, @name)
    Pvt::handle_error(status)
    a
  end


  # Set the preferred L1/shared-memory cache configuration.
  # @param config [CudaFuncCache enum value]
  def cache_config=(config)
    status = API::cudaFuncSetCacheConfig(@name, config)
    Pvt::handle_error(status)
    config
  end


  # Launch this kernel using the grid and arguments previously set with
  # .configure and .setup.
  # @return self
  def launch
    status = API::cudaLaunch(@name)
    Pvt::handle_error(status)
    self
  end


  # Set the launch configuration for the next launch.
  # @param grid_dim [Dim3] grid dimensions in blocks.
  # @param block_dim [Dim3] block dimensions in threads.
  # @param shared_mem_size [Integer] dynamic shared memory per block, bytes.
  # @param stream [Integer] stream handle (0 = default stream).
  # @return self
  def self.configure(grid_dim, block_dim, shared_mem_size = 0, stream = 0)
    status = API::cudaConfigureCall(grid_dim, block_dim, shared_mem_size, stream)
    Pvt::handle_error(status)
    self
  end


  # Push the kernel arguments for the next launch, in order, computing
  # each argument's byte offset with the CUDA alignment rule (offset is
  # aligned up to the argument size). Integers and Floats are passed as
  # 4-byte values (Ruby Floats are narrowed to C float); MemoryPointer
  # arguments are passed by the address of the pointer itself.
  # NOTE(review): `Fixnum` is 1.9-era Ruby (per .rvmrc); it is gone in
  # Ruby >= 3.2.
  def self.setup(*args)
    offset = 0
    args.each do |x|
      case x
      when Fixnum
        p = FFI::MemoryPointer.new(:int)
        p.write_int(x)
        size = 4
      when Float
        p = FFI::MemoryPointer.new(:float)
        p.write_float(x)
        size = 4
      when SGC::Memory::MemoryPointer
        p = x.ref
        size = FFI::MemoryPointer.size
      else
        raise TypeError, "Invalid type of argument #{x.to_s}."
      end
      offset = align_up(offset, size)
      status = API::cudaSetupArgument(p, size, offset)
      Pvt::handle_error(status)
      offset += size
    end
  end


  # Load a kernel library by short name. Not implemented.
  def self.load_lib(name)
    raise NotImplementedError
  end


  # Load a compiled kernel shared library (e.g. libkernel.N.so) so the
  # runtime can resolve kernel names. Uses the 1.9-era DL stdlib; the
  # handle is kept for unload_all_libs.
  def self.load_lib_file(name)
    @@libs << DL::dlopen(name)
    # API::ffi_lib(name)
    self
  end


  # Close every library handle opened by load_lib_file.
  # @return self
  def self.unload_all_libs
    @@libs.each do |h|
      h.close
    end
    @@libs = []
    self
  end

protected

  # Round _offset_ up to the next multiple of _alignment_
  # (alignment is assumed to be a power of two).
  def self.align_up(offset, alignment)
    (offset + alignment - 1) & ~(alignment - 1)
  end

  @@libs = []

end
129 |
130 | end # module
131 | end # module
132 |
--------------------------------------------------------------------------------
/sgc/lib/madison/kernel/test.cu:
--------------------------------------------------------------------------------
1 | #include
2 | //#include
3 | #include
4 | #include
5 | //#include
6 |
7 | #define DIMENSIONS 5
8 | #define BLOCK_SIZE 16
9 |
10 |
11 | // Kernel definition
12 | //__global__ void MatAdd(float A[N][N], float B[N][N],
13 | // float C[N][N])
14 | //{
15 | // int i = threadIdx.x;
16 | // int j = threadIdx.y;
17 | // C[i][j] = A[i][j] + B[i][j];
18 | //}
19 |
20 | // Matrices are stored in row-major order:
21 | // M(row, col) = *(M.elements + row * M.width + col)
22 |
// Fill the row-major matrix A with synthetic data: entry (row, col) gets
// (row*DIMENSIONS + col) / (DIMENSIONS*count), a linearly increasing value.
// One block per row (blockIdx.x), one thread per column (threadIdx.x);
// presumably launched as <<<count, DIMENSIONS>>> — TODO confirm at call site.
__global__ void MatPopulate(float *A, int count)
{
    int row = blockIdx.x;
    int col = threadIdx.x;
    A[row * DIMENSIONS + col] = (float)(row * DIMENSIONS + col)/(DIMENSIONS*count);
}
29 |
30 | float score(float *A, float *B){
31 | float score = 0.0;
32 | for(int i=0; i>>(d_elements, count);
73 | cudaMemcpy(elements, d_elements, size,
74 | cudaMemcpyDeviceToHost);
75 | for(int i=0; i 2 && !strcmp(argv[2], "raw")){
89 | printf("\nraw\n");
90 | float _score;
91 | for(int i=0; i>>(d_elements, d_scores, count);
107 | cudaMemcpy(scores, d_scores, size2,
108 | cudaMemcpyDeviceToHost);
109 | }
110 | float sum = 0.0;
111 | for (int i=0;i Cluster ##{c}"
49 | CudaMemory.memcpy_htod(@values_dev_1, self.values.offset(c*offset_increment), offset_increment * @type_size)
50 | CudaMemory.memcpy_htod(@keys_dev_1, self.keys.offset(c*offset_increment), offset_increment * INTEGER_SIZE)
51 |
52 | (0...other_clusters_count).each do |cc|
53 | self.class.log ">> with Cluster ##{cc}"
54 | compare_cluster_with(matrix, c, cc, CLUSTER_SIZE, CLUSTER_SIZE)
55 | end
56 | # We have to handle the leftovers => if we have 66 blocks and CLUSTER_SIZE == 64, we have to handle 2 blocks separately
57 | if other_leftovers_count > 0
58 | self.class.log ">> with the leftovers"
59 | compare_cluster_with(matrix, c, other_clusters_count, CLUSTER_SIZE, other_leftovers_count)
60 | end
61 | end
62 | if self_leftovers_count > 0
63 | self.class.log "\n> The leftovers"
64 | c = self_clusters_count
65 | CudaMemory.memcpy_htod(@values_dev_1, self.values.offset(c*offset_increment), self_leftovers_count * BLOCK_SIZE * self.vectors_dimension * @type_size)
66 | CudaMemory.memcpy_htod(@keys_dev_1, self.keys.offset(c*offset_increment), self_leftovers_count * BLOCK_SIZE * self.vectors_dimension * INTEGER_SIZE)
67 |
68 | (0...other_clusters_count).each do |cc|
69 | self.class.log ">> with Cluster ##{cc}"
70 | compare_cluster_with(matrix, self_clusters_count, cc, self_leftovers_count, CLUSTER_SIZE)
71 | end
72 | # We have to handle the leftovers => if we have 66 blocks and CLUSTER_SIZE == 64, we have to handle 2 blocks separately
73 | if other_leftovers_count > 0
74 | self.class.log ">> with the leftovers"
75 | compare_cluster_with(matrix, self_clusters_count, other_clusters_count, self_leftovers_count, other_leftovers_count)
76 | end
77 | end
78 | end
79 |
# Compare the cluster currently staged on the device (@values_dev_1 /
# @keys_dev_1) against cluster _offset_ of _matrix_: copy that cluster to
# the device, launch the ParallelScore kernel, copy the scores back and
# print them.
def compare_cluster_with(matrix, cluster, offset, current_cluster_size, size)
  # Fix: debug tracing now goes to stderr. stdout carries the score table
  # emitted by output_scores, and the previous bare `puts` calls were
  # interleaving debug text into that data stream.
  $stderr.puts [matrix.inspect, cluster, offset, current_cluster_size, size]
  $stderr.puts matrix.inspect
  $stderr.puts size * BLOCK_SIZE * self.vectors_dimension * @type_size
  CudaMemory.memcpy_htod(@values_dev_2, matrix.values.offset(offset * CLUSTER_SIZE * BLOCK_SIZE * self.vectors_dimension), size * BLOCK_SIZE * self.vectors_dimension * @type_size)
  CudaMemory.memcpy_htod(@keys_dev_2, matrix.keys.offset(offset * CLUSTER_SIZE * BLOCK_SIZE * self.vectors_dimension), size * BLOCK_SIZE * self.vectors_dimension * INTEGER_SIZE)

  # One thread block per vector block of the current cluster,
  # BLOCK_SIZE threads per block.
  CudaFunction.configure(Dim3.new(current_cluster_size, 1, 1), Dim3.new(BLOCK_SIZE, 1, 1))
  CudaFunction.setup(@values_dev_1, @values_dev_2, @keys_dev_1, @keys_dev_2, @scores_dev, size * BLOCK_SIZE)
  f = CudaFunction.new("ParallelScore")
  f.launch
  CudaMemory.memcpy_dtoh(@scores, @scores_dev, @scores_size * @type_size)

  $stderr.puts "#{cluster * CLUSTER_SIZE * BLOCK_SIZE} .. #{(cluster) * BLOCK_SIZE * CLUSTER_SIZE + current_cluster_size * BLOCK_SIZE - 1} x #{offset * CLUSTER_SIZE * BLOCK_SIZE} .. #{offset * CLUSTER_SIZE * BLOCK_SIZE + size * BLOCK_SIZE - 1}"
  self.class.output_scores(current_cluster_size * BLOCK_SIZE, size * BLOCK_SIZE, cluster * CLUSTER_SIZE * BLOCK_SIZE, offset * CLUSTER_SIZE * BLOCK_SIZE, @scores)
end
97 |
# Generate kernel.h with the current dimensions and compile kernel.cu
# with nvcc into a dimension-specific shared library.
# @return [String] absolute path of the built library.
# @raise [RuntimeError] when the nvcc build fails (previously a failed
#   build was ignored and a path to a non-existent library was returned).
def prepare_kernel_lib
  kernel_dir = "#{File.dirname(__FILE__)}/kernel"
  File.open("#{kernel_dir}/kernel.h", 'w') do |f|
    f.write "#define DIMENSIONS #{self.vectors_dimension}\n"
    f.write "#define BLOCK_SIZE #{BLOCK_SIZE}\n"
    f.write "#define CLUSTER_SIZE #{CLUSTER_SIZE}\n"
  end
  lib = "libkernel.#{self.vectors_dimension}.so"
  # rm -f: do not fail (or print errors) when no stale library exists;
  # && chaining makes the overall exit status reflect the nvcc run.
  built = system "cd #{kernel_dir} && rm -f libkernel.*.so && nvcc -shared -Xcompiler -fPIC kernel.cu -o #{lib}"
  raise "nvcc failed to build #{lib} in #{kernel_dir}" unless built
  "#{kernel_dir}/#{lib}"
end
108 | end
109 |
module ClassMethods

  # Emit a diagnostic message on stderr (stdout is reserved for the
  # score table written by output_scores).
  def log(message)
    $stderr.puts message
  end

  # Print the rows x cols score table to stdout, one "i<TAB> j<TAB> value"
  # line per cell, with indices shifted by (offset_x, offset_y).
  # _score_ is either an SGC::Memory::Buffer of per-cell scores or a
  # single scalar repeated for every cell.
  def output_scores(rows, cols, offset_x, offset_y, score)
    rows.times do |i|
      cols.times do |j|
        value = score.is_a?(SGC::Memory::Buffer) ? score[i * cols + j] : score
        puts "#{offset_x + i}\t #{offset_y + j}\t %.3f\n" % value
      end
    end
  end
end
126 | end
127 | end
--------------------------------------------------------------------------------
/sgc/lib/cuda/runtime/ffi-cuda.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with SGC-Ruby-CUDA. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 |
25 | require 'ffi'
26 | require 'ffi/prettystruct'
27 |
28 |
29 | module SGC
30 | module Cuda
31 | module API
32 |
33 | extend FFI::Library
34 | ffi_lib "cudart"
35 |
# cudaError_t: status codes returned by every CUDA runtime call.
# Values mirror driver_types.h of the linked runtime (this list appears
# to match a CUDA 3.x-era toolkit -- NOTE(review): re-check on upgrade).
CudaError = enum(
  :cudaSuccess, 0,
  :cudaErrorMissingConfiguration, 1,
  :cudaErrorMemoryAllocation, 2,
  :cudaErrorInitializationError, 3,
  :cudaErrorLaunchFailure, 4,
  :cudaErrorPriorLaunchFailure, 5,
  :cudaErrorLaunchTimeout, 6,
  :cudaErrorLaunchOutOfResources, 7,
  :cudaErrorInvalidDeviceFunction, 8,
  :cudaErrorInvalidConfiguration, 9,
  :cudaErrorInvalidDevice, 10,
  :cudaErrorInvalidValue, 11,
  :cudaErrorInvalidPitchValue, 12,
  :cudaErrorInvalidSymbol, 13,
  :cudaErrorMapBufferObjectFailed, 14,
  :cudaErrorUnmapBufferObjectFailed, 15,
  :cudaErrorInvalidHostPointer, 16,
  :cudaErrorInvalidDevicePointer, 17,
  :cudaErrorInvalidTexture, 18,
  :cudaErrorInvalidTextureBinding, 19,
  :cudaErrorInvalidChannelDescriptor, 20,
  :cudaErrorInvalidMemcpyDirection, 21,
  :cudaErrorAddressOfConstant, 22,
  :cudaErrorTextureFetchFailed, 23,
  :cudaErrorTextureNotBound, 24,
  :cudaErrorSynchronizationError, 25,
  :cudaErrorInvalidFilterSetting, 26,
  :cudaErrorInvalidNormSetting, 27,
  :cudaErrorMixedDeviceExecution, 28,
  :cudaErrorCudartUnloading, 29,
  :cudaErrorUnknown, 30,
  :cudaErrorNotYetImplemented, 31,
  :cudaErrorMemoryValueTooLarge, 32,
  :cudaErrorInvalidResourceHandle, 33,
  :cudaErrorNotReady, 34,
  :cudaErrorInsufficientDriver, 35,
  :cudaErrorSetOnActiveProcess, 36,
  :cudaErrorInvalidSurface, 37,
  :cudaErrorNoDevice, 38,
  :cudaErrorECCUncorrectable, 39,
  :cudaErrorSharedObjectSymbolNotFound, 40,
  :cudaErrorSharedObjectInitFailed, 41,
  :cudaErrorUnsupportedLimit, 42,
  :cudaErrorDuplicateVariableName, 43,
  :cudaErrorDuplicateTextureName, 44,
  :cudaErrorDuplicateSurfaceName, 45,
  :cudaErrorDevicesUnavailable, 46,
  :cudaErrorInvalidKernelImage, 47,
  :cudaErrorNoKernelImageForDevice, 48,
  :cudaErrorIncompatibleDriverContext, 49,
  :cudaErrorStartupFailure, 0x7F,
  :cudaErrorApiFailureBase, 10000,
)
CudaError_t = CudaError

# Flags for cudaSetDeviceFlags (scheduling policy, host mapping, ...).
CudaDeviceFlags = enum(
  :cudaDeviceScheduleAuto, 0,
  :cudaDeviceScheduleSpin, 1,
  :cudaDeviceScheduleYield, 2,
  :cudaDeviceBlockingSync, 4,
  :cudaDeviceMapHost, 8,
  :cudaDeviceLmemResizeToMax, 16,
)

# Flags for cudaEventCreateWithFlags.
CudaEventFlags = enum(
  :cudaEventDefault, 0,
  :cudaEventBlockingSync, 1,
  :cudaEventDisableTiming, 2,
)

# Flags for cudaHostAlloc.
CudaHostAllocFlags = enum(
  :cudaHostAllocDefault, 0,
  :cudaHostAllocPortable, 1,
  :cudaHostAllocMapped, 2,
  :cudaHostAllocWriteCombined, 4,
)

# Flags for CUDA array allocation.
CudaArrayFlags = enum(
  :cudaArrayDefault, 0x00,
  :cudaArraySurfaceLoadStore, 0x02,
)

# Direction argument of the cudaMemcpy* family.
CudaMemcpyKind = enum(
  :cudaMemcpyHostToHost, 0,
  :cudaMemcpyHostToDevice, 1,
  :cudaMemcpyDeviceToHost, 2,
  :cudaMemcpyDeviceToDevice, 3,
)

# Channel format classification for textures/surfaces.
CudaChannelFormatKind = enum(
  :cudaChannelFormatKindSigned, 0,
  :cudaChannelFormatKindUnsigned, 1,
  :cudaChannelFormatKindFloat, 2,
  :cudaChannelFormatKindNone,3,
)

# L1/shared-memory cache preference (cudaFuncSetCacheConfig).
CudaFuncCache = enum(
  :cudaFuncCachePreferNone, 0,
  :cudaFuncCachePreferShared, 1,
  :cudaFuncCachePreferL1, 2,
)

# Per-thread resource limits (cudaThreadGetLimit/SetLimit).
CudaLimit = enum(
  :cudaLimitStackSize, 0x00,
  :cudaLimitPrintfFifoSize, 0x01,
  :cudaLimitMallocHeapSize, 0x02,
)

# Device compute-mode as reported in CudaDeviceProp#computeMode.
CudaComputeMode = enum(
  :cudaComputeModeDefault, 0,
  :cudaComputeModeExclusive, 1,
  :cudaComputeModeProhibited, 2,
)

# Out-of-bounds behavior for surface reads/writes.
CudaSurfaceBoundaryMode = enum(
  :cudaBoundaryModeZero, 0,
  :cudaBoundaryModeClamp, 1,
  :cudaBoundaryModeTrap, 2,
)

# Surface format selection mode.
CudaSurfaceFormatMode = enum(
  :cudaFormatModeForced, 0,
  :cudaFormatModeAuto, 1,
)

# Texture addressing outside [0, 1) / the array bounds.
CudaTextureAddressMode = enum(
  :cudaAddressModeWrap, 0,
  :cudaAddressModeClamp, 1,
  :cudaAddressModeMirror, 2,
  :cudaAddressModeBorder, 3,
)

# Texture filtering mode.
CudaTextureFilterMode = enum(
  :cudaFilterModePoint, 0,
  :cudaFilterModeLinear, 1,
)

# Texture read (element vs normalized float) mode.
CudaTextureReadMode = enum(
  :cudaReadModeElementType, 0,
  :cudaReadModeNormalizedFloat, 1,
)

# cudaStream_t and cudaEvent_t are opaque pointers on the C side.
typedef :pointer, :CudaStream
typedef :pointer, :CudaEvent

typedef :CudaStream, :CudaStream_t
typedef :CudaEvent, :CudaEvent_t
184 |
185 |
# Mirrors the CUDA `dim3` type: three unsigned ints, stored as one
# 3-element array field and exposed as x/y/z accessors.
class Dim3 < FFI::Struct
  layout(
    :array, [:uint, 3],
  )

  # Keep the raw FFI::Struct accessors reachable under private names;
  # [] and []= are redefined below to index the embedded array.
  alias :init :initialize
  alias :get :[]
  alias :set :[]=
  private :init, :get, :set

  # @param x,y,z [Integer] the three dimension components.
  def initialize(x, y, z)
    init
    @array = get(:array)
    @array[0], @array[1], @array[2] = x, y, z
  end

  def [](index); @array[index]; end
  def []=(index, value); @array[index] = value; end

  def x; @array[0]; end
  def y; @array[1]; end
  def z; @array[2]; end

  def x=(value); @array[0] = value; end
  def y=(value); @array[1] = value; end
  def z=(value); @array[2] = value; end

end
214 |
# The FFI mirrors of the CUDA runtime C structs. Field order and types
# must match the C ABI of the linked runtime byte-for-byte; the layouts
# below appear to target a CUDA 3.x-era toolkit (pciBusID present,
# __cudaReserved padding) -- NOTE(review): re-verify before upgrading
# the CUDA runtime.

# struct cudaDeviceProp (cudaGetDeviceProperties output).
class CudaDeviceProp < FFI::PrettyStruct
  layout(
    :name, [:char, 256],
    :totalGlobalMem, :size_t,
    :sharedMemPerBlock, :size_t,
    :regsPerBlock, :int,
    :warpSize, :int,
    :memPitch, :size_t,
    :maxThreadsPerBlock, :int,
    :maxThreadsDim, [:int, 3],
    :maxGridSize, [:int, 3],
    :clockRate, :int,
    :totalConstMem, :size_t,
    :major, :int,
    :minor, :int,
    :textureAlignment, :size_t,
    :deviceOverlap, :int,
    :multiProcessorCount, :int,
    :kernelExecTimeoutEnabled, :int,
    :integrated, :int,
    :canMapHostMemory, :int,
    :computeMode, :int,
    :maxTexture1D, :int,
    :maxTexture2D, [:int, 2],
    :maxTexture3D, [:int, 3],
    :maxTexture2DArray, [:int, 3],
    :surfaceAlignment, :size_t,
    :concurrentKernels, :int,
    :ECCEnabled, :int,
    :pciBusID, :int,
    :__cudaReserved, [:int, 21],
  )
end

# struct cudaFuncAttributes (cudaFuncGetAttributes output).
class CudaFuncAttributes < FFI::PrettyStruct
  layout(
    :sharedSizeBytes, :size_t,
    :constSizeBytes, :size_t,
    :localSizeBytes, :size_t,
    :maxThreadsPerBlock, :int,
    :numRegs, :int,
    :ptxVersion, :int,
    :binaryVersion, :int,
    :__cudaReserved, [:int, 6],
  )
end

# struct cudaChannelFormatDesc (texture/surface channel layout).
class CudaChannelFormatDesc < FFI::PrettyStruct
  layout(
    :x, :int,
    :y, :int,
    :z, :int,
    :w, :int,
    :f, CudaChannelFormatKind,
  )
end

# struct cudaPitchedPtr (pitched device allocation descriptor).
class CudaPitchedPtr < FFI::PrettyStruct
  layout(
    :ptr, :pointer,
    :pitch, :size_t,
    :xsize, :size_t,
    :ysize, :size_t,
  )
end

# struct cudaPos (3D position, in elements).
class CudaPos < FFI::PrettyStruct
  layout(
    :x, :size_t,
    :y, :size_t,
    :z, :size_t,
  )
end

# struct cudaExtent (3D size descriptor).
class CudaExtent < FFI::PrettyStruct
  layout(
    :width, :size_t,
    :height, :size_t,
    :depth, :size_t,
  )
end

# struct cudaMemcpy3DParms (parameter block of cudaMemcpy3D).
class CudaMemcpy3DParms < FFI::PrettyStruct
  layout(
    :srcArray, :pointer,
    :srcPos, CudaPos,
    :srcPtr, CudaPitchedPtr,
    :dstArray, :pointer,
    :dstPos, CudaPos,
    :dstPtr, CudaPitchedPtr,
    :extent, CudaExtent,
    :kind, CudaMemcpyKind,
  )
end

# struct textureReference.
class TextureReference < FFI::PrettyStruct
  layout(
    :normalized, :int,
    :filterMode, CudaTextureFilterMode,
    :addressMode, [CudaTextureAddressMode, 3],
    :channelDesc, CudaChannelFormatDesc,
    :__cudaReserved, [:int, 16],
  )
end

# struct surfaceReference.
class SurfaceReference < FFI::PrettyStruct
  layout(
    :channelDesc, CudaChannelFormatDesc,
  )
end
325 |
# Bindings to libcudart. Every function returns the raw cudaError_t as
# an :int; callers route it through Pvt::handle_error.

# CUDA Version Management.
attach_function :cudaDriverGetVersion, [:pointer], :int
attach_function :cudaRuntimeGetVersion, [:pointer], :int

# CUDA Error Handling.
attach_function :cudaGetErrorString, [CudaError], :string
attach_function :cudaGetLastError, [], :int
attach_function :cudaPeekAtLastError, [], :int

# CUDA Device Management.
attach_function :cudaChooseDevice, [:pointer, :pointer], :int
attach_function :cudaGetDevice, [:pointer], :int
attach_function :cudaGetDeviceCount, [:pointer], :int
attach_function :cudaGetDeviceProperties, [:pointer, :int], :int
attach_function :cudaSetDevice, [:int], :int
attach_function :cudaSetDeviceFlags, [:uint], :int
attach_function :cudaSetValidDevices, [:pointer, :int], :int

# CUDA Thread Management.
attach_function :cudaThreadExit, [], :int
attach_function :cudaThreadGetCacheConfig, [:pointer], :int
attach_function :cudaThreadGetLimit, [:pointer, CudaLimit], :int
attach_function :cudaThreadSetCacheConfig, [CudaFuncCache], :int
attach_function :cudaThreadSetLimit, [CudaLimit, :size_t], :int
attach_function :cudaThreadSynchronize, [], :int

# CUDA Memory Management.
attach_function :cudaFree, [:pointer], :int
attach_function :cudaFreeArray, [:pointer], :int
attach_function :cudaFreeHost, [:pointer], :int
attach_function :cudaGetSymbolAddress, [:pointer, :string], :int
attach_function :cudaGetSymbolSize, [:pointer, :string], :int
attach_function :cudaHostAlloc, [:pointer, :size_t, :uint], :int
attach_function :cudaHostGetDevicePointer, [:pointer, :pointer, :uint], :int
attach_function :cudaHostGetFlags, [:pointer, :pointer], :int
attach_function :cudaMalloc, [:pointer, :size_t], :int
attach_function :cudaMalloc3D, [:pointer, CudaExtent.by_value], :int
attach_function :cudaMalloc3DArray, [:pointer, :pointer, CudaExtent.by_value, :uint], :int
attach_function :cudaMallocArray, [:pointer, :pointer, :size_t, :size_t, :uint], :int
attach_function :cudaMallocHost, [:pointer, :size_t], :int
attach_function :cudaMallocPitch, [:pointer, :pointer, :size_t, :size_t], :int
attach_function :cudaMemcpy, [:pointer, :pointer, :size_t, CudaMemcpyKind], :int
attach_function :cudaMemcpy2D, [:pointer, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind], :int
attach_function :cudaMemcpy2DArrayToArray, [:pointer, :size_t, :size_t, :pointer, :size_t, :size_t, :size_t, :size_t, CudaMemcpyKind], :int
attach_function :cudaMemcpy2DAsync, [:pointer, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind, :CudaStream], :int
attach_function :cudaMemcpy2DFromArray, [:pointer, :size_t, :pointer, :size_t, :size_t, :size_t, :size_t, CudaMemcpyKind], :int
attach_function :cudaMemcpy2DFromArrayAsync, [:pointer, :size_t, :pointer, :size_t, :size_t, :size_t, :size_t, CudaMemcpyKind, :CudaStream], :int
attach_function :cudaMemcpy2DToArray, [:pointer, :size_t, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind], :int
attach_function :cudaMemcpy2DToArrayAsync, [:pointer, :size_t, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind, :CudaStream], :int
attach_function :cudaMemcpy3D, [:pointer], :int
attach_function :cudaMemcpy3DAsync, [:pointer, :CudaStream], :int
attach_function :cudaMemcpyArrayToArray, [:pointer, :size_t, :size_t, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind], :int
attach_function :cudaMemcpyAsync, [:pointer, :pointer, :size_t, CudaMemcpyKind, :CudaStream], :int
attach_function :cudaMemcpyFromArray, [:pointer, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind], :int
attach_function :cudaMemcpyFromArrayAsync, [:pointer, :pointer, :size_t, :size_t, :size_t, CudaMemcpyKind, :CudaStream], :int
attach_function :cudaMemcpyFromSymbol, [:pointer, :string, :size_t, :size_t, CudaMemcpyKind], :int
attach_function :cudaMemcpyFromSymbolAsync, [:pointer, :string, :size_t, :size_t, CudaMemcpyKind, :CudaStream], :int
attach_function :cudaMemcpyToArray, [:pointer, :size_t, :size_t, :pointer, :size_t, CudaMemcpyKind], :int
attach_function :cudaMemcpyToArrayAsync, [:pointer, :size_t, :size_t, :pointer, :size_t, CudaMemcpyKind, :CudaStream], :int
attach_function :cudaMemcpyToSymbol, [:string, :pointer, :size_t, :size_t, CudaMemcpyKind], :int
attach_function :cudaMemcpyToSymbolAsync, [:string, :pointer, :size_t, :size_t, CudaMemcpyKind, :CudaStream], :int
attach_function :cudaMemGetInfo, [:pointer, :pointer], :int
attach_function :cudaMemset, [:pointer, :int, :size_t], :int
attach_function :cudaMemset2D, [:pointer, :size_t, :int, :size_t, :size_t], :int
attach_function :cudaMemset2DAsync, [:pointer, :size_t, :int, :size_t, :size_t, :CudaStream], :int
attach_function :cudaMemset3D, [CudaPitchedPtr.by_value, :int, CudaExtent.by_value], :int
attach_function :cudaMemset3DAsync, [CudaPitchedPtr.by_value, :int, CudaExtent.by_value, :CudaStream], :int
attach_function :cudaMemsetAsync, [:pointer, :int, :size_t, :CudaStream], :int
# The make_cuda* C helpers are static inline and not exported from
# libcudart, hence the Ruby reimplementations below instead of bindings.
# attach_function :make_cudaExtent, [:size_t, :size_t, :size_t], CudaExtent
# attach_function :make_cudaPitchedPtr, [:pointer, :size_t, :size_t, :size_t], CudaPitchedPtr
# attach_function :make_cudaPos, [:size_t, :size_t, :size_t], CudaPos
397 |
# Build a CudaExtent FFI struct describing a 3D size.
# _w_, _h_, _d_ are the extent's width, height and depth.
def make_cudaExtent(w, h, d)
    extent = CudaExtent.new
    extent[:width] = w
    extent[:height] = h
    extent[:depth] = d
    extent
end
403 |
# Build a CudaPitchedPtr FFI struct for pitched (row-aligned) memory.
# _d_ is the base pointer, _p_ the pitch in bytes, _xsz_/_ysz_ the logical
# width and height of the allocation.
def make_cudaPitchedPtr(d, p, xsz, ysz)
    pitched = CudaPitchedPtr.new
    { ptr: d, pitch: p, xsize: xsz, ysize: ysz }.each do |field, value|
        pitched[field] = value
    end
    pitched
end
412 |
# Build a CudaPos FFI struct holding a 3D coordinate (_x_, _y_, _z_).
def make_cudaPos(x, y, z)
    pos = CudaPos.new
    pos[:x], pos[:y], pos[:z] = x, y, z
    pos
end
420 |
421 | # CUDA Execution Control.
422 | attach_function :cudaConfigureCall, [Dim3.by_value, Dim3.by_value, :size_t, :uint], :int
423 | attach_function :cudaFuncGetAttributes, [:pointer, :string], :int
424 | attach_function :cudaFuncSetCacheConfig, [:string, CudaFuncCache], :int
425 | attach_function :cudaLaunch, [:string], :int
426 | attach_function :cudaSetDoubleForDevice, [:pointer], :int
427 | attach_function :cudaSetDoubleForHost, [:pointer], :int
428 | attach_function :cudaSetupArgument, [:pointer, :size_t, :size_t], :int
429 |
430 | # CUDA Stream Management.
431 | attach_function :cudaStreamCreate, [:pointer], :int
432 | attach_function :cudaStreamDestroy, [:CudaStream], :int
433 | attach_function :cudaStreamQuery, [:CudaStream], :int
434 | attach_function :cudaStreamSynchronize, [:CudaStream], :int
435 | attach_function :cudaStreamWaitEvent, [:CudaStream, :CudaEvent, :uint], :int
436 |
437 | # CUDA Event Management.
438 | attach_function :cudaEventCreate, [:pointer], :int
439 | attach_function :cudaEventCreateWithFlags, [:pointer, :uint], :int
440 | attach_function :cudaEventDestroy, [:CudaEvent], :int
441 | attach_function :cudaEventElapsedTime, [:pointer, :CudaEvent, :CudaEvent], :int
442 | attach_function :cudaEventQuery, [:CudaEvent], :int
443 | attach_function :cudaEventRecord, [:CudaEvent, :CudaStream], :int
444 | attach_function :cudaEventSynchronize, [:CudaEvent], :int
445 |
446 | # CUDA Texture Reference Management.
447 | attach_function :cudaBindTexture, [:pointer, :pointer, :pointer, :pointer, :size_t], :int
448 | attach_function :cudaBindTexture2D, [:pointer, :pointer, :pointer, :pointer, :size_t, :size_t, :size_t], :int
449 | attach_function :cudaBindTextureToArray, [:pointer, :pointer, :pointer], :int
450 | attach_function :cudaCreateChannelDesc, [:int, :int, :int, :int, CudaChannelFormatKind], CudaChannelFormatDesc.by_value
451 | attach_function :cudaGetChannelDesc, [:pointer, :pointer], :int
452 | attach_function :cudaGetTextureAlignmentOffset, [:pointer, :pointer], :int
453 | attach_function :cudaGetTextureReference, [:pointer, :string], :int
454 | attach_function :cudaUnbindTexture, [:pointer], :int
455 |
456 | # CUDA Surface Reference Management.
457 | attach_function :cudaBindSurfaceToArray, [:pointer, :pointer, :pointer], :int
458 | attach_function :cudaGetSurfaceReference, [:pointer, :string], :int
459 |
460 | end # module
461 | end # module
462 | end # module
463 |
--------------------------------------------------------------------------------
/sgc/lib/cuda/driver/rubycu.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | # Copyright (c) 2010 Chung Shin Yee
3 | #
4 | # shinyee@speedgocomputing.com
5 | # http://www.speedgocomputing.com
6 | # http://github.com/xman/sgc-ruby-cuda
7 | # http://rubyforge.org/projects/rubycuda
8 | #
9 | # This file is part of SGC-Ruby-CUDA.
10 | #
11 | # SGC-Ruby-CUDA is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # SGC-Ruby-CUDA is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
# along with SGC-Ruby-CUDA.  If not, see <http://www.gnu.org/licenses/>.
23 | */
24 |
25 | #include
26 | #include "ruby.h"
27 | #include "cuda.h"
28 |
29 | namespace SGC {
30 | namespace CU {
31 |
32 | // {{{ SGC Ruby modules.
33 | static VALUE rb_mSGC;
34 | static VALUE rb_mCU;
35 | static VALUE rb_mIBuffer;
36 | static VALUE rb_mIBufferClassMethods;
37 | // }}}
38 |
39 | // {{{ CUDA Ruby classes.
40 | static VALUE rb_cCUDevice;
41 | static VALUE rb_cCUContext;
42 | static VALUE rb_cCUContextFlags;
43 | static VALUE rb_cCULimit;
44 | static VALUE rb_cCUModule;
45 | static VALUE rb_cCUFunction;
46 | static VALUE rb_cCUFunctionAttribute;
47 | static VALUE rb_cCUFunctionCache;
48 | static VALUE rb_cCUDevicePtr;
49 | static VALUE rb_cCUDeviceAttribute;
50 | static VALUE rb_cCUComputeMode;
51 | static VALUE rb_cCUStream;
52 | static VALUE rb_cCUEvent;
53 | static VALUE rb_cCUEventFlags;
54 | static VALUE rb_cCUAddressMode;
55 | static VALUE rb_cCUFilterMode;
56 | static VALUE rb_cCUTexRefFlags;
57 | static VALUE rb_cCUTexRef;
58 | static VALUE rb_cCUResult;
59 | // }}}
60 |
61 | // {{{ SGC Ruby classes.
62 | static VALUE rb_eCUStandardError;
63 |
64 | static VALUE rb_eCUDeviceError;
65 | static VALUE rb_eCUDeviceNotInitializedError;
66 | static VALUE rb_eCUDeviceDeinitializedError;
67 | static VALUE rb_eCUNoDeviceError;
68 | static VALUE rb_eCUInvalidDeviceError;
69 |
70 | static VALUE rb_eCUMapError;
71 | static VALUE rb_eCUMapFailedError;
72 | static VALUE rb_eCUUnMapFailedError;
73 | static VALUE rb_eCUArrayIsMappedError;
74 | static VALUE rb_eCUAlreadyMappedError;
75 | static VALUE rb_eCUNotMappedError;
76 | static VALUE rb_eCUNotMappedAsArrayError;
77 | static VALUE rb_eCUNotMappedAsPointerError;
78 |
79 | static VALUE rb_eCUContextError;
80 | static VALUE rb_eCUInvalidContextError;
81 | static VALUE rb_eCUContextAlreadyCurrentError;
82 | static VALUE rb_eCUUnsupportedLimitError;
83 |
84 | static VALUE rb_eCULaunchError;
85 | static VALUE rb_eCULaunchFailedError;
86 | static VALUE rb_eCULaunchOutOfResourcesError;
87 | static VALUE rb_eCULaunchTimeoutError;
88 | static VALUE rb_eCULaunchIncompatibleTexturingError;
89 |
90 | static VALUE rb_eCUParameterError;
91 | static VALUE rb_eCUInvalidValueError;
92 | static VALUE rb_eCUInvalidHandleError;
93 |
94 | static VALUE rb_eCUMemoryError;
95 | static VALUE rb_eCUOutOfMemoryError;
96 |
97 | static VALUE rb_eCULibraryError;
98 | static VALUE rb_eCUSharedObjectSymbolNotFoundError;
99 | static VALUE rb_eCUSharedObjectInitFailedError;
100 |
101 | static VALUE rb_eCUHardwareError;
102 | static VALUE rb_eCUECCUncorrectableError;
103 |
104 | static VALUE rb_eCUFileError;
105 | static VALUE rb_eCUNoBinaryForGPUError;
106 | static VALUE rb_eCUFileNotFoundError;
107 | static VALUE rb_eCUInvalidSourceError;
108 | static VALUE rb_eCUInvalidImageError;
109 |
110 | static VALUE rb_eCUReferenceError;
111 | static VALUE rb_eCUReferenceNotFoundError;
112 |
113 | static VALUE rb_eCUOtherError;
114 | static VALUE rb_eCUAlreadyAcquiredError;
115 | static VALUE rb_eCUNotReadyError;
116 | static VALUE rb_eCUOperatingSystemError;
117 |
118 | static VALUE rb_eCUUnknownError;
119 |
120 | static VALUE rb_cMemoryPointer;
121 | static VALUE rb_cMemoryBuffer;
122 | static VALUE rb_cInt32Buffer;
123 | static VALUE rb_cInt64Buffer;
124 | static VALUE rb_cFloat32Buffer;
125 | static VALUE rb_cFloat64Buffer;
126 | // }}}
127 |
128 | // {{{ SGC C/C++ structures.
// Raw, untyped host pointer backing the Ruby MemoryPointer class.
typedef struct {
    char* p;
} MemoryPointer;
132 |
// Host buffer: a pointer plus its byte size. is_page_locked records whether
// the storage is page-locked (pinned) memory; the allocation site is outside
// this chunk — NOTE(review): confirm against the buffer alloc functions.
typedef struct : MemoryPointer {
    size_t size;
    bool is_page_locked;
} MemoryBuffer;
137 |
138 | template
139 | struct TypedBuffer : public MemoryBuffer {};
140 |
141 | typedef struct TypedBuffer Int32Buffer;
142 | typedef struct TypedBuffer Int64Buffer;
143 | typedef struct TypedBuffer Float32Buffer;
144 | typedef struct TypedBuffer Float64Buffer;
145 | // }}}
146 |
147 | // {{{ Function prototypes.
148 | static VALUE device_ptr_alloc(VALUE klass);
149 | static VALUE device_ptr_initialize(int argc, VALUE* argv, VALUE self);
150 | // }}}
151 |
152 | // {{{ SGC helpers.
// Free callback handed to Data_Wrap_Struct: cast the opaque pointer back to
// its concrete type so the matching operator delete (and destructor) runs.
// Restores the template parameter and cast argument lost to HTML escaping.
template <typename T>
static void generic_free(void* p)
{
    delete static_cast<T*>(p);
}
158 |
159 | template
160 | static VALUE to_rb(T v);
161 |
162 | VALUE to_rb(bool b)
163 | {
164 | if (b) {
165 | return Qtrue;
166 | }
167 | return Qfalse;
168 | }
169 |
170 | template <>
171 | VALUE to_rb(int v)
172 | {
173 | return INT2FIX(v);
174 | }
175 |
176 | template <>
177 | VALUE to_rb(long v)
178 | {
179 | return LONG2NUM(v);
180 | }
181 |
182 | template <>
183 | VALUE to_rb(float v)
184 | {
185 | return DBL2NUM(static_cast(v));
186 | }
187 |
188 | template <>
189 | VALUE to_rb(double v)
190 | {
191 | return DBL2NUM(v);
192 | }
193 |
194 | template
195 | static T to_ctype(VALUE v);
196 |
197 | template <>
198 | bool to_ctype(VALUE b)
199 | {
200 | if (b == Qfalse || b == Qnil) {
201 | return false;
202 | }
203 | return true;
204 | }
205 |
206 | template <>
207 | int to_ctype(VALUE v)
208 | {
209 | return NUM2INT(v);
210 | }
211 |
212 | template <>
213 | unsigned int to_ctype(VALUE v)
214 | {
215 | return NUM2UINT(v);
216 | }
217 |
218 | template <>
219 | long to_ctype(VALUE v)
220 | {
221 | return NUM2LONG(v);
222 | }
223 |
224 | template <>
225 | unsigned long to_ctype(VALUE v)
226 | {
227 | return NUM2ULONG(v);
228 | }
229 |
230 | template <>
231 | float to_ctype(VALUE v)
232 | {
233 | return static_cast(NUM2DBL(v));
234 | }
235 |
236 | template <>
237 | double to_ctype(VALUE v)
238 | {
239 | return NUM2DBL(v);
240 | }
241 |
// Iterator body for rb_block_call over Class#constants: reverse-maps a
// numeric enum value to the name of the class constant holding it, so error
// messages can show the symbolic name instead of a raw integer.
// in   ary[0]: Class contains class constants.
// in   ary[1]: Constant to match.
// out  ary[2]: Label matches with constant.
static VALUE class_const_match(VALUE current_label, VALUE* ary)
{
    const VALUE& rb_class_const = ary[0];
    const VALUE& constant_value = ary[1];
    VALUE& label = ary[2];
    VALUE v = rb_const_get(rb_class_const, SYM2ID(current_label));
    if (FIX2INT(v) == FIX2INT(constant_value)) {
        label = current_label;
        return Qtrue;  // truthy return stops Enumerable#find
    }
    return Qfalse;
}
257 |
// Extend _klass_ with the module _mod::ClassMethods_.
// Intended as an `included` hook so a class including _mod_ also gains its
// class-level methods.
// NOTE(review): rb_cvar_get looks up a class *variable*; fetching a nested
// module named ClassMethods would normally use rb_const_get — confirm intent.
static VALUE module_included_classmethods_hook(VALUE mod, VALUE klass)
{
    VALUE m = rb_cvar_get(mod, rb_intern("ClassMethods"));
    rb_extend_object(klass, m);
    return Qnil;
}
265 |
266 | #define RAISE_CU_STD_ERROR_FORMATTED(status, format, ...) rb_raise(rb_hash_aref(rb_error_class_by_enum, INT2FIX(status)), "%s:%d " format, __FILE__, __LINE__, __VA_ARGS__)
267 | #define RAISE_CU_STD_ERROR(status, message) RAISE_CU_STD_ERROR_FORMATTED(status, "%s", message)
268 | // }}}
269 |
270 | // {{{ SGC Ruby data.
271 | static VALUE rb_error_class_by_enum;
272 | // }}}
273 |
274 |
275 | // {{{ CUdevice
276 |
/* call-seq: CUDevice.get_count -> Fixnum
 *
 * Return the number of CUDA devices.
 *
 * Raises a CU error (class looked up from the CUresult code) on failure.
 */
static VALUE device_get_count(VALUE klass)
{
    int count;
    CUresult status = cuDeviceGetCount(&count);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to get device count.");
    }
    return INT2FIX(count);
}
290 |
/* call-seq: CUDevice.get(index) -> CUDevice
 *
 * Return a CUDevice instance corresponding to CUDA device _index_ (0..CUDevice.get_count-1).
 */
static VALUE device_get(VALUE klass, VALUE num)
{
    CUdevice* pdev;
    // Allocate the wrapper first, then bind the driver handle into it.
    VALUE rb_pdev = rb_class_new_instance(0, NULL, rb_cCUDevice);
    Data_Get_Struct(rb_pdev, CUdevice, pdev);
    int i = FIX2INT(num);
    CUresult status = cuDeviceGet(pdev, i);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to get device %d.", i);
    }
    return rb_pdev;
}
307 |
308 | static VALUE device_alloc(VALUE klass)
309 | {
310 | CUdevice* p = new CUdevice;
311 | return Data_Wrap_Struct(klass, 0, generic_free, p);
312 | }
313 |
// CUDevice#initialize: no-op; the handle is bound later via CUDevice.get.
static VALUE device_initialize(int argc, VALUE* argv, VALUE self)
{
    return self;
}
318 |
319 | /* call-seq: dev.get_name -> String
320 | *
321 | * Return the name of _self_ with a maximum of 255 characters.
322 | */
323 | static VALUE device_get_name(VALUE self)
324 | {
325 | CUdevice* p;
326 | Data_Get_Struct(self, CUdevice, p);
327 | char name[256];
328 | CUresult status = cuDeviceGetName(name, 256, *p);
329 | if (status != CUDA_SUCCESS) {
330 | RAISE_CU_STD_ERROR(status, "Failed to get device name.");
331 | }
332 | return rb_str_new2(name);
333 | }
334 |
/* call-seq: dev.compute_capability -> Hash { major:, minor: }
 *
 * Return the compute capability of _self_.
 *
 *     # For a device with compute capability 1.3:
 *     dev.compute_capability #=> { major: 1, minor: 3 }
 */
static VALUE device_compute_capability(VALUE self)
{
    CUdevice* p;
    Data_Get_Struct(self, CUdevice, p);
    int major;
    int minor;
    CUresult status = cuDeviceComputeCapability(&major, &minor, *p);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to query device compute capability.");
    }
    // Return as { major:, minor: } symbol-keyed hash.
    VALUE h = rb_hash_new();
    rb_hash_aset(h, ID2SYM(rb_intern("major")), INT2FIX(major));
    rb_hash_aset(h, ID2SYM(rb_intern("minor")), INT2FIX(minor));
    return h;
}
357 |
358 | /* call-seq: dev.get_attribute(attribute) -> Fixnum
359 | *
360 | * Return _attribute_ (CUDeviceAttribute) of _self_.
361 | *
362 | * dev.get_attribute(CUDeviceAttribute::MAX_THREADS_PER_BLOCK) #=> 512
363 | * dev.get_attribute(CUDeviceAttribute::MULTIPROCESSOR_COUNT) #=> 30
364 | * dev.get_attribute(CUDeviceAttribute::MAX_SHARED_MEMORY_PER_BLOCK) #=> 16384
365 | */
366 | static VALUE device_get_attribute(VALUE self, VALUE attribute)
367 | {
368 | CUdevice* p;
369 | Data_Get_Struct(self, CUdevice, p);
370 | int v;
371 | CUresult status = cuDeviceGetAttribute(&v, static_cast(FIX2INT(attribute)), *p);
372 | if (status != CUDA_SUCCESS) {
373 | VALUE attributes = rb_funcall(rb_cCUDeviceAttribute, rb_intern("constants"), 0);
374 | VALUE ary[3] = { rb_cCUDeviceAttribute, attribute, Qnil };
375 | rb_block_call(attributes, rb_intern("find"), 0, NULL, RUBY_METHOD_FUNC(class_const_match), (VALUE)ary);
376 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to query device attribute: %s.", rb_id2name(SYM2ID(ary[2])));
377 | }
378 | return INT2FIX(v);
379 | }
380 |
/* call-seq: dev.get_properties -> Hash
 *
 * Return the properties of _self_ in a hash with the following keys:
 * * :clock_rate
 * * :max_grid_size
 * * :max_threads_dim
 * * :max_threads_per_block
 * * :mem_pitch
 * * :regs_per_block
 * * :shared_mem_per_block
 * * :simd_width
 * * :texture_align
 * * :total_constant_memory
 */
static VALUE device_get_properties(VALUE self)
{
    CUdevice* pdevice;
    Data_Get_Struct(self, CUdevice, pdevice);
    CUdevprop prop;
    CUresult status = cuDeviceGetProperties(&prop, *pdevice);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to get device properties.");
    }

    // 3-element arrays for the per-dimension limits.
    VALUE max_grid_size = rb_ary_new3(3, INT2FIX(prop.maxGridSize[0]), INT2FIX(prop.maxGridSize[1]), INT2FIX(prop.maxGridSize[2]));
    VALUE max_threads_dim = rb_ary_new3(3, INT2FIX(prop.maxThreadsDim[0]), INT2FIX(prop.maxThreadsDim[1]), INT2FIX(prop.maxThreadsDim[2]));

    VALUE h = rb_hash_new();
    rb_hash_aset(h, ID2SYM(rb_intern("clock_rate")), INT2FIX(prop.clockRate));
    rb_hash_aset(h, ID2SYM(rb_intern("max_grid_size")), max_grid_size);
    rb_hash_aset(h, ID2SYM(rb_intern("max_threads_dim")), max_threads_dim);
    rb_hash_aset(h, ID2SYM(rb_intern("max_threads_per_block")), INT2FIX(prop.maxThreadsPerBlock));
    rb_hash_aset(h, ID2SYM(rb_intern("mem_pitch")), INT2FIX(prop.memPitch));
    rb_hash_aset(h, ID2SYM(rb_intern("regs_per_block")), INT2FIX(prop.regsPerBlock));
    rb_hash_aset(h, ID2SYM(rb_intern("shared_mem_per_block")), INT2FIX(prop.sharedMemPerBlock));
    rb_hash_aset(h, ID2SYM(rb_intern("simd_width")), INT2FIX(prop.SIMDWidth));
    rb_hash_aset(h, ID2SYM(rb_intern("texture_align")), INT2FIX(prop.textureAlign));
    rb_hash_aset(h, ID2SYM(rb_intern("total_constant_memory")), INT2FIX(prop.totalConstantMemory));
    return h;
}
421 |
/* call-seq: dev.total_mem -> Numeric
 *
 * Return the total amount of device memory in bytes.
 */
static VALUE device_total_mem(VALUE self)
{
    CUdevice* p;
    Data_Get_Struct(self, CUdevice, p);
    size_t nbytes;
    CUresult status = cuDeviceTotalMem(&nbytes, *p);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to get device total amount of memory available.");
    }
    // SIZET2NUM handles sizes beyond Fixnum range.
    return SIZET2NUM(nbytes);
}
437 |
438 | // }}}
439 |
440 |
441 | // {{{ CUcontext
442 |
443 | static VALUE context_alloc(VALUE klass)
444 | {
445 | CUcontext* p = new CUcontext;
446 | return Data_Wrap_Struct(klass, 0, generic_free, p);
447 | }
448 |
// CUContext#initialize: no-op; the context is created via CUContext#create.
static VALUE context_initialize(int argc, VALUE* argv, VALUE self)
{
    return self;
}
453 |
/* call-seq: ctx.create(device) -> self
 *           ctx.create(flags, device) -> self
 *
 * Create a new CUDA context with _flags_ (CUContextFlags) and _device_ (CUDevice),
 * then associate it with the calling thread, and return the context.
 * Setting flags to 0 or omitting flags uses SCHED_AUTO.
 *
 *     dev = CUDevice.get(0)
 *     ctx = CUContext.new
 *     ctx.create(dev) #=> ctx
 *     ctx.create(0, dev) #=> ctx
 *     ctx.create(CUContextFlags::SCHED_SPIN | CUContextFlags::BLOCKING_SYNC, dev) #=> ctx
 */
static VALUE context_create(int argc, VALUE* argv, VALUE self)
{
    if (argc <= 0 || argc > 2) {
        rb_raise(rb_eArgError, "wrong number of arguments (%d for 1 or 2).", argc);
    }

    CUcontext* pcontext;
    CUdevice* pdevice;
    unsigned int flags = 0;  // default: CU_CTX_SCHED_AUTO
    Data_Get_Struct(self, CUcontext, pcontext);
    if (argc == 2) {
        flags = FIX2UINT(argv[0]);
        Data_Get_Struct(argv[1], CUdevice, pdevice);
    } else { // argc == 1
        Data_Get_Struct(argv[0], CUdevice, pdevice);
    }
    CUresult status = cuCtxCreate(pcontext, flags, *pdevice);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to create context: flags = 0x%x.", flags);
    }
    return self;
}
489 |
/* call-seq: ctx.destroy -> nil
 *
 * Destroy the CUDA context _self_.
 */
static VALUE context_destroy(VALUE self)
{
    CUcontext* p;
    Data_Get_Struct(self, CUcontext, p);
    CUresult status = cuCtxDestroy(*p);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to destroy context.");
    }
    return Qnil;
}
504 |
/* call-seq: ctx.attach -> self
 *           ctx.attach(flags) -> self
 *
 * Increment the reference count on _self_.
 * Currently, _flags_ must be set to 0.
 */
static VALUE context_attach(int argc, VALUE* argv, VALUE self)
{
    CUcontext* p;
    unsigned int flags = 0;
    Data_Get_Struct(self, CUcontext, p);
    // Extra arguments beyond the first are silently ignored.
    if (argc == 1) {
        flags = FIX2UINT(argv[0]);
    }
    CUresult status = cuCtxAttach(p, flags);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to attach context: flags = 0x%x.", flags);
    }
    return self;
}
525 |
526 |
/* call-seq: ctx.detach -> nil
 *
 * Decrement the reference count on _self_.
 */
static VALUE context_detach(VALUE self)
{
    CUcontext* p;
    Data_Get_Struct(self, CUcontext, p);
    CUresult status = cuCtxDetach(*p);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to detach context.");
    }
    return Qnil;
}
541 |
/* call-seq: ctx.push_current -> self
 *
 * Push _self_ onto the context stack, which becomes currently active context.
 */
static VALUE context_push_current(VALUE self)
{
    CUcontext* p;
    Data_Get_Struct(self, CUcontext, p);
    CUresult status = cuCtxPushCurrent(*p);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to push this context.");
    }
    return self;
}
556 |
/* call-seq: ctx.get_api_version -> Numeric
 *
 * Return the API version used to create _self_.
 */
static VALUE context_get_api_version(VALUE self)
{
    CUcontext* p;
    Data_Get_Struct(self, CUcontext, p);
    unsigned int version;
    CUresult status = cuCtxGetApiVersion(*p, &version);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to get the API version of this context.");
    }
    return UINT2NUM(version);
}
572 |
/* call-seq: CUContext.get_api_version -> Numeric
 *
 * Return the API version used to create current context.
 */
static VALUE context_get_api_version_singleton(VALUE klass)
{
    unsigned int version;
    // NULL context queries the current (calling thread's) context.
    CUresult status = cuCtxGetApiVersion(NULL, &version);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to get the API version of current context.");
    }
    return UINT2NUM(version);
}
586 |
/* call-seq: CUContext.get_device -> CUDevice
 *
 * Return the device associated to the current CUDA context.
 */
static VALUE context_get_device(VALUE klass)
{
    VALUE device = rb_class_new_instance(0, NULL, rb_cCUDevice);
    CUdevice* pdevice;
    Data_Get_Struct(device, CUdevice, pdevice);
    CUresult status = cuCtxGetDevice(pdevice);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to get current context's device.");
    }
    return device;
}
602 |
603 | /* call-seq: CUContext.get_limit(limit) -> Numeric
604 | *
605 | * Return the _limit_ (CULimit) of the current CUDA context.
606 | *
607 | * CUContext.get_limit(CULimit::STACK_SIZE) #=> 8192
608 | */
609 | static VALUE context_get_limit(VALUE klass, VALUE limit)
610 | {
611 | CUlimit l = static_cast(FIX2UINT(limit));
612 | size_t v = 0;
613 | CUresult status = cuCtxGetLimit(&v, l);
614 | if (status != CUDA_SUCCESS) {
615 | VALUE limits = rb_funcall(rb_cCULimit, rb_intern("constants"), 0);
616 | VALUE ary[3] = { rb_cCULimit, limit, Qnil };
617 | rb_block_call(limits, rb_intern("find"), 0, NULL, RUBY_METHOD_FUNC(class_const_match), (VALUE)ary);
618 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to get context limit: %s.", rb_id2name(SYM2ID(ary[2])));
619 | }
620 | return SIZET2NUM(v);
621 | }
622 |
623 | /* call-seq: CUContext.set_limit(limit, value) -> nil
624 | *
625 | * Set the _limit_ (CULimit) of the current CUDA context.
626 | *
627 | * CUContext.set_limit(CULimit::STACK_SIZE, 8192) #=> nil
628 | */
629 | static VALUE context_set_limit(VALUE klass, VALUE limit, VALUE value)
630 | {
631 | CUlimit l = static_cast(FIX2UINT(limit));
632 | CUresult status = cuCtxSetLimit(l, NUM2SIZET(value));
633 | if (status != CUDA_SUCCESS) {
634 | VALUE limits = rb_funcall(rb_cCULimit, rb_intern("constants"), 0);
635 | VALUE ary[3] = { rb_cCULimit, limit, Qnil };
636 | rb_block_call(limits, rb_intern("find"), 0, NULL, RUBY_METHOD_FUNC(class_const_match), (VALUE)ary);
637 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to set context limit: %s to %lu.", rb_id2name(SYM2ID(ary[2])), NUM2SIZET(value));
638 | }
639 | return Qnil;
640 | }
641 |
642 | /* call-seq: CUContext.get_cache_config -> CUFunctionCache
643 | *
644 | * Return the cache config of the current CUDA context.
645 | *
646 | * CUContext.get_cache_config #=> 1
647 | */
648 | static VALUE context_get_cache_config(VALUE klass)
649 | {
650 | CUfunc_cache config;
651 | CUresult status = cuCtxGetCacheConfig(&config);
652 | if (status != CUDA_SUCCESS) {
653 | RAISE_CU_STD_ERROR(status, "Failed to get context cache config.");
654 | }
655 | return UINT2NUM(static_cast(config));
656 | }
657 |
658 | /* call-seq: CUContext.set_cache_config(config) -> nil
659 | *
660 | * Set the cache with _config_ (CUFunctionCache) for the current CUDA context.
661 | *
662 | * CUContext.set_cache_config(CUFunctionCache::PREFER_SHARED) #=> nil
663 | */
664 | static VALUE context_set_cache_config(VALUE klass, VALUE config)
665 | {
666 | CUresult status = cuCtxSetCacheConfig(static_cast(FIX2UINT(config)));
667 | if (status != CUDA_SUCCESS) {
668 | VALUE configs = rb_funcall(rb_cCUFunctionCache, rb_intern("constants"), 0);
669 | VALUE ary[3] = { rb_cCUFunctionCache, config, Qnil };
670 | rb_block_call(configs, rb_intern("find"), 0, NULL, RUBY_METHOD_FUNC(class_const_match), (VALUE)ary);
671 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to set context cache config: %s.", rb_id2name(SYM2ID(ary[2])));
672 | }
673 | return Qnil;
674 | }
675 |
/* call-seq: CUContext.pop_current -> CUContext
 *
 * Pop the current CUDA context from the context stack, which becomes inactive.
 */
static VALUE context_pop_current(VALUE klass)
{
    VALUE context = rb_class_new_instance(0, NULL, rb_cCUContext);
    CUcontext* pcontext;
    Data_Get_Struct(context, CUcontext, pcontext);
    CUresult status = cuCtxPopCurrent(pcontext);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to pop current context.");
    }
    return context;
}
691 |
/* call-seq: CUContext.synchronize -> nil
 *
 * Block until all the tasks of the current CUDA context complete.
 */
static VALUE context_synchronize(VALUE klass)
{
    CUresult status = cuCtxSynchronize();
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to synchronize this context.");
    }
    return Qnil;
}
704 |
705 | // }}}
706 |
707 |
708 | // {{{ CUmodule
709 |
710 | static VALUE module_alloc(VALUE klass)
711 | {
712 | CUmodule* p = new CUmodule;
713 | return Data_Wrap_Struct(klass, 0, generic_free, p);
714 | }
715 |
// CUModule#initialize: no-op; a module is bound via CUModule#load and alike.
static VALUE module_initialize(int argc, VALUE* argv, VALUE self)
{
    return self;
}
720 |
/* call-seq: mod.load(path) -> self
 *
 * Load a compute module from the file at _path_ into the current CUDA context.
 * The file should be a cubin file or a PTX file.
 *
 * A PTX file may be obtained by compiling the .cu file using nvcc with -ptx option.
 *     $ nvcc -ptx vadd.cu
 */
static VALUE module_load(VALUE self, VALUE str)
{
    CUmodule* p;
    Data_Get_Struct(self, CUmodule, p);
    CUresult status = cuModuleLoad(p, StringValuePtr(str));
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to load module: %s.", StringValuePtr(str));
    }
    return self;
}
739 |
/* call-seq: mod.load_data(image_str) -> self
 *
 * Load a compute module from the String _image_str_ which contains a cubin or a PTX data
 * into the current CUDA context.
 *
 * See also CUModule#load.
 */
static VALUE module_load_data(VALUE self, VALUE image)
{
    CUmodule* p;
    Data_Get_Struct(self, CUmodule, p);
    CUresult status = cuModuleLoadData(p, StringValuePtr(image));
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to load module data.");
    }
    return self;
}
757 |
/* call-seq: mod.unload -> self
 *
 * Unload _self_ from the current CUDA context.
 */
static VALUE module_unload(VALUE self)
{
    CUmodule* p;
    Data_Get_Struct(self, CUmodule, p);
    CUresult status = cuModuleUnload(*p);
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR(status, "Failed to unload module.");
    }
    return self;
}
772 |
773 | /* call-seq: mod.get_function(name_str) -> CUFunction
774 | *
775 | * Return a CUFunction instance corresponding to the function name _name_str_ in the loaded compute module.
776 | * A compute module was loaded with CUModule#load and alike methods.
777 | */
778 | static VALUE module_get_function(VALUE self, VALUE str)
779 | {
780 | CUmodule* p;
781 | Data_Get_Struct(self, CUmodule, p);
782 | CUfunction* pfunc = new CUfunction;
783 | CUresult status = cuModuleGetFunction(pfunc, *p, StringValuePtr(str));
784 | if (status != CUDA_SUCCESS) {
785 | delete pfunc;
786 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to get module function: %s.", StringValuePtr(str));
787 | }
788 | return Data_Wrap_Struct(rb_cCUFunction, 0, generic_free, pfunc);
789 | }
790 |
/* call-seq: mod.get_global(name_str) -> [CUDevicePtr, Numeric]
 *
 * Return the CUDevicePtr corresponding to the global variable in the loaded compute module and its size in bytes.
 */
static VALUE module_get_global(VALUE self, VALUE str)
{
    CUmodule* p;
    Data_Get_Struct(self, CUmodule, p);
    // Allocate and zero-initialize the wrapper before binding the pointer.
    VALUE rb_devptr = device_ptr_alloc(rb_cCUDevicePtr);
    device_ptr_initialize(0, NULL, rb_devptr);
    CUdeviceptr* pdevptr;
    Data_Get_Struct(rb_devptr, CUdeviceptr, pdevptr);
    size_t nbytes;
    CUresult status = cuModuleGetGlobal(pdevptr, &nbytes, *p, StringValuePtr(str));
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to get module global: %s.", StringValuePtr(str));
    }
    return rb_ary_new3(2, rb_devptr, SIZET2NUM(nbytes));
}
810 |
/* call-seq: mod.get_texref(name_str) -> CUTexRef
 *
 * Return a CUTexRef instance corresponding to the texture name _name_str_ in the loaded compute module.
 */
static VALUE module_get_texref(VALUE self, VALUE str)
{
    CUmodule* pmodule;
    CUtexref* ptexref;
    Data_Get_Struct(self, CUmodule, pmodule);
    VALUE rb_texref = rb_class_new_instance(0, NULL, rb_cCUTexRef);
    Data_Get_Struct(rb_texref, CUtexref, ptexref);
    CUresult status = cuModuleGetTexRef(ptexref, *pmodule, StringValuePtr(str));
    if (status != CUDA_SUCCESS) {
        RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to get module texture reference: %s.", StringValuePtr(str));
    }
    return rb_texref;
}
828 |
829 | // }}}
830 |
831 |
832 | // {{{ CUdeviceptr
833 |
834 | static VALUE device_ptr_alloc(VALUE klass)
835 | {
836 | CUdeviceptr* p = new CUdeviceptr;
837 | return Data_Wrap_Struct(klass, 0, generic_free, p);
838 | }
839 |
840 | static VALUE device_ptr_initialize(int argc, VALUE* argv, VALUE self)
841 | {
842 | CUdeviceptr* p;
843 | Data_Get_Struct(self, CUdeviceptr, p);
844 | *p = static_cast(0);
845 | return self;
846 | }
847 |
/* call-seq: devptr.offset(offset) -> CUDevicePtr
 *
 * Return a CUDevicePtr instance pointing to the memory location _offset_ (bytes) from _self_.
 */
static VALUE device_ptr_offset(VALUE self, VALUE offset)
{
    CUdeviceptr* pdevptr;
    CUdeviceptr* pdevptr_offset;
    Data_Get_Struct(self, CUdeviceptr, pdevptr);
    VALUE rb_pdevptr_offset = rb_class_new_instance(0, NULL, rb_cCUDevicePtr);
    Data_Get_Struct(rb_pdevptr_offset, CUdeviceptr, pdevptr_offset);
    // NOTE(review): NUM2UINT caps the offset at 32 bits - confirm whether
    // offsets beyond 4GB need to be supported here.
    *pdevptr_offset = *pdevptr + NUM2UINT(offset);
    return rb_pdevptr_offset;
}
862 |
863 | /* call-seq: devptr.mem_alloc(nbytes) -> self
864 | *
865 | * Allocate _nbytes_ device memory and let _self_ points to this allocated memory.
866 | */
867 | static VALUE device_ptr_mem_alloc(VALUE self, VALUE nbytes)
868 | {
869 | CUdeviceptr* p;
870 | Data_Get_Struct(self, CUdeviceptr, p);
871 | CUresult status = cuMemAlloc(p, NUM2UINT(nbytes));
872 | if (status != CUDA_SUCCESS) {
873 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to allocate memory: size = %u.", NUM2UINT(nbytes));
874 | }
875 | return self;
876 | }
877 |
878 | /* call-seq: devptr.mem_free -> self
879 | *
880 | * Free the allocated device memory _self_ pointing to.
881 | */
882 | static VALUE device_ptr_mem_free(VALUE self)
883 | {
884 | CUdeviceptr* p;
885 | Data_Get_Struct(self, CUdeviceptr, p);
886 | CUresult status = cuMemFree(*p);
887 | if (status != CUDA_SUCCESS) {
888 | RAISE_CU_STD_ERROR(status, "Failed to free memory.");
889 | }
890 | return self;
891 | }
892 |
893 | // }}}
894 |
895 |
896 | // {{{ CUfunction
897 |
898 | static VALUE function_alloc(VALUE klass)
899 | {
900 | CUfunction* p = new CUfunction;
901 | return Data_Wrap_Struct(klass, 0, generic_free, p);
902 | }
903 |
904 | static VALUE function_initialize(int argc, VALUE* argv, VALUE self)
905 | {
906 | return self;
907 | }
908 |
909 | /* call-seq: func.set_param(arg1, arg2, *other_args) -> self
910 | *
911 | * Set the argument list of _self_ to _arg1_, _arg2_, *other_args.
912 | */
913 | static VALUE function_set_param(int argc, VALUE* argv, VALUE self)
914 | {
915 | #define ALIGN_UP(offset, alignment) (offset) = ((offset) + (alignment) - 1) & ~((alignment) - 1)
916 |
917 | int offset = 0;
918 | CUfunction* pfunc;
919 | Data_Get_Struct(self, CUfunction, pfunc);
920 |
921 | CUresult status = CUDA_ERROR_UNKNOWN;
922 | for (int i = 0; i < argc; ++i) {
923 | if (CLASS_OF(argv[i]) == rb_cCUDevicePtr) {
924 | CUdeviceptr* p;
925 | Data_Get_Struct(argv[i], CUdeviceptr, p);
926 | ALIGN_UP(offset, __alignof(*p));
927 | status = cuParamSetv(*pfunc, offset, p, sizeof(*p));
928 | if (status != CUDA_SUCCESS) break;
929 | offset += sizeof(*p);
930 | } else if (CLASS_OF(argv[i]) == rb_cFixnum) {
931 | int num = FIX2INT(argv[i]);
932 | ALIGN_UP(offset, __alignof(num));
933 | status = cuParamSeti(*pfunc, offset, num);
934 | if (status != CUDA_SUCCESS) break;
935 | offset += sizeof(int);
936 | } else if (CLASS_OF(argv[i]) == rb_cFloat) {
937 | float num = static_cast(NUM2DBL(argv[i]));
938 | ALIGN_UP(offset, __alignof(num));
939 | status = cuParamSetf(*pfunc, offset, num);
940 | if (status != CUDA_SUCCESS) break;
941 | offset += sizeof(float);
942 | } else {
943 | rb_raise(rb_eArgError, "Invalid type of argument %d.", i+1);
944 | }
945 | }
946 | if (argc > 0 && status != CUDA_SUCCESS) {
947 | RAISE_CU_STD_ERROR(status, "Failed to set function parameters.");
948 | }
949 |
950 | status = cuParamSetSize(*pfunc, offset);
951 | if (status != CUDA_SUCCESS) {
952 | RAISE_CU_STD_ERROR(status, "Failed to set function parameter size.");
953 | }
954 | return self;
955 | }
956 |
957 | /* call-seq: func.set_texref(texref) -> self
958 | *
959 | * Add the _texref_ to the argument list of _self_.
960 | *
961 | * Note: This method is *deprecated*. This is no longer necessary.
962 | */
963 | static VALUE function_set_texref(VALUE self, VALUE texref)
964 | {
965 | rb_warn("CUFunction#set_texref is deprecated.");
966 | CUfunction* pfunc;
967 | CUtexref* ptexref;
968 | Data_Get_Struct(self, CUfunction, pfunc);
969 | Data_Get_Struct(texref, CUtexref, ptexref);
970 | CUresult status = cuParamSetTexRef(*pfunc, CU_PARAM_TR_DEFAULT, *ptexref);
971 | if (status != CUDA_SUCCESS) {
972 | RAISE_CU_STD_ERROR(status, "Failed to set function texture reference.");
973 | }
974 | return self;
975 | }
976 |
977 | /* call-seq: func.set_block_shape(xdim) -> self
978 | * func.set_block_shape(xdim, ydim) -> self
979 | * func.set_block_shape(xdim, ydim, zdim) -> self
980 | *
981 | * Set the block dimensions to use for next launch. _ydim_ and _zdim_ which may be omitted are default to 1.
982 | */
983 | static VALUE function_set_block_shape(int argc, VALUE* argv, VALUE self)
984 | {
985 | if (argc <= 0 || argc > 3) {
986 | rb_raise(rb_eArgError, "wrong number of arguments (%d for 1 to 3 integers).", argc);
987 | }
988 |
989 | CUfunction* pfunc;
990 | Data_Get_Struct(self, CUfunction, pfunc);
991 |
992 | int xdim = FIX2INT(argv[0]);
993 | int ydim = 1;
994 | int zdim = 1;
995 |
996 | if (argc >= 2) {
997 | ydim = FIX2INT(argv[1]);
998 | }
999 | if (argc >= 3) {
1000 | zdim = FIX2INT(argv[2]);
1001 | }
1002 |
1003 | CUresult status = cuFuncSetBlockShape(*pfunc, xdim, ydim, zdim);
1004 | if (status != CUDA_SUCCESS) {
1005 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to set function block shape: (x,y,z) = (%d,%d,%d).", xdim, ydim, zdim);
1006 | }
1007 | return self;
1008 | }
1009 |
1010 | /* call-seq: func.set_shared_size(nbytes) -> self
1011 | *
1012 | * Set the dynamic shared-memory size to use for next launch.
1013 | */
1014 | static VALUE function_set_shared_size(VALUE self, VALUE nbytes)
1015 | {
1016 | CUfunction* p;
1017 | Data_Get_Struct(self, CUfunction, p);
1018 | CUresult status = cuFuncSetSharedSize(*p, NUM2UINT(nbytes));
1019 | if (status != CUDA_SUCCESS) {
1020 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to set function shared memory size: %u.", NUM2UINT(nbytes));
1021 | }
1022 | return self;
1023 | }
1024 |
1025 | /* call-seq: func.launch -> self
1026 | *
1027 | * Launch _self_ to execute on a CUDA device.
1028 | */
1029 | static VALUE function_launch(VALUE self)
1030 | {
1031 | CUfunction* p;
1032 | Data_Get_Struct(self, CUfunction, p);
1033 | CUresult status = cuLaunch(*p);
1034 | if (status != CUDA_SUCCESS) {
1035 | RAISE_CU_STD_ERROR(status, "Failed to launch kernel function on 1x1x1 grid of blocks.");
1036 | }
1037 | return self;
1038 | }
1039 |
1040 | /* call-seq: func.launch_grid(xdim) -> self
1041 | * func.launch_grid(xdim, ydim) -> self
1042 | *
1043 | * Launch _self_ with grid dimensions (xdim, ydim) to execute on a CUDA device.
1044 | * _ydim_ which may be omitted is default to 1.
1045 | */
1046 | static VALUE function_launch_grid(int argc, VALUE* argv, VALUE self)
1047 | {
1048 | if (argc <= 0 || argc > 2) {
1049 | rb_raise(rb_eArgError, "wrong number of arguments (%d for 1 or 2 integers).", argc);
1050 | }
1051 |
1052 | CUfunction* pfunc;
1053 | Data_Get_Struct(self, CUfunction, pfunc);
1054 |
1055 | int xdim = FIX2INT(argv[0]);
1056 | int ydim = 1;
1057 |
1058 | if (argc >= 2) {
1059 | ydim = FIX2INT(argv[1]);
1060 | }
1061 |
1062 | CUresult status = cuLaunchGrid(*pfunc, xdim, ydim);
1063 | if (status != CUDA_SUCCESS) {
1064 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to launch kernel function on %dx%d grid of blocks.", xdim, ydim);
1065 | }
1066 | return self;
1067 | }
1068 |
1069 | /* call-seq: func.launch_grid_async(xdim, stream) -> self
1070 | * func.launch_grid_async(xdim, ydim, stream) -> self
1071 | *
1072 | * Launch _self_ with grid dimensions (xdim, ydim) on _stream_ asynchronously to execute on a CUDA device.
1073 | * _ydim_ which may be omitted is default to 1. Setting _stream_ to anything other than an instance of CUStream
1074 | * will execute on the default stream 0.
1075 | */
1076 | static VALUE function_launch_grid_async(int argc, VALUE* argv, VALUE self)
1077 | {
1078 | if (argc < 2 || argc > 3) {
1079 | rb_raise(rb_eArgError, "wrong number of arguments (%d for 2 or 3).", argc);
1080 | }
1081 |
1082 | CUfunction* pfunc;
1083 | CUstream *pstream = NULL;
1084 | CUstream stream0 = 0;
1085 | Data_Get_Struct(self, CUfunction, pfunc);
1086 |
1087 | int xdim = FIX2INT(argv[0]);
1088 | int ydim = 1;
1089 |
1090 | if (argc == 2) {
1091 | if (CLASS_OF(argv[1]) == rb_cCUStream) {
1092 | Data_Get_Struct(argv[1], CUstream, pstream);
1093 | } else {
1094 | pstream = &stream0;
1095 | }
1096 | } else if (argc == 3) {
1097 | ydim = FIX2INT(argv[1]);
1098 | if (CLASS_OF(argv[2]) == rb_cCUStream) {
1099 | Data_Get_Struct(argv[2], CUstream, pstream);
1100 | } else {
1101 | pstream = &stream0;
1102 | }
1103 | }
1104 |
1105 | CUresult status = cuLaunchGridAsync(*pfunc, xdim, ydim, *pstream);
1106 | if (status != CUDA_SUCCESS) {
1107 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to launch kernel function asynchronously on %dx%d grid of blocks.", xdim, ydim);
1108 | }
1109 | return self;
1110 | }
1111 |
1112 | /* call-seq: func.get_attribute(attribute) -> Fixnum
1113 | *
1114 | * Return _attribute_ (CUFunctionAttribute) of _self_.
1115 | *
1116 | * func.get_attribute(CUFunctionAttribute::MAX_THREADS_PER_BLOCK) #=> 512
1117 | * func.get_attribute(CUFunctionAttribute::SHARED_SIZE_BYTES) #=> 44
1118 | * func.get_attribute(CUFunctionAttribute::NUM_REGS) #=> 3
1119 | */
1120 | static VALUE function_get_attribute(VALUE self, VALUE attribute)
1121 | {
1122 | CUfunction* p;
1123 | Data_Get_Struct(self, CUfunction, p);
1124 | int v;
1125 | CUresult status = cuFuncGetAttribute(&v, static_cast(FIX2INT(attribute)), *p);
1126 | if (status != CUDA_SUCCESS) {
1127 | VALUE attributes = rb_funcall(rb_cCUFunctionAttribute, rb_intern("constants"), 0);
1128 | VALUE ary[3] = { rb_cCUFunctionAttribute, attribute, Qnil };
1129 | rb_block_call(attributes, rb_intern("find"), 0, NULL, RUBY_METHOD_FUNC(class_const_match), (VALUE)ary);
1130 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to query function attribute: %s.", rb_id2name(SYM2ID(ary[2])));
1131 | }
1132 | return INT2FIX(v);
1133 | }
1134 |
1135 | /* call-seq: func.set_cache_config(config) -> self
1136 | *
1137 | * Set the preferred cache configuration (CUFunctionCache) to use for next launch.
1138 | */
1139 | static VALUE function_set_cache_config(VALUE self, VALUE config)
1140 | {
1141 | CUfunction* p;
1142 | Data_Get_Struct(self, CUfunction, p);
1143 | CUresult status = cuFuncSetCacheConfig(*p, static_cast(FIX2UINT(config)));
1144 | if (status != CUDA_SUCCESS) {
1145 | VALUE configs = rb_funcall(rb_cCUFunctionCache, rb_intern("constants"), 0);
1146 | VALUE ary[3] = { rb_cCUFunctionCache, config, Qnil };
1147 | rb_block_call(configs, rb_intern("find"), 0, NULL, RUBY_METHOD_FUNC(class_const_match), (VALUE)ary);
1148 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to set function cache config: %s.", rb_id2name(SYM2ID(ary[2])));
1149 | }
1150 | return self;
1151 | }
1152 |
1153 | // }}}
1154 |
1155 |
1156 | // {{{ CUstream
1157 |
1158 | static VALUE stream_alloc(VALUE klass)
1159 | {
1160 | CUstream* p = new CUstream;
1161 | return Data_Wrap_Struct(klass, 0, generic_free, p);
1162 | }
1163 |
1164 | static VALUE stream_initialize(VALUE self)
1165 | {
1166 | return self;
1167 | }
1168 |
1169 | /* call-seq: stream.create -> self
1170 | * stream.create(flags) -> self
1171 | *
1172 | * Create a stream and set _self_ to this stream. Currently, _flags_ must be set to 0.
1173 | */
1174 | static VALUE stream_create(int argc, VALUE* argv, VALUE self)
1175 | {
1176 | if (argc < 0 || argc > 1) {
1177 | rb_raise(rb_eArgError, "wrong number of arguments (%d for 0 or 1).", argc);
1178 | }
1179 |
1180 | CUstream* p;
1181 | unsigned int flags = 0;
1182 | Data_Get_Struct(self, CUstream, p);
1183 | if (argc == 1) {
1184 | flags = FIX2UINT(argv[0]);
1185 | }
1186 | CUresult status = cuStreamCreate(p, flags);
1187 | if (status != CUDA_SUCCESS) {
1188 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to create stream: flags = 0x%x", flags);
1189 | }
1190 | return self;
1191 | }
1192 |
1193 | /* call-seq: stream.destroy -> nil
1194 | *
1195 | * Destroy the stream _self_.
1196 | */
1197 | static VALUE stream_destroy(VALUE self)
1198 | {
1199 | CUstream* p;
1200 | Data_Get_Struct(self, CUstream, p);
1201 | CUresult status = cuStreamDestroy(*p);
1202 | if (status != CUDA_SUCCESS) {
1203 | RAISE_CU_STD_ERROR(status, "Failed to destroy stream.");
1204 | }
1205 | return Qnil;
1206 | }
1207 |
1208 | /* call-seq: stream.query -> true or false
1209 | *
1210 | * Return true if all operations in _self_ have completed. Otherwise, return false.
1211 | */
1212 | static VALUE stream_query(VALUE self)
1213 | {
1214 | CUstream* p;
1215 | Data_Get_Struct(self, CUstream, p);
1216 | CUresult status = cuStreamQuery(*p);
1217 | if (status == CUDA_SUCCESS) {
1218 | return Qtrue;
1219 | } else if (status == CUDA_ERROR_NOT_READY) {
1220 | return Qfalse;
1221 | } else {
1222 | RAISE_CU_STD_ERROR(status, "Failed to query stream.");
1223 | }
1224 | }
1225 |
1226 | /* call-seq: stream.synchronize -> self
1227 | *
1228 | * Block until all operations in _self_ complete.
1229 | */
1230 | static VALUE stream_synchronize(VALUE self)
1231 | {
1232 | CUstream* p;
1233 | Data_Get_Struct(self, CUstream, p);
1234 | CUresult status = cuStreamSynchronize(*p);
1235 | if (status != CUDA_SUCCESS) {
1236 | RAISE_CU_STD_ERROR(status, "Failed to synchronize stream.");
1237 | }
1238 | return self;
1239 | }
1240 |
1241 | /* call-seq: stream.wait_event(event) -> self
1242 | * stream.wait_event(event, flags) -> self
1243 | *
1244 | * Let all future operations submitted to _self_ wait until _event_ (CUEvent) complete before beginning execution.
1245 | * Currently, _flags_ must be 0.
1246 | */
1247 | static VALUE stream_wait_event(int argc, VALUE* argv, VALUE self)
1248 | {
1249 | if (argc <= 0 || argc > 2) {
1250 | rb_raise(rb_eArgError, "wrong number of arguments (%d for 1 or 2).", argc);
1251 | }
1252 |
1253 | CUstream* pstream;
1254 | CUevent* pevent;
1255 | unsigned int flags = 0;
1256 | Data_Get_Struct(self, CUstream, pstream);
1257 | Data_Get_Struct(argv[0], CUevent, pevent);
1258 | if (argc == 2) {
1259 | flags = FIX2UINT(argv[1]);
1260 | }
1261 | CUresult status = cuStreamWaitEvent(*pstream, *pevent, flags);
1262 | if (status != CUDA_SUCCESS) {
1263 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to make stream's future operations to wait event: flags = 0x%x", flags);
1264 | }
1265 | return self;
1266 | }
1267 |
1268 | /* call-seq: CUStream.wait_event(event) -> nil
1269 | * CUStream.wait_event(event, flags) -> nil
1270 | *
1271 | * Let all future operations submitted to stream 0 (NULL stream) wait until _event_ (CUEvent) complete before beginning execution.
1272 | * Currently, _flags_ must be 0.
1273 | */
1274 | static VALUE stream_wait_event_singleton(int argc, VALUE* argv, VALUE klass)
1275 | {
1276 | if (argc <= 0 || argc > 2) {
1277 | rb_raise(rb_eArgError, "wrong number of arguments (%d for 1 or 2).", argc);
1278 | }
1279 |
1280 | CUevent* pevent;
1281 | unsigned int flags = 0;
1282 | Data_Get_Struct(argv[0], CUevent, pevent);
1283 | if (argc == 2) {
1284 | flags = FIX2UINT(argv[1]);
1285 | }
1286 | CUresult status = cuStreamWaitEvent(0, *pevent, flags);
1287 | if (status != CUDA_SUCCESS) {
1288 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to make current stream's future operations to wait event: flags = 0x%x", flags);
1289 | }
1290 | return Qnil;
1291 | }
1292 |
1293 | // }}}
1294 |
1295 |
1296 | // {{{ CUevent
1297 |
1298 | static VALUE event_alloc(VALUE klass)
1299 | {
1300 | CUevent* p = new CUevent;
1301 | return Data_Wrap_Struct(klass, 0, generic_free, p);
1302 | }
1303 |
1304 | static VALUE event_initialize(VALUE self)
1305 | {
1306 | return self;
1307 | }
1308 |
1309 | /* call-seq: event.create -> self
1310 | * event.create(flags) -> self
1311 | *
1312 | * Create an event with _flags_ (CUEventFlags) and set _self_ to this event.
1313 | * The _flags_ is default to CUEventFlags::DEFAULT.
1314 | *
1315 | * event.create #=> self
1316 | * event.create(CUEventFlags::DEFAULT) #=> self
1317 | * event.create(CUEventFlags::BLOCKING_SYNC) #=> self
1318 | */
1319 | static VALUE event_create(int argc, VALUE* argv, VALUE self)
1320 | {
1321 | if (argc < 0 || argc > 1) {
1322 | rb_raise(rb_eArgError, "wrong number of arguments (%d for 0 or 1).", argc);
1323 | }
1324 |
1325 | CUevent* p;
1326 | unsigned int flags = CU_EVENT_DEFAULT;
1327 | Data_Get_Struct(self, CUevent, p);
1328 | if (argc == 1) {
1329 | flags = FIX2UINT(argv[0]);
1330 | }
1331 | CUresult status = cuEventCreate(p, flags);
1332 | if (status != CUDA_SUCCESS) {
1333 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to create event: flags = 0x%x.", flags);
1334 | }
1335 | return self;
1336 | }
1337 |
1338 | /* call-seq: event.destroy -> nil
1339 | *
1340 | * Destroy the event _self_.
1341 | */
1342 | static VALUE event_destroy(VALUE self)
1343 | {
1344 | CUevent* p;
1345 | Data_Get_Struct(self, CUevent, p);
1346 | CUresult status = cuEventDestroy(*p);
1347 | if (status != CUDA_SUCCESS) {
1348 | RAISE_CU_STD_ERROR(status, "Failed to destroy event.");
1349 | }
1350 | return Qnil;
1351 | }
1352 |
1353 | /* call-seq: event.query -> true or false
1354 | *
1355 | * Return true if _self_ has been recorded. Otherwise, return false.
1356 | */
1357 | static VALUE event_query(VALUE self)
1358 | {
1359 | CUevent* p;
1360 | Data_Get_Struct(self, CUevent, p);
1361 | CUresult status = cuEventQuery(*p);
1362 | if (status == CUDA_SUCCESS) {
1363 | return Qtrue;
1364 | } else if (status == CUDA_ERROR_NOT_READY) {
1365 | return Qfalse;
1366 | } else if (status == CUDA_ERROR_INVALID_VALUE) {
1367 | RAISE_CU_STD_ERROR(status, "Failed to query event: cuEventRecord() has not been called on this event.");
1368 | } else {
1369 | RAISE_CU_STD_ERROR(status, "Failed to query event.");
1370 | }
1371 | }
1372 |
1373 | /* call-seq: event.record(stream) -> self
1374 | *
1375 | * Record event _self_ asynchronously in _stream_.
1376 | * Setting _stream_ to anything other than an instance of CUStream will record on the default stream 0.
1377 | */
1378 | static VALUE event_record(VALUE self, VALUE rb_stream)
1379 | {
1380 | CUevent* pevent = NULL;
1381 | CUstream* pstream = NULL;
1382 | CUresult status;
1383 | Data_Get_Struct(self, CUevent, pevent);
1384 | if (CLASS_OF(rb_stream) == rb_cCUStream) {
1385 | Data_Get_Struct(rb_stream, CUstream, pstream);
1386 | status = cuEventRecord(*pevent, *pstream);
1387 | } else {
1388 | status = cuEventRecord(*pevent, 0);
1389 | }
1390 | if (status == CUDA_ERROR_INVALID_VALUE) {
1391 | RAISE_CU_STD_ERROR(status, "Failed to record event: cuEventRecord() has been called and has not been recorded yet.");
1392 | } else if (status != CUDA_SUCCESS) {
1393 | RAISE_CU_STD_ERROR(status, "Failed to record event.");
1394 | }
1395 | return self;
1396 | }
1397 |
1398 | /* call-seq: event.synchronize -> self
1399 | *
1400 | * Block until _self_ has been recorded.
1401 | */
1402 | static VALUE event_synchronize(VALUE self)
1403 | {
1404 | CUevent* p;
1405 | Data_Get_Struct(self, CUevent, p);
1406 | CUresult status = cuEventSynchronize(*p);
1407 | // TODO: Handle status == CUDA_ERROR_INVALID_VALUE
1408 | if (status != CUDA_SUCCESS) {
1409 | RAISE_CU_STD_ERROR(status, "Failed to synchronize event.");
1410 | }
1411 | return self;
1412 | }
1413 |
1414 | /* call-seq: event.elapsed_time(event_start, event_end) -> Numeric
1415 | *
1416 | * Return the elapsed time (ms) from _event_start_ (CUEvent) to _event_end_ (CUEvent).
1417 | */
1418 | static VALUE event_elapsed_time(VALUE klass, VALUE event_start, VALUE event_end)
1419 | {
1420 | CUevent* pevent_start;
1421 | CUevent* pevent_end;
1422 | Data_Get_Struct(event_start, CUevent, pevent_start);
1423 | Data_Get_Struct(event_end, CUevent, pevent_end);
1424 | float etime;
1425 | CUresult status = cuEventElapsedTime(&etime, *pevent_start, *pevent_end);
1426 | if (status == CUDA_ERROR_NOT_READY) {
1427 | RAISE_CU_STD_ERROR(status, "Failed to get elapsed time of events: either event has not been recorded yet.");
1428 | } else if (status != CUDA_SUCCESS) {
1429 | RAISE_CU_STD_ERROR(status, "Failed to get elapsed time of events.");
1430 | }
1431 | return DBL2NUM(etime);
1432 | }
1433 |
1434 | // }}}
1435 |
1436 |
1437 | // {{{ CUtexref
1438 |
1439 | static VALUE texref_alloc(VALUE klass)
1440 | {
1441 | CUtexref* p = new CUtexref;
1442 | return Data_Wrap_Struct(klass, 0, generic_free, p);
1443 | }
1444 |
1445 | static VALUE texref_initialize(VALUE self)
1446 | {
1447 | return self;
1448 | }
1449 |
1450 | /* call-seq: texref.create -> self
1451 | *
1452 | * Create a texture reference and set _self_ to this texture reference.
1453 | *
1454 | * Note: This method is *deprecated*.
1455 | */
1456 | static VALUE texref_create(VALUE self)
1457 | {
1458 | rb_warn("CUTexRef#create is deprecated.");
1459 | CUtexref* p;
1460 | Data_Get_Struct(self, CUtexref, p);
1461 | CUresult status = cuTexRefCreate(p);
1462 | if (status != CUDA_SUCCESS) {
1463 | RAISE_CU_STD_ERROR(status, "Failed to create texture.");
1464 | }
1465 | return self;
1466 | }
1467 |
1468 | /* call-seq: texref.destroy -> nil
1469 | *
1470 | * Destroy the texture reference _self_.
1471 | *
1472 | * Note: This method is *deprecated*.
1473 | */
1474 | static VALUE texref_destroy(VALUE self)
1475 | {
1476 | rb_warn("CUTexRef#destroy is deprecated.");
1477 | CUtexref* p;
1478 | Data_Get_Struct(self, CUtexref, p);
1479 | CUresult status = cuTexRefDestroy(*p);
1480 | if (status != CUDA_SUCCESS) {
1481 | RAISE_CU_STD_ERROR(status, "Failed to destroy texture.");
1482 | }
1483 | return Qnil;
1484 | }
1485 |
1486 | /* call-seq: texref.get_address -> CUDevicePtr
1487 | *
1488 | * Return a CUDevicePtr instance bound to the texture reference.
1489 | */
1490 | static VALUE texref_get_address(VALUE self)
1491 | {
1492 | CUtexref* ptexref;
1493 | CUdeviceptr* pdevptr;
1494 | Data_Get_Struct(self, CUtexref, ptexref);
1495 | VALUE rb_devptr = rb_class_new_instance(0, NULL, rb_cCUDevicePtr);
1496 | Data_Get_Struct(rb_devptr, CUdeviceptr, pdevptr);
1497 | CUresult status = cuTexRefGetAddress(pdevptr, *ptexref);
1498 | if (status != CUDA_SUCCESS) {
1499 | RAISE_CU_STD_ERROR(status, "Failed to get texture address.");
1500 | }
1501 | return rb_devptr;
1502 | }
1503 |
1504 | /* call-seq: texref.get_address_mode(dim) -> Fixnum
1505 | *
1506 | * Return the address mode of the dimension _dim_ (0..2) of _self_.
1507 | */
1508 | static VALUE texref_get_address_mode(VALUE self, VALUE dim)
1509 | {
1510 | CUtexref* p;
1511 | CUaddress_mode mode;
1512 | Data_Get_Struct(self, CUtexref, p);
1513 | CUresult status = cuTexRefGetAddressMode(&mode, *p, FIX2INT(dim));
1514 | if (status != CUDA_SUCCESS) {
1515 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to get texture address mode: dim = %d.", FIX2INT(dim));
1516 | }
1517 | return INT2FIX(mode);
1518 | }
1519 |
1520 | /* call-seq: texref.get_filter_mode -> Fixnum
1521 | *
1522 | * Return the filter mode of _self_.
1523 | */
1524 | static VALUE texref_get_filter_mode(VALUE self)
1525 | {
1526 | CUtexref* p;
1527 | CUfilter_mode mode;
1528 | Data_Get_Struct(self, CUtexref, p);
1529 | CUresult status = cuTexRefGetFilterMode(&mode, *p);
1530 | if (status != CUDA_SUCCESS) {
1531 | RAISE_CU_STD_ERROR(status, "Failed to get texture filter mode.");
1532 | }
1533 | return INT2FIX(mode);
1534 | }
1535 |
1536 | /* call-seq: texref.get_flags -> Numeric
1537 | *
1538 | * Return the flags of _self_.
1539 | */
1540 | static VALUE texref_get_flags(VALUE self)
1541 | {
1542 | CUtexref* p;
1543 | unsigned int flags;
1544 | Data_Get_Struct(self, CUtexref, p);
1545 | CUresult status = cuTexRefGetFlags(&flags, *p);
1546 | if (status != CUDA_SUCCESS) {
1547 | RAISE_CU_STD_ERROR(status, "Failed to get texture flags.");
1548 | }
1549 | return UINT2NUM(flags);
1550 | }
1551 |
1552 | /* call-seq: texref.set_address(devptr, nbytes) -> Numeric
1553 | *
1554 | * Bind _devptr_ (CUDevicePtr) with _nbytes_ to _self_.
1555 | */
1556 | static VALUE texref_set_address(VALUE self, VALUE rb_device_ptr, VALUE nbytes)
1557 | {
1558 | CUtexref* ptexref;
1559 | CUdeviceptr* pdevptr;
1560 | size_t offset;
1561 | Data_Get_Struct(self, CUtexref, ptexref);
1562 | Data_Get_Struct(rb_device_ptr, CUdeviceptr, pdevptr);
1563 | CUresult status = cuTexRefSetAddress(&offset, *ptexref, *pdevptr, NUM2UINT(nbytes));
1564 | if (status != CUDA_SUCCESS) {
1565 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to set texture address: nbytes = %u.", NUM2UINT(nbytes));
1566 | }
1567 | return SIZET2NUM(offset);
1568 | }
1569 |
1570 | /* call-seq: texref.set_address_mode(dim, mode) -> self
1571 | *
1572 | * Set the address mode of _self_ with _dim_ (0..2) and _mode_ (CUAddressMode).
1573 | */
1574 | static VALUE texref_set_address_mode(VALUE self, VALUE dim, VALUE mode)
1575 | {
1576 | CUtexref* p;
1577 | Data_Get_Struct(self, CUtexref, p);
1578 | CUresult status = cuTexRefSetAddressMode(*p, FIX2INT(dim), static_cast(FIX2INT(mode)));
1579 | if (status != CUDA_SUCCESS) {
1580 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to set texture address mode: dim = %d, mode = %d", FIX2INT(dim), FIX2INT(mode));
1581 | }
1582 | return self;
1583 | }
1584 |
1585 | /* call-seq: texref.set_filter_mode(mode) -> self
1586 | *
1587 | * Set the filter mode of _self_ with _mode_ (CUFilterMode).
1588 | */
1589 | static VALUE texref_set_filter_mode(VALUE self, VALUE mode)
1590 | {
1591 | CUtexref* p;
1592 | Data_Get_Struct(self, CUtexref, p);
1593 | CUresult status = cuTexRefSetFilterMode(*p, static_cast(FIX2INT(mode)));
1594 | if (status != CUDA_SUCCESS) {
1595 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to set texture filter mode: mode = %d.", FIX2INT(mode));
1596 | }
1597 | return self;
1598 | }
1599 |
1600 | /* call-seq: texref.set_flags(flags) -> self
1601 | *
1602 | * Set the _flags_ (CUTexRefFlags) of _self_.
1603 | */
1604 | static VALUE texref_set_flags(VALUE self, VALUE flags)
1605 | {
1606 | CUtexref* p;
1607 | Data_Get_Struct(self, CUtexref, p);
1608 | CUresult status = cuTexRefSetFlags(*p, NUM2UINT(flags));
1609 | if (status != CUDA_SUCCESS) {
1610 | RAISE_CU_STD_ERROR_FORMATTED(status, "Failed to set texture flags: flags = 0x%x.", NUM2UINT(flags));
1611 | }
1612 | return self;
1613 | }
1614 |
1615 | // }}}
1616 |
1617 |
1618 | // {{{ Memory pointer
1619 | static VALUE memory_pointer_alloc(VALUE klass)
1620 | {
1621 | MemoryPointer* ppointer = new MemoryPointer;
1622 | ppointer->p = NULL;
1623 | return Data_Wrap_Struct(klass, 0, generic_free, ppointer);
1624 | }
1625 |
1626 | static VALUE memory_pointer_initialize(VALUE self)
1627 | {
1628 | return self;
1629 | }
1630 | // }}}
1631 |
1632 |
1633 | // {{{ Buffer
1634 |
1635 | /* call-seq: Buffer.new(size, options = {}) -> Buffer
1636 | *
1637 | * Create a buffer with _size_ elements.
1638 | *
1639 | * Options:
1640 | * * _page_locked_ - Allocate page-locked memory if _:page_locked_ is true. Otherwise, allocate pageable memory.
1641 | *
1642 | * Buffer.new(10) # Allocate 10 elements with pageable memory.
1643 | * Buffer.new(20, page_locked: true) # Allocate 20 elements with page-locked memory.
1644 | */
1645 | static VALUE ibuffer_initialize(int argc, VALUE* argv, VALUE self)
1646 | {
1647 | // This function exists for documentation only.
1648 | rb_notimplement();
1649 | return Qnil;
1650 | }
1651 |
1652 | /* call-seq: Buffer.element_size
1653 | *
1654 | * Return the size of an element of this Buffer in bytes.
1655 | */
1656 | static VALUE ibuffer_element_size(VALUE klass)
1657 | {
1658 | rb_notimplement();
1659 | return Qnil;
1660 | }
1661 |
1662 | /* call-seq: buffer.size -> Numeric
1663 | *
1664 | * Return the number of elements in this buffer.
1665 | */
1666 | static VALUE ibuffer_size(VALUE self)
1667 | {
1668 | rb_notimplement();
1669 | return Qnil;
1670 | }
1671 |
1672 | /* call-seq: buffer.page_locked? -> true or false
1673 | *
1674 | * Return true if this buffer is page-locked allocated.
1675 | * Otherwise, return false.
1676 | */
1677 | static VALUE ibuffer_is_page_locked(VALUE self)
1678 | {
1679 | rb_notimplement();
1680 | return Qnil;
1681 | }
1682 |
1683 | /* call-seq: buffer.offset(index) -> MemoryPointer
1684 | *
1685 | * Return the memory pointer of the element at _index_ (0...size) in this buffer.
1686 | */
1687 | static VALUE ibuffer_offset(VALUE self, VALUE offset)
1688 | {
1689 | rb_notimplement();
1690 | return Qnil;
1691 | }
1692 |
1693 | /* call-seq: buffer[index] -> Object
1694 | *
1695 | * Return the element at _index_ (0...size) in this buffer.
1696 | */
1697 | static VALUE ibuffer_element_get(VALUE self, VALUE index)
1698 | {
1699 | rb_notimplement();
1700 | return Qnil;
1701 | }
1702 |
1703 | /* call-seq: buffer[index] = value -> Object
1704 | *
1705 | * Set the element at _index_ (0...size) in this buffer to _value_.
1706 | * Return _value_.
1707 | */
1708 | static VALUE ibuffer_element_set(VALUE self, VALUE index, VALUE value)
1709 | {
1710 | rb_notimplement();
1711 | return Qnil;
1712 | }
1713 |
1714 | static void memory_buffer_free(void* p)
1715 | {
1716 | MemoryBuffer* pbuffer = static_cast(p);
1717 | if (pbuffer->is_page_locked) {
1718 | cuMemFreeHost(reinterpret_cast(pbuffer->p));
1719 | } else {
1720 | delete[] pbuffer->p;
1721 | }
1722 | delete pbuffer;
1723 | }
1724 |
1725 | static VALUE memory_buffer_alloc(VALUE klass)
1726 | {
1727 | MemoryBuffer* pbuffer = new MemoryBuffer;
1728 | pbuffer->size = 0;
1729 | pbuffer->is_page_locked = false;
1730 | pbuffer->p = NULL;
1731 | return Data_Wrap_Struct(klass, 0, memory_buffer_free, pbuffer);
1732 | }
1733 |
1734 | static VALUE memory_buffer_element_size(VALUE klass)
1735 | {
1736 | return INT2FIX(1);
1737 | }
1738 |
1739 | static VALUE memory_buffer_initialize(int argc, VALUE* argv, VALUE self)
1740 | {
1741 | if (argc < 1 || argc > 2) {
1742 | rb_raise(rb_eArgError, "wrong number of arguments (%d for 1 or 2).", argc);
1743 | }
1744 |
1745 | bool use_page_locked = false;
1746 | size_t nbytes = NUM2SIZET(argv[0]);
1747 | if (argc == 2 && CLASS_OF(argv[1]) == rb_cHash) {
1748 | if (rb_hash_aref(argv[1], ID2SYM(rb_intern("page_locked"))) == Qtrue) {
1749 | use_page_locked = true;
1750 | }
1751 | }
1752 |
1753 | MemoryBuffer* pbuffer;
1754 | Data_Get_Struct(self, MemoryBuffer, pbuffer);
1755 | pbuffer->size = nbytes;
1756 | if (use_page_locked) {
1757 | CUresult status = cuMemAllocHost(reinterpret_cast(&pbuffer->p), nbytes);
1758 | if (status != CUDA_SUCCESS) {
1759 | RAISE_CU_STD_ERROR(status, "Failed to allocate page-locked host memory.");
1760 | }
1761 | pbuffer->is_page_locked = true;
1762 | } else {
1763 | pbuffer->p = new char[nbytes];
1764 | pbuffer->is_page_locked = false;
1765 | }
1766 | std::memset(static_cast(pbuffer->p), 0, pbuffer->size);
1767 | return self;
1768 | }
1769 |
1770 | static VALUE memory_buffer_size(VALUE self)
1771 | {
1772 | MemoryBuffer* pbuffer;
1773 | Data_Get_Struct(self, MemoryBuffer, pbuffer);
1774 | return SIZET2NUM(pbuffer->size);
1775 | }
1776 |
1777 | static VALUE memory_buffer_is_page_locked(VALUE self)
1778 | {
1779 | MemoryBuffer* pbuffer;
1780 | Data_Get_Struct(self, MemoryBuffer, pbuffer);
1781 | return to_rb(pbuffer->is_page_locked);
1782 | }
1783 |
1784 | static VALUE memory_buffer_offset(VALUE self, VALUE offset)
1785 | {
1786 | MemoryBuffer* pbuffer;
1787 | MemoryPointer* ppointer_offset;
1788 | Data_Get_Struct(self, MemoryBuffer, pbuffer);
1789 | VALUE rb_ppointer_offset = rb_class_new_instance(0, NULL, rb_cMemoryPointer);
1790 | Data_Get_Struct(rb_ppointer_offset, MemoryPointer, ppointer_offset);
1791 | ppointer_offset->p = pbuffer->p + NUM2SIZET(offset);
1792 | return rb_ppointer_offset;
1793 | }
1794 |
1795 | static VALUE memory_buffer_element_get(VALUE self, VALUE index)
1796 | {
1797 | size_t i = NUM2SIZET(index);
1798 | MemoryBuffer* pbuffer;
1799 | Data_Get_Struct(self, MemoryBuffer, pbuffer);
1800 | int element = static_cast(pbuffer->p[i]);
1801 | return to_rb(element);
1802 | }
1803 |
1804 | static VALUE memory_buffer_element_set(VALUE self, VALUE index, VALUE value)
1805 | {
1806 | size_t i = NUM2SIZET(index);
1807 | MemoryBuffer* pbuffer;
1808 | Data_Get_Struct(self, MemoryBuffer, pbuffer);
1809 | pbuffer->p[i] = static_cast(FIX2INT(value));
1810 | return value;
1811 | }
1812 |
1813 | template
1814 | static void buffer_free(void* p)
1815 | {
1816 | typedef struct TypedBuffer TBuffer;
1817 | TBuffer* pbuffer = static_cast(p);
1818 | if (pbuffer->is_page_locked) {
1819 | cuMemFreeHost(reinterpret_cast(pbuffer->p));
1820 | } else {
1821 | delete[] pbuffer->p;
1822 | }
1823 | delete pbuffer;
1824 | }
1825 |
1826 | template
1827 | static VALUE buffer_alloc(VALUE klass)
1828 | {
1829 | typedef struct TypedBuffer TBuffer;
1830 | TBuffer* pbuffer = new TBuffer;
1831 | pbuffer->size = 0;
1832 | pbuffer->p = NULL;
1833 | return Data_Wrap_Struct(klass, 0, &buffer_free, pbuffer);
1834 | }
1835 |
1836 | template
1837 | static VALUE buffer_element_size(VALUE klass)
1838 | {
1839 | return INT2FIX(sizeof(TElement));
1840 | }
1841 | typedef VALUE (*BufferElementSizeFunctionType)(VALUE);
1842 |
1843 | template
1844 | static VALUE buffer_initialize(int argc, VALUE* argv, VALUE self)
1845 | {
1846 | if (argc <= 0 || argc >= 3) {
1847 | rb_raise(rb_eArgError, "wrong number of arguments (%d for 1 or 2).", argc);
1848 | }
1849 |
1850 | bool use_page_locked = false;
1851 | VALUE n = NUM2SIZET(argv[0]);
1852 | if (argc == 2 && CLASS_OF(argv[1]) == rb_cHash) {
1853 | if (rb_hash_aref(argv[1], ID2SYM(rb_intern("page_locked"))) == Qtrue) {
1854 | use_page_locked = true;
1855 | }
1856 | }
1857 |
1858 | typedef struct TypedBuffer TBuffer;
1859 | TBuffer* pbuffer;
1860 | Data_Get_Struct(self, TBuffer, pbuffer);
1861 | pbuffer->size = n*sizeof(TElement);
1862 | if (use_page_locked) {
1863 | CUresult status = cuMemAllocHost(reinterpret_cast(&pbuffer->p), n*sizeof(TElement));
1864 | if (status != CUDA_SUCCESS) {
1865 | RAISE_CU_STD_ERROR(status, "Failed to allocate page-locked host memory.");
1866 | }
1867 | pbuffer->is_page_locked = true;
1868 | } else {
1869 | pbuffer->p = reinterpret_cast(new TElement[n]);
1870 | pbuffer->is_page_locked = false;
1871 | }
1872 | std::memset(static_cast(pbuffer->p), 0, pbuffer->size);
1873 | return self;
1874 | }
1875 | typedef VALUE (*BufferInitializeFunctionType)(int, VALUE*, VALUE);
1876 |
1877 | template
1878 | static VALUE buffer_size(VALUE self)
1879 | {
1880 | MemoryBuffer* pbuffer;
1881 | Data_Get_Struct(self, MemoryBuffer, pbuffer);
1882 | return SIZET2NUM(pbuffer->size / sizeof(TElement));
1883 | }
1884 | typedef VALUE (*BufferSizeFunctionType)(VALUE);
1885 |
1886 | template
1887 | static VALUE buffer_is_page_locked(VALUE self)
1888 | {
1889 | MemoryBuffer* pbuffer;
1890 | Data_Get_Struct(self, MemoryBuffer, pbuffer);
1891 | return to_rb(pbuffer->is_page_locked);
1892 | }
1893 | typedef VALUE (*BufferIsPageLocked)(VALUE);
1894 |
1895 | template
1896 | static VALUE buffer_offset(VALUE self, VALUE offset)
1897 | {
1898 | typedef struct TypedBuffer TBuffer;
1899 | TBuffer* pbuffer;
1900 | MemoryPointer* ppointer_offset;
1901 | Data_Get_Struct(self, TBuffer, pbuffer);
1902 | VALUE rb_ppointer_offset = rb_class_new_instance(0, NULL, rb_cMemoryPointer);
1903 | Data_Get_Struct(rb_ppointer_offset, MemoryPointer, ppointer_offset);
1904 | ppointer_offset->p = pbuffer->p + NUM2SIZET(offset)*sizeof(TElement);
1905 | return rb_ppointer_offset;
1906 | }
1907 | typedef VALUE (*BufferOffsetFunctionType)(VALUE, VALUE);
1908 |
1909 | template
1910 | static VALUE buffer_element_get(VALUE self, VALUE index)
1911 | {
1912 | typedef struct TypedBuffer TBuffer;
1913 | size_t i = NUM2SIZET(index);
1914 | TBuffer* pbuffer;
1915 | Data_Get_Struct(self, TBuffer, pbuffer);
1916 | TElement* e = reinterpret_cast(pbuffer->p);
1917 | TElement element = e[i];
1918 | return to_rb(element);
1919 | }
1920 | typedef VALUE (*BufferElementGetFunctionType)(VALUE, VALUE);
1921 |
1922 | template
1923 | static VALUE buffer_element_set(VALUE self, VALUE index, VALUE value)
1924 | {
1925 | typedef struct TypedBuffer TBuffer;
1926 | size_t i = NUM2SIZET(index);
1927 | TElement v = to_ctype(value);
1928 | TBuffer* pbuffer;
1929 | Data_Get_Struct(self, TBuffer, pbuffer);
1930 | TElement* e = reinterpret_cast(pbuffer->p);
1931 | e[i] = v;
1932 | return value;
1933 | }
1934 | typedef VALUE (*BufferElementSetFunctionType)(VALUE, VALUE, VALUE);
1935 |
1936 | // }}}
1937 |
1938 |
1939 | // {{{ Memory
1940 |
1941 | /* call-seq: memcpy_htod(dst_devptr, src_mem, nbytes) -> nil
1942 | *
1943 | * Copy _nbytes_ from host memory at _src_mem_ to device memory at _dst_devptr_.
1944 | */
1945 | static VALUE memcpy_htod(VALUE self, VALUE rb_device_ptr, VALUE rb_memory, VALUE nbytes)
1946 | {
1947 | CUdeviceptr* pdevice_ptr;
1948 | MemoryPointer* pmem;
1949 | Data_Get_Struct(rb_device_ptr, CUdeviceptr, pdevice_ptr);
1950 | Data_Get_Struct(rb_memory, MemoryPointer, pmem);
1951 | CUresult status = cuMemcpyHtoD(*pdevice_ptr, static_cast(pmem->p), NUM2UINT(nbytes));
1952 | if (status != CUDA_SUCCESS) {
1953 | RAISE_CU_STD_ERROR(status, "Failed to copy memory from host to device.");
1954 | }
1955 | return Qnil;
1956 | }
1957 |
1958 | /* call-seq: memcpy_htod_async(dst_devptr, src_mem, nbytes, stream) -> nil
1959 | *
1960 | * Copy _nbytes_ from host memory at _src_mem_ to device memory at _dst_devptr_ in _stream_ asynchronously.
1961 | *
1962 | * Note: The _src_mem_ should be *page-locked* memory.
1963 | */
1964 | static VALUE memcpy_htod_async(VALUE self, VALUE rb_device_ptr, VALUE rb_memory, VALUE nbytes, VALUE rb_stream)
1965 | {
1966 | CUdeviceptr* pdevice_ptr;
1967 | MemoryPointer* pmem;
1968 | CUstream* pstream;
1969 | CUstream stream0 = 0;
1970 | Data_Get_Struct(rb_device_ptr, CUdeviceptr, pdevice_ptr);
1971 | Data_Get_Struct(rb_memory, MemoryPointer, pmem);
1972 | if (CLASS_OF(rb_stream) == rb_cCUStream) {
1973 | Data_Get_Struct(rb_stream, CUstream, pstream);
1974 | } else {
1975 | pstream = &stream0;
1976 | }
1977 | CUresult status = cuMemcpyHtoDAsync(*pdevice_ptr, static_cast(pmem->p), NUM2UINT(nbytes), *pstream);
1978 | if (status != CUDA_SUCCESS) {
1979 | RAISE_CU_STD_ERROR(status, "Failed to copy memory asynchronously from host to device.");
1980 | }
1981 | return Qnil;
1982 | }
1983 |
1984 | /* call-seq: memcpy_dtoh(dst_mem, src_devptr, nbytes) -> nil
1985 | *
1986 | * Copy _nbytes_ from device memory at _src_devptr_ to host memory at _dst_mem_.
1987 | */
1988 | static VALUE memcpy_dtoh(VALUE self, VALUE rb_memory, VALUE rb_device_ptr, VALUE nbytes)
1989 | {
1990 | MemoryPointer* pmem;
1991 | CUdeviceptr* pdevice_ptr;
1992 | Data_Get_Struct(rb_device_ptr, CUdeviceptr, pdevice_ptr);
1993 | Data_Get_Struct(rb_memory, MemoryPointer, pmem);
1994 | CUresult status = cuMemcpyDtoH(static_cast(pmem->p), *pdevice_ptr, NUM2UINT(nbytes));
1995 | if (status != CUDA_SUCCESS) {
1996 | RAISE_CU_STD_ERROR(status, "Failed to copy memory from device to host.");
1997 | }
1998 | return Qnil;
1999 | }
2000 |
2001 | /* call-seq: memcpy_dtoh_async(dst_mem, src_devptr, nbytes, stream) -> nil
2002 | *
2003 | * Copy _nbytes_ from device memory at _src_devptr_ to host memory at _dst_mem_ in _stream_ asynchronously.
2004 | *
2005 | * Note: The _dst_mem_ should be *page-locked* memory.
2006 | */
2007 | static VALUE memcpy_dtoh_async(VALUE self, VALUE rb_memory, VALUE rb_device_ptr, VALUE nbytes, VALUE rb_stream)
2008 | {
2009 | MemoryPointer* pmem;
2010 | CUdeviceptr* pdevice_ptr;
2011 | CUstream* pstream;
2012 | CUstream stream0 = 0;
2013 | Data_Get_Struct(rb_device_ptr, CUdeviceptr, pdevice_ptr);
2014 | Data_Get_Struct(rb_memory, MemoryPointer, pmem);
2015 | if (CLASS_OF(rb_stream) == rb_cCUStream) {
2016 | Data_Get_Struct(rb_stream, CUstream, pstream);
2017 | } else {
2018 | pstream = &stream0;
2019 | }
2020 | CUresult status = cuMemcpyDtoHAsync(static_cast(pmem->p), *pdevice_ptr, NUM2UINT(nbytes), *pstream);
2021 | if (status != CUDA_SUCCESS) {
2022 | RAISE_CU_STD_ERROR(status, "Failed to copy memory asynchronously from device to host.");
2023 | }
2024 | return Qnil;
2025 | }
2026 |
2027 | /* call-seq: memcpy_dtod(dst_devptr, src_devptr, nbytes) -> nil
2028 | *
2029 | * Copy _nbytes_ from device memory at _src_devptr_ to device memory at _dst_devptr_ asynchronously.
2030 | */
2031 | static VALUE memcpy_dtod(VALUE self, VALUE rb_device_ptr_dst, VALUE rb_device_ptr_src, VALUE nbytes)
2032 | {
2033 | CUdeviceptr* dst;
2034 | CUdeviceptr* src;
2035 | Data_Get_Struct(rb_device_ptr_dst, CUdeviceptr, dst);
2036 | Data_Get_Struct(rb_device_ptr_src, CUdeviceptr, src);
2037 | CUresult status = cuMemcpyDtoD(*dst, *src, NUM2UINT(nbytes));
2038 | if (status != CUDA_SUCCESS) {
2039 | RAISE_CU_STD_ERROR(status, "Failed to copy memory from device to device.");
2040 | }
2041 | return Qnil;
2042 | }
2043 |
2044 | /* call-seq: memcpy_dtod_async(dst_devptr, src_devptr, nbytes, stream) -> nil
2045 | *
2046 | * Copy _nbytes_ from device memory at _src_devptr_ to device memory at _dst_devptr_ in _stream_ asynchronously.
2047 | */
2048 | static VALUE memcpy_dtod_async(VALUE self, VALUE rb_device_ptr_dst, VALUE rb_device_ptr_src, VALUE nbytes, VALUE rb_stream)
2049 | {
2050 | CUdeviceptr* dst;
2051 | CUdeviceptr* src;
2052 | CUstream *pstream;
2053 | CUstream stream0 = 0;
2054 | Data_Get_Struct(rb_device_ptr_dst, CUdeviceptr, dst);
2055 | Data_Get_Struct(rb_device_ptr_src, CUdeviceptr, src);
2056 | if (CLASS_OF(rb_stream) == rb_cCUStream) {
2057 | Data_Get_Struct(rb_stream, CUstream, pstream);
2058 | } else {
2059 | pstream = &stream0;
2060 | }
2061 | CUresult status = cuMemcpyDtoDAsync(*dst, *src, NUM2UINT(nbytes), *pstream);
2062 | if (status != CUDA_SUCCESS) {
2063 | RAISE_CU_STD_ERROR(status, "Failed to copy memory asynchronously from device to device.");
2064 | }
2065 | return Qnil;
2066 | }
2067 |
2068 | /* call-seq: mem_get_info -> Hash { free:, total: }
2069 | *
2070 | * Return a hash { free:, total: } with the amount of free and total device memory in bytes.
2071 | */
2072 | static VALUE mem_get_info(VALUE self)
2073 | {
2074 | size_t free_memory;
2075 | size_t total_memory;
2076 | CUresult status = cuMemGetInfo(&free_memory, &total_memory);
2077 | if (status != CUDA_SUCCESS) {
2078 | RAISE_CU_STD_ERROR(status, "Failed to get memory information.");
2079 | }
2080 | VALUE h = rb_hash_new();
2081 | rb_hash_aset(h, ID2SYM(rb_intern("free")), UINT2NUM(free_memory));
2082 | rb_hash_aset(h, ID2SYM(rb_intern("total")), UINT2NUM(total_memory));
2083 | return h;
2084 | }
2085 |
2086 | // }}}
2087 |
2088 |
2089 | // {{{ Driver
2090 |
2091 | /* call-seq: driver_get_version -> Fixnum
2092 | *
2093 | * Return the version number of the installed CUDA driver.
2094 | */
2095 | static VALUE driver_get_version()
2096 | {
2097 | int v;
2098 | cuDriverGetVersion(&v);
2099 | return INT2FIX(v);
2100 | }
2101 |
2102 | // }}}
2103 |
2104 |
2105 | // {{{ Doc
2106 |
2107 | /* Document-class: SGC::CU::MemoryBuffer
2108 | * See IBuffer and IBuffer::ClassMethods.
2109 | *
2110 | * Note: ELEMENT_SIZE is *deprecated*. Use MemoryBuffer.element_size.
2111 | */
2112 |
2113 | /* Document-class: SGC::CU::Int32Buffer
2114 | * See IBuffer and IBuffer::ClassMethods.
2115 | *
2116 | * Note: ELEMENT_SIZE is *deprecated*. Use Int32Buffer.element_size.
2117 | */
2118 |
2119 | /* Document-class: SGC::CU::Int64Buffer
2120 | * See IBuffer and IBuffer::ClassMethods.
2121 | *
2122 | * Note: ELEMENT_SIZE is *deprecated*. Use Int64Buffer.element_size.
2123 | */
2124 |
2125 | /* Document-class: SGC::CU::Float32Buffer
2126 | * See IBuffer and IBuffer::ClassMethods.
2127 | *
2128 | * Note: ELEMENT_SIZE is *deprecated*. Use Float32Buffer.element_size.
2129 | */
2130 |
2131 | /* Document-class: SGC::CU::Float64Buffer
2132 | * See IBuffer and IBuffer::ClassMethods.
2133 | *
2134 | * Note: ELEMENT_SIZE is *deprecated*. Use Float64Buffer.element_size.
2135 | */
2136 |
2137 | // }}}
2138 |
2139 |
2140 | extern "C" void Init_rubycu()
2141 | {
2142 | rb_mSGC = rb_define_module("SGC");
2143 | rb_mCU = rb_define_module_under(rb_mSGC, "CU");
2144 |
2145 | rb_cCUDevice = rb_define_class_under(rb_mCU, "CUDevice", rb_cObject);
2146 | rb_define_singleton_method(rb_cCUDevice, "get_count", RUBY_METHOD_FUNC(device_get_count), 0);
2147 | rb_define_singleton_method(rb_cCUDevice, "get", RUBY_METHOD_FUNC(device_get), 1);
2148 | rb_define_alloc_func(rb_cCUDevice, device_alloc);
2149 | rb_define_method(rb_cCUDevice, "initialize", RUBY_METHOD_FUNC(device_initialize), -1);
2150 | rb_define_method(rb_cCUDevice, "get_name", RUBY_METHOD_FUNC(device_get_name), 0);
2151 | rb_define_method(rb_cCUDevice, "compute_capability", RUBY_METHOD_FUNC(device_compute_capability), 0);
2152 | rb_define_method(rb_cCUDevice, "get_attribute", RUBY_METHOD_FUNC(device_get_attribute), 1);
2153 | rb_define_method(rb_cCUDevice, "get_properties", RUBY_METHOD_FUNC(device_get_properties), 0);
2154 | rb_define_method(rb_cCUDevice, "total_mem", RUBY_METHOD_FUNC(device_total_mem), 0);
2155 |
2156 | rb_cCUComputeMode = rb_define_class_under(rb_mCU, "CUComputeMode", rb_cObject);
2157 | rb_define_const(rb_cCUComputeMode, "DEFAULT", INT2FIX(CU_COMPUTEMODE_DEFAULT));
2158 | rb_define_const(rb_cCUComputeMode, "EXCLUSIVE", INT2FIX(CU_COMPUTEMODE_EXCLUSIVE));
2159 | rb_define_const(rb_cCUComputeMode, "PROHIBITED", INT2FIX(CU_COMPUTEMODE_PROHIBITED));
2160 |
2161 | rb_cCUDeviceAttribute = rb_define_class_under(rb_mCU, "CUDeviceAttribute", rb_cObject);
2162 | rb_define_const(rb_cCUDeviceAttribute, "MAX_THREADS_PER_BLOCK", INT2FIX(CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK));
2163 | rb_define_const(rb_cCUDeviceAttribute, "MAX_BLOCK_DIM_X", INT2FIX(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X));
2164 | rb_define_const(rb_cCUDeviceAttribute, "MAX_BLOCK_DIM_Y", INT2FIX(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y));
2165 | rb_define_const(rb_cCUDeviceAttribute, "MAX_BLOCK_DIM_Z", INT2FIX(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z));
2166 | rb_define_const(rb_cCUDeviceAttribute, "MAX_GRID_DIM_X", INT2FIX(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X));
2167 | rb_define_const(rb_cCUDeviceAttribute, "MAX_GRID_DIM_Y", INT2FIX(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y));
2168 | rb_define_const(rb_cCUDeviceAttribute, "MAX_GRID_DIM_Z", INT2FIX(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z));
2169 | rb_define_const(rb_cCUDeviceAttribute, "MAX_REGISTERS_PER_BLOCK", INT2FIX(CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK));
2170 | rb_define_const(rb_cCUDeviceAttribute, "MAX_SHARED_MEMORY_PER_BLOCK", INT2FIX(CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK));
2171 | rb_define_const(rb_cCUDeviceAttribute, "TOTAL_CONSTANT_MEMORY", INT2FIX(CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY));
2172 | rb_define_const(rb_cCUDeviceAttribute, "WARP_SIZE", INT2FIX(CU_DEVICE_ATTRIBUTE_WARP_SIZE));
2173 | rb_define_const(rb_cCUDeviceAttribute, "MAX_PITCH", INT2FIX(CU_DEVICE_ATTRIBUTE_MAX_PITCH));
2174 | rb_define_const(rb_cCUDeviceAttribute, "CLOCK_RATE", INT2FIX(CU_DEVICE_ATTRIBUTE_CLOCK_RATE));
2175 | rb_define_const(rb_cCUDeviceAttribute, "TEXTURE_ALIGNMENT", INT2FIX(CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT));
2176 | rb_define_const(rb_cCUDeviceAttribute, "GPU_OVERLAP", INT2FIX(CU_DEVICE_ATTRIBUTE_GPU_OVERLAP));
2177 | rb_define_const(rb_cCUDeviceAttribute, "MULTIPROCESSOR_COUNT", INT2FIX(CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT));
2178 | rb_define_const(rb_cCUDeviceAttribute, "KERNEL_EXEC_TIMEOUT", INT2FIX(CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT));
2179 | rb_define_const(rb_cCUDeviceAttribute, "INTEGRATED", INT2FIX(CU_DEVICE_ATTRIBUTE_INTEGRATED));
2180 | rb_define_const(rb_cCUDeviceAttribute, "CAN_MAP_HOST_MEMORY", INT2FIX(CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY));
2181 | rb_define_const(rb_cCUDeviceAttribute, "COMPUTE_MODE", INT2FIX(CU_DEVICE_ATTRIBUTE_COMPUTE_MODE));
2182 | rb_define_const(rb_cCUDeviceAttribute, "MAXIMUM_TEXTURE1D_WIDTH", INT2FIX(CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH));
2183 | rb_define_const(rb_cCUDeviceAttribute, "MAXIMUM_TEXTURE2D_WIDTH", INT2FIX(CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH));
2184 | rb_define_const(rb_cCUDeviceAttribute, "MAXIMUM_TEXTURE3D_WIDTH", INT2FIX(CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH));
2185 | rb_define_const(rb_cCUDeviceAttribute, "MAXIMUM_TEXTURE2D_HEIGHT", INT2FIX(CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT));
2186 | rb_define_const(rb_cCUDeviceAttribute, "MAXIMUM_TEXTURE3D_HEIGHT", INT2FIX(CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT));
2187 | rb_define_const(rb_cCUDeviceAttribute, "MAXIMUM_TEXTURE3D_DEPTH", INT2FIX(CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH));
2188 | rb_define_const(rb_cCUDeviceAttribute, "MAXIMUM_TEXTURE2D_ARRAY_WIDTH", INT2FIX(CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH));
2189 | rb_define_const(rb_cCUDeviceAttribute, "MAXIMUM_TEXTURE2D_ARRAY_HEIGHT", INT2FIX(CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT));
2190 | rb_define_const(rb_cCUDeviceAttribute, "MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES", INT2FIX(CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES));
2191 | rb_define_const(rb_cCUDeviceAttribute, "SURFACE_ALIGNMENT", INT2FIX(CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT));
2192 | rb_define_const(rb_cCUDeviceAttribute, "CONCURRENT_KERNELS", INT2FIX(CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS));
2193 | rb_define_const(rb_cCUDeviceAttribute, "ECC_ENABLED", INT2FIX(CU_DEVICE_ATTRIBUTE_ECC_ENABLED));
2194 | rb_define_const(rb_cCUDeviceAttribute, "PCI_BUS_ID", INT2FIX(CU_DEVICE_ATTRIBUTE_PCI_BUS_ID));
2195 | rb_define_const(rb_cCUDeviceAttribute, "PCI_DEVICE_ID", INT2FIX(CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID));
2196 | rb_define_const(rb_cCUDeviceAttribute, "TCC_DRIVER", INT2FIX(CU_DEVICE_ATTRIBUTE_TCC_DRIVER));
2197 |
2198 | rb_cCUContext = rb_define_class_under(rb_mCU, "CUContext", rb_cObject);
2199 | rb_define_alloc_func(rb_cCUContext, context_alloc);
2200 | rb_define_method(rb_cCUContext, "initialize", RUBY_METHOD_FUNC(context_initialize), -1);
2201 | rb_define_method(rb_cCUContext, "create", RUBY_METHOD_FUNC(context_create), -1);
2202 | rb_define_method(rb_cCUContext, "destroy", RUBY_METHOD_FUNC(context_destroy), 0);
2203 | rb_define_method(rb_cCUContext, "attach", RUBY_METHOD_FUNC(context_attach), -1);
2204 | rb_define_method(rb_cCUContext, "detach", RUBY_METHOD_FUNC(context_detach), 0);
2205 | rb_define_method(rb_cCUContext, "push_current", RUBY_METHOD_FUNC(context_push_current), 0);
2206 | rb_define_method(rb_cCUContext, "get_api_version", RUBY_METHOD_FUNC(context_get_api_version), 0);
2207 | rb_define_singleton_method(rb_cCUContext, "get_device", RUBY_METHOD_FUNC(context_get_device), 0);
2208 | rb_define_singleton_method(rb_cCUContext, "get_limit", RUBY_METHOD_FUNC(context_get_limit), 1);
2209 | rb_define_singleton_method(rb_cCUContext, "set_limit", RUBY_METHOD_FUNC(context_set_limit), 2);
2210 | rb_define_singleton_method(rb_cCUContext, "get_cache_config", RUBY_METHOD_FUNC(context_get_cache_config), 0);
2211 | rb_define_singleton_method(rb_cCUContext, "set_cache_config", RUBY_METHOD_FUNC(context_set_cache_config), 1);
2212 | rb_define_singleton_method(rb_cCUContext, "get_api_version", RUBY_METHOD_FUNC(context_get_api_version_singleton), 0);
2213 | rb_define_singleton_method(rb_cCUContext, "pop_current", RUBY_METHOD_FUNC(context_pop_current), 0);
2214 | rb_define_singleton_method(rb_cCUContext, "synchronize", RUBY_METHOD_FUNC(context_synchronize), 0);
2215 |
2216 | rb_cCUContextFlags = rb_define_class_under(rb_mCU, "CUContextFlags", rb_cObject);
2217 | rb_define_const(rb_cCUContextFlags, "SCHED_AUTO", INT2FIX(CU_CTX_SCHED_AUTO));
2218 | rb_define_const(rb_cCUContextFlags, "SCHED_SPIN", INT2FIX(CU_CTX_SCHED_SPIN));
2219 | rb_define_const(rb_cCUContextFlags, "SCHED_YIELD", INT2FIX(CU_CTX_SCHED_YIELD));
2220 | rb_define_const(rb_cCUContextFlags, "BLOCKING_SYNC", INT2FIX(CU_CTX_BLOCKING_SYNC));
2221 | rb_define_const(rb_cCUContextFlags, "MAP_HOST", INT2FIX(CU_CTX_MAP_HOST));
2222 | rb_define_const(rb_cCUContextFlags, "LMEM_RESIZE_TO_MAX", INT2FIX(CU_CTX_LMEM_RESIZE_TO_MAX));
2223 |
2224 | rb_cCULimit = rb_define_class_under(rb_mCU, "CULimit", rb_cObject);
2225 | rb_define_const(rb_cCULimit, "STACK_SIZE", INT2FIX(CU_LIMIT_STACK_SIZE));
2226 | rb_define_const(rb_cCULimit, "PRINTF_FIFO_SIZE", INT2FIX(CU_LIMIT_PRINTF_FIFO_SIZE));
2227 | rb_define_const(rb_cCULimit, "MALLOC_HEAP_SIZE", INT2FIX(CU_LIMIT_MALLOC_HEAP_SIZE));
2228 |
2229 | rb_cCUModule = rb_define_class_under(rb_mCU, "CUModule", rb_cObject);
2230 | rb_define_alloc_func(rb_cCUModule, module_alloc);
2231 | rb_define_method(rb_cCUModule, "initialize", RUBY_METHOD_FUNC(module_initialize), -1);
2232 | rb_define_method(rb_cCUModule, "load", RUBY_METHOD_FUNC(module_load), 1);
2233 | rb_define_method(rb_cCUModule, "load_data", RUBY_METHOD_FUNC(module_load_data), 1);
2234 | rb_define_method(rb_cCUModule, "unload", RUBY_METHOD_FUNC(module_unload), 0);
2235 | rb_define_method(rb_cCUModule, "get_function", RUBY_METHOD_FUNC(module_get_function), 1);
2236 | rb_define_method(rb_cCUModule, "get_global", RUBY_METHOD_FUNC(module_get_global), 1);
2237 | rb_define_method(rb_cCUModule, "get_texref", RUBY_METHOD_FUNC(module_get_texref), 1);
2238 |
2239 | rb_cCUDevicePtr = rb_define_class_under(rb_mCU, "CUDevicePtr", rb_cObject);
2240 | rb_define_alloc_func(rb_cCUDevicePtr, device_ptr_alloc);
2241 | rb_define_method(rb_cCUDevicePtr, "initialize", RUBY_METHOD_FUNC(device_ptr_initialize), -1);
2242 | rb_define_method(rb_cCUDevicePtr, "offset", RUBY_METHOD_FUNC(device_ptr_offset), 1);
2243 | rb_define_method(rb_cCUDevicePtr, "mem_alloc", RUBY_METHOD_FUNC(device_ptr_mem_alloc), 1);
2244 | rb_define_method(rb_cCUDevicePtr, "mem_free", RUBY_METHOD_FUNC(device_ptr_mem_free), 0);
2245 |
2246 | rb_cCUFunction = rb_define_class_under(rb_mCU, "CUFunction", rb_cObject);
2247 | rb_define_alloc_func(rb_cCUFunction, function_alloc);
2248 | rb_define_method(rb_cCUFunction, "initialize", RUBY_METHOD_FUNC(function_initialize), -1);
2249 | rb_define_method(rb_cCUFunction, "set_param", RUBY_METHOD_FUNC(function_set_param), -1);
2250 | rb_define_method(rb_cCUFunction, "set_texref", RUBY_METHOD_FUNC(function_set_texref), 1);
2251 | rb_define_method(rb_cCUFunction, "set_block_shape", RUBY_METHOD_FUNC(function_set_block_shape), -1);
2252 | rb_define_method(rb_cCUFunction, "set_shared_size", RUBY_METHOD_FUNC(function_set_shared_size), 1);
2253 | rb_define_method(rb_cCUFunction, "launch", RUBY_METHOD_FUNC(function_launch), 0);
2254 | rb_define_method(rb_cCUFunction, "launch_grid", RUBY_METHOD_FUNC(function_launch_grid), -1);
2255 | rb_define_method(rb_cCUFunction, "launch_grid_async", RUBY_METHOD_FUNC(function_launch_grid_async), -1);
2256 | rb_define_method(rb_cCUFunction, "get_attribute", RUBY_METHOD_FUNC(function_get_attribute), 1);
2257 | rb_define_method(rb_cCUFunction, "set_cache_config", RUBY_METHOD_FUNC(function_set_cache_config), 1);
2258 |
2259 | rb_cCUFunctionAttribute = rb_define_class_under(rb_mCU, "CUFunctionAttribute", rb_cObject);
2260 | rb_define_const(rb_cCUFunctionAttribute, "MAX_THREADS_PER_BLOCK", INT2FIX(CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK));
2261 | rb_define_const(rb_cCUFunctionAttribute, "SHARED_SIZE_BYTES", INT2FIX(CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES));
2262 | rb_define_const(rb_cCUFunctionAttribute, "CONST_SIZE_BYTES", INT2FIX(CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES));
2263 | rb_define_const(rb_cCUFunctionAttribute, "LOCAL_SIZE_BYTES", INT2FIX(CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES));
2264 | rb_define_const(rb_cCUFunctionAttribute, "NUM_REGS", INT2FIX(CU_FUNC_ATTRIBUTE_NUM_REGS));
2265 | rb_define_const(rb_cCUFunctionAttribute, "PTX_VERSION", INT2FIX(CU_FUNC_ATTRIBUTE_PTX_VERSION));
2266 | rb_define_const(rb_cCUFunctionAttribute, "BINARY_VERSION", INT2FIX(CU_FUNC_ATTRIBUTE_BINARY_VERSION));
2267 |
2268 | rb_cCUFunctionCache = rb_define_class_under(rb_mCU, "CUFunctionCache", rb_cObject);
2269 | rb_define_const(rb_cCUFunctionCache, "PREFER_NONE", INT2FIX(CU_FUNC_CACHE_PREFER_NONE));
2270 | rb_define_const(rb_cCUFunctionCache, "PREFER_SHARED", INT2FIX(CU_FUNC_CACHE_PREFER_SHARED));
2271 | rb_define_const(rb_cCUFunctionCache, "PREFER_L1", INT2FIX(CU_FUNC_CACHE_PREFER_L1));
2272 |
2273 | rb_cCUStream = rb_define_class_under(rb_mCU, "CUStream", rb_cObject);
2274 | rb_define_alloc_func(rb_cCUStream, stream_alloc);
2275 | rb_define_method(rb_cCUStream, "initialize", RUBY_METHOD_FUNC(stream_initialize), 0);
2276 | rb_define_method(rb_cCUStream, "create", RUBY_METHOD_FUNC(stream_create), -1);
2277 | rb_define_method(rb_cCUStream, "destroy", RUBY_METHOD_FUNC(stream_destroy), 0);
2278 | rb_define_method(rb_cCUStream, "query", RUBY_METHOD_FUNC(stream_query), 0);
2279 | rb_define_method(rb_cCUStream, "synchronize", RUBY_METHOD_FUNC(stream_synchronize), 0);
2280 | rb_define_method(rb_cCUStream, "wait_event", RUBY_METHOD_FUNC(stream_wait_event), -1);
2281 | rb_define_singleton_method(rb_cCUStream, "wait_event", RUBY_METHOD_FUNC(stream_wait_event_singleton), -1);
2282 |
2283 | rb_cCUEvent = rb_define_class_under(rb_mCU, "CUEvent", rb_cObject);
2284 | rb_define_alloc_func(rb_cCUEvent, event_alloc);
2285 | rb_define_method(rb_cCUEvent, "initialize", RUBY_METHOD_FUNC(event_initialize), 0);
2286 | rb_define_method(rb_cCUEvent, "create", RUBY_METHOD_FUNC(event_create), -1);
2287 | rb_define_method(rb_cCUEvent, "destroy", RUBY_METHOD_FUNC(event_destroy), 0);
2288 | rb_define_method(rb_cCUEvent, "query", RUBY_METHOD_FUNC(event_query), 0);
2289 | rb_define_method(rb_cCUEvent, "record", RUBY_METHOD_FUNC(event_record), 1);
2290 | rb_define_method(rb_cCUEvent, "synchronize", RUBY_METHOD_FUNC(event_synchronize), 0);
2291 | rb_define_singleton_method(rb_cCUEvent, "elapsed_time", RUBY_METHOD_FUNC(event_elapsed_time), 2);
2292 |
2293 | rb_cCUEventFlags = rb_define_class_under(rb_mCU, "CUEventFlags", rb_cObject);
2294 | rb_define_const(rb_cCUEventFlags, "DEFAULT", INT2FIX(CU_EVENT_DEFAULT));
2295 | rb_define_const(rb_cCUEventFlags, "BLOCKING_SYNC", INT2FIX(CU_EVENT_BLOCKING_SYNC));
2296 | rb_define_const(rb_cCUEventFlags, "DISABLE_TIMING", INT2FIX(CU_EVENT_DISABLE_TIMING));
2297 |
2298 | rb_cCUAddressMode = rb_define_class_under(rb_mCU, "CUAddressMode", rb_cObject);
2299 | rb_define_const(rb_cCUAddressMode, "WRAP", INT2FIX(CU_TR_ADDRESS_MODE_WRAP));
2300 | rb_define_const(rb_cCUAddressMode, "CLAMP", INT2FIX(CU_TR_ADDRESS_MODE_CLAMP));
2301 | rb_define_const(rb_cCUAddressMode, "MIRROR", INT2FIX(CU_TR_ADDRESS_MODE_MIRROR));
2302 | rb_define_const(rb_cCUAddressMode, "BORDER", INT2FIX(CU_TR_ADDRESS_MODE_BORDER));
2303 |
2304 | rb_cCUFilterMode = rb_define_class_under(rb_mCU, "CUFilterMode", rb_cObject);
2305 | rb_define_const(rb_cCUFilterMode, "POINT", INT2FIX(CU_TR_FILTER_MODE_POINT));
2306 | rb_define_const(rb_cCUFilterMode, "LINEAR", INT2FIX(CU_TR_FILTER_MODE_LINEAR));
2307 |
2308 | rb_cCUTexRefFlags = rb_define_class_under(rb_mCU, "CUTexRefFlags", rb_cObject);
2309 | rb_define_const(rb_cCUTexRefFlags, "READ_AS_INTEGER", INT2FIX(CU_TRSF_READ_AS_INTEGER));
2310 | rb_define_const(rb_cCUTexRefFlags, "NORMALIZED_COORDINATES", INT2FIX(CU_TRSF_NORMALIZED_COORDINATES));
2311 |
2312 | rb_cCUTexRef = rb_define_class_under(rb_mCU, "CUTexRef", rb_cObject);
2313 | rb_define_alloc_func(rb_cCUTexRef, texref_alloc);
2314 | rb_define_method(rb_cCUTexRef, "initialize", RUBY_METHOD_FUNC(texref_initialize), 0);
2315 | rb_define_method(rb_cCUTexRef, "create", RUBY_METHOD_FUNC(texref_create), 0);
2316 | rb_define_method(rb_cCUTexRef, "destroy", RUBY_METHOD_FUNC(texref_destroy), 0);
2317 | rb_define_method(rb_cCUTexRef, "get_address", RUBY_METHOD_FUNC(texref_get_address), 0);
2318 | rb_define_method(rb_cCUTexRef, "get_address_mode", RUBY_METHOD_FUNC(texref_get_address_mode), 1);
2319 | rb_define_method(rb_cCUTexRef, "get_filter_mode", RUBY_METHOD_FUNC(texref_get_filter_mode), 0);
2320 | rb_define_method(rb_cCUTexRef, "get_flags", RUBY_METHOD_FUNC(texref_get_flags), 0);
2321 | rb_define_method(rb_cCUTexRef, "set_address", RUBY_METHOD_FUNC(texref_set_address), 2);
2322 | rb_define_method(rb_cCUTexRef, "set_address_mode", RUBY_METHOD_FUNC(texref_set_address_mode), 2);
2323 | rb_define_method(rb_cCUTexRef, "set_filter_mode", RUBY_METHOD_FUNC(texref_set_filter_mode), 1);
2324 | rb_define_method(rb_cCUTexRef, "set_flags", RUBY_METHOD_FUNC(texref_set_flags), 1);
2325 |
2326 | rb_cCUResult = rb_define_class_under(rb_mCU, "CUResult", rb_cObject);
2327 | rb_define_const(rb_cCUResult, "SUCCESS", INT2FIX(CUDA_SUCCESS));
2328 | rb_define_const(rb_cCUResult, "ERROR_INVALID_VALUE", INT2FIX(CUDA_ERROR_INVALID_VALUE));
2329 | rb_define_const(rb_cCUResult, "ERROR_OUT_OF_MEMORY", INT2FIX(CUDA_ERROR_OUT_OF_MEMORY));
2330 | rb_define_const(rb_cCUResult, "ERROR_NOT_INITIALIZED", INT2FIX(CUDA_ERROR_NOT_INITIALIZED));
2331 | rb_define_const(rb_cCUResult, "ERROR_DEINITIALIZED", INT2FIX(CUDA_ERROR_DEINITIALIZED));
2332 | rb_define_const(rb_cCUResult, "ERROR_NO_DEVICE", INT2FIX(CUDA_ERROR_NO_DEVICE));
2333 | rb_define_const(rb_cCUResult, "ERROR_INVALID_DEVICE", INT2FIX(CUDA_ERROR_INVALID_DEVICE));
2334 | rb_define_const(rb_cCUResult, "ERROR_INVALID_IMAGE", INT2FIX(CUDA_ERROR_INVALID_IMAGE));
2335 | rb_define_const(rb_cCUResult, "ERROR_INVALID_CONTEXT", INT2FIX(CUDA_ERROR_INVALID_CONTEXT));
2336 | rb_define_const(rb_cCUResult, "ERROR_CONTEXT_ALREADY_CURRENT", INT2FIX(CUDA_ERROR_CONTEXT_ALREADY_CURRENT));
2337 | rb_define_const(rb_cCUResult, "ERROR_MAP_FAILED", INT2FIX(CUDA_ERROR_MAP_FAILED));
2338 | rb_define_const(rb_cCUResult, "ERROR_UNMAP_FAILED", INT2FIX(CUDA_ERROR_UNMAP_FAILED));
2339 | rb_define_const(rb_cCUResult, "ERROR_ARRAY_IS_MAPPED", INT2FIX(CUDA_ERROR_ARRAY_IS_MAPPED));
2340 | rb_define_const(rb_cCUResult, "ERROR_ALREADY_MAPPED", INT2FIX(CUDA_ERROR_ALREADY_MAPPED));
2341 | rb_define_const(rb_cCUResult, "ERROR_NO_BINARY_FOR_GPU", INT2FIX(CUDA_ERROR_NO_BINARY_FOR_GPU));
2342 | rb_define_const(rb_cCUResult, "ERROR_ALREADY_ACQUIRED", INT2FIX(CUDA_ERROR_ALREADY_ACQUIRED));
2343 | rb_define_const(rb_cCUResult, "ERROR_NOT_MAPPED", INT2FIX(CUDA_ERROR_NOT_MAPPED));
2344 | rb_define_const(rb_cCUResult, "ERROR_NOT_MAPPED_AS_ARRAY", INT2FIX(CUDA_ERROR_NOT_MAPPED_AS_ARRAY));
2345 | rb_define_const(rb_cCUResult, "ERROR_NOT_MAPPED_AS_POINTER", INT2FIX(CUDA_ERROR_NOT_MAPPED_AS_POINTER));
2346 | rb_define_const(rb_cCUResult, "ERROR_ECC_UNCORRECTABLE", INT2FIX(CUDA_ERROR_ECC_UNCORRECTABLE));
2347 | rb_define_const(rb_cCUResult, "ERROR_UNSUPPORTED_LIMIT", INT2FIX(CUDA_ERROR_UNSUPPORTED_LIMIT));
2348 | rb_define_const(rb_cCUResult, "ERROR_INVALID_SOURCE", INT2FIX(CUDA_ERROR_INVALID_SOURCE));
2349 | rb_define_const(rb_cCUResult, "ERROR_FILE_NOT_FOUND", INT2FIX(CUDA_ERROR_FILE_NOT_FOUND));
2350 | rb_define_const(rb_cCUResult, "ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND", INT2FIX(CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND));
2351 | rb_define_const(rb_cCUResult, "ERROR_SHARED_OBJECT_INIT_FAILED", INT2FIX(CUDA_ERROR_SHARED_OBJECT_INIT_FAILED));
2352 | rb_define_const(rb_cCUResult, "ERROR_OPERATING_SYSTEM", INT2FIX(CUDA_ERROR_OPERATING_SYSTEM));
2353 | rb_define_const(rb_cCUResult, "ERROR_INVALID_HANDLE", INT2FIX(CUDA_ERROR_INVALID_HANDLE));
2354 | rb_define_const(rb_cCUResult, "ERROR_NOT_FOUND", INT2FIX(CUDA_ERROR_NOT_FOUND));
2355 | rb_define_const(rb_cCUResult, "ERROR_NOT_READY", INT2FIX(CUDA_ERROR_NOT_READY));
2356 | rb_define_const(rb_cCUResult, "ERROR_LAUNCH_FAILED", INT2FIX(CUDA_ERROR_LAUNCH_FAILED));
2357 | rb_define_const(rb_cCUResult, "ERROR_LAUNCH_OUT_OF_RESOURCES", INT2FIX(CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES));
2358 | rb_define_const(rb_cCUResult, "ERROR_LAUNCH_TIMEOUT", INT2FIX(CUDA_ERROR_LAUNCH_TIMEOUT));
2359 | rb_define_const(rb_cCUResult, "ERROR_LAUNCH_INCOMPATIBLE_TEXTURING" , INT2FIX(CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING));
2360 | rb_define_const(rb_cCUResult, "ERROR_UNKNOWN", INT2FIX(CUDA_ERROR_UNKNOWN));
2361 |
2362 | rb_eCUStandardError = rb_define_class_under(rb_mCU, "CUStandardError", rb_eStandardError);
2363 |
2364 | rb_eCUDeviceError = rb_define_class_under(rb_mCU, "CUDeviceError", rb_eCUStandardError);
2365 | rb_eCUDeviceNotInitializedError = rb_define_class_under(rb_mCU, "CUDeviceNotInitializedError", rb_eCUDeviceError);
2366 | rb_eCUDeviceDeinitializedError = rb_define_class_under(rb_mCU, "CUDeviceDeinitializedError", rb_eCUDeviceError);
2367 | rb_eCUNoDeviceError = rb_define_class_under(rb_mCU, "CUNoDeviceError", rb_eCUDeviceError);
2368 | rb_eCUInvalidDeviceError = rb_define_class_under(rb_mCU, "CUInvalidDeviceError", rb_eCUDeviceError);
2369 |
2370 | rb_eCUMapError = rb_define_class_under(rb_mCU, "CUMapError", rb_eCUStandardError);
2371 | rb_eCUMapFailedError = rb_define_class_under(rb_mCU, "CUMapFailedError", rb_eCUMapError);
2372 | rb_eCUUnMapFailedError = rb_define_class_under(rb_mCU, "CUUnMapFailedError", rb_eCUMapError);
2373 | rb_eCUArrayIsMappedError = rb_define_class_under(rb_mCU, "CUArrayIsMappedError", rb_eCUMapError);
2374 | rb_eCUAlreadyMappedError = rb_define_class_under(rb_mCU, "CUAlreadyMappedError", rb_eCUMapError);
2375 | rb_eCUNotMappedError = rb_define_class_under(rb_mCU, "CUNotMappedError", rb_eCUMapError);
2376 | rb_eCUNotMappedAsArrayError = rb_define_class_under(rb_mCU, "CUNotMappedAsArrayError", rb_eCUMapError);
2377 | rb_eCUNotMappedAsPointerError = rb_define_class_under(rb_mCU, "CUNotMappedAsPointerError", rb_eCUMapError);
2378 |
2379 | rb_eCUContextError = rb_define_class_under(rb_mCU, "CUContextError", rb_eCUStandardError);
2380 | rb_eCUInvalidContextError = rb_define_class_under(rb_mCU, "CUInvalidContextError", rb_eCUContextError);
2381 | rb_eCUContextAlreadyCurrentError = rb_define_class_under(rb_mCU, "CUContextAlreadyCurrentError", rb_eCUContextError);
2382 | rb_eCUUnsupportedLimitError = rb_define_class_under(rb_mCU, "CUUnsupportedLimitError", rb_eCUContextError);
2383 |
2384 | rb_eCULaunchError = rb_define_class_under(rb_mCU, "CULaunchError", rb_eCUStandardError);
2385 | rb_eCULaunchFailedError = rb_define_class_under(rb_mCU, "CULaunchFailedError", rb_eCULaunchError);
2386 | rb_eCULaunchOutOfResourcesError = rb_define_class_under(rb_mCU, "CULaunchOutOfResourcesError", rb_eCULaunchError);
2387 | rb_eCULaunchTimeoutError = rb_define_class_under(rb_mCU, "CULaunchTimeoutError", rb_eCULaunchError);
2388 | rb_eCULaunchIncompatibleTexturingError = rb_define_class_under(rb_mCU, "CULaunchIncompatibleTexturingError", rb_eCULaunchError);
2389 |
2390 | rb_eCUParameterError = rb_define_class_under(rb_mCU, "CUParameterError", rb_eCUStandardError);
2391 | rb_eCUInvalidValueError = rb_define_class_under(rb_mCU, "CUInvalidValueError", rb_eCUParameterError);
2392 | rb_eCUInvalidHandleError = rb_define_class_under(rb_mCU, "CUInvalidHandleError", rb_eCUParameterError);
2393 |
2394 | rb_eCUMemoryError = rb_define_class_under(rb_mCU, "CUMemoryError", rb_eCUStandardError);
2395 | rb_eCUOutOfMemoryError = rb_define_class_under(rb_mCU, "CUOutOfMemoryError", rb_eCUMemoryError);
2396 |
2397 | rb_eCULibraryError = rb_define_class_under(rb_mCU, "CULibraryError", rb_eCUStandardError);
2398 | rb_eCUSharedObjectSymbolNotFoundError = rb_define_class_under(rb_mCU, "CUSharedObjectSymbolNotFoundError", rb_eCULibraryError);
2399 | rb_eCUSharedObjectInitFailedError = rb_define_class_under(rb_mCU, "CUSharedObjectInitFailedError", rb_eCULibraryError);
2400 |
2401 | rb_eCUHardwareError = rb_define_class_under(rb_mCU, "CUHardwareError", rb_eCUStandardError);
2402 | rb_eCUECCUncorrectableError = rb_define_class_under(rb_mCU, "CUECCUncorrectableError", rb_eCUHardwareError);
2403 |
2404 | rb_eCUFileError = rb_define_class_under(rb_mCU, "CUFileError", rb_eCUStandardError);
2405 | rb_eCUNoBinaryForGPUError = rb_define_class_under(rb_mCU, "CUNoBinaryForGPUError", rb_eCUFileError);
2406 | rb_eCUFileNotFoundError = rb_define_class_under(rb_mCU, "CUFileNotFoundError", rb_eCUFileError);
2407 | rb_eCUInvalidSourceError = rb_define_class_under(rb_mCU, "CUInvalidSourceError", rb_eCUFileError);
2408 | rb_eCUInvalidImageError = rb_define_class_under(rb_mCU, "CUInvalidImageError", rb_eCUFileError);
2409 |
2410 | rb_eCUReferenceError = rb_define_class_under(rb_mCU, "CUReferenceError", rb_eCUStandardError);
2411 | rb_eCUReferenceNotFoundError = rb_define_class_under(rb_mCU, "CUReferenceNotFoundError", rb_eCUReferenceError);
2412 |
2413 | rb_eCUOtherError = rb_define_class_under(rb_mCU, "CUOtherError", rb_eCUStandardError);
2414 | rb_eCUAlreadyAcquiredError = rb_define_class_under(rb_mCU, "CUAlreadyAcquiredError", rb_eCUOtherError);
2415 | rb_eCUNotReadyError = rb_define_class_under(rb_mCU, "CUNotReadyError", rb_eCUOtherError);
2416 | rb_eCUOperatingSystemError = rb_define_class_under(rb_mCU, "CUOperatingSystemError", rb_eCUOtherError);
2417 |
2418 | rb_eCUUnknownError = rb_define_class_under(rb_mCU, "CUUnknownError", rb_eCUStandardError);
2419 |
2420 | rb_error_class_by_enum = rb_hash_new(); // lookup table: CUresult enum value -> Ruby exception class, used when raising driver errors
2421 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_NOT_INITIALIZED), rb_eCUDeviceNotInitializedError);
2422 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_DEINITIALIZED) , rb_eCUDeviceDeinitializedError);
2423 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_NO_DEVICE) , rb_eCUNoDeviceError);
2424 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_INVALID_DEVICE) , rb_eCUInvalidDeviceError);
2425 |
2426 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_MAP_FAILED) , rb_eCUMapFailedError);
2427 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_UNMAP_FAILED) , rb_eCUUnMapFailedError);
2428 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_ARRAY_IS_MAPPED) , rb_eCUArrayIsMappedError);
2429 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_ALREADY_MAPPED) , rb_eCUAlreadyMappedError);
2430 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_NOT_MAPPED) , rb_eCUNotMappedError);
2431 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_NOT_MAPPED_AS_ARRAY) , rb_eCUNotMappedAsArrayError);
2432 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_NOT_MAPPED_AS_POINTER), rb_eCUNotMappedAsPointerError);
2433 |
2434 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_INVALID_CONTEXT) , rb_eCUInvalidContextError);
2435 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_CONTEXT_ALREADY_CURRENT), rb_eCUContextAlreadyCurrentError);
2436 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_UNSUPPORTED_LIMIT) , rb_eCUUnsupportedLimitError);
2437 |
2438 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_LAUNCH_FAILED) , rb_eCULaunchFailedError);
2439 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES) , rb_eCULaunchOutOfResourcesError);
2440 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_LAUNCH_TIMEOUT) , rb_eCULaunchTimeoutError);
2441 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING), rb_eCULaunchIncompatibleTexturingError);
2442 |
2443 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_INVALID_VALUE) , rb_eCUInvalidValueError);
2444 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_INVALID_HANDLE) , rb_eCUInvalidHandleError);
2445 |
2446 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_OUT_OF_MEMORY), rb_eCUOutOfMemoryError);
2447 |
2448 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND), rb_eCUSharedObjectSymbolNotFoundError);
2449 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_SHARED_OBJECT_INIT_FAILED) , rb_eCUSharedObjectInitFailedError);
2450 |
2451 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_ECC_UNCORRECTABLE), rb_eCUECCUncorrectableError);
2452 |
2453 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_NO_BINARY_FOR_GPU), rb_eCUNoBinaryForGPUError);
2454 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_FILE_NOT_FOUND) , rb_eCUFileNotFoundError);
2455 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_INVALID_SOURCE) , rb_eCUInvalidSourceError);
2456 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_INVALID_IMAGE) , rb_eCUInvalidImageError);
2457 |
2458 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_NOT_FOUND), rb_eCUReferenceNotFoundError);
2459 |
2460 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_ALREADY_ACQUIRED), rb_eCUAlreadyAcquiredError);
2461 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_NOT_READY) , rb_eCUNotReadyError);
2462 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_OPERATING_SYSTEM), rb_eCUOperatingSystemError);
2463 |
2464 | rb_hash_aset(rb_error_class_by_enum, INT2FIX(CUDA_ERROR_UNKNOWN), rb_eCUUnknownError);
2465 |
2466 | rb_cMemoryPointer = rb_define_class_under(rb_mCU, "MemoryPointer", rb_cObject);
2467 | rb_define_alloc_func(rb_cMemoryPointer, memory_pointer_alloc);
2468 | rb_define_method(rb_cMemoryPointer, "initialize", RUBY_METHOD_FUNC(memory_pointer_initialize), 0);
2469 |
2470 | rb_mIBuffer = rb_define_module_under(rb_mCU, "IBuffer");
2471 | rb_define_singleton_method(rb_mIBuffer, "included", RUBY_METHOD_FUNC(module_included_classmethods_hook), 1);
2472 | rb_define_method(rb_mIBuffer, "initialize", RUBY_METHOD_FUNC(ibuffer_initialize), -1);
2473 | rb_define_method(rb_mIBuffer, "size", RUBY_METHOD_FUNC(ibuffer_size), 0);
2474 | rb_define_method(rb_mIBuffer, "page_locked?", RUBY_METHOD_FUNC(ibuffer_is_page_locked), 0);
2475 | rb_define_method(rb_mIBuffer, "offset", RUBY_METHOD_FUNC(ibuffer_offset), 1);
2476 | rb_define_method(rb_mIBuffer, "[]", RUBY_METHOD_FUNC(ibuffer_element_get), 1);
2477 | rb_define_method(rb_mIBuffer, "[]=", RUBY_METHOD_FUNC(ibuffer_element_set), 2);
2478 |
2479 | rb_mIBufferClassMethods = rb_define_module_under(rb_mIBuffer, "ClassMethods");
2480 | rb_define_method(rb_mIBufferClassMethods, "element_size", RUBY_METHOD_FUNC(ibuffer_element_size), 0);
2481 |
2482 | rb_cMemoryBuffer = rb_define_class_under(rb_mCU, "MemoryBuffer", rb_cMemoryPointer);
2483 | rb_include_module(rb_cMemoryBuffer, rb_mIBuffer);
2484 | module_included_classmethods_hook(rb_mIBuffer, rb_cMemoryBuffer);
2485 | rb_define_alloc_func(rb_cMemoryBuffer, memory_buffer_alloc);
2486 | rb_define_singleton_method(rb_cMemoryBuffer, "element_size", RUBY_METHOD_FUNC(memory_buffer_element_size), 0);
2487 | rb_define_method(rb_cMemoryBuffer, "initialize", RUBY_METHOD_FUNC(memory_buffer_initialize), -1);
2488 | rb_define_method(rb_cMemoryBuffer, "size", RUBY_METHOD_FUNC(memory_buffer_size), 0);
2489 | rb_define_method(rb_cMemoryBuffer, "page_locked?", RUBY_METHOD_FUNC(memory_buffer_is_page_locked), 0);
2490 | rb_define_method(rb_cMemoryBuffer, "offset", RUBY_METHOD_FUNC(memory_buffer_offset), 1);
2491 | rb_define_method(rb_cMemoryBuffer, "[]", RUBY_METHOD_FUNC(memory_buffer_element_get), 1);
2492 | rb_define_method(rb_cMemoryBuffer, "[]=", RUBY_METHOD_FUNC(memory_buffer_element_set), 2);
2493 |
2494 | rb_cInt32Buffer = rb_define_class_under(rb_mCU, "Int32Buffer", rb_cMemoryBuffer); // 32-bit integer specialization of MemoryBuffer
2495 | rb_define_alloc_func(rb_cInt32Buffer, buffer_alloc);
2496 | rb_define_const(rb_cInt32Buffer, "ELEMENT_SIZE", INT2FIX(sizeof(int))); // NOTE(review): assumes sizeof(int) == 4 — true on mainstream ABIs, but int32_t would state the intent exactly
2497 | rb_define_singleton_method(rb_cInt32Buffer, "element_size", RUBY_METHOD_FUNC(static_cast(&buffer_element_size)), 0); // NOTE(review): static_cast template arguments appear stripped by the dump/extraction (angle brackets eaten) — confirm against the original rubycu.cpp
2498 | rb_define_method(rb_cInt32Buffer, "initialize", RUBY_METHOD_FUNC(static_cast(&buffer_initialize)) , -1);
2499 | rb_define_method(rb_cInt32Buffer, "size", RUBY_METHOD_FUNC(static_cast(&buffer_size)), 0);
2500 | rb_define_method(rb_cInt32Buffer, "page_locked?", RUBY_METHOD_FUNC(static_cast(&buffer_is_page_locked)), 0);
2501 | rb_define_method(rb_cInt32Buffer, "offset", RUBY_METHOD_FUNC(static_cast(&buffer_offset)), 1);
2502 | rb_define_method(rb_cInt32Buffer, "[]", RUBY_METHOD_FUNC(static_cast(&buffer_element_get)), 1);
2503 | rb_define_method(rb_cInt32Buffer, "[]=", RUBY_METHOD_FUNC(static_cast(&buffer_element_set)), 2);
2504 |
2505 | rb_cInt64Buffer = rb_define_class_under(rb_mCU, "Int64Buffer", rb_cMemoryBuffer); // 64-bit integer specialization of MemoryBuffer
2506 | rb_define_alloc_func(rb_cInt64Buffer, buffer_alloc);
2507 | rb_define_const(rb_cInt64Buffer, "ELEMENT_SIZE", INT2FIX(sizeof(long))); // FIXME(review): sizeof(long) is 4 on LLP64 (e.g. 64-bit Windows), making "Int64" 32-bit there — int64_t guarantees 8 bytes; confirm supported platforms
2508 | rb_define_singleton_method(rb_cInt64Buffer, "element_size", RUBY_METHOD_FUNC(static_cast(&buffer_element_size)), 0);
2509 | rb_define_method(rb_cInt64Buffer, "initialize", RUBY_METHOD_FUNC(static_cast(&buffer_initialize)) , -1);
2510 | rb_define_method(rb_cInt64Buffer, "size", RUBY_METHOD_FUNC(static_cast(&buffer_size)), 0);
2511 | rb_define_method(rb_cInt64Buffer, "page_locked?", RUBY_METHOD_FUNC(static_cast(&buffer_is_page_locked)), 0);
2512 | rb_define_method(rb_cInt64Buffer, "offset", RUBY_METHOD_FUNC(static_cast(&buffer_offset)), 1);
2513 | rb_define_method(rb_cInt64Buffer, "[]", RUBY_METHOD_FUNC(static_cast(&buffer_element_get)), 1);
2514 | rb_define_method(rb_cInt64Buffer, "[]=", RUBY_METHOD_FUNC(static_cast(&buffer_element_set)), 2);
2515 |
2516 | rb_cFloat32Buffer = rb_define_class_under(rb_mCU, "Float32Buffer", rb_cMemoryBuffer);
2517 | rb_define_alloc_func(rb_cFloat32Buffer, buffer_alloc);
2518 | rb_define_const(rb_cFloat32Buffer, "ELEMENT_SIZE", INT2FIX(sizeof(float)));
2519 | rb_define_singleton_method(rb_cFloat32Buffer, "element_size", RUBY_METHOD_FUNC(static_cast(&buffer_element_size)), 0);
2520 | rb_define_method(rb_cFloat32Buffer, "initialize", RUBY_METHOD_FUNC(static_cast(&buffer_initialize)) , -1);
2521 | rb_define_method(rb_cFloat32Buffer, "size", RUBY_METHOD_FUNC(static_cast(&buffer_size)), 0);
2522 | rb_define_method(rb_cFloat32Buffer, "page_locked?", RUBY_METHOD_FUNC(static_cast(&buffer_is_page_locked)), 0);
2523 | rb_define_method(rb_cFloat32Buffer, "offset", RUBY_METHOD_FUNC(static_cast(&buffer_offset)), 1);
2524 | rb_define_method(rb_cFloat32Buffer, "[]", RUBY_METHOD_FUNC(static_cast(&buffer_element_get)), 1);
2525 | rb_define_method(rb_cFloat32Buffer, "[]=", RUBY_METHOD_FUNC(static_cast(&buffer_element_set)), 2);
2526 |
2527 | rb_cFloat64Buffer = rb_define_class_under(rb_mCU, "Float64Buffer", rb_cMemoryBuffer); // double-precision float specialization of MemoryBuffer
2528 | rb_define_alloc_func(rb_cFloat64Buffer, buffer_alloc);
2529 | rb_define_const(rb_cFloat64Buffer, "ELEMENT_SIZE", INT2FIX(sizeof(double)));
2530 | rb_define_method(rb_cFloat64Buffer, "initialize", RUBY_METHOD_FUNC(static_cast(&buffer_initialize)) , -1); // NOTE(review): initialize is registered before the element_size singleton here — the reverse of the order used by Int32/Int64/Float32 buffers; harmless at runtime, but inconsistent
2531 | rb_define_singleton_method(rb_cFloat64Buffer, "element_size", RUBY_METHOD_FUNC(static_cast(&buffer_element_size)), 0);
2532 | rb_define_method(rb_cFloat64Buffer, "size", RUBY_METHOD_FUNC(static_cast(&buffer_size)), 0);
2533 | rb_define_method(rb_cFloat64Buffer, "page_locked?", RUBY_METHOD_FUNC(static_cast(&buffer_is_page_locked)), 0);
2534 | rb_define_method(rb_cFloat64Buffer, "offset", RUBY_METHOD_FUNC(static_cast(&buffer_offset)), 1);
2535 | rb_define_method(rb_cFloat64Buffer, "[]", RUBY_METHOD_FUNC(static_cast(&buffer_element_get)), 1);
2536 | rb_define_method(rb_cFloat64Buffer, "[]=", RUBY_METHOD_FUNC(static_cast(&buffer_element_set)), 2);
2537 |
2538 | rb_define_module_function(rb_mCU, "memcpy_htod", RUBY_METHOD_FUNC(memcpy_htod), 3);
2539 | rb_define_module_function(rb_mCU, "memcpy_dtoh", RUBY_METHOD_FUNC(memcpy_dtoh), 3);
2540 | rb_define_module_function(rb_mCU, "memcpy_dtod", RUBY_METHOD_FUNC(memcpy_dtod), 3);
2541 | rb_define_module_function(rb_mCU, "memcpy_htod_async", RUBY_METHOD_FUNC(memcpy_htod_async), 4);
2542 | rb_define_module_function(rb_mCU, "memcpy_dtoh_async", RUBY_METHOD_FUNC(memcpy_dtoh_async), 4);
2543 | rb_define_module_function(rb_mCU, "memcpy_dtod_async", RUBY_METHOD_FUNC(memcpy_dtod_async), 4);
2544 | rb_define_module_function(rb_mCU, "mem_get_info", RUBY_METHOD_FUNC(mem_get_info), 0);
2545 |
2546 | rb_define_module_function(rb_mCU, "driver_get_version", RUBY_METHOD_FUNC(driver_get_version), 0);
2547 |
2548 | CUresult status = cuInit(0); // the driver API requires cuInit before any other cuXxx call; flags argument must be 0
2549 | if (status != CUDA_SUCCESS) {
2550 | RAISE_CU_STD_ERROR(status, "Failed to initialize the CUDA driver API."); // raise at require-time: the extension is unusable without a working driver
2551 | }
2552 | }
2553 |
2554 | } // namespace
2555 | } // namespace
2556 |
--------------------------------------------------------------------------------