├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── Makefile ├── README.md ├── build.rs ├── link.sh └── src ├── dot.cpp ├── dot_gpu.cu ├── dot_gpu.h └── main.rs /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | target 3 | .vscode -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "bitflags" 5 | version = "1.0.4" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | 8 | [[package]] 9 | name = "cc" 10 | version = "1.0.35" 11 | source = "registry+https://github.com/rust-lang/crates.io-index" 12 | 13 | [[package]] 14 | name = "cloudabi" 15 | version = "0.0.3" 16 | source = "registry+https://github.com/rust-lang/crates.io-index" 17 | dependencies = [ 18 | "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", 19 | ] 20 | 21 | [[package]] 22 | name = "fuchsia-cprng" 23 | version = "0.1.1" 24 | source = "registry+https://github.com/rust-lang/crates.io-index" 25 | 26 | [[package]] 27 | name = "libc" 28 | version = "0.2.51" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | 31 | [[package]] 32 | name = "rand" 33 | version = "0.5.6" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | dependencies = [ 36 | "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", 37 | "fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", 38 | "libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)", 39 | "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", 40 | "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", 41 | ] 42 | 43 | [[package]] 44 | name = "rand_core" 45 | version = "0.3.1" 46 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 47 | dependencies = [ 48 | "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 49 | ] 50 | 51 | [[package]] 52 | name = "rand_core" 53 | version = "0.4.0" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | 56 | [[package]] 57 | name = "rust_cuda" 58 | version = "0.1.0" 59 | dependencies = [ 60 | "cc 1.0.35 (registry+https://github.com/rust-lang/crates.io-index)", 61 | "libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)", 62 | "rand 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", 63 | ] 64 | 65 | [[package]] 66 | name = "winapi" 67 | version = "0.3.7" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | dependencies = [ 70 | "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 71 | "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 72 | ] 73 | 74 | [[package]] 75 | name = "winapi-i686-pc-windows-gnu" 76 | version = "0.4.0" 77 | source = "registry+https://github.com/rust-lang/crates.io-index" 78 | 79 | [[package]] 80 | name = "winapi-x86_64-pc-windows-gnu" 81 | version = "0.4.0" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | 84 | [metadata] 85 | "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" 86 | "checksum cc 1.0.35 (registry+https://github.com/rust-lang/crates.io-index)" = "5e5f3fee5eeb60324c2781f1e41286bdee933850fff9b3c672587fed5ec58c83" 87 | "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" 88 | "checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" 89 | "checksum libc 0.2.51 
(registry+https://github.com/rust-lang/crates.io-index)" = "bedcc7a809076656486ffe045abeeac163da1b558e963a31e29fbfbeba916917" 90 | "checksum rand 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c618c47cd3ebd209790115ab837de41425723956ad3ce2e6a7f09890947cacb9" 91 | "checksum rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" 92 | "checksum rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0e7a549d590831370895ab7ba4ea0c1b6b011d106b5ff2da6eee112615e6dc0" 93 | "checksum winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f10e386af2b13e47c89e7236a7a14a086791a2b88ebad6df9bf42040195cf770" 94 | "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 95 | "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 96 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust_cuda" 3 | version = "0.1.0" 4 | authors = ["MoonL1ght "] 5 | build = "build.rs" 6 | links = "cudart" 7 | 8 | [dependencies] 9 | libc = "0.2" 10 | rand = "0.5.5" 11 | 12 | [build-dependencies] 13 | cc = "1.0" -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CARGO = cargo 2 | 3 | build-dev: 4 | $(CARGO) build 5 | 6 | build: 7 | $(CARGO) build --release 8 | 9 | run: 10 | $(CARGO) run -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 
Rust Lang + CUDA C + C++ integration
--------------------------------------------------------------------------------
/build.rs:
--------------------------------------------------------------------------------
1 | // Build script: compiles the C++ and CUDA sources with the `cc` crate and
2 | // prints the cargo directives used to link against the CUDA runtime.
3 | extern crate cc;
4 |
5 | fn main() {
6 |     cc::Build::new()
7 |         .cuda(true)
8 |         .cpp(true)
9 |         .flag("-cudart=shared")
10 |         .files(&["./src/dot.cpp", "./src/dot_gpu.cu"])
11 |         .compile("dot.a");
12 |     // The CUDA library directory is hard-coded for one local installation.
13 |     println!("cargo:rustc-link-search=native=/Developer/NVIDIA/CUDA-10.1/lib");
14 |     println!("cargo:rustc-link-search=/Developer/NVIDIA/CUDA-10.1/lib");
15 |     // NOTE(review): `cargo:rustc-env` sets the variable for the compiler
16 |     // invocation, not for the process that later runs the binary - confirm
17 |     // this line has the intended effect.
18 |     println!("cargo:rustc-env=LD_LIBRARY_PATH=/Developer/NVIDIA/CUDA-10.1/lib");
19 |     println!("cargo:rustc-link-lib=dylib=cudart");
20 | }
--------------------------------------------------------------------------------
/link.sh:
--------------------------------------------------------------------------------
1 | ln -s /Developer/NVIDIA/CUDA-10.1/lib/* /Users/Alexander/.rustup/toolchains/nightly-x86_64-apple-darwin/lib
--------------------------------------------------------------------------------
/src/dot.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include "dot_gpu.h"
3 |
4 | using namespace std;
5 |
6 | // Prints the N floats starting at v to stdout as "[a, b, c]" plus a newline.
7 | void display_vector(float *v, size_t N) {
8 |   cout << "[";
9 |   for (size_t i = 0; i < N; i++) {
10 |     cout << v[i];
11 |     if (i != N - 1) {
12 |       cout << ", ";
13 |     }
14 |   }
15 |   cout << "]" << endl;
16 | }
17 |
18 | extern "C" {
19 |   // C-linkage entry point called from src/main.rs.
20 |   // Echoes both input vectors, runs gpu_dot on them and returns the sum of
21 |   // the blocksPerGrid partial results that gpu_dot hands back.
22 |   float dot(float *v1, float *v2, size_t N) {
23 |     cout << "Calling gpu dot product" << endl;
24 |     cout << "Got two vectors from rust:" << endl;
25 |     display_vector(v1, N);
26 |     display_vector(v2, N);
27 |     float *gpu_res = gpu_dot(v1, v2, N);
28 |     float res = 0.0f;
29 |     for (int i = 0; i < blocksPerGrid; i++) {
30 |       res += gpu_res[i];
31 |     }
32 |     // gpu_dot allocates this buffer with new[], so it has to be released
33 |     // with delete[]; pairing new[] with free() is undefined behaviour.
34 |     delete[] gpu_res;
35 |     return res;
36 |   }
37 | }
--------------------------------------------------------------------------------
/src/dot_gpu.cu:
--------------------------------------------------------------------------------
1 | #include "dot_gpu.h"
2 |
3 | // Kernel: each block writes one partial dot product to res[blockIdx.x].
4 | //   v1, v2 - device input vectors of N floats
5 | //   res    - device output with one float per block
6 | // The shared cache holds threadsPerBlock entries and the reduction halves
7 | // blockDim.x on every step, so the launch must use a power-of-two block
8 | // size that does not exceed threadsPerBlock.
9 | __global__ void dot__(float *v1, float *v2, float *res, int N) {
10 |   __shared__ float cache [threadsPerBlock];
11 |   int tid = threadIdx.x + blockIdx.x * blockDim.x;
12 |   int cacheIndex = threadIdx.x;
13 |   float temp = 0.0;
14 |   // Each thread accumulates the products at tid, tid + stride, ... where
15 |   // the stride is the total number of launched threads.
16 |   while (tid < N) {
17 |     temp += v1[tid] * v2[tid];
18 |     tid += blockDim.x * gridDim.x;
19 |   }
20 |   cache[cacheIndex] = temp;
21 |
22 |   // All per-thread sums must be stored before any thread reads them.
23 |   __syncthreads();
24 |
25 |   // Pairwise reduction of the block's cache into cache[0].
26 |   int i = blockDim.x / 2;
27 |   while (i != 0) {
28 |     if (cacheIndex < i) {
29 |       cache[cacheIndex] += cache[cacheIndex + i];
30 |     }
31 |     __syncthreads();
32 |     i /= 2;
33 |   }
34 |
35 |   if (cacheIndex == 0) {
36 |     res[blockIdx.x] = cache[0];
37 |   }
38 | }
39 |
40 | // Copies v1 and v2 (N floats each) to the device, launches dot__ and
41 | // returns a host array of blocksPerGrid partial sums. The array is
42 | // allocated with new[]; the caller owns it and must release it with delete[].
43 | // NOTE(review): the return codes of the CUDA runtime calls are ignored, so
44 | // a failed allocation, copy or launch goes unnoticed.
45 | float * gpu_dot (float *v1, float *v2, size_t N) {
46 |   float *dev_v1, *dev_v2, *dev_res, *res;
47 |   res = new float[blocksPerGrid];
48 |
49 |   cudaMalloc((void**)&dev_v1, N * sizeof(float));
50 |   cudaMalloc((void**)&dev_v2, N * sizeof(float));
51 |   cudaMalloc((void**)&dev_res, blocksPerGrid * sizeof(float));
52 |
53 |   cudaMemcpy(dev_v1, v1, N * sizeof(float), cudaMemcpyHostToDevice);
54 |   cudaMemcpy(dev_v2, v2, N * sizeof(float), cudaMemcpyHostToDevice);
55 |
56 |   // Launch configuration: blocksPerGrid blocks of threadsPerBlock threads,
57 |   // matching the size of the kernel's shared cache and of dev_res.
58 |   dot__<<<blocksPerGrid, threadsPerBlock>>>(dev_v1, dev_v2, dev_res, (int)N);
59 |   cudaMemcpy(res, dev_res, blocksPerGrid * sizeof(float), cudaMemcpyDeviceToHost);
60 |
61 |   cudaFree(dev_v1);
62 |   cudaFree(dev_v2);
63 |   cudaFree(dev_res);
64 |
65 |   return res;
66 | }
--------------------------------------------------------------------------------
/src/dot_gpu.h:
--------------------------------------------------------------------------------
1 | #include <stddef.h>
2 |
3 | const int threadsPerBlock = 512;
4 | const int blocksPerGrid = 64;
5 |
6 | float * gpu_dot(float *v1, float *v2, size_t N);
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
1 | extern crate libc;
2 | extern
crate rand;
3 |
4 | use libc::{c_float, size_t};
5 | use rand::Rng;
6 |
7 | const VEC_SIZE: usize = 10;
8 | const MAX: f32 = 10.;
9 | const MIN: f32 = 0.;
10 |
11 | extern "C" {
12 |     /// Dot product exported with C linkage by `src/dot.cpp`.
13 |     ///
14 |     /// `v1` and `v2` must each point to `n` readable `f32` values.
15 |     fn dot(v1: *mut c_float, v2: *mut c_float, n: size_t) -> c_float;
16 | }
17 |
18 | /// Computes the dot product of two equally long slices on the CPU.
19 | ///
20 | /// # Panics
21 | ///
22 | /// Panics if the slices differ in length.
23 | fn cpu_dot(v1: &[f32], v2: &[f32]) -> f32 {
24 |     assert_eq!(v1.len(), v2.len(), "vectors must have the same length");
25 |     v1.iter().zip(v2).fold(0., |acc, (a, b)| acc + a * b)
26 | }
27 |
28 | /// Fills two random vectors, then prints their dot product as computed by
29 | /// the external `dot` function and by `cpu_dot`.
30 | fn main() {
31 |     let mut v1: Vec<f32> = Vec::with_capacity(VEC_SIZE);
32 |     let mut v2: Vec<f32> = Vec::with_capacity(VEC_SIZE);
33 |
34 |     let mut rng = rand::thread_rng();
35 |     for _ in 0..VEC_SIZE {
36 |         v1.push(rng.gen_range(MIN, MAX));
37 |         v2.push(rng.gen_range(MIN, MAX));
38 |     }
39 |
40 |     println!("{:?}", v1);
41 |     println!("{:?}", v2);
42 |
43 |     println!("GPU computing started");
44 |     // SAFETY: both pointers come from live `Vec<f32>`s that hold exactly
45 |     // `VEC_SIZE` initialised elements and are not dropped until after the call.
46 |     let gpu_res: c_float = unsafe { dot(v1.as_mut_ptr(), v2.as_mut_ptr(), VEC_SIZE) };
47 |     println!("GPU computing finished");
48 |     println!("GPU dot product result: {}", gpu_res);
49 |
50 |     let cpu_res = cpu_dot(&v1, &v2);
51 |     println!("CPU dot product result: {}", cpu_res);
52 | }
--------------------------------------------------------------------------------