├── .gitignore ├── Makefile ├── c_src ├── Makefile ├── pcuda_buffer.h ├── pcuda_float_buffer.cpp ├── pcuda_int_buffer.cpp ├── pcuda_ops.cu ├── pcuda_ops.h ├── pcuda_string.cu ├── pcuda_string.h ├── pcuda_string_buffer.cpp └── pteracuda_nifs.cpp ├── rebar ├── rebar.config └── src ├── pteracuda.app.src ├── pteracuda_app.erl ├── pteracuda_bench.erl ├── pteracuda_buffer.erl ├── pteracuda_commands.hrl ├── pteracuda_context.erl ├── pteracuda_demo.erl ├── pteracuda_internals.hrl ├── pteracuda_nifs.erl ├── pteracuda_stress.erl └── pteracuda_sup.erl /.gitignore: -------------------------------------------------------------------------------- 1 | ebin/* 2 | priv/* 3 | .eunit 4 | c_src/*.o 5 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: compile eunit 2 | 3 | compile: 4 | @./rebar compile 5 | 6 | clean: 7 | @./rebar clean 8 | @rm -f c_src/*.o c_src/*flymake.h 9 | 10 | eunit: 11 | @./rebar eunit 12 | -------------------------------------------------------------------------------- /c_src/Makefile: -------------------------------------------------------------------------------- 1 | all: compile 2 | 3 | compile: pcuda_ops.o 4 | 5 | pcuda_ops.o: 6 | nvcc $(NVCCFLAGS) pcuda_ops.cu 7 | 8 | clean: 9 | @nvcc -c -clean *.cu 10 | -------------------------------------------------------------------------------- /c_src/pcuda_buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef PCUDA_BUFFER 2 | #define PCUDA_BUFFER 3 | 4 | #include 5 | #include 6 | 7 | #include "erl_nif.h" 8 | 9 | enum PCudaBufferTypes { 10 | BUF_TYPE_INTEGER, 11 | BUF_TYPE_STRING, 12 | BUF_TYPE_FLOAT 13 | }; 14 | 15 | class PCudaBuffer { 16 | public: 17 | virtual ~PCudaBuffer() { }; 18 | virtual unsigned int size() = 0; 19 | virtual PCudaBufferTypes type() = 0; 20 | virtual bool sort() = 0; 21 | virtual bool contains(ErlNifEnv *env, 
ERL_NIF_TERM rawTarget) = 0; 22 | virtual void write(ErlNifEnv *env, ERL_NIF_TERM data) = 0; 23 | virtual void delete_at(unsigned long position) = 0; 24 | virtual bool insert_at(unsigned long position, ErlNifEnv *env, ERL_NIF_TERM value) = 0; 25 | virtual void clear() = 0; 26 | virtual bool copy(PCudaBuffer *src) = 0; 27 | virtual ERL_NIF_TERM intersect(ErlNifEnv *env, PCudaBuffer *other) = 0; 28 | virtual ERL_NIF_TERM minmax(ErlNifEnv *env) = 0; 29 | virtual ERL_NIF_TERM toErlTerms(ErlNifEnv *env) = 0; 30 | }; 31 | 32 | class PCudaIntBuffer : public PCudaBuffer { 33 | public: 34 | PCudaIntBuffer(); 35 | virtual ~PCudaIntBuffer(); 36 | virtual unsigned int size(); 37 | virtual PCudaBufferTypes type() { return BUF_TYPE_INTEGER; }; 38 | virtual bool sort(); 39 | virtual bool contains(ErlNifEnv *env, ERL_NIF_TERM rawTarget); 40 | virtual ERL_NIF_TERM toErlTerms(ErlNifEnv *env); 41 | virtual void write(ErlNifEnv *env, ERL_NIF_TERM data); 42 | virtual void delete_at(unsigned long position); 43 | virtual bool insert_at(unsigned long position, ErlNifEnv *env, ERL_NIF_TERM value); 44 | virtual void clear(); 45 | virtual bool copy(PCudaBuffer *src); 46 | virtual ERL_NIF_TERM intersect(ErlNifEnv *env, PCudaBuffer *other); 47 | virtual ERL_NIF_TERM minmax(ErlNifEnv *env); 48 | 49 | protected: 50 | std::vector *data; 51 | }; 52 | 53 | class PCudaFloatBuffer : public PCudaBuffer { 54 | public: 55 | PCudaFloatBuffer(); 56 | virtual ~PCudaFloatBuffer(); 57 | virtual unsigned int size(); 58 | virtual PCudaBufferTypes type() { return BUF_TYPE_FLOAT; }; 59 | virtual bool sort(); 60 | virtual bool contains(ErlNifEnv *env, ERL_NIF_TERM rawTarget); 61 | virtual ERL_NIF_TERM toErlTerms(ErlNifEnv *env); 62 | virtual void write(ErlNifEnv *env, ERL_NIF_TERM data); 63 | virtual void delete_at(unsigned long position); 64 | virtual bool insert_at(unsigned long position, ErlNifEnv *env, ERL_NIF_TERM value); 65 | virtual void clear(); 66 | virtual bool copy(PCudaBuffer *src); 67 | virtual 
ERL_NIF_TERM intersect(ErlNifEnv *env, PCudaBuffer *other); 68 | virtual ERL_NIF_TERM minmax(ErlNifEnv *env); 69 | 70 | protected: 71 | std::vector *data; 72 | }; 73 | 74 | class PCudaStringBuffer : public PCudaBuffer { 75 | public: 76 | PCudaStringBuffer(); 77 | virtual ~PCudaStringBuffer(); 78 | virtual unsigned int size(); 79 | virtual PCudaBufferTypes type() { return BUF_TYPE_STRING; }; 80 | virtual bool sort(); 81 | virtual bool contains(ErlNifEnv *env, ERL_NIF_TERM rawTarget); 82 | virtual ERL_NIF_TERM toErlTerms(ErlNifEnv *env); 83 | virtual void write(ErlNifEnv *env, ERL_NIF_TERM data); 84 | virtual void delete_at(unsigned long position); 85 | virtual bool insert_at(unsigned long position, ErlNifEnv *env, ERL_NIF_TERM value); 86 | virtual void clear(); 87 | virtual bool copy(PCudaBuffer *src); 88 | virtual ERL_NIF_TERM intersect(ErlNifEnv *env, PCudaBuffer *other); 89 | virtual ERL_NIF_TERM minmax(ErlNifEnv *env) { return enif_make_atom(env, "error"); }; 90 | 91 | protected: 92 | std::vector *data; 93 | }; 94 | #endif 95 | -------------------------------------------------------------------------------- /c_src/pcuda_float_buffer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "pcuda_buffer.h" 3 | #include "pcuda_ops.h" 4 | 5 | PCudaFloatBuffer::PCudaFloatBuffer() { 6 | this->data = new std::vector(); 7 | } 8 | 9 | PCudaFloatBuffer::~PCudaFloatBuffer() { 10 | delete this->data; 11 | } 12 | 13 | unsigned int PCudaFloatBuffer::size() { 14 | return this->data->size(); 15 | } 16 | 17 | void PCudaFloatBuffer::write(ErlNifEnv *env, ERL_NIF_TERM data) { 18 | ERL_NIF_TERM head; 19 | double value; 20 | 21 | while (enif_get_list_cell(env, data, &head, &data)) { 22 | if (enif_get_double(env, head, &value)) { 23 | this->data->push_back(value); 24 | } 25 | } 26 | } 27 | 28 | void PCudaFloatBuffer::delete_at(unsigned long position) { 29 | std::vector::iterator iter = this->data->begin(); 30 | for (unsigned 
long i = 0; i < position; i++) { 31 | iter++; 32 | } 33 | this->data->erase(iter); 34 | } 35 | 36 | bool PCudaFloatBuffer::insert_at(unsigned long position, ErlNifEnv *env, ERL_NIF_TERM rawValue) { 37 | double value; 38 | if (enif_get_double(env, rawValue, &value)) { 39 | std::vector::iterator iter = this->data->begin(); 40 | for (unsigned long i = 0; i < position; i++) { 41 | iter++; 42 | } 43 | this->data->insert(iter, 1, value); 44 | return true; 45 | } 46 | return false; 47 | } 48 | 49 | bool PCudaFloatBuffer::sort() { 50 | return pcuda_float_sort(this->data); 51 | } 52 | 53 | bool PCudaFloatBuffer::contains(ErlNifEnv *env, ERL_NIF_TERM rawTarget) { 54 | double target; 55 | if (enif_get_double(env, rawTarget, &target)) { 56 | return pcuda_float_binary_search(this->data, target); 57 | } 58 | else { 59 | return false; 60 | } 61 | } 62 | 63 | ERL_NIF_TERM PCudaFloatBuffer::toErlTerms(ErlNifEnv *env) { 64 | std::vector::iterator iter; 65 | ERL_NIF_TERM retval = enif_make_list(env, 0); 66 | if (this->data->size() > 0) { 67 | for (iter = this->data->end(); iter != this->data->begin();) { 68 | --iter; 69 | retval = enif_make_list_cell(env, enif_make_double(env, *iter), retval); 70 | } 71 | } 72 | return retval; 73 | } 74 | 75 | void PCudaFloatBuffer::clear() { 76 | this->data->clear(); 77 | } 78 | 79 | bool PCudaFloatBuffer::copy(PCudaBuffer *src) { 80 | if (src->type() == BUF_TYPE_FLOAT) { 81 | PCudaFloatBuffer *source = (PCudaFloatBuffer *) src; 82 | std::vector::iterator iter; 83 | for (iter = source->data->begin(); iter != source->data->end(); ++iter) { 84 | this->data->push_back(*iter); 85 | } 86 | return true; 87 | } 88 | return false; 89 | } 90 | 91 | ERL_NIF_TERM PCudaFloatBuffer::intersect(ErlNifEnv *env, PCudaBuffer *otherBuffer) { 92 | ERL_NIF_TERM retval = enif_make_list(env, 0); 93 | std::vector intersection; 94 | if (otherBuffer->type() == BUF_TYPE_FLOAT) { 95 | PCudaFloatBuffer *other = (PCudaFloatBuffer *) otherBuffer; 96 | 
pcuda_float_intersection(this->data, other->data, &intersection); 97 | if (intersection.size() > 0) { 98 | for (std::vector::iterator iter = intersection.end(); iter != intersection.begin();) { 99 | --iter; 100 | retval = enif_make_list_cell(env, enif_make_double(env, *iter), retval); 101 | } 102 | } 103 | } 104 | return retval; 105 | } 106 | /* Returns a 2-tuple {Min, Max} of the buffer contents as Erlang floats. pcuda_float_minmax dereferences the iterators it finds, so the buffer must be non-empty when this is called. */ 107 | ERL_NIF_TERM PCudaFloatBuffer::minmax(ErlNifEnv *env) { 108 | double minmax[2]; 109 | pcuda_float_minmax(this->data, &minmax[0]); 110 | /* The values are doubles: build them with enif_make_double. enif_make_long converted them to long and dropped the fractional part. */ return enif_make_tuple2(env, enif_make_double(env, minmax[0]), enif_make_double(env, minmax[1])); 111 | } 112 | -------------------------------------------------------------------------------- /c_src/pcuda_int_buffer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "pcuda_buffer.h" 3 | #include "pcuda_ops.h" 4 | 5 | PCudaIntBuffer::PCudaIntBuffer() { 6 | this->data = new std::vector(); 7 | } 8 | 9 | PCudaIntBuffer::~PCudaIntBuffer() { 10 | delete this->data; 11 | } 12 | 13 | unsigned int PCudaIntBuffer::size() { 14 | return this->data->size(); 15 | } 16 | 17 | void PCudaIntBuffer::write(ErlNifEnv *env, ERL_NIF_TERM data) { 18 | ERL_NIF_TERM head; 19 | long value; 20 | 21 | while (enif_get_list_cell(env, data, &head, &data)) { 22 | if (enif_get_long(env, head, &value)) { 23 | this->data->push_back(value); 24 | } 25 | } 26 | } 27 | 28 | void PCudaIntBuffer::delete_at(unsigned long position) { 29 | std::vector::iterator iter = this->data->begin(); 30 | for (unsigned long i = 0; i < position; i++) { 31 | iter++; 32 | } 33 | this->data->erase(iter); 34 | } 35 | 36 | bool PCudaIntBuffer::insert_at(unsigned long position, ErlNifEnv *env, ERL_NIF_TERM rawValue) { 37 | long value; 38 | if (enif_get_long(env, rawValue, &value)) { 39 | std::vector::iterator iter = this->data->begin(); 40 | for (unsigned long i = 0; i < position; i++) { 41 | iter++; 42 | } 43 | this->data->insert(iter, 1, value); 44 | return true; 45 | } 46 | return false; 
47 | } 48 | 49 | bool PCudaIntBuffer::sort() { 50 | return pcuda_integer_sort(this->data); 51 | } 52 | 53 | bool PCudaIntBuffer::contains(ErlNifEnv *env, ERL_NIF_TERM rawTarget) { 54 | long target; 55 | if (enif_get_long(env, rawTarget, &target)) { 56 | return pcuda_integer_binary_search(this->data, target); 57 | } 58 | else { 59 | return false; 60 | } 61 | } 62 | 63 | ERL_NIF_TERM PCudaIntBuffer::toErlTerms(ErlNifEnv *env) { 64 | std::vector::iterator iter; 65 | ERL_NIF_TERM retval = enif_make_list(env, 0); 66 | if (this->data->size() > 0) { 67 | for (iter = this->data->end(); iter != this->data->begin();) { 68 | --iter; 69 | retval = enif_make_list_cell(env, enif_make_long(env, *iter), retval); 70 | } 71 | } 72 | return retval; 73 | } 74 | 75 | void PCudaIntBuffer::clear() { 76 | this->data->clear(); 77 | } 78 | 79 | bool PCudaIntBuffer::copy(PCudaBuffer *src) { 80 | if (src->type() == BUF_TYPE_INTEGER) { 81 | PCudaIntBuffer *source = (PCudaIntBuffer *) src; 82 | std::vector::iterator iter; 83 | for (iter = source->data->begin(); iter != source->data->end(); ++iter) { 84 | this->data->push_back(*iter); 85 | } 86 | return true; 87 | } 88 | return false; 89 | } 90 | 91 | ERL_NIF_TERM PCudaIntBuffer::intersect(ErlNifEnv *env, PCudaBuffer *otherBuffer) { 92 | ERL_NIF_TERM retval = enif_make_list(env, 0); 93 | std::vector intersection; 94 | if (otherBuffer->type() == BUF_TYPE_INTEGER) { 95 | PCudaIntBuffer *other = (PCudaIntBuffer *) otherBuffer; 96 | pcuda_integer_intersection(this->data, other->data, &intersection); 97 | if (intersection.size() > 0) { 98 | for (std::vector::iterator iter = intersection.end(); iter != intersection.begin();) { 99 | --iter; 100 | retval = enif_make_list_cell(env, enif_make_long(env, *iter), retval); 101 | } 102 | } 103 | } 104 | return retval; 105 | } 106 | 107 | ERL_NIF_TERM PCudaIntBuffer::minmax(ErlNifEnv *env) { 108 | long minmax[2]; 109 | pcuda_integer_minmax(this->data, &minmax[0]); 110 | return enif_make_tuple2(env, 
enif_make_long(env, minmax[0]), enif_make_long(env, minmax[1])); 111 | } 112 | -------------------------------------------------------------------------------- /c_src/pcuda_ops.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cuda.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "pcuda_string.h" 16 | 17 | PCudaString::PCudaString() { 18 | this->len = -1; 19 | this->str = NULL; 20 | } 21 | 22 | PCudaString::PCudaString(const std::string& other) { 23 | this->len = other.length(); 24 | this->ptr = thrust::device_malloc(this->len + 1); 25 | this->str = raw_pointer_cast(this->ptr); 26 | cudaMemcpy(this->str, other.c_str(), this->len, cudaMemcpyHostToDevice); 27 | } 28 | 29 | PCudaString::PCudaString(const PCudaString& other) { 30 | this->len = other.len; 31 | this->str = other.str; 32 | this->ptr = other.ptr; 33 | } 34 | 35 | int PCudaString::length() { 36 | return this->len; 37 | } 38 | 39 | int PCudaString::cstr_length() { 40 | return this->len + 1; 41 | } 42 | 43 | PCudaString::operator std::string() { 44 | std::string retval; 45 | thrust::copy(this->ptr, this->ptr + this->len, back_inserter(retval)); 46 | return retval; 47 | } 48 | 49 | 50 | void PCudaString::destroy() { 51 | if (this->str) { 52 | thrust::device_free(this->ptr); 53 | this->str = NULL; 54 | this->len = -1; 55 | } 56 | } 57 | 58 | bool operator< (PCudaString lhs, PCudaString rhs) { 59 | char *l = lhs.str; 60 | char *r = rhs.str; 61 | while((*l && *r) && *l == *r) { 62 | ++l; 63 | ++r; 64 | } 65 | return *l < *r; 66 | } 67 | 68 | bool pcuda_integer_sort(std::vector *data) { 69 | thrust::device_vector device = *data; 70 | thrust::sort(device.begin(), device.end()); 71 | thrust::copy(device.begin(), device.end(), data->begin()); 72 | return true; 73 | } 74 | 75 | bool pcuda_float_sort(std::vector *data) { 76 | thrust::device_vector device = 
*data; 77 | thrust::sort(device.begin(), device.end()); 78 | thrust::copy(device.begin(), device.end(), data->begin()); 79 | return true; 80 | } 81 | 82 | bool pcuda_string_sort(std::vector *data) { 83 | printf("In pcuda_string_sort\n"); 84 | thrust::device_vector device; 85 | printf("Reserving memory\n"); 86 | device.reserve(data->size()); 87 | printf("Copying data to device\n"); 88 | for (std::vector::iterator iter = data->begin(); 89 | iter != data->end(); ++iter) { 90 | std::string s = *iter; 91 | device.push_back(s); 92 | } 93 | printf("On-device sort\n"); 94 | thrust::sort(device.begin(), device.end()); 95 | printf("Copying data from device\n"); 96 | thrust::host_vector results = device; 97 | data->clear(); 98 | for (thrust::host_vector::iterator iter = results.begin(); 99 | iter != results.end(); ++iter) { 100 | PCudaString cs = *iter; 101 | std::string s = cs; 102 | cs.destroy(); 103 | data->push_back(s); 104 | } 105 | printf("Done!\n"); 106 | return true; 107 | } 108 | 109 | bool pcuda_integer_binary_search(std::vector *data, long target) { 110 | thrust::device_vector device = *data; 111 | return thrust::binary_search(device.begin(), device.end(), target, thrust::less()); 112 | } 113 | 114 | bool pcuda_float_binary_search(std::vector *data, double target) { 115 | thrust::device_vector device = *data; 116 | return thrust::binary_search(device.begin(), device.end(), target, thrust::less()); 117 | } 118 | 119 | void pcuda_integer_intersection(std::vector *first, std::vector *second, 120 | std::vector *intersection) { 121 | thrust::set_intersection(first->begin(), first->end(), 122 | second->begin(), second->end(), std::back_inserter(*intersection)); 123 | } 124 | 125 | void pcuda_float_intersection(std::vector *first, std::vector *second, 126 | std::vector *intersection) { 127 | thrust::set_intersection(first->begin(), first->end(), 128 | second->begin(), second->end(), std::back_inserter(*intersection)); 129 | } 130 | 131 | void 
pcuda_integer_minmax(std::vector *data, long *minmax) { 132 | thrust::pair::iterator, 133 | std::vector::iterator> result = thrust::minmax_element(data->begin(), data->end()); 134 | minmax[0] = *result.first; 135 | minmax[1] = *result.second; 136 | } 137 | 138 | void pcuda_float_minmax(std::vector *data, double *minmax) { 139 | thrust::pair::iterator, 140 | std::vector::iterator> result = thrust::minmax_element(data->begin(), data->end()); 141 | minmax[0] = *result.first; 142 | minmax[1] = *result.second; 143 | } 144 | -------------------------------------------------------------------------------- /c_src/pcuda_ops.h: -------------------------------------------------------------------------------- 1 | #ifndef PCUDA_OPS 2 | #define PCUDA_OPS 3 | 4 | #include 5 | #include 6 | 7 | bool pcuda_integer_sort(std::vector *data); 8 | bool pcuda_integer_binary_search(std::vector *data, long target); 9 | void pcuda_integer_intersection(std::vector *first, std::vector *second, std::vector *intersection); 10 | void pcuda_integer_minmax(std::vector *data, long *minmax); 11 | 12 | bool pcuda_float_sort(std::vector *data); 13 | bool pcuda_float_binary_search(std::vector *data, double target); 14 | void pcuda_float_intersection(std::vector *first, std::vector *second, std::vector *intersection); 15 | void pcuda_float_minmax(std::vector *data, double *minmax); 16 | 17 | // Work in progress 18 | bool pcuda_string_sort(std::vector *data); 19 | #endif 20 | -------------------------------------------------------------------------------- /c_src/pcuda_string.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "cuda.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "pcuda_string.h" 10 | 11 | PCudaString::PCudaString() { 12 | this->len = -1; 13 | this->str = NULL; 14 | } 15 | 16 | PCudaString::PCudaString(const std::string& other) { 17 | this->len = other.length(); 18 | this->ptr = thrust::device_malloc(this->len + 
1); 19 | this->str = raw_pointer_cast(this->ptr); 20 | cudaMemcpy(this->str, other.c_str(), this->len, cudaMemcpyHostToDevice); 21 | } 22 | 23 | inline PCudaString::PCudaString(const PCudaString& other) { 24 | this->len = other.len; 25 | this->str = other.str; 26 | this->ptr = other.ptr; 27 | } 28 | 29 | int PCudaString::length() { 30 | return this->len; 31 | } 32 | 33 | int PCudaString::cstr_length() { 34 | return this->len + 1; 35 | } 36 | 37 | PCudaString::operator std::string() { 38 | std::string retval; 39 | thrust::copy(this->ptr, this->ptr + this->len, back_inserter(retval)); 40 | return retval; 41 | } 42 | 43 | 44 | void PCudaString::destroy() { 45 | if (this->str) { 46 | thrust::device_free(this->ptr); 47 | this->str = NULL; 48 | this->len = -1; 49 | } 50 | } 51 | 52 | bool operator< (PCudaString lhs, PCudaString rhs) { 53 | char *l = lhs.str; 54 | char *r = rhs.str; 55 | while((*l && *r) && *l == *r) { 56 | ++l; 57 | ++r; 58 | } 59 | return *l < *r; 60 | } 61 | -------------------------------------------------------------------------------- /c_src/pcuda_string.h: -------------------------------------------------------------------------------- 1 | #ifndef PCUDA_STRING 2 | #define PCUDA_STRING 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | class PCudaString { 10 | public: 11 | __host__ __device__ PCudaString(); 12 | __host__ PCudaString(const std::string& other); 13 | __host__ __device__ inline PCudaString(const PCudaString& other); 14 | 15 | __host__ int length(); 16 | __host__ int cstr_length(); 17 | // Explicit destructor as the C++ one gets called 18 | // while code is running on the CUDA card 19 | __host__ void destroy(); 20 | 21 | __host__ operator std::string(); 22 | 23 | // Begrudgingly made these public so on-device 24 | // sorting would work 25 | char *str; 26 | thrust::device_ptr ptr; 27 | int len; 28 | }; 29 | 30 | __device__ bool operator< (PCudaString lhs, PCudaString rhs); 31 | 32 | #endif 33 | 
-------------------------------------------------------------------------------- /c_src/pcuda_string_buffer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include "erl_nif.h" 7 | 8 | #include "pcuda_buffer.h" 9 | #include "pcuda_ops.h" 10 | 11 | PCudaStringBuffer::PCudaStringBuffer() { 12 | this->data = new std::vector(); 13 | } 14 | 15 | PCudaStringBuffer::~PCudaStringBuffer() { 16 | delete this->data; 17 | } 18 | 19 | unsigned int PCudaStringBuffer::size() { 20 | return this->data->size(); 21 | } 22 | 23 | bool PCudaStringBuffer::sort() { 24 | return pcuda_string_sort(this->data); 25 | } 26 | 27 | bool PCudaStringBuffer::contains(ErlNifEnv *env, ERL_NIF_TERM rawTarget) { 28 | return false; 29 | } 30 | 31 | ERL_NIF_TERM PCudaStringBuffer::toErlTerms(ErlNifEnv *env) { 32 | ErlNifBinary bin; 33 | std::vector::iterator iter; 34 | ERL_NIF_TERM retval = enif_make_list(env, 0); 35 | if (this->data->size() > 0) { 36 | for (iter = this->data->end(); iter != this->data->begin();) { 37 | --iter; 38 | std::string s = *iter; 39 | if (enif_alloc_binary(s.size(), &bin)) { 40 | memcpy(bin.data, s.data(), bin.size); 41 | retval = enif_make_list_cell(env, enif_make_binary(env, &bin), retval); 42 | } 43 | } 44 | } 45 | return retval; 46 | } 47 | 48 | void PCudaStringBuffer::write(ErlNifEnv *env, ERL_NIF_TERM data) { 49 | ERL_NIF_TERM head; 50 | ErlNifBinary bin; 51 | 52 | while (enif_get_list_cell(env, data, &head, &data)) { 53 | if (enif_inspect_binary(env, head, &bin)) { 54 | std::string s((char *) bin.data, bin.size); 55 | this->data->push_back(s); 56 | } 57 | } 58 | } 59 | 60 | void PCudaStringBuffer::delete_at(unsigned long position) { 61 | std::vector::iterator iter = this->data->begin(); 62 | for (unsigned long i = 0; i < position; i++) { 63 | iter++; 64 | } 65 | this->data->erase(iter); 66 | } 67 | 68 | bool PCudaStringBuffer::insert_at(unsigned long position, ErlNifEnv *env, 
ERL_NIF_TERM value) { 69 | ErlNifBinary bin; 70 | if (enif_inspect_binary(env, value, &bin)) { 71 | std::vector::iterator iter = this->data->begin(); 72 | for (unsigned long i = 0; i < position; i++) { 73 | iter++; 74 | } 75 | this->data->insert(iter, 1, std::string((char *) bin.data, bin.size)); 76 | return true; 77 | } 78 | return false; 79 | } 80 | 81 | void PCudaStringBuffer::clear() { 82 | this->data->clear(); 83 | } 84 | 85 | bool PCudaStringBuffer::copy(PCudaBuffer *src) { 86 | if (src->type() == BUF_TYPE_STRING) { 87 | PCudaStringBuffer *source = (PCudaStringBuffer *) src; 88 | std::vector::iterator iter; 89 | for (iter = source->data->begin(); iter != source->data->end(); ++iter) { 90 | this->data->push_back(*iter); 91 | } 92 | return true; 93 | } 94 | return false; 95 | } 96 | 97 | ERL_NIF_TERM PCudaStringBuffer::intersect(ErlNifEnv *env, PCudaBuffer *other) { 98 | return enif_make_list(env, 0); 99 | } 100 | -------------------------------------------------------------------------------- /c_src/pteracuda_nifs.cpp: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------- 2 | // 3 | // pteracuda: An Erlang framework for performing CUDA-enabled operations 4 | // 5 | // Copyright (c) 2011 Hypothetical Labs, Inc. All Rights Reserved. 6 | // 7 | // This file is provided to you under the Apache License, 8 | // Version 2.0 (the "License"); you may not use this file 9 | // except in compliance with the License. You may obtain 10 | // a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, 15 | // software distributed under the License is distributed on an 16 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 | // KIND, either express or implied. 
See the License for the 18 | // specific language governing permissions and limitations 19 | // under the License. 20 | // 21 | // ------------------------------------------------------------------- 22 | #include 23 | 24 | #include "cuda.h" 25 | #include "cuda_runtime_api.h" 26 | #include "erl_nif.h" 27 | 28 | #include "pcuda_buffer.h" 29 | 30 | extern "C" { 31 | static int pteracuda_on_load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM load_info); 32 | 33 | ERL_NIF_TERM pteracuda_nifs_new_context(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 34 | ERL_NIF_TERM pteracuda_nifs_destroy_context(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 35 | 36 | ERL_NIF_TERM pteracuda_nifs_new_int_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 37 | ERL_NIF_TERM pteracuda_nifs_new_string_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 38 | ERL_NIF_TERM pteracuda_nifs_new_float_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 39 | 40 | ERL_NIF_TERM pteracuda_nifs_destroy_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 41 | ERL_NIF_TERM pteracuda_nifs_buffer_size(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 42 | 43 | ERL_NIF_TERM pteracuda_nifs_write_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 44 | ERL_NIF_TERM pteracuda_nifs_read_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 45 | ERL_NIF_TERM pteracuda_nifs_buffer_delete(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 46 | ERL_NIF_TERM pteracuda_nifs_buffer_insert(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 47 | ERL_NIF_TERM pteracuda_nifs_sort_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 48 | ERL_NIF_TERM pteracuda_nifs_clear_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 49 | ERL_NIF_TERM pteracuda_nifs_buffer_contains(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 50 | ERL_NIF_TERM pteracuda_nifs_copy_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 51 | 
ERL_NIF_TERM pteracuda_nifs_buffer_intersection(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 52 | ERL_NIF_TERM pteracuda_nifs_buffer_minmax(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); 53 | 54 | static ErlNifFunc pteracuda_nif_funcs[] = { 55 | {"new_context", 0, pteracuda_nifs_new_context}, 56 | {"new_context", 1, pteracuda_nifs_new_context}, 57 | {"destroy_context", 1, pteracuda_nifs_destroy_context}, 58 | {"new_int_buffer", 0, pteracuda_nifs_new_int_buffer}, 59 | {"new_string_buffer", 0, pteracuda_nifs_new_string_buffer}, 60 | {"new_float_buffer", 0, pteracuda_nifs_new_float_buffer}, 61 | {"destroy_buffer", 1, pteracuda_nifs_destroy_buffer}, 62 | {"buffer_size", 1, pteracuda_nifs_buffer_size}, 63 | {"write_buffer", 2, pteracuda_nifs_write_buffer}, 64 | {"buffer_delete", 2, pteracuda_nifs_buffer_delete}, 65 | {"buffer_insert", 3, pteracuda_nifs_buffer_insert}, 66 | {"read_buffer", 1, pteracuda_nifs_read_buffer}, 67 | {"sort_buffer", 2, pteracuda_nifs_sort_buffer}, 68 | {"clear_buffer", 1, pteracuda_nifs_clear_buffer}, 69 | {"buffer_contains", 3, pteracuda_nifs_buffer_contains}, 70 | {"copy_buffer", 2, pteracuda_nifs_copy_buffer}, 71 | {"buffer_intersection", 3, pteracuda_nifs_buffer_intersection}, 72 | {"buffer_minmax", 2, pteracuda_nifs_buffer_minmax} 73 | }; 74 | } 75 | 76 | static ErlNifResourceType *pteracuda_buffer_resource; 77 | static ErlNifResourceType *pteracuda_context_resource; 78 | 79 | struct PCudaBufferRef { 80 | PCudaBuffer *buffer; 81 | bool destroyed; 82 | }; 83 | 84 | struct PCudaContextRef { 85 | CUcontext ctx; 86 | bool destroyed; 87 | }; 88 | 89 | static ERL_NIF_TERM ATOM_TRUE; 90 | static ERL_NIF_TERM ATOM_FALSE; 91 | static ERL_NIF_TERM ATOM_OK; 92 | static ERL_NIF_TERM ATOM_ERROR; 93 | static ERL_NIF_TERM ATOM_WRONG_TYPE; 94 | static ERL_NIF_TERM OOM_ERROR; 95 | 96 | ERL_NIF_INIT(pteracuda_nifs, pteracuda_nif_funcs, &pteracuda_on_load, NULL, NULL, NULL); 97 | 98 | static int pteracuda_on_load(ErlNifEnv *env, void 
**priv_data, ERL_NIF_TERM load_info) { 99 | if (cuInit(0) == CUDA_SUCCESS) { 100 | ATOM_TRUE = enif_make_atom(env, "true"); 101 | ATOM_FALSE = enif_make_atom(env, "false"); 102 | ATOM_OK = enif_make_atom(env, "ok"); 103 | ATOM_ERROR = enif_make_atom(env, "error"); 104 | ATOM_WRONG_TYPE = enif_make_atom(env, "wrong_type"); 105 | pteracuda_buffer_resource = enif_open_resource_type(env, NULL, "pteracuda_buffer_resource", 106 | NULL, ERL_NIF_RT_CREATE, 0); 107 | pteracuda_context_resource = enif_open_resource_type(env, NULL, "pteracuda_context_resource", 108 | NULL, ERL_NIF_RT_CREATE, 0); 109 | /* Pre-alloate OOM error in case we run out of memory later */ 110 | OOM_ERROR = enif_make_tuple2(env, ATOM_ERROR, enif_make_atom(env, "out_of_memory")); 111 | return 0; 112 | } 113 | else { 114 | return -1; 115 | } 116 | } 117 | 118 | ERL_NIF_TERM pteracuda_nifs_new_context(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 119 | CUdevice device; 120 | int deviceNum = 0; 121 | PCudaContextRef *ref = (PCudaContextRef *) enif_alloc_resource(pteracuda_context_resource, sizeof(PCudaContextRef)); 122 | if (!ref) { 123 | return OOM_ERROR; 124 | } 125 | if (argc == 1 && !enif_get_int(env, argv[0], &deviceNum)) { 126 | return enif_make_badarg(env); 127 | } 128 | if (cuDeviceGet(&device, deviceNum) == CUDA_SUCCESS && 129 | cuCtxCreate(&(ref->ctx), CU_CTX_SCHED_AUTO, device) == CUDA_SUCCESS) { 130 | ref->destroyed = false; 131 | ERL_NIF_TERM result = enif_make_resource(env, ref); 132 | enif_release_resource(ref); 133 | return enif_make_tuple2(env, ATOM_OK, result); 134 | } 135 | else { 136 | return ATOM_ERROR; 137 | } 138 | } 139 | 140 | ERL_NIF_TERM pteracuda_nifs_destroy_context(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 141 | PCudaContextRef *ref; 142 | if (argc != 1 || !enif_get_resource(env, argv[0], pteracuda_context_resource, (void **) &ref)) { 143 | return enif_make_badarg(env); 144 | } 145 | if (!ref->destroyed) { 146 | cuCtxDestroy(ref->ctx); 147 | 
ref->destroyed = true; 148 | } 149 | return ATOM_OK; 150 | } 151 | 152 | ERL_NIF_TERM pteracuda_nifs_new_int_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 153 | PCudaBufferRef *ref = (PCudaBufferRef *) enif_alloc_resource(pteracuda_buffer_resource, sizeof(PCudaBufferRef)); 154 | if (!ref) { 155 | return OOM_ERROR; 156 | } 157 | ref->buffer = new PCudaIntBuffer(); 158 | ref->destroyed = false; 159 | ERL_NIF_TERM res = enif_make_resource(env, ref); 160 | enif_release_resource(ref); 161 | return enif_make_tuple2(env, ATOM_OK, res); 162 | } 163 | 164 | ERL_NIF_TERM pteracuda_nifs_new_string_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 165 | PCudaBufferRef *ref = (PCudaBufferRef *) enif_alloc_resource(pteracuda_buffer_resource, sizeof(PCudaBufferRef)); 166 | if (!ref) { 167 | return OOM_ERROR; 168 | } 169 | ref->buffer = new PCudaStringBuffer(); 170 | ref->destroyed = false; 171 | ERL_NIF_TERM res = enif_make_resource(env, ref); 172 | enif_release_resource(ref); 173 | return enif_make_tuple2(env, ATOM_OK, res); 174 | } 175 | 176 | ERL_NIF_TERM pteracuda_nifs_new_float_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 177 | PCudaBufferRef *ref = (PCudaBufferRef *) enif_alloc_resource(pteracuda_buffer_resource, sizeof(PCudaBufferRef)); 178 | if (!ref) { 179 | return OOM_ERROR; 180 | } 181 | ref->buffer = new PCudaFloatBuffer(); 182 | ref->destroyed = false; 183 | ERL_NIF_TERM res = enif_make_resource(env, ref); 184 | enif_release_resource(ref); 185 | return enif_make_tuple2(env, ATOM_OK, res); 186 | } 187 | 188 | ERL_NIF_TERM pteracuda_nifs_destroy_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 189 | PCudaBufferRef *ref; 190 | if (argc != 1 || !enif_get_resource(env, argv[0], pteracuda_buffer_resource, (void **) &ref)) { 191 | return enif_make_badarg(env); 192 | } 193 | if (!ref->destroyed) { 194 | delete ref->buffer; 195 | ref->destroyed = true; 196 | } 197 | return ATOM_OK; 198 | } 199 | 200 | ERL_NIF_TERM 
pteracuda_nifs_write_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 201 | PCudaBufferRef *ref; 202 | if (argc != 2 || !enif_get_resource(env, argv[0], pteracuda_buffer_resource, (void **) &ref)) { 203 | return enif_make_badarg(env); 204 | } 205 | ref->buffer->write(env, argv[1]); 206 | return ATOM_OK; 207 | } 208 | 209 | ERL_NIF_TERM pteracuda_nifs_buffer_delete(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 210 | PCudaBufferRef *ref; 211 | unsigned long position; 212 | if (argc != 2 || !enif_get_resource(env, argv[0], pteracuda_buffer_resource, (void **) &ref) || 213 | !enif_get_ulong(env, argv[1], &position)) { 214 | return enif_make_badarg(env); 215 | } 216 | if (position > ref->buffer->size()) { 217 | return ATOM_ERROR; 218 | } 219 | ref->buffer->delete_at(position); 220 | return ATOM_OK; 221 | } 222 | 223 | ERL_NIF_TERM pteracuda_nifs_buffer_insert(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 224 | PCudaBufferRef *ref; 225 | unsigned long position; 226 | if (argc != 3 || !enif_get_resource(env, argv[0], pteracuda_buffer_resource, (void **) &ref) || 227 | !enif_get_ulong(env, argv[1], &position)) { 228 | return enif_make_badarg(env); 229 | } 230 | if (position > ref->buffer->size()) { 231 | return ATOM_ERROR; 232 | } 233 | if (ref->buffer->insert_at(position, env, argv[2])) { 234 | return ATOM_OK; 235 | } 236 | else { 237 | return ATOM_ERROR; 238 | } 239 | } 240 | 241 | ERL_NIF_TERM pteracuda_nifs_buffer_size(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 242 | PCudaBufferRef *ref; 243 | if (argc != 1 || !enif_get_resource(env, argv[0], pteracuda_buffer_resource, (void **) &ref)) { 244 | return enif_make_badarg(env); 245 | } 246 | return enif_make_tuple2(env, ATOM_OK, enif_make_long(env, ref->buffer->size())); 247 | } 248 | 249 | ERL_NIF_TERM pteracuda_nifs_sort_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 250 | PCudaContextRef *ctxRef; 251 | PCudaBufferRef *ref; 252 | if (argc != 2 || 
!enif_get_resource(env, argv[0], pteracuda_context_resource, (void **) &ctxRef) || 253 | !enif_get_resource(env, argv[1], pteracuda_buffer_resource, (void **) &ref)) { 254 | return enif_make_badarg(env); 255 | } 256 | cuCtxSetCurrent(ctxRef->ctx); 257 | if (ref->buffer->sort()) { 258 | return ATOM_OK; 259 | } 260 | else { 261 | return ATOM_ERROR; 262 | } 263 | } 264 | 265 | ERL_NIF_TERM pteracuda_nifs_read_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 266 | PCudaBufferRef *ref; 267 | if (argc != 1 || !enif_get_resource(env, argv[0], pteracuda_buffer_resource, (void **) &ref)) { 268 | return enif_make_badarg(env); 269 | } 270 | ERL_NIF_TERM data = ref->buffer->toErlTerms(env); 271 | return enif_make_tuple2(env, ATOM_OK, data); 272 | } 273 | 274 | ERL_NIF_TERM pteracuda_nifs_clear_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 275 | PCudaBufferRef *ref; 276 | if (argc != 1 || !enif_get_resource(env, argv[0], pteracuda_buffer_resource, (void **) &ref)) { 277 | return enif_make_badarg(env); 278 | } 279 | ref->buffer->clear(); 280 | return ATOM_OK; 281 | } 282 | 283 | ERL_NIF_TERM pteracuda_nifs_buffer_contains(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 284 | PCudaContextRef *ctxRef; 285 | PCudaBufferRef *ref; 286 | if (argc !=3 || !enif_get_resource(env, argv[0], pteracuda_context_resource, (void **) &ctxRef) || 287 | !enif_get_resource(env, argv[1], pteracuda_buffer_resource, (void **) &ref)) { 288 | return enif_make_badarg(env); 289 | } 290 | if (ref->buffer->size() > 0) { 291 | cuCtxSetCurrent(ctxRef->ctx); 292 | if (ref->buffer->contains(env, argv[2])) { 293 | return ATOM_TRUE; 294 | } 295 | else { 296 | return ATOM_FALSE; 297 | } 298 | } 299 | else { 300 | return ATOM_FALSE; 301 | } 302 | } 303 | 304 | ERL_NIF_TERM pteracuda_nifs_copy_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 305 | PCudaBufferRef *src, *dest; 306 | if (argc !=2 || !enif_get_resource(env, argv[0], pteracuda_buffer_resource, (void **) 
&src) || 307 | !enif_get_resource(env, argv[1], pteracuda_buffer_resource, (void **) &dest)) { 308 | return enif_make_badarg(env); 309 | } 310 | 311 | if (dest->buffer->copy(src->buffer)) { 312 | return ATOM_OK; 313 | } 314 | else { 315 | return ATOM_ERROR; 316 | } 317 | } 318 | 319 | ERL_NIF_TERM pteracuda_nifs_buffer_intersection(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 320 | PCudaContextRef *ctxRef; 321 | PCudaBufferRef *first, *second; 322 | if (argc !=3 || !enif_get_resource(env, argv[0], pteracuda_context_resource, (void **) &ctxRef) || 323 | !enif_get_resource(env, argv[1], pteracuda_buffer_resource, (void **) &first) || 324 | !enif_get_resource(env, argv[2], pteracuda_buffer_resource, (void **) &second)) { 325 | return enif_make_badarg(env); 326 | } 327 | cuCtxSetCurrent(ctxRef->ctx); 328 | return enif_make_tuple2(env, ATOM_OK, first->buffer->intersect(env, second->buffer)); 329 | } 330 | 331 | ERL_NIF_TERM pteracuda_nifs_buffer_minmax(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 332 | PCudaContextRef *ctxRef; 333 | PCudaBufferRef *bufRef; 334 | if (argc !=2 || !enif_get_resource(env, argv[0], pteracuda_context_resource, (void **) &ctxRef) || 335 | !enif_get_resource(env, argv[1], pteracuda_buffer_resource, (void **) &bufRef)) { 336 | return enif_make_badarg(env); 337 | } 338 | if (bufRef->buffer->size() == 0) { 339 | return enif_make_tuple2(env, ATOM_OK, enif_make_tuple2(env, enif_make_int(env, 0), 340 | enif_make_int(env, 0))); 341 | } 342 | cuCtxSetCurrent(ctxRef->ctx); 343 | return enif_make_tuple2(env, ATOM_OK, bufRef->buffer->minmax(env)); 344 | } 345 | -------------------------------------------------------------------------------- /rebar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevsmith/pteracuda/61a6180b95bd2093a63e867403f9f387efd6e52c/rebar -------------------------------------------------------------------------------- /rebar.config: 
--------------------------------------------------------------------------------
%% C++ sources compiled into the NIF shared object.
{port_sources, ["c_src/*.cpp"]}.
{so_name, "pteracuda_nifs.so"}.

%% Linux build settings: nvcc flags for the CUDA object and the flags needed
%% to link it, together with the CUDA libraries, into the shared object.
{port_envs, [{"(linux)", "CXX", "clang++"},
             {"(linux)", "NVCCFLAGS", "-arch=sm_20 -O2 -c -Xcompiler -shared -Xcompiler -fPIC"},
             {"(linux)", "LDFLAGS", "$LDFLAGS c_src/pcuda_ops.o -lcuda -L/usr/local/cuda/lib64 -lcudart -lstdc++"},
             {"(linux)", "CXXFLAGS", "$CXXFLAGS -O2 -I/usr/local/cuda/include"}]}.

%% The CUDA object is built by c_src/Makefile before the port sources.
{port_pre_script, {"make -C c_src", ""}}.
{port_cleanup_script, "make -C c_src clean"}.
--------------------------------------------------------------------------------
/src/pteracuda.app.src:
--------------------------------------------------------------------------------
{application, pteracuda,
 [
  {description, ""},
  {vsn, "1"},
  %% pteracuda_sup registers itself locally under its module name.
  {registered, [pteracuda_sup]},
  {applications, [
                  kernel,
                  stdlib
                 ]},
  {mod, { pteracuda_app, []}},
  {env, []}
 ]}.
--------------------------------------------------------------------------------
/src/pteracuda_app.erl:
--------------------------------------------------------------------------------
-module(pteracuda_app).

-behaviour(application).

%% Application callbacks
-export([start/2, stop/1]).

%% ===================================================================
%% Application callbacks
%% ===================================================================

%% Starts the top-level supervisor; both arguments are ignored.
start(_StartType, _StartArgs) ->
    pteracuda_sup:start_link().

%% Nothing to clean up on application stop.
stop(_State) ->
    ok.
--------------------------------------------------------------------------------
/src/pteracuda_bench.erl:
--------------------------------------------------------------------------------
-module(pteracuda_bench).

-export([run/0]).

%% Times lists:sort/1 against the CUDA-backed buffer sort on the same shuffled
%% list of 50000 integers. Each side is run 10 times; the fastest and slowest
%% samples are dropped before averaging.
%% Returns {ErlangAverage, CudaAverage} in timer:tc/3 units (microseconds).
run() ->
    {T1, T2, T3} = erlang:now(),
    random:seed(T1, T2, T3),
    F = fun(_, _) -> random:uniform(100) > 50 end,
    N = lists:sort(F, lists:seq(1, 50000)),
    Erlang = bench_pure_erlang(N, 10, []),
    %% pteracuda_nifs exports new_int_buffer/0 and sort_buffer/2 (context +
    %% buffer); there is no new_buffer/0 or sort_buffer/1.
    {ok, Ctx} = pteracuda_nifs:new_context(),
    {ok, B} = pteracuda_nifs:new_int_buffer(),
    Cuda = bench_cuda(Ctx, B, N, 10, []),
    pteracuda_nifs:destroy_buffer(B),
    pteracuda_nifs:destroy_context(Ctx),
    {lists:sum(Erlang) / length(Erlang), lists:sum(Cuda) / length(Cuda)}.

%% Collects Count timings of lists:sort/1 on N, then trims the outliers.
bench_pure_erlang(_N, 0, Accum) ->
    trim_outliers(Accum);
bench_pure_erlang(N, Count, Accum) ->
    {Time, _} = timer:tc(lists, sort, [N]),
    bench_pure_erlang(N, Count - 1, [Time|Accum]).

%% Collects Count timings of the CUDA sort, then trims the outliers. Only the
%% sort call is timed; the buffer is refilled before and cleared after each run.
bench_cuda(_Ctx, _Buf, _N, 0, Accum) ->
    trim_outliers(Accum);
bench_cuda(Ctx, Buf, N, Count, Accum) ->
    pteracuda_nifs:write_buffer(Buf, N),
    {Time, _} = timer:tc(pteracuda_nifs, sort_buffer, [Ctx, Buf]),
    pteracuda_nifs:clear_buffer(Buf),
    bench_cuda(Ctx, Buf, N, Count - 1, [Time|Accum]).

%% Drops one occurrence of the largest and one of the smallest sample.
trim_outliers(Samples) ->
    WithoutMax = lists:delete(lists:max(Samples), Samples),
    lists:delete(lists:min(WithoutMax), WithoutMax).
--------------------------------------------------------------------------------
/src/pteracuda_buffer.erl:
--------------------------------------------------------------------------------
-module(pteracuda_buffer).

-include("pteracuda_internals.hrl").

-export([new/1,
         destroy/1,
         size/1,
         write/2,
         read/1,
         duplicate/1,
         clear/1,
         sort/2,
         contains/3,
         intersection/3,
         minmax/2]).

%% Creates a buffer of the requested element type (integer | float | string)
%% and wraps the NIF resource in a #pc_buffer{} record.
new(integer) ->
    {ok, Buf} = pteracuda_nifs:new_int_buffer(),
    {ok, #pc_buffer{type=integer, ref=Buf}};
new(float) ->
    {ok, Buf} = pteracuda_nifs:new_float_buffer(),
    {ok, #pc_buffer{type=float, ref=Buf}};
new(string) ->
    {ok, Buf} = pteracuda_nifs:new_string_buffer(),
    {ok, #pc_buffer{type=string, ref=Buf}}.

%% Releases the underlying NIF buffer. Always returns ok.
destroy(#pc_buffer{ref=Ref}) ->
    pteracuda_nifs:destroy_buffer(Ref),
    ok.

%% Returns the NIF's buffer_size/1 result for the wrapped buffer.
size(#pc_buffer{ref=Ref}) ->
    pteracuda_nifs:buffer_size(Ref).

%% Passes Data to the NIF's write_buffer/2 for the wrapped buffer.
write(#pc_buffer{ref=Ref, type=Type}, Data) when Type =:= integer orelse
                                                 Type =:= string orelse
                                                 Type =:= float ->
    pteracuda_nifs:write_buffer(Ref, Data).

%% Returns the NIF's read_buffer/1 result for the wrapped buffer.
read(#pc_buffer{ref=Ref}) ->
    pteracuda_nifs:read_buffer(Ref).

%% Creates a new buffer of the same type and copies the contents into it.
%% Returns {ok, NewBuffer}. If the copy fails, the new buffer is destroyed
%% and the NIF's failure result is returned instead of a half-made copy.
duplicate(#pc_buffer{ref=Ref, type=Type}) when Type =:= integer orelse
                                               Type =:= string orelse
                                               Type =:= float ->
    {ok, OtherBuf} = new(Type),
    case pteracuda_nifs:copy_buffer(Ref, OtherBuf#pc_buffer.ref) of
        ok ->
            {ok, OtherBuf};
        Failure ->
            destroy(OtherBuf),
            Failure
    end.

%% Empties the buffer.
clear(#pc_buffer{ref=Ref}) ->
    pteracuda_nifs:clear_buffer(Ref).

%% Sorts the buffer using the given context.
sort(#pc_context{ref=Ctx}, #pc_buffer{ref=Buf}) ->
    pteracuda_nifs:sort_buffer(Ctx, Buf).

%% Asks the NIF whether Value is present in the buffer.
contains(#pc_context{ref=Ctx}, #pc_buffer{ref=Buf}, Value) ->
    pteracuda_nifs:buffer_contains(Ctx, Buf, Value).

%% Returns the NIF's buffer_intersection/3 result for the two buffers.
intersection(#pc_context{ref=Ctx}, #pc_buffer{ref=Buf1}, #pc_buffer{ref=Buf2}) ->
    pteracuda_nifs:buffer_intersection(Ctx, Buf1, Buf2).

%% Returns the NIF's buffer_minmax/2 result for the buffer.
minmax(#pc_context{ref=Ctx}, #pc_buffer{ref=Buf}) ->
    pteracuda_nifs:buffer_minmax(Ctx, Buf).
--------------------------------------------------------------------------------
/src/pteracuda_commands.hrl:
--------------------------------------------------------------------------------
%% Commands
-define(SHUTDOWN, 0).
--------------------------------------------------------------------------------
/src/pteracuda_context.erl:
--------------------------------------------------------------------------------
-module(pteracuda_context).

-include("pteracuda_internals.hrl").

-export([new/0,
         new/1,
         destroy/1]).

%% Creates a context via new_context/0 and wraps it in #pc_context{}.
new() ->
    {ok, Ctx} = pteracuda_nifs:new_context(),
    {ok, #pc_context{ref=Ctx}}.

%% Creates a context for the given integer device number.
new(Device) when is_integer(Device) ->
    {ok, Ctx} = pteracuda_nifs:new_context(Device),
    {ok, #pc_context{ref=Ctx}}.

%% Destroys the wrapped context.
destroy(#pc_context{ref=Ctx}) ->
    pteracuda_nifs:destroy_context(Ctx).
--------------------------------------------------------------------------------
/src/pteracuda_demo.erl:
--------------------------------------------------------------------------------
-module(pteracuda_demo).

-compile([export_all,
          native]).

%% Generates N random integers, times lists:sort/1 and the CUDA sort on them,
%% and prints both durations in milliseconds (timer:tc/3 reports microseconds).
%% The buffer and context created for the run are released before returning.
start(N) ->
    {T1, T2, T3} = erlang:now(),
    random:seed(T1, T2, T3),
    io:format("Generating test data: ~p~n", [N]),
    D = [random:uniform(N) || _ <- lists:seq(1, N)],
    io:format("Measuring performance "),
    {Time1, _} = timer:tc(lists, sort, [D]),
    io:format("."),
    {ok, C} = pteracuda_context:new(),
    {ok, B} = pteracuda_buffer:new(integer),
    %% NOTE(review): D is written here and again inside pteracuda_sort/3; if
    %% write appends, the CUDA timing covers twice the data -- confirm.
    pteracuda_buffer:write(B, D),
    {Time2, _} = timer:tc(pteracuda_demo, pteracuda_sort, [C, B, D]),
    %% Release the native resources instead of leaving them to the collector.
    pteracuda_buffer:destroy(B),
    pteracuda_context:destroy(C),
    io:format(".~n"),
    io:format("Erlang: ~pms, CUDA: ~pms~n", [Time1 / 1000, Time2 / 1000]).

%% Timed unit for the CUDA side: write the data, sort it, read it back.
pteracuda_sort(C, B, D) ->
    pteracuda_buffer:write(B, D),
    pteracuda_buffer:sort(C, B),
    pteracuda_buffer:read(B).
--------------------------------------------------------------------------------
/src/pteracuda_internals.hrl:
--------------------------------------------------------------------------------
-record(pc_buffer, {type,
                    ref}).

-record(pc_context, {ref}).
--------------------------------------------------------------------------------
/src/pteracuda_nifs.erl:
--------------------------------------------------------------------------------
-module(pteracuda_nifs).

-define(NIF_API_VERSION, 1).
-define(MISSING_NIF, throw({error, missing_nif})).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

-on_load(init/0).

-export([init/0]).

%% API
-export([new_context/0,
         new_context/1,
         destroy_context/1]).

-export([new_int_buffer/0,
         new_string_buffer/0,
         new_float_buffer/0,
         destroy_buffer/1,
         buffer_size/1]).

-export([write_buffer/2,
         buffer_delete/2,
         buffer_insert/3,
         read_buffer/1,
         clear_buffer/1,
         copy_buffer/2]).

-export([sort_buffer/2,
         buffer_contains/3,
         buffer_intersection/3,
         buffer_minmax/2]).

%% Every function below is a placeholder that raises ?MISSING_NIF; the bodies
%% are expected to be replaced once init/0 has loaded the NIF library.

%% --- contexts ---
new_context() -> ?MISSING_NIF.

new_context(_DeviceNum) -> ?MISSING_NIF.

destroy_context(_Ctx) -> ?MISSING_NIF.

%% --- buffer lifecycle ---
new_int_buffer() -> ?MISSING_NIF.

new_string_buffer() -> ?MISSING_NIF.

new_float_buffer() -> ?MISSING_NIF.

destroy_buffer(_Buffer) -> ?MISSING_NIF.

buffer_size(_Buffer) -> ?MISSING_NIF.

%% --- buffer contents ---
read_buffer(_Buffer) -> ?MISSING_NIF.

write_buffer(_Buffer, _Data) -> ?MISSING_NIF.

buffer_delete(_Buffer, _Pos) -> ?MISSING_NIF.

buffer_insert(_Buffer, _Pos, _Value) -> ?MISSING_NIF.

sort_buffer(_Ctx, _Buffer) -> ?MISSING_NIF.

clear_buffer(_Buffer) -> ?MISSING_NIF.

copy_buffer(_From, _To) -> ?MISSING_NIF.

%% --- queries that take a context ---
buffer_contains(_Ctx, _Buffer, _Value) -> ?MISSING_NIF.

buffer_intersection(_Ctx, _First, _Second) -> ?MISSING_NIF.

buffer_minmax(_Ctx, _Buffer) -> ?MISSING_NIF.

%% on_load hook: loads "pteracuda_nifs" from the application's priv directory.
init() ->
    erlang:load_nif(filename:join([priv_dir(), "pteracuda_nifs"]), ?NIF_API_VERSION).

%% Resolves the priv directory. When code:priv_dir/1 does not know the
%% application, falls back to "../priv" relative to this module's beam file.
priv_dir() ->
    case code:priv_dir(pteracuda) of
        {error, bad_name} ->
            BeamDir = filename:dirname(code:which(?MODULE)),
            filename:join([BeamDir, "..", "priv"]);
        Found ->
            Found
    end.

-ifdef(TEST).

%% An integer buffer can be created and destroyed.
create_destroy_test() ->
    {ok, IntBuf} = pteracuda_nifs:new_int_buffer(),
    ok = pteracuda_nifs:destroy_buffer(IntBuf).

%% A float buffer can be created and destroyed.
create_destroy_float_test() ->
    {ok, Buf} = pteracuda_nifs:new_float_buffer(),
    ok = pteracuda_nifs:destroy_buffer(Buf).

%% Writing five integers yields a buffer of size five.
create_write_destroy_test() ->
    {ok, Buf} = pteracuda_nifs:new_int_buffer(),
    ok = pteracuda_nifs:write_buffer(Buf, [1,2,3,4,5]),
    {ok, 5} = pteracuda_nifs:buffer_size(Buf),
    ok = pteracuda_nifs:destroy_buffer(Buf).

%% Writing five floats yields a buffer of size five.
create_write_destroy_float_test() ->
    {ok, Buf} = pteracuda_nifs:new_float_buffer(),
    ok = pteracuda_nifs:write_buffer(Buf, [0.01, 0.002, 0.0003, 0.4, 1.5]),
    {ok, 5} = pteracuda_nifs:buffer_size(Buf),
    ok = pteracuda_nifs:destroy_buffer(Buf).

%% Deleting by zero-based position removes exactly that element.
create_write_delete_test() ->
    {ok, Buf} = pteracuda_nifs:new_int_buffer(),
    ok = pteracuda_nifs:write_buffer(Buf, [1,2,3,4,5]),
    ok = pteracuda_nifs:buffer_delete(Buf, 1),
    {ok, [1,3,4,5]} = pteracuda_nifs:read_buffer(Buf),
    ok = pteracuda_nifs:buffer_delete(Buf, 0),
    {ok, [3,4,5]} = pteracuda_nifs:read_buffer(Buf),
    ok = pteracuda_nifs:destroy_buffer(Buf).

%% Same as create_write_delete_test/0, for a float buffer.
create_write_delete_float_test() ->
    {ok, Buf} = pteracuda_nifs:new_float_buffer(),
    ok = pteracuda_nifs:write_buffer(Buf, [1.1,1.2,1.3,1.4,1.5]),
    ok = pteracuda_nifs:buffer_delete(Buf, 1),
    {ok, [1.1,1.3,1.4,1.5]} = pteracuda_nifs:read_buffer(Buf),
    ok = pteracuda_nifs:buffer_delete(Buf, 0),
    {ok, [1.3,1.4,1.5]} = pteracuda_nifs:read_buffer(Buf),
    ok = pteracuda_nifs:destroy_buffer(Buf).

%% Inserting past the end is rejected; inserting in the middle shifts the tail.
insert_test() ->
    {ok, Buf} = pteracuda_nifs:new_int_buffer(),
    ok = pteracuda_nifs:buffer_insert(Buf, 0, 1),
    error = pteracuda_nifs:buffer_insert(Buf, 5, 2),
    {ok, [1]} = pteracuda_nifs:read_buffer(Buf),
    ok = pteracuda_nifs:clear_buffer(Buf),
    ok = pteracuda_nifs:write_buffer(Buf, [1,2,3,4,5]),
    ok = pteracuda_nifs:buffer_insert(Buf, 2, 6),
    {ok, [1,2,6,3,4,5]} = pteracuda_nifs:read_buffer(Buf),
    ok = pteracuda_nifs:destroy_buffer(Buf).

%% Same as insert_test/0, for a float buffer.
insert_float_test() ->
    {ok, Buf} = pteracuda_nifs:new_float_buffer(),
    ok = pteracuda_nifs:buffer_insert(Buf, 0, 1.0),
    error = pteracuda_nifs:buffer_insert(Buf, 5, 2.0),
    {ok, [1.0]} = pteracuda_nifs:read_buffer(Buf),
    ok = pteracuda_nifs:clear_buffer(Buf),
    ok = pteracuda_nifs:write_buffer(Buf, [1.0,2.0,3.0,4.0,5.0]),
    ok = pteracuda_nifs:buffer_insert(Buf, 2, 6.0),
    {ok, [1.0,2.0,6.0,3.0,4.0,5.0]} = pteracuda_nifs:read_buffer(Buf),
    ok = pteracuda_nifs:destroy_buffer(Buf).

%% Sorting an integer buffer returns its elements in ascending order.
create_write_sort_destroy_test() ->
    {ok, Buf} = pteracuda_nifs:new_int_buffer(),
    {ok, Ctx} = pteracuda_nifs:new_context(),
    ok = pteracuda_nifs:write_buffer(Buf, [3,2,1,4,5]),
    {ok, 5} = pteracuda_nifs:buffer_size(Buf),
    ok = pteracuda_nifs:sort_buffer(Ctx, Buf),
    {ok, [1,2,3,4,5]} = pteracuda_nifs:read_buffer(Buf),
    ok = pteracuda_nifs:destroy_buffer(Buf),
    ok = pteracuda_nifs:destroy_context(Ctx).

%% Sorting a float buffer returns its elements in ascending order.
create_write_sort_destroy_float_test() ->
    {ok, Buf} = pteracuda_nifs:new_float_buffer(),
    {ok, Ctx} = pteracuda_nifs:new_context(),
    ok = pteracuda_nifs:write_buffer(Buf, [3.1,2.1,1.1,4.1,5.1]),
    {ok, 5} = pteracuda_nifs:buffer_size(Buf),
    ok = pteracuda_nifs:sort_buffer(Ctx, Buf),
    {ok, [1.1,2.1,3.1,4.1,5.1]} = pteracuda_nifs:read_buffer(Buf),
    ok = pteracuda_nifs:destroy_buffer(Buf),
    ok = pteracuda_nifs:destroy_context(Ctx).

%% Clearing a buffer resets its size to zero.
create_write_clear_test() ->
    {ok, Buf} = pteracuda_nifs:new_int_buffer(),
    ok = pteracuda_nifs:write_buffer(Buf, [3,2,1,4,5]),
    {ok, 5} = pteracuda_nifs:buffer_size(Buf),
    ok = pteracuda_nifs:clear_buffer(Buf),
    {ok, 0} = pteracuda_nifs:buffer_size(Buf),
    ok = pteracuda_nifs:destroy_buffer(Buf).

%% buffer_contains/3 finds a present integer and rejects an absent one.
create_write_contains_test() ->
    {ok, Buf} = pteracuda_nifs:new_int_buffer(),
    {ok, Ctx} = pteracuda_nifs:new_context(),
    N = lists:seq(1, 1000),
    ok = pteracuda_nifs:write_buffer(Buf, N),
    true = pteracuda_nifs:buffer_contains(Ctx, Buf, 513),
    false = pteracuda_nifs:buffer_contains(Ctx, Buf, 1500),
    ok = pteracuda_nifs:destroy_buffer(Buf),
    ok = pteracuda_nifs:destroy_context(Ctx).

%% buffer_contains/3 finds a present float and rejects an absent one.
create_write_contains_float_test() ->
    {ok, Buf} = pteracuda_nifs:new_float_buffer(),
    {ok, Ctx} = pteracuda_nifs:new_context(),
    N = [X + 0.0001 || X <- lists:seq(1, 1000)],
    ok = pteracuda_nifs:write_buffer(Buf, N),
    true = pteracuda_nifs:buffer_contains(Ctx, Buf, 513.0001),
    false = pteracuda_nifs:buffer_contains(Ctx, Buf, 1500.0),
    ok = pteracuda_nifs:destroy_buffer(Buf),
    ok = pteracuda_nifs:destroy_context(Ctx).

%% Copying a 1000-element buffer produces a destination of the same size.
create_copy_test() ->
    {ok, Buf} = pteracuda_nifs:new_int_buffer(),
    ok = pteracuda_nifs:write_buffer(Buf, lists:seq(1, 1000)),
    {ok, Buf1} = pteracuda_nifs:new_int_buffer(),
    ok = pteracuda_nifs:copy_buffer(Buf, Buf1),
    {ok, 1000} = pteracuda_nifs:buffer_size(Buf1),
    ok = pteracuda_nifs:destroy_buffer(Buf),
    ok = pteracuda_nifs:destroy_buffer(Buf1).

%% 1..100 and 90..190 overlap in the eleven values 90..100.
intersection_test() ->
    {ok, B1} = pteracuda_nifs:new_int_buffer(),
    {ok, B2} = pteracuda_nifs:new_int_buffer(),
    {ok, Ctx} = pteracuda_nifs:new_context(),
    ok = pteracuda_nifs:write_buffer(B1, lists:seq(1, 100)),
    ok = pteracuda_nifs:write_buffer(B2, lists:seq(90, 190)),
    {ok, IB} = pteracuda_nifs:buffer_intersection(Ctx, B1, B2),
    11 = length(IB),
    ok = pteracuda_nifs:destroy_context(Ctx),
    ok = pteracuda_nifs:destroy_buffer(B1),
    ok = pteracuda_nifs:destroy_buffer(B2).

%% The minimum and maximum of a shuffled 1..5000 are 1 and 5000.
minmax_test() ->
    {ok, B} = pteracuda_nifs:new_int_buffer(),
    {ok, Ctx} = pteracuda_nifs:new_context(),
    F = fun(_, _) -> random:uniform(100) > 49 end,
    N = lists:sort(F, lists:seq(1, 5000)),
    ok = pteracuda_nifs:write_buffer(B, N),
    ok = pteracuda_nifs:sort_buffer(Ctx, B),
    {ok, {1, 5000}} = pteracuda_nifs:buffer_minmax(Ctx, B),
    ok = pteracuda_nifs:destroy_buffer(B),
    ok = pteracuda_nifs:destroy_context(Ctx).

-endif.
--------------------------------------------------------------------------------
/src/pteracuda_stress.erl:
--------------------------------------------------------------------------------
-module(pteracuda_stress).

-export([run/0]).

%% Repeatedly creates, fills, sorts and destroys a one-million-element integer
%% buffer. Prints the OS pid and waits for a key press first, so an external
%% tool can be attached to the process. Returns ok.
run() ->
    {T1, T2, T3} = erlang:now(),
    random:seed(T1, T2, T3),
    F = fun(_, _) -> random:uniform(100) > 50 end,
    Data = lists:sort(F, lists:seq(1, 1000000)),
    io:format("Pid: ~p~n", [os:getpid()]),
    io:get_chars("Press any key when ready...", 1),
    %% sort_buffer/2 needs a context; one is shared by every iteration.
    {ok, Ctx} = pteracuda_nifs:new_context(),
    Result = stress(Ctx, Data, 1000000),
    pteracuda_nifs:destroy_context(Ctx),
    Result.

%% One iteration per remaining Count; prints the iteration index.
stress(_Ctx, _Data, 0) ->
    ok;
stress(Ctx, Data, Count) ->
    %% pteracuda_nifs has no new_buffer/0 or sort_buffer/1; use the integer
    %% buffer constructor and the context-taking sort.
    {ok, B} = pteracuda_nifs:new_int_buffer(),
    pteracuda_nifs:write_buffer(B, Data),
    pteracuda_nifs:sort_buffer(Ctx, B),
    %{ok, SD} = pteracuda_nifs:read_buffer(B),
    pteracuda_nifs:destroy_buffer(B),
    io:format("~p~n", [1000000 - Count]),
    %% case length(SD) of
    %%     1000000 ->
    %%         io:format("~p...ok~n", [1000000 - Count]);
    %%     _ ->
    %%         io:format("~p...bad~n", [1000000 - Count])
    %% end,
    stress(Ctx, Data, Count - 1).
--------------------------------------------------------------------------------
/src/pteracuda_sup.erl:
--------------------------------------------------------------------------------

-module(pteracuda_sup).

-behaviour(supervisor).

%% API
-export([start_link/0]).

%% Supervisor callbacks
-export([init/1]).

%% Helper macro for declaring children of supervisor
-define(CHILD(I, Type), {I, {I, start_link, []}, permanent, 5000, Type, [I]}).

%% ===================================================================
%% API functions
%% ===================================================================

%% Starts the supervisor, registered locally under this module's name.
start_link() ->
    Name = {local, ?MODULE},
    supervisor:start_link(Name, ?MODULE, []).

%% ===================================================================
%% Supervisor callbacks
%% ===================================================================

%% one_for_one strategy, at most 5 restarts within 10 seconds, no children.
init([]) ->
    RestartStrategy = {one_for_one, 5, 10},
    ChildSpecs = [],
    {ok, {RestartStrategy, ChildSpecs}}.

--------------------------------------------------------------------------------