├── core ├── cuda │ ├── .gitkeep │ ├── rk.cpp │ └── rk_kernel.cu ├── opencl │ ├── .gitkeep │ ├── rk.cpp │ ├── rk_kernel.cl │ └── opcl.cpp ├── fiber.cpp ├── c │ ├── rk.cpp │ └── rk_kernel.cpp └── dataset.cpp ├── tests ├── main.cpp ├── include │ ├── rk_kernel_fixture.h │ └── rk_kernel_extra.h ├── fixtures │ └── rk_kernel_fixture.cpp └── units │ └── rk_kernel_tests.cpp ├── include ├── rk_opencl_kernel.h ├── rk_cuda_kernel.h ├── rk.h ├── cone_collection.h ├── fiber.h ├── output.h ├── cylinder_collection.h ├── cone.h ├── window_manager.h ├── cylinder.h ├── rk_c_kernel.h ├── input.h ├── dataset.h ├── scene.h └── opcl.h ├── .gitignore ├── example-factories ├── lines.php ├── random.php ├── rotation.php └── gaussian.php ├── io ├── gui │ ├── primitives │ │ ├── cone_collection.cpp │ │ ├── cylinder_collection.cpp │ │ ├── cone.cpp │ │ └── cylinder.cpp │ ├── window_manager.cpp │ └── scene.cpp ├── output.cpp └── input.cpp ├── main.cpp ├── README └── Makefile /core/cuda/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/opencl/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char **argv) { 4 | ::testing::InitGoogleTest(&argc, argv); 5 | return RUN_ALL_TESTS(); 6 | } 7 | -------------------------------------------------------------------------------- /include/rk_opencl_kernel.h: -------------------------------------------------------------------------------- 1 | #define MAX_POINTS 10000 2 | #include 3 | 4 | void opencl_init(char* kernel_name, vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, runge_kutta::Fiber **fibers); 5 | 
-------------------------------------------------------------------------------- /tests/include/rk_kernel_fixture.h: -------------------------------------------------------------------------------- 1 | namespace runge_kutta{ 2 | class RKKernelFixture : public testing::Test{ 3 | protected: 4 | DataSet _dataset, _dataset2; 5 | vector _v1, _v2, _v_zero, _v_unity, _v1_oposite, _v2_oposite, _v_middle; 6 | 7 | virtual void SetUp(); 8 | }; 9 | } 10 | -------------------------------------------------------------------------------- /include/rk_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #define MAX_POINTS 10000 2 | 3 | extern "C" void rk2_caller(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, runge_kutta::Fiber **fibers); 4 | extern "C" void rk4_caller(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, runge_kutta::Fiber **fibers); 5 | -------------------------------------------------------------------------------- /include/rk.h: -------------------------------------------------------------------------------- 1 | namespace runge_kutta{ 2 | class RungeKutta{ 3 | private: 4 | DataSet _dataset; 5 | vector *_v0; 6 | unsigned _count_v0; 7 | double _h; 8 | public: 9 | RungeKutta(DataSet dataset, vector *v0, unsigned count_v0, double h); 10 | Fiber *order2(); 11 | Fiber *order4(); 12 | }; 13 | } 14 | -------------------------------------------------------------------------------- /include/cone_collection.h: -------------------------------------------------------------------------------- 1 | namespace runge_kutta{ 2 | class ConeCollection{ 3 | private: 4 | unsigned _cone_count; 5 | Cone *_cones; 6 | public: 7 | ConeCollection(); 8 | void addCone(vector p1, vector p2); 9 | void render(GLfloat red, GLfloat green, GLfloat blue, GLdouble angle_x, GLdouble angle_y); 10 | }; 11 | } 12 | -------------------------------------------------------------------------------- 
/include/fiber.h: -------------------------------------------------------------------------------- 1 | namespace runge_kutta{ 2 | class Fiber{ 3 | private: 4 | unsigned _pointsCount; 5 | vector *_points; 6 | public: 7 | Fiber(unsigned pointsCount); 8 | Fiber(); 9 | void setPoint(unsigned order, vector point); 10 | vector getPoint(unsigned order); 11 | unsigned pointsCount(); 12 | }; 13 | } 14 | -------------------------------------------------------------------------------- /include/output.h: -------------------------------------------------------------------------------- 1 | namespace runge_kutta{ 2 | class Output{ 3 | private: 4 | DataSet _dataSet; 5 | unsigned _fibersCount; 6 | Fiber *_rk2Fibers; 7 | Fiber *_rk4Fibers; 8 | public: 9 | Output(DataSet dataSet, unsigned fibersCount, Fiber *rk2Fibers, Fiber *rk4Fibers); 10 | void gnuplotInput(); 11 | void gui(); 12 | }; 13 | } 14 | -------------------------------------------------------------------------------- /include/cylinder_collection.h: -------------------------------------------------------------------------------- 1 | namespace runge_kutta{ 2 | class CylinderCollection{ 3 | private: 4 | unsigned _cylinder_count; 5 | Cylinder *_cylinders; 6 | public: 7 | CylinderCollection(); 8 | void addCylinder(vector p1, vector p2); 9 | void render(GLfloat red, GLfloat green, GLfloat blue, GLdouble angle_x, GLdouble angle_y); 10 | }; 11 | } 12 | -------------------------------------------------------------------------------- /include/cone.h: -------------------------------------------------------------------------------- 1 | namespace runge_kutta{ 2 | class Cone{ 3 | public: 4 | GLdouble _x1; 5 | GLdouble _y1; 6 | GLdouble _z1; 7 | GLdouble _x2; 8 | GLdouble _y2; 9 | GLdouble _z2; 10 | Cone(double x1, double y1, double z1, double x2, double y2, double z2); 11 | void render(GLfloat red, GLfloat green, GLfloat blue, GLdouble angle_x, GLdouble angle_y); 12 | }; 13 | } 14 | 
-------------------------------------------------------------------------------- /include/window_manager.h: -------------------------------------------------------------------------------- 1 | namespace runge_kutta{ 2 | class WindowManager{ 3 | public: 4 | WindowManager(Scene scene); 5 | void loop(); 6 | static void display(); 7 | static void reshape(int width, int height); 8 | static void idle(); 9 | static void key_pressed (unsigned char key, int x, int y); 10 | static void key_pressed_special (int key, int x, int y); 11 | }; 12 | } 13 | -------------------------------------------------------------------------------- /include/cylinder.h: -------------------------------------------------------------------------------- 1 | namespace runge_kutta{ 2 | class Cylinder{ 3 | public: 4 | GLdouble _x1; 5 | GLdouble _y1; 6 | GLdouble _z1; 7 | GLdouble _x2; 8 | GLdouble _y2; 9 | GLdouble _z2; 10 | Cylinder(double x1, double y1, double z1, double x2, double y2, double z2); 11 | void render(GLfloat red, GLfloat green, GLfloat blue, GLdouble angle_x, GLdouble angle_y); 12 | }; 13 | } 14 | -------------------------------------------------------------------------------- /tests/include/rk_kernel_extra.h: -------------------------------------------------------------------------------- 1 | vector sum(vector v1, vector v2); 2 | vector subtract(vector v1, vector v2); 3 | vector mult_scalar(vector v, double scalar); 4 | void set(vector *x, vector y); 5 | double module(vector v); 6 | double distance(vector x, vector y); 7 | vector nearest_neighbour(vector v0, int n_x, int n_y, int n_z, vector_field field); 8 | vector trilinear_interpolation(vector v0, int n_x, int n_y, int n_z, vector_field field); 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.o 3 | rk 4 | test 5 | example 6 | example3d 7 | rotationField 8 | randomField 9 | linesField 10 | 
gaussianField 11 | a.out 12 | out 13 | plot.dat 14 | rk2.dat 15 | rk4.dat 16 | rk2-vs-rk4.p 17 | dhelix.hdr 18 | dhelix.img 19 | dhelix_vector.hdr 20 | dhelix_vector.img 21 | dhelix_vector.in 22 | dhelix_vector.singlein 23 | gtest* 24 | tests_runner 25 | libgtest.a 26 | runge-kutta.sublime-project 27 | runge-kutta.sublime-workspace -------------------------------------------------------------------------------- /include/rk_c_kernel.h: -------------------------------------------------------------------------------- 1 | #define MAX_POINTS 10000 2 | 3 | typedef struct ker_args{ 4 | int id; 5 | vector *v0; 6 | int count_v0; 7 | double h; 8 | int n_x; 9 | int n_y; 10 | int n_z; 11 | vector_field field; 12 | runge_kutta::Fiber *fibers; 13 | } kernel_args; 14 | 15 | void rk2_caller(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, runge_kutta::Fiber **fibers); 16 | void rk4_caller(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, runge_kutta::Fiber **fibers); -------------------------------------------------------------------------------- /include/input.h: -------------------------------------------------------------------------------- 1 | namespace runge_kutta{ 2 | class Input{ 3 | private: 4 | char *_file_name; 5 | unsigned _file_type; 6 | DataSet parseNative(double *h, vector **v0, unsigned *v0_count); 7 | DataSet parseAnalyze(double *h, vector **v0, unsigned *v0_count); 8 | public: 9 | static const unsigned NATIVE_TYPE = 0; 10 | static const unsigned ANALYZE_TYPE = 1; 11 | Input(char *file_name, unsigned file_type); 12 | Input(); 13 | DataSet parse(double *h, vector **v0, unsigned *v0_count); 14 | }; 15 | } 16 | -------------------------------------------------------------------------------- /core/fiber.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace runge_kutta; 6 | 7 | Fiber::Fiber(unsigned 
pointsCount){ 8 | _pointsCount = pointsCount; 9 | 10 | _points = (vector *) malloc(pointsCount*sizeof(vector)); 11 | } 12 | 13 | Fiber::Fiber(){ 14 | _pointsCount = 0; 15 | _points = NULL; 16 | } 17 | 18 | void Fiber::setPoint(unsigned order, vector point){ 19 | _points[order] = point; 20 | } 21 | vector Fiber::getPoint(unsigned order){ 22 | return _points[order]; 23 | } 24 | unsigned Fiber::pointsCount(){ 25 | return _pointsCount; 26 | } 27 | -------------------------------------------------------------------------------- /include/dataset.h: -------------------------------------------------------------------------------- 1 | typedef struct vec{ 2 | double x; 3 | double y; 4 | double z; 5 | } vector; 6 | 7 | typedef vector *vector_field; 8 | 9 | namespace runge_kutta{ 10 | class DataSet{ 11 | private: 12 | unsigned _n_x; 13 | unsigned _n_y; 14 | unsigned _n_z; 15 | vector_field _field; 16 | public: 17 | DataSet(); 18 | DataSet(unsigned nx, unsigned ny, unsigned nz, vector_field field); 19 | unsigned n_x(); 20 | unsigned n_y(); 21 | unsigned n_z(); 22 | vector_field field(); 23 | vector field(unsigned x, unsigned y, unsigned z); 24 | static unsigned offset(unsigned nx, unsigned ny, unsigned x, unsigned y, unsigned z); 25 | }; 26 | } 27 | -------------------------------------------------------------------------------- /example-factories/lines.php: -------------------------------------------------------------------------------- 1 | 30 | -------------------------------------------------------------------------------- /example-factories/random.php: -------------------------------------------------------------------------------- 1 | 25 | -------------------------------------------------------------------------------- /core/c/rk.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace runge_kutta; 8 | 9 | RungeKutta::RungeKutta(DataSet dataset, vector *v0, 
unsigned count_v0, double h){ 10 | _dataset = dataset; 11 | _v0 = v0; 12 | _count_v0 = count_v0; 13 | _h = h; 14 | } 15 | 16 | Fiber *RungeKutta::order2(){ 17 | Fiber *fibers; 18 | 19 | rk2_caller(_v0, _count_v0, _h, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field(), &fibers); 20 | 21 | return fibers; 22 | } 23 | 24 | Fiber *RungeKutta::order4(){ 25 | Fiber *fibers; 26 | 27 | rk4_caller(_v0, _count_v0, _h, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field(), &fibers); 28 | 29 | return fibers; 30 | } 31 | -------------------------------------------------------------------------------- /example-factories/rotation.php: -------------------------------------------------------------------------------- 1 | 29 | -------------------------------------------------------------------------------- /core/cuda/rk.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace runge_kutta; 8 | 9 | RungeKutta::RungeKutta(DataSet dataset, vector *v0, unsigned count_v0, double h){ 10 | _dataset = dataset; 11 | _v0 = v0; 12 | _count_v0 = count_v0; 13 | _h = h; 14 | } 15 | 16 | Fiber *RungeKutta::order2(){ 17 | Fiber *fibers; 18 | 19 | rk2_caller(_v0, _count_v0, _h, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field(), &fibers); 20 | 21 | return fibers; 22 | } 23 | 24 | Fiber *RungeKutta::order4(){ 25 | Fiber *fibers; 26 | 27 | rk4_caller(_v0, _count_v0, _h, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field(), &fibers); 28 | 29 | return fibers; 30 | } 31 | -------------------------------------------------------------------------------- /io/gui/primitives/cone_collection.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace runge_kutta; 8 | 9 | ConeCollection::ConeCollection(){ 10 | _cone_count = 0; 11 | 
_cones = NULL; 12 | } 13 | 14 | void ConeCollection::addCone(vector p1, vector p2){ 15 | _cones = (Cone *) realloc(_cones, (_cone_count + 1)*sizeof(Cone)); 16 | _cones[_cone_count] = Cone(p1.x, p1.y, p1.z, p2.x, p2.y, p2.z); 17 | _cone_count++; 18 | } 19 | 20 | void ConeCollection::render(GLfloat red, GLfloat green, GLfloat blue, GLdouble angle_x, GLdouble angle_y){ 21 | unsigned cone; 22 | 23 | for(cone = 0; cone < _cone_count; cone++) 24 | _cones[cone].render(red, green, blue, angle_x, angle_y); 25 | } 26 | -------------------------------------------------------------------------------- /core/opencl/rk.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace runge_kutta; 8 | RK_OpenCL OpenCL_Object; 9 | 10 | RungeKutta::RungeKutta(DataSet dataset, vector *v0, unsigned count_v0, double h){ 11 | _dataset = dataset; 12 | _v0 = v0; 13 | _count_v0 = count_v0; 14 | _h = h; 15 | } 16 | 17 | Fiber *RungeKutta::order2(){ 18 | Fiber *fibers; 19 | 20 | OpenCL_Object.rk2_caller( _v0, _count_v0, _h, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field(), &fibers); 21 | 22 | return fibers; 23 | } 24 | 25 | Fiber *RungeKutta::order4(){ 26 | Fiber *fibers; 27 | 28 | OpenCL_Object.rk4_caller(_v0, _count_v0, _h, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field(), &fibers); 29 | 30 | return fibers; 31 | } 32 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace runge_kutta; 11 | 12 | int main(int argc, char *argv[]){ 13 | unsigned v0_count; 14 | vector *v0; 15 | double h; 16 | Input file; 17 | 18 | if(argc <= 2 && strcmp(argv[1], "--analyze") != 0){ 19 | file = Input(argv[1], Input::NATIVE_TYPE); 
20 | }else{ 21 | file = Input(argv[2], Input::ANALYZE_TYPE); 22 | } 23 | DataSet dataset = file.parse(&h, &v0, &v0_count); 24 | 25 | RungeKutta rk = RungeKutta(dataset, v0, v0_count, h); 26 | Fiber *rk4_fibers = rk.order4(); 27 | Fiber *rk2_fibers = rk.order2(); 28 | 29 | Output output = Output(dataset, v0_count, rk2_fibers, rk4_fibers); 30 | output.gnuplotInput(); 31 | output.gui(); 32 | 33 | return 0; 34 | } -------------------------------------------------------------------------------- /io/gui/primitives/cylinder_collection.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace runge_kutta; 8 | 9 | CylinderCollection::CylinderCollection(){ 10 | _cylinder_count = 0; 11 | _cylinders = NULL; 12 | } 13 | 14 | void CylinderCollection::addCylinder(vector p1, vector p2){ 15 | _cylinders = (Cylinder *) realloc(_cylinders, (_cylinder_count + 1)*sizeof(Cylinder)); 16 | _cylinders[_cylinder_count] = Cylinder(p1.x, p1.y, p1.z, p2.x, p2.y, p2.z); 17 | _cylinder_count++; 18 | } 19 | 20 | void CylinderCollection::render(GLfloat red, GLfloat green, GLfloat blue, GLdouble angle_x, GLdouble angle_y){ 21 | unsigned cylinder; 22 | 23 | for(cylinder = 0; cylinder < _cylinder_count; cylinder++) 24 | _cylinders[cylinder].render(red, green, blue, angle_x, angle_y); 25 | } 26 | -------------------------------------------------------------------------------- /core/dataset.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace runge_kutta; 6 | 7 | DataSet::DataSet(){ 8 | _n_x = _n_y = _n_z = 0; 9 | _field = NULL; 10 | } 11 | 12 | DataSet::DataSet(unsigned nx, unsigned ny, unsigned nz, vector_field field){ 13 | _n_x = nx; 14 | _n_y = ny; 15 | _n_z = nz; 16 | 17 | _field = field; 18 | } 19 | 20 | unsigned DataSet::n_x(){ 21 | return _n_x; 22 | } 23 | 24 | unsigned DataSet::n_y(){ 
25 | return _n_y; 26 | } 27 | 28 | unsigned DataSet::n_z(){ 29 | return _n_z; 30 | } 31 | 32 | vector_field DataSet::field(){ 33 | return _field; 34 | } 35 | 36 | vector DataSet::field(unsigned x, unsigned y, unsigned z){ 37 | return _field[offset(_n_x, _n_y, x, y, z)]; 38 | } 39 | 40 | unsigned DataSet::offset(unsigned nx, unsigned ny, unsigned x, unsigned y, unsigned z){ 41 | return x + nx*y + ny*nx*z; 42 | } 43 | -------------------------------------------------------------------------------- /example-factories/gaussian.php: -------------------------------------------------------------------------------- 1 | 30 | -------------------------------------------------------------------------------- /include/scene.h: -------------------------------------------------------------------------------- 1 | namespace runge_kutta{ 2 | class Scene{ 3 | private: 4 | ConeCollection _vector_field; 5 | CylinderCollection _rk2_cylinders; 6 | CylinderCollection _rk4_cylinders; 7 | GLdouble _x_angle; 8 | GLdouble _y_angle; 9 | GLdouble _translation_x; 10 | GLdouble _translation_y; 11 | GLdouble _translation_z; 12 | bool _display_rk2; 13 | bool _display_rk4; 14 | bool _display_vf; 15 | 16 | void renderVectorField(); 17 | void renderAxis(); 18 | void renderCylinders(); 19 | void ilumination(); 20 | public: 21 | Scene(); 22 | Scene(DataSet data_set, unsigned fibers_count, Fiber *rk2_fibers, Fiber *rk4_fibers); 23 | void render(); 24 | void increaseXAngle(); 25 | void decreaseXAngle(); 26 | void increaseYAngle(); 27 | void decreaseYAngle(); 28 | void increaseX(); 29 | void increaseY(); 30 | void decreaseX(); 31 | void decreaseY(); 32 | void increaseScale(); 33 | void decreaseScale(); 34 | void toogleRK2(); 35 | void toogleRK4(); 36 | void toogleDS(); 37 | }; 38 | } 39 | -------------------------------------------------------------------------------- /tests/fixtures/rk_kernel_fixture.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 
#include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | using namespace runge_kutta; 9 | 10 | void RKKernelFixture::SetUp(){ 11 | vector_field vf, vf2; 12 | int i, j, k, index; 13 | 14 | vf = (vector_field) malloc(8*sizeof(vector)); 15 | for(i = 0; i < 8; i++){ 16 | vf[i].x = 1.0; 17 | vf[i].y = 1.0; 18 | vf[i].z = 1.0; 19 | } 20 | 21 | _dataset = DataSet(2, 2, 2, vf); 22 | 23 | vf2 = (vector_field) malloc(8*sizeof(vector)); 24 | for(i = 0; i < 2; i++){ 25 | for(j = 0; j < 2; j++){ 26 | for(k = 0; k < 2; k++){ 27 | index = i + 2*j + 4*k; 28 | if(j == 0){ 29 | vf2[index].x = 1.0; vf2[index].y = 0; vf2[index].z = 0; 30 | }else{ 31 | vf2[index].x = 0; vf2[index].y = 1.0; vf2[index].z = 0; 32 | } 33 | } 34 | } 35 | } 36 | 37 | _dataset2 = DataSet(2, 2, 2, vf2); 38 | 39 | _v1.x = 10; _v1.y = 5; _v1.z = 0; 40 | _v2.x = 0; _v2.y = 5; _v2.z = 10; 41 | _v1_oposite.x = -10; _v1_oposite.y = -5; _v1_oposite.z = 0; 42 | _v2_oposite.x = 0; _v2_oposite.y = -5; _v2_oposite.z = -10; 43 | _v_zero.x = _v_zero.y = _v_zero.z = 0; 44 | _v_unity.x = _v_unity.y = _v_unity.z = 1; 45 | _v_middle.x = _v_middle.y = _v_middle.z = 0.5; 46 | } 47 | -------------------------------------------------------------------------------- /io/output.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace runge_kutta; 15 | 16 | Output::Output(DataSet dataSet, unsigned fibersCount, Fiber *rk2Fibers, Fiber *rk4Fibers){ 17 | _dataSet = dataSet; 18 | _fibersCount = fibersCount; 19 | _rk2Fibers = rk2Fibers; 20 | _rk4Fibers = rk4Fibers; 21 | } 22 | 23 | void Output::gnuplotInput(){ 24 | FILE *rk2_dat, *rk4_dat, *rk2_vs_rk4; 25 | unsigned i,k; 26 | 27 | rk2_dat = fopen("rk2.dat", "w"); 28 | rk4_dat = fopen("rk4.dat", "w"); 29 | rk2_vs_rk4 = fopen("rk2-vs-rk4.p", "w"); 30 | 31 | for(k = 0; k < 
_fibersCount; k++){ 32 | for(i = 0; i < _rk2Fibers[k].pointsCount(); i++) 33 | fprintf(rk2_dat, "%f %f %f\n", _rk2Fibers[k].getPoint(i).x, _rk2Fibers[k].getPoint(i).y, _rk2Fibers[k].getPoint(i).z); 34 | fprintf(rk2_dat, "\n"); 35 | 36 | for(i = 0; i < _rk4Fibers[k].pointsCount(); i++) 37 | fprintf(rk4_dat, "%f %f %f\n", _rk4Fibers[k].getPoint(i).x, _rk4Fibers[k].getPoint(i).y, _rk4Fibers[k].getPoint(i).z); 38 | fprintf(rk4_dat, "\n"); 39 | } 40 | 41 | fclose(rk2_dat); 42 | fclose(rk4_dat); 43 | 44 | fprintf(rk2_vs_rk4, "set xrange[0:%d]\n", _dataSet.n_x() - 1); 45 | fprintf(rk2_vs_rk4, "set yrange[0:%d]\n", _dataSet.n_y() - 1); 46 | fprintf(rk2_vs_rk4, "set zrange[0:%d]\n", _dataSet.n_z() - 1); 47 | fprintf(rk2_vs_rk4, "splot \"rk2.dat\" w lines lt 1, \"rk4.dat\" w lines lt 2\n"); 48 | fprintf(rk2_vs_rk4, "pause -1"); 49 | 50 | fclose(rk2_vs_rk4); 51 | } 52 | 53 | void Output::gui(){ 54 | WindowManager wm = WindowManager(Scene(_dataSet, _fibersCount, _rk2Fibers, _rk4Fibers)); 55 | wm.loop(); 56 | } 57 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Brasil - São Paulo - São Paulo 2 | Instituto de Metemática e Estatística da Universidade de São Paulo (IME - USP) 3 | C++, CUDA and OpenCL implementations of Runge-Kutta methods (orders 2 and 4) applied to vector fields 4 | 5 | ########### 6 | # LICENSE # 7 | ########### 8 | 9 | LGPL v3.0 10 | Please see http://www.gnu.org/licenses/lgpl-3.0.txt 11 | 12 | ############## 13 | # DEVELOPERS # 14 | ############## 15 | 16 | Orientation: Assistant Professor of Computer Science MARCEL P. JACKOWSKI 17 | 18 | Development: Rafael Reggiani Manzo (CUDA and C++) 19 | Giancarlo Rigo (OpenCL and C++) 20 | 21 | ######### 22 | # INPUT # 23 | ######### 24 | 25 | #Standard 26 | 27 | The input file should be formated as the following: 28 | 29 | x y z 30 | v0_count 31 | v0_x v0_y v0_z 32 | . 33 | . 34 | . 
35 | v0_x v0_y v0_z 36 | p_x p_y p_z 37 | . 38 | . 39 | . 40 | p_x p_y p_z 41 | p_x p_y p_z1 42 | . 43 | . 44 | . 45 | p_x p_y p_z1 46 | . 47 | . 48 | . 49 | 50 | To see an example, run: make examples 51 | 52 | #Analyze 53 | 54 | Another option is the Analyze format. To do so, you need to run the program with: 55 | 56 | ./rk --analyze 57 | 58 | Then you will be prompted for other params by the program. 59 | 60 | Support for the Analyze format is provided by the open library CImg (http://cimg.sourceforge.net/index.shtml) distributed under the CeCILL-C license (http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html). 61 | 62 | ###################### 63 | # Interface Commands # 64 | ###################### 65 | 66 | To translate the result use the keys: w, a, s, d 67 | To rotate the result over the X-axis use the up and down arrows 68 | To rotate the result over the Y-axis use the left and right arrows 69 | To zoom in use the key + 70 | To zoom out use the key - 71 | To toggle RK2 visualization use 2 72 | To toggle RK4 visualization use 4 73 | To toggle the vector field visualization use f 74 | -------------------------------------------------------------------------------- /include/opcl.h: -------------------------------------------------------------------------------- 1 | #ifdef __APPLE__ 2 | #include 3 | #else 4 | #include 5 | #endif 6 | 7 | #ifdef cl_khr_fp64 8 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 9 | #define TYPE double 10 | #elif defined(cl_amd_fp64) 11 | #define TYPE double 12 | #pragma OPENCL EXTENSION cl_amd_fp64 : enable 13 | #else 14 | #define TYPE double 15 | #endif 16 | 17 | namespace runge_kutta{ 18 | class RK_OpenCL{ 19 | private: 20 | cl_platform_id _platform; 21 | cl_context _context; 22 | cl_device_id* _devices; 23 | cl_command_queue _queue; 24 | cl_kernel _kernel; 25 | cl_program _program; 26 | cl_event _event; 27 | cl_mem _opencl_points, _opencl_n_points, _opencl_v0, _opencl_count_v0, _opencl_h, _opencl_n_x, _opencl_n_y, 
_opencl_n_z,_opencl_max_points, _opencl_field; 28 | double _time; 29 | unsigned int _devices_found; 30 | unsigned int _device_used; 31 | void opencl_create_platform(unsigned int num_platforms); 32 | void opencl_get_devices_id(); 33 | void opencl_create_context(); 34 | void opencl_create_queue(); 35 | char* opencl_load_program_from_source(int *size); 36 | void opencl_build_program(); 37 | void opencl_create_program(); 38 | void opencl_create_kernel(char* kernel_name); 39 | void opencl_prepare_kernel(vector *v0, unsigned int count_v0, double h, int n_x,int n_y,int n_z, vector_field field, unsigned int max_points); 40 | void opencl_run_kernel(unsigned int count_v0, unsigned int max_points, runge_kutta::Fiber **fibers); 41 | void opencl_init(char* kernel_name, vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, Fiber **fibers); 42 | void opencl_time(cl_event* timer); 43 | public: 44 | RK_OpenCL(); 45 | void rk2_caller(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, Fiber **fibers); 46 | void rk4_caller(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, Fiber **fibers); 47 | }; 48 | } 49 | -------------------------------------------------------------------------------- /io/gui/primitives/cone.cpp: -------------------------------------------------------------------------------- 1 | /************************************/ 2 | /* Adaptation of the cylinder class */ 3 | /************************************/ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define CONE_RADIUS 0.1 12 | #define CONE_SUBDIVISIONS 36 13 | 14 | using namespace runge_kutta; 15 | 16 | Cone::Cone(double x1, double y1, double z1, double x2, double y2, double z2){ 17 | _x1 = x1; 18 | _y1 = y1; 19 | _z1 = z1; 20 | _x2 = x2; 21 | _y2 = y2; 22 | _z2 = z2; 23 | } 24 | 25 | void Cone::render(GLfloat red, GLfloat green, GLfloat blue, GLdouble angle_x, GLdouble angle_y){ 26 | 
GLdouble vx, vy, vz, v, ax; 27 | GLUquadricObj *_quadric; 28 | 29 | _quadric = gluNewQuadric(); 30 | gluQuadricNormals(_quadric, GLU_SMOOTH); 31 | 32 | vx = _x2 - _x1; 33 | vy = _y2 - _y1; 34 | vz = _z2 - _z1; 35 | 36 | //handle the degenerate case of z1 == z2 with an approximation 37 | if (vz > -0.000001 && vz < 0.000001){ 38 | if (vz >= 0.0){ 39 | vz = 0.000001; 40 | }else{ 41 | vz = -0.000001; 42 | } 43 | } 44 | 45 | if (vy > -0.000001 && vy < 0.000001){ 46 | if (vy >= 0.0){ 47 | vy = -0.000001; 48 | }else{ 49 | vy = 0.000001; 50 | } 51 | } 52 | 53 | if (vx > -0.000001 && vx < 0.000001){ 54 | if (vx >= 0.0){ 55 | vx = 0.000001; 56 | }else{ 57 | vx = -0.000001; 58 | } 59 | } 60 | 61 | v = sqrt( vx*vx + vy*vy + vz*vz ); 62 | ax = (180.0/M_PI)*acos( vz/v ); 63 | 64 | /*if(v < 0) 65 | v = -CONE_HEIGHT; 66 | else 67 | v = CONE_HEIGHT;*/ 68 | 69 | if( vz < 0.0 ) ax = -ax; 70 | 71 | glPushMatrix(); 72 | glColor3f(red, green, blue); 73 | glRotated(angle_x, 1.0, 0.0, 0.0); 74 | glRotated(angle_y, 0.0, 1.0, 0.0); 75 | 76 | glTranslated( _x1, _y1, _z1 ); 77 | glRotated(ax, -vy, vx, 0.0); 78 | gluQuadricOrientation(_quadric,GLU_OUTSIDE); 79 | gluCylinder(_quadric, CONE_RADIUS, 0.0f, v, CONE_SUBDIVISIONS, 1); 80 | 81 | //draw the cap 82 | //gluQuadricOrientation(_quadric,GLU_INSIDE); 83 | //gluDisk(_quadric, 0.0, CONE_RADIUS, CONE_SUBDIVISIONS, 1); 84 | //glTranslated( 0,0,v ); 85 | glPopMatrix(); 86 | 87 | gluDeleteQuadric(_quadric); 88 | } 89 | -------------------------------------------------------------------------------- /io/gui/window_manager.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace runge_kutta; 12 | 13 | Scene _scene; 14 | 15 | WindowManager::WindowManager(Scene scene){ 16 | int argc = 0; 17 | char *argv[1]; 18 | 19 | _scene = scene; 20 | 21 | glutInit(&argc, argv); 22 | 
glutInitWindowSize(800,600); 23 | glutInitWindowPosition(0,0); 24 | glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH); 25 | glutCreateWindow("Runge-Kutta"); 26 | glViewport(0, 0, 800, 600); 27 | 28 | glutDisplayFunc(display); 29 | glutReshapeFunc(reshape); 30 | glutIdleFunc(idle); 31 | glutKeyboardFunc(key_pressed); 32 | glutSpecialFunc(key_pressed_special); 33 | } 34 | 35 | void WindowManager::loop(){ 36 | glutMainLoop(); 37 | } 38 | 39 | void WindowManager::display(){ 40 | glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); 41 | glClearColor(1.0f, 1.0f, 1.0f, 0.0f); 42 | 43 | _scene.render(); 44 | 45 | glutSwapBuffers(); 46 | } 47 | 48 | void WindowManager::reshape(int width, int height){ 49 | glViewport (0, 0, (GLsizei) width, (GLsizei) height); 50 | glMatrixMode (GL_PROJECTION); 51 | glLoadIdentity(); 52 | gluPerspective(90.0,((float) width)/((float) height),0.01,5000); 53 | glMatrixMode (GL_MODELVIEW); 54 | } 55 | 56 | void WindowManager::idle(){ 57 | glutPostRedisplay(); 58 | } 59 | 60 | void WindowManager::key_pressed (unsigned char key, int x, int y) { 61 | if(key == '+') 62 | _scene.decreaseScale(); 63 | else if(key == '-') 64 | _scene.increaseScale(); 65 | else if(key == 's') 66 | _scene.increaseY(); 67 | else if(key == 'w') 68 | _scene.decreaseY(); 69 | else if(key == 'a') 70 | _scene.decreaseX(); 71 | else if(key == 'd') 72 | _scene.increaseX(); 73 | else if(key == '2') 74 | _scene.toogleRK2(); 75 | else if(key == '4') 76 | _scene.toogleRK4(); 77 | else if(key == 'f') 78 | _scene.toogleDS(); 79 | } 80 | 81 | void WindowManager::key_pressed_special (int key, int x, int y) { 82 | if(key == GLUT_KEY_LEFT) 83 | _scene.decreaseYAngle(); 84 | else if(key == GLUT_KEY_RIGHT) 85 | _scene.increaseYAngle(); 86 | else if(key == GLUT_KEY_UP) 87 | _scene.decreaseXAngle(); 88 | else if(key == GLUT_KEY_DOWN) 89 | _scene.increaseXAngle(); 90 | } 91 | -------------------------------------------------------------------------------- /io/gui/primitives/cylinder.cpp: 
-------------------------------------------------------------------------------- 1 | /************************************************************************************************/ 2 | /* The original render method was written by Curran Kelleher in 2008 and was modified by us */ 3 | /* with corrections mostly on the cylinder direction */ 4 | /* */ 5 | /* The original method is under public domain and can be found in: */ 6 | /* http://lifeofaprogrammergeek.blogspot.com.br/2008/08/opengl-example-rendering-cylinders.html */ 7 | /************************************************************************************************/ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #define CYLINDER_RADIUS 0.25 16 | #define CYLINDER_SUBDIVISIONS 8 17 | 18 | using namespace runge_kutta; 19 | 20 | Cylinder::Cylinder(double x1, double y1, double z1, double x2, double y2, double z2){ 21 | _x1 = x1; 22 | _y1 = y1; 23 | _z1 = z1; 24 | _x2 = x2; 25 | _y2 = y2; 26 | _z2 = z2; 27 | } 28 | 29 | void Cylinder::render(GLfloat red, GLfloat green, GLfloat blue, GLdouble angle_x, GLdouble angle_y){ 30 | GLdouble vx, vy, vz, v, ax; 31 | GLUquadricObj *_quadric; 32 | 33 | _quadric = gluNewQuadric(); 34 | //gluQuadricDrawStyle(_quadric, GLU_LINE); /* wireframe */ 35 | gluQuadricNormals(_quadric, GLU_SMOOTH); 36 | 37 | vx = _x2 - _x1; 38 | vy = _y2 - _y1; 39 | vz = _z2 - _z1; 40 | 41 | //handle the degenerate case of z1 == z2 with an approximation 42 | if (vz > -0.000001 && vz < 0.000001){ 43 | if (vz >= 0.0){ 44 | vz = 0.000001; 45 | }else{ 46 | vz = -0.000001; 47 | } 48 | } 49 | 50 | if (vy > -0.000001 && vy < 0.000001){ 51 | if (vy >= 0.0){ 52 | vy = -0.000001; 53 | }else{ 54 | vy = 0.000001; 55 | } 56 | } 57 | 58 | if (vx > -0.000001 && vx < 0.000001){ 59 | if (vx >= 0.0){ 60 | vx = 0.000001; 61 | }else{ 62 | vx = -0.000001; 63 | } 64 | } 65 | 66 | v = sqrt( vx*vx + vy*vy + vz*vz ); 67 | ax = (180.0/M_PI)*acos( vz/v ); 68 | 69 | if( vz < 0.0 ) ax = -ax; 70 
| 71 | glPushMatrix(); 72 | glColor3f(red, green, blue); 73 | glRotated(angle_x, 1.0, 0.0, 0.0); 74 | glRotated(angle_y, 0.0, 1.0, 0.0); 75 | 76 | //draw the cylinder body 77 | glTranslated( _x1, _y1, _z1 ); 78 | glRotated(ax, -vy, vx, 0.0); 79 | gluQuadricOrientation(_quadric,GLU_OUTSIDE); 80 | gluCylinder(_quadric, CYLINDER_RADIUS, CYLINDER_RADIUS, v, CYLINDER_SUBDIVISIONS, 1); 81 | 82 | //draw the first cap 83 | gluQuadricOrientation(_quadric,GLU_INSIDE); 84 | gluDisk(_quadric, 0.0, CYLINDER_RADIUS, CYLINDER_SUBDIVISIONS, 1); 85 | glTranslated( 0,0,v ); 86 | 87 | //draw the second cap 88 | gluQuadricOrientation(_quadric,GLU_OUTSIDE); 89 | gluDisk(_quadric, 0.0, CYLINDER_RADIUS, CYLINDER_SUBDIVISIONS, 1); 90 | glPopMatrix(); 91 | 92 | gluDeleteQuadric(_quadric); 93 | } 94 | -------------------------------------------------------------------------------- /io/input.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace runge_kutta; 9 | using namespace cimg_library; 10 | 11 | Input::Input(char *file_name, unsigned file_type){ 12 | _file_name = (char *) malloc(strlen(file_name)*sizeof(char)); 13 | strcpy(_file_name, file_name); 14 | _file_type = file_type; 15 | } 16 | 17 | Input::Input(){ 18 | _file_name = NULL; 19 | _file_type = 2; 20 | } 21 | 22 | DataSet Input::parse(double *h, vector **v0, unsigned *v0_count){ 23 | if(_file_type == NATIVE_TYPE) 24 | return parseNative(h,v0,v0_count); 25 | else if(_file_type == ANALYZE_TYPE) 26 | return parseAnalyze(h,v0,v0_count); 27 | 28 | return DataSet(); 29 | } 30 | 31 | DataSet Input::parseNative(double *h, vector **v0, unsigned *v0_count){ 32 | unsigned i, j, k; 33 | unsigned *n_x, *n_y, *n_z; 34 | vector_field *field; 35 | FILE *fp; 36 | 37 | fp = fopen(_file_name, "r"); 38 | 39 | n_x = (unsigned *) malloc(sizeof(unsigned *)); 40 | n_y = (unsigned *) malloc(sizeof(unsigned *)); 41 | n_z = 
(unsigned *) malloc(sizeof(unsigned *)); 42 | 43 | field = (vector_field *) malloc(sizeof(vector_field *)); 44 | 45 | fscanf(fp, "%u", n_x); 46 | fscanf(fp, "%u", n_y); 47 | fscanf(fp, "%u", n_z); 48 | 49 | fscanf(fp, "%u", v0_count); 50 | *v0 = (vector *) malloc( (*v0_count)*sizeof(vector) ); 51 | for(i = 0; i < *v0_count; i++){ 52 | fscanf(fp, "%lf", &(((*v0)[i]).x)); 53 | fscanf(fp, "%lf", &(((*v0)[i]).y)); 54 | fscanf(fp, "%lf", &(((*v0)[i]).z)); 55 | } 56 | fscanf(fp, "%lf", h); 57 | 58 | *field = (vector_field) malloc( (*n_x)*(*n_y)*(*n_z)*sizeof(vector) ); 59 | 60 | for(k = 0; k < *n_z; k++){ 61 | for(i = 0; i < *n_x; i++){ 62 | for(j = 0; j < *n_y; j++){ 63 | fscanf(fp, "%lf", &(((*field)[DataSet::offset(*n_x, *n_y, i, j, k)]).x)); 64 | fscanf(fp, "%lf", &(((*field)[DataSet::offset(*n_x, *n_y, i, j, k)]).y)); 65 | fscanf(fp, "%lf", &(((*field)[DataSet::offset(*n_x, *n_y, i, j, k)]).z)); 66 | } 67 | } 68 | } 69 | 70 | return DataSet(*n_x, *n_y, *n_z, *field); 71 | } 72 | 73 | DataSet Input::parseAnalyze(double *h, vector **v0, unsigned *v0_count){ 74 | unsigned i, j, k; 75 | unsigned *n_x, *n_y, *n_z; 76 | double multiplier; 77 | vector_field *field; 78 | 79 | CImg img(_file_name); 80 | 81 | n_x = (unsigned *) malloc(sizeof(unsigned *)); 82 | n_y = (unsigned *) malloc(sizeof(unsigned *)); 83 | n_z = (unsigned *) malloc(sizeof(unsigned *)); 84 | 85 | field = (vector_field *) malloc(sizeof(vector_field *)); 86 | 87 | *n_x = img.width(); 88 | *n_y = img.height(); 89 | *n_z = img.depth(); 90 | 91 | printf("\nPlease enter the initial points count:\n"); 92 | scanf("%u", v0_count); 93 | *v0 = (vector *) malloc( (*v0_count)*sizeof(vector) ); 94 | for(i = 0; i < *v0_count; i++){ 95 | printf("\nPlease enter the %dth initial points coordinates:\n", i); 96 | scanf("%lf", &(((*v0)[i]).x)); 97 | scanf("%lf", &(((*v0)[i]).y)); 98 | scanf("%lf", &(((*v0)[i]).z)); 99 | } 100 | printf("\nPlease enter the step size:\n"); 101 | scanf("%lf", h); 102 | 103 | printf("\nPlease 
enter and multiplier for the vector magnitude:\n"); 104 | scanf("%lf", &multiplier); 105 | 106 | *field = (vector_field) malloc( (*n_x)*(*n_y)*(*n_z)*sizeof(vector) ); 107 | 108 | for(k = 0; k < *n_z; k++){ 109 | for(i = 0; i < *n_x; i++){ 110 | for(j = 0; j < *n_y; j++){ 111 | ((*field)[DataSet::offset(*n_x, *n_y, i, j, k)]).x = img(i,j,k,0)*multiplier; 112 | ((*field)[DataSet::offset(*n_x, *n_y, i, j, k)]).y = img(i,j,k,1)*multiplier; 113 | ((*field)[DataSet::offset(*n_x, *n_y, i, j, k)]).z = img(i,j,k,2)*multiplier; 114 | } 115 | } 116 | } 117 | 118 | return DataSet(*n_x, *n_y, *n_z, *field); 119 | } 120 | -------------------------------------------------------------------------------- /tests/units/rk_kernel_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace runge_kutta; 14 | 15 | TEST_F(RKKernelFixture, sum){ 16 | EXPECT_EQ(_v_zero.x, (sum(_v2, _v2_oposite)).x); 17 | EXPECT_EQ(_v_zero.y, (sum(_v2, _v2_oposite)).y); 18 | EXPECT_EQ(_v_zero.z, (sum(_v2, _v2_oposite)).z); 19 | 20 | EXPECT_EQ(_v_unity.x, (sum(_v_zero, _v_unity)).x); 21 | EXPECT_EQ(_v_unity.y, (sum(_v_zero, _v_unity)).y); 22 | EXPECT_EQ(_v_unity.z, (sum(_v_zero, _v_unity)).z); 23 | 24 | EXPECT_EQ(10, (sum(_v1, _v2)).x); 25 | EXPECT_EQ(10, (sum(_v1, _v2)).y); 26 | EXPECT_EQ(10, (sum(_v1, _v2)).z); 27 | 28 | EXPECT_EQ((sum(_v2, _v1)).x, (sum(_v1, _v2)).x); 29 | EXPECT_EQ((sum(_v2, _v1)).y, (sum(_v1, _v2)).y); 30 | EXPECT_EQ((sum(_v2, _v1)).z, (sum(_v1, _v2)).z); 31 | } 32 | 33 | TEST_F(RKKernelFixture, subtract){ 34 | EXPECT_EQ(_v_zero.x, (subtract(_v2, _v2)).x); 35 | EXPECT_EQ(_v_zero.y, (subtract(_v2, _v2)).y); 36 | EXPECT_EQ(_v_zero.z, (subtract(_v2, _v2)).z); 37 | 38 | EXPECT_EQ(_v1.x, (subtract(_v1, _v_zero)).x); 39 | EXPECT_EQ(_v1.y, (subtract(_v1, _v_zero)).y); 40 | EXPECT_EQ(_v1.z, 
(subtract(_v1, _v_zero)).z); 41 | 42 | EXPECT_EQ(10, (subtract(_v1, _v2)).x); 43 | EXPECT_EQ(0, (subtract(_v1, _v2)).y); 44 | EXPECT_EQ(-10, (subtract(_v1, _v2)).z); 45 | } 46 | 47 | TEST_F(RKKernelFixture, mult_scalar){ 48 | EXPECT_EQ(15, (mult_scalar(_v1, 1.5)).x); 49 | EXPECT_EQ(7.5, (mult_scalar(_v1, 1.5)).y); 50 | EXPECT_EQ(0, (mult_scalar(_v1, 1.5)).z); 51 | 52 | EXPECT_EQ(_v_zero.x, (mult_scalar(_v_zero, 1.5)).x); 53 | EXPECT_EQ(_v_zero.y, (mult_scalar(_v_zero, 1.5)).y); 54 | EXPECT_EQ(_v_zero.z, (mult_scalar(_v_zero, 1.5)).z); 55 | 56 | EXPECT_EQ(_v2.x, (mult_scalar(_v2, 1)).x); 57 | EXPECT_EQ(_v2.y, (mult_scalar(_v2, 1)).y); 58 | EXPECT_EQ(_v2.z, (mult_scalar(_v2, 1)).z); 59 | } 60 | 61 | TEST_F(RKKernelFixture, set){ 62 | vector aux; 63 | 64 | set(&aux, _v1); 65 | 66 | EXPECT_EQ(_v1.x, aux.x); 67 | EXPECT_EQ(_v1.y, aux.y); 68 | EXPECT_EQ(_v1.z, aux.z); 69 | } 70 | 71 | TEST_F(RKKernelFixture, module){ 72 | EXPECT_EQ(0, module(_v_zero)); 73 | EXPECT_EQ(sqrt(3), module(_v_unity)); 74 | } 75 | 76 | TEST_F(RKKernelFixture, distance){ 77 | EXPECT_EQ(0, distance(_v_zero, _v_zero)); 78 | EXPECT_EQ(module(_v2), distance(_v_zero, _v2)); 79 | } 80 | 81 | TEST_F(RKKernelFixture, nearest_neighbour){ 82 | EXPECT_EQ(_dataset.field(0,0,0).x, (nearest_neighbour(_v_zero, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field())).x); 83 | EXPECT_EQ(_dataset.field(0,0,0).y, (nearest_neighbour(_v_zero, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field())).y); 84 | EXPECT_EQ(_dataset.field(0,0,0).z, (nearest_neighbour(_v_zero, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field())).z); 85 | 86 | EXPECT_EQ(_v_zero.x, (nearest_neighbour(_v1, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field())).x); 87 | EXPECT_EQ(_v_zero.y, (nearest_neighbour(_v1, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field())).y); 88 | EXPECT_EQ(_v_zero.z, (nearest_neighbour(_v1, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), 
_dataset.field())).z); 89 | 90 | EXPECT_EQ(_dataset.field(0,0,0).x, (nearest_neighbour(_v_middle, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field())).x); 91 | EXPECT_EQ(_dataset.field(0,0,0).y, (nearest_neighbour(_v_middle, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field())).y); 92 | EXPECT_EQ(_dataset.field(0,0,0).z, (nearest_neighbour(_v_middle, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field())).z); 93 | } 94 | 95 | TEST_F(RKKernelFixture, trilinear_interpolation){ 96 | vector aux; 97 | aux.x = aux.y = aux.z = 1; 98 | 99 | EXPECT_EQ(aux.x, (trilinear_interpolation(_v_middle, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field())).x); 100 | EXPECT_EQ(aux.y, (trilinear_interpolation(_v_middle, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field())).y); 101 | EXPECT_EQ(aux.z, (trilinear_interpolation(_v_middle, _dataset.n_x(), _dataset.n_y(), _dataset.n_z(), _dataset.field())).z); 102 | 103 | aux.x = 0.5; aux.y = 0.5; aux.z = 0; 104 | 105 | EXPECT_EQ(aux.x, (trilinear_interpolation(_v_middle, _dataset2.n_x(), _dataset2.n_y(), _dataset2.n_z(), _dataset2.field())).x); 106 | EXPECT_EQ(aux.y, (trilinear_interpolation(_v_middle, _dataset2.n_x(), _dataset2.n_y(), _dataset2.n_z(), _dataset2.field())).y); 107 | EXPECT_EQ(aux.z, (trilinear_interpolation(_v_middle, _dataset2.n_x(), _dataset2.n_y(), _dataset2.n_z(), _dataset2.field())).z); 108 | } 109 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean_compiling_results clean_others clean_plot clean_examples examples cuda opencl c gtest 2 | 3 | #vars 4 | SO = $(shell uname -s) 5 | CUDA_FLAGS=-arch sm_20 6 | OPENCL_FLAGS=-lOpenCL 7 | C_FLAGS=-Wall -pedantic 8 | C_EXTRA_FLAGS=-Wextra 9 | 10 | ifeq ($(SO),Darwin) 11 | STATIC_LIBS=-framework Glut -framework OpenGL -lm -lpthread -L/usr/X11/lib -lX11 12 | 
OPENCL_FLAGS=-framework OpenCL
else
STATIC_LIBS=-lglut -lGL -lGLU -lm -lpthread -lX11
OPENCL_FLAGS=-lOpenCL
endif

# Full warning set used by the compile rules below (window_manager.o deliberately
# keeps the smaller C_FLAGS set). It was referenced everywhere but never defined,
# so it expanded to nothing and no warning flags were passed.
C_ALL_FLAGS=$(C_FLAGS) $(C_EXTRA_FLAGS)

GENERAL_OBJECTS=main.o input.o dataset.o fiber.o output.o cylinder.o window_manager.o scene.o cylinder_collection.o cone.o cone_collection.o

LIBRARIES_PATH=include
GTEST_PATH=gtest-1.6.0

#default C++ version
c: $(GENERAL_OBJECTS) rk_c.o rk_kernel_c.o
	g++ $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) $(GENERAL_OBJECTS) rk_c.o rk_kernel_c.o -o rk $(STATIC_LIBS)

#general objects

main.o: main.cpp include/input.h include/output.h include/rk.h include/dataset.h include/fiber.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) main.cpp

input.o: io/input.cpp include/input.h include/dataset.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) io/input.cpp

output.o: io/output.cpp include/output.h include/dataset.h include/fiber.h include/scene.h include/window_manager.h include/cylinder.h include/cylinder_collection.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) io/output.cpp

scene.o: io/gui/scene.cpp include/scene.h include/cylinder_collection.h include/cylinder.h include/dataset.h include/fiber.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) io/gui/scene.cpp

window_manager.o: io/gui/window_manager.cpp include/window_manager.h include/dataset.h include/fiber.h include/cylinder.h include/cylinder_collection.h
	g++ -c $(C_FLAGS) -I$(LIBRARIES_PATH) io/gui/window_manager.cpp

cylinder.o: io/gui/primitives/cylinder.cpp include/cylinder.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) io/gui/primitives/cylinder.cpp

cylinder_collection.o: io/gui/primitives/cylinder_collection.cpp include/cylinder_collection.h include/dataset.h include/cylinder.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) io/gui/primitives/cylinder_collection.cpp

cone.o: io/gui/primitives/cone.cpp include/cone.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) io/gui/primitives/cone.cpp

cone_collection.o: io/gui/primitives/cone_collection.cpp include/cone_collection.h include/dataset.h include/cone.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) io/gui/primitives/cone_collection.cpp

dataset.o: core/dataset.cpp include/dataset.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) core/dataset.cpp

fiber.o: core/fiber.cpp include/fiber.h include/dataset.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) core/fiber.cpp

#C++
rk_c.o: core/c/rk.cpp include/rk.h include/dataset.h include/fiber.h include/rk_c_kernel.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) core/c/rk.cpp -o rk_c.o

rk_kernel_c.o: core/c/rk_kernel.cpp include/rk_c_kernel.h include/dataset.h include/fiber.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) core/c/rk_kernel.cpp -o rk_kernel_c.o

#CUDA
cuda: $(GENERAL_OBJECTS) rk_cuda.o rk_cuda_kernel.o
	nvcc $(GENERAL_OBJECTS) rk_cuda.o rk_cuda_kernel.o -o rk $(CUDA_FLAGS) $(STATIC_LIBS)

rk_cuda_kernel.o: core/cuda/rk_kernel.cu include/rk_cuda_kernel.h include/dataset.h include/fiber.h
	nvcc -c -I$(LIBRARIES_PATH) core/cuda/rk_kernel.cu -o rk_cuda_kernel.o $(CUDA_FLAGS)

rk_cuda.o: core/cuda/rk.cpp include/rk_cuda_kernel.h include/rk.h include/dataset.h include/fiber.h
	nvcc -c -I$(LIBRARIES_PATH) core/cuda/rk.cpp -o rk_cuda.o $(CUDA_FLAGS)

#OPENCL
opencl: $(GENERAL_OBJECTS) rk_opencl.o opcl.o
	g++ $(GENERAL_OBJECTS) rk_opencl.o opcl.o -o rk $(OPENCL_FLAGS) $(STATIC_LIBS)

opcl.o: core/opencl/opcl.cpp include/opcl.h include/dataset.h include/fiber.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) core/opencl/opcl.cpp -o opcl.o

rk_opencl.o: core/opencl/rk.cpp core/opencl/opcl.cpp include/rk_opencl_kernel.h include/opcl.h include/rk.h include/dataset.h include/fiber.h
	g++ -c $(C_ALL_FLAGS) -I$(LIBRARIES_PATH) core/opencl/rk.cpp -o rk_opencl.o

#OTHER
examples:
	php example-factories/rotation.php
	php example-factories/random.php
	php example-factories/lines.php
	php example-factories/gaussian.php

clean_compiling_results:
	rm -f *.o rk

clean_plot:
	rm -f rk2.dat rk4.dat rk2-vs-rk4.p

clean_others:
	rm -f *~ example-factories/*~ include/*~ core/*~ core/c/*~ core/cuda/*~ core/opencl/*~ io/*~ io/gui/*~ io/gui/primitives/*~

clean_examples:
	rm -f rotationField randomField gaussianField linesField

clean_tests:
	rm -f libgtest.a gtest-1.6.0.zip
	rm -rf gtest-1.6.0

clean: clean_compiling_results clean_others clean_plot
--------------------------------------------------------------------------------
/io/gui/scene.cpp:
--------------------------------------------------------------------------------
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#define MIN_FIELD_VEC_MAG 0.000001

using namespace runge_kutta;

// Builds an empty scene: no rotation, no translation, both fiber sets visible and
// the vector field hidden.
Scene::Scene(){
  _x_angle = 0.0;
  _y_angle = 0.0;
  _translation_x = 0.0;
  _translation_y = 0.0;
  _translation_z = 0.0;
  _display_rk2 = true;
  _display_rk4 = true;
  _display_vf = false;
}

// Builds the scene geometry:
//   - one cylinder per pair of consecutive points of every RK2 and RK4 fiber;
//   - one cone per grid cell whose field vector is longer than MIN_FIELD_VEC_MAG,
//     going from the cell position to the cell position plus its field vector.
// fibers_count is the length of both rk2_fibers and rk4_fibers.
Scene::Scene(DataSet data_set, unsigned fibers_count, Fiber *rk2_fibers, Fiber *rk4_fibers){
  unsigned fiber_index, point_index, i, j, k;
  Fiber fiber;
  vector initial_point, final_point, cell_vector;

  _x_angle = 0.0;
  _y_angle = 0.0;
  _translation_x = 0.0;
  _translation_y = 0.0;
  _translation_z = 0.0;
  _display_rk2 = true;
  _display_rk4 = true;
  _display_vf = false;

  //Fibers abstraction
  for(fiber_index = 0; fiber_index < fibers_count; fiber_index++){
    //RK2
    fiber = rk2_fibers[fiber_index];
    for(point_index = 1; point_index < fiber.pointsCount(); point_index++){
      initial_point = fiber.getPoint(point_index - 1);
      final_point = fiber.getPoint(point_index);
      _rk2_cylinders.addCylinder(initial_point, final_point);
    }

    //RK4
    fiber = rk4_fibers[fiber_index];
    for(point_index = 1; point_index < fiber.pointsCount(); point_index++){
      initial_point = fiber.getPoint(point_index - 1);
      final_point = fiber.getPoint(point_index);
      _rk4_cylinders.addCylinder(initial_point, final_point);
    }
  }

  //Dataset abstraction
  for(i = 0; i < data_set.n_x(); i++)
    for(j = 0; j < data_set.n_y(); j++)
      for(k = 0; k < data_set.n_z(); k++){
        // one lookup per cell instead of nine
        cell_vector = data_set.field(i, j, k);

        initial_point.x = (double) i;
        initial_point.y = (double) j;
        initial_point.z = (double) k;

        final_point.x = (double) (i + cell_vector.x);
        final_point.y = (double) (j + cell_vector.y);
        final_point.z = (double) (k + cell_vector.z);

        if(sqrt(cell_vector.x*cell_vector.x +
                cell_vector.y*cell_vector.y +
                cell_vector.z*cell_vector.z
               ) > MIN_FIELD_VEC_MAG)
          _vector_field.addCone(initial_point, final_point);
      }
}

// Draws the whole scene: camera placement, current translation, then the axes, the
// vector field and the fiber cylinders.
void Scene::render(){
  glPushMatrix();
    glLoadIdentity();

    gluLookAt(0, 0,-150, 0,0,0, 0,1,0);
    //TODO: ilumination();

    glTranslated(_translation_x, _translation_y, _translation_z);

    renderAxis();
    renderVectorField();
    renderCylinders();
  glPopMatrix();
}

// Draws the vector field cones in blue when they are toggled on.
void Scene::renderVectorField(){
  if(_display_vf) _vector_field.render(0.0,0.0,1.0, _x_angle, _y_angle);
}

// Draws the three coordinate axes as black lines of length 300, rotated by the
// current X and Y angles.
void Scene::renderAxis(){
  glPushMatrix();
    glColor3f(0.0,0.0,0.0);

    glRotated(_x_angle, 1.0, 0.0, 0.0);
    glRotated(_y_angle, 0.0, 1.0, 0.0);

    glBegin(GL_LINES);
      glVertex3f(0.0,0.0,0.0);
      glVertex3f(300.0,0.0,0.0);
    glEnd();

    glBegin(GL_LINES);
      glVertex3f(0.0,0.0,0.0);
      glVertex3f(0.0,300.0,0.0);
    glEnd();

    glBegin(GL_LINES);
      glVertex3f(0.0,0.0,0.0);
      glVertex3f(0.0,0.0,300.0);
    glEnd();
  glPopMatrix();
}

// Draws the RK2 fibers in green and the RK4 fibers in red, each only when toggled on.
void Scene::renderCylinders(){
  glPushMatrix();
    if(_display_rk2) _rk2_cylinders.render(0.0, 1.0, 0.0, _x_angle, _y_angle);
    if(_display_rk4) _rk4_cylinders.render(1.0, 0.0, 0.0, _x_angle, _y_angle);
  glPopMatrix();
}

// The setters below change the view by one unit (degree or length) per call.
void Scene::increaseXAngle(){
  _x_angle += 1;
}

void Scene::decreaseXAngle(){
  _x_angle -= 1;
}

void Scene::increaseYAngle(){
  _y_angle += 1;
}

void Scene::decreaseYAngle(){
  _y_angle -= 1;
}

void Scene::increaseX(){
  _translation_x += 1;
}

void Scene::decreaseX(){
  _translation_x -= 1;
}

void Scene::increaseY(){
  _translation_y += 1;
}

void Scene::decreaseY(){
  _translation_y -= 1;
}

// "Scale" is implemented as a translation along Z.
void Scene::increaseScale(){
  _translation_z += 1;
}

void Scene::decreaseScale(){
  _translation_z -= 1;
}

// Enables lighting with a single white light (GL_LIGHT0) and a grey, shiny front
// material. The only reference to it in this file is the commented-out TODO in render().
void Scene::ilumination(){
  const GLfloat light_ambient[] = { 1.0f, 1.0f, 1.0f, 1.0f };
  const GLfloat light_diffuse[] = { 1.0f, 1.0f, 1.0f, 1.0f };
  const GLfloat light_specular[] = { 1.0f, 1.0f, 1.0f, 1.0f };
  const GLfloat light_position[] = { 0.0f, 0.0f, -1250.0f, 0.0f };
  const GLfloat mat_ambient[] = { 0.7f, 0.7f, 0.7f, 1.0f };
  const GLfloat mat_diffuse[] = { 0.8f, 0.8f, 0.8f, 1.0f };
  const GLfloat mat_specular[] = { 1.0f, 1.0f, 1.0f, 1.0f };
  const GLfloat high_shininess[] = { 100.0f };

  glEnable(GL_LIGHT0);
  glEnable(GL_NORMALIZE);
  glEnable(GL_COLOR_MATERIAL);
  glEnable(GL_LIGHTING);

  glLightfv(GL_LIGHT0, GL_AMBIENT, light_ambient);
  glLightfv(GL_LIGHT0, GL_DIFFUSE, light_diffuse);
  glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular);
  glLightfv(GL_LIGHT0, GL_POSITION, light_position);

  glMaterialfv(GL_FRONT, GL_AMBIENT, mat_ambient);
  glMaterialfv(GL_FRONT, GL_DIFFUSE, mat_diffuse);
  glMaterialfv(GL_FRONT, GL_SPECULAR, mat_specular);
  glMaterialfv(GL_FRONT, GL_SHININESS, high_shininess);
}

// Visibility toggles for the RK2 fibers, the RK4 fibers and the vector field.
void Scene::toogleRK2(){
  _display_rk2 = !_display_rk2;
}

void Scene::toogleRK4(){
  _display_rk4 = !_display_rk4;
}

void Scene::toogleDS(){
  _display_vf = !_display_vf;
}
--------------------------------------------------------------------------------
/core/opencl/rk_kernel.cl:
--------------------------------------------------------------------------------
#define MAX_POINTS 10000
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/* Three-component double precision vector */
typedef struct vec{
  double x;
  double y;
  double z;
} vector;

typedef vector* vector_field;

/* Linear index of cell (x, y, z) in a grid stored with x varying fastest */
unsigned int opencl_offset(int n_x, int n_y, unsigned int x, unsigned int y, unsigned int z){
  return x + n_x*y + n_y*n_x*z;
}

/* Returns v with every component multiplied by scalar */
vector mult_scalar(vector v, double scalar){
  vector mult;

  mult.x = v.x*scalar;
  mult.y = v.y*scalar;
  mult.z = v.z*scalar;

  return mult;
}

/* Component-wise v1 + v2 */
vector sum(vector v1, vector v2){
  vector sum;

  sum.x = v1.x + v2.x;
  sum.y = v1.y + v2.y;
  sum.z = v1.z + v2.z;

  return sum;
}

/* Component-wise v1 - v2 */
vector subtract(vector v1, vector v2){
  vector subtraction;

  subtraction.x = v1.x - v2.x;
  subtraction.y = v1.y - v2.y;
  subtraction.z = v1.z - v2.z;

  return subtraction;
}

/* Euclidean norm of v */
double module(vector v){
  return sqrt(pow(v.x, 2) + pow(v.y, 2) + pow(v.z, 2));
}

/* Copies v2 into *v1 */
void set(vector *v1, vector v2){
  (*v1).x = v2.x;
  (*v1).y = v2.y;
  (*v1).z = v2.z;
}

/************************************/
/* Auxiliary Aproximation Functions */
/************************************/ 59 | 60 | vector nearest_neighbour(vector v0, __global int* n_x,__global int* n_y,__global int* n_z, __global vector* field){ 61 | int x, y, z; 62 | vector zero; 63 | 64 | zero.x = zero.y = zero.z = 0.0; 65 | 66 | if( (v0.x - floor(v0.x)) > 0.5 && v0.x < (*n_x - 1)) 67 | x = (int) ceil(v0.x); 68 | else 69 | x = (int) floor(v0.x); 70 | 71 | if( (v0.y - floor(v0.y)) > 0.5 && v0.y < (*n_y - 1)) 72 | y = (int) ceil(v0.y); 73 | else 74 | y = (int) floor(v0.y); 75 | 76 | if( (v0.z - floor(v0.z)) > 0.5 && v0.z < (*n_z - 1)) 77 | z = (int) ceil(v0.z); 78 | else 79 | z = (int) floor(v0.z); 80 | 81 | if(x >= *n_x || y >= *n_y || z >= *n_z || x < 0 || y < 0 || z < 0){ 82 | return zero; 83 | }else{ 84 | return field[opencl_offset(*n_x, *n_y, x, y, z)]; 85 | } 86 | } 87 | vector trilinear_interpolation(vector v0, __global int* n_x,__global int* n_y,__global int* n_z, __global vector* field){ 88 | int x1, y1, z1, x0, y0, z0; 89 | double xd, yd, zd; 90 | 91 | vector P1, P2, P3, P4, P5, P6, P7, P8, X1, X2, X3, X4, Y1, Y2, final; 92 | 93 | x1 = ceil(v0.x); 94 | y1 = ceil(v0.y); 95 | z1 = ceil(v0.z); 96 | x0 = floor(v0.x); 97 | y0 = floor(v0.y); 98 | z0 = floor(v0.z); 99 | xd = v0.x - x0; 100 | yd = v0.y - y0; 101 | zd = v0.z - z0; 102 | 103 | if(x1 >= *n_x || y1 >= *n_y || z1 >= *n_z || x0 < 0 || y0 < 0 || z0 < 0){ 104 | return nearest_neighbour(v0, n_x, n_y, n_z, field); 105 | }else{ 106 | set(&P1, field[opencl_offset(*n_x, *n_y, x0, y0, z0)]); 107 | set(&P2, field[opencl_offset(*n_x, *n_y, x1, y0, z0)]); 108 | set(&P3, field[opencl_offset(*n_x, *n_y, x0, y0, z1)]); 109 | set(&P4, field[opencl_offset(*n_x, *n_y, x1, y0, z1)]); 110 | set(&P5, field[opencl_offset(*n_x, *n_y, x0, y1, z0)]); 111 | set(&P6, field[opencl_offset(*n_x, *n_y, x1, y1, z0)]); 112 | set(&P7, field[opencl_offset(*n_x, *n_y, x0, y1, z1)]); 113 | set(&P8, field[opencl_offset(*n_x, *n_y, x1, y1, z1)]); 114 | 115 | set(&X1, sum(P1, mult_scalar( subtract(P2, P1) , xd ) )); 116 
| set(&X2, sum(P3, mult_scalar( subtract(P4, P3) , xd ) )); 117 | set(&X3, sum(P5, mult_scalar( subtract(P6, P5) , xd ) )); 118 | set(&X4, sum(P7, mult_scalar( subtract(P8, P7) , xd ) )); 119 | 120 | set(&Y1, sum(X1, mult_scalar( subtract(X3, X1) , yd ) )); 121 | set(&Y2, sum(X2, mult_scalar( subtract(X4, X2) , yd ) )); 122 | 123 | set(&final, sum(Y1, mult_scalar( subtract(Y2, Y1) , zd ) )); 124 | 125 | return final; 126 | } 127 | } 128 | 129 | /***********/ 130 | /* Kernels */ 131 | /***********/ 132 | 133 | __kernel void rk2_kernel(__global vector *v0, __global unsigned int* count_v0,__global double* h,__global int* n_x,__global int* n_y,__global int* n_z, __global vector* field,__global vector *points,__global unsigned int *n_points, __global unsigned int* max_points){ 134 | vector k1, k2, initial, direction; 135 | int i, n_points_aux; 136 | 137 | n_points_aux = 0; 138 | 139 | i = get_global_id(0); 140 | 141 | set( &initial, v0[i] ); 142 | set( &direction, field[opencl_offset(*n_x, *n_y, (unsigned int) initial.x, (unsigned int) initial.y, (unsigned int) initial.z)] ); 143 | 144 | while(module(direction) > 0.0 && (n_points_aux < (*max_points) && n_points_aux < MAX_POINTS)){ 145 | n_points_aux++; 146 | 147 | points[opencl_offset((*count_v0), 0, i, n_points_aux - 1, 0)] = initial; 148 | 149 | set( &k1, mult_scalar( direction, *h ) ); 150 | set( &k2, mult_scalar( trilinear_interpolation(sum(initial, mult_scalar( k1, 0.5 )), n_x, n_y, n_z, field), *h) ); 151 | 152 | set( &initial, sum( initial, k2) ); 153 | set( &direction, trilinear_interpolation(initial, n_x, n_y, n_z, field) ); 154 | } 155 | 156 | n_points[i] = n_points_aux; 157 | } 158 | 159 | __kernel void rk4_kernel(__global vector *v0, __global unsigned int* count_v0,__global double* h,__global int* n_x,__global int* n_y,__global int* n_z, __global vector* field,__global vector *points,__global unsigned int *n_points, __global unsigned int* max_points){ 160 | vector k1, k2, k3, k4, initial, direction; 161 | 
unsigned int i, n_points_aux; 162 | 163 | n_points_aux = 0; 164 | 165 | i = get_global_id(0); 166 | 167 | set( &initial, v0[i] ); 168 | set( &direction, field[opencl_offset(*n_x, *n_y,(unsigned int) initial.x,(unsigned int) initial.y,(unsigned int) initial.z)] ); 169 | 170 | while(module(direction) > 0.0 && (n_points_aux < (*max_points) && n_points_aux < MAX_POINTS)){ 171 | n_points_aux++; 172 | 173 | points[opencl_offset((*count_v0), 0, i, n_points_aux - 1, 0)] = initial; 174 | 175 | set( &k1, mult_scalar( direction, *h ) ); 176 | set( &k2, mult_scalar( trilinear_interpolation(sum(initial, mult_scalar( k1, 0.5 )), n_x, n_y, n_z, field), *h) ); 177 | set( &k3, mult_scalar( trilinear_interpolation(sum(initial, mult_scalar( k2, 0.5 )), n_x, n_y, n_z, field), *h) ); 178 | set( &k4, mult_scalar( trilinear_interpolation(sum(initial, k3), n_x, n_y, n_z, field), *h) ); 179 | 180 | set( &initial, sum( initial, sum( mult_scalar( k1 , 0.166666667 ), sum( mult_scalar( k2, 0.333333333 ), sum( mult_scalar( k3, 0.333333333 ), mult_scalar( k4, 0.166666667 ) ) ) ) ) ); 181 | set( &direction, trilinear_interpolation(initial, n_x, n_y, n_z, field) ); 182 | } 183 | 184 | n_points[i] = n_points_aux; 185 | } 186 | -------------------------------------------------------------------------------- /core/opencl/opcl.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace runge_kutta; 7 | 8 | RK_OpenCL::RK_OpenCL(){ 9 | _device_used = 0; 10 | _time = 0.0; 11 | } 12 | 13 | void RK_OpenCL::opencl_create_platform(unsigned int num_platforms){ 14 | cl_uint num_platforms_found; 15 | 16 | clGetPlatformIDs( 0, NULL, &num_platforms_found); 17 | if ( clGetPlatformIDs( num_platforms, &_platform, &num_platforms_found ) != CL_SUCCESS ){ 18 | printf("\nERROR: Failed to create platform.\n"); 19 | exit(-1); 20 | } 21 | } 22 | 23 | void RK_OpenCL::opencl_get_devices_id() { 24 | 25 | 
clGetDeviceIDs(_platform, CL_DEVICE_TYPE_ALL, 0, NULL, &_devices_found); 26 | _devices = (cl_device_id*) malloc(_devices_found*(sizeof(cl_device_id))); 27 | 28 | if(clGetDeviceIDs( _platform, CL_DEVICE_TYPE_ALL, _devices_found, _devices, NULL) != CL_SUCCESS){ 29 | printf("\nERROR: Failed to get devices id's.\n"); 30 | exit(-1); 31 | } 32 | } 33 | 34 | void RK_OpenCL::opencl_create_context(){ 35 | if( (_context = clCreateContext( 0, 1, _devices, NULL, NULL, NULL )) == NULL ){ 36 | printf("\nERROR: Failed to create context.\n"); 37 | exit(-1); 38 | } 39 | } 40 | 41 | void RK_OpenCL::opencl_create_queue(){ 42 | if((_queue = clCreateCommandQueue(_context, _devices[_device_used], CL_QUEUE_PROFILING_ENABLE, NULL)) == NULL ){ 43 | printf("\nERROR: Failed to create queue.\n"); 44 | exit(-1); 45 | } 46 | } 47 | 48 | char* RK_OpenCL::opencl_load_program_from_source(int *size) { 49 | char* program_string; 50 | FILE* prog; 51 | 52 | prog = fopen("core/opencl/rk_kernel.cl", "r"); 53 | fseek(prog, 0, SEEK_END); 54 | *size = ftell(prog); 55 | fseek(prog, 0, SEEK_SET); 56 | 57 | program_string = (char*) malloc((*size+1)*sizeof(char)); 58 | *size = fread(program_string, 1, *size, prog); 59 | fclose(prog); 60 | program_string[*size] = '\0'; 61 | 62 | return program_string; 63 | } 64 | 65 | void RK_OpenCL::opencl_build_program(){ 66 | int err; 67 | char *build_log; 68 | size_t ret_val_size; 69 | 70 | err = clBuildProgram(_program, 0, NULL, NULL, NULL, NULL); 71 | if ( err != CL_SUCCESS ) { 72 | printf("\nERROR: Failed to build program.\n"); 73 | clGetProgramBuildInfo(_program, _devices[_device_used], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); 74 | 75 | build_log = (char*) malloc((ret_val_size+1)*sizeof(char)); 76 | clGetProgramBuildInfo(_program, _devices[_device_used], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); 77 | build_log[ret_val_size] = '\0'; 78 | printf("BUILD LOG: \n %s\n", build_log); 79 | exit(-1); 80 | } 81 | } 82 | 83 | void 
RK_OpenCL::opencl_create_program(){ 84 | char* program_source; 85 | int size; 86 | size_t prog_size; 87 | cl_int err; 88 | 89 | program_source = opencl_load_program_from_source(&size); 90 | prog_size = (size_t)size; 91 | _program = clCreateProgramWithSource(_context, 1, (const char**)&program_source, &prog_size, &err); 92 | 93 | if ( err != CL_SUCCESS ){ 94 | printf("\nERROR: Failed to create program.\n"); 95 | exit(-1); 96 | } 97 | opencl_build_program(); 98 | } 99 | 100 | void RK_OpenCL::opencl_create_kernel(char* kernel_name){ 101 | 102 | _kernel = clCreateKernel(_program, kernel_name, NULL); 103 | if ( _kernel == NULL ){ 104 | printf("\nERROR: Failed to create kernel %s.\n",kernel_name); 105 | exit(-1); 106 | } 107 | } 108 | 109 | void RK_OpenCL::opencl_prepare_kernel(vector *v0, unsigned int count_v0, double h, int n_x,int n_y,int n_z, vector_field field, unsigned int max_points){ 110 | 111 | _opencl_v0 = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(vector)*(count_v0), v0, NULL); 112 | _opencl_count_v0 = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &count_v0, NULL); 113 | _opencl_h = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(double), &h, NULL); 114 | _opencl_n_x = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &n_x, NULL); 115 | _opencl_n_y = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &n_y, NULL); 116 | _opencl_n_z = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &n_z, NULL); 117 | _opencl_field = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(vector)*n_x*n_y*n_z, field, NULL); 118 | _opencl_points = clCreateBuffer(_context, CL_MEM_WRITE_ONLY , sizeof(vector)*count_v0*max_points, NULL, NULL); 119 | _opencl_n_points = clCreateBuffer(_context, CL_MEM_WRITE_ONLY , sizeof(unsigned int)*(count_v0), NULL, NULL); 120 | _opencl_max_points = 
clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(unsigned int), &max_points, NULL); 121 | 122 | clSetKernelArg(_kernel, 0, sizeof(cl_mem), (void *)&_opencl_v0); 123 | clSetKernelArg(_kernel, 1, sizeof(cl_mem), (void *)&_opencl_count_v0); 124 | clSetKernelArg(_kernel, 2, sizeof(cl_mem), (void *)&_opencl_h); 125 | clSetKernelArg(_kernel, 3, sizeof(cl_mem), (void *)&_opencl_n_x); 126 | clSetKernelArg(_kernel, 4, sizeof(cl_mem), (void *)&_opencl_n_y); 127 | clSetKernelArg(_kernel, 5, sizeof(cl_mem), (void *)&_opencl_n_z); 128 | clSetKernelArg(_kernel, 6, sizeof(cl_mem), (void *)&_opencl_field); 129 | clSetKernelArg(_kernel, 7, sizeof(cl_mem), (void *)&_opencl_points); 130 | clSetKernelArg(_kernel, 8, sizeof(cl_mem), (void *)&_opencl_n_points); 131 | clSetKernelArg(_kernel, 9, sizeof(cl_mem), (void *)&_opencl_max_points); 132 | 133 | clFinish(_queue); 134 | } 135 | 136 | void RK_OpenCL::opencl_run_kernel(unsigned int count_v0, unsigned int max_points, Fiber **fibers){ 137 | size_t work_dim[1]; 138 | unsigned int i, j, *n_points; 139 | vector *points; 140 | 141 | n_points = (unsigned int*) malloc(count_v0*sizeof(unsigned int)); 142 | points = (vector*) malloc(count_v0*max_points*sizeof(vector)); 143 | 144 | work_dim[0] = count_v0; 145 | clEnqueueNDRangeKernel(_queue, _kernel, 1, NULL, work_dim, NULL, 0, NULL, &_event); 146 | opencl_time(&_event); 147 | clReleaseEvent(_event); 148 | clFinish(_queue); 149 | 150 | if( clEnqueueReadBuffer(_queue, _opencl_n_points, CL_TRUE, 0, sizeof(unsigned int)*count_v0, n_points, 0, NULL, &_event) == CL_INVALID_VALUE ){ 151 | printf("\nERROR: Failed to read buffer \"n_points\".\n"); 152 | exit(-1); 153 | } 154 | opencl_time(&_event); 155 | clReleaseEvent(_event); 156 | 157 | if( clEnqueueReadBuffer(_queue, _opencl_points, CL_TRUE, 0, sizeof(vector)*count_v0*max_points, points, 0, NULL, &_event) == CL_INVALID_VALUE ){ 158 | printf("\nERROR: Failed to read buffer \"points\".\n"); 159 | exit(-1); 160 | } 161 | 
opencl_time(&_event); 162 | clReleaseEvent(_event); 163 | 164 | *fibers = (runge_kutta::Fiber *) malloc(count_v0*sizeof(runge_kutta::Fiber)); 165 | for(i = 0; i < count_v0; i++){ 166 | (*fibers)[i] = runge_kutta::Fiber(n_points[i]); 167 | for(j = 0; j < n_points[i]; j++){ 168 | (*fibers)[i].setPoint(j, points[(i+count_v0*j)]); 169 | } 170 | } 171 | 172 | free(n_points); 173 | free(points); 174 | } 175 | 176 | void RK_OpenCL::opencl_time(cl_event* timer){ 177 | cl_ulong _start, _finish; 178 | 179 | clWaitForEvents(1, &_event); 180 | 181 | if (clGetEventProfilingInfo(_event, CL_PROFILING_COMMAND_START, (size_t)sizeof(cl_ulong), &_start, NULL) != CL_SUCCESS) 182 | printf("\nERROR: Failed to get profiling info.\n"); 183 | 184 | if (clGetEventProfilingInfo(_event, CL_PROFILING_COMMAND_END, (size_t)sizeof(cl_ulong), &_finish, NULL) != CL_SUCCESS) 185 | printf("\nERROR: Failed to get profiling info.\n"); 186 | 187 | _time += (double)(_finish-_start); 188 | } 189 | 190 | void RK_OpenCL::opencl_init(char* kernel_name, vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, Fiber **fibers){ 191 | unsigned int max_points; 192 | 193 | max_points = 10000; 194 | 195 | opencl_create_platform(2); 196 | opencl_get_devices_id(); 197 | opencl_create_context(); 198 | opencl_create_queue(); 199 | opencl_create_program(); 200 | opencl_create_kernel(kernel_name); 201 | opencl_prepare_kernel(v0, count_v0, h, n_x, n_y, n_z, field, max_points); 202 | opencl_run_kernel(count_v0, max_points, fibers); 203 | } 204 | 205 | void RK_OpenCL::rk2_caller(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, Fiber **fibers){ 206 | opencl_init((char*)"rk2_kernel",v0, count_v0, h, n_x, n_y, n_z, field, fibers); 207 | printf("CPU/GPU time for RK2: %fs\n", _time*1.0e-9); 208 | } 209 | 210 | void RK_OpenCL::rk4_caller(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, Fiber **fibers){ 211 | 
opencl_init((char*)"rk4_kernel",v0, count_v0, h, n_x, n_y, n_z, field, fibers); 212 | printf("CPU/GPU time for RK4: %fs\n", _time*1.0e-9); 213 | } 214 | -------------------------------------------------------------------------------- /core/c/rk_kernel.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace runge_kutta; 11 | 12 | vector sum(vector v1, vector v2){ 13 | vector sum; 14 | 15 | sum.x = v1.x + v2.x; 16 | sum.y = v1.y + v2.y; 17 | sum.z = v1.z + v2.z; 18 | 19 | return sum; 20 | } 21 | 22 | vector subtract(vector v1, vector v2){ 23 | vector subtraction; 24 | 25 | subtraction.x = v1.x - v2.x; 26 | subtraction.y = v1.y - v2.y; 27 | subtraction.z = v1.z - v2.z; 28 | 29 | return subtraction; 30 | } 31 | 32 | vector mult_scalar(vector v, double scalar){ 33 | vector mult; 34 | 35 | mult.x = v.x*scalar; 36 | mult.y = v.y*scalar; 37 | mult.z = v.z*scalar; 38 | 39 | return mult; 40 | } 41 | 42 | void set(vector *x, vector y){ 43 | (*x).x = y.x; 44 | (*x).y = y.y; 45 | (*x).z = y.z; 46 | } 47 | 48 | double module(vector v){ 49 | return sqrt(pow(v.x, 2) + pow(v.y, 2) + pow(v.z, 2)); 50 | } 51 | 52 | double distance(vector x, vector y){ 53 | return module(sum(x, mult_scalar(y, -1.0))); 54 | } 55 | 56 | vector nearest_neighbour(vector v0, int n_x, int n_y, int n_z, vector_field field){ 57 | int x, y, z; 58 | vector zero; 59 | 60 | zero.x = zero.y = zero.z = 0.0; 61 | 62 | if( (v0.x - floor(v0.x)) > 0.5 && v0.x < (n_x - 1)) 63 | x = (int) ceil(v0.x); 64 | else 65 | x = (int) floor(v0.x); 66 | 67 | if( (v0.y - floor(v0.y)) > 0.5 && v0.y < (n_y - 1)) 68 | y = (int) ceil(v0.y); 69 | else 70 | y = (int) floor(v0.y); 71 | 72 | if( (v0.z - floor(v0.z)) > 0.5 && v0.z < (n_z - 1)) 73 | z = (int) ceil(v0.z); 74 | else 75 | z = (int) floor(v0.z); 76 | 77 | if(x >= n_x || y >= n_y || z >= n_z || x < 0 || y < 0 || z < 0) 78 | 
return zero; 79 | else 80 | return field[DataSet::offset(n_x, n_y, x, y, z)]; 81 | } 82 | 83 | vector trilinear_interpolation(vector v0, int n_x, int n_y, int n_z, vector_field field){ 84 | int x1, y1, z1, x0, y0, z0; 85 | double xd, yd, zd; 86 | 87 | vector P1, P2, P3, P4, P5, P6, P7, P8, X1, X2, X3, X4, Y1, Y2, final; 88 | 89 | x1 = ceil(v0.x); 90 | y1 = ceil(v0.y); 91 | z1 = ceil(v0.z); 92 | x0 = floor(v0.x); 93 | y0 = floor(v0.y); 94 | z0 = floor(v0.z); 95 | xd = v0.x - x0; 96 | yd = v0.y - y0; 97 | zd = v0.z - z0; 98 | 99 | if(x1 >= n_x || y1 >= n_y || z1 >= n_z || x0 < 0 || y0 < 0 || z0 < 0){ 100 | return nearest_neighbour(v0, n_x, n_y, n_z, field); 101 | }else{ 102 | set(&P1, field[DataSet::offset(n_x, n_y, x0, y0, z0)]); 103 | set(&P2, field[DataSet::offset(n_x, n_y, x1, y0, z0)]); 104 | set(&P3, field[DataSet::offset(n_x, n_y, x0, y0, z1)]); 105 | set(&P4, field[DataSet::offset(n_x, n_y, x1, y0, z1)]); 106 | set(&P5, field[DataSet::offset(n_x, n_y, x0, y1, z0)]); 107 | set(&P6, field[DataSet::offset(n_x, n_y, x1, y1, z0)]); 108 | set(&P7, field[DataSet::offset(n_x, n_y, x0, y1, z1)]); 109 | set(&P8, field[DataSet::offset(n_x, n_y, x1, y1, z1)]); 110 | 111 | set(&X1, sum(P1, mult_scalar( subtract(P2, P1) , xd ) )); 112 | set(&X2, sum(P3, mult_scalar( subtract(P4, P3) , xd ) )); 113 | set(&X3, sum(P5, mult_scalar( subtract(P6, P5) , xd ) )); 114 | set(&X4, sum(P7, mult_scalar( subtract(P8, P7) , xd ) )); 115 | 116 | set(&Y1, sum(X1, mult_scalar( subtract(X3, X1) , yd ) )); 117 | set(&Y2, sum(X2, mult_scalar( subtract(X4, X2) , yd ) )); 118 | 119 | set(&final, sum(Y1, mult_scalar( subtract(Y2, Y1) , zd ) )); 120 | 121 | return final; 122 | } 123 | } 124 | 125 | void *rk2_kernel(void *args){ 126 | int j, n_points_aux; 127 | vector k1, k2, initial, direction; 128 | vector *points_aux; 129 | kernel_args arguments = *( (kernel_args *) args ); 130 | 131 | points_aux = NULL; 132 | n_points_aux = 0; 133 | 134 | set( &initial, arguments.v0[arguments.id] ); 135 | set( 
&direction, arguments.field[DataSet::offset(arguments.n_x, arguments.n_y, initial.x, initial.y, initial.z)] ); 136 | 137 | while(module(direction) > 0 && n_points_aux < MAX_POINTS){ 138 | n_points_aux++; 139 | points_aux = (vector *) realloc(points_aux, n_points_aux*sizeof(vector)); 140 | 141 | set( &(points_aux[n_points_aux - 1]), initial); 142 | 143 | set( &k1, mult_scalar( direction, arguments.h ) ); 144 | set( &k2, mult_scalar( trilinear_interpolation(sum(initial, mult_scalar( k1, 0.5 )), arguments.n_x, arguments.n_y, arguments.n_z, arguments.field), arguments.h) ); 145 | 146 | set( &initial, sum( initial, k2) ); 147 | set( &direction, trilinear_interpolation(initial, arguments.n_x, arguments.n_y, arguments.n_z, arguments.field) ); 148 | } 149 | 150 | (arguments.fibers)[arguments.id] = Fiber(n_points_aux); 151 | for(j = 0; j < n_points_aux; j++) 152 | (arguments.fibers)[arguments.id].setPoint(j, points_aux[j]); 153 | 154 | return NULL; 155 | } 156 | 157 | void rk2_caller(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, Fiber **fibers){ 158 | int i; 159 | clock_t start, finish; 160 | kernel_args *arguments; 161 | pthread_t *threads; 162 | 163 | start = clock(); 164 | 165 | *fibers = (Fiber *) malloc(count_v0*sizeof(Fiber)); 166 | 167 | threads = (pthread_t *) malloc(count_v0*sizeof(pthread_t)); 168 | arguments = (kernel_args *) malloc(count_v0*sizeof(kernel_args)); 169 | for(i = 0; i < count_v0; i++){ 170 | arguments[i].id = i; 171 | arguments[i].v0 = v0; 172 | arguments[i].count_v0 = count_v0; 173 | arguments[i].h = h; 174 | arguments[i].n_x = n_x; 175 | arguments[i].n_y = n_y; 176 | arguments[i].n_z = n_z; 177 | arguments[i].field = field; 178 | arguments[i].fibers = *fibers; 179 | 180 | pthread_create(&(threads[i]), NULL, rk2_kernel, (void *) ( &(arguments[i]) ) ); 181 | } 182 | 183 | for(i = 0; i < count_v0; i++) 184 | pthread_join(threads[i], NULL); 185 | 186 | 187 | finish = clock(); 188 | 189 | printf("CPU time for RK2: 
%fs\n", ((double) (finish - start))/((double) CLOCKS_PER_SEC)); 190 | } 191 | 192 | void *rk4_kernel(void *args){ 193 | int j, n_points_aux; 194 | vector k1, k2, k3, k4, initial, direction; 195 | vector *points_aux; 196 | kernel_args arguments = *( (kernel_args *) args ); 197 | 198 | points_aux = NULL; 199 | n_points_aux = 0; 200 | 201 | set( &initial, arguments.v0[arguments.id] ); 202 | set( &direction, arguments.field[DataSet::offset(arguments.n_x, arguments.n_y, initial.x, initial.y, initial.z)] ); 203 | 204 | while(module(direction) > 0 && n_points_aux < MAX_POINTS){ 205 | n_points_aux++; 206 | points_aux = (vector *) realloc(points_aux, n_points_aux*sizeof(vector)); 207 | 208 | set( &(points_aux[n_points_aux - 1]), initial); 209 | 210 | set( &k1, mult_scalar( direction, arguments.h ) ); 211 | set( &k2, mult_scalar( trilinear_interpolation(sum(initial, mult_scalar( k1, 0.5 )), arguments.n_x, arguments.n_y, arguments.n_z, arguments.field), arguments.h) ); 212 | set( &k3, mult_scalar( trilinear_interpolation(sum(initial, mult_scalar( k2, 0.5 )), arguments.n_x, arguments.n_y, arguments.n_z, arguments.field), arguments.h) ); 213 | set( &k4, mult_scalar( trilinear_interpolation(sum(initial, k3), arguments.n_x, arguments.n_y, arguments.n_z, arguments.field), arguments.h) ); 214 | 215 | set( &initial, sum( initial, sum( mult_scalar( k1 , 0.166666667 ), sum( mult_scalar( k2, 0.333333333 ), sum( mult_scalar( k3, 0.333333333 ), mult_scalar( k4, 0.166666667 ) ) ) ) ) ); 216 | set( &direction, trilinear_interpolation(initial, arguments.n_x, arguments.n_y, arguments.n_z, arguments.field) ); 217 | } 218 | 219 | (arguments.fibers)[arguments.id] = Fiber(n_points_aux); 220 | for(j = 0; j < n_points_aux; j++) 221 | (arguments.fibers)[arguments.id].setPoint(j, points_aux[j]); 222 | 223 | return NULL; 224 | } 225 | 226 | void rk4_caller(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, Fiber **fibers){ 227 | int i; 228 | clock_t start, finish; 229 
kernel_args *arguments;
  pthread_t *threads;

  start = clock();

  *fibers = (Fiber *) malloc(count_v0*sizeof(Fiber));

  threads = (pthread_t *) malloc(count_v0*sizeof(pthread_t));
  arguments = (kernel_args *) malloc(count_v0*sizeof(kernel_args));
  if(count_v0 > 0 && (*fibers == NULL || threads == NULL || arguments == NULL)){
    printf("\nERROR: Failed to allocate memory for %d fibers.\n", count_v0);
    exit(-1);
  }

  for(i = 0; i < count_v0; i++){
    arguments[i].id = i;
    arguments[i].v0 = v0;
    arguments[i].count_v0 = count_v0;
    arguments[i].h = h;
    arguments[i].n_x = n_x;
    arguments[i].n_y = n_y;
    arguments[i].n_z = n_z;
    arguments[i].field = field;
    arguments[i].fibers = *fibers;

    /* A thread that was never created must not be joined below. */
    if(pthread_create(&(threads[i]), NULL, rk4_kernel, (void *) ( &(arguments[i]) ) ) != 0){
      printf("\nERROR: Failed to create thread %d.\n", i);
      exit(-1);
    }
  }

  for(i = 0; i < count_v0; i++)
    pthread_join(threads[i], NULL);

  finish = clock();

  printf("CPU time for RK4: %fs\n", ((double) (finish - start))/((double) CLOCKS_PER_SEC));

  /* Per-run bookkeeping; the fibers themselves stay with the caller. */
  free(threads);
  free(arguments);
}
--------------------------------------------------------------------------------
/core/cuda/rk_kernel.cu:
--------------------------------------------------------------------------------
#include
#include
#include
#include "cuda.h"
#include "cuda_runtime.h"
#include
#include
#include

/******************************/
/* Auxiliary Vector Functions */
/******************************/

/*FIXME: there must be libraries inside CUDA to work with vectors*/

/* Linear index of node (x, y, z) in a grid with n_x * n_y nodes per z slice. */
__device__ int cuda_offset(int n_x, int n_y, int x, int y, int z){
  return x + n_x*y + n_y*n_x*z;
}

/* Component-wise sum v1 + v2. */
__device__ vector sum(vector v1, vector v2){
  vector result;

  result.x = v1.x + v2.x;
  result.y = v1.y + v2.y;
  result.z = v1.z + v2.z;

  return result;
}

/* Component-wise difference v1 - v2. */
__device__ vector subtract(vector v1, vector v2){
  vector result;

  result.x = v1.x - v2.x;
  result.y = v1.y - v2.y;
  result.z = v1.z - v2.z;

  return result;
}


/* Returns v with every component multiplied by scalar. */
__device__ vector mult_scalar(vector v, double scalar){
  vector result;

  result.x = v.x*scalar;
  result.y = v.y*scalar;
  result.z = v.z*scalar;

  return result;
}

/* Copies the three components of y into *x. */
__device__ void set(vector *x, vector y){
  x->x = y.x;
  x->y = y.y;
  x->z = y.z;
}

/* Euclidean norm of v. */
__device__ double module(vector v){
  return sqrt(pow(v.x, 2) + pow(v.y, 2) + pow(v.z, 2));
}

/* Euclidean distance between x and y. */
__device__ double distance(vector x, vector y){
  return module(sum(x, mult_scalar(y, -1.0)));
}

/*************************************/
/* Auxiliary Approximation Functions */
/*************************************/

/*
 * Samples the field at the grid node nearest to v0.
 *
 * Each coordinate is rounded up only when its fractional part is above 0.5
 * and it is still below the last index of that axis; otherwise it is rounded
 * down. Returns the zero vector when the chosen node is outside the
 * n_x * n_y * n_z grid.
 */
__device__ vector nearest_neighbour(vector v0, int n_x, int n_y, int n_z, vector_field field){
  int x, y, z;
  vector zero;

  zero.x = zero.y = zero.z = 0.0;

  if( (v0.x - floor(v0.x)) > 0.5 && v0.x < (n_x - 1))
    x = (int) ceil(v0.x);
  else
    x = (int) floor(v0.x);

  if( (v0.y - floor(v0.y)) > 0.5 && v0.y < (n_y - 1))
    y = (int) ceil(v0.y);
  else
    y = (int) floor(v0.y);

  if( (v0.z - floor(v0.z)) > 0.5 && v0.z < (n_z - 1))
    z = (int) ceil(v0.z);
  else
    z = (int) floor(v0.z);

  if(x >= n_x || y >= n_y || z >= n_z || x < 0 || y < 0 || z < 0){
    return zero;
  }else{
    return field[cuda_offset(n_x, n_y, x, y, z)];
  }
}

/*
 * Samples the field at v0 by trilinear interpolation of the eight nodes of
 * the grid cell that contains it: first along x, then along y and finally
 * along z. When any of those nodes lies outside the grid the result of
 * nearest_neighbour() is returned instead.
 */
__device__ vector trilinear_interpolation(vector v0, int n_x, int n_y, int n_z, vector_field field){
  int x1, y1, z1, x0, y0, z0;
  double xd, yd, zd;

  vector P1, P2, P3, P4, P5, P6, P7, P8, X1, X2, X3, X4, Y1, Y2, final;

  /* Lower (x0, y0, z0) and upper (x1, y1, z1) corners of the cell. */
  x1 = ceil(v0.x);
  y1 = ceil(v0.y);
  z1 = ceil(v0.z);
  x0 = floor(v0.x);
  y0 = floor(v0.y);
  z0 = floor(v0.z);

  /* Position of v0 inside the cell, measured from the lower corner. */
  xd = v0.x - x0;
  yd = v0.y - y0;
  zd = v0.z - z0;

  if(x1 >= n_x || y1 >= n_y || z1 >= n_z || x0 < 0 || y0 < 0 || z0 < 0){
    return nearest_neighbour(v0, n_x, n_y, n_z, field);
  }else{
    set(&P1, field[cuda_offset(n_x, n_y, x0, y0, z0)]);
    set(&P2, field[cuda_offset(n_x, n_y, x1, y0, z0)]);
    set(&P3, field[cuda_offset(n_x, n_y, x0, y0, z1)]);
    set(&P4, field[cuda_offset(n_x, n_y, x1, y0, z1)]);
    set(&P5, field[cuda_offset(n_x, n_y, x0, y1, z0)]);
    set(&P6, field[cuda_offset(n_x, n_y, x1, y1, z0)]);
    set(&P7, field[cuda_offset(n_x, n_y, x0, y1, z1)]);
    set(&P8, field[cuda_offset(n_x, n_y, x1, y1, z1)]);

    set(&X1, sum(P1, mult_scalar( subtract(P2, P1) , xd ) ));
    set(&X2, sum(P3, mult_scalar( subtract(P4, P3) , xd ) ));
    set(&X3, sum(P5, mult_scalar( subtract(P6, P5) , xd ) ));
    set(&X4, sum(P7, mult_scalar( subtract(P8, P7) , xd ) ));

    set(&Y1, sum(X1, mult_scalar( subtract(X3, X1) , yd ) ));
    set(&Y2, sum(X2, mult_scalar( subtract(X4, X2) , yd ) ));

    set(&final, sum(Y1, mult_scalar( subtract(Y2, Y1) , zd ) ));

    return final;
  }
}

/***********/
/* Kernels */
/***********/

/*
 * Second order (midpoint) Runge-Kutta; one thread traces one fiber.
 *
 * Launch layout: one-dimensional grid of one-dimensional blocks with at least
 * count_v0 threads in total; surplus threads return immediately. No shared
 * memory is used.
 *
 * Thread i starts at v0[i], stores point j of its fiber at
 * points[i + count_v0*j] and its number of points at n_points[i]. A fiber
 * ends when the field vanishes at the current position or when it reaches
 * min(max_points, MAX_POINTS) points, so points must hold at least
 * count_v0*min(max_points, MAX_POINTS) vectors.
 */
__global__ void rk2_kernel(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, vector *points, int *n_points, int max_points){
  /*TODO: moving the field to the shared memory should increase performance*/
  vector k1, k2, initial, direction;
  int i, n_points_aux;

  i = blockIdx.x*blockDim.x + threadIdx.x;
  if(i >= count_v0)
    return;

  n_points_aux = 0;

  set( &initial, v0[i] );
  /* The seed is sampled at the node obtained by truncating its coordinates. */
  set( &direction, field[cuda_offset(n_x, n_y, (int) initial.x, (int) initial.y, (int) initial.z)] );

  while(module(direction) > 0.0 && (n_points_aux < max_points && n_points_aux < MAX_POINTS)){
    /* size_t index: count_v0*max_points may not fit in an int. */
    set( &(points[(size_t)i + (size_t)count_v0*(size_t)n_points_aux]), initial );
    n_points_aux++;

    set( &k1, mult_scalar( direction, h ) );
    set( &k2, mult_scalar( trilinear_interpolation(sum(initial, mult_scalar( k1, 0.5 )), n_x, n_y, n_z, field), h) );

    set( &initial, sum( initial, k2) );
    set( &direction, trilinear_interpolation(initial, n_x, n_y, n_z, field) );
  }

  n_points[i] = n_points_aux;
}

/*
 * Classic fourth order Runge-Kutta; one thread traces one fiber.
 *
 * Launch layout: one-dimensional grid of one-dimensional blocks with at least
 * count_v0 threads in total; surplus threads return immediately. No shared
 * memory is used.
 *
 * Thread i starts at v0[i], stores point j of its fiber at
 * points[i + count_v0*j] and its number of points at n_points[i]. A fiber
 * ends when the field vanishes at the current position or when it reaches
 * min(max_points, MAX_POINTS) points, so points must hold at least
 * count_v0*min(max_points, MAX_POINTS) vectors.
 */
__global__ void rk4_kernel(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, vector *points, int *n_points, int max_points){
  /*TODO: moving the field to the shared memory should increase performance*/
  vector k1, k2, k3, k4, initial, direction;
  int i, n_points_aux;

  i = blockIdx.x*blockDim.x + threadIdx.x;
  if(i >= count_v0)
    return;

  n_points_aux = 0;

  set( &initial, v0[i] );
  /* The seed is sampled at the node obtained by truncating its coordinates. */
  set( &direction, field[cuda_offset(n_x, n_y, (int) initial.x, (int) initial.y, (int) initial.z)] );

  while(module(direction) > 0.0 && (n_points_aux < max_points && n_points_aux < MAX_POINTS)){
    /* size_t index: count_v0*max_points may not fit in an int. */
    set( &(points[(size_t)i + (size_t)count_v0*(size_t)n_points_aux]), initial );
    n_points_aux++;

    set( &k1, mult_scalar( direction, h ) );
    set( &k2, mult_scalar( trilinear_interpolation(sum(initial, mult_scalar( k1, 0.5 )), n_x, n_y, n_z, field), h) );
    set( &k3, mult_scalar( trilinear_interpolation(sum(initial, mult_scalar( k2, 0.5 )), n_x, n_y, n_z, field), h) );
    set( &k4, mult_scalar( trilinear_interpolation(sum(initial, k3), n_x, n_y, n_z, field), h) );

    /* Weighted average (k1 + 2*k2 + 2*k3 + k4)/6. */
    set( &initial, sum( initial, sum( mult_scalar( k1 , 0.166666667 ), sum( mult_scalar( k2, 0.333333333 ), sum( mult_scalar( k3, 0.333333333 ), mult_scalar( k4, 0.166666667 ) ) ) ) ) );
    set( &direction, trilinear_interpolation(initial, n_x, n_y, n_z, field) );
  }

  n_points[i] = n_points_aux;
}

/***********/
/* Callers */
/***********/

/*TODO: adjust threads per block to maximize performance*/
static const int RK_THREADS_PER_BLOCK = 128;

/* Aborts the process with a readable message when a CUDA call failed. */
static void check_cuda(cudaError_t status, const char *what){
  if(status != cudaSuccess){
    printf("\nERROR: %s failed: %s\n", what, cudaGetErrorString(status));
    exit(-1);
  }
}

/* Aborts the process when a device allocation of `bytes` bytes failed. */
static void require_alloc(cudaError_t status, size_t bytes, const char *what){
  if(status != cudaSuccess){
    printf("\nCould not allocate %fMB for the %s\n", bytes/1024.0/1024.0, what);
    exit(-1);
  }
}

/*
 * Shared implementation of rk2_caller and rk4_caller.
 *
 * Uploads the initial points and the field, launches the kernel selected by
 * order (2 launches rk2_kernel, anything else rk4_kernel) with one thread per
 * initial point, downloads the traced points and converts them into an array
 * of count_v0 fibers stored in *fibers (allocated here with malloc). Prints
 * the elapsed time measured with CUDA events, transfers included.
 *
 * The number of points kept per fiber is what fits in 90% of the free device
 * memory, capped at MAX_POINTS because the kernels never trace further.
 * With no initial points *fibers is set to NULL and nothing is launched.
 * Any CUDA or host allocation failure aborts the process.
 */
static void run_rk(int order, vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, runge_kutta::Fiber **fibers){
  vector *d_v0;
  vector_field d_field;
  vector *d_points, *points_aux;
  int *d_n_points, *n_points_aux;
  int i, j, max_points, blocks;
  size_t available, total;
  size_t seeds_bytes, field_bytes, counts_bytes, points_bytes;
  double affordable;
  cudaEvent_t start, finish;
  float time;

  if(count_v0 <= 0){
    *fibers = NULL;
    return;
  }

  /* Sizes are computed in size_t so large inputs cannot overflow an int. */
  seeds_bytes = (size_t)count_v0*sizeof(vector);
  field_bytes = (size_t)n_x*(size_t)n_y*(size_t)n_z*sizeof(vector);
  counts_bytes = (size_t)count_v0*sizeof(int);

  check_cuda(cudaEventCreate(&start), "cudaEventCreate");
  check_cuda(cudaEventCreate(&finish), "cudaEventCreate");

  check_cuda(cudaEventRecord(start, 0), "cudaEventRecord");

  require_alloc(cudaMalloc(&d_v0, seeds_bytes), seeds_bytes, "initial points");
  require_alloc(cudaMalloc(&d_field, field_bytes), field_bytes, "vector field");
  require_alloc(cudaMalloc(&d_n_points, counts_bytes), counts_bytes, "points count vector");

  check_cuda(cudaMemGetInfo(&available, &total), "cudaMemGetInfo");
  affordable = (available*0.9)/((double) sizeof(vector)*count_v0);
  max_points = (affordable < MAX_POINTS) ? (int) affordable : MAX_POINTS;
  points_bytes = (size_t)count_v0*(size_t)max_points*sizeof(vector);
  require_alloc(cudaMalloc(&d_points, points_bytes), points_bytes, "fibers");

  check_cuda(cudaMemcpy(d_v0, v0, seeds_bytes, cudaMemcpyHostToDevice), "copy of the initial points");
  check_cuda(cudaMemcpy(d_field, field, field_bytes, cudaMemcpyHostToDevice), "copy of the vector field");

  blocks = (count_v0 + RK_THREADS_PER_BLOCK - 1)/RK_THREADS_PER_BLOCK;
  if(order == 2)
    rk2_kernel<<<blocks,RK_THREADS_PER_BLOCK>>>(d_v0, count_v0, h, n_x, n_y, n_z, d_field, d_points, d_n_points, max_points);
  else
    rk4_kernel<<<blocks,RK_THREADS_PER_BLOCK>>>(d_v0, count_v0, h, n_x, n_y, n_z, d_field, d_points, d_n_points, max_points);
  check_cuda(cudaGetLastError(), "kernel launch");
  check_cuda(cudaDeviceSynchronize(), "kernel execution");

  n_points_aux = (int *) malloc(counts_bytes);
  points_aux = (vector *) malloc(points_bytes);
  if(n_points_aux == NULL || (points_aux == NULL && points_bytes > 0)){
    printf("\nERROR: Failed to allocate host memory for the results.\n");
    exit(-1);
  }

  check_cuda(cudaMemcpy(n_points_aux, d_n_points, counts_bytes, cudaMemcpyDeviceToHost), "copy of the points count vector");
  check_cuda(cudaMemcpy(points_aux, d_points, points_bytes, cudaMemcpyDeviceToHost), "copy of the fibers");

  check_cuda(cudaEventRecord(finish, 0), "cudaEventRecord");
  check_cuda(cudaEventSynchronize(finish), "cudaEventSynchronize");

  check_cuda(cudaEventElapsedTime(&time, start, finish), "cudaEventElapsedTime");
  cudaEventDestroy(start);
  cudaEventDestroy(finish);

  printf("GPU time for RK%d: %fs\n", order, time/1000.0);

  *fibers = (runge_kutta::Fiber *) malloc(count_v0*sizeof(runge_kutta::Fiber));
  if(*fibers == NULL){
    printf("\nERROR: Failed to allocate host memory for the fibers.\n");
    exit(-1);
  }
  for(i = 0; i < count_v0; i++){
    (*fibers)[i] = runge_kutta::Fiber(n_points_aux[i]);
    for(j = 0; j < n_points_aux[i]; j++){
      /* Same layout the kernels write: point j of fiber i at i + count_v0*j. */
      (*fibers)[i].setPoint(j, points_aux[(size_t)i + (size_t)count_v0*(size_t)j]);
    }
  }

  free(n_points_aux);
  free(points_aux);

  cudaFree(d_v0);
  cudaFree(d_field);
  cudaFree(d_points);
  cudaFree(d_n_points);
}

/*
 * Traces one fiber per initial point on the GPU with the second order
 * Runge-Kutta method and stores them in *fibers. See run_rk() for details.
 */
void rk2_caller(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, runge_kutta::Fiber **fibers){
  run_rk(2, v0, count_v0, h, n_x, n_y, n_z, field, fibers);
}

/*
 * Traces one fiber per initial point on the GPU with the fourth order
 * Runge-Kutta method and stores them in *fibers. See run_rk() for details.
 */
void rk4_caller(vector *v0, int count_v0, double h, int n_x, int n_y, int n_z, vector_field field, runge_kutta::Fiber **fibers){
  run_rk(4, v0, count_v0, h, n_x, n_y, n_z, field, fibers);
}
--------------------------------------------------------------------------------