├── .gitignore ├── src ├── Resources │ ├── fem.png │ └── fem2.png ├── test │ ├── test_data │ │ ├── simple.mat │ │ ├── simpleb.mat │ │ ├── tetVolA.mat │ │ ├── tetVolb.mat │ │ ├── simpleAns.mat │ │ ├── simpleTri.mat │ │ ├── tetVolAns.mat │ │ ├── simpleTriAns.mat │ │ └── simpleTrib.mat │ ├── sanity3D.cc │ ├── tetVol.cc │ ├── sanity2D.cc │ └── CMakeLists.txt ├── core │ ├── include │ │ ├── cuda_resources.h │ │ ├── cycles │ │ │ ├── vcycle.h │ │ │ ├── fcycle.h │ │ │ ├── wcycle.h │ │ │ ├── cgcycle.h │ │ │ └── cycle.h │ │ ├── amg_signal.h │ │ ├── Logger.h │ │ ├── Helper.h │ │ ├── allocator.h │ │ ├── FEM │ │ │ ├── FEM2D.h │ │ │ └── FEM3D.h │ │ ├── error.h │ │ ├── util.h │ │ ├── smoothedMG │ │ │ ├── aggregators │ │ │ │ ├── Timer.h │ │ │ │ ├── aggregator.h │ │ │ │ └── mis.h │ │ │ └── smoothedMG_amg_level.h │ │ ├── types.h │ │ ├── amg.h │ │ ├── Color.h │ │ ├── my_timer.h │ │ ├── cutil.h │ │ ├── amg_level.h │ │ ├── tetmesh.h │ │ └── smoothers │ │ │ ├── gauss_seidel.h │ │ │ └── smoother.h │ ├── cuda │ │ ├── cuda_resources.cu │ │ ├── aggregator.cu │ │ ├── smoother.cu │ │ ├── allocator.cu │ │ ├── amg_signal.cu │ │ ├── cgcycle.cu │ │ ├── amg_level.cu │ │ ├── FEM2D.cu │ │ └── randomizedMIS_GPU.cu │ ├── aggmis │ │ ├── include │ │ │ ├── AggMIS_MIS_CPU.h │ │ │ ├── AggMIS_IOHelpers.h │ │ │ ├── AggMIS_MIS_GPU.h │ │ │ ├── Timer.h │ │ │ ├── AggMIS_Metrics.h │ │ │ ├── AggMIS_FileIO.h │ │ │ ├── AggMIS_Aggregation_GPU.h │ │ │ ├── AggMIS_Aggregation_CPU.h │ │ │ ├── AggMIS_Types.h │ │ │ ├── AggMIS_GraphHelpers.h │ │ │ └── AggMIS_MergeSplitConditioner_CPU.h │ │ └── cuda │ │ │ ├── AggMIS_IOHelpers.cu │ │ │ ├── TriMesh_connectivity.cu │ │ │ ├── AggMIS_MIS_CPU.cu │ │ │ └── AggMIS_Aggregation_GPU.cu │ └── CMakeLists.txt ├── CTestConfig.cmake ├── examples │ ├── CMakeLists.txt │ ├── example2.cu │ └── example1.cu ├── cuda_compute_capability.c ├── CMakeLists.txt ├── FEMSolver.h └── CodeCoverage.cmake ├── LICENSE └── README.md /.gitignore: 
-------------------------------------------------------------------------------- 1 | msvc 2 | build 3 | Debug 4 | Release 5 | *.swp 6 | ~* 7 | *.project 8 | *.cproject 9 | -------------------------------------------------------------------------------- /src/Resources/fem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/Resources/fem.png -------------------------------------------------------------------------------- /src/Resources/fem2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/Resources/fem2.png -------------------------------------------------------------------------------- /src/test/test_data/simple.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/simple.mat -------------------------------------------------------------------------------- /src/test/test_data/simpleb.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/simpleb.mat -------------------------------------------------------------------------------- /src/test/test_data/tetVolA.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/tetVolA.mat -------------------------------------------------------------------------------- /src/test/test_data/tetVolb.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/tetVolb.mat -------------------------------------------------------------------------------- 
/src/test/test_data/simpleAns.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/simpleAns.mat -------------------------------------------------------------------------------- /src/test/test_data/simpleTri.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/simpleTri.mat -------------------------------------------------------------------------------- /src/test/test_data/tetVolAns.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/tetVolAns.mat -------------------------------------------------------------------------------- /src/test/test_data/simpleTriAns.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/simpleTriAns.mat -------------------------------------------------------------------------------- /src/test/test_data/simpleTrib.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/simpleTrib.mat -------------------------------------------------------------------------------- /src/core/include/cuda_resources.h: -------------------------------------------------------------------------------- 1 | #ifndef __CUDA_RESOURCES_H__ 2 | #define __CUDA_RESOURCES_H__ 3 | 4 | #include 5 | #include 6 | 7 | int getMaxThreads(const int max_regs_per_thread, int cuda_device); 8 | 9 | #endif //end __CUDA_RESOURCES_H__ 10 | -------------------------------------------------------------------------------- /src/CTestConfig.cmake: 
-------------------------------------------------------------------------------- 1 | SET(CTEST_PROJECT_NAME "SCI-Solver_FEM") 2 | SET(CTEST_NIGHTLY_START_TIME "00:00:00 MDT") 3 | SET(CTEST_DROP_METHOD "http") 4 | SET(CTEST_DROP_SITE "my.cdash.org") 5 | SET(CTEST_DROP_LOCATION "/submit.php?project=SCI-Solver_FEM") 6 | SET(CTEST_DROP_SITE_CDASH TRUE) 7 | -------------------------------------------------------------------------------- /src/examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories("${CMAKE_SOURCE_DIR}/core/include") 2 | include_directories("${CMAKE_SOURCE_DIR}") 3 | 4 | CUDA_ADD_EXECUTABLE(Example1 example1.cu) 5 | TARGET_LINK_LIBRARIES(Example1 FEM_SOLVER FEM_CORE) 6 | 7 | CUDA_ADD_EXECUTABLE(Example2 example2.cu) 8 | TARGET_LINK_LIBRARIES(Example2 FEM_SOLVER FEM_CORE) -------------------------------------------------------------------------------- /src/core/cuda/cuda_resources.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int getMaxThreads( 6 | const int max_regs_per_thread, 7 | int cuda_device) { 8 | cudaDeviceProp d; 9 | cudaGetDeviceProperties(&d, cuda_device); 10 | return d.regsPerBlock / max_regs_per_thread; 11 | } 12 | -------------------------------------------------------------------------------- /src/core/include/cycles/vcycle.h: -------------------------------------------------------------------------------- 1 | #ifndef __VCYCLE_H__ 2 | #define __VCYCLE_H__ 3 | 4 | #include 5 | template 6 | class V_Cycle { 7 | public: 8 | inline V_Cycle(AMG_Level *next, const Vector& b, Vector &x) { 9 | next->cycle(V_CYCLE,b,x); 10 | } 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /src/core/include/amg_signal.h: -------------------------------------------------------------------------------- 1 | #ifndef __AMG_SIGNAL_H__ 2 | #define 
__AMG_SIGNAL_H__ 3 | 4 | /***************************************** 5 | * A class that will install all signal 6 | * handlers when constructed 7 | ****************************************/ 8 | class SignalHandler { 9 | public: 10 | SignalHandler(); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /src/core/include/cycles/fcycle.h: -------------------------------------------------------------------------------- 1 | #ifndef __FCYCLE_H__ 2 | #define __FCYCLE_H__ 3 | template 4 | class F_Cycle { 5 | public: 6 | inline F_Cycle(AMG_Level *next, const Vector& b, Vector &x) { 7 | if(next->isFinest()) 8 | next->cycle(F_CYCLE,b,x); 9 | else { 10 | next->cycle(W_CYCLE,b,x); 11 | next->cycle(V_CYCLE,b,x); 12 | } 13 | } 14 | }; 15 | #endif 16 | -------------------------------------------------------------------------------- /src/core/include/cycles/wcycle.h: -------------------------------------------------------------------------------- 1 | #ifndef __WCYCLE_H__ 2 | #define __WCYCLE_H__ 3 | 4 | 5 | template 6 | class W_Cycle { 7 | public: 8 | inline W_Cycle(AMG_Level *next, const Vector& b, Vector &x) { 9 | if(next->isFinest()) 10 | next->cycle(W_CYCLE,b,x); 11 | else { 12 | next->cycle(W_CYCLE,b,x); 13 | next->cycle(W_CYCLE,b,x); 14 | } 15 | } 16 | }; 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/core/aggmis/include/AggMIS_MIS_CPU.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: AggMIS_MIS_CPU.h 3 | * Author: T. 
James Lewis 4 | * 5 | * Created on June 25, 2013, 6:13 PM 6 | */ 7 | 8 | #ifndef AGGMIS_MIS_CPU_H 9 | #define AGGMIS_MIS_CPU_H 10 | #include 11 | #include 12 | namespace AggMIS { 13 | namespace MIS { 14 | Types::IntVector_h* FloodFillMIS(int k, Types::Graph_h &graph); 15 | Types::IntVector_h* NaiveMIS(int k, Types::Graph_h &graph); 16 | } 17 | } 18 | #endif /* AGGMIS_MIS_CPU_H */ 19 | 20 | -------------------------------------------------------------------------------- /src/core/include/Logger.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: Logger.h 3 | * Author: T. James Lewis 4 | * 5 | * Created on July 25, 2013, 12:44 PM 6 | */ 7 | 8 | #ifndef LOGGER_H 9 | #define LOGGER_H 10 | 11 | #include 12 | #include 13 | #include 14 | inline void Log(std::string fileName, std::string output) { 15 | std::ofstream outputFile; 16 | outputFile.open(fileName.c_str(), std::ofstream::app); 17 | outputFile << output << "\n"; 18 | outputFile.close(); 19 | } 20 | 21 | #endif /* LOGGER_H */ 22 | 23 | -------------------------------------------------------------------------------- /src/core/aggmis/include/AggMIS_IOHelpers.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: AggMIS_IOHelpers.h 3 | * Author: T. 
James Lewis 4 | * 5 | * Created on May 25, 2013, 4:13 PM 6 | */ 7 | 8 | #ifndef AGGMIS_IOHELPERS_H 9 | #define AGGMIS_IOHELPERS_H 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace AggMIS { 17 | namespace InputHelpers { 18 | std::string GetNonEmptyLineCIN(); 19 | int GetSingleIntegerValueCIN(); 20 | std::vector GetIntegerValuesCIN(); 21 | } 22 | } 23 | 24 | #endif /* AGGMIS_IOHELPERS_H */ 25 | 26 | -------------------------------------------------------------------------------- /src/core/include/cycles/cgcycle.h: -------------------------------------------------------------------------------- 1 | #ifndef __CGCYCLE_H__ 2 | #define __CGCYCLE_H__ 3 | 4 | template class CG_Cycle; 5 | template class CG_Flex_Cycle; 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | template 12 | class CG_Flex_Cycle { 13 | public: 14 | typedef typename Matrix::value_type ValueType; 15 | CG_Flex_Cycle(CycleType next_cycle, int num_iters, 16 | AMG_Level *next, const Matrix_hyb_d_CG &Aell, 17 | const Vector_d_CG &b, Vector_d_CG &x, CGType tol, int maxiters, bool verbose = false); 18 | }; 19 | #endif 20 | -------------------------------------------------------------------------------- /src/core/cuda/aggregator.cu: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | /********************************************* 4 | * Allocates selector based on passed in type 5 | *********************************************/ 6 | template 7 | Aggregator* Aggregator::allocate(int type) 8 | { 9 | //if (type == 0) 10 | // return new MIS_Aggregator < Matrix, Vector > ; 11 | //else 12 | return new RandMIS_Aggregator < Matrix, Vector > ; 13 | } 14 | 15 | /**************************************** 16 | * Explict instantiations 17 | ***************************************/ 18 | template class Aggregator < Matrix_h, Vector_h > ; 19 | template class Aggregator < Matrix_d, Vector_d > ; 20 | 
-------------------------------------------------------------------------------- /src/core/include/Helper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: Helper.h 3 | * Author: T. James Lewis 4 | * 5 | * Created on July 17, 2013, 12:24 PM 6 | */ 7 | 8 | #ifndef HELPER_H 9 | #define HELPER_H 10 | namespace Helper { 11 | template 12 | int BinarySearch(T value, T* array, int size) { 13 | int imin = 0; 14 | int imax = size - 1; 15 | while (imin < imax) { 16 | int imid = (imax + imin) / 2; 17 | if (array[imid] < value) 18 | imin = imid + 1; 19 | else 20 | imax = imid; 21 | } 22 | if (imax == imin && array[imin] == value) 23 | return imin; 24 | else 25 | return -1; 26 | } 27 | } 28 | #endif /* HELPER_H */ 29 | 30 | -------------------------------------------------------------------------------- /src/core/include/allocator.h: -------------------------------------------------------------------------------- 1 | #ifndef __ALLOCATOR_H__ 2 | #define __ALLOCATOR_H__ 3 | 4 | #include 5 | #include 6 | /*********************************************************** 7 | * Class to allocate arrays of memory for temperary use. 8 | * The allocator will hold onto the memory for the next call. 9 | * This allows memory like Vectors to be reused in different 10 | * parts of the algorithm without having to store it in 11 | * a class and hold onto even when it isn't being used. 
12 | ***********************************************************/ 13 | template 14 | class Allocator { 15 | typedef std::stack FreeList; 16 | typedef std::map FreeMap; 17 | 18 | public: 19 | static T* allocate(int size); 20 | static void free(T *v,int size); 21 | static void clear(); 22 | 23 | private: 24 | static FreeMap free_vars; //a map of vector lists 25 | }; 26 | #endif 27 | -------------------------------------------------------------------------------- /src/core/include/FEM/FEM2D.h: -------------------------------------------------------------------------------- 1 | #ifndef __FEM2D_H__ 2 | #define __FEM2D_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class FEM2D 9 | { 10 | public: 11 | FEM2D() {}; 12 | FEM2D(TriMesh* meshPtr); 13 | void initializeWithTriMesh(TriMesh* meshPtr); 14 | void assemble(TriMesh* meshPtr, Matrix_ell_d_CG &A, Vector_d_CG &b); 15 | void assemble(TriMesh* meshPtr, Matrix_d_CG &A, Vector_d_CG &b); 16 | void JacobiGLZW(Vector_h_CG& Z, Vector_h_CG& weight, int degree, int alpha, int beta); 17 | void JacobiGRZW(Vector_h_CG& Z, Vector_h_CG& weight, int degree, int alpha, int beta); 18 | void JacobiPoly(int degree, Vector_h_CG x, int alpha, int beta, Vector_h_CG &y); 19 | void JacobiPolyDerivative(int degree, Vector_h_CG &x, int alpha, int beta, Vector_h_CG &y); 20 | void JacobiGZeros(int degree, int alpha, int beta, Vector_h_CG &z); 21 | 22 | IdxVector_d d_tri0; 23 | IdxVector_d d_tri1; 24 | IdxVector_d d_tri2; 25 | Vector_d_CG d_vx; 26 | Vector_d_CG d_vy; 27 | int nv; 28 | int ne; 29 | }; 30 | #endif 31 | -------------------------------------------------------------------------------- /src/test/sanity3D.cc: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | #include "FEMSolver.h" 3 | TEST(SanityTests, EggCarton3D) { 4 | //test the egg carton 5 | FEMSolver cfg(std::string(TEST_DATA_DIR) + "/simple", true, true); 6 | float lambda = 1.f; 7 | //read the A matrix 8 | 
cfg.readMatlabSparseMatrix(std::string(TEST_DATA_DIR) + "/simple.mat"); 9 | //read the b vector 10 | Vector_h_CG b_h(cfg.getMatrixRows(), 1.0), x_h(cfg.getMatrixRows(), 0.), x_answer; 11 | cfg.readMatlabArray(std::string(TEST_DATA_DIR) + "/simpleb.mat", &b_h); 12 | //solve 13 | cfg.solveFEM(&x_h, &b_h); 14 | //read in known answer 15 | cfg.readMatlabArray(std::string(TEST_DATA_DIR) + "/simpleAns.mat", &x_answer); 16 | //look for error 17 | double error = 0.f; 18 | std::vector x_actual; 19 | for (int i = 0; i < cfg.getMatrixRows(); i++) { 20 | error += (x_h[i] - x_answer[i]) * (x_h[i] - x_answer[i]); 21 | x_actual.push_back(x_h[i]); 22 | } 23 | std::cout << "The error is : " << std::sqrt(error) << std::endl; 24 | ASSERT_TRUE(std::sqrt(error) < 1.); 25 | } 26 | -------------------------------------------------------------------------------- /src/test/tetVol.cc: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | #include "FEMSolver.h" 3 | TEST(SanityTests, EggCarton3D) { 4 | //test the egg carton 5 | FEMSolver cfg(std::string(TEST_DATA_DIR) + "/tetVol", true, true); 6 | float lambda = 1.f; 7 | //read the A matrix 8 | cfg.readMatlabSparseMatrix(std::string(TEST_DATA_DIR) + "/tetVolA.mat"); 9 | //read the b vector 10 | Vector_h_CG b_h(cfg.getMatrixRows(), 1.0), x_h(cfg.getMatrixRows(), 0.), x_answer; 11 | cfg.readMatlabArray(std::string(TEST_DATA_DIR) + "/tetVolb.mat", &b_h); 12 | //solve 13 | cfg.solveFEM(&x_h, &b_h); 14 | //read in known answer 15 | cfg.readMatlabArray(std::string(TEST_DATA_DIR) + "/tetVolAns.mat", &x_answer); 16 | //look for error 17 | double error = 0.f; 18 | std::vector x_actual; 19 | for (int i = 0; i < cfg.getMatrixRows(); i++) { 20 | error += (x_h[i] - x_answer[i]) * (x_h[i] - x_answer[i]); 21 | x_actual.push_back(x_h[i]); 22 | } 23 | std::cout << "The error is : " << std::sqrt(error) << std::endl; 24 | ASSERT_TRUE(std::sqrt(error) < 25.); 25 | } 26 | 
-------------------------------------------------------------------------------- /src/test/sanity2D.cc: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | #include "FEMSolver.h" 3 | TEST(SanityTests, EggCarton2D) { 4 | //test the egg carton 5 | FEMSolver cfg(std::string(TEST_DATA_DIR) + "/simple.ply", false, true); 6 | float lambda = 1.f; 7 | //read the A matrix 8 | cfg.readMatlabSparseMatrix(std::string(TEST_DATA_DIR) + "/simpleTri.mat"); 9 | //read the b vector 10 | Vector_h_CG b_h(cfg.getMatrixRows(), 1.0), x_h(cfg.getMatrixRows(), 0.), x_answer; 11 | cfg.readMatlabArray(std::string(TEST_DATA_DIR) + "/simpleTrib.mat", &b_h); 12 | //solve 13 | cfg.solveFEM(&x_h, &b_h); 14 | //read in known answer 15 | cfg.readMatlabArray(std::string(TEST_DATA_DIR) + "/simpleTriAns.mat", &x_answer); 16 | //look for error 17 | double error = 0.f; 18 | std::vector x_actual; 19 | for (int i = 0; i < x_answer.size(); i++) { 20 | error += (x_h[i] - x_answer[i]) * (x_h[i] - x_answer[i]); 21 | x_actual.push_back(x_h[i]); 22 | } 23 | std::cout << "The error is : " << std::sqrt(error) << std::endl; 24 | ASSERT_TRUE(std::sqrt(error) < 100.); 25 | } 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2014 The Scientific Computing and Imaging Institute 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice 
shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/core/include/cycles/cycle.h: -------------------------------------------------------------------------------- 1 | #ifndef __CYCLE_H__ 2 | #define __CYCLE_H__ 3 | 4 | enum CycleType {V_CYCLE,W_CYCLE,F_CYCLE,K_CYCLE}; 5 | 6 | #include 7 | #include 8 | 9 | template void dispatch_cycle(int num_iters, 10 | CycleType cycle, AMG_Level *level, const Vector& b, Vector &x); 11 | 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | /******************************************************* 19 | * Dispatches the cycle that is passed in 20 | *******************************************************/ 21 | template 22 | void dispatch_cycle(int num_iters, CycleType cycle, AMG_Level 23 | *level, const Vector& b, Vector &x) { 24 | switch(cycle) { 25 | case V_CYCLE: 26 | V_Cycle(level,b,x); 27 | break; 28 | case W_CYCLE: 29 | W_Cycle(level,b,x); 30 | break; 31 | case F_CYCLE: 32 | F_Cycle(level,b,x); 33 | break; 34 | case K_CYCLE: 35 | CG_Flex_Cycle(K_CYCLE,num_iters,level,b,x); 36 | break; 37 | default: 38 | FatalError("dispatch_cycle not defined for cycle type"); 39 | } 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /src/core/cuda/smoother.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 
#include 3 | #include 4 | 5 | /*************************************** 6 | * Source Definitions 7 | ***************************************/ 8 | 9 | template 10 | Smoother::~Smoother() { 11 | }; 12 | 13 | template 14 | void Smoother::smooth_with_0_initial_guess(const Matrix &A, const Vector &b, Vector &x) { 15 | //by default set x to zero and call smooth. smoothers can optimize this path if they wish 16 | cusp::blas::fill(x,0); 17 | smooth(A,b,x); 18 | }; 19 | 20 | #include 21 | /********************************************* 22 | * Allocates smoothers based on passed in type 23 | *********************************************/ 24 | template 25 | Smoother* Smoother::allocate(double smootherWeight, 26 | int preInnerIters, int postInnerIters, int postRelaxes, const Matrix_d& A) 27 | { 28 | return new gauss_seidel( smootherWeight, 29 | preInnerIters, postInnerIters, postRelaxes, A); 30 | } 31 | 32 | /**************************************** 33 | * Explict instantiations 34 | ***************************************/ 35 | //template class Smoother; 36 | template class Smoother; 37 | 38 | -------------------------------------------------------------------------------- /src/core/aggmis/include/AggMIS_MIS_GPU.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: AggMIS_MIS_GPU.h 3 | * Author: T. 
James Lewis 4 | * 5 | * Created on April 17, 2013, 12:49 PM 6 | */ 7 | 8 | #ifndef AGGMIS_MIS_GPU_H 9 | #define AGGMIS_MIS_GPU_H 10 | #include "AggMIS_Types.h" 11 | namespace AggMIS { 12 | namespace MIS { 13 | namespace Kernels { 14 | __global__ void GenerateRandoms(int size, 15 | int iterations, 16 | unsigned int *randoms, 17 | unsigned int *seeds); 18 | __global__ void PreInitialize(int size, 19 | unsigned int *randoms, 20 | int *bestSeen, 21 | int *origin, 22 | int *mis); 23 | __global__ void Initialize(int size, 24 | unsigned int *randoms, 25 | int *bestSeen, 26 | int *origin, 27 | int *mis, 28 | int *incomplete); 29 | __global__ void Iterate(int size, 30 | int *originIn, 31 | int *originOut, 32 | int *bestSeenIn, 33 | int *bestSeenOut, 34 | int *adjIndexes, 35 | int *adjacency); 36 | __global__ void Finalize(int size, 37 | int *originIn, 38 | int *originOut, 39 | int *bestSeenIn, 40 | int *bestSeenOut, 41 | int *adjIndexes, 42 | int *adjacency, 43 | int *mis, 44 | int *incomplete); 45 | } 46 | Types::IntVector_d* RandomizedMIS(int k, Types::Graph_d &graph); 47 | bool IsValidKMIS(Types::IntVector_d &misIn, 48 | Types::Graph_d &graph, int k, bool verbose); 49 | } 50 | } 51 | 52 | 53 | #endif /* AGGMIS_MIS_GPU_H */ 54 | 55 | -------------------------------------------------------------------------------- /src/core/include/FEM/FEM3D.h: -------------------------------------------------------------------------------- 1 | #ifndef __FEM3D_H__ 2 | #define __FEM3D_H__ 3 | 4 | #include 5 | #include 6 | #define DEGREE 4 7 | 8 | class FEM3D 9 | { 10 | public: 11 | FEM3D() {}; 12 | FEM3D(TetMesh* meshPtr); 13 | void initializeWithTetMesh(TetMesh* meshPtr); 14 | void assemble(TetMesh* meshPtr, Matrix_ell_d_CG &A, Vector_d_CG &b, bool isdevice); 15 | void JacobiGLZW(Vector_h_CG& Z, Vector_h_CG& weight, int degree, int alpha, int beta); 16 | void JacobiGRZW(Vector_h_CG& Z, Vector_h_CG& weight, int degree, int alpha, int beta); 17 | void JacobiPoly(int degree, Vector_h_CG x, 
int alpha, int beta, Vector_h_CG &y); 18 | void JacobiPolyDerivative(int degree, Vector_h_CG &x, int alpha, int beta, Vector_h_CG &y); 19 | void JacobiGZeros(int degree, int alpha, int beta, Vector_h_CG &z); 20 | void Transform2StdTetSpace(const Vector_h_CG &z_x, const Vector_h_CG &z_y, const Vector_h_CG &z_z, CGType(*VecXYZ)[DEGREE][DEGREE][3]); 21 | void EvalBasisTet(CGType(*coefmatBaseTet)[4], const CGType(*qdTet)[DEGREE][DEGREE][3], CGType(*phiTet)[DEGREE][DEGREE][4]); 22 | void IntegrationInTet(Vector_h_CG &phi, Vector_h_CG &weight_x, Vector_h_CG &weight_y, Vector_h_CG &weight_z, Vector_h_CG &integralMass); 23 | CGType Integration_Quadrilateral_3d(Matrix_ell_d_CG::value_type(*fx)[DEGREE][DEGREE], Vector_h_CG &w_x, Vector_h_CG &w_y, Vector_h_CG &w_z); 24 | 25 | IdxVector_d d_tri0; 26 | IdxVector_d d_tri1; 27 | IdxVector_d d_tri2; 28 | IdxVector_d d_tri3; 29 | Vector_d_CG d_vx; 30 | Vector_d_CG d_vy; 31 | Vector_d_CG d_vz; 32 | int nv; 33 | int ne; 34 | }; 35 | #endif 36 | -------------------------------------------------------------------------------- /src/core/cuda/allocator.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | template 9 | Allocator::FreeMap Allocator::free_vars; 10 | 11 | template 12 | inline T* allocate(int size) { 13 | return new T(size); 14 | }; 15 | 16 | template<> 17 | inline int* allocate(int size) { 18 | return new int[size]; 19 | } 20 | 21 | template 22 | T* Allocator::allocate(int size) { 23 | //locate free var list for the right size 24 | FreeList &f_vars=free_vars[size]; 25 | 26 | T *v; 27 | if(f_vars.empty()) //if there are no free vectors 28 | { 29 | //create a new vector 30 | v=::allocate(size); 31 | } 32 | else { 33 | //set the return value to the previously freed vector 34 | v=f_vars.top(); 35 | //remove the vector from the free vector list 36 | f_vars.pop(); 37 | } 38 | return v; 39 | } 40 | 41 | template 42 | void 
Allocator::free(T* v,int size) { 43 | //add the vector to the free vector list 44 | free_vars[size].push(v); 45 | } 46 | 47 | template 48 | void Allocator::clear() { 49 | for(typename FreeMap::iterator m_iter=free_vars.begin();m_iter!=free_vars.end();m_iter++) 50 | { 51 | FreeList &stack=m_iter->second; 52 | while(!stack.empty()) { 53 | delete stack.top(); 54 | stack.pop(); 55 | } 56 | } 57 | free_vars.clear(); 58 | } 59 | 60 | /**************************************** 61 | * Explict instantiations 62 | ***************************************/ 63 | template class Allocator; 64 | template class Allocator; 65 | -------------------------------------------------------------------------------- /src/core/include/error.h: -------------------------------------------------------------------------------- 1 | #ifndef __MYERROR_H__ 2 | #define __MYERROR_H__ 3 | #ifndef WIN32 4 | #include 5 | #include 6 | #include 7 | #include 8 | #endif 9 | #include 10 | 11 | /****************************************************** 12 | * prints the current stack trace 13 | *****************************************************/ 14 | inline void printStackTrace() { 15 | #ifndef WIN32 16 | const int MAX_STACK=30; 17 | size_t n; 18 | static void *addresses[MAX_STACK]; 19 | n=backtrace(addresses,MAX_STACK); 20 | 21 | if(n<2) 22 | return; 23 | 24 | char **names=backtrace_symbols( addresses, n ); 25 | 26 | printf("Backtrace for pid %d:\n",getpid()); 27 | 28 | for(int i=1;i 5 | #include 6 | 7 | /**************************************************** 8 | * Debugging tools 9 | ***************************************************/ 10 | template 11 | void printVector(const char* label, const Vector &v) 12 | { 13 | std::cout << label << ": "; 14 | for(int i=0;i 22 | void printDense(const Matrix& A) 23 | { 24 | for(int i=0;i 47 | template 48 | void printMatrix(const Matrix& A, char* fname) 49 | { 50 | std::ofstream fout; 51 | fout.open(fname); 52 | 53 | fout << "%%MatrixMarket matrix coordinate real general" 
<< std::endl; 54 | fout << std::setprecision(16) << std::fixed << A.num_rows << " " << A.num_cols << " " << A.num_entries << std::endl; 55 | for(int i=0;i 12 | #include 13 | 14 | int main() { /* prints "<major><minor>" of the lowest compute capability among non-emulation devices; exits 0 on success, 1 if the device count query fails or no such device exists */ 15 | int deviceCount, device, major = 9999, minor = 9999; 16 | int gpuDeviceCount = 0; 17 | struct cudaDeviceProp properties; 18 | 19 | if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) 20 | { 21 | printf("Couldn't get device count: %s\n", cudaGetErrorString(cudaGetLastError())); 22 | return 1; 23 | } 24 | /* machines with no GPUs can still report one emulation device */ 25 | for (device = 0; device < deviceCount; ++device) { 26 | cudaGetDeviceProperties(&properties, device); 27 | if (properties.major != 9999) {/* 9999 means emulation only */ 28 | ++gpuDeviceCount; 29 | /* get minimum compute capability of all devices */ 30 | if (major > properties.major) { 31 | major = properties.major; 32 | minor = properties.minor; 33 | } else if (major == properties.major && minor > properties.minor) { /* compare minors only within the same major, otherwise e.g. 3.5 + 5.0 would be reported as 3.0 */ 34 | minor = properties.minor; 35 | } 36 | } 37 | } 38 | 39 | /* don't just return the number of gpus, because other runtime cuda 40 | errors can also yield non-zero return values */ 41 | if (gpuDeviceCount > 0) { 42 | if (major == 2 && minor == 1) 43 | { 44 | // There is no --arch compute_21 flag for nvcc, so force minor to 0 45 | minor = 0; 46 | } 47 | /* this output will be parsed by FindCUDA.cmake */ 48 | printf("%d%d", major, minor); 49 | return 0; /* success */ 50 | } 51 | return 1; /* failure */ 52 | } -------------------------------------------------------------------------------- /src/core/aggmis/include/Timer.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Timer.h 3 | // ======= 4 | // High Resolution Timer. 
5 | // This timer is able to measure the elapsed time with 1 micro-second accuracy 6 | // in both Windows, Linux and Unix system 7 | // 8 | // AUTHOR: Song Ho Ahn (song.ahn@gmail.com) 9 | // CREATED: 2003-01-13 10 | // UPDATED: 2006-01-13 11 | // 12 | // Copyright (c) 2003 Song Ho Ahn 13 | ////////////////////////////////////////////////////////////////////////////// 14 | 15 | #ifndef TIMER_H_DEF 16 | #define TIMER_H_DEF 17 | 18 | #ifdef WIN32 // Windows system specific 19 | #include 20 | #else // Unix based system specific 21 | #include 22 | #endif 23 | 24 | 25 | class Timer 26 | { 27 | public: 28 | Timer(); // default constructor 29 | ~Timer(); // default destructor 30 | 31 | void start(); // start timer 32 | void stop(); // stop the timer 33 | double getElapsedTime(); // get elapsed time in second 34 | double getElapsedTimeInSec(); // get elapsed time in second (same as getElapsedTime) 35 | double getElapsedTimeInMilliSec(); // get elapsed time in milli-second 36 | double getElapsedTimeInMicroSec(); // get elapsed time in micro-second 37 | 38 | 39 | protected: 40 | 41 | 42 | private: 43 | double startTimeInMicroSec; // starting time in micro-second 44 | double endTimeInMicroSec; // ending time in micro-second 45 | int stopped; // stop flag 46 | #ifdef WIN32 47 | LARGE_INTEGER frequency; // ticks per second 48 | LARGE_INTEGER startCount; // 49 | LARGE_INTEGER endCount; // 50 | #else 51 | timeval startCount; // 52 | timeval endCount; // 53 | #endif 54 | }; 55 | 56 | #endif // TIMER_H_DEF 57 | -------------------------------------------------------------------------------- /src/core/include/smoothedMG/aggregators/Timer.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Timer.h 3 | // ======= 4 | // High Resolution Timer. 
5 | // This timer is able to measure the elapsed time with 1 micro-second accuracy 6 | // in both Windows, Linux and Unix system 7 | // 8 | // AUTHOR: Song Ho Ahn (song.ahn@gmail.com) 9 | // CREATED: 2003-01-13 10 | // UPDATED: 2006-01-13 11 | // 12 | // Copyright (c) 2003 Song Ho Ahn 13 | ////////////////////////////////////////////////////////////////////////////// 14 | 15 | #ifndef TIMER_H_DEF 16 | #define TIMER_H_DEF 17 | 18 | #ifdef WIN32 // Windows system specific 19 | #include 20 | #else // Unix based system specific 21 | #include 22 | #endif 23 | 24 | 25 | class Timer 26 | { 27 | public: 28 | Timer(); // default constructor 29 | ~Timer(); // default destructor 30 | 31 | void start(); // start timer 32 | void stop(); // stop the timer 33 | double getElapsedTime(); // get elapsed time in second 34 | double getElapsedTimeInSec(); // get elapsed time in second (same as getElapsedTime) 35 | double getElapsedTimeInMilliSec(); // get elapsed time in milli-second 36 | double getElapsedTimeInMicroSec(); // get elapsed time in micro-second 37 | 38 | 39 | protected: 40 | 41 | 42 | private: 43 | double startTimeInMicroSec; // starting time in micro-second 44 | double endTimeInMicroSec; // ending time in micro-second 45 | int stopped; // stop flag 46 | #ifdef WIN32 47 | LARGE_INTEGER frequency; // ticks per second 48 | LARGE_INTEGER startCount; // 49 | LARGE_INTEGER endCount; // 50 | #else 51 | timeval startCount; // 52 | timeval endCount; // 53 | #endif 54 | }; 55 | 56 | #endif // TIMER_H_DEF 57 | -------------------------------------------------------------------------------- /src/core/aggmis/cuda/AggMIS_IOHelpers.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * File: AggMIS_IOHelpers.cu 3 | * Author: T. 
James Lewis 4 | * 5 | * Created on May 25, 2013, 4:43 PM 6 | */ 7 | #include "AggMIS_IOHelpers.h" 8 | namespace AggMIS { 9 | namespace InputHelpers { 10 | std::string GetNonEmptyLineCIN() { 11 | std::string b; 12 | char dumb; 13 | while (std::cin.peek() == '\n') 14 | std::cin.get(dumb); 15 | std::getline(std::cin, b); 16 | return b; 17 | } 18 | int GetSingleIntegerValuecin() { 19 | std::string input; 20 | char dumb; 21 | while (true) 22 | { 23 | while (std::cin.peek() == '\n') 24 | std::cin.get(dumb); 25 | std::getline(std::cin, input); 26 | std::stringstream str(input); 27 | int result; 28 | if (str >> result) 29 | return result; 30 | std::cout << "Please enter a number\n:"; 31 | } 32 | } 33 | std::vector GetIntegerValuescin() { 34 | std::string input; 35 | char dumb; 36 | int value; 37 | std::vector values; 38 | while (true) 39 | { 40 | while (std::cin.peek() == '\n') 41 | std::cin.get(dumb); 42 | std::getline(std::cin, input); 43 | std::stringstream stream(input); 44 | while(!stream.eof()) 45 | { 46 | if (stream >> value) 47 | values.push_back(value); 48 | else 49 | { 50 | stream.clear(); 51 | std::string dumber; 52 | stream >> dumber; 53 | } 54 | } 55 | if (values.size() > 0) 56 | return values; 57 | std::cout << "Please enter at least one number\n:"; 58 | } 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # gtest external download 2 | set(GTEST_INSTALL_DIR "${EXTERNAL_DIR}/gtest") 3 | if (WIN32) 4 | set(GTEST_SHARED_ARG ON) 5 | else() 6 | set(GTEST_SHARED_ARG OFF) 7 | endif() 8 | 9 | ExternalProject_Add( gtest 10 | PREFIX "${GTEST_INSTALL_DIR}" 11 | GIT_REPOSITORY "https://github.com/google/googletest.git" 12 | INSTALL_DIR "${GTEST_INSTALL_DIR}/include" 13 | BINARY_DIR "${GTEST_INSTALL_DIR}/lib" 14 | INSTALL_COMMAND "" 15 | CMAKE_ARGS 16 | -Dgtest_build_tests:BOOL=OFF 17 | 
-DBUILD_GMOCK:BOOL=OFF 18 | -DBUILD_GTEST:BOOL=ON 19 | -Dgtest_force_shared_crt:BOOL=${GTEST_SHARED_ARG} 20 | ) 21 | ExternalProject_Add_Step( gtest copy_deps 22 | COMMAND ${CMAKE_COMMAND} -E copy_directory ${GTEST_INSTALL_DIR}/src/gtest/googletest/include ${GTEST_INSTALL_DIR}/include 23 | DEPENDEES install 24 | ) 25 | include_directories("${GTEST_INSTALL_DIR}/include") 26 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../core2d/include) 27 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../core3d/include) 28 | SET(GTEST_LIBRARY "${GTEST_INSTALL_DIR}/lib/googletest/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX};${GTEST_INSTALL_DIR}/lib/googletest/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main${CMAKE_STATIC_LIBRARY_SUFFIX}") 29 | ######################################################################## 30 | 31 | # Definitions for Tests 32 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test/bin) 33 | add_definitions(-DTEST_DATA_DIR="${CMAKE_CURRENT_SOURCE_DIR}/test_data/") 34 | include_directories(${CMAKE_SOURCE_DIR}/core/include) 35 | include_directories(${CMAKE_SOURCE_DIR}) 36 | #test macro 37 | function(NEWTEST name) 38 | add_executable(${name} ${name}.cc) 39 | if (NOT WIN32) 40 | target_link_libraries(${name} ${GTEST_LIBRARY} FEM_SOLVER FEM_CORE -lpthread) 41 | else () 42 | target_link_libraries(${name} ${GTEST_LIBRARY} FEM_SOLVER FEM_CORE ) 43 | endif() 44 | add_test(${name} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${name}) 45 | add_dependencies(${name} gtest FEM_SOLVER FEM_CORE) 46 | endfunction() 47 | #the tests 48 | newtest(sanity2D) 49 | newtest(sanity3D) 50 | newtest(tetVol) 51 | -------------------------------------------------------------------------------- /src/core/include/smoothedMG/smoothedMG_amg_level.h: -------------------------------------------------------------------------------- 1 | #ifndef __SMOOTHEDMG_AMG_LEVEL_H__ 2 | #define __SMOOTHEDMG_AMG_LEVEL_H__ 3 | 4 | template class SmoothedMG_AMG_Level; 5 | 6 | #include 7 | 
#include 8 | #include 9 | #include 10 | #include 11 | 12 | /*************************************************** 13 | * Smoothed multigrid AMG level (derives from AMG_Level). 14 | * Declares createNextLevel(), restrictResidual() and 15 | * prolongateAndApplyCorrection(), plus the prolongation (P), 16 | * restriction (R) and aggregation data members listed below. 17 | **************************************************/ 18 | template 19 | class SmoothedMG_AMG_Level : public AMG_Level 20 | { 21 | friend class AMG; 22 | typedef typename Matrix::value_type ValueType; 23 | typedef typename Matrix::index_type IndexType; 24 | typedef typename Matrix::memory_space MemorySpace; 25 | public: 26 | SmoothedMG_AMG_Level(AMG *amg); 27 | ~SmoothedMG_AMG_Level(); 28 | 29 | // void setup(); 30 | void createNextLevel(bool verbose = false); 31 | void restrictResidual(const Vector &r, Vector &rr); 32 | void prolongateAndApplyCorrection(const Vector &c, Vector &x, Vector &tmp); 33 | 34 | protected: 35 | 36 | void generateMatrixCsr(IdxVector_d &permutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel); 37 | void generateMatrixSymmetric_d(IdxVector_d &permutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, bool verbose = false); 38 | void generateProlongatorFull_d(IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx); 39 | void computeProlongationOperator(); 40 | void computeRestrictionOperator(); 41 | void generateNextLevelMatrixFull_d(bool verbose = false); 42 | void computeAOperator(); 43 | 44 | Matrix P, R; 45 | Matrix_coo_d P_d, R_d; 46 | Matrix_coo_h Acoo; 47 | Matrix_coo_d Acoo_d; 48 | 49 | Matrix_coo_h AinCoo; 50 | 51 | Aggregator* aggregator; 52 | IdxVector_h aggregateIdx; 53 | IdxVector_h partitionIdx; 54 | IdxVector_h permutation_h; 55 | IdxVector_h ipermutation_h; 56 | }; 57 | #endif 58 | -------------------------------------------------------------------------------- /src/core/include/smoothedMG/aggregators/aggregator.h:
-------------------------------------------------------------------------------- 1 | #ifndef __AGGREGATOR_H__ 2 | #define __AGGREGATOR_H__ 3 | template class Aggregator; 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | template 11 | class Aggregator 12 | { 13 | typedef typename Matrix::value_type ValueType; 14 | 15 | public: 16 | virtual void computePermutation(TriMesh* meshPtr, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitonlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize) = 0; 17 | virtual void computePermutation(TetMesh* meshPtr, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitonlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize) = 0; 18 | virtual void computePermutation(int nn, int* xadj, int* adjncy, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitionlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize) = 0; 19 | virtual void computePermutation_d(IdxVector_d &adjIndexesIn, IdxVector_d &adjacencyIn, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false) = 0; 20 | virtual void computePermutation_d(TriMesh* meshPtr, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false) = 0; 21 | virtual void computePermutation_d(TetMesh* meshPtr, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d 
&adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false) = 0; 22 | 23 | virtual ~Aggregator() 24 | { 25 | } 26 | static Aggregator* allocate(int type); 27 | 28 | }; 29 | #endif 30 | -------------------------------------------------------------------------------- /src/core/include/types.h: -------------------------------------------------------------------------------- 1 | #ifndef __TYPES_H__ 2 | #define __TYPES_H__ 3 | #include 4 | #include 5 | #include 6 | 7 | typedef double CGType; 8 | //typedef float CGType; 9 | typedef float AMGType; 10 | typedef double AssembleType; 11 | 12 | template 13 | class myEll 14 | { 15 | IndexType num_rows; 16 | IndexType num_entries; 17 | cusp::array1d column_indices; 18 | cusp::array1d values; 19 | }; 20 | 21 | typedef myEll myEll_h_CG; 22 | typedef myEll myEll_d_CG; 23 | 24 | 25 | typedef cusp::csr_matrix Matrix_h_CG; 26 | typedef cusp::csr_matrix Matrix_d_CG; 27 | 28 | typedef cusp::array1d Vector_h_CG; 29 | typedef cusp::array1d Vector_d_CG; 30 | 31 | 32 | typedef cusp::ell_matrix Matrix_ell_d_CG; 33 | typedef cusp::ell_matrix Matrix_ell_h_CG; 34 | 35 | typedef cusp::coo_matrix Matrix_coo_d_CG; 36 | typedef cusp::coo_matrix Matrix_coo_h_CG; 37 | 38 | typedef cusp::hyb_matrix Matrix_hyb_d_CG; 39 | typedef cusp::hyb_matrix Matrix_hyb_h_CG; 40 | 41 | typedef cusp::csr_matrix Matrix_h; 42 | typedef cusp::csr_matrix Matrix_d; 43 | 44 | typedef cusp::array1d Vector_h; 45 | typedef cusp::array1d Vector_d; 46 | 47 | typedef cusp::array1d IdxVector_h; 48 | typedef cusp::array1d IdxVector_d; 49 | 50 | typedef cusp::ell_matrix Matrix_ell_d; 51 | typedef cusp::ell_matrix Matrix_ell_h; 52 | 53 | typedef cusp::coo_matrix Matrix_coo_d; 54 | typedef cusp::coo_matrix Matrix_coo_h; 55 | 56 | typedef cusp::hyb_matrix Matrix_hyb_d; 57 | typedef cusp::hyb_matrix Matrix_hyb_h; 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- 
/src/core/aggmis/include/AggMIS_Metrics.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: AggMIS_Metrics.h 3 | * Author: T. James Lewis 4 | * 5 | * Created on May 1, 2013, 12:19 PM 6 | */ 7 | 8 | #ifndef AGGMIS_METRICS_H 9 | #define AGGMIS_METRICS_H 10 | #include "AggMIS_Types.h" 11 | #include 12 | #include 13 | #include 14 | #include 15 | namespace AggMIS { 16 | namespace Metrics { 17 | class MetricsContext { 18 | public: 19 | MetricsContext(Types::Graph_h &graph, Types::IntVector_h &aggregation); 20 | double GetConvexityRatio(int aggId); 21 | double GetEccentricityRatio(int aggId); 22 | double GetMinimumEnclosingBallRatio(int aggId); 23 | int GetAggregateCount(); 24 | 25 | private: 26 | // Data structures 27 | Types::Graph_h *graph; 28 | Types::IntVector_h aggregation; 29 | std::vector > aggregates; 30 | std::vector > convexAggregates; 31 | int currentAggregate; 32 | 33 | // Counters 34 | int distanceLookups, makeConvexCalls; 35 | 36 | // Internal Methods 37 | int Distance(int a, int b); 38 | double GetEccentricityRatio(std::vector &aggregate); 39 | double GetMinimumEnclosingBallRatio(std::vector &aggregate); 40 | void MakeConvex(std::vector &aggregate); 41 | void EnsureConvex(int aggId); 42 | std::vector* FindCentroid(std::vector& aggregate); 43 | int FindMassScore(int node, std::vector& aggregate); 44 | std::map* FindDistances(int rootNode, std::vector& aggregate); 45 | std::vector >* GetShortestPaths(int startId, int endId, 46 | std::map &distances); 47 | std::vector >* FindExternalsInPaths(std::vector& aggregate, 48 | std::vector >* p); 49 | bool IsPathSatisfied(std::set& required, 50 | std::vector >& pathOptions); 51 | std::set* BruteForceMinimalNodes(std::vector< 52 | std::vector > >& pathOptions); 53 | bool IncrementGuessVector(std::vector& guess, std::vector > >& externalOptions); 55 | 56 | // Setup helpers 57 | void Initialize(); 58 | void GetAggregates(); 59 | }; 60 | } 61 | } 62 | 63 | #endif /* 
AGGMIS_METRICS_H */ 64 | 65 | -------------------------------------------------------------------------------- /src/core/cuda/amg_signal.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | typedef void (*signal_handler)(int); 6 | #ifndef WIN32 7 | const int NUM_SIGS=11; 8 | static int SIGNALS[NUM_SIGS] = {SIGINT,SIGQUIT,SIGILL,SIGABRT,SIGFPE,SIGKILL,SIGSEGV,SIGTERM,SIGPIPE,SIGUSR1,SIGUSR2}; 9 | #endif 10 | /**************************************** 11 | * converts a signal to a string 12 | ****************************************/ 13 | inline const char* getSigString(int sig) { 14 | switch(sig) 15 | { 16 | case SIGINT: 17 | return "SIGINT (interrupt)"; 18 | case SIGILL: 19 | return "SIGILL (illegal instruction)"; 20 | case SIGABRT: 21 | return "SIGABRT (abort)"; 22 | case SIGFPE: 23 | return "SIGFPE (floating point exception)"; 24 | case SIGSEGV: 25 | return "SIGSEGV (segmentation violation)"; 26 | case SIGTERM: 27 | return "SIGTERM (terminated)"; 28 | #ifndef WIN32 29 | case SIGKILL: 30 | return "SIGKILL (killed)"; 31 | case SIGQUIT: 32 | return "SIGQUIT (quit)"; 33 | case SIGPIPE: 34 | return "SIGPIPE (broken pipe)"; 35 | case SIGUSR1: 36 | return "SIGUSR1 (user 1)"; 37 | case SIGUSR2: 38 | return "SIGUSR2 (user 2)"; 39 | #endif 40 | default: 41 | return "UNKNOWN"; 42 | } 43 | } 44 | 45 | /***************************************** 46 | * handles the signals by printing the 47 | * error message, the stack, and exiting 48 | * where appropriate 49 | ****************************************/ 50 | inline void handle_signals(int sig) { 51 | printf("Caught signal %d - %s\n",sig,getSigString(sig)); 52 | switch(sig) { 53 | case SIGINT: 54 | case SIGTERM: 55 | //don't print stack trace since the user interrupted this one 56 | exit(1); 57 | break; 58 | #ifndef WIN32 59 | case SIGUSR1: case SIGUSR2: //user defined signal to print the backtrace but continue running 60 | printStackTrace(); 61 | 
break; 62 | case SIGKILL: 63 | case SIGQUIT: 64 | #endif 65 | default: 66 | printStackTrace(); 67 | exit(1); 68 | } 69 | } 70 | 71 | #include 72 | SignalHandler::SignalHandler() { 73 | #ifndef WIN32 74 | struct sigaction action; 75 | sigemptyset(&action.sa_mask); 76 | action.sa_flags=0; 77 | action.sa_handler = handle_signals; 78 | for(int i=0;i= argc) break; 18 | fname = std::string(argv[i + 1]); 19 | i++; 20 | } else if (strcmp(argv[i], "-b") == 0) { 21 | if (i + 1 >= argc) break; 22 | bName = std::string(argv[i + 1]); 23 | i++; 24 | } else if (strcmp(argv[i], "-A") == 0) { 25 | if (i + 1 >= argc) break; 26 | Aname = std::string(argv[i + 1]); 27 | i++; 28 | } 29 | } 30 | //Our main configuration object. We will set aspects where the 31 | // default values are not what we desire. 32 | FEMSolver cfg(fname, false, verbose); 33 | if (!Aname.empty()) { 34 | //Import stiffness matrix (A) 35 | if (cfg.readMatlabSparseMatrix(Aname) != 0) 36 | std::cerr << "Failed to read in A matrix: " << Aname << std::endl; 37 | } 38 | //intialize the b matrix to ones for now. 39 | Vector_h_CG b_h(cfg.getMatrixRows(), 1.0); 40 | if (!bName.empty()) { 41 | //Import right-hand-side single-column array (b) 42 | if (cfg.readMatlabArray(bName, &b_h) != 0) 43 | std::cerr << "Failed to read in b array: " << bName << std::endl; 44 | } 45 | //The answer vector. 46 | Vector_h_CG x_h(cfg.getMatrixRows(), 0.0); //intial X vector 47 | //The final call to the solver 48 | cfg.solveFEM(&x_h, &b_h); 49 | //At this point, you can do what you need with the matrices. 
50 | cfg.writeMatlabArray("output.mat", x_h); 51 | //write the VTK 52 | std::vector vals; 53 | for (size_t i = 0; i < x_h.size(); i++){ 54 | vals.push_back(x_h[i]); 55 | } 56 | int pos = cfg.filename_.find_last_of("/"); 57 | if (pos == std::string::npos) 58 | pos = cfg.filename_.find_last_of("\\"); 59 | std::string outname = cfg.filename_.substr(pos + 1, 60 | cfg.filename_.size() - 1); 61 | pos = outname.find_last_of("."); 62 | outname = outname.substr(0, pos); 63 | cfg.writeVTK(vals, outname); 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /src/core/cuda/cgcycle.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | template 7 | CG_Flex_Cycle::CG_Flex_Cycle(CycleType next_cycle, int num_iters, AMG_Level *next, const Matrix_hyb_d_CG &Aell, const Vector_d_CG &b, Vector_d_CG &x, CGType tol, int maxiters, bool verbose) 8 | { 9 | 10 | typedef typename Matrix::value_type ValueType; 11 | typedef typename Matrix::index_type IndexType; 12 | typedef typename Matrix::memory_space MemorySpace; 13 | 14 | 15 | int N = b.size(); 16 | ValueType bnorm = cusp::blas::nrm2(b); 17 | Vector_d_CG y(N); 18 | 19 | Vector_d_CG z(N); 20 | Vector_d_CG r(N); 21 | Vector_d_CG d(N); 22 | Vector_d_CG p(N); 23 | 24 | cusp::multiply(Aell, x, y); 25 | cusp::blas::axpby(b, y, r, ValueType(1), ValueType(-1)); 26 | next->cycle_level0(next_cycle, r, z); 27 | cusp::blas::copy(z, p); 28 | 29 | ValueType rzold = cusp::blas::dotc(r, z); 30 | ValueType rznew; 31 | 32 | int niter = 0; 33 | double iter_start, iter_stop; 34 | iter_start = CLOCK(); 35 | while(niter < maxiters) 36 | { 37 | 38 | cusp::multiply(Aell, p, y); 39 | ValueType yp = cusp::blas::dotc(y, p); 40 | ValueType alpha = rzold / yp; 41 | cusp::blas::axpy(p, x, alpha); 42 | cusp::blas::axpy(y, r, -alpha); 43 | ValueType normr = cusp::blas::nrm2(r); 44 | if (verbose) 45 | std::cout << "normr=" << 
std::scientific << normr << " niter=" << niter << std::endl; 46 | 47 | if( (normr / bnorm) <= tol) 48 | break; 49 | 50 | niter++; 51 | next->cycle_level0(next_cycle, r, z, verbose); 52 | rznew = cusp::blas::dotc(z, r); 53 | ValueType beta = rznew / rzold; 54 | cusp::blas::axpby(z, p, p, ValueType(1), beta); 55 | rzold = rznew; 56 | } 57 | cudaDeviceSynchronize(); /* replaces the deprecated cudaThreadSynchronize() */ 58 | iter_stop = CLOCK(); 59 | if (verbose) { 60 | std::cout << "average time per iteration: " << (iter_stop-iter_start) / (niter > 0 ? niter : 1) /* niter is still 0 when the first residual check already converged */ << std::endl; 61 | std::cout << "total solve time: " << (iter_stop-iter_start) << std::endl; 62 | } 63 | 64 | y.clear(); 65 | z.clear(); 66 | r.clear(); 67 | d.clear(); 68 | p.clear(); 69 | } 70 | 71 | /**************************************** 72 | * Explicit instantiations 73 | ***************************************/ 74 | template class CG_Flex_Cycle; 75 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(FEM_Solver) 3 | 4 | if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") 5 | add_definitions(-D_CRT_SECURE_NO_WARNINGS) 6 | endif() 7 | 8 | SET(BUILD_EXAMPLES false CACHE BOOL "Build sample executables") 9 | 10 | find_package(CUDA REQUIRED) 11 | include_directories(${CUDA_INCLUDE_DIRS}) 12 | ################### 13 | # Externals 14 | include(ExternalProject) 15 | set(EXTERNAL_DIR "${CMAKE_BINARY_DIR}/externals") 16 | # Metis 17 | set(METIS_INSTALL_DIR "${EXTERNAL_DIR}/metis") 18 | ExternalProject_Add( metis 19 | PREFIX "${METIS_INSTALL_DIR}" 20 | GIT_REPOSITORY "https://github.com/cibc-internal/metis-4.0.3.git" 21 | INSTALL_DIR "${METIS_INSTALL_DIR}/include" 22 | BINARY_DIR "${METIS_INSTALL_DIR}/lib" 23 | INSTALL_COMMAND "" 24 | ) 25 | ExternalProject_Add_Step( metis copy_deps 26 | COMMAND ${CMAKE_COMMAND} -E copy_directory ${METIS_INSTALL_DIR}/src/metis/Lib ${METIS_INSTALL_DIR}/include 27 | DEPENDEES
install 28 | ) 29 | include_directories("${METIS_INSTALL_DIR}/include") 30 | SET(METIS_LIBRARY "${METIS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}metis${CMAKE_STATIC_LIBRARY_SUFFIX}") 31 | # cusp 32 | set(CUSP_INSTALL_DIR "${EXTERNAL_DIR}/cusp") 33 | ExternalProject_Add( cusp 34 | PREFIX "${CUSP_INSTALL_DIR}" 35 | GIT_REPOSITORY "https://github.com/cibc-internal/cusp.git" 36 | INSTALL_DIR "${CUSP_INSTALL_DIR}/include" 37 | BINARY_DIR "${CUSP_INSTALL_DIR}/lib" 38 | INSTALL_COMMAND "" 39 | BUILD_COMMAND "" 40 | CONFIGURE_COMMAND "" 41 | ) 42 | include_directories("${CUSP_INSTALL_DIR}/src") 43 | ################### 44 | # Core / Examples 45 | add_subdirectory(core) 46 | 47 | include_directories(core/include) 48 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 49 | 50 | CUDA_ADD_LIBRARY(FEM_SOLVER FEMSolver.cu FEMSolver.h) 51 | TARGET_LINK_LIBRARIES(FEM_SOLVER FEM_CORE) 52 | ADD_DEPENDENCIES(FEM_SOLVER FEM_CORE) 53 | 54 | if (BUILD_EXAMPLES) 55 | add_subdirectory(examples) 56 | endif() 57 | ################### 58 | # Testing 59 | INCLUDE(CTest) 60 | if (BUILD_TESTING) 61 | ENABLE_TESTING() 62 | # Use gcov for code coverage (useful for seeing test completeness) 63 | OPTION(USE_GCOV "Enable code coverage for GNUCXX Platforms" OFF) 64 | if (USE_GCOV) 65 | if(CMAKE_COMPILER_IS_GNUCXX) 66 | INCLUDE(${PROJECT_SOURCE_DIR}/CodeCoverage.cmake) 67 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") 68 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") 69 | endif() 70 | endif() 71 | ADD_SUBDIRECTORY(test) 72 | endif() 73 | -------------------------------------------------------------------------------- /src/examples/example1.cu: -------------------------------------------------------------------------------- 1 | #include "FEMSolver.h" 2 | /** 3 | * SCI-Solver_FEM :: Example 1 4 | * This example is the basic steps for running the solver: 5 | * 1. We define our main FEMSolver object. 6 | * 2. We set all of the parameters we want.
(Otherwise defaults used.) 7 | * 3. We read in our input data mesh. 8 | * 4. We declare all the variables we need for the solver (matrices). 9 | * 5. We invoke the "setupFEM" call, which does all of the work. 10 | */ 11 | 12 | int main(int argc, char** argv) 13 | { 14 | //option 15 | std::string Aname = "", bName, fname = "../src/test/test_data/CubeMesh_size256step16"; 16 | bool verbose = false; 17 | for (int i = 0; i < argc; i++) { 18 | if (strcmp(argv[i], "-v") == 0) { 19 | verbose = true; 20 | } else if (strcmp(argv[i], "-i") == 0) { 21 | if (i + 1 >= argc) break; 22 | fname = std::string(argv[i + 1]); 23 | i++; 24 | } else if (strcmp(argv[i], "-b") == 0) { 25 | if (i + 1 >= argc) break; 26 | bName = std::string(argv[i + 1]); 27 | i++; 28 | } else if (strcmp(argv[i], "-A") == 0) { 29 | if (i + 1 >= argc) break; 30 | Aname = std::string(argv[i + 1]); 31 | i++; 32 | } 33 | } 34 | //Our main configuration object. We will set aspects where the 35 | // default values are not what we desire. 36 | FEMSolver cfg(fname, true, verbose); 37 | if (!Aname.empty()) { 38 | //Import stiffness matrix (A) 39 | if (cfg.readMatlabSparseMatrix(Aname) != 0) 40 | std::cerr << "Failed to read in A matrix: " << Aname << std::endl; 41 | } 42 | //intialize the b matrix to ones for now. 43 | Vector_h_CG b_h(cfg.getMatrixRows(), 1.0); 44 | if (!bName.empty()) { 45 | //Import right-hand-side single-column array (b) 46 | if (cfg.readMatlabArray(bName, &b_h) != 0) 47 | std::cerr << "Failed to read in b array: " << bName << std::endl; 48 | } 49 | //The answer vector. 50 | Vector_h_CG x_h(cfg.getMatrixRows(), 0.0); //intial X vector 51 | //The final call to the solver 52 | cfg.solveFEM(&x_h, &b_h); 53 | //At this point, you can do what you need with the matrices. 54 | if (cfg.writeMatlabArray("output.mat", x_h)) { 55 | std::cerr << "failed to write matlab file." 
<< std::endl; 56 | } 57 | //write the VTK 58 | std::vector vals; 59 | for (size_t i = 0; i < x_h.size(); i++){ 60 | vals.push_back(x_h[i]); 61 | } 62 | int pos = cfg.filename_.find_last_of("/"); 63 | if (pos == std::string::npos) 64 | pos = cfg.filename_.find_last_of("\\"); 65 | std::string outname = cfg.filename_.substr(pos + 1, 66 | cfg.filename_.size() - 1); 67 | cfg.writeVTK(vals, outname); 68 | return 0; 69 | } 70 | -------------------------------------------------------------------------------- /src/core/aggmis/include/AggMIS_FileIO.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: AggMIS_FileIO.h 3 | * Author: nachtluce 4 | * 5 | * Created on April 17, 2013, 4:23 PM 6 | */ 7 | 8 | #ifndef AGGMIS_FILEIO_H 9 | #define AGGMIS_FILEIO_H 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "TriMesh.h" 16 | #include "tetmesh.h" 17 | #include "AggMIS_Types.h" 18 | namespace AggMIS { 19 | namespace FileIO 20 | { 21 | class DataCollector { 22 | public: 23 | DataCollector(std::string title); 24 | void set(std::string name, double value); 25 | void set(std::string name, int value); 26 | void set(std::string name, std::string value); 27 | void set(std::string name, double value, bool keep); 28 | void set(std::string name, int value, bool keep); 29 | void set(std::string name, std::string value, bool keep); 30 | void closeRow(); 31 | void blankRow(); 32 | void writeOutCSV(std::ostream *outputStream); 33 | private: 34 | vector > data; 35 | vector keeping; 36 | std::string title; 37 | bool dirty; 38 | }; 39 | 40 | // Takes a filename and tries to load a graph from it 41 | // by automatically detecting the file type. 
42 | Types::Graph_h* GetGraphFromFile_Auto(std::string filename); 43 | 44 | // Takes an input stream and reads in a graph in text csr format 45 | Types::Graph_h* GetGraphFromFile_CSR(std::istream *theInput); 46 | 47 | // Takes an input stream and reads in a graph in .MSH format 48 | Types::Graph_h* GetGraphFromFile_MSH(std::istream *theInput); 49 | 50 | // Takes a filename and loads the graph from a triangular mesh 51 | // stored in .ply format using Trimesh library 52 | Types::Graph_h* GetGraphFromFile_TriMesh(std::string filename); 53 | 54 | // Takes a filename and loads the graph from a tetrahedral mesh 55 | // stored in .node/.ele format using the tetmesh library 56 | Types::Graph_h* GetGraphFromFile_TetMesh(std::string filename); 57 | 58 | // Takes a filename and loads a vector from it 59 | Types::IntVector_h* GetVectorFromFile_BIN(std::string filename); 60 | 61 | // Writes out the graph to the specified file in CSR format 62 | void WriteGraphToFile_CSR(Types::Graph_h graph, std::string filename); 63 | 64 | // Writes out vector to the specified file 65 | void WriteVectorToFile_BIN(Types::IntVector_h toWrite, std::string filename); 66 | } 67 | } 68 | #endif /* AGGMIS_FILEIO_H */ 69 | 70 | 71 | -------------------------------------------------------------------------------- /src/core/include/amg.h: -------------------------------------------------------------------------------- 1 | #ifndef __AMG_H__ 2 | #define __AMG_H__ 3 | template class AMG; 4 | 5 | enum SolverType {AMG_SOLVER,PCG_SOLVER}; 6 | 7 | enum ConvergenceType { ABSOLUTE_CONVERGENCE, RELATIVE_CONVERGENCE }; 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "TriMesh.h" 14 | #include "tetmesh.h" 15 | 16 | /********************************************************* 17 | * AMG Class 18 | * This class provides the user interface to the AMG 19 | * solver library. 
20 | ********************************************************/ 21 | template 22 | class AMG 23 | { 24 | typedef typename Matrix::value_type ValueType; 25 | friend class AMG_Level; 26 | friend class SmoothedMG_AMG_Level; 27 | 28 | public: 29 | AMG(bool verbose, int convergeType, int cycleType, 30 | int solverType, double tolerance, int cycleIters, int maxIters, 31 | int maxLevels, int topSize, double smootherWeight, 32 | int preInnerIters, int postInnerIters, int postRelaxes, 33 | int dsType, int randMisParameters, int partitionMaxSize, double proOmega, 34 | int aggregatorType, int blockSize, TriMesh* triMesh, TetMesh* tetMesh); 35 | ~AMG(); 36 | 37 | void solve(const Vector_d_CG &b, Vector_d_CG &x); 38 | void solve_iteration(const Vector_d_CG &b, Vector_d_CG &x); 39 | 40 | void setup(const Matrix_d &Acsr_d); 41 | 42 | void printGridStatistics(); 43 | 44 | // profiling & debug output 45 | void printProfile(); 46 | void printCoarsePoints(); 47 | void printConnections(); 48 | //config parameters 49 | bool verbose_; 50 | ConvergenceType convergeType_; 51 | CycleType cycleType_; 52 | SolverType solverType_; 53 | double tolerance_; 54 | int cycleIters_; 55 | int maxIters_; 56 | int maxLevels_; 57 | int topSize_; 58 | double smootherWeight_; 59 | int preInnerIters_; // the pre inner iterations for GSINNER 60 | int postInnerIters_; // the post inner iterations for GSINNER 61 | int postRelaxes_; // the number of post relax iterations 62 | int dsType_; 63 | int randMisParameters_; 64 | int partitionMaxSize_; 65 | double proOmega_; 66 | int aggregatorType_; 67 | int blockSize_; 68 | TriMesh* triMesh_; 69 | TetMesh* tetMesh_; 70 | private: 71 | bool converged(const Vector &r, ValueType &nrm); 72 | 73 | cusp::detail::lu_solver LU; 74 | 75 | AMG_Level* fine; 76 | ValueType initial_nrm; 77 | int iterations; 78 | int num_levels; 79 | int coarsestlevel; 80 | 81 | Matrix_hyb_d_CG Ahyb_d_CG; 82 | 83 | double solve_start, solve_stop; 84 | double setup_start, setup_stop; 85 | }; 86 | 
#endif 87 | -------------------------------------------------------------------------------- /src/FEMSolver.h: -------------------------------------------------------------------------------- 1 | #ifndef __FEMSOLVER_H__ 2 | #define __FEMSOLVER_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "TriMesh.h" 10 | #include "tetmesh.h" 11 | #include "types.h" 12 | 13 | /** The class that represents all of the available options for FEM */ 14 | class FEMSolver { 15 | private: 16 | class SparseEntry_t { 17 | public: 18 | int32_t row_; 19 | int32_t col_; 20 | float val_; 21 | SparseEntry_t(int32_t r, int32_t c, float v) : row_(r), col_(c), val_( 22 | static_cast(v)) {} 23 | ~SparseEntry_t() {} 24 | }; 25 | bool InitCUDA(); 26 | static bool compare_sparse_entry(SparseEntry_t a, SparseEntry_t b); 27 | public: 28 | FEMSolver(std::string fname = "../src/test/test_data/simple", 29 | bool isTetMesh = true, bool verbose = false); 30 | virtual ~FEMSolver(); 31 | void solveFEM(Vector_h_CG* x_h, Vector_h_CG* b_h); 32 | void getMatrixFromMesh(); 33 | int readMatlabSparseMatrix(const std::string &filename); 34 | int readMatlabArray(const std::string &filename, Vector_h_CG* rhs); 35 | int writeMatlabArray(const std::string &filename, const Vector_h_CG &array); 36 | void checkMatrixForValidContents(Matrix_ell_h* A_h); 37 | void writeVTK(std::vector values, std::string fname); 38 | size_t getMatrixRows(); 39 | //data members 40 | bool verbose_; // output verbosity 41 | std::string filename_; // mesh file name 42 | int maxLevels_; // the maximum number of levels 43 | int maxIters_; // the maximum solve iterations 44 | int preInnerIters_; // the pre inner iterations for GSINNER 45 | int postInnerIters_; // the post inner iterations for GSINNER 46 | int postRelaxes_; // the number of post relax iterations 47 | int cycleIters_; // the number of CG iterations per outer iteration 48 | int dsType_; // data structure type 49 | int topSize_; // max size of 
coarsest level 50 | int randMisParameters_; // parameter for the randomized MIS aggregator -- NOTE(review): the previous comment repeated topSize_'s text; confirm the exact meaning 51 | int partitionMaxSize_; // max size of the partition 52 | int aggregatorType_; // aggregator oldMis (0), metis bottom up (1), 53 | // metis top down (2), aggMisGPU (3), aggMisCPU (4), newMisLight (5) 54 | int convergeType_; // the convergence tolerance algorithm 55 | double tolerance_; // the convergence tolerance 56 | int cycleType_; // the cycle algorithm 57 | int solverType_; // the solving algorithm 58 | double smootherWeight_; // the weight parameter used in a smoother 59 | double proOmega_; // the weight parameter used in prolongator smoother 60 | int device_; // the GPU device number to specify 61 | int blockSize_; 62 | //The pointers to the meshes 63 | TetMesh * tetMesh_; 64 | TriMesh * triMesh_; 65 | //The A matrix used by the solver 66 | Matrix_ell_h A_h_; 67 | }; 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /src/core/include/Color.h: -------------------------------------------------------------------------------- 1 | #ifndef COLOR_H 2 | #define COLOR_H 3 | /* 4 | Szymon Rusinkiewicz 5 | Princeton University 6 | 7 | Color.h 8 | Random class for encapsulating colors... 
9 | */ 10 | 11 | #include "Vec.h" 12 | #include 13 | #include 14 | #ifndef M_PI 15 | # define M_PI 3.14159265358979323846 16 | #endif 17 | 18 | 19 | class Color : public Vec<3,double> { 20 | public: 21 | Color() 22 | {} 23 | Color(const Vec<3,float> &v_) : Vec<3,double>(v_) 24 | {} 25 | Color(const Vec<3,double> &v_) : Vec<3,double>((double)v_[0], (double)v_[1], (double)v_[2]) 26 | {} 27 | Color(float r, float g, float b) : Vec<3,double>(r,g,b) 28 | {} 29 | Color(double r, double g, double b) : Vec<3,double>((double)r, (double)g, (double)b) 30 | {} 31 | explicit Color(const float *rgb) : Vec<3,double>(rgb[0], rgb[1], rgb[2]) 32 | {} 33 | explicit Color(const double *rgb) : Vec<3,double>((double)rgb[0], (double)rgb[1], (double)rgb[2]) 34 | {} 35 | 36 | // Implicit conversion from float would be bad, so we have an 37 | // explicit constructor and an assignment statement. 38 | explicit Color(float c) : Vec<3,double>(c,c,c) 39 | {} 40 | explicit Color(double c) : Vec<3,double>((double)c, (double)c, (double)c) 41 | {} 42 | Color &operator = (float c) 43 | { return *this = Color(c); } 44 | Color &operator = (double c) 45 | { return *this = Color(c); } 46 | 47 | // Assigning from ints divides by 255 48 | Color(int r, int g, int b) 49 | { 50 | const float mult = 1.0f / 255.0f; 51 | *this = Color(mult*r, mult*g, mult*b); 52 | } 53 | explicit Color(const int *rgb) 54 | { *this = Color(rgb[0], rgb[1], rgb[2]); } 55 | explicit Color(const unsigned char *rgb) 56 | { *this = Color(rgb[0], rgb[1], rgb[2]); } 57 | explicit Color(int c) 58 | { *this = Color(c,c,c); } 59 | Color &operator = (int c) 60 | { return *this = Color(c); } 61 | 62 | static Color black() 63 | { return Color(0.0f, 0.0f, 0.0f); } 64 | static Color white() 65 | { return Color(1.0f, 1.0f, 1.0f); } 66 | static Color red() 67 | { return Color(1.0f, 0.0f, 0.0f); } 68 | static Color green() 69 | { return Color(0.0f, 1.0f, 0.0f); } 70 | static Color blue() 71 | { return Color(0.0f, 0.0f, 1.0f); } 72 | static Color 
yellow() 73 | { return Color(1.0f, 1.0f, 0.0f); } 74 | static Color cyan() 75 | { return Color(0.0f, 1.0f, 1.0f); } 76 | static Color magenta() 77 | { return Color(1.0f, 0.0f, 1.0f); } 78 | static Color hsv(float h, float s, float v) 79 | { 80 | // From FvD 81 | if (s <= 0.0f) 82 | return Color(v,v,v); 83 | h = fmod(h, float(2.0f * M_PI)); 84 | if (h < 0.0) 85 | h += (float)(2.0 * M_PI); 86 | h /= (float)(M_PI / 3.0); 87 | int i = int(std::floor(h)); 88 | float f = h - i; 89 | float p = v * (1.0f - s); 90 | float q = v * (1.0f - (s*f)); 91 | float t = v * (1.0f - (s*(1.0f-f))); 92 | switch(i) { 93 | case 0: return Color(v, t, p); 94 | case 1: return Color(q, v, p); 95 | case 2: return Color(p, v, t); 96 | case 3: return Color(p, q, v); 97 | case 4: return Color(t, p, v); 98 | default: return Color(v, p, q); 99 | } 100 | } 101 | }; 102 | 103 | #endif 104 | -------------------------------------------------------------------------------- /src/core/include/my_timer.h: -------------------------------------------------------------------------------- 1 | #ifndef __MY_TIMER_H__ 2 | #define __MY_TIMER_H__ 3 | #ifdef __MACH__ 4 | #include 5 | #define CLOCK_REALTIME 0 6 | #define CLOCK_MONOTONIC 0 7 | int inline clock_gettime(int clk_id, struct timespec *t){ // __MACH__ stand-in for clock_gettime: fills *t from mach_absolute_time() scaled by the mach timebase; clk_id is ignored; returns 0 8 | mach_timebase_info_data_t timebase; 9 | mach_timebase_info(&timebase); 10 | uint64_t time; 11 | time = mach_absolute_time(); 12 | double nseconds = ((double)time * (double)timebase.numer)/((double)timebase.denom); // tick count converted to nanoseconds 13 | uint64_t total_ns = (uint64_t)nseconds; 14 | t->tv_sec = total_ns / 1000000000ULL; // whole seconds 15 | t->tv_nsec = total_ns % 1000000000ULL; // sub-second remainder only, so tv_sec + tv_nsec*1e-9 (as CLOCK() computes) counts each second once 16 | return 0; 17 | } 18 | #else 19 | #include 20 | #endif 21 | 22 | 23 | /********************************************** 24 | * A simple high resolution timer 25 | *********************************************/ 26 | double inline CLOCK() { 27 | #ifdef WIN32 28 | #include 29 | #include 30 | SYSTEMTIME st; 31 | GetSystemTime(&st); 32 | return 
((st.wDay * 24. + st.wHour) * 60. + st.wMinute) * 60. + st.wSecond + st.wMilliseconds / 1000.; 33 | #else 34 | timespec ts; 35 | clock_gettime(CLOCK_REALTIME,&ts); 36 | return ts.tv_sec+ts.tv_nsec*1e-9; 37 | #endif 38 | } 39 | 40 | 41 | /********************************************** 42 | * class for holding profiling data if desired 43 | *********************************************/ 44 | 45 | #include 46 | #include 47 | #include 48 | #include 49 | 50 | typedef std::map Event; 51 | typedef std::map::iterator Eiter; 52 | 53 | class levelProfile { 54 | private: 55 | #ifdef PROFILE 56 | Event Times; 57 | Event Tic; 58 | #endif 59 | 60 | public: 61 | levelProfile() { } 62 | ~levelProfile() {} 63 | 64 | inline void tic(const char *event) 65 | { 66 | #ifdef PROFILE 67 | // cudaThreadSynchronize(); 68 | Tic[event] = CLOCK(); 69 | #endif 70 | } 71 | 72 | inline void toc(const char *event) { 73 | #ifdef PROFILE 74 | // cudaThreadSynchronize(); 75 | double t = CLOCK(); 76 | Times[event] += t-Tic[event]; 77 | #endif 78 | } 79 | 80 | #ifdef PROFILE 81 | std::vector 82 | #else 83 | void 84 | #endif 85 | inline getHeaders() 86 | { 87 | #ifdef PROFILE 88 | std::vector headerVec; 89 | for (Eiter it=Times.begin(); it!=Times.end(); ++it) { 90 | headerVec.push_back(it->first); 91 | } 92 | return headerVec; 93 | #endif 94 | } 95 | 96 | #ifdef PROFILE 97 | std::vector 98 | #else 99 | void 100 | #endif 101 | inline getTimes() 102 | { 103 | #ifdef PROFILE 104 | std::vector times; 105 | for (Eiter it=Times.begin(); it!=Times.end(); ++it) { 106 | times.push_back(it->second); 107 | } 108 | return times; 109 | #endif 110 | } 111 | 112 | /******************************************** 113 | * Reset all events 114 | *******************************************/ 115 | inline void resetTimer() { 116 | #ifdef PROFILE 117 | for (Eiter it=Times.begin(); it!=Times.end(); ++it) { 118 | it->second = 0.0; 119 | } 120 | #endif 121 | } 122 | }; 123 | #endif 124 | 
-------------------------------------------------------------------------------- /src/core/include/cutil.h: -------------------------------------------------------------------------------- 1 | #ifndef __CUTIL_H__ 2 | #define __CUTIL_H__ 3 | 4 | #include 5 | #include 6 | 7 | 8 | 9 | /********************************************************** 10 | * Checks for a cuda error and if one exists prints it, 11 | * the stack trace, and exits 12 | *********************************************************/ 13 | #define cudaCheckError() { \ 14 | cudaError_t e=cudaGetLastError(); \ 15 | char error_str[100]; \ 16 | if(e!=cudaSuccess) { \ 17 | sprintf(error_str,"Cuda failure: '%s'",cudaGetErrorString(e)); \ 18 | FatalError(error_str); \ 19 | } \ 20 | } 21 | 22 | #define cudaSafeCall(x) {(x); cudaCheckError()} 23 | 24 | template 25 | void computeResidual(const Matrix& A, const Vector& x, const Vector& b, Vector& r); 26 | 27 | template 28 | __global__ void find_diag_kernel(const IndexType num_rows, const IndexType num_cols, const IndexType num_cols_per_row, const IndexType pitch, 29 | const IndexType * Aj, 30 | const ValueType* Ax, 31 | ValueType* diag) 32 | { 33 | const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x; 34 | const IndexType grid_size = gridDim.x * blockDim.x; 35 | 36 | for (IndexType row = thread_id; row < num_rows; row += grid_size) 37 | { 38 | IndexType offset = row; 39 | 40 | for (IndexType n = 0; n < num_cols_per_row; n++) 41 | { 42 | const IndexType col = Aj[offset]; 43 | 44 | if (col == row) 45 | { 46 | const ValueType A_ij = Ax[offset]; 47 | diag[row] = A_ij; 48 | } 49 | 50 | offset += pitch; 51 | } 52 | } 53 | } 54 | 55 | /************************************************** 56 | * structs for converting between signed and unsigned values without 57 | * type casting. 58 | * ************************************************/ 59 | 60 | /***************************** 61 | * Generic converter for unsigned types. 
62 | * This becomes a no op 63 | *****************************/ 64 | template 65 | struct intuint 66 | { 67 | 68 | union 69 | { 70 | GlobalOrdinal ival; 71 | GlobalOrdinal uval; 72 | }; 73 | }; 74 | 75 | /*************************** 76 | * char converter 77 | **************************/ 78 | template <> 79 | struct intuint 80 | { 81 | 82 | union 83 | { 84 | char ival; 85 | unsigned char uval; 86 | }; 87 | }; 88 | 89 | /*************************** 90 | * Short converter 91 | **************************/ 92 | template <> 93 | struct intuint 94 | { 95 | 96 | union 97 | { 98 | short ival; 99 | unsigned short uval; 100 | }; 101 | }; 102 | 103 | /*************************** 104 | * Integer converter 105 | **************************/ 106 | template <> 107 | struct intuint 108 | { 109 | 110 | union 111 | { 112 | int ival; 113 | unsigned int uval; 114 | }; 115 | }; 116 | 117 | /*************************** 118 | * long converter 119 | **************************/ 120 | template <> 121 | struct intuint 122 | { 123 | 124 | union 125 | { 126 | long ival; 127 | unsigned long uval; 128 | }; 129 | }; 130 | 131 | struct metisinput 132 | { 133 | int nn; 134 | int* xadj; 135 | int* adjncy; 136 | }; 137 | 138 | struct cudaCSRGraph 139 | { 140 | int nn; 141 | int* xadj; 142 | int* adjncy; 143 | }; 144 | 145 | template 146 | void trimesh2ell(TriMesh* meshPtr, Matrix &A); 147 | 148 | template 149 | void trimesh2csr(TriMesh* meshPtr, Matrix &A); 150 | 151 | template 152 | void tetmesh2ell(TetMesh* meshPtr, Matrix &A, bool verbose); 153 | 154 | void convertSym2gen(Matrix_d_CG &Acsr, Matrix_coo_d_CG &Aout); 155 | #endif 156 | -------------------------------------------------------------------------------- /src/core/include/amg_level.h: -------------------------------------------------------------------------------- 1 | #ifndef __AMG_LEVEL_H__ 2 | #define __AMG_LEVEL_H__ 3 | template class AMG_Level; 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 
#include "TriMesh.h" 12 | #include "tetmesh.h" 13 | #include 14 | 15 | /******************************************************** 16 | * AMG Level class: 17 | * This class is a base class for AMG levels. This 18 | * class is a linked list of levels where each 19 | * level contains the solution state for that level. 20 | ********************************************************/ 21 | template 22 | class AMG_Level 23 | { 24 | friend class AMG; 25 | public: 26 | 27 | AMG_Level(AMG *amg) : smoother(0), amg(amg), next(0), init(false) 28 | {}; 29 | virtual ~AMG_Level(); 30 | 31 | virtual void restrictResidual(const Vector &r, Vector &rr) = 0; 32 | virtual void prolongateAndApplyCorrection(const Vector &c, Vector &x, Vector &tmp) = 0; 33 | virtual void createNextLevel(bool verbose = false) = 0; 34 | 35 | void setup(); 36 | void cycle(CycleType cycle, Vector_d &b, Vector_d &x, bool verbose = false); 37 | void cycle_level0(CycleType cycle, Vector_d_CG &b, Vector_d_CG &x, bool verbose = false); 38 | 39 | void setInitCycle() 40 | { 41 | init = true; 42 | } 43 | 44 | void unsetInitCycle() 45 | { 46 | init = false; 47 | } 48 | 49 | int getLevel() 50 | { 51 | return level_id; 52 | } 53 | 54 | bool isInitCycle() 55 | { 56 | return init; 57 | } 58 | 59 | inline Matrix_d& getA_d() 60 | { 61 | return A_d; 62 | } 63 | 64 | inline bool isFinest() 65 | { 66 | return level_id == 0; 67 | } 68 | 69 | inline bool isCoarsest() 70 | { 71 | return next == NULL; 72 | } 73 | 74 | static AMG_Level* allocate(AMG*amg); 75 | 76 | protected: 77 | typedef typename Matrix::index_type IndexType; 78 | typedef typename Matrix::value_type ValueType; 79 | typedef typename Matrix::memory_space MemorySpace; 80 | levelProfile Profile; 81 | std::vector originalRow; 82 | std::vector getOriginalRows(); 83 | 84 | protected: 85 | TriMesh* m_meshPtr; 86 | TetMesh* m_tetmeshPtr; 87 | int nn; 88 | IdxVector_h m_xadj; 89 | IdxVector_h m_adjncy; 90 | 91 | IdxVector_d m_xadj_d; 92 | IdxVector_d m_adjncy_d; 93 | 94 | int 
nnout; 95 | // int* m_xadjout; 96 | // int* m_adjncyout; 97 | IdxVector_h m_xadjout; 98 | IdxVector_h m_adjncyout; 99 | IdxVector_d m_xadjout_d; 100 | IdxVector_d m_adjncyout_d; 101 | 102 | int largestblock; 103 | int largestblocksize; 104 | // Matrix A; 105 | Vector prolongator; //incomplete prolongator 106 | Matrix_coo_h prolongatorFull; 107 | Matrix_ell_h AinEll; 108 | Matrix_h AinCsr; 109 | // Matrix_coo_h AinSysCoo; 110 | Matrix_coo_h Aout; 111 | // Matrix_coo_h AoutSys; 112 | IdxVector_h partSyncIdx_h; 113 | IdxVector_h segSyncIdx_h; 114 | 115 | 116 | Vector_d prolongator_d; //incomplete prolongator 117 | Matrix_hyb_d prolongatorFull_d; 118 | Matrix_hyb_d restrictorFull_d; 119 | Matrix_d A_d; 120 | Matrix_ell_d AinEll_d; 121 | Matrix_d AinCSR_d; 122 | Matrix_coo_d Aout_d; 123 | IdxVector_d AinBlockIdx_d; 124 | IdxVector_d AoutBlockIdx_d; 125 | Matrix_coo_d AinSysCoo_d; 126 | Matrix_coo_d AoutSys_d; 127 | Vector_d bc_d, xc_d, r_d; 128 | IdxVector_d aggregateIdx_d; 129 | IdxVector_d partitionIdx_d; 130 | IdxVector_d permutation_d; 131 | IdxVector_d ipermutation_d; 132 | IdxVector_d partSyncIdx_d; 133 | IdxVector_d segSyncIdx_d; 134 | Smoother* smoother; 135 | 136 | 137 | AMG* amg; 138 | AMG_Level* next; 139 | int largest_num_entries; 140 | int largest_num_per_row; 141 | int largest_num_segment; 142 | int level_id; 143 | bool init; //marks if the x vector needs to be initialized 144 | }; 145 | #endif 146 | -------------------------------------------------------------------------------- /src/core/include/tetmesh.h: -------------------------------------------------------------------------------- 1 | #ifndef TETMESH_H 2 | #define TETMESH_H 3 | /* 4 | TetMesh: Class for tetrahedral meshes based on TriMesh by 5 | Szymon Rusinkiewicz 6 | Princeton University 7 | 8 | TriMesh.h 9 | Class for triangle meshes. 
10 | */ 11 | 12 | #define LARGENUM 10000000.0 13 | #define SMALLNUM 0.00000001 14 | #define ONE 1 15 | #define CURVATURE 2 16 | #define NOISE 3 17 | #define SPEEDTYPE ONE 18 | #ifndef M_PI 19 | #define M_PI 3.14159265358979323846 20 | #endif 21 | 22 | #include "Vec.h" 23 | #include 24 | #include 25 | #include 26 | 27 | class TetMesh 28 | { 29 | 30 | public: 31 | // Types 32 | 33 | struct Tet 34 | { 35 | int v[4]; 36 | 37 | Tet() 38 | { 39 | } 40 | 41 | Tet(const int &v0, const int &v1, const int &v2, const int &v3) 42 | { 43 | v[0] = v0; 44 | v[1] = v1; 45 | v[2] = v2; 46 | v[3] = v3; 47 | } 48 | 49 | Tet(const int *v_) 50 | { 51 | v[0] = v_[0]; 52 | v[1] = v_[1]; 53 | v[2] = v_[2]; 54 | v[3] = v_[3]; 55 | } 56 | 57 | int &operator[] (int i) 58 | { 59 | return v[i]; 60 | } 61 | 62 | const int &operator[] (int i)const 63 | { 64 | return v[i]; 65 | } 66 | 67 | operator const int * () const 68 | { 69 | return &(v[0]); 70 | } 71 | 72 | operator const int * () 73 | { 74 | return &(v[0]); 75 | } 76 | 77 | operator int * () 78 | { 79 | return &(v[0]); 80 | } 81 | 82 | int indexof(int v_) const 83 | { 84 | return (v[0] == v_) ? 0 : 85 | (v[1] == v_) ? 1 : 86 | (v[2] == v_) ? 2 : 87 | (v[3] == v_) ? 3 : -1; 88 | } 89 | }; 90 | 91 | // The basics: vertices and faces 92 | std::vector vertices; 93 | std::vector tets; 94 | std::vector matlabels; 95 | // Connectivity structures: 96 | // For each vertex, all neighboring vertices 97 | std::vector< std::vector > neighbors; 98 | // For each vertex, all neighboring faces 99 | std::vector< std::vector > adjacenttets; 100 | std::vector across_face; 101 | 102 | std::vector radiusInscribe; 103 | 104 | void need_meshquality(); 105 | 106 | void need_neighbors(); 107 | void need_adjacenttets(); 108 | void need_across_face(); 109 | void need_meshinfo(); 110 | void need_Rinscribe(); 111 | void rescale(int size); 112 | 113 | //Tet mesh constructor 114 | // nodefilename: file containing the XYZ position of each node or point. 
115 | // This must have the extension .node, and have the following 116 | // characteristics: ASCII text with one node per line. Values are space- 117 | // delimited. First line is a header line with 4 values: 'n 3 0 0' 118 | // where n is the total number of nodes. Subsequent lines have the 119 | // format 'i x y z' where i is the node number (starts at 1), 120 | // and xyz are floats representing the node position in 3D space. 121 | // elefilename: file containing the 4 nodes that define each tetrahedron. 122 | // This must have the extension .ele, and have the following 123 | // characteristics: ASCII text with one element per line. Values are 124 | // space delimited. First line is a header line with 3 values: 't 4 0' 125 | // where t is the total number of elements. Subsequent lines have the 126 | // format 'e a b c d' where e is the element number (starts at 1), 127 | // and abcd are integers representing the node numbers from the .node file. 128 | // zero_based: set to true if the element numbers in the file are zero- 129 | // based (defaults to false). NOTE(review): read() as declared below takes no zero_based parameter; confirm whether this entry is stale. 
130 | // verbose: set to true for verbose output 131 | static TetMesh *read(const char *nodefilename, const char* elefilename, const bool verbose = false); 132 | //void write(const char *filename); 133 | 134 | // Debugging printout, controllable by a "verbose"ness parameter 135 | static int verbose; 136 | static void set_verbose(int); 137 | static int dprintf(const char *format, ...); 138 | 139 | //Constructor 140 | 141 | TetMesh() 142 | { 143 | } 144 | }; 145 | 146 | #endif 147 | -------------------------------------------------------------------------------- /src/core/include/smoothedMG/aggregators/mis.h: -------------------------------------------------------------------------------- 1 | #ifndef __MIS_H__ 2 | #define __MIS_H__ 3 | 4 | #include 5 | #include "types.h" 6 | 7 | extern "C" 8 | { 9 | #include "metis.h" 10 | } 11 | 12 | #include 13 | #include 14 | template class MIS_Aggregator; 15 | template class RandMIS_Aggregator; 16 | 17 | #include 18 | 19 | template 20 | class MIS_Aggregator : public Aggregator 21 | { 22 | typedef typename Matrix::value_type ValueType; 23 | public: 24 | void computePermutation(TriMesh* meshPtr, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitionlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize); 25 | void computePermutation(TetMesh* meshPtr, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitonlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize); 26 | void computePermutation(int nn, int* xadj, int* adjncy, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitionlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize); 27 | void computePermutation_d(IdxVector_d &adjIndexesIn, IdxVector_d &adjacencyIn, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d 
&partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false); 28 | void computePermutation_d(TriMesh* meshPtr, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false); 29 | void computePermutation_d(TetMesh* meshPtr, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false); 30 | private: 31 | void aggregateGraphMIS(int n, int *adjIndexes, int *adjacency, int *partition, int *partCount); 32 | }; 33 | 34 | template 35 | class RandMIS_Aggregator : public Aggregator 36 | { 37 | typedef typename Matrix::value_type ValueType; 38 | public: 39 | void computePermutation(TriMesh* meshPtr, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitionlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize); 40 | void computePermutation(TetMesh* meshPtr, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitonlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize); 41 | void computePermutation(int nn, int* xadj, int* adjncy, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitionlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize); 42 | void computePermutation_d(IdxVector_d &adjIndexesIn, IdxVector_d &adjacencyIn, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, 
IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false); 43 | void computePermutation_d(TriMesh* meshPtr, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false); 44 | void computePermutation_d(TetMesh* meshPtr, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false); 45 | private: 46 | void extendedMIS(int n, int depth, int *adjIndexes, int *adjacency, int *partition, int *partCount, bool verbose = false); 47 | }; 48 | #endif 49 | -------------------------------------------------------------------------------- /src/core/aggmis/include/AggMIS_Aggregation_GPU.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: AggMIS_Aggregation_GPU.h 3 | * Author: T. James Lewis 4 | * 5 | * Created on April 19, 2013, 11:30 AM 6 | */ 7 | 8 | #ifndef AGGMIS_AGGREGATION_GPU_H 9 | #define AGGMIS_AGGREGATION_GPU_H 10 | #include "AggMIS_Types.h" 11 | #include "AggMIS_GraphHelpers.h" 12 | #include "thrust/scan.h" 13 | #include "thrust/count.h" 14 | namespace AggMIS { 15 | namespace Aggregation { 16 | namespace Kernels { 17 | /** 18 | * In this kernel each node checks if it has been allocated 19 | * to an aggregate, if it has not, it checks to see if any 20 | * of its neighbors have been. If they have, it finds the 21 | * aggregate to which more neighbors belong to and assigns 22 | * itself to it. 
23 | * @param size The number of nodes in the graph 24 | * @param adjIndexes The adjacency indices of the graph 25 | * @param adjacency The adjacency list of the graph 26 | * @param partIn The aggregation labeling from last cycle 27 | * @param partOut The aggregation labeling being found 28 | * @param aggregated Flags whether node has been aggregated 29 | */ 30 | __global__ void allocateNodesKernel(int size, 31 | int *adjIndexes, 32 | int *adjacency, 33 | int *partIn, 34 | int *partOut, 35 | int *aggregated); 36 | /** 37 | * This kernel does the same flood filling as the allocate 38 | * kernel but only propagates between nodes of the same 39 | * aggregate. Used to verify that an aggregation consists of 40 | * aggregates which are connected components. 41 | * @param size Number of nodes in graph 42 | * @param adjIndices Graph adjacency indices 43 | * @param adjacency Graph adjacency 44 | * @param aggregation Current aggregation 45 | * @param valuesIn The values from last cycle 46 | * @param valuesOut The values to write this cycle 47 | * @param incomplete Flag which indicates whether done 48 | */ 49 | __global__ void checkAggregationFillAggregates(int size, 50 | int *adjIndices, 51 | int *adjacency, 52 | int* aggregation, 53 | int* valuesIn, 54 | int* valuesOut, 55 | int* incomplete); 56 | } 57 | namespace Functors { 58 | /** 59 | * This functor is used to sequentially number elements 60 | * in a vector. Argument a is the element in the labeling 61 | * vector. Argument b is the element in the prefix-sum 62 | * vector of the labels. If the element is not labeled it 63 | * returns -1. 
Otherwise it returns the new label id 64 | */ 65 | struct NumberParts { 66 | __host__ __device__ 67 | int operator()(const int &a, const int &b) const 68 | { 69 | if (b == 0) 70 | return -1; 71 | 72 | return a - 1; 73 | } 74 | }; 75 | } 76 | /** 77 | * This method allocates each node of the graph to the nearest 78 | * root node using simple path distance and breaking ties by 79 | * adjacency. 80 | * @param graph The input graph to be aggregated 81 | * @param roots The set of initial root points as a vector the size 82 | * of the number of graph nodes, with entries being either 0=non root 83 | * or 1=root 84 | * @return A vector with an entry for each graph node indicating which 85 | * zero indexed aggregate it was allocated to. 86 | */ 87 | Types::IntVector_d* AggregateToNearest(Types::Graph_d &graph, 88 | Types::IntVector_d &roots); 89 | /** 90 | * Checks if the given labeling constitutes a valid aggregation 91 | * of the graph. Checks that the aggregate ID's form an 92 | * uninterrupted sequence starting from zero, and that each 93 | * aggregate is a connected component. 94 | * @param graph The graph 95 | * @param aggregation The node labeling to verify 96 | * @param verbose Prints more info if true 97 | * @return True if valid aggregation false otherwise 98 | */ 99 | bool IsValidAggregation(Types::Graph_d &graph, 100 | Types::IntVector_d &aggregation, 101 | bool verbose); 102 | Types::Graph_d* GetAggregateMap(Types::IntVector_d& aggregation); 103 | // double GetEdgeCutRatio(Graph_d &graph, IntVector_d& aggregation); 104 | } 105 | } 106 | 107 | #endif /* AGGMIS_AGGREGATION_GPU_H */ 108 | 109 | -------------------------------------------------------------------------------- /src/core/aggmis/include/AggMIS_Aggregation_CPU.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: AggMIS_Aggregation_CPU.h 3 | * Author: T. 
James Lewis 4 | * 5 | * Created on July 3, 2013, 4:21 PM 6 | */ 7 | 8 | #ifndef AGGMIS_AGGREGATION_CPU_H 9 | #define AGGMIS_AGGREGATION_CPU_H 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace AggMIS { 17 | namespace Aggregation { 18 | /** 19 | * This method returns an aggregation where each node in the graph is 20 | * assigned to the nearest root point. 21 | * @param graph The graph being aggregated 22 | * @param roots The root points for the aggregation 23 | * @return An array where return[i] is the ID of the aggregate to which 24 | * node i is assigned. 25 | */ 26 | Types::IntVector_h* AggregateToNearest(Types::Graph_h &graph, 27 | Types::IntVector_h &roots); 28 | /** 29 | * This method checks if the given aggregation is valid. 30 | * @param graph The graph that was aggregated. 31 | * @param aggregation The aggregation array to check 32 | * @param verbose If true, more output will be printed. 33 | * @return True if aggregation is valid, False otherwise. 34 | */ 35 | bool IsValidAggregation(Types::Graph_h &graph, 36 | Types::IntVector_h &aggregation, 37 | bool verbose); 38 | /** 39 | * This method returns an array where each element is the count of 40 | * nodes in the corresponding aggregate. 41 | * @param aggregation The aggregation array. 42 | * @return An array giving the size of each aggregate. 43 | */ 44 | Types::IntVector_h* GetPartSizes(Types::IntVector_h &aggregation); 45 | /** 46 | * This method returns an array where each element is the sum of the 47 | * weights of all nodes in the corresponding aggregate. 48 | * @param aggregation The aggregation array. 49 | * @param nodeWeights The weights of each node. 50 | * @return An array giving the total weight of each aggregate. 
51 | */ 52 | Types::IntVector_h* GetPartSizes(Types::IntVector_h &aggregation, 53 | Types::IntVector_h &nodeWeights); 54 | /** 55 | * This method returns a sub-graph of the input graph with only nodes 56 | * contained in nodeList, and only edges between nodes both contained 57 | * in nodeList. 58 | * @param graph The graph to get sub-graph of. 59 | * @param nodeList The list of nodes defining the subgraph 60 | * @return A graph where return[i][j] is the ID of the j'th neighbor 61 | * of the i'th node of the graph. 62 | */ 63 | std::vector >* GetAggregateGraph(Types::Graph_h &graph, 64 | std::vector &nodeList); 65 | /** 66 | * Returns a node in the given graph with maximal path distance from the 67 | * specified start node. 68 | * @param graph The graph. 69 | * @param start The start node. 70 | * @return The ID of a node such that no other node has a higher 71 | * distance to the start node. 72 | */ 73 | int FindFarthestNode(std::vector > &graph, 74 | int start); 75 | /** 76 | * Marks the distance of all nodes in the given graph from the start 77 | * point. After completion distances[i] will contain the distance from 78 | * node i to the startPoint. 79 | * @param graph The input graph. 80 | * @param distances The array to mark distances in. 81 | * @param startPoint The starting point. 82 | */ 83 | void MarkDistances(std::vector > &graph, 84 | std::vector &distances, 85 | int startPoint); 86 | /** 87 | * Marks the distance of all nodes in the given graph from the set of 88 | * nodes specified in startPoints. After completion distances[i] will 89 | * contain the distance from node i to the nearest node in startPoints. 90 | * @param graph The input graph. 91 | * @param distances The array to mark distances in. 92 | * @param startPoints The set of starting nodes. 93 | */ 94 | void MarkDistances(std::vector > &graph, 95 | std::vector &distances, 96 | std::vector startPoints); 97 | /** 98 | * Returns the set of nodes in the given graph for which the sum of all 99 | * distances from them to all other nodes is minimal.
100 | * @param graph The input graph. 101 | * @return The set of nodes in the given graph for which the sum of all 102 | * distances from them to all other nodes is minimal. NOTE(review): this description and @return read like GetCentroid (declared just below, returns a pointer to a node list); FindMassScore returns a single int and its startPoint parameter is not documented here - confirm both contracts against the implementation. 103 | */ 104 | int FindMassScore(std::vector > &graph, 105 | int startPoint); 106 | std::vector* GetCentroid(std::vector > &graph, 107 | int startPoint); 108 | } 109 | } 110 | 111 | #endif /* AGGMIS_AGGREGATION_CPU_H */ 112 | 113 | -------------------------------------------------------------------------------- /src/core/aggmis/include/AggMIS_Types.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: AggMIS_Types.h 3 | * Author: T. James Lewis 4 | * 5 | * Created on April 15, 2013, 2:18 PM 6 | */ 7 | 8 | #ifndef AGGMIS_TYPES_H 9 | #define AGGMIS_TYPES_H 10 | #include "thrust/device_vector.h" 11 | #include "thrust/host_vector.h" 12 | #include "thrust/sort.h" 13 | #include "thrust/functional.h" 14 | #include "thrust/unique.h" 15 | #include "my_timer.h" 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | namespace AggMIS { 22 | bool CheckCudaError(cudaError_t code, const char *file, int line); 23 | namespace Types { 24 | typedef thrust::device_vector IntVector_d; 25 | typedef thrust::device_vector UIntVector_d; 26 | typedef thrust::host_vector IntVector_h; 27 | typedef thrust::host_vector IntVector_h_ptr; 28 | typedef thrust::host_vector IntVector_d_ptr; 29 | 30 | struct DGraph { 31 | __host__ __device__ DGraph(int _size, 32 | int *_ind, 33 | int *_adj) 34 | :size(_size), 35 | ind(_ind), 36 | adj(_adj){} 37 | __device__ int getNeighbor(int node, int neighbor) { 38 | return adj[ind[node] + neighbor]; 39 | } 40 | int size; 41 | int *ind; 42 | int *adj; 43 | }; 44 | 45 | class JTimer { 46 | public: 47 | JTimer(); 48 | ~JTimer(); 49 | void start(); 50 | void stop(); 51 | double getElapsedTimeInSec(bool host); 52 | double getElapsedTimeInMilliSec(bool host); 53 | private: 54 | 55 | double startTimeHost, endTimeHost; 56 | cudaEvent_t
startTimeCuda, endTimeCuda; 57 | bool started, stopped; 58 | float elapsedCudaTime; 59 | }; 60 | 61 | // Forward declarations of classes so the conversion constructors 62 | // will compile. 63 | class Graph_d; 64 | class Graph_h; 65 | 66 | class Graph_d { 67 | public: 68 | Graph_d(IntVector_d &indices, IntVector_d &adjacency); 69 | Graph_d(IntVector_h &indices, IntVector_h &adjacency); 70 | Graph_d(IntVector_d *indices, IntVector_d *adjacency); 71 | Graph_d(Graph_h &graph); 72 | Graph_d(); 73 | ~Graph_d(); 74 | int Size(); 75 | int* indStart(); 76 | int* adjStart(); 77 | DGraph GetD(); 78 | IntVector_d *indices; 79 | IntVector_d *adjacency; 80 | private: 81 | bool willClean; 82 | }; 83 | class Graph_h { 84 | public: 85 | Graph_h(IntVector_d &indices, IntVector_d &adjacency); 86 | Graph_h(IntVector_h &indices, IntVector_h &adjacency); 87 | Graph_h(IntVector_h *indices, IntVector_h *adjacency); 88 | Graph_h(Graph_d &graph); 89 | Graph_h(); 90 | ~Graph_h(); 91 | int Size(); 92 | int* nStart(int node); 93 | int* nEnd(int node); 94 | IntVector_h *indices; 95 | IntVector_h *adjacency; 96 | private: 97 | bool willClean; 98 | }; 99 | 100 | int* StartOf(IntVector_d &target); 101 | int* StartOf(IntVector_d *target); 102 | 103 | namespace Compare { 104 | bool AreEqual(IntVector_h& a, 105 | IntVector_h& b, 106 | bool verbose); 107 | bool AreEqual(IntVector_d& a, 108 | IntVector_d& b, 109 | bool verbose); 110 | bool AreEqual(IntVector_h& a, 111 | IntVector_d& b, 112 | bool verbose); 113 | bool AreEqual(IntVector_d& a, 114 | IntVector_h& b, 115 | bool verbose); 116 | bool AreEqual(std::vector > &a, 117 | std::vector > &b, 118 | bool verbose); 119 | bool AreEqual(Graph_h& a, 120 | Graph_h& b, 121 | bool verbose); 122 | bool AreEqual(Graph_d& a, 123 | Graph_d& b, 124 | bool verbose); 125 | bool AreEqual(Graph_h& a, 126 | Graph_d& b, 127 | bool verbose); 128 | bool AreEqual(Graph_d& a, 129 | Graph_h& b, 130 | bool verbose); 131 | } 132 | namespace Display { 133 | void 
Print(IntVector_h& toPrint, 134 | int start, 135 | int end, 136 | std::string message); 137 | void Print(IntVector_d& toPrint, 138 | int start, 139 | int end, 140 | std::string message); 141 | void Print(IntVector_d& toPrint, 142 | std::string message); 143 | void Print(IntVector_h& toPrint, 144 | std::string message); 145 | void Print(std::vector > >& toPrint, std::string message); 146 | void Print(std::vector >& toPrint, 147 | std::string message); 148 | void Print(std::vector &toPrint, 149 | int start, 150 | int end, 151 | std::string message); 152 | void Print(std::vector &toPrint, 153 | std::string message); 154 | } 155 | } 156 | } 157 | #endif /* AGGMIS_TYPES_H */ 158 | -------------------------------------------------------------------------------- /src/core/cuda/amg_level.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | template 6 | AMG_Level::~AMG_Level() 7 | { 8 | if (smoother != 0) delete smoother; 9 | if (next != 0) delete next; 10 | } 11 | 12 | #include 13 | template 14 | AMG_Level* AMG_Level::allocate(AMG*amg) 15 | { 16 | return new SmoothedMG_AMG_Level(amg); 17 | } 18 | 19 | /****************************************************** 20 | * Recursively solves the system on this level: the coarsest level is solved directly through amg->LU; any other level dispatches on amg->dsType_ to the smoother's pre-routine, recurses into next->cycle(), then calls the matching post-routine. The cycle argument is not consulted here; the recursion always passes V_CYCLE. An unrecognized dsType_ prints a message and terminates the process with a failure status. 21 | ******************************************************/ 22 | template 23 | void AMG_Level::cycle(CycleType cycle, Vector_d& b_d, Vector_d& x_d, bool verbose) 24 | { 25 | if (isCoarsest()) //solve directly 26 | { 27 | cusp::array1d temp_b(b_d); 28 | cusp::array1d temp_x(x_d.size()); 29 | amg->LU(temp_b, temp_x); 30 | x_d = temp_x; 31 | return; 32 | } else 33 | { 34 | switch (amg->dsType_) 35 | { 36 | case 0: 37 | smoother->preRRRFullSymmetric(AinSysCoo_d, AoutSys_d, AinBlockIdx_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, 38 | level_id, largestblocksize, largest_num_entries, verbose); 39 | break; 40 | case 1: 41 |
smoother->preRRRFull(AinEll_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize); 42 | break; 43 | case 2: 44 | smoother->preRRRFullCsr(AinCSR_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row); 45 | break; 46 | default: 47 | std::cout << "Wrong DStype 1!" << std::endl; 48 | exit(EXIT_FAILURE); /* unsupported dsType_: fail with a non-zero status */ 49 | 50 | } 51 | next->cycle(V_CYCLE, bc_d, xc_d, verbose); 52 | switch (amg->dsType_) 53 | { 54 | case 0: 55 | smoother->postPCRFullSymmetric(AinSysCoo_d, AinBlockIdx_d, AoutSys_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, 56 | level_id, largestblocksize, largest_num_entries); 57 | break; 58 | case 1: 59 | smoother->postPCRFull(AinEll_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize); 60 | break; 61 | case 2: 62 | smoother->postPCRFullCsr(AinCSR_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row); 63 | break; 64 | default: 65 | std::cout << "Wrong DStype 0!"
<< std::endl; 66 | exit(EXIT_FAILURE); /* unsupported dsType_: fail with a non-zero status */ 67 | 68 | } 69 | 70 | } 71 | } 72 | 73 | /* Variant of cycle() that takes Vector_d_CG vectors. On the coarsest level it solves directly through amg->LU. Otherwise it copies b_d_CG into a Vector_d, starts from a zero-filled x_d, runs the same pre-routine / next->cycle(V_CYCLE, ...) / post-routine sequence as cycle(), and finally copies x_d and b_d back into the caller's x_d_CG and b_d_CG. The cycle argument is not consulted. */ 74 | template 75 | void AMG_Level::cycle_level0(CycleType cycle, Vector_d_CG &b_d_CG, Vector_d_CG &x_d_CG, bool verbose) 76 | { 77 | if (isCoarsest()) //solve directly 78 | { 79 | cusp::array1d temp_b = b_d_CG; 80 | cusp::array1d temp_x(x_d_CG.size()); 81 | amg->LU(temp_b, temp_x); 82 | x_d_CG = temp_x; 83 | 84 | return; 85 | } else 86 | { 87 | Vector_d b_d = b_d_CG; 88 | Vector_d x_d(x_d_CG.size(), 0.0); 89 | switch (amg->dsType_) 90 | { 91 | case 0: 92 | smoother->preRRRFullSymmetric(AinSysCoo_d, AoutSys_d, AinBlockIdx_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, 93 | level_id, largestblocksize, largest_num_entries, verbose); 94 | break; 95 | case 1: 96 | smoother->preRRRFull(AinEll_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize); 97 | break; 98 | case 2: 99 | smoother->preRRRFullCsr(AinCSR_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row); 100 | break; 101 | default: 102 | std::cout << "Wrong DStype 1!"
<< std::endl; 103 | exit(EXIT_FAILURE); /* unsupported dsType_: fail with a non-zero status */ 104 | 105 | } 106 | next->cycle(V_CYCLE, bc_d, xc_d, verbose); 107 | switch (amg->dsType_) 108 | { 109 | case 0: 110 | smoother->postPCRFullSymmetric(AinSysCoo_d, AinBlockIdx_d, AoutSys_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, 111 | level_id, largestblocksize, largest_num_entries); 112 | break; 113 | case 1: 114 | smoother->postPCRFull(AinEll_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize); 115 | break; 116 | case 2: 117 | smoother->postPCRFullCsr(AinCSR_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row); 118 | break; 119 | default: 120 | std::cout << "Wrong DStype 0!" << std::endl; 121 | exit(EXIT_FAILURE); /* unsupported dsType_: fail with a non-zero status */ 122 | 123 | } 124 | 125 | x_d_CG = x_d; 126 | b_d_CG = b_d; 127 | } 128 | } 129 | 130 | #include 131 | 132 | template 133 | void AMG_Level::setup() 134 | { 135 | smoother = Smoother::allocate(amg->smootherWeight_, 136 | amg->preInnerIters_, amg->postInnerIters_, amg->postRelaxes_, A_d); 137 | } 138 | 139 | template 140 | std::vector AMG_Level::getOriginalRows() 141 | { 142 | return originalRow; 143 | } 144 | 145 | /**************************************** 146 | * Explicit instantiations 147 | ***************************************/ 148 | template class AMG_Level < Matrix_h, Vector_h > ; 149 | template class AMG_Level < Matrix_d, Vector_d > ; 150 | -------------------------------------------------------------------------------- /src/core/aggmis/include/AggMIS_GraphHelpers.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: AggMIS_GraphHelpers.h 3 | * Author: T.
James Lewis 4 | * 5 | * Created on April 16, 2013, 2:58 PM 6 | */ 7 | #ifndef AGGMIS_GRAPHHELPERS_H 8 | #define AGGMIS_GRAPHHELPERS_H 9 | #include 10 | #include 11 | #include 12 | namespace AggMIS { 13 | namespace GraphHelpers { 14 | 15 | // GPU Kernels 16 | namespace Kernels { 17 | __global__ void mapAdjacencyToBlockKernel(int size, 18 | int *adjIndexes, 19 | int *adjacency, 20 | int *adjacencyBlockLabel, 21 | int *blockMappedAdjacency, 22 | int *fineAggregate); 23 | __global__ void findPartIndicesNegStartKernel(int size, 24 | int *array, 25 | int *partIndices); 26 | __global__ void findPartIndicesKernel(int size, 27 | int *array, 28 | int *partIndices); 29 | __global__ void findAdjacencySizesKernel(int size, 30 | int *adjIndexes, 31 | int *output); 32 | __global__ void accumulatedPartSizesKernel(int size, 33 | int *part, 34 | int *weights, 35 | int *accumulatedSize); 36 | __global__ void unaccumulatedPartSizesKernel(int size, 37 | int *accumulatedSize, 38 | int *sizes); 39 | } 40 | Types::Graph_d* GetInducedGraph(Types::Graph_d &graph, 41 | Types::IntVector_d &aggregation); 42 | Types::Graph_h* GetInducedGraph(Types::Graph_h &graph, 43 | Types::IntVector_h &aggregation); 44 | void mapAdjacencyToBlock(Types::IntVector_d &adjIndexes, 45 | Types::IntVector_d &adjacency, 46 | Types::IntVector_d &adjacencyBlockLabel, 47 | Types::IntVector_d &blockMappedAdjacency, 48 | Types::IntVector_d &fineAggregate); 49 | void getPartIndicesNegStart(Types::IntVector_d& sortedPartition, 50 | Types::IntVector_d& partIndices); 51 | /** 52 | * Gets a vector where the values are the indices of the elements 53 | * @param size Size of vector to create 54 | * @return A pointer to newly created vector 55 | */ 56 | Types::IntVector_d* GetIndicesVector(int size); 57 | /** 58 | * Writes the index of each vector element as its value 59 | * @param size The size the vector should be 60 | * @param toSet The vector to set (Overwritten) 61 | */ 62 | void SetToIndicesVector(int size, 63 | 
Types::IntVector_d& toSet); 64 | /** 65 | * Gets the size (count of nodes) of each aggregate. 66 | * @param aggregation Labels each node with its aggregate ID 67 | * @param sizes Vector to output computed sized (Overwritten) 68 | */ 69 | void getPartSizes(Types::IntVector_d &aggregation, 70 | Types::IntVector_d &sizes); 71 | /** 72 | * Gets the size of each aggregate, taking into account the weight of 73 | * each node. 74 | * @param aggregation Labels each node with its aggregate ID 75 | * @param sizes Vector to put the computed sizes into (Overwritten) 76 | * @param weights The weights of each graph node 77 | */ 78 | void getPartSizes(Types::IntVector_d &aggregation, 79 | Types::IntVector_d &sizes, 80 | Types::IntVector_d &weights); 81 | /** 82 | * Finds the valence of each node in the given graph. 83 | * @param graph Input graph 84 | * @return A vector containing the valence of each node 85 | */ 86 | Types::IntVector_d* GetValences(Types::Graph_d &graph); 87 | Types::IntVector_h* GetValences(Types::Graph_h &graph); 88 | /** 89 | * Checks if a graph is a valid undirected graph. Valid being that each 90 | * node listing a node as neighbor is a neighbor of the listed node, and 91 | * that all listed neighbors are valid graph nodes. 92 | * @param graph The graph to check 93 | * @return True if graph is valid, false otherwise 94 | */ 95 | bool IsGraphValid(Types::Graph_d &graph); 96 | /** 97 | * Checks if a graph is a valid undirected graph. Valid being that each 98 | * node listing a node as neighbor is a neighbor of the listed node, and 99 | * that all listed neighbors are valid graph nodes. 100 | * @param graph The graph to check 101 | * @return True if graph is valid, false otherwise 102 | */ 103 | bool IsGraphValid(Types::Graph_h &graph); 104 | /** 105 | * Checks if a graph is a valid undirected graph. Valid being that each 106 | * node listing a node as neighbor is a neighbor of the listed node, and 107 | * that all listed neighbors are valid graph nodes. 
108 | * @param indices The vector of indices into adjacency list 109 | * @param adjacency The adjacency list 110 | * @return True if graph is valid, false otherwise 111 | */ 112 | bool IsGraphValid(Types::IntVector_d &indices, Types::IntVector_d &adjacency); 113 | /** 114 | * Checks if a graph is a valid undirected graph. Valid being that each 115 | * node listing a node as neighbor is a neighbor of the listed node, and 116 | * that all listed neighbors are valid graph nodes. 117 | * @param indices The vector of indices into adjacency list 118 | * @param adjacency The adjacency list 119 | * @return True if graph is valid, false otherwise 120 | */ 121 | bool IsGraphValid(Types::IntVector_h &indices, Types::IntVector_h &adjacency); 122 | } 123 | } 124 | #endif /* AGGMIS_GRAPHHELPERS_H */ 125 | 126 | -------------------------------------------------------------------------------- /src/CodeCoverage.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # 2012-01-31, Lars Bilke 3 | # - Enable Code Coverage 4 | # 5 | # 2013-09-17, Joakim Söderberg 6 | # - Added support for Clang. 7 | # - Some additional usage instructions. 8 | # 9 | # USAGE: 10 | 11 | # 0. (Mac only) If you use Xcode 5.1 make sure to patch geninfo as described here: 12 | # http://stackoverflow.com/a/22404544/80480 13 | # 14 | # 1. Copy this file into your cmake modules path. 15 | # 16 | # 2. Add the following line to your CMakeLists.txt: 17 | # INCLUDE(CodeCoverage) 18 | # 19 | # 3. Set compiler flags to turn off optimization and enable coverage: 20 | # SET(CMAKE_CXX_FLAGS "-g -O0 -fprofile-arcs -ftest-coverage") 21 | # SET(CMAKE_C_FLAGS "-g -O0 -fprofile-arcs -ftest-coverage") 22 | # 23 | # 3. Use the function SETUP_TARGET_FOR_COVERAGE to create a custom make target 24 | # which runs your test executable and produces a lcov code coverage report: 25 | # Example: 26 | # SETUP_TARGET_FOR_COVERAGE( 27 | # my_coverage_target # Name for custom target. 
28 | # test_driver # Name of the test driver executable that runs the tests. 29 | # # NOTE! This should always have a ZERO as exit code 30 | # # otherwise the coverage generation will not complete. 31 | # coverage # Name of output directory. 32 | # ) 33 | # 34 | # 4. Build a Debug build: 35 | # cmake -DCMAKE_BUILD_TYPE=Debug .. 36 | # make 37 | # make my_coverage_target 38 | # 39 | # 40 | 41 | # Check prereqs 42 | FIND_PROGRAM( GCOV_PATH gcov ) 43 | FIND_PROGRAM( LCOV_PATH lcov ) 44 | FIND_PROGRAM( GENHTML_PATH genhtml ) 45 | FIND_PROGRAM( GCOVR_PATH gcovr PATHS ${CMAKE_SOURCE_DIR}/tests) 46 | 47 | IF(NOT GCOV_PATH) 48 | MESSAGE(FATAL_ERROR "gcov not found! Aborting...") 49 | ENDIF() # NOT GCOV_PATH 50 | 51 | IF(NOT CMAKE_COMPILER_IS_GNUCXX) 52 | # Clang version 3.0.0 and greater now supports gcov as well. 53 | MESSAGE(WARNING "Compiler is not GNU gcc! Clang Version 3.0.0 and greater supports gcov as well, but older versions don't.") 54 | 55 | IF(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 56 | MESSAGE(FATAL_ERROR "Compiler is not GNU gcc! Aborting...") 57 | ENDIF() 58 | ENDIF() # NOT CMAKE_COMPILER_IS_GNUCXX 59 | 60 | SET(CMAKE_CXX_FLAGS_COVERAGE 61 | "-g -O0 --coverage -fprofile-arcs -ftest-coverage" 62 | CACHE STRING "Flags used by the C++ compiler during coverage builds." 63 | FORCE ) 64 | SET(CMAKE_C_FLAGS_COVERAGE 65 | "-g -O0 --coverage -fprofile-arcs -ftest-coverage" 66 | CACHE STRING "Flags used by the C compiler during coverage builds." 67 | FORCE ) 68 | SET(CMAKE_EXE_LINKER_FLAGS_COVERAGE 69 | "" 70 | CACHE STRING "Flags used for linking binaries during coverage builds." 71 | FORCE ) 72 | SET(CMAKE_SHARED_LINKER_FLAGS_COVERAGE 73 | "" 74 | CACHE STRING "Flags used by the shared libraries linker during coverage builds." 
75 | FORCE ) 76 | MARK_AS_ADVANCED( 77 | CMAKE_CXX_FLAGS_COVERAGE 78 | CMAKE_C_FLAGS_COVERAGE 79 | CMAKE_EXE_LINKER_FLAGS_COVERAGE 80 | CMAKE_SHARED_LINKER_FLAGS_COVERAGE ) 81 | 82 | IF ( NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "Coverage")) 83 | MESSAGE( WARNING "Code coverage results with an optimized (non-Debug -- currently '${CMAKE_BUILD_TYPE}') build may be misleading" ) 84 | ENDIF() # NOT CMAKE_BUILD_TYPE STREQUAL "Debug" 85 | 86 | 87 | # Param _targetname The name of new the custom make target 88 | # Param _testrunner The name of the target which runs the tests. 89 | # MUST return ZERO always, even on errors. 90 | # If not, no coverage report will be created! 91 | # Param _outputname lcov output is generated as _outputname.info 92 | # HTML report is generated in _outputname/index.html 93 | # Optional fourth parameter is passed as arguments to _testrunner 94 | # Pass them in list form, e.g.: "-j;2" for -j 2 95 | FUNCTION(SETUP_TARGET_FOR_COVERAGE _targetname _testrunner _outputname) 96 | 97 | IF(NOT LCOV_PATH) 98 | MESSAGE(FATAL_ERROR "lcov not found! Aborting...") 99 | ENDIF() # NOT LCOV_PATH 100 | 101 | IF(NOT GENHTML_PATH) 102 | MESSAGE(FATAL_ERROR "genhtml not found! Aborting...") 103 | ENDIF() # NOT GENHTML_PATH 104 | 105 | # Setup target 106 | ADD_CUSTOM_TARGET(${_targetname} 107 | 108 | # Cleanup lcov 109 | ${LCOV_PATH} --directory . --zerocounters 110 | 111 | # Run tests 112 | COMMAND ${_testrunner} ${ARGV3} 113 | 114 | # Capturing lcov counters and generating report 115 | COMMAND ${LCOV_PATH} --directory . 
--capture --output-file ${_outputname}.info 116 | COMMAND ${LCOV_PATH} --remove ${_outputname}.info 'tests/*' '/usr/*' --output-file ${_outputname}.info.cleaned 117 | COMMAND ${GENHTML_PATH} -o ${_outputname} ${_outputname}.info.cleaned 118 | COMMAND ${CMAKE_COMMAND} -E remove ${_outputname}.info ${_outputname}.info.cleaned 119 | 120 | WORKING_DIRECTORY ${CMAKE_BINARY_DIR} 121 | COMMENT "Resetting code coverage counters to zero.\nProcessing code coverage counters and generating report." 122 | ) 123 | 124 | # Show info where to find the report 125 | ADD_CUSTOM_COMMAND(TARGET ${_targetname} POST_BUILD 126 | COMMAND ; 127 | COMMENT "Open ./${_outputname}/index.html in your browser to view the coverage report." 128 | ) 129 | 130 | ENDFUNCTION() # SETUP_TARGET_FOR_COVERAGE 131 | 132 | # Param _targetname The name of new the custom make target 133 | # Param _testrunner The name of the target which runs the tests 134 | # Param _outputname cobertura output is generated as _outputname.xml 135 | # Optional fourth parameter is passed as arguments to _testrunner 136 | # Pass them in list form, e.g.: "-j;2" for -j 2 137 | FUNCTION(SETUP_TARGET_FOR_COVERAGE_COBERTURA _targetname _testrunner _outputname) 138 | 139 | IF(NOT PYTHON_EXECUTABLE) 140 | MESSAGE(FATAL_ERROR "Python not found! Aborting...") 141 | ENDIF() # NOT PYTHON_EXECUTABLE 142 | 143 | IF(NOT GCOVR_PATH) 144 | MESSAGE(FATAL_ERROR "gcovr not found! Aborting...") 145 | ENDIF() # NOT GCOVR_PATH 146 | 147 | ADD_CUSTOM_TARGET(${_targetname} 148 | 149 | # Run tests 150 | ${_testrunner} ${ARGV3} 151 | 152 | # Running gcovr 153 | COMMAND ${GCOVR_PATH} -x -r ${CMAKE_SOURCE_DIR} -e '${CMAKE_SOURCE_DIR}/tests/' -o ${_outputname}.xml 154 | WORKING_DIRECTORY ${CMAKE_BINARY_DIR} 155 | COMMENT "Running gcovr to produce Cobertura code coverage report." 
156 | ) 157 | 158 | # Show info where to find the report 159 | ADD_CUSTOM_COMMAND(TARGET ${_targetname} POST_BUILD 160 | COMMAND ; 161 | COMMENT "Cobertura code coverage report saved in ${_outputname}.xml." 162 | ) 163 | 164 | ENDFUNCTION() # SETUP_TARGET_FOR_COVERAGE_COBERTURA 165 | -------------------------------------------------------------------------------- /src/core/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | #CMAKE SETTING CUDA_NVCC_FLAGS 2 | ############################# 3 | # Check for GPUs present and their compute capability 4 | # based on http://stackoverflow.com/questions/2285185/easiest-way-to-test-for-existence-of-cuda-capable-gpu-from-cmake/2297877#2297877 (Christopher Bruns) 5 | if(CUDA_FOUND AND NOT CUDA_HAVE_GPU) 6 | message(STATUS "Checking CUDA compute capability.") 7 | try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR 8 | ${CMAKE_BINARY_DIR} 9 | ${CMAKE_SOURCE_DIR}/cuda_compute_capability.c 10 | CMAKE_FLAGS 11 | -DINCLUDE_DIRECTORIES:STRING=${CUDA_TOOLKIT_INCLUDE} 12 | -DLINK_LIBRARIES:STRING=${CUDA_CUDART_LIBRARY} 13 | COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR 14 | RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR) 15 | # COMPILE_RESULT_VAR is TRUE when compile succeeds 16 | # RUN_RESULT_VAR is zero when a GPU is found 17 | if(COMPILE_RESULT_VAR AND NOT RUN_RESULT_VAR) 18 | set(CUDA_HAVE_GPU TRUE CACHE INTERNAL BOOL "Whether CUDA-capable GPU is present") 19 | set(CUDA_COMPUTE_CAPABILITY ${RUN_OUTPUT_VAR} CACHE STRING "Compute capability of CUDA-capable GPU present") 20 | set(CUDA_GENERATE_CODE "arch=compute_${CUDA_COMPUTE_CAPABILITY},code=sm_${CUDA_COMPUTE_CAPABILITY}" CACHE STRING "Which GPU architectures to generate code for (each arch/code pair will be passed as --generate-code option to nvcc, separate multiple pairs by ;)") 21 | mark_as_advanced(CUDA_COMPUTE_CAPABILITY CUDA_GENERATE_CODE) 22 | LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" 
"-arch=sm_21" "-arch=sm_30" "-arch=sm_31") 23 | set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-arch=sm_${CUDA_COMPUTE_CAPABILITY}" CACHE LIST "Semi-colon delimit multiple arguments." FORCE) 24 | message(STATUS "Automatic Compute Capability Set: ${CUDA_COMPUTE_CAPABILITY}") 25 | else() 26 | message(STATUS "You must set your compute capability manually") 27 | set(CUDA_HAVE_GPU FALSE CACHE INTERNAL BOOL "Whether CUDA-capable GPU is present") 28 | endif() 29 | endif() 30 | 31 | IF (NOT CUDA_HAVE_GPU) 32 | SET(CUDA_USE_COMPUTE_CAPABILITY_10 false CACHE BOOL "Use Compute Capability 1.0") 33 | SET(CUDA_USE_COMPUTE_CAPABILITY_11 false CACHE BOOL "Use Compute Capability 1.1") 34 | SET(CUDA_USE_COMPUTE_CAPABILITY_12 false CACHE BOOL "Use Compute Capability 1.2") 35 | SET(CUDA_USE_COMPUTE_CAPABILITY_13 false CACHE BOOL "Use Compute Capability 1.3") 36 | SET(CUDA_USE_COMPUTE_CAPABILITY_20 true CACHE BOOL "Use Compute Capability 2.0") 37 | SET(CUDA_USE_COMPUTE_CAPABILITY_21 false CACHE BOOL "Use Compute Capability 2.1") 38 | 39 | IF(CUDA_USE_COMPUTE_CAPABILITY_10) 40 | LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" "-arch=sm_21") 41 | LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_10") 42 | SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} CACHE LIST "Semi-colon delimit multiple arguments." FORCE) 43 | set(CUDA_HAVE_GPU TRUE CACHE INTERNAL BOOL "Whether CUDA-capable GPU is present") 44 | ELSEIF(CUDA_USE_COMPUTE_CAPABILITY_11) 45 | LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" "-arch=sm_21") 46 | LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_11") 47 | SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} CACHE LIST "Semi-colon delimit multiple arguments." 
FORCE) 48 | set(CUDA_HAVE_GPU TRUE CACHE INTERNAL BOOL "Whether CUDA-capable GPU is present") 49 | ELSEIF(CUDA_USE_COMPUTE_CAPABILITY_12) 50 | LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" "-arch=sm_21") 51 | LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_12") 52 | SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} CACHE LIST "Semi-colon delimit multiple arguments." FORCE) 53 | set(CUDA_HAVE_GPU TRUE CACHE INTERNAL BOOL "Whether CUDA-capable GPU is present") 54 | ELSEIF(CUDA_USE_COMPUTE_CAPABILITY_13) 55 | LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" "-arch=sm_21") 56 | LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_13") 57 | SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} CACHE LIST "Semi-colon delimit multiple arguments." FORCE) 58 | set(CUDA_HAVE_GPU TRUE CACHE INTERNAL BOOL "Whether CUDA-capable GPU is present") 59 | ELSEIF(CUDA_USE_COMPUTE_CAPABILITY_20) 60 | LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" "-arch=sm_21") 61 | LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_20") 62 | SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} CACHE LIST "Semi-colon delimit multiple arguments." FORCE) 63 | set(CUDA_HAVE_GPU TRUE CACHE INTERNAL BOOL "Whether CUDA-capable GPU is present") 64 | ELSEIF(CUDA_USE_COMPUTE_CAPABILITY_21) 65 | LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" "-arch=sm_21") 66 | LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_21") 67 | SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} CACHE LIST "Semi-colon delimit multiple arguments." 
FORCE) 68 | set(CUDA_HAVE_GPU TRUE CACHE INTERNAL BOOL "Whether CUDA-capable GPU is present") 69 | ENDIF() 70 | ENDIF () 71 | 72 | IF(NOT CUDA_HAVE_GPU) 73 | message( FATAL_ERROR "You must set your compute capability" ) 74 | ENDIF() 75 | #END CMAKE SETTING CUDA_NVCC_FLAGS 76 | ################### 77 | 78 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) 79 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include/cycles) 80 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include/smoothers) 81 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/aggmis/include) 82 | include_directories(${METIS_PATH}/Lib) 83 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 84 | 85 | FILE(GLOB CORE_CUDA_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/cuda/*.cu 86 | ${CMAKE_CURRENT_SOURCE_DIR}/aggmis/cuda/*.cu) 87 | 88 | FILE(GLOB CORE_CUDA_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h 89 | ${CMAKE_CURRENT_SOURCE_DIR}/include/smoothers/*.h 90 | ${CMAKE_CURRENT_SOURCE_DIR}/include/smoothedMG/*.h 91 | ${CMAKE_CURRENT_SOURCE_DIR}/include/smoothedMG/aggregators/*.h 92 | ${CMAKE_CURRENT_SOURCE_DIR}/include/FEM/*.h 93 | ${CMAKE_CURRENT_SOURCE_DIR}/include/cycles/*.h 94 | ${CMAKE_CURRENT_SOURCE_DIR}/aggmis/include/*.h 95 | ${CMAKE_CURRENT_SOURCE_DIR}/cuda/perform_element_loop_3D.cuh 96 | ${CMAKE_CURRENT_SOURCE_DIR}/cuda/perform_element_loop_2D.cuh) 97 | 98 | CUDA_ADD_LIBRARY(FEM_CORE STATIC ${CORE_CUDA_SOURCES} ${CORE_CUDA_HEADERS}) 99 | ADD_DEPENDENCIES(FEM_CORE metis) 100 | ADD_DEPENDENCIES(FEM_CORE cusp) 101 | if (CMAKE_SYSTEM_NAME MATCHES "Linux") 102 | TARGET_LINK_LIBRARIES(FEM_CORE -ldl ${METIS_LIBRARY}) 103 | else() 104 | TARGET_LINK_LIBRARIES(FEM_CORE ${METIS_LIBRARY}) 105 | endif() 106 | -------------------------------------------------------------------------------- /src/core/aggmis/include/AggMIS_MergeSplitConditioner_CPU.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: AggMIS_MergeSplitConditioner_CPU.h 3 | * Author: T. 
James Lewis 4 | * 5 | * Created on July 4, 2013, 1:14 PM 6 | */ 7 | 8 | #ifndef AGGMIS_MERGESPLITCONDITIONER_CPU_H 9 | #define AGGMIS_MERGESPLITCONDITIONER_CPU_H 10 | 11 | #include "AggMIS_Types.h" 12 | #include "AggMIS_Aggregation_CPU.h" 13 | #include "AggMIS_GraphHelpers.h" 14 | #include "thrust/count.h" 15 | #include "AggMIS_IOHelpers.h" 16 | 17 | namespace AggMIS { 18 | namespace MergeSplitCPU { 19 | 20 | // Functors for Thrust calls 21 | namespace Functors { 22 | struct isOutSized:public thrust::unary_function { 23 | const int a, b; 24 | isOutSized(int _a, int _b):a(_a),b(_b){} 25 | 26 | __host__ __device__ 27 | bool operator()(const int &x) const 28 | { 29 | return x < a || x > b; 30 | } 31 | }; 32 | struct lessThan:public thrust::unary_function { 33 | const int a; 34 | 35 | lessThan(int _a): a(_a){} 36 | 37 | __host__ __device__ 38 | int operator()(const int &x) const 39 | { 40 | return x < a; 41 | } 42 | }; 43 | struct greaterThan:public thrust::unary_function { 44 | const int a; 45 | 46 | greaterThan(int _a): a(_a){} 47 | 48 | __host__ __device__ 49 | int operator()(const int &x) const 50 | { 51 | return x > a; 52 | } 53 | }; 54 | struct NotNegOne:public thrust::unary_function { 55 | __host__ __device__ 56 | int operator()(int a) const 57 | { 58 | // If value is negative return 0 else return one 59 | return a < 0 ? 0 : 1; 60 | } 61 | }; 62 | struct EqualTo:public thrust::unary_function { 63 | const int a; 64 | 65 | EqualTo(int _a): a(_a){} 66 | 67 | __host__ __device__ 68 | int operator()(const int &x) const 69 | { 70 | return x == a; 71 | } 72 | }; 73 | } 74 | 75 | // Merge and Split conditioner main container 76 | class MergeSplitConditionerCPU { 77 | public: 78 | /** 79 | * The primary constructor it creates a conditioner given the 80 | * specified graph and aggregation. 
It makes an internal copy of 81 | * the aggregation vector, but it uses a pointer back to the given 82 | * Graph object 83 | * @param graph The graph that the aggregation being conditioned 84 | * is an aggregation of. 85 | * @param aggregation A vector which labels every node in the graph 86 | * with an aggregate ID. 87 | */ 88 | MergeSplitConditionerCPU(Types::Graph_h &graph, 89 | Types::IntVector_h &aggregation); 90 | void SetSizeBounds(int min, int max); 91 | void SetVerbose(bool v); 92 | /** 93 | * Sets the node weights by swapping the contents of the provided 94 | * vector into the nodeWeights member. Then it re-computes the part 95 | * sizes with weighting. 96 | * @param nodeWeights A vector containing the weights of each node 97 | * the contents of this vector are swapped out by the method. 98 | */ 99 | void SetNodeWeights(Types::IntVector_h &nodeWeights); 100 | /** 101 | * Getter for the aggregation vector. 102 | * @return A pointer to the current aggregation vector 103 | */ 104 | Types::IntVector_h* GetAggregation(); 105 | /** 106 | * Getter for the NodeWeights vector 107 | * @return A pointer to the current NodeWeights vector 108 | */ 109 | Types::IntVector_h* GetNodeWeights(); 110 | void CycleMerges(bool force); 111 | void CycleSplits(bool force); 112 | void CycleMergeSplits(float minImprove, 113 | int desiredSize); 114 | bool Condition(int desiredSize, 115 | bool respectUpper, 116 | float tolerance, 117 | float minImprove, 118 | int maxCycles); 119 | void PrintProgress(std::ostream* output, 120 | std::string note, 121 | bool graphStat, 122 | bool progressStat, 123 | bool sizeStat); 124 | void PrintSizeStats(std::ostream* output, 125 | bool makeHeader); 126 | void PrintProgressStats(std::ostream* output, 127 | bool makeHeader); 128 | void PrintGraphStats(std::ostream* output, 129 | bool makeHeader); 130 | void InteractiveConsole(std::string message); 131 | private: 132 | bool MarkMerges(bool force); 133 | bool MarkSplits(bool force); 134 | void 
MarkMergeSplits(int desiredSize); 135 | void MakeSplits(); 136 | void MakeMerges(bool markSplits); 137 | void MakeMergesDirect(bool force); 138 | int MergeAggregates(int aggA, int aggB); 139 | int MergeAggregates(int aggA, int aggB, bool fillSpot); 140 | void MakeSplitsDirect(bool force); 141 | void SplitAggregate(int agg, int newAgg); 142 | void MakeMergeSplits(int desiredSize); 143 | void UnlinkAggregate(int aggId); 144 | void FixSizesFromAggMap(int aggId); 145 | void LinkAggregate(int aggId); 146 | void FillAggAdjacency(); 147 | void FillAggMap(); 148 | void ValidateAggAdjacency(); 149 | void ValidateAggMap(); 150 | void ValidatePartSizes(); 151 | void ValidateArraySizes(std::string message); 152 | 153 | // Data members 154 | Types::Graph_h *graph; 155 | Types::Graph_h *inducedGraph; 156 | 157 | int minSize, 158 | maxSize, 159 | outsizedParts, 160 | merges, 161 | mergeSplits, 162 | splits; 163 | 164 | Types::IntVector_h aggregation, 165 | nodeWeights, 166 | distances, 167 | partSizes, 168 | weightedSizes, 169 | splitsToMake, 170 | mergesToMake, 171 | mergeOffsets; 172 | 173 | // Stores lists of nodes in each aggregate 174 | std::vector > aggMap; 175 | 176 | // Stores the neighbors of each aggregate 177 | std::vector > aggAdjacency; 178 | 179 | // Stores the root point sets for each aggregate 180 | std::vector > rootPoints; 181 | 182 | bool verbose; 183 | }; 184 | } 185 | } 186 | #endif /* AGGMIS_MERGESPLITCONDITIONER_CPU_H */ 187 | 188 | -------------------------------------------------------------------------------- /src/core/include/smoothers/gauss_seidel.h: -------------------------------------------------------------------------------- 1 | #ifndef __GAUSSSEIDEL_H__ 2 | #define __GAUSSSEIDEL_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /***************************************************** 10 | * Jacobi smoother 11 | ****************************************************/ 12 | template 13 | class gauss_seidel : public Smoother < Matrix, 
Vector > 14 | { 15 | public: 16 | typedef typename Matrix::value_type ValueType; 17 | typedef typename Matrix::index_type IndexType; 18 | typedef typename Matrix::memory_space MemorySpace; 19 | 20 | gauss_seidel(double smootherWeight, 21 | int preInnerIters, int postInnerIters, int postRelaxes, 22 | const Matrix_d& Ainit); 23 | void find_diag(const Matrix_ell_d& A); 24 | void smooth(const Matrix& A, const Vector& b, Vector& x); 25 | void smooth_with_0_initial(const Matrix& A, const Vector &b, Vector &x); 26 | 27 | void preRRRFull(const cusp::ell_matrix& AinEll, 28 | const cusp::coo_matrix& AoutCoo, 29 | const cusp::array1d& aggregateIdx, 30 | const cusp::array1d& partitionIdx, 31 | const cusp::hyb_matrix& restrictor, 32 | const cusp::array1d& permutation, 33 | cusp::array1d& b, 34 | cusp::array1d& x, 35 | cusp::array1d& bc, 36 | int level_id, 37 | int largestblksz); 38 | 39 | void preRRRFullCsr(const cusp::csr_matrix& AinCsr, 40 | const cusp::coo_matrix& AoutCoo, 41 | const cusp::array1d& aggregateIdx, 42 | const cusp::array1d& partitionIdx, 43 | const cusp::hyb_matrix& restrictor, 44 | const cusp::array1d& permutation, 45 | cusp::array1d& b, 46 | cusp::array1d& x, 47 | cusp::array1d& bc, 48 | int level_id, 49 | int largestblksize, 50 | int largestnumentries, 51 | int largestnumperrow); 52 | 53 | void preRRRFullSymmetric(const cusp::coo_matrix& AinSysCoo, 54 | const cusp::coo_matrix& AoutSysCoo, 55 | const cusp::array1d& AinBlockIdx, 56 | const cusp::array1d& AoutBlockIdx, 57 | const cusp::array1d& aggregateIdx, 58 | const cusp::array1d& partitionIdx, 59 | const cusp::hyb_matrix& restrictor, 60 | const cusp::array1d& permutation, 61 | cusp::array1d& b, 62 | cusp::array1d& x, 63 | cusp::array1d& bc, 64 | int level_id, 65 | int largestblksz, 66 | int largestnumentries, 67 | bool verbose = false); 68 | void preRRRFullSymmetricSync(const cusp::coo_matrix& AinSysCoo, 69 | const cusp::coo_matrix& AoutSysCoo, 70 | const cusp::array1d& AinBlockIdx, 71 | const 
cusp::array1d& aggregateIdx, 72 | const cusp::array1d& partitionIdx, 73 | const cusp::hyb_matrix& restrictor, 74 | const cusp::array1d& permutation, 75 | cusp::array1d& b, 76 | cusp::array1d& x, 77 | cusp::array1d& bc, 78 | const cusp::array1d& segSyncIdx, 79 | const cusp::array1d& partSyncIdx, 80 | int level_id, 81 | int largestblksz, 82 | int largestnumentries); 83 | 84 | void postPCR(const cusp::ell_matrix& AinEll, 85 | const cusp::coo_matrix& AoutCoo, 86 | const cusp::array1d& aggregateIdx, 87 | const cusp::array1d& partitionIdx, 88 | const cusp::array1d& P, 89 | const cusp::array1d& b, 90 | cusp::array1d& x, 91 | cusp::array1d& xc); 92 | 93 | void postPCRFull(const cusp::ell_matrix& AinEll, 94 | const cusp::coo_matrix& AoutCoo, 95 | const cusp::array1d& AoutBlockIdx, 96 | const cusp::array1d& aggregateIdx, 97 | const cusp::array1d& partitionIdx, 98 | const cusp::hyb_matrix& prolongator, 99 | const cusp::array1d& permutation, 100 | const cusp::array1d& b, 101 | cusp::array1d& x, 102 | cusp::array1d& xc, 103 | int level_id, 104 | int largestblksz); 105 | 106 | void postPCRFullCsr(const cusp::csr_matrix& AinCsr, 107 | const cusp::coo_matrix& AoutCoo, 108 | const cusp::array1d& AoutBlockIdx, 109 | const cusp::array1d& aggregateIdx, 110 | const cusp::array1d& partitionIdx, 111 | const cusp::hyb_matrix& prolongator, 112 | const cusp::array1d& permutation, 113 | const cusp::array1d& b, 114 | cusp::array1d& x, 115 | cusp::array1d& xc, 116 | int level_id, 117 | int largestblksz, 118 | int largestnumentries, 119 | int largestnumperrow); 120 | 121 | void postPCRFullSymmetric(const cusp::coo_matrix& AinSysCoo, 122 | const cusp::array1d& AinBlockIdx, 123 | const cusp::coo_matrix& AoutSysCoo, 124 | const cusp::array1d& AoutBlockIdx, 125 | const cusp::array1d& aggregateIdx, 126 | const cusp::array1d& partitionIdx, 127 | const cusp::hyb_matrix& prolongator, 128 | const cusp::array1d& permutation, 129 | const cusp::array1d& b, 130 | cusp::array1d& x, 131 | cusp::array1d& xc, 
132 | int level_id, 133 | int largestblksz, 134 | int largestnumentries); 135 | 136 | void postPCRFullSymmetricSync(const cusp::coo_matrix& AinSysCoo, 137 | const cusp::array1d& AinBlockIdx, 138 | const cusp::coo_matrix& AoutSysCoo, 139 | const cusp::array1d& AoutBlockIdx, 140 | const cusp::array1d& aggregateIdx, 141 | const cusp::array1d& partitionIdx, 142 | const cusp::hyb_matrix& prolongator, 143 | const cusp::array1d& permutation, 144 | const cusp::array1d& b, 145 | cusp::array1d& x, 146 | cusp::array1d& xc, 147 | const cusp::array1d& segSyncIdx, 148 | const cusp::array1d& partSyncIdx, 149 | int level_id, 150 | int largestblksz, 151 | int largestnumentries); 152 | 153 | public: 154 | double smootherWeight_; 155 | int nPreInnerIter_; 156 | int nPostInnerIter_; 157 | int post_relaxes_; 158 | 159 | }; 160 | #endif 161 | -------------------------------------------------------------------------------- /src/core/aggmis/cuda/TriMesh_connectivity.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Szymon Rusinkiewicz 3 | Princeton University 4 | 5 | TriMesh_connectivity.cc 6 | Manipulate data structures that describe connectivity between faces and verts. 7 | */ 8 | 9 | 10 | #include 11 | #include "TriMesh.h" 12 | #include 13 | 14 | void TriMesh::need_meshquality() 15 | { 16 | int max_valance = 0; 17 | int sum = 0; 18 | int avg_valance = 0; 19 | FILE *valancefile, *reratiofile; 20 | valancefile = fopen("valance.txt", "w"); 21 | reratiofile = fopen("reratio.txt", "w"); 22 | 23 | 24 | for(int i =0; iverbose) { 33 | printf("Max valance is %d\n", max_valance); 34 | printf("average valance is %d\n", avg_valance); 35 | } 36 | int ne = faces.size(); 37 | for(int i =0; iverbose) { 101 | printf("Finding vertex neighbors... 
"); 102 | } 103 | int nv = vertices.size(), nf = faces.size(); 104 | 105 | std::vector numneighbors(nv); 106 | for (int i = 0; i < nf; i++) { 107 | numneighbors[faces[i][0]]++; 108 | numneighbors[faces[i][1]]++; 109 | numneighbors[faces[i][2]]++; 110 | } 111 | 112 | neighbors.resize(nv); 113 | for (int i = 0; i < nv; i++) 114 | neighbors[i].reserve(numneighbors[i]+2); // Slop for boundaries 115 | 116 | for (int i = 0; i < nf; i++) { 117 | for (int j = 0; j < 3; j++) { 118 | std::vector &me = neighbors[faces[i][j]]; 119 | int n1 = faces[i][(j+1)%3]; 120 | int n2 = faces[i][(j+2)%3]; 121 | if (std::find(me.begin(), me.end(), n1) == me.end()) 122 | me.push_back(n1); 123 | if (std::find(me.begin(), me.end(), n2) == me.end()) 124 | me.push_back(n2); 125 | } 126 | } 127 | 128 | if (this->verbose) { 129 | printf("Done.\n"); 130 | } 131 | } 132 | 133 | void TriMesh::rescale(int size) 134 | { 135 | 136 | double minx = LARGENUM; 137 | double miny = LARGENUM; 138 | double minz = LARGENUM; 139 | double maxx = -LARGENUM; 140 | double maxy = -LARGENUM; 141 | double maxz = -LARGENUM; 142 | for(int v = 0; v < vertices.size(); v++) 143 | { 144 | double x = vertices[v][0]; 145 | double y = vertices[v][1]; 146 | double z = vertices[v][2]; 147 | if(x < minx) 148 | minx = x; 149 | if(y < miny) 150 | miny= y; 151 | if(z < minz) 152 | minz = z; 153 | 154 | if(x> maxx) 155 | maxx = x; 156 | if(y> maxy) 157 | maxy = y; 158 | if(z> maxz) 159 | maxz = z; 160 | } 161 | for(int v = 0; v < vertices.size(); v++) 162 | { 163 | 164 | vertices[v][0] -= minx; 165 | vertices[v][1] -= miny; 166 | //vertices[v][2] -= minz; 167 | 168 | 169 | vertices[v][0] = vertices[v][0] / (maxx - minx) * size; 170 | vertices[v][1] = vertices[v][1] / (maxy - miny) * size; 171 | //vertices[v][2] = vertices[v][2] / (maxz - minz) * size; 172 | 173 | 174 | } 175 | } 176 | 177 | void TriMesh::meshoptimization(int iterNum) 178 | { 179 | need_neighbors(); 180 | for(int i=0; iverbose) { 219 | printf("Finding vertex to 
triangle maps... "); 220 | } 221 | int nv = vertices.size(), nf = faces.size(); 222 | 223 | std::vector numadjacentfaces(nv); 224 | for (int i = 0; i < nf; i++) { 225 | numadjacentfaces[faces[i][0]]++; 226 | numadjacentfaces[faces[i][1]]++; 227 | numadjacentfaces[faces[i][2]]++; 228 | } 229 | 230 | adjacentfaces.resize(vertices.size()); 231 | for (int i = 0; i < nv; i++) 232 | adjacentfaces[i].reserve(numadjacentfaces[i]); 233 | 234 | for (int i = 0; i < nf; i++) { 235 | for (int j = 0; j < 3; j++) 236 | adjacentfaces[faces[i][j]].push_back(i); 237 | } 238 | 239 | if (this->verbose) { 240 | printf("Done.\n"); 241 | } 242 | } 243 | 244 | // Find the face across each edge from each other face (-1 on boundary) 245 | // If topology is bad, not necessarily what one would expect... 246 | void TriMesh::need_across_edge() 247 | { 248 | if (!across_edge.empty()) 249 | return; 250 | need_adjacentfaces(); 251 | 252 | if (this->verbose) { 253 | printf("Finding across-edge maps... "); 254 | } 255 | int nf = faces.size(); 256 | across_edge.resize(nf, Face(-1,-1,-1)); 257 | 258 | for (int i = 0; i < nf; i++) { 259 | for (int j = 0; j < 3; j++) { 260 | if (across_edge[i][j] != -1) 261 | continue; 262 | int v1 = faces[i][(j+1)%3]; 263 | int v2 = faces[i][(j+2)%3]; 264 | const std::vector &a1 = adjacentfaces[v1]; 265 | const std::vector &a2 = adjacentfaces[v2]; 266 | for (int k1 = 0; k1 < a1.size(); k1++) { 267 | int other = a1[k1]; 268 | if (other == i) 269 | continue; 270 | std::vector::const_iterator it = 271 | std::find(a2.begin(), a2.end(), other); 272 | if (it == a2.end()) 273 | continue; 274 | int ind = (faces[other].indexof(v1)+1)%3; 275 | if (faces[other][(ind+1)%3] != v2) 276 | continue; 277 | across_edge[i][j] = other; 278 | across_edge[other][ind] = i; 279 | break; 280 | } 281 | } 282 | } 283 | 284 | if (this->verbose) { 285 | printf("Done.\n"); 286 | } 287 | } 288 | 289 | -------------------------------------------------------------------------------- 
/src/core/aggmis/cuda/AggMIS_MIS_CPU.cu: -------------------------------------------------------------------------------- 1 | #include 2 | namespace AggMIS { 3 | namespace MIS { 4 | struct fringeNode 5 | { 6 | int nodeIdx; 7 | int visits; 8 | fringeNode(int n, int v) 9 | { 10 | nodeIdx = n; 11 | visits = v; 12 | } 13 | }; 14 | 15 | class fringeNodeComparer 16 | { 17 | public: 18 | bool operator()(fringeNode &f1, fringeNode &f2) 19 | { 20 | if (f1.visits < f2.visits) 21 | return true; 22 | return false; 23 | } 24 | }; 25 | AggMIS::Types::IntVector_h* FloodFillMIS(int k, AggMIS::Types::Graph_h &graph) { 26 | AggMIS::Types::IntVector_h *m = new AggMIS::Types::IntVector_h(graph.Size(), -1); 27 | AggMIS::Types::IntVector_h &mis = *m; 28 | AggMIS::Types::IntVector_h visited(graph.Size(), 0); 29 | AggMIS::Types::IntVector_h distances(graph.Size(), 1000); 30 | std::queue frontier; 31 | std::priority_queue, fringeNodeComparer> fringe; 32 | 33 | 34 | // Picking a random starting point: 35 | srand(time(NULL)); 36 | int starter = rand() % graph.Size(); 37 | 38 | bool incomplete = true; 39 | while (incomplete) 40 | { 41 | if (mis[starter] == -1) 42 | { 43 | fringeNode toAdd(starter, 1); 44 | fringe.push(toAdd); 45 | } 46 | 47 | while (!fringe.empty()) 48 | { 49 | // finding best fringe node 50 | int nodeToAdd = -1; 51 | while (!fringe.empty()) 52 | { 53 | fringeNode candidate = fringe.top(); 54 | fringe.pop(); 55 | if (distances[candidate.nodeIdx] > k) 56 | { 57 | nodeToAdd = candidate.nodeIdx; 58 | break; 59 | } 60 | } 61 | if (nodeToAdd == -1) 62 | { 63 | break; 64 | } 65 | 66 | mis[nodeToAdd] = 1; 67 | distances[nodeToAdd] = 0; 68 | 69 | // Pushing neighbors of mis node onto frontier to start out 70 | int start = (*(graph.indices))[nodeToAdd]; 71 | int end = (*(graph.indices))[nodeToAdd + 1]; 72 | for (int nIt = start; nIt < end; nIt++) 73 | { 74 | 75 | int neighbor = (*(graph.adjacency))[nIt]; 76 | if (distances[neighbor] > 1) 77 | { 78 | distances[neighbor] = 1; 79 | 
frontier.push(neighbor); 80 | } 81 | } 82 | 83 | // Exploring to the end of the frontier: 84 | while (!frontier.empty()) 85 | { 86 | int exploring = frontier.front(); 87 | frontier.pop(); 88 | 89 | int distance = distances[exploring]; 90 | 91 | // Mark out the node from the MIS 92 | mis[exploring] = 0; 93 | 94 | // Add the neighbors 95 | if (distance < k) 96 | { 97 | int start = (*(graph.indices))[exploring]; 98 | int end = (*(graph.indices))[exploring + 1]; 99 | for (int nIt = start; nIt < end; nIt++) 100 | { 101 | int neighbor = (*(graph.adjacency))[nIt]; 102 | if (distances[neighbor] > distance + 1) 103 | { 104 | distances[neighbor] = distance + 1; 105 | frontier.push(neighbor); 106 | } 107 | } 108 | } 109 | if (distance == k) 110 | { 111 | int start = (*(graph.indices))[exploring]; 112 | int end = (*(graph.indices))[exploring + 1]; 113 | for (int nIt = start; nIt < end; nIt++) 114 | { 115 | int neighbor = (*(graph.adjacency))[nIt]; 116 | if (distances[neighbor] >= distance + 1) 117 | { 118 | distances[neighbor] = distance + 1; 119 | fringeNode toAdd(neighbor, ++visited[neighbor]); 120 | fringe.push(toAdd); 121 | } 122 | } 123 | } 124 | } 125 | } 126 | 127 | incomplete = false; 128 | for (int i = 0; i < graph.Size(); i++) 129 | { 130 | if (mis[i] == -1) 131 | { 132 | incomplete = true; 133 | starter = i; 134 | break; 135 | } 136 | } 137 | } 138 | visited.clear(); 139 | distances.clear(); 140 | return m; 141 | } 142 | AggMIS::Types::IntVector_h* NaiveMIS(int k, AggMIS::Types::Graph_h graph) { 143 | AggMIS::Types::IntVector_h *m = new AggMIS::Types::IntVector_h(graph.Size(), -1); 144 | AggMIS::Types::IntVector_h &mis = *m; 145 | AggMIS::Types::IntVector_h distances(graph.Size(), 1000); 146 | for (int i = 0; i < graph.Size(); i++) 147 | { 148 | if (mis[i] == -1) 149 | { 150 | mis[i] = 1; 151 | distances[i] = 0; 152 | 153 | std::queue frontier; 154 | 155 | // Pushing neighbors of mis node onto frontier to start out 156 | int start = (*(graph.indices))[i]; 157 | int 
end = (*(graph.indices))[i + 1]; 158 | for (int nIt = start; nIt < end; nIt++) 159 | { 160 | int neighbor = (*(graph.adjacency))[nIt]; 161 | if (distances[neighbor] > 1) 162 | { 163 | frontier.push(neighbor); 164 | distances[neighbor] = 1; 165 | } 166 | } 167 | 168 | // Exploring to the end of the frontier: 169 | while (!frontier.empty()) 170 | { 171 | int exploring = frontier.front(); 172 | int distance = distances[exploring]; 173 | frontier.pop(); 174 | 175 | // Set node out of mis 176 | mis[exploring] = 0; 177 | 178 | // Add the neighbors 179 | if (distance < k) 180 | { 181 | int start = (*(graph.indices))[exploring]; 182 | int end = (*(graph.indices))[exploring + 1]; 183 | for (int nIt = start; nIt < end; nIt++) 184 | { 185 | int neighbor = (*(graph.adjacency))[nIt]; 186 | if (distances[neighbor] > distance + 1) 187 | { 188 | distances[neighbor] = distance + 1; 189 | frontier.push(neighbor); 190 | } 191 | } 192 | } 193 | } 194 | } 195 | } 196 | distances.clear(); 197 | return m; 198 | } 199 | } 200 | } -------------------------------------------------------------------------------- /src/core/cuda/FEM2D.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define PI 3.1415927 6 | 7 | FEM2D::FEM2D(TriMesh* meshPtr) 8 | { 9 | initializeWithTriMesh(meshPtr); 10 | } 11 | 12 | void FEM2D::initializeWithTriMesh(TriMesh* meshPtr) 13 | { 14 | nv = meshPtr->vertices.size(); 15 | ne = meshPtr->faces.size(); 16 | IdxVector_h tri0(ne); 17 | IdxVector_h tri1(ne); 18 | IdxVector_h tri2(ne); 19 | 20 | for(int i = 0; i < ne; i++) 21 | { 22 | tri0[i] = meshPtr->faces[i][0]; 23 | tri1[i] = meshPtr->faces[i][1]; 24 | tri2[i] = meshPtr->faces[i][2]; 25 | } 26 | 27 | Vector_h_CG vx(nv); 28 | Vector_h_CG vy(nv); 29 | 30 | for(int i = 0; i < nv; i++) 31 | { 32 | vx[i] = meshPtr->vertices[i][0]; 33 | vy[i] = meshPtr->vertices[i][1]; 34 | } 35 | 36 | d_tri0 = tri0; 37 | d_tri1 = tri1; 38 | d_tri2 = tri2; 39 | 
40 | d_vx = vx; 41 | d_vy = vy; 42 | 43 | tri0.resize(0); 44 | tri1.resize(0); 45 | tri2.resize(0); 46 | vx.resize(0); 47 | vy.resize(0); 48 | } 49 | 50 | double compute_gamma(double x) 51 | { 52 | int i,k,m; 53 | double ga,gr,r,z; 54 | 55 | static double g[] = { 56 | 1.0, 57 | 0.5772156649015329, 58 | -0.6558780715202538, 59 | -0.420026350340952e-1, 60 | 0.1665386113822915, 61 | -0.421977345555443e-1, 62 | -0.9621971527877e-2, 63 | 0.7218943246663e-2, 64 | -0.11651675918591e-2, 65 | -0.2152416741149e-3, 66 | 0.1280502823882e-3, 67 | -0.201348547807e-4, 68 | -0.12504934821e-5, 69 | 0.1133027232e-5, 70 | -0.2056338417e-6, 71 | 0.6116095e-8, 72 | 0.50020075e-8, 73 | -0.11812746e-8, 74 | 0.1043427e-9, 75 | 0.77823e-11, 76 | -0.36968e-11, 77 | 0.51e-12, 78 | -0.206e-13, 79 | -0.54e-14, 80 | 0.14e-14}; 81 | 82 | if (x > 171.0) return 1e308; // This value is an overflow flag. 83 | if (x == (int)x) { 84 | if (x > 0.0) { 85 | ga = 1.0; // use factorial 86 | for (i=2;i 1.0) { 95 | z = fabs(x); 96 | m = (int)z; 97 | r = 1.0; 98 | for (k=1;k<=m;k++) { 99 | r *= (z-k); 100 | } 101 | z -= m; 102 | } 103 | else 104 | z = x; 105 | gr = g[24]; 106 | for (k=23;k>=0;k--) { 107 | gr = gr*z+g[k]; 108 | } 109 | ga = 1.0/(gr*z); 110 | if (fabs(x) > 1.0) { 111 | ga *= r; 112 | if (x < 0.0) { 113 | ga = -M_PI/(x*ga*sin(M_PI*x)); 114 | } 115 | } 116 | } 117 | return ga; 118 | } 119 | 120 | void FEM2D::JacobiPoly(int degree, Vector_h_CG x,int alpha,int beta, Vector_h_CG &y) 121 | { 122 | int s = x.size(); 123 | if (degree == 0) 124 | { 125 | 126 | y.resize(s); 127 | for (int i =0; i< s; i++) 128 | { 129 | y[i] = 1.0; 130 | } 131 | 132 | } 133 | else if(degree == 1) 134 | { 135 | y.resize(s); 136 | for (int i =0; i 11 | - [FEM Aknowledgements](#fem-aknowledgements) 12 | - [Requirements](#requirements) 13 | - [Building](#building)
14 | - [Linux and OSX](#linux-and-osx)
15 | - [Windows](#windows)
16 | - [Running Examples](#running-examples) 17 | - [Using the Library](#using-the-library) 18 | - [Testing](#testing)
19 | 20 |
21 | 22 |

FEM Aknowledgements

23 | ** 24 | Architecting the Finite Element Method Pipeline for the GPU**
25 | 26 | 27 | **AUTHORS:** 28 |
Zhisong Fu(*a*)
29 | T. James Lewis(*b*)
30 | Robert M. Kirby(*a*)
31 | Ross T. Whitaker(*a*)
32 | 33 | This library solves partial differential equations for coefficient values 34 | on vertices located on a tetrahedral or triangle mesh on the GPU. Several mesh formats 35 | are supported, and are read by the TetGen Library and the 36 | TriMesh Library. 37 | The METIS library is used to partition unstructured 38 | meshes. 39 | Google Test is used for testing. 40 | 

41 | Requirements 42 | ============== 43 | 44 | * Git, CMake (3.0+ recommended), and the standard system build environment tools. 45 | * You will need a CUDA Compatible Graphics card. See here. You will also need to be sure your card has CUDA compute capability of at least 2.0. 46 | * SCI-Solver_FEM is compatible with the latest CUDA toolkit (7.0). Download here. 47 | * This project has been tested on OpenSuse 12.3 (Dartmouth) on NVidia GeForce GTX 570 HD, Ubuntu 14.04 on NVidia GeForce GTX 560 Ti, Windows 7 on NVidia GeForce GTX 775M, and OSX 10.10 on NVidia GeForce GTX 775M. 48 | * If you have a CUDA compatible card with the above operating systems, and are experiencing issues, please contact the repository owners. 49 | * Windows: You will need Microsoft Visual Studio 2013 build tools. This document describes the "NMake" process. 50 | * OSX: Please be sure to follow setup for CUDA here. There are several compatibility requirements for different Mac machines, including using a different version of CUDA (e.g., 5.5). 51 | 52 | Building 53 | ============== 54 | 55 | 

Linux and OSX

56 | In a terminal: 57 | ```c++ 58 | mkdir SCI-SOLVER_FEM/build 59 | cd SCI-SOLVER_FEM/build 60 | cmake ../src 61 | make 62 | ``` 63 | 64 |

Windows

65 | Open a Visual Studio (32 or 64 bit) Native Tools Command Prompt. 66 | Follow these commands: 67 | ```c++ 68 | mkdir C:\Path\To\SCI-Solver_FEM\build 69 | cd C:\Path\To\SCI-Solver_FEM\build 70 | cmake -G "NMake Makefiles" ..\src 71 | nmake 72 | ``` 73 | 74 | **Note:** For all platforms, you may need to specify your CUDA toolkit location (especially if you have multiple CUDA versions installed): 75 | ```c++ 76 | cmake -DCUDA_TOOLKIT_ROOT_DIR="~/NVIDIA/CUDA-7.0" ../src 77 | ``` 78 | (Assuming this is the location). 79 | 80 | **Note:** If you have compile errors such as undefined reference: atomicAdd, it is likely you need to set your compute capability manually. CMake outputs whether compute capability was determined automatically, or if you need to set it manually. The default (and known working) minimum compute capability is 2.0. 81 | 82 | ```c++ 83 | cmake -DCUDA_COMPUTE_CAPABILITY=20 ../src 84 | make 85 | ``` 86 | 87 | 88 | Running Examples 89 | ============== 90 | 91 | You will need to enable examples in your build to compile and run them 92 | 93 | ```c++ 94 | cmake -DBUILD_EXAMPLES=ON ../src 95 | make 96 | ``` 97 | 98 | You will find the example binaries built in the build/examples directory. 99 | 100 | Run the examples in the build directory: 101 | 102 | ```c++ 103 | examples/Example1 104 | examples/Example2 105 | ... 106 | ``` 107 | Each example has a -h flag that prints options for that example.
108 | 109 | Follow the example source code in src/examples to learn how to use the library. 110 |
111 | To run examples similar to those in the paper, use calls such as the following:
112 | 2D FEM, Egg Carton
113 | examples/Example2 -v -i ../src/test/test_data/simple.ply -A "../src/test/test_data/simpleTri.mat" -b "../src/test/test_data/simpleTrib.mat"
114 | 115 | **NOTE** All examples output a set of result.vtk (name based on the input 116 | filename) VTK files in the current directory. These files are easily viewed via VTK readers like Paraview. 117 | You can clip and add iso-values to more distinctly visualize the result. An output.mat 118 | MATLAB file is also written (solution coefficients). 119 | 120 | Using the Library 121 | ============== 122 | 123 | A basic usage of the library links to the libFEM_CORE library during build and 124 | includes the headers needed, which are usually no more than: 125 | 126 | ```c++ 127 | #include "FEMSolver.h" 128 | ``` 129 | 130 | Then a program would set up the FEM parameters using a 131 | FEMSolver object and call 132 | object.solveFEM() to generate 133 | the answer matrices. 134 | 135 | Here is a minimal usage example (using a tet mesh).
136 | ```c++ 137 | #include "FEMSolver.h" 138 | int main(int argc, char *argv[]) 139 | { 140 | //the filename in the constructor below means ~/myTetMesh.node & ~/myTetMesh.ele 141 | FEMSolver data("~/myTetMesh", false, true); // tet mesh, not a tri mesh, and verbose 142 | //read in your Matrices (A matrix object is a member of FEMSolver) 143 | data.readMatlabSparseMatrix("~/A_MATRIX.mat"); 144 | Vector_h_CG b_h(data.getMatrixRows(), 1.0); 145 | data.readMatlabArray("~/b_array.mat", &b_h); 146 | //The answer vector. 147 | Vector_h_CG x_h(data.getMatrixRows(), 0.0); 148 | //Run the solver 149 | data.solveFEM(&x_h, &b_h); 150 | //now use the result 151 | data.writeMatlabArray("outputName.mat", x_h); 152 | //write the VTK 153 | std::vector vals; 154 | for (size_t i = 0; i < x_h.size(); i++){ 155 | vals.push_back(x_h[i]); 156 | } 157 | data.writeVTK(vals, "outputName"); 158 | return 0; 159 | } 160 | ``` 161 | 162 | You can access the A matrix and meshes directly: 163 | ```c++ 164 | TetMesh * FEMSolver::tetMesh_; 165 | TriMesh * FEMSolver::triMesh_; 166 | ``` 167 | 168 | 

FEM Solver Parameters

169 | 170 | ```C++ 171 | class FEMSolver { 172 | bool verbose_; // output verbosity 173 | std::string filename_; // mesh file name 174 | int maxLevels_; // the maximum number of levels 175 | int maxIters_; // the maximum solve iterations 176 | int preInnerIters_; // the pre inner iterations for GSINNER 177 | int postInnerIters_; // the post inner iterations for GSINNER 178 | int postRelaxes_; // the number of post relax iterations 179 | int cycleIters_; // the number of CG iterations per outer iteration 180 | int dsType_; // data structure type 181 | int topSize_; // max size of coarsest level 182 | int randMisParameters_; // max size of coarsest level 183 | int partitionMaxSize_; // max size of the partition 184 | int aggregatorType_; // aggregator oldMis (0), metis bottom up (1), 185 | // metis top down (2), aggMisGPU (3), aggMisCPU (4), newMisLight (5) 186 | int convergeType_; // the convergence tolerance algorithm 187 | double tolerance_; // the convergence tolerance 188 | int cycleType_; // the cycle algorithm 189 | int solverType_; // the solving algorithm 190 | double smootherWeight_; // the weight parameter used in a smoother 191 | double proOmega_; // the weight parameter used in prolongator smoother 192 | int device_; // the GPU device number to specify 193 | int blockSize_; 194 | ... 195 | }; 196 | ``` 197 | 
198 | You will need to make sure your CMake/Makefile/build setup knows where 199 | to find the library and header files. See the examples and their CMakeLists.txt.

200 | Testing 201 | ============== 202 | The repo comes with a set of regression tests to see if recent changes break 203 | expected results. To build the tests, you will need to set 204 | BUILD_TESTING to "ON" in either ccmake or when calling CMake: 205 | 206 | ```c++ 207 | cmake -DBUILD_TESTING=ON ../src 208 | ``` 209 | After building, run make test or ctest in the build directory to run tests.
210 |

Windows

211 | The gtest library included in the repo needs to be built with 212 | forced shared libraries on Windows, so use the following: 213 | 214 | ```c++ 215 | cmake -DBUILD_TESTING=ON -Dgtest_forced_shared_crt=ON ../src 216 | ``` 217 | Be sure to include all other necessary CMake definitions as annotated above. 218 | -------------------------------------------------------------------------------- /src/core/cuda/randomizedMIS_GPU.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | __global__ void Generate_Randoms_Kernel(int size, int iterations, unsigned int *randoms, unsigned int *seeds) 4 | { 5 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 6 | unsigned int z = seeds[idx]; 7 | int offset = idx; 8 | int step = 32768; 9 | 10 | for (int i = 0; i < iterations; i++) 11 | { 12 | if (offset < size) 13 | { 14 | unsigned int b = (((z << 13) ^ z) >> 19); 15 | z = (((z & UINT_MAX) << 12) ^ b); 16 | randoms[offset] = z; 17 | offset += step; 18 | } 19 | } 20 | } 21 | 22 | __global__ void First_Initialize_Kernel(int size, unsigned int *randoms, int *bestSeen, int *origin) 23 | { 24 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 25 | if (idx < size) 26 | { 27 | // Set the origin to be self 28 | origin[idx] = idx; 29 | 30 | // Set the bestSeen value to be random 31 | bestSeen[idx] = randoms[idx] % 1000000; 32 | } 33 | } 34 | 35 | __global__ void Initialize_Kernel(int size, unsigned int *randoms, int *bestSeen, int *origin, int *mis, int *incomplete) 36 | { 37 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 38 | if (idx < size) 39 | { 40 | // Taustep is performed with S1=13, S2=19, S3=12, and M=UINT_MAX coded into kernel 41 | unsigned int z = randoms[idx]; 42 | unsigned int b = (((z << 13) ^ z) >> 19); 43 | z = (((z & UINT_MAX) << 12) ^ b); 44 | 45 | // Set the origin to be self 46 | origin[idx] = idx; 47 | 48 | // Set the bestSeen value to be either random from 0-1000000 or 1000001 if in MIS 49 | int status = 
mis[idx]; 50 | int value = 0; 51 | if (status == 1) 52 | value = 1000001; 53 | 54 | bestSeen[idx] = (mis[idx] == -1) ? (z % 1000000) : value; 55 | 56 | // Write out new random value for seeding 57 | randoms[idx] = z; 58 | } 59 | 60 | // Reset incomplete value 61 | if (idx == 0) 62 | incomplete[0] = 0; 63 | } 64 | 65 | __global__ void Iterate_Kernel(int size, int *originIn, int *originOut, int *bestSeenIn, int *bestSeenOut, int *adjIndexes, int *adjacency) 66 | { 67 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 68 | if (idx < size) 69 | { 70 | int bestSeen = bestSeenIn[idx]; 71 | int origin = originIn[idx]; 72 | if (bestSeen < 1000001) 73 | { 74 | int start = adjIndexes[idx]; 75 | int end = adjIndexes[idx + 1]; 76 | 77 | // Look at all the neighbors and take best values: 78 | for (int i = start; i < end; i++) 79 | { 80 | int neighbor = adjacency[i]; 81 | int challenger = bestSeenIn[neighbor]; 82 | int challengerOrigin = originIn[neighbor]; 83 | 84 | if (challenger > 0 && challenger == bestSeen && challengerOrigin > origin) 85 | { 86 | origin = challengerOrigin; 87 | } 88 | 89 | 90 | if (challenger > bestSeen) 91 | { 92 | bestSeen = challenger; 93 | origin = challengerOrigin; 94 | } 95 | } 96 | } 97 | 98 | // Write out the best values found 99 | bestSeenOut[idx] = bestSeen; 100 | originOut[idx] = origin; 101 | } 102 | } 103 | 104 | __global__ void Final_Iterate_Kernel(int size, int *originIn, int *originOut, int *bestSeenIn, int *bestSeenOut, int *adjIndexes, int *adjacency, int *mis, int *incomplete) 105 | { 106 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 107 | if (idx < size) 108 | { 109 | int bestSeen = bestSeenIn[idx]; 110 | int origin = originIn[idx]; 111 | if (bestSeen < 1000001) 112 | { 113 | int start = adjIndexes[idx]; 114 | int end = adjIndexes[idx + 1]; 115 | 116 | // Look at all the neighbors and take best values: 117 | for (int i = start; i < end; i++) 118 | { 119 | int neighbor = adjacency[i]; 120 | unsigned int challenger = 
bestSeenIn[neighbor]; 121 | int challengerOrigin = originIn[neighbor]; 122 | 123 | if (challenger > 0 && challenger == bestSeen && challengerOrigin > origin) 124 | { 125 | origin = challengerOrigin; 126 | } 127 | 128 | if (challenger > bestSeen) 129 | { 130 | bestSeen = challenger; 131 | origin = challengerOrigin; 132 | } 133 | } 134 | } 135 | 136 | // Write new MIS status 137 | int misStatus = -1; 138 | if (origin == idx) 139 | misStatus = 1; 140 | else if (bestSeen == 1000001) 141 | misStatus = 0; 142 | 143 | mis[idx] = misStatus; 144 | 145 | 146 | // If this node is still unassigned mark 147 | if (misStatus == -1) 148 | { 149 | incomplete[0] = 1; 150 | } 151 | } 152 | } 153 | 154 | void misHelpers::randomizedMIS(IdxVector_d adjIndexes, IdxVector_d adjacency, IdxVector_d &mis, int k) 155 | { 156 | // Setting to prefer the cache: 157 | cudaFuncSetCacheConfig(Initialize_Kernel, cudaFuncCachePreferL1); 158 | cudaFuncSetCacheConfig(Iterate_Kernel, cudaFuncCachePreferL1); 159 | cudaFuncSetCacheConfig(Final_Iterate_Kernel, cudaFuncCachePreferL1); 160 | 161 | cudaEvent_t s, e; 162 | cudaEventCreate(&s); 163 | cudaEventCreate(&e); 164 | 165 | int size = adjIndexes.size() - 1; 166 | mis.resize(size); 167 | thrust::fill(mis.begin(),mis.end(), -1); 168 | 169 | IntVector_d incomplete(1); // This is a single value that will be marked with 1 by initialize kernel if there are unallocated nodes //IdxVector_d misIn(size, -1); // The current MIS assignments 1 = in MIS, 0 = not in MIS, -1 = undetermined 170 | cusp::array1d randoms(size); // Set of random values generated by each threads random generator 171 | IntVector_d bestSeenIn(size); // Holds the highest value seen so far propogated through neigbhors each iteration 172 | IntVector_d bestSeenOut(size); // Holds the highest value seen so far propogated through neigbhors each iteration 173 | IntVector_d originIn(size); // The index where the best seen value originated 174 | IntVector_d originOut(size); // The index where the best 
seen value originated 175 | cusp::array1d seeds(32768); // Host side vector of initial random values 176 | 177 | // Getting raw pointers: 178 | int *incomplete_d = thrust::raw_pointer_cast(&incomplete[0]); 179 | int *misIn_d = thrust::raw_pointer_cast(&mis[0]); 180 | unsigned int *randoms_d = thrust::raw_pointer_cast(&randoms[0]); 181 | unsigned int *seeds_d = thrust::raw_pointer_cast(&seeds[0]); 182 | int *bestSeenIn_d = thrust::raw_pointer_cast(&bestSeenIn[0]); 183 | int *bestSeenOut_d = thrust::raw_pointer_cast(&bestSeenOut[0]); 184 | int *originIn_d = thrust::raw_pointer_cast(&originIn[0]); 185 | int *originOut_d = thrust::raw_pointer_cast(&originOut[0]); 186 | int *adjIndexes_d = thrust::raw_pointer_cast(&(adjIndexes[0])); 187 | int *adjacency_d = thrust::raw_pointer_cast(&(adjacency[0])); 188 | 189 | // Setting up for kernel launches 190 | int blockSize = 256; 191 | int nBlocks = size / blockSize + (size % blockSize == 0 ? 0 : 1); 192 | 193 | 194 | // Seeding the randoms array: 195 | srand(time(NULL)); 196 | unsigned *seeds_h = new unsigned[32768]; 197 | for (int i = 0; i < 32768; i++) 198 | seeds_h[i] = (unsigned)rand(); 199 | thrust::copy(seeds_h, seeds_h + 32768, seeds.begin()); 200 | int iterations = (size + 32767) / 32768; 201 | Generate_Randoms_Kernel <<<128, 256>>> (size, iterations, randoms_d, seeds_d); 202 | 203 | // Running the initialize kernel: 204 | First_Initialize_Kernel <<< nBlocks, blockSize >>> (size, randoms_d, bestSeenIn_d, originIn_d); 205 | 206 | // Running the iteration kernel k times swapping in and out for each iteration 207 | for (int i = 0; i < k; i++) 208 | { 209 | if (i < k - 1) 210 | { 211 | Iterate_Kernel <<< nBlocks, blockSize >>> (size, originIn_d, originOut_d, bestSeenIn_d, bestSeenOut_d, adjIndexes_d, adjacency_d); 212 | } 213 | else 214 | { 215 | Final_Iterate_Kernel <<< nBlocks, blockSize >>> (size, originIn_d, originOut_d, bestSeenIn_d, bestSeenOut_d, adjIndexes_d, adjacency_d, misIn_d, incomplete_d); 216 | } 217 | 218 | 
// Swap the pointers for the next iteration: 219 | int *temp = originIn_d; 220 | originIn_d = originOut_d; 221 | originOut_d = temp; 222 | 223 | int *temp2 = bestSeenIn_d; 224 | bestSeenIn_d = bestSeenOut_d; 225 | bestSeenOut_d = temp2; 226 | } 227 | 228 | // If not complete get new randoms and repeat 229 | cudaThreadSynchronize(); 230 | int unallocated = incomplete[0]; 231 | 232 | while (unallocated == 1) 233 | { 234 | // Initialize kernel 235 | Initialize_Kernel <<< nBlocks, blockSize >>> (size, randoms_d, bestSeenIn_d, originIn_d, misIn_d, incomplete_d); 236 | 237 | // Running the iteration kernel k times swapping in and out for each iteration 238 | for (int i = 0; i < k; i++) 239 | { 240 | if (i < k - 1) 241 | { 242 | Iterate_Kernel <<< nBlocks, blockSize >>> (size, originIn_d, originOut_d, bestSeenIn_d, bestSeenOut_d, adjIndexes_d, adjacency_d); 243 | } 244 | else 245 | { 246 | Final_Iterate_Kernel <<< nBlocks, blockSize >>> (size, originIn_d, originOut_d, bestSeenIn_d, bestSeenOut_d, adjIndexes_d, adjacency_d, misIn_d, incomplete_d); 247 | } 248 | 249 | 250 | // Swap the pointers for the next iteration: 251 | int *temp = originIn_d; 252 | originIn_d = originOut_d; 253 | originOut_d = temp; 254 | 255 | int *temp2 = bestSeenIn_d; 256 | bestSeenIn_d = bestSeenOut_d; 257 | bestSeenOut_d = temp2; 258 | } 259 | 260 | // Checking if done: 261 | cudaThreadSynchronize(); 262 | unallocated = incomplete[0]; 263 | } 264 | 265 | // Deallocating temporary arrays: 266 | incomplete.resize(0); 267 | randoms.resize(0); 268 | bestSeenIn.resize(0); 269 | bestSeenOut.resize(0); 270 | originIn.resize(0); 271 | originOut.resize(0); 272 | } 273 | -------------------------------------------------------------------------------- /src/core/include/smoothers/smoother.h: -------------------------------------------------------------------------------- 1 | #ifndef __SMOOTHER_H__ 2 | #define __SMOOTHER_H__ 3 | template class Smoother; 4 | 5 | enum SmootherType 6 | { 7 | JACOBI, 
JACOBI_NO_CUSP, GAUSSSEIDEL, POLYNOMIAL, GSINNER
};

// NOTE(review): the two include targets below, the template parameter list of
// Smoother, and the template arguments of every cusp:: type in this header are
// missing in this copy of the file (they look like they were stripped as
// "<...>" markup). Confirm against the repository before relying on it.
#include
#include

class FEMSolver;

/*************************************
 * Smoother base class
 *
 * Abstract interface: every pre*/post* member and smooth() is pure virtual,
 * so concrete smoothers must implement all of them. The class refers to the
 * names Matrix and Vector, which are presumably its template parameters --
 * TODO confirm.
 *************************************/
template
class Smoother
{
  // Shorthands for the nested types published by the Matrix type.
  typedef typename Matrix::value_type ValueType;
  typedef typename Matrix::index_type IndexType;
  typedef typename Matrix::memory_space MemorySpace;
public:
  // Pre-smoothing hook taking the inner matrix in ELL form. b, x and bc are
  // passed by non-const reference, so implementations may modify them.
  // NOTE(review): the name suggests relax/residual/restrict fused in one
  // step -- confirm against the implementations.
  virtual void preRRRFull(const cusp::ell_matrix& AinEll,
                          const cusp::coo_matrix& AoutCoo,
                          const cusp::array1d& aggregateIdx,
                          const cusp::array1d& partitionIdx,
                          const cusp::hyb_matrix& restrictor,
                          const cusp::array1d& permutation,
                          cusp::array1d& b,
                          cusp::array1d& x,
                          cusp::array1d& bc,
                          int level_id,
                          int largestblksz) = 0;
  // Same role as preRRRFull, but the inner matrix is CSR and two extra size
  // hints (largestnumentries, largestnumperrow) are supplied.
  virtual void preRRRFullCsr(const cusp::csr_matrix& AinCsr,
                             const cusp::coo_matrix& AoutCoo,
                             const cusp::array1d& aggregateIdx,
                             const cusp::array1d& partitionIdx,
                             const cusp::hyb_matrix& restrictor,
                             const cusp::array1d& permutation,
                             cusp::array1d& b,
                             cusp::array1d& x,
                             cusp::array1d& bc,
                             int level_id,
                             int largestblksz,
                             int largestnumentries,
                             int largestnumperrow) = 0;

  // Variant taking COO inner/outer matrices plus block index arrays for both;
  // presumably for symmetric storage -- TODO confirm. `verbose` defaults to
  // false.
  virtual void preRRRFullSymmetric(const cusp::coo_matrix& AinSysCoo,
                                   const cusp::coo_matrix& AoutSysCoo,
                                   const cusp::array1d& AinBlockIdx,
                                   const cusp::array1d& AoutBlockIdx,
                                   const cusp::array1d& aggregateIdx,
                                   const cusp::array1d& partitionIdx,
                                   const cusp::hyb_matrix& restrictor,
                                   const cusp::array1d& permutation,
                                   cusp::array1d& b,
                                   cusp::array1d& x,
                                   cusp::array1d& bc,
                                   int level_id,
                                   int largestblksz,
                                   int largestnumentries,
                                   bool verbose = false) = 0;
  // Like preRRRFullSymmetric but without AoutBlockIdx / verbose, and with two
  // additional read-only index arrays (segSyncIdx, partSyncIdx).
  virtual void preRRRFullSymmetricSync(const cusp::coo_matrix& AinSysCoo,
                                       const cusp::coo_matrix& AoutSysCoo,
                                       const cusp::array1d& AinBlockIdx,
                                       const cusp::array1d& aggregateIdx,
                                       const cusp::array1d& partitionIdx,
                                       const cusp::hyb_matrix& restrictor,
                                       const cusp::array1d& permutation,
                                       cusp::array1d& b,
                                       cusp::array1d& x,
                                       cusp::array1d& bc,
                                       const cusp::array1d& segSyncIdx,
                                       const cusp::array1d& partSyncIdx,
                                       int level_id,
                                       int largestblksz,
                                       int largestnumentries) = 0;

  // Post-smoothing hook. b is read-only; x and xc are non-const and may be
  // modified. NOTE(review): the name suggests prolongate/correct/relax --
  // confirm against the implementations.
  virtual void postPCR(const cusp::ell_matrix& AinEll,
                       const cusp::coo_matrix& AoutCoo,
                       const cusp::array1d& aggregateIdx,
                       const cusp::array1d& partitionIdx,
                       const cusp::array1d& P,
                       const cusp::array1d& b,
                       cusp::array1d& x,
                       cusp::array1d& xc) = 0;

  // Post-smoothing hook that additionally receives the outer block indices,
  // a HYB prolongator, a permutation and the level / block-size hints.
  virtual void postPCRFull(const cusp::ell_matrix& AinEll,
                           const cusp::coo_matrix& AoutCoo,
                           const cusp::array1d& AoutBlockIdx,
                           const cusp::array1d& aggregateIdx,
                           const cusp::array1d& partitionIdx,
                           const cusp::hyb_matrix& prolongator,
                           const cusp::array1d& permutation,
                           const cusp::array1d& b,
                           cusp::array1d& x,
                           cusp::array1d& xc,
                           int level_id,
                           int largestblksz) = 0;
  // Same role as postPCRFull, but the inner matrix is CSR and two extra size
  // hints (largestnumentries, largestnumperrow) are supplied.
  virtual void postPCRFullCsr(const cusp::csr_matrix& AinCsr,
                              const cusp::coo_matrix& AoutCoo,
                              const cusp::array1d& AoutBlockIdx,
                              const cusp::array1d& aggregateIdx,
                              const cusp::array1d& partitionIdx,
                              const cusp::hyb_matrix& prolongator,
                              const cusp::array1d& permutation,
                              const cusp::array1d& b,
                              cusp::array1d& x,
                              cusp::array1d& xc,
                              int level_id,
                              int largestblksz,
                              int largestnumentries,
                              int largestnumperrow) = 0;

  // Post-smoothing counterpart of preRRRFullSymmetric. Note the parameter
  // order differs from the pre* variant: each matrix is immediately followed
  // by its block index array.
  virtual void postPCRFullSymmetric(const cusp::coo_matrix& AinSysCoo,
                                    const cusp::array1d& AinBlockIdx,
                                    const cusp::coo_matrix& AoutSysCoo,
                                    const cusp::array1d& AoutBlockIdx,
                                    const cusp::array1d& aggregateIdx,
                                    const cusp::array1d& partitionIdx,
                                    const cusp::hyb_matrix& prolongator,
                                    const cusp::array1d& permutation,
                                    const cusp::array1d& b,
                                    cusp::array1d& x,
                                    cusp::array1d& xc,
                                    int level_id,
                                    int largestblksz,
                                    int largestnumentries) = 0;
  // Like postPCRFullSymmetric, with two additional read-only index arrays
  // (segSyncIdx, partSyncIdx).
  virtual void postPCRFullSymmetricSync(const cusp::coo_matrix& AinSysCoo,
                                        const cusp::array1d& AinBlockIdx,
                                        const cusp::coo_matrix& AoutSysCoo,
                                        const cusp::array1d& AoutBlockIdx,
                                        const cusp::array1d& aggregateIdx,
                                        const cusp::array1d& partitionIdx,
                                        const cusp::hyb_matrix& prolongator,
                                        const cusp::array1d& permutation,
                                        const cusp::array1d& b,
                                        cusp::array1d& x,
                                        cusp::array1d& xc,
                                        const cusp::array1d& segSyncIdx,
                                        const cusp::array1d& partSyncIdx,
                                        int level_id,
                                        int largestblksz,
                                        int largestnumentries) = 0;


  // Generic smoothing entry point: A and b are read-only, x is updated.
  virtual void smooth(const Matrix &A, const Vector &b, Vector &x) = 0;
  // Not pure virtual: a default implementation is expected elsewhere.
  virtual void smooth_with_0_initial_guess(const Matrix &A, const Vector &b, Vector &x); //default initializes the vector to 0 and calls smooth
  // Virtual so that concrete smoothers can be destroyed through a Smoother*.
  virtual ~Smoother();
  // Static factory returning a raw Smoother*.
  // NOTE(review): ownership of the returned pointer is not visible here;
  // presumably the caller must delete it -- confirm.
  static Smoother* allocate(double smootherWeight,
                            int preInnerIters, int postInnerIters, int postRelaxes, const Matrix_d& A);
  // Public data member; presumably the diagonal of the system matrix --
  // TODO confirm against the implementations.
  Vector diag;
};
#endif

--------------------------------------------------------------------------------
/src/core/aggmis/cuda/AggMIS_Aggregation_GPU.cu:
--------------------------------------------------------------------------------
/*
 * File: AggMIS_Aggregation_GPU.cu
 * Author: T.
James Lewis 4 | * 5 | * Created on April 19, 2013, 11:30 AM 6 | */ 7 | #include "AggMIS_Aggregation_GPU.h" 8 | #include "AggMIS_Types.h" 9 | #include "AggMIS_GraphHelpers.h" 10 | namespace AggMIS { 11 | namespace Aggregation { 12 | namespace Kernels { 13 | __global__ void allocateNodesKernel(int size, 14 | int *adjIndexes, 15 | int *adjacency, 16 | int *partIn, 17 | int *partOut, 18 | int *aggregated) { 19 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 20 | if (idx < size) 21 | { 22 | if (aggregated[idx] == 0) 23 | { 24 | int start = adjIndexes[idx]; 25 | int end = adjIndexes[idx + 1]; 26 | 27 | // Storage for possible aggregations. 28 | int candidates[10]; 29 | int candidateCounts[10]; 30 | for (int i = 0; i < 10; i++) 31 | { 32 | candidates[i] = -1; 33 | candidateCounts[i] = 0; 34 | } 35 | 36 | // Going through neighbors to aggregate: 37 | for (int i = start; i < end; i++) 38 | { 39 | int candidate = partIn[adjacency[i]]; 40 | if (candidate != -1) 41 | { 42 | for (int j = 0; j < 10 && candidate != -1; j++) 43 | { 44 | if (candidates[j] == -1) 45 | { 46 | candidates[j] = candidate; 47 | candidateCounts[j] = 1; 48 | } else 49 | { 50 | if (candidates[j] == candidate) 51 | { 52 | candidateCounts[j] += 1; 53 | candidate = -1; 54 | } 55 | } 56 | } 57 | } 58 | } 59 | 60 | // Finding the most adjacent aggregate and adding node to it: 61 | int addTo = candidates[0]; 62 | int count = candidateCounts[0]; 63 | for (int i = 1; i < 10; i++) 64 | { 65 | if (candidateCounts[i] > count) 66 | { 67 | count = candidateCounts[i]; 68 | addTo = candidates[i]; 69 | } 70 | } 71 | partOut[idx] = addTo; 72 | if (addTo != -1) 73 | { 74 | aggregated[idx] = 1; 75 | } 76 | } 77 | } 78 | } 79 | __global__ void checkAggregationFillAggregates(int size, 80 | int *adjIndices, 81 | int *adjacency, 82 | int* aggregation, 83 | int* valuesIn, 84 | int* valuesOut, 85 | int* incomplete) { 86 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 87 | if (idx < size) 88 | { 89 | // Find the currently 
marked distance 90 | int currentVal = valuesIn[idx]; 91 | int currentAgg = aggregation[idx]; 92 | 93 | // Checking if any neighbors have a better value 94 | int start = adjIndices[idx]; 95 | int end = adjIndices[idx + 1]; 96 | for (int i = start; i < end; i++) 97 | { 98 | int neighborAgg = aggregation[adjacency[i]]; 99 | int neighborVal = valuesIn[adjacency[i]]; 100 | if (neighborAgg == currentAgg && neighborVal > currentVal) 101 | { 102 | currentVal = neighborVal; 103 | incomplete[0] = 1; 104 | } 105 | } 106 | 107 | // Write out the distance to the output vector: 108 | valuesOut[idx] = currentVal; 109 | } 110 | } 111 | } 112 | 113 | // Functions 114 | AggMIS::Types::IntVector_d* AggregateToNearest(AggMIS::Types::Graph_d &graph, 115 | AggMIS::Types::IntVector_d &roots) { 116 | // Create temp vectors to work with 117 | int size = graph.Size(); 118 | AggMIS::Types::IntVector_d *aggregated = new AggMIS::Types::IntVector_d(roots); 119 | AggMIS::Types::IntVector_d *partIn = new AggMIS::Types::IntVector_d(roots); 120 | 121 | 122 | // Prefix sum to number aggregate roots: 123 | thrust::inclusive_scan(partIn->begin(), partIn->end(), partIn->begin()); 124 | 125 | // Transform non root nodes to -1 126 | thrust::transform(partIn->begin(), partIn->end(), aggregated->begin(), partIn->begin(), Functors::NumberParts()); 127 | AggMIS::Types::IntVector_d *partOut = new AggMIS::Types::IntVector_d(*partIn); 128 | 129 | // Preparing to call aggregate kernel: 130 | int *partIn_d = thrust::raw_pointer_cast(partIn->data()); // Pointer to partIn vector 131 | int *partOut_d = thrust::raw_pointer_cast(partOut->data()); // Pointer to partOut vector 132 | int *adjIndexes_d = thrust::raw_pointer_cast(graph.indices->data()); // Pointer to adjacency indexes 133 | int *adjacency_d = thrust::raw_pointer_cast(graph.adjacency->data()); // Pointer to adjacency 134 | int *aggregated_d = thrust::raw_pointer_cast(aggregated->data()); // Pointer to aggregated 135 | bool complete = false; // Indicates 
whether all nodes are aggregated 136 | 137 | // Figuring out block sizes for kernel call: 138 | int blockSize = 256; 139 | int nBlocks = size / blockSize + (size%blockSize == 0 ? 0 : 1); 140 | 141 | while (!complete) 142 | { 143 | // Allocating nodes 144 | Kernels::allocateNodesKernel << < nBlocks, blockSize >> > (size, adjIndexes_d, adjacency_d, partIn_d, partOut_d, aggregated_d); 145 | 146 | // Copying partOut to partIn 147 | thrust::copy(partOut->begin(), partOut->end(), partIn->begin()); 148 | 149 | // Checking if done 150 | int unallocatedNodes = thrust::count(aggregated->begin(), aggregated->end(), 0); 151 | complete = unallocatedNodes == 0; 152 | } 153 | 154 | // Cleaning up 155 | aggregated->clear(); 156 | partOut->clear(); 157 | delete aggregated; 158 | delete partOut; 159 | 160 | return partIn; 161 | } 162 | bool IsValidAggregation(AggMIS::Types::Graph_d &graph, 163 | AggMIS::Types::IntVector_d &aggregation, 164 | bool verbose) { 165 | // Counter for number of errors found 166 | int errors = 0; 167 | 168 | // Check to make sure that the aggregate id's are sequential 169 | AggMIS::Types::IntVector_d scratch(aggregation); 170 | thrust::sort(scratch.begin(), scratch.end()); 171 | int newLength = thrust::unique(scratch.begin(), scratch.end()) - scratch.begin(); 172 | scratch.resize(newLength); 173 | 174 | if (scratch[0] != 0 || scratch[scratch.size() - 1] != scratch.size() - 1) 175 | { 176 | if (verbose) { 177 | printf("Error found in aggregation: improper aggregate indices:\n"); 178 | int firstId = scratch[0]; 179 | int lastId = scratch[scratch.size() - 1]; 180 | int count = scratch.size(); 181 | printf("\tFirst index is %d, last index is %d, there are %d unique id's\n", firstId, lastId, count); 182 | } 183 | errors++; 184 | return false; 185 | } 186 | 187 | // Check to make sure each aggregate is a connected component 188 | AggMIS::Types::IntVector_d *valuesIn = GraphHelpers::GetIndicesVector(aggregation.size()); 189 | AggMIS::Types::IntVector_d 
valuesOut(aggregation.size()); 190 | AggMIS::Types::IntVector_d incomplete(1, 1); 191 | 192 | // Figuring out block sizes for kernel call: 193 | int size = graph.Size(); 194 | int blockSize = 256; 195 | int nBlocks = size / blockSize + (size%blockSize == 0 ? 0 : 1); 196 | 197 | // Getting raw pointers 198 | int *valuesIn_d = thrust::raw_pointer_cast(valuesIn->data()); 199 | int *valuesOut_d = thrust::raw_pointer_cast(&valuesOut[0]); 200 | int *incomplete_d = thrust::raw_pointer_cast(&incomplete[0]); 201 | int *adjacency_d = thrust::raw_pointer_cast(graph.adjacency->data()); 202 | int *adjIndices_d = thrust::raw_pointer_cast(graph.indices->data()); 203 | int *aggregation_d = thrust::raw_pointer_cast(&aggregation[0]); 204 | 205 | // Flood filling within each aggregate 206 | int *originalOut = valuesIn_d; 207 | while (incomplete[0] == 1) 208 | { 209 | incomplete[0] = 0; 210 | Kernels::checkAggregationFillAggregates << < nBlocks, blockSize >> > 211 | (size, adjIndices_d, adjacency_d, aggregation_d, valuesIn_d, valuesOut_d, incomplete_d); 212 | int *temp = valuesIn_d; 213 | valuesIn_d = valuesOut_d; 214 | valuesOut_d = temp; 215 | } 216 | 217 | if (originalOut != valuesOut_d) 218 | valuesOut.assign(valuesIn->begin(), valuesIn->end()); 219 | valuesIn->assign(aggregation.begin(), aggregation.end()); 220 | 221 | // 222 | int correctLength = newLength; 223 | thrust::sort(thrust::make_zip_iterator(thrust::make_tuple(valuesIn->begin(), valuesOut.begin())), 224 | thrust::make_zip_iterator(thrust::make_tuple(valuesIn->end(), valuesOut.end()))); 225 | newLength = thrust::unique(thrust::make_zip_iterator(thrust::make_tuple(valuesOut.begin(), valuesIn->begin())), 226 | thrust::make_zip_iterator(thrust::make_tuple(valuesOut.end(), valuesIn->end()))) 227 | - thrust::make_zip_iterator(thrust::make_tuple(valuesOut.begin(), valuesIn->begin())); 228 | 229 | valuesIn->resize(newLength); 230 | valuesOut.resize(newLength); 231 | 232 | if (newLength != correctLength) 233 | { 234 | if 
(verbose) 235 | printf("Error: there were %d connected components found and %d aggregates\n", newLength, correctLength); 236 | errors++; 237 | 238 | AggMIS::Types::IntVector_h aggIds(*valuesIn); 239 | AggMIS::Types::IntVector_h nodeIds(valuesOut); 240 | for (int i = 0; i < valuesOut.size() - 1; i++) 241 | { 242 | int currentAgg = aggIds[i]; 243 | int nextAgg = aggIds[i + 1]; 244 | if (currentAgg == nextAgg && verbose) 245 | printf("Aggregate %d was filled from %d and %d\n", currentAgg, nodeIds[i], nodeIds[i + 1]); 246 | } 247 | } 248 | 249 | // Clean up 250 | scratch.resize(0); 251 | valuesIn->resize(0); 252 | delete valuesIn; 253 | incomplete.resize(0); 254 | 255 | return errors == 0; 256 | } 257 | AggMIS::Types::Graph_d* GetAggregateMap(AggMIS::Types::IntVector_d& aggregation) { 258 | AggMIS::Types::Graph_d* output = new AggMIS::Types::Graph_d(); 259 | // Setting adjacency of output to be indices 260 | GraphHelpers::SetToIndicesVector(aggregation.size(), *(output->adjacency)); 261 | AggMIS::Types::IntVector_d aggLabels(aggregation.begin(), aggregation.end()); 262 | 263 | // Sorting by key to get node id's grouped by aggregates 264 | thrust::sort_by_key(aggLabels.begin(), aggLabels.end(), output->adjacency->begin()); 265 | 266 | // Resizing the indices to aggregate count 267 | int maxAggregate = aggLabels[aggLabels.size() - 1]; 268 | output->indices->resize(maxAggregate + 2, 0); 269 | 270 | // Figuring out block sizes for kernel call: 271 | int size = aggregation.size(); 272 | int blockSize = 256; 273 | int nBlocks = size / blockSize + (size%blockSize == 0 ? 
0 : 1); 274 | 275 | // Calling kernel to find indices for each part: 276 | GraphHelpers::Kernels::findPartIndicesKernel << < nBlocks, blockSize >> > 277 | (size, 278 | AggMIS::Types::StartOf(aggLabels), 279 | output->indStart()); 280 | 281 | // Cleaning up 282 | aggLabels.clear(); 283 | 284 | return output; 285 | } 286 | } 287 | } 288 | --------------------------------------------------------------------------------